1 /* 2 * AArch64 translation 3 * 4 * Copyright (c) 2013 Alexander Graf <agraf@suse.de> 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 #include "qemu/osdep.h" 20 21 #include "translate.h" 22 #include "translate-a64.h" 23 #include "qemu/log.h" 24 #include "disas/disas.h" 25 #include "arm_ldst.h" 26 #include "semihosting/semihost.h" 27 #include "cpregs.h" 28 29 static TCGv_i64 cpu_X[32]; 30 static TCGv_i64 cpu_pc; 31 32 /* Load/store exclusive handling */ 33 static TCGv_i64 cpu_exclusive_high; 34 35 static const char *regnames[] = { 36 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", 37 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", 38 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", 39 "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp" 40 }; 41 42 enum a64_shift_type { 43 A64_SHIFT_TYPE_LSL = 0, 44 A64_SHIFT_TYPE_LSR = 1, 45 A64_SHIFT_TYPE_ASR = 2, 46 A64_SHIFT_TYPE_ROR = 3 47 }; 48 49 /* 50 * Helpers for extracting complex instruction fields 51 */ 52 53 /* 54 * For load/store with an unsigned 12 bit immediate scaled by the element 55 * size. The input has the immediate field in bits [14:3] and the element 56 * size in [2:0]. 57 */ 58 static int uimm_scaled(DisasContext *s, int x) 59 { 60 unsigned imm = x >> 3; 61 unsigned scale = extract32(x, 0, 3); 62 return imm << scale; 63 } 64 65 /* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */ 66 static int scale_by_log2_tag_granule(DisasContext *s, int x) 67 { 68 return x << LOG2_TAG_GRANULE; 69 } 70 71 /* 72 * Include the generated decoders. 73 */ 74 75 #include "decode-sme-fa64.c.inc" 76 #include "decode-a64.c.inc" 77 78 /* Table based decoder typedefs - used when the relevant bits for decode 79 * are too awkwardly scattered across the instruction (eg SIMD). 80 */ 81 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn); 82 83 typedef struct AArch64DecodeTable { 84 uint32_t pattern; 85 uint32_t mask; 86 AArch64DecodeFn *disas_fn; 87 } AArch64DecodeTable; 88 89 /* initialize TCG globals. */ 90 void a64_translate_init(void) 91 { 92 int i; 93 94 cpu_pc = tcg_global_mem_new_i64(cpu_env, 95 offsetof(CPUARMState, pc), 96 "pc"); 97 for (i = 0; i < 32; i++) { 98 cpu_X[i] = tcg_global_mem_new_i64(cpu_env, 99 offsetof(CPUARMState, xregs[i]), 100 regnames[i]); 101 } 102 103 cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env, 104 offsetof(CPUARMState, exclusive_high), "exclusive_high"); 105 } 106 107 /* 108 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns 109 */ 110 static int get_a64_user_mem_index(DisasContext *s) 111 { 112 /* 113 * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL, 114 * which is the usual mmu_idx for this cpu state. 115 */ 116 ARMMMUIdx useridx = s->mmu_idx; 117 118 if (s->unpriv) { 119 /* 120 * We have pre-computed the condition for AccType_UNPRIV. 
121 * Therefore we should never get here with a mmu_idx for 122 * which we do not know the corresponding user mmu_idx. 123 */ 124 switch (useridx) { 125 case ARMMMUIdx_E10_1: 126 case ARMMMUIdx_E10_1_PAN: 127 useridx = ARMMMUIdx_E10_0; 128 break; 129 case ARMMMUIdx_E20_2: 130 case ARMMMUIdx_E20_2_PAN: 131 useridx = ARMMMUIdx_E20_0; 132 break; 133 default: 134 g_assert_not_reached(); 135 } 136 } 137 return arm_to_core_mmu_idx(useridx); 138 } 139 140 static void set_btype_raw(int val) 141 { 142 tcg_gen_st_i32(tcg_constant_i32(val), cpu_env, 143 offsetof(CPUARMState, btype)); 144 } 145 146 static void set_btype(DisasContext *s, int val) 147 { 148 /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */ 149 tcg_debug_assert(val >= 1 && val <= 3); 150 set_btype_raw(val); 151 s->btype = -1; 152 } 153 154 static void reset_btype(DisasContext *s) 155 { 156 if (s->btype != 0) { 157 set_btype_raw(0); 158 s->btype = 0; 159 } 160 } 161 162 static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff) 163 { 164 assert(s->pc_save != -1); 165 if (tb_cflags(s->base.tb) & CF_PCREL) { 166 tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff); 167 } else { 168 tcg_gen_movi_i64(dest, s->pc_curr + diff); 169 } 170 } 171 172 void gen_a64_update_pc(DisasContext *s, target_long diff) 173 { 174 gen_pc_plus_diff(s, cpu_pc, diff); 175 s->pc_save = s->pc_curr + diff; 176 } 177 178 /* 179 * Handle Top Byte Ignore (TBI) bits. 180 * 181 * If address tagging is enabled via the TCR TBI bits: 182 * + for EL2 and EL3 there is only one TBI bit, and if it is set 183 * then the address is zero-extended, clearing bits [63:56] 184 * + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0 185 * and TBI1 controls addresses with bit 55 == 1. 186 * If the appropriate TBI bit is set for the address then 187 * the address is sign-extended from bit 55 into bits [63:56] 188 * 189 * Here We have concatenated TBI{1,0} into tbi. 190 */ 191 static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst, 192 TCGv_i64 src, int tbi) 193 { 194 if (tbi == 0) { 195 /* Load unmodified address */ 196 tcg_gen_mov_i64(dst, src); 197 } else if (!regime_has_2_ranges(s->mmu_idx)) { 198 /* Force tag byte to all zero */ 199 tcg_gen_extract_i64(dst, src, 0, 56); 200 } else { 201 /* Sign-extend from bit 55. */ 202 tcg_gen_sextract_i64(dst, src, 0, 56); 203 204 switch (tbi) { 205 case 1: 206 /* tbi0 but !tbi1: only use the extension if positive */ 207 tcg_gen_and_i64(dst, dst, src); 208 break; 209 case 2: 210 /* !tbi0 but tbi1: only use the extension if negative */ 211 tcg_gen_or_i64(dst, dst, src); 212 break; 213 case 3: 214 /* tbi0 and tbi1: always use the extension */ 215 break; 216 default: 217 g_assert_not_reached(); 218 } 219 } 220 } 221 222 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) 223 { 224 /* 225 * If address tagging is enabled for instructions via the TCR TBI bits, 226 * then loading an address into the PC will clear out any tag. 227 */ 228 gen_top_byte_ignore(s, cpu_pc, src, s->tbii); 229 s->pc_save = -1; 230 } 231 232 /* 233 * Handle MTE and/or TBI. 234 * 235 * For TBI, ideally, we would do nothing. Proper behaviour on fault is 236 * for the tag to be present in the FAR_ELx register. But for user-only 237 * mode we do not have a TLB with which to implement this, so we must 238 * remove the top byte now. 239 * 240 * Always return a fresh temporary that we can increment independently 241 * of the write-back address. 
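 * For system mode the address is returned unchanged here; the softmmu
 * TLB and fault paths then see the original tagged address.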
242 */ 243 244 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) 245 { 246 TCGv_i64 clean = tcg_temp_new_i64(); 247 #ifdef CONFIG_USER_ONLY 248 gen_top_byte_ignore(s, clean, addr, s->tbid); 249 #else 250 tcg_gen_mov_i64(clean, addr); 251 #endif 252 return clean; 253 } 254 255 /* Insert a zero tag into src, with the result at dst. */ 256 static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src) 257 { 258 tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4)); 259 } 260 261 static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, 262 MMUAccessType acc, int log2_size) 263 { 264 gen_helper_probe_access(cpu_env, ptr, 265 tcg_constant_i32(acc), 266 tcg_constant_i32(get_mem_index(s)), 267 tcg_constant_i32(1 << log2_size)); 268 } 269 270 /* 271 * For MTE, check a single logical or atomic access. This probes a single 272 * address, the exact one specified. The size and alignment of the access 273 * is not relevant to MTE, per se, but watchpoints do require the size, 274 * and we want to recognize those before making any other changes to state. 275 */ 276 static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, 277 bool is_write, bool tag_checked, 278 MemOp memop, bool is_unpriv, 279 int core_idx) 280 { 281 if (tag_checked && s->mte_active[is_unpriv]) { 282 TCGv_i64 ret; 283 int desc = 0; 284 285 desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx); 286 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 287 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 288 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 289 desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop)); 290 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1); 291 292 ret = tcg_temp_new_i64(); 293 gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr); 294 295 return ret; 296 } 297 return clean_data_tbi(s, addr); 298 } 299 300 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, 301 bool tag_checked, MemOp memop) 302 { 303 return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop, 304 false, get_mem_index(s)); 305 } 306 307 /* 308 * For MTE, check multiple logical sequential accesses. 309 */ 310 TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, 311 bool tag_checked, int total_size, MemOp single_mop) 312 { 313 if (tag_checked && s->mte_active[0]) { 314 TCGv_i64 ret; 315 int desc = 0; 316 317 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 318 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 319 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 320 desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); 321 desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop)); 322 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1); 323 324 ret = tcg_temp_new_i64(); 325 gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr); 326 327 return ret; 328 } 329 return clean_data_tbi(s, addr); 330 } 331 332 /* 333 * Generate the special alignment check that applies to AccType_ATOMIC 334 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be 335 * naturally aligned, but it must not cross a 16-byte boundary. 336 * See AArch64.CheckAlignment(). 
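 * Below we compute ((addr + imm) & 15) + access_size and invoke the
 * unaligned-access helper when the result exceeds 16, i.e. when the
 * access would cross a 16-byte boundary.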
337 */ 338 static void check_lse2_align(DisasContext *s, int rn, int imm, 339 bool is_write, MemOp mop) 340 { 341 TCGv_i32 tmp; 342 TCGv_i64 addr; 343 TCGLabel *over_label; 344 MMUAccessType type; 345 int mmu_idx; 346 347 tmp = tcg_temp_new_i32(); 348 tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn)); 349 tcg_gen_addi_i32(tmp, tmp, imm & 15); 350 tcg_gen_andi_i32(tmp, tmp, 15); 351 tcg_gen_addi_i32(tmp, tmp, memop_size(mop)); 352 353 over_label = gen_new_label(); 354 tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label); 355 356 addr = tcg_temp_new_i64(); 357 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm); 358 359 type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD, 360 mmu_idx = get_mem_index(s); 361 gen_helper_unaligned_access(cpu_env, addr, tcg_constant_i32(type), 362 tcg_constant_i32(mmu_idx)); 363 364 gen_set_label(over_label); 365 366 } 367 368 /* Handle the alignment check for AccType_ATOMIC instructions. */ 369 static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop) 370 { 371 MemOp size = mop & MO_SIZE; 372 373 if (size == MO_8) { 374 return mop; 375 } 376 377 /* 378 * If size == MO_128, this is a LDXP, and the operation is single-copy 379 * atomic for each doubleword, not the entire quadword; it still must 380 * be quadword aligned. 381 */ 382 if (size == MO_128) { 383 return finalize_memop_atom(s, MO_128 | MO_ALIGN, 384 MO_ATOM_IFALIGN_PAIR); 385 } 386 if (dc_isar_feature(aa64_lse2, s)) { 387 check_lse2_align(s, rn, 0, true, mop); 388 } else { 389 mop |= MO_ALIGN; 390 } 391 return finalize_memop(s, mop); 392 } 393 394 /* Handle the alignment check for AccType_ORDERED instructions. */ 395 static MemOp check_ordered_align(DisasContext *s, int rn, int imm, 396 bool is_write, MemOp mop) 397 { 398 MemOp size = mop & MO_SIZE; 399 400 if (size == MO_8) { 401 return mop; 402 } 403 if (size == MO_128) { 404 return finalize_memop_atom(s, MO_128 | MO_ALIGN, 405 MO_ATOM_IFALIGN_PAIR); 406 } 407 if (!dc_isar_feature(aa64_lse2, s)) { 408 mop |= MO_ALIGN; 409 } else if (!s->naa) { 410 check_lse2_align(s, rn, imm, is_write, mop); 411 } 412 return finalize_memop(s, mop); 413 } 414 415 typedef struct DisasCompare64 { 416 TCGCond cond; 417 TCGv_i64 value; 418 } DisasCompare64; 419 420 static void a64_test_cc(DisasCompare64 *c64, int cc) 421 { 422 DisasCompare c32; 423 424 arm_test_cc(&c32, cc); 425 426 /* 427 * Sign-extend the 32-bit value so that the GE/LT comparisons work 428 * properly. The NE/EQ comparisons are also fine with this choice. 429 */ 430 c64->cond = c32.cond; 431 c64->value = tcg_temp_new_i64(); 432 tcg_gen_ext_i32_i64(c64->value, c32.value); 433 } 434 435 static void gen_rebuild_hflags(DisasContext *s) 436 { 437 gen_helper_rebuild_hflags_a64(cpu_env, tcg_constant_i32(s->current_el)); 438 } 439 440 static void gen_exception_internal(int excp) 441 { 442 assert(excp_is_internal(excp)); 443 gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp)); 444 } 445 446 static void gen_exception_internal_insn(DisasContext *s, int excp) 447 { 448 gen_a64_update_pc(s, 0); 449 gen_exception_internal(excp); 450 s->base.is_jmp = DISAS_NORETURN; 451 } 452 453 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome) 454 { 455 gen_a64_update_pc(s, 0); 456 gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syndrome)); 457 s->base.is_jmp = DISAS_NORETURN; 458 } 459 460 static void gen_step_complete_exception(DisasContext *s) 461 { 462 /* We just completed step of an insn. Move from Active-not-pending 463 * to Active-pending, and then also take the swstep exception. 
464 * This corresponds to making the (IMPDEF) choice to prioritize 465 * swstep exceptions over asynchronous exceptions taken to an exception 466 * level where debug is disabled. This choice has the advantage that 467 * we do not need to maintain internal state corresponding to the 468 * ISV/EX syndrome bits between completion of the step and generation 469 * of the exception, and our syndrome information is always correct. 470 */ 471 gen_ss_advance(s); 472 gen_swstep_exception(s, 1, s->is_ldex); 473 s->base.is_jmp = DISAS_NORETURN; 474 } 475 476 static inline bool use_goto_tb(DisasContext *s, uint64_t dest) 477 { 478 if (s->ss_active) { 479 return false; 480 } 481 return translator_use_goto_tb(&s->base, dest); 482 } 483 484 static void gen_goto_tb(DisasContext *s, int n, int64_t diff) 485 { 486 if (use_goto_tb(s, s->pc_curr + diff)) { 487 /* 488 * For pcrel, the pc must always be up-to-date on entry to 489 * the linked TB, so that it can use simple additions for all 490 * further adjustments. For !pcrel, the linked TB is compiled 491 * to know its full virtual address, so we can delay the 492 * update to pc to the unlinked path. A long chain of links 493 * can thus avoid many updates to the PC. 494 */ 495 if (tb_cflags(s->base.tb) & CF_PCREL) { 496 gen_a64_update_pc(s, diff); 497 tcg_gen_goto_tb(n); 498 } else { 499 tcg_gen_goto_tb(n); 500 gen_a64_update_pc(s, diff); 501 } 502 tcg_gen_exit_tb(s->base.tb, n); 503 s->base.is_jmp = DISAS_NORETURN; 504 } else { 505 gen_a64_update_pc(s, diff); 506 if (s->ss_active) { 507 gen_step_complete_exception(s); 508 } else { 509 tcg_gen_lookup_and_goto_ptr(); 510 s->base.is_jmp = DISAS_NORETURN; 511 } 512 } 513 } 514 515 /* 516 * Register access functions 517 * 518 * These functions are used for directly accessing a register in where 519 * changes to the final register value are likely to be made. If you 520 * need to use a register for temporary calculation (e.g. index type 521 * operations) use the read_* form. 522 * 523 * B1.2.1 Register mappings 524 * 525 * In instruction register encoding 31 can refer to ZR (zero register) or 526 * the SP (stack pointer) depending on context. In QEMU's case we map SP 527 * to cpu_X[31] and ZR accesses to a temporary which can be discarded. 528 * This is the point of the _sp forms. 529 */ 530 TCGv_i64 cpu_reg(DisasContext *s, int reg) 531 { 532 if (reg == 31) { 533 TCGv_i64 t = tcg_temp_new_i64(); 534 tcg_gen_movi_i64(t, 0); 535 return t; 536 } else { 537 return cpu_X[reg]; 538 } 539 } 540 541 /* register access for when 31 == SP */ 542 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg) 543 { 544 return cpu_X[reg]; 545 } 546 547 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64 548 * representing the register contents. This TCGv is an auto-freed 549 * temporary so it need not be explicitly freed, and may be modified. 550 */ 551 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf) 552 { 553 TCGv_i64 v = tcg_temp_new_i64(); 554 if (reg != 31) { 555 if (sf) { 556 tcg_gen_mov_i64(v, cpu_X[reg]); 557 } else { 558 tcg_gen_ext32u_i64(v, cpu_X[reg]); 559 } 560 } else { 561 tcg_gen_movi_i64(v, 0); 562 } 563 return v; 564 } 565 566 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf) 567 { 568 TCGv_i64 v = tcg_temp_new_i64(); 569 if (sf) { 570 tcg_gen_mov_i64(v, cpu_X[reg]); 571 } else { 572 tcg_gen_ext32u_i64(v, cpu_X[reg]); 573 } 574 return v; 575 } 576 577 /* Return the offset into CPUARMState of a slice (from 578 * the least significant end) of FP register Qn (ie 579 * Dn, Sn, Hn or Bn). 
580 * (Note that this is not the same mapping as for A32; see cpu.h) 581 */ 582 static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size) 583 { 584 return vec_reg_offset(s, regno, 0, size); 585 } 586 587 /* Offset of the high half of the 128 bit vector Qn */ 588 static inline int fp_reg_hi_offset(DisasContext *s, int regno) 589 { 590 return vec_reg_offset(s, regno, 1, MO_64); 591 } 592 593 /* Convenience accessors for reading and writing single and double 594 * FP registers. Writing clears the upper parts of the associated 595 * 128 bit vector register, as required by the architecture. 596 * Note that unlike the GP register accessors, the values returned 597 * by the read functions must be manually freed. 598 */ 599 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg) 600 { 601 TCGv_i64 v = tcg_temp_new_i64(); 602 603 tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64)); 604 return v; 605 } 606 607 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg) 608 { 609 TCGv_i32 v = tcg_temp_new_i32(); 610 611 tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32)); 612 return v; 613 } 614 615 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg) 616 { 617 TCGv_i32 v = tcg_temp_new_i32(); 618 619 tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16)); 620 return v; 621 } 622 623 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64). 624 * If SVE is not enabled, then there are only 128 bits in the vector. 625 */ 626 static void clear_vec_high(DisasContext *s, bool is_q, int rd) 627 { 628 unsigned ofs = fp_reg_offset(s, rd, MO_64); 629 unsigned vsz = vec_full_reg_size(s); 630 631 /* Nop move, with side effect of clearing the tail. */ 632 tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz); 633 } 634 635 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v) 636 { 637 unsigned ofs = fp_reg_offset(s, reg, MO_64); 638 639 tcg_gen_st_i64(v, cpu_env, ofs); 640 clear_vec_high(s, false, reg); 641 } 642 643 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v) 644 { 645 TCGv_i64 tmp = tcg_temp_new_i64(); 646 647 tcg_gen_extu_i32_i64(tmp, v); 648 write_fp_dreg(s, reg, tmp); 649 } 650 651 /* Expand a 2-operand AdvSIMD vector operation using an expander function. */ 652 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn, 653 GVecGen2Fn *gvec_fn, int vece) 654 { 655 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 656 is_q ? 16 : 8, vec_full_reg_size(s)); 657 } 658 659 /* Expand a 2-operand + immediate AdvSIMD vector operation using 660 * an expander function. 661 */ 662 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn, 663 int64_t imm, GVecGen2iFn *gvec_fn, int vece) 664 { 665 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 666 imm, is_q ? 16 : 8, vec_full_reg_size(s)); 667 } 668 669 /* Expand a 3-operand AdvSIMD vector operation using an expander function. */ 670 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm, 671 GVecGen3Fn *gvec_fn, int vece) 672 { 673 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 674 vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s)); 675 } 676 677 /* Expand a 4-operand AdvSIMD vector operation using an expander function. 
*/ 678 static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm, 679 int rx, GVecGen4Fn *gvec_fn, int vece) 680 { 681 gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn), 682 vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx), 683 is_q ? 16 : 8, vec_full_reg_size(s)); 684 } 685 686 /* Expand a 2-operand operation using an out-of-line helper. */ 687 static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd, 688 int rn, int data, gen_helper_gvec_2 *fn) 689 { 690 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd), 691 vec_full_reg_offset(s, rn), 692 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 693 } 694 695 /* Expand a 3-operand operation using an out-of-line helper. */ 696 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd, 697 int rn, int rm, int data, gen_helper_gvec_3 *fn) 698 { 699 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 700 vec_full_reg_offset(s, rn), 701 vec_full_reg_offset(s, rm), 702 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 703 } 704 705 /* Expand a 3-operand + fpstatus pointer + simd data value operation using 706 * an out-of-line helper. 707 */ 708 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, 709 int rm, bool is_fp16, int data, 710 gen_helper_gvec_3_ptr *fn) 711 { 712 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 713 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 714 vec_full_reg_offset(s, rn), 715 vec_full_reg_offset(s, rm), fpst, 716 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 717 } 718 719 /* Expand a 3-operand + qc + operation using an out-of-line helper. */ 720 static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn, 721 int rm, gen_helper_gvec_3_ptr *fn) 722 { 723 TCGv_ptr qc_ptr = tcg_temp_new_ptr(); 724 725 tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc)); 726 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 727 vec_full_reg_offset(s, rn), 728 vec_full_reg_offset(s, rm), qc_ptr, 729 is_q ? 16 : 8, vec_full_reg_size(s), 0, fn); 730 } 731 732 /* Expand a 4-operand operation using an out-of-line helper. */ 733 static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn, 734 int rm, int ra, int data, gen_helper_gvec_4 *fn) 735 { 736 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 737 vec_full_reg_offset(s, rn), 738 vec_full_reg_offset(s, rm), 739 vec_full_reg_offset(s, ra), 740 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 741 } 742 743 /* 744 * Expand a 4-operand + fpstatus pointer + simd data value operation using 745 * an out-of-line helper. 746 */ 747 static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn, 748 int rm, int ra, bool is_fp16, int data, 749 gen_helper_gvec_4_ptr *fn) 750 { 751 TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 752 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 753 vec_full_reg_offset(s, rn), 754 vec_full_reg_offset(s, rm), 755 vec_full_reg_offset(s, ra), fpst, 756 is_q ? 16 : 8, vec_full_reg_size(s), data, fn); 757 } 758 759 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier 760 * than the 32 bit equivalent. 761 */ 762 static inline void gen_set_NZ64(TCGv_i64 result) 763 { 764 tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result); 765 tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF); 766 } 767 768 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. 
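 * (Used for the flag-setting logical operations, e.g. ANDS and BICS.)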
*/ 769 static inline void gen_logic_CC(int sf, TCGv_i64 result) 770 { 771 if (sf) { 772 gen_set_NZ64(result); 773 } else { 774 tcg_gen_extrl_i64_i32(cpu_ZF, result); 775 tcg_gen_mov_i32(cpu_NF, cpu_ZF); 776 } 777 tcg_gen_movi_i32(cpu_CF, 0); 778 tcg_gen_movi_i32(cpu_VF, 0); 779 } 780 781 /* dest = T0 + T1; compute C, N, V and Z flags */ 782 static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 783 { 784 TCGv_i64 result, flag, tmp; 785 result = tcg_temp_new_i64(); 786 flag = tcg_temp_new_i64(); 787 tmp = tcg_temp_new_i64(); 788 789 tcg_gen_movi_i64(tmp, 0); 790 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); 791 792 tcg_gen_extrl_i64_i32(cpu_CF, flag); 793 794 gen_set_NZ64(result); 795 796 tcg_gen_xor_i64(flag, result, t0); 797 tcg_gen_xor_i64(tmp, t0, t1); 798 tcg_gen_andc_i64(flag, flag, tmp); 799 tcg_gen_extrh_i64_i32(cpu_VF, flag); 800 801 tcg_gen_mov_i64(dest, result); 802 } 803 804 static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 805 { 806 TCGv_i32 t0_32 = tcg_temp_new_i32(); 807 TCGv_i32 t1_32 = tcg_temp_new_i32(); 808 TCGv_i32 tmp = tcg_temp_new_i32(); 809 810 tcg_gen_movi_i32(tmp, 0); 811 tcg_gen_extrl_i64_i32(t0_32, t0); 812 tcg_gen_extrl_i64_i32(t1_32, t1); 813 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp); 814 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 815 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 816 tcg_gen_xor_i32(tmp, t0_32, t1_32); 817 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 818 tcg_gen_extu_i32_i64(dest, cpu_NF); 819 } 820 821 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 822 { 823 if (sf) { 824 gen_add64_CC(dest, t0, t1); 825 } else { 826 gen_add32_CC(dest, t0, t1); 827 } 828 } 829 830 /* dest = T0 - T1; compute C, N, V and Z flags */ 831 static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 832 { 833 /* 64 bit arithmetic */ 834 TCGv_i64 result, flag, tmp; 835 836 result = tcg_temp_new_i64(); 837 flag = tcg_temp_new_i64(); 838 tcg_gen_sub_i64(result, t0, t1); 839 840 gen_set_NZ64(result); 841 842 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1); 843 tcg_gen_extrl_i64_i32(cpu_CF, flag); 844 845 tcg_gen_xor_i64(flag, result, t0); 846 tmp = tcg_temp_new_i64(); 847 tcg_gen_xor_i64(tmp, t0, t1); 848 tcg_gen_and_i64(flag, flag, tmp); 849 tcg_gen_extrh_i64_i32(cpu_VF, flag); 850 tcg_gen_mov_i64(dest, result); 851 } 852 853 static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 854 { 855 /* 32 bit arithmetic */ 856 TCGv_i32 t0_32 = tcg_temp_new_i32(); 857 TCGv_i32 t1_32 = tcg_temp_new_i32(); 858 TCGv_i32 tmp; 859 860 tcg_gen_extrl_i64_i32(t0_32, t0); 861 tcg_gen_extrl_i64_i32(t1_32, t1); 862 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32); 863 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 864 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32); 865 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 866 tmp = tcg_temp_new_i32(); 867 tcg_gen_xor_i32(tmp, t0_32, t1_32); 868 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp); 869 tcg_gen_extu_i32_i64(dest, cpu_NF); 870 } 871 872 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 873 { 874 if (sf) { 875 gen_sub64_CC(dest, t0, t1); 876 } else { 877 gen_sub32_CC(dest, t0, t1); 878 } 879 } 880 881 /* dest = T0 + T1 + CF; do not compute flags. 
*/ 882 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 883 { 884 TCGv_i64 flag = tcg_temp_new_i64(); 885 tcg_gen_extu_i32_i64(flag, cpu_CF); 886 tcg_gen_add_i64(dest, t0, t1); 887 tcg_gen_add_i64(dest, dest, flag); 888 889 if (!sf) { 890 tcg_gen_ext32u_i64(dest, dest); 891 } 892 } 893 894 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */ 895 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) 896 { 897 if (sf) { 898 TCGv_i64 result = tcg_temp_new_i64(); 899 TCGv_i64 cf_64 = tcg_temp_new_i64(); 900 TCGv_i64 vf_64 = tcg_temp_new_i64(); 901 TCGv_i64 tmp = tcg_temp_new_i64(); 902 TCGv_i64 zero = tcg_constant_i64(0); 903 904 tcg_gen_extu_i32_i64(cf_64, cpu_CF); 905 tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero); 906 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero); 907 tcg_gen_extrl_i64_i32(cpu_CF, cf_64); 908 gen_set_NZ64(result); 909 910 tcg_gen_xor_i64(vf_64, result, t0); 911 tcg_gen_xor_i64(tmp, t0, t1); 912 tcg_gen_andc_i64(vf_64, vf_64, tmp); 913 tcg_gen_extrh_i64_i32(cpu_VF, vf_64); 914 915 tcg_gen_mov_i64(dest, result); 916 } else { 917 TCGv_i32 t0_32 = tcg_temp_new_i32(); 918 TCGv_i32 t1_32 = tcg_temp_new_i32(); 919 TCGv_i32 tmp = tcg_temp_new_i32(); 920 TCGv_i32 zero = tcg_constant_i32(0); 921 922 tcg_gen_extrl_i64_i32(t0_32, t0); 923 tcg_gen_extrl_i64_i32(t1_32, t1); 924 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero); 925 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero); 926 927 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 928 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); 929 tcg_gen_xor_i32(tmp, t0_32, t1_32); 930 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp); 931 tcg_gen_extu_i32_i64(dest, cpu_NF); 932 } 933 } 934 935 /* 936 * Load/Store generators 937 */ 938 939 /* 940 * Store from GPR register to memory. 
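 * The iss_* arguments supply the information needed to report a syndrome
 * with valid ISS (register number, width, etc.) on a data abort; see
 * syn_data_abort_with_iss() below.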
941 */ 942 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source, 943 TCGv_i64 tcg_addr, MemOp memop, int memidx, 944 bool iss_valid, 945 unsigned int iss_srt, 946 bool iss_sf, bool iss_ar) 947 { 948 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop); 949 950 if (iss_valid) { 951 uint32_t syn; 952 953 syn = syn_data_abort_with_iss(0, 954 (memop & MO_SIZE), 955 false, 956 iss_srt, 957 iss_sf, 958 iss_ar, 959 0, 0, 0, 0, 0, false); 960 disas_set_insn_syndrome(s, syn); 961 } 962 } 963 964 static void do_gpr_st(DisasContext *s, TCGv_i64 source, 965 TCGv_i64 tcg_addr, MemOp memop, 966 bool iss_valid, 967 unsigned int iss_srt, 968 bool iss_sf, bool iss_ar) 969 { 970 do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s), 971 iss_valid, iss_srt, iss_sf, iss_ar); 972 } 973 974 /* 975 * Load from memory to GPR register 976 */ 977 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 978 MemOp memop, bool extend, int memidx, 979 bool iss_valid, unsigned int iss_srt, 980 bool iss_sf, bool iss_ar) 981 { 982 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop); 983 984 if (extend && (memop & MO_SIGN)) { 985 g_assert((memop & MO_SIZE) <= MO_32); 986 tcg_gen_ext32u_i64(dest, dest); 987 } 988 989 if (iss_valid) { 990 uint32_t syn; 991 992 syn = syn_data_abort_with_iss(0, 993 (memop & MO_SIZE), 994 (memop & MO_SIGN) != 0, 995 iss_srt, 996 iss_sf, 997 iss_ar, 998 0, 0, 0, 0, 0, false); 999 disas_set_insn_syndrome(s, syn); 1000 } 1001 } 1002 1003 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, 1004 MemOp memop, bool extend, 1005 bool iss_valid, unsigned int iss_srt, 1006 bool iss_sf, bool iss_ar) 1007 { 1008 do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s), 1009 iss_valid, iss_srt, iss_sf, iss_ar); 1010 } 1011 1012 /* 1013 * Store from FP register to memory 1014 */ 1015 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop) 1016 { 1017 /* This writes the bottom N bits of a 128 bit wide vector to memory */ 1018 TCGv_i64 tmplo = tcg_temp_new_i64(); 1019 1020 tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64)); 1021 1022 if ((mop & MO_SIZE) < MO_128) { 1023 tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1024 } else { 1025 TCGv_i64 tmphi = tcg_temp_new_i64(); 1026 TCGv_i128 t16 = tcg_temp_new_i128(); 1027 1028 tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx)); 1029 tcg_gen_concat_i64_i128(t16, tmplo, tmphi); 1030 1031 tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop); 1032 } 1033 } 1034 1035 /* 1036 * Load from memory to FP register 1037 */ 1038 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop) 1039 { 1040 /* This always zero-extends and writes to a full 128 bit wide vector */ 1041 TCGv_i64 tmplo = tcg_temp_new_i64(); 1042 TCGv_i64 tmphi = NULL; 1043 1044 if ((mop & MO_SIZE) < MO_128) { 1045 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop); 1046 } else { 1047 TCGv_i128 t16 = tcg_temp_new_i128(); 1048 1049 tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop); 1050 1051 tmphi = tcg_temp_new_i64(); 1052 tcg_gen_extr_i128_i64(tmplo, tmphi, t16); 1053 } 1054 1055 tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64)); 1056 1057 if (tmphi) { 1058 tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx)); 1059 } 1060 clear_vec_high(s, tmphi != NULL, destidx); 1061 } 1062 1063 /* 1064 * Vector load/store helpers. 
1065 * 1066 * The principal difference between this and a FP load is that we don't 1067 * zero extend as we are filling a partial chunk of the vector register. 1068 * These functions don't support 128 bit loads/stores, which would be 1069 * normal load/store operations. 1070 * 1071 * The _i32 versions are useful when operating on 32 bit quantities 1072 * (eg for floating point single or using Neon helper functions). 1073 */ 1074 1075 /* Get value of an element within a vector register */ 1076 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx, 1077 int element, MemOp memop) 1078 { 1079 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1080 switch ((unsigned)memop) { 1081 case MO_8: 1082 tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off); 1083 break; 1084 case MO_16: 1085 tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off); 1086 break; 1087 case MO_32: 1088 tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off); 1089 break; 1090 case MO_8|MO_SIGN: 1091 tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off); 1092 break; 1093 case MO_16|MO_SIGN: 1094 tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off); 1095 break; 1096 case MO_32|MO_SIGN: 1097 tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off); 1098 break; 1099 case MO_64: 1100 case MO_64|MO_SIGN: 1101 tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off); 1102 break; 1103 default: 1104 g_assert_not_reached(); 1105 } 1106 } 1107 1108 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx, 1109 int element, MemOp memop) 1110 { 1111 int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE); 1112 switch (memop) { 1113 case MO_8: 1114 tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off); 1115 break; 1116 case MO_16: 1117 tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off); 1118 break; 1119 case MO_8|MO_SIGN: 1120 tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off); 1121 break; 1122 case MO_16|MO_SIGN: 1123 tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off); 1124 break; 1125 case MO_32: 1126 case MO_32|MO_SIGN: 1127 tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off); 1128 break; 1129 default: 1130 g_assert_not_reached(); 1131 } 1132 } 1133 1134 /* Set value of an element within a vector register */ 1135 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx, 1136 int element, MemOp memop) 1137 { 1138 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1139 switch (memop) { 1140 case MO_8: 1141 tcg_gen_st8_i64(tcg_src, cpu_env, vect_off); 1142 break; 1143 case MO_16: 1144 tcg_gen_st16_i64(tcg_src, cpu_env, vect_off); 1145 break; 1146 case MO_32: 1147 tcg_gen_st32_i64(tcg_src, cpu_env, vect_off); 1148 break; 1149 case MO_64: 1150 tcg_gen_st_i64(tcg_src, cpu_env, vect_off); 1151 break; 1152 default: 1153 g_assert_not_reached(); 1154 } 1155 } 1156 1157 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src, 1158 int destidx, int element, MemOp memop) 1159 { 1160 int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE); 1161 switch (memop) { 1162 case MO_8: 1163 tcg_gen_st8_i32(tcg_src, cpu_env, vect_off); 1164 break; 1165 case MO_16: 1166 tcg_gen_st16_i32(tcg_src, cpu_env, vect_off); 1167 break; 1168 case MO_32: 1169 tcg_gen_st_i32(tcg_src, cpu_env, vect_off); 1170 break; 1171 default: 1172 g_assert_not_reached(); 1173 } 1174 } 1175 1176 /* Store from vector register to memory */ 1177 static void do_vec_st(DisasContext *s, int srcidx, int element, 1178 TCGv_i64 tcg_addr, MemOp mop) 1179 { 1180 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1181 1182 read_vec_element(s, tcg_tmp, srcidx, element, 
mop & MO_SIZE); 1183 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1184 } 1185 1186 /* Load from memory to vector register */ 1187 static void do_vec_ld(DisasContext *s, int destidx, int element, 1188 TCGv_i64 tcg_addr, MemOp mop) 1189 { 1190 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 1191 1192 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop); 1193 write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE); 1194 } 1195 1196 /* Check that FP/Neon access is enabled. If it is, return 1197 * true. If not, emit code to generate an appropriate exception, 1198 * and return false; the caller should not emit any code for 1199 * the instruction. Note that this check must happen after all 1200 * unallocated-encoding checks (otherwise the syndrome information 1201 * for the resulting exception will be incorrect). 1202 */ 1203 static bool fp_access_check_only(DisasContext *s) 1204 { 1205 if (s->fp_excp_el) { 1206 assert(!s->fp_access_checked); 1207 s->fp_access_checked = true; 1208 1209 gen_exception_insn_el(s, 0, EXCP_UDEF, 1210 syn_fp_access_trap(1, 0xe, false, 0), 1211 s->fp_excp_el); 1212 return false; 1213 } 1214 s->fp_access_checked = true; 1215 return true; 1216 } 1217 1218 static bool fp_access_check(DisasContext *s) 1219 { 1220 if (!fp_access_check_only(s)) { 1221 return false; 1222 } 1223 if (s->sme_trap_nonstreaming && s->is_nonstreaming) { 1224 gen_exception_insn(s, 0, EXCP_UDEF, 1225 syn_smetrap(SME_ET_Streaming, false)); 1226 return false; 1227 } 1228 return true; 1229 } 1230 1231 /* 1232 * Check that SVE access is enabled. If it is, return true. 1233 * If not, emit code to generate an appropriate exception and return false. 1234 * This function corresponds to CheckSVEEnabled(). 1235 */ 1236 bool sve_access_check(DisasContext *s) 1237 { 1238 if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) { 1239 assert(dc_isar_feature(aa64_sme, s)); 1240 if (!sme_sm_enabled_check(s)) { 1241 goto fail_exit; 1242 } 1243 } else if (s->sve_excp_el) { 1244 gen_exception_insn_el(s, 0, EXCP_UDEF, 1245 syn_sve_access_trap(), s->sve_excp_el); 1246 goto fail_exit; 1247 } 1248 s->sve_access_checked = true; 1249 return fp_access_check(s); 1250 1251 fail_exit: 1252 /* Assert that we only raise one exception per instruction. */ 1253 assert(!s->sve_access_checked); 1254 s->sve_access_checked = true; 1255 return false; 1256 } 1257 1258 /* 1259 * Check that SME access is enabled, raise an exception if not. 1260 * Note that this function corresponds to CheckSMEAccess and is 1261 * only used directly for cpregs. 1262 */ 1263 static bool sme_access_check(DisasContext *s) 1264 { 1265 if (s->sme_excp_el) { 1266 gen_exception_insn_el(s, 0, EXCP_UDEF, 1267 syn_smetrap(SME_ET_AccessTrap, false), 1268 s->sme_excp_el); 1269 return false; 1270 } 1271 return true; 1272 } 1273 1274 /* This function corresponds to CheckSMEEnabled. */ 1275 bool sme_enabled_check(DisasContext *s) 1276 { 1277 /* 1278 * Note that unlike sve_excp_el, we have not constrained sme_excp_el 1279 * to be zero when fp_excp_el has priority. This is because we need 1280 * sme_excp_el by itself for cpregs access checks. 1281 */ 1282 if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) { 1283 s->fp_access_checked = true; 1284 return sme_access_check(s); 1285 } 1286 return fp_access_check_only(s); 1287 } 1288 1289 /* Common subroutine for CheckSMEAnd*Enabled. 
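 * The req argument is encoded like SVCR: the SM bit requires PSTATE.SM
 * to be set and the ZA bit requires PSTATE.ZA to be set.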
*/ 1290 bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req) 1291 { 1292 if (!sme_enabled_check(s)) { 1293 return false; 1294 } 1295 if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) { 1296 gen_exception_insn(s, 0, EXCP_UDEF, 1297 syn_smetrap(SME_ET_NotStreaming, false)); 1298 return false; 1299 } 1300 if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) { 1301 gen_exception_insn(s, 0, EXCP_UDEF, 1302 syn_smetrap(SME_ET_InactiveZA, false)); 1303 return false; 1304 } 1305 return true; 1306 } 1307 1308 /* 1309 * This utility function is for doing register extension with an 1310 * optional shift. You will likely want to pass a temporary for the 1311 * destination register. See DecodeRegExtend() in the ARM ARM. 1312 */ 1313 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in, 1314 int option, unsigned int shift) 1315 { 1316 int extsize = extract32(option, 0, 2); 1317 bool is_signed = extract32(option, 2, 1); 1318 1319 if (is_signed) { 1320 switch (extsize) { 1321 case 0: 1322 tcg_gen_ext8s_i64(tcg_out, tcg_in); 1323 break; 1324 case 1: 1325 tcg_gen_ext16s_i64(tcg_out, tcg_in); 1326 break; 1327 case 2: 1328 tcg_gen_ext32s_i64(tcg_out, tcg_in); 1329 break; 1330 case 3: 1331 tcg_gen_mov_i64(tcg_out, tcg_in); 1332 break; 1333 } 1334 } else { 1335 switch (extsize) { 1336 case 0: 1337 tcg_gen_ext8u_i64(tcg_out, tcg_in); 1338 break; 1339 case 1: 1340 tcg_gen_ext16u_i64(tcg_out, tcg_in); 1341 break; 1342 case 2: 1343 tcg_gen_ext32u_i64(tcg_out, tcg_in); 1344 break; 1345 case 3: 1346 tcg_gen_mov_i64(tcg_out, tcg_in); 1347 break; 1348 } 1349 } 1350 1351 if (shift) { 1352 tcg_gen_shli_i64(tcg_out, tcg_out, shift); 1353 } 1354 } 1355 1356 static inline void gen_check_sp_alignment(DisasContext *s) 1357 { 1358 /* The AArch64 architecture mandates that (if enabled via PSTATE 1359 * or SCTLR bits) there is a check that SP is 16-aligned on every 1360 * SP-relative load or store (with an exception generated if it is not). 1361 * In line with general QEMU practice regarding misaligned accesses, 1362 * we omit these checks for the sake of guest program performance. 1363 * This function is provided as a hook so we can more easily add these 1364 * checks in future (possibly as a "favour catching guest program bugs 1365 * over speed" user selectable option). 1366 */ 1367 } 1368 1369 /* 1370 * This provides a simple table based table lookup decoder. It is 1371 * intended to be used when the relevant bits for decode are too 1372 * awkwardly placed and switch/if based logic would be confusing and 1373 * deeply nested. Since it's a linear search through the table, tables 1374 * should be kept small. 1375 * 1376 * It returns the first handler where insn & mask == pattern, or 1377 * NULL if there is no match. 1378 * The table is terminated by an empty mask (i.e. 0) 1379 */ 1380 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table, 1381 uint32_t insn) 1382 { 1383 const AArch64DecodeTable *tptr = table; 1384 1385 while (tptr->mask) { 1386 if ((insn & tptr->mask) == tptr->pattern) { 1387 return tptr->disas_fn; 1388 } 1389 tptr++; 1390 } 1391 return NULL; 1392 } 1393 1394 /* 1395 * The instruction disassembly implemented here matches 1396 * the instruction encoding classifications in chapter C4 1397 * of the ARM Architecture Reference Manual (DDI0487B_a); 1398 * classification names and decode diagrams here should generally 1399 * match up with those in the manual. 
1400 */ 1401 1402 static bool trans_B(DisasContext *s, arg_i *a) 1403 { 1404 reset_btype(s); 1405 gen_goto_tb(s, 0, a->imm); 1406 return true; 1407 } 1408 1409 static bool trans_BL(DisasContext *s, arg_i *a) 1410 { 1411 gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s)); 1412 reset_btype(s); 1413 gen_goto_tb(s, 0, a->imm); 1414 return true; 1415 } 1416 1417 1418 static bool trans_CBZ(DisasContext *s, arg_cbz *a) 1419 { 1420 DisasLabel match; 1421 TCGv_i64 tcg_cmp; 1422 1423 tcg_cmp = read_cpu_reg(s, a->rt, a->sf); 1424 reset_btype(s); 1425 1426 match = gen_disas_label(s); 1427 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1428 tcg_cmp, 0, match.label); 1429 gen_goto_tb(s, 0, 4); 1430 set_disas_label(s, match); 1431 gen_goto_tb(s, 1, a->imm); 1432 return true; 1433 } 1434 1435 static bool trans_TBZ(DisasContext *s, arg_tbz *a) 1436 { 1437 DisasLabel match; 1438 TCGv_i64 tcg_cmp; 1439 1440 tcg_cmp = tcg_temp_new_i64(); 1441 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos); 1442 1443 reset_btype(s); 1444 1445 match = gen_disas_label(s); 1446 tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ, 1447 tcg_cmp, 0, match.label); 1448 gen_goto_tb(s, 0, 4); 1449 set_disas_label(s, match); 1450 gen_goto_tb(s, 1, a->imm); 1451 return true; 1452 } 1453 1454 static bool trans_B_cond(DisasContext *s, arg_B_cond *a) 1455 { 1456 reset_btype(s); 1457 if (a->cond < 0x0e) { 1458 /* genuinely conditional branches */ 1459 DisasLabel match = gen_disas_label(s); 1460 arm_gen_test_cc(a->cond, match.label); 1461 gen_goto_tb(s, 0, 4); 1462 set_disas_label(s, match); 1463 gen_goto_tb(s, 1, a->imm); 1464 } else { 1465 /* 0xe and 0xf are both "always" conditions */ 1466 gen_goto_tb(s, 0, a->imm); 1467 } 1468 return true; 1469 } 1470 1471 static void set_btype_for_br(DisasContext *s, int rn) 1472 { 1473 if (dc_isar_feature(aa64_bti, s)) { 1474 /* BR to {x16,x17} or !guard -> 1, else 3. */ 1475 set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3); 1476 } 1477 } 1478 1479 static void set_btype_for_blr(DisasContext *s) 1480 { 1481 if (dc_isar_feature(aa64_bti, s)) { 1482 /* BLR sets BTYPE to 2, regardless of source guarded page. */ 1483 set_btype(s, 2); 1484 } 1485 } 1486 1487 static bool trans_BR(DisasContext *s, arg_r *a) 1488 { 1489 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1490 set_btype_for_br(s, a->rn); 1491 s->base.is_jmp = DISAS_JUMP; 1492 return true; 1493 } 1494 1495 static bool trans_BLR(DisasContext *s, arg_r *a) 1496 { 1497 TCGv_i64 dst = cpu_reg(s, a->rn); 1498 TCGv_i64 lr = cpu_reg(s, 30); 1499 if (dst == lr) { 1500 TCGv_i64 tmp = tcg_temp_new_i64(); 1501 tcg_gen_mov_i64(tmp, dst); 1502 dst = tmp; 1503 } 1504 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1505 gen_a64_set_pc(s, dst); 1506 set_btype_for_blr(s); 1507 s->base.is_jmp = DISAS_JUMP; 1508 return true; 1509 } 1510 1511 static bool trans_RET(DisasContext *s, arg_r *a) 1512 { 1513 gen_a64_set_pc(s, cpu_reg(s, a->rn)); 1514 s->base.is_jmp = DISAS_JUMP; 1515 return true; 1516 } 1517 1518 static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst, 1519 TCGv_i64 modifier, bool use_key_a) 1520 { 1521 TCGv_i64 truedst; 1522 /* 1523 * Return the branch target for a BRAA/RETA/etc, which is either 1524 * just the destination dst, or that value with the pauth check 1525 * done and the code removed from the high bits. 
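 * If pauth is not active, the authentication is a no-op and dst is
 * returned unchanged.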
1526 */ 1527 if (!s->pauth_active) { 1528 return dst; 1529 } 1530 1531 truedst = tcg_temp_new_i64(); 1532 if (use_key_a) { 1533 gen_helper_autia_combined(truedst, cpu_env, dst, modifier); 1534 } else { 1535 gen_helper_autib_combined(truedst, cpu_env, dst, modifier); 1536 } 1537 return truedst; 1538 } 1539 1540 static bool trans_BRAZ(DisasContext *s, arg_braz *a) 1541 { 1542 TCGv_i64 dst; 1543 1544 if (!dc_isar_feature(aa64_pauth, s)) { 1545 return false; 1546 } 1547 1548 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1549 gen_a64_set_pc(s, dst); 1550 set_btype_for_br(s, a->rn); 1551 s->base.is_jmp = DISAS_JUMP; 1552 return true; 1553 } 1554 1555 static bool trans_BLRAZ(DisasContext *s, arg_braz *a) 1556 { 1557 TCGv_i64 dst, lr; 1558 1559 if (!dc_isar_feature(aa64_pauth, s)) { 1560 return false; 1561 } 1562 1563 dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m); 1564 lr = cpu_reg(s, 30); 1565 if (dst == lr) { 1566 TCGv_i64 tmp = tcg_temp_new_i64(); 1567 tcg_gen_mov_i64(tmp, dst); 1568 dst = tmp; 1569 } 1570 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1571 gen_a64_set_pc(s, dst); 1572 set_btype_for_blr(s); 1573 s->base.is_jmp = DISAS_JUMP; 1574 return true; 1575 } 1576 1577 static bool trans_RETA(DisasContext *s, arg_reta *a) 1578 { 1579 TCGv_i64 dst; 1580 1581 dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m); 1582 gen_a64_set_pc(s, dst); 1583 s->base.is_jmp = DISAS_JUMP; 1584 return true; 1585 } 1586 1587 static bool trans_BRA(DisasContext *s, arg_bra *a) 1588 { 1589 TCGv_i64 dst; 1590 1591 if (!dc_isar_feature(aa64_pauth, s)) { 1592 return false; 1593 } 1594 dst = auth_branch_target(s, cpu_reg(s,a->rn), cpu_reg_sp(s, a->rm), !a->m); 1595 gen_a64_set_pc(s, dst); 1596 set_btype_for_br(s, a->rn); 1597 s->base.is_jmp = DISAS_JUMP; 1598 return true; 1599 } 1600 1601 static bool trans_BLRA(DisasContext *s, arg_bra *a) 1602 { 1603 TCGv_i64 dst, lr; 1604 1605 if (!dc_isar_feature(aa64_pauth, s)) { 1606 return false; 1607 } 1608 dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m); 1609 lr = cpu_reg(s, 30); 1610 if (dst == lr) { 1611 TCGv_i64 tmp = tcg_temp_new_i64(); 1612 tcg_gen_mov_i64(tmp, dst); 1613 dst = tmp; 1614 } 1615 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 1616 gen_a64_set_pc(s, dst); 1617 set_btype_for_blr(s); 1618 s->base.is_jmp = DISAS_JUMP; 1619 return true; 1620 } 1621 1622 static bool trans_ERET(DisasContext *s, arg_ERET *a) 1623 { 1624 TCGv_i64 dst; 1625 1626 if (s->current_el == 0) { 1627 return false; 1628 } 1629 if (s->fgt_eret) { 1630 gen_exception_insn_el(s, 0, EXCP_UDEF, 0, 2); 1631 return true; 1632 } 1633 dst = tcg_temp_new_i64(); 1634 tcg_gen_ld_i64(dst, cpu_env, 1635 offsetof(CPUARMState, elr_el[s->current_el])); 1636 1637 translator_io_start(&s->base); 1638 1639 gen_helper_exception_return(cpu_env, dst); 1640 /* Must exit loop to check un-masked IRQs */ 1641 s->base.is_jmp = DISAS_EXIT; 1642 return true; 1643 } 1644 1645 static bool trans_ERETA(DisasContext *s, arg_reta *a) 1646 { 1647 TCGv_i64 dst; 1648 1649 if (!dc_isar_feature(aa64_pauth, s)) { 1650 return false; 1651 } 1652 if (s->current_el == 0) { 1653 return false; 1654 } 1655 /* The FGT trap takes precedence over an auth trap. */ 1656 if (s->fgt_eret) { 1657 gen_exception_insn_el(s, 0, EXCP_UDEF, a->m ? 
3 : 2, 2); 1658 return true; 1659 } 1660 dst = tcg_temp_new_i64(); 1661 tcg_gen_ld_i64(dst, cpu_env, 1662 offsetof(CPUARMState, elr_el[s->current_el])); 1663 1664 dst = auth_branch_target(s, dst, cpu_X[31], !a->m); 1665 1666 translator_io_start(&s->base); 1667 1668 gen_helper_exception_return(cpu_env, dst); 1669 /* Must exit loop to check un-masked IRQs */ 1670 s->base.is_jmp = DISAS_EXIT; 1671 return true; 1672 } 1673 1674 static bool trans_NOP(DisasContext *s, arg_NOP *a) 1675 { 1676 return true; 1677 } 1678 1679 static bool trans_YIELD(DisasContext *s, arg_YIELD *a) 1680 { 1681 /* 1682 * When running in MTTCG we don't generate jumps to the yield and 1683 * WFE helpers as it won't affect the scheduling of other vCPUs. 1684 * If we wanted to more completely model WFE/SEV so we don't busy 1685 * spin unnecessarily we would need to do something more involved. 1686 */ 1687 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1688 s->base.is_jmp = DISAS_YIELD; 1689 } 1690 return true; 1691 } 1692 1693 static bool trans_WFI(DisasContext *s, arg_WFI *a) 1694 { 1695 s->base.is_jmp = DISAS_WFI; 1696 return true; 1697 } 1698 1699 static bool trans_WFE(DisasContext *s, arg_WFI *a) 1700 { 1701 /* 1702 * When running in MTTCG we don't generate jumps to the yield and 1703 * WFE helpers as it won't affect the scheduling of other vCPUs. 1704 * If we wanted to more completely model WFE/SEV so we don't busy 1705 * spin unnecessarily we would need to do something more involved. 1706 */ 1707 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1708 s->base.is_jmp = DISAS_WFE; 1709 } 1710 return true; 1711 } 1712 1713 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) 1714 { 1715 if (s->pauth_active) { 1716 gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]); 1717 } 1718 return true; 1719 } 1720 1721 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) 1722 { 1723 if (s->pauth_active) { 1724 gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); 1725 } 1726 return true; 1727 } 1728 1729 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) 1730 { 1731 if (s->pauth_active) { 1732 gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); 1733 } 1734 return true; 1735 } 1736 1737 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) 1738 { 1739 if (s->pauth_active) { 1740 gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); 1741 } 1742 return true; 1743 } 1744 1745 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) 1746 { 1747 if (s->pauth_active) { 1748 gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]); 1749 } 1750 return true; 1751 } 1752 1753 static bool trans_ESB(DisasContext *s, arg_ESB *a) 1754 { 1755 /* Without RAS, we must implement this as NOP. */ 1756 if (dc_isar_feature(aa64_ras, s)) { 1757 /* 1758 * QEMU does not have a source of physical SErrors, 1759 * so we are only concerned with virtual SErrors. 1760 * The pseudocode in the ARM for this case is 1761 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then 1762 * AArch64.vESBOperation(); 1763 * Most of the condition can be evaluated at translation time. 1764 * Test for EL2 present, and defer test for SEL2 to runtime. 
1765 */ 1766 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { 1767 gen_helper_vesb(cpu_env); 1768 } 1769 } 1770 return true; 1771 } 1772 1773 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) 1774 { 1775 if (s->pauth_active) { 1776 gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0)); 1777 } 1778 return true; 1779 } 1780 1781 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) 1782 { 1783 if (s->pauth_active) { 1784 gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); 1785 } 1786 return true; 1787 } 1788 1789 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) 1790 { 1791 if (s->pauth_active) { 1792 gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0)); 1793 } 1794 return true; 1795 } 1796 1797 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) 1798 { 1799 if (s->pauth_active) { 1800 gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); 1801 } 1802 return true; 1803 } 1804 1805 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) 1806 { 1807 if (s->pauth_active) { 1808 gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0)); 1809 } 1810 return true; 1811 } 1812 1813 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) 1814 { 1815 if (s->pauth_active) { 1816 gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); 1817 } 1818 return true; 1819 } 1820 1821 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) 1822 { 1823 if (s->pauth_active) { 1824 gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], tcg_constant_i64(0)); 1825 } 1826 return true; 1827 } 1828 1829 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) 1830 { 1831 if (s->pauth_active) { 1832 gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]); 1833 } 1834 return true; 1835 } 1836 1837 static bool trans_CLREX(DisasContext *s, arg_CLREX *a) 1838 { 1839 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 1840 return true; 1841 } 1842 1843 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) 1844 { 1845 /* We handle DSB and DMB the same way */ 1846 TCGBar bar; 1847 1848 switch (a->types) { 1849 case 1: /* MBReqTypes_Reads */ 1850 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; 1851 break; 1852 case 2: /* MBReqTypes_Writes */ 1853 bar = TCG_BAR_SC | TCG_MO_ST_ST; 1854 break; 1855 default: /* MBReqTypes_All */ 1856 bar = TCG_BAR_SC | TCG_MO_ALL; 1857 break; 1858 } 1859 tcg_gen_mb(bar); 1860 return true; 1861 } 1862 1863 static bool trans_ISB(DisasContext *s, arg_ISB *a) 1864 { 1865 /* 1866 * We need to break the TB after this insn to execute 1867 * self-modifying code correctly and also to take 1868 * any pending interrupts immediately. 1869 */ 1870 reset_btype(s); 1871 gen_goto_tb(s, 0, 4); 1872 return true; 1873 } 1874 1875 static bool trans_SB(DisasContext *s, arg_SB *a) 1876 { 1877 if (!dc_isar_feature(aa64_sb, s)) { 1878 return false; 1879 } 1880 /* 1881 * TODO: There is no speculation barrier opcode for TCG; 1882 * MB and end the TB instead. 
1883 */ 1884 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); 1885 gen_goto_tb(s, 0, 4); 1886 return true; 1887 } 1888 1889 static bool trans_CFINV(DisasContext *s, arg_CFINV *a) 1890 { 1891 if (!dc_isar_feature(aa64_condm_4, s)) { 1892 return false; 1893 } 1894 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); 1895 return true; 1896 } 1897 1898 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) 1899 { 1900 TCGv_i32 z; 1901 1902 if (!dc_isar_feature(aa64_condm_5, s)) { 1903 return false; 1904 } 1905 1906 z = tcg_temp_new_i32(); 1907 1908 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); 1909 1910 /* 1911 * (!C & !Z) << 31 1912 * (!(C | Z)) << 31 1913 * ~((C | Z) << 31) 1914 * ~-(C | Z) 1915 * (C | Z) - 1 1916 */ 1917 tcg_gen_or_i32(cpu_NF, cpu_CF, z); 1918 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1); 1919 1920 /* !(Z & C) */ 1921 tcg_gen_and_i32(cpu_ZF, z, cpu_CF); 1922 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1); 1923 1924 /* (!C & Z) << 31 -> -(Z & ~C) */ 1925 tcg_gen_andc_i32(cpu_VF, z, cpu_CF); 1926 tcg_gen_neg_i32(cpu_VF, cpu_VF); 1927 1928 /* C | Z */ 1929 tcg_gen_or_i32(cpu_CF, cpu_CF, z); 1930 1931 return true; 1932 } 1933 1934 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) 1935 { 1936 if (!dc_isar_feature(aa64_condm_5, s)) { 1937 return false; 1938 } 1939 1940 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ 1941 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ 1942 1943 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ 1944 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF); 1945 1946 tcg_gen_movi_i32(cpu_NF, 0); 1947 tcg_gen_movi_i32(cpu_VF, 0); 1948 1949 return true; 1950 } 1951 1952 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) 1953 { 1954 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { 1955 return false; 1956 } 1957 if (a->imm & 1) { 1958 set_pstate_bits(PSTATE_UAO); 1959 } else { 1960 clear_pstate_bits(PSTATE_UAO); 1961 } 1962 gen_rebuild_hflags(s); 1963 s->base.is_jmp = DISAS_TOO_MANY; 1964 return true; 1965 } 1966 1967 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) 1968 { 1969 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { 1970 return false; 1971 } 1972 if (a->imm & 1) { 1973 set_pstate_bits(PSTATE_PAN); 1974 } else { 1975 clear_pstate_bits(PSTATE_PAN); 1976 } 1977 gen_rebuild_hflags(s); 1978 s->base.is_jmp = DISAS_TOO_MANY; 1979 return true; 1980 } 1981 1982 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) 1983 { 1984 if (s->current_el == 0) { 1985 return false; 1986 } 1987 gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(a->imm & PSTATE_SP)); 1988 s->base.is_jmp = DISAS_TOO_MANY; 1989 return true; 1990 } 1991 1992 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) 1993 { 1994 if (!dc_isar_feature(aa64_ssbs, s)) { 1995 return false; 1996 } 1997 if (a->imm & 1) { 1998 set_pstate_bits(PSTATE_SSBS); 1999 } else { 2000 clear_pstate_bits(PSTATE_SSBS); 2001 } 2002 /* Don't need to rebuild hflags since SSBS is a nop */ 2003 s->base.is_jmp = DISAS_TOO_MANY; 2004 return true; 2005 } 2006 2007 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) 2008 { 2009 if (!dc_isar_feature(aa64_dit, s)) { 2010 return false; 2011 } 2012 if (a->imm & 1) { 2013 set_pstate_bits(PSTATE_DIT); 2014 } else { 2015 clear_pstate_bits(PSTATE_DIT); 2016 } 2017 /* There's no need to rebuild hflags because DIT is a nop */ 2018 s->base.is_jmp = DISAS_TOO_MANY; 2019 return true; 2020 } 2021 2022 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) 2023 { 2024 if (dc_isar_feature(aa64_mte, s)) { 2025 /* Full MTE is enabled -- set the TCO bit as directed. 
*/ 2026 if (a->imm & 1) { 2027 set_pstate_bits(PSTATE_TCO); 2028 } else { 2029 clear_pstate_bits(PSTATE_TCO); 2030 } 2031 gen_rebuild_hflags(s); 2032 /* Many factors, including TCO, go into MTE_ACTIVE. */ 2033 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2034 return true; 2035 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { 2036 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ 2037 return true; 2038 } else { 2039 /* Insn not present */ 2040 return false; 2041 } 2042 } 2043 2044 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) 2045 { 2046 gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(a->imm)); 2047 s->base.is_jmp = DISAS_TOO_MANY; 2048 return true; 2049 } 2050 2051 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) 2052 { 2053 gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(a->imm)); 2054 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2055 s->base.is_jmp = DISAS_UPDATE_EXIT; 2056 return true; 2057 } 2058 2059 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) 2060 { 2061 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { 2062 return false; 2063 } 2064 if (sme_access_check(s)) { 2065 int old = s->pstate_sm | (s->pstate_za << 1); 2066 int new = a->imm * 3; 2067 2068 if ((old ^ new) & a->mask) { 2069 /* At least one bit changes. */ 2070 gen_helper_set_svcr(cpu_env, tcg_constant_i32(new), 2071 tcg_constant_i32(a->mask)); 2072 s->base.is_jmp = DISAS_TOO_MANY; 2073 } 2074 } 2075 return true; 2076 } 2077 2078 static void gen_get_nzcv(TCGv_i64 tcg_rt) 2079 { 2080 TCGv_i32 tmp = tcg_temp_new_i32(); 2081 TCGv_i32 nzcv = tcg_temp_new_i32(); 2082 2083 /* build bit 31, N */ 2084 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); 2085 /* build bit 30, Z */ 2086 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); 2087 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); 2088 /* build bit 29, C */ 2089 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); 2090 /* build bit 28, V */ 2091 tcg_gen_shri_i32(tmp, cpu_VF, 31); 2092 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); 2093 /* generate result */ 2094 tcg_gen_extu_i32_i64(tcg_rt, nzcv); 2095 } 2096 2097 static void gen_set_nzcv(TCGv_i64 tcg_rt) 2098 { 2099 TCGv_i32 nzcv = tcg_temp_new_i32(); 2100 2101 /* take NZCV from R[t] */ 2102 tcg_gen_extrl_i64_i32(nzcv, tcg_rt); 2103 2104 /* bit 31, N */ 2105 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); 2106 /* bit 30, Z */ 2107 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); 2108 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); 2109 /* bit 29, C */ 2110 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); 2111 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); 2112 /* bit 28, V */ 2113 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); 2114 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); 2115 } 2116 2117 static void gen_sysreg_undef(DisasContext *s, bool isread, 2118 uint8_t op0, uint8_t op1, uint8_t op2, 2119 uint8_t crn, uint8_t crm, uint8_t rt) 2120 { 2121 /* 2122 * Generate code to emit an UNDEF with correct syndrome 2123 * information for a failed system register access. 2124 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, 2125 * but if FEAT_IDST is implemented then read accesses to registers 2126 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP 2127 * syndrome. 
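 * (Roughly, the point of FEAT_IDST is to let software that traps such
 * accesses -- e.g. a hypervisor emulating ID registers -- identify the
 * access from ESR_ELx rather than having to fetch and decode the
 * trapping instruction itself.)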
2128 */ 2129 uint32_t syndrome; 2130 2131 if (isread && dc_isar_feature(aa64_ids, s) && 2132 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 2133 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2134 } else { 2135 syndrome = syn_uncategorized(); 2136 } 2137 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 2138 } 2139 2140 /* MRS - move from system register 2141 * MSR (register) - move to system register 2142 * SYS 2143 * SYSL 2144 * These are all essentially the same insn in 'read' and 'write' 2145 * versions, with varying op0 fields. 2146 */ 2147 static void handle_sys(DisasContext *s, bool isread, 2148 unsigned int op0, unsigned int op1, unsigned int op2, 2149 unsigned int crn, unsigned int crm, unsigned int rt) 2150 { 2151 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2152 crn, crm, op0, op1, op2); 2153 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2154 bool need_exit_tb = false; 2155 TCGv_ptr tcg_ri = NULL; 2156 TCGv_i64 tcg_rt; 2157 uint32_t syndrome; 2158 2159 if (crn == 11 || crn == 15) { 2160 /* 2161 * Check for TIDCP trap, which must take precedence over 2162 * the UNDEF for "no such register" etc. 2163 */ 2164 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2165 switch (s->current_el) { 2166 case 0: 2167 if (dc_isar_feature(aa64_tidcp1, s)) { 2168 gen_helper_tidcp_el0(cpu_env, tcg_constant_i32(syndrome)); 2169 } 2170 break; 2171 case 1: 2172 gen_helper_tidcp_el1(cpu_env, tcg_constant_i32(syndrome)); 2173 break; 2174 } 2175 } 2176 2177 if (!ri) { 2178 /* Unknown register; this might be a guest error or a QEMU 2179 * unimplemented feature. 2180 */ 2181 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 2182 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 2183 isread ? "read" : "write", op0, op1, crn, crm, op2); 2184 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2185 return; 2186 } 2187 2188 /* Check access permissions */ 2189 if (!cp_access_ok(s->current_el, ri, isread)) { 2190 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2191 return; 2192 } 2193 2194 if (ri->accessfn || (ri->fgt && s->fgt_active)) { 2195 /* Emit code to perform further access permissions checks at 2196 * runtime; this may result in an exception. 2197 */ 2198 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2199 gen_a64_update_pc(s, 0); 2200 tcg_ri = tcg_temp_new_ptr(); 2201 gen_helper_access_check_cp_reg(tcg_ri, cpu_env, 2202 tcg_constant_i32(key), 2203 tcg_constant_i32(syndrome), 2204 tcg_constant_i32(isread)); 2205 } else if (ri->type & ARM_CP_RAISES_EXC) { 2206 /* 2207 * The readfn or writefn might raise an exception; 2208 * synchronize the CPU state in case it does. 2209 */ 2210 gen_a64_update_pc(s, 0); 2211 } 2212 2213 /* Handle special cases first */ 2214 switch (ri->type & ARM_CP_SPECIAL_MASK) { 2215 case 0: 2216 break; 2217 case ARM_CP_NOP: 2218 return; 2219 case ARM_CP_NZCV: 2220 tcg_rt = cpu_reg(s, rt); 2221 if (isread) { 2222 gen_get_nzcv(tcg_rt); 2223 } else { 2224 gen_set_nzcv(tcg_rt); 2225 } 2226 return; 2227 case ARM_CP_CURRENTEL: 2228 /* Reads as current EL value from pstate, which is 2229 * guaranteed to be constant by the tb flags. 2230 */ 2231 tcg_rt = cpu_reg(s, rt); 2232 tcg_gen_movi_i64(tcg_rt, s->current_el << 2); 2233 return; 2234 case ARM_CP_DC_ZVA: 2235 /* Writes clear the aligned block of memory which rt points into. 
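 * E.g. "DC ZVA, x0" zeroes the naturally aligned block containing the
 * address in x0; the block size is 4 << DCZID_EL0.BS bytes
 * (s->dcz_blocksize).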
*/ 2236 if (s->mte_active[0]) { 2237 int desc = 0; 2238 2239 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 2240 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 2241 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 2242 2243 tcg_rt = tcg_temp_new_i64(); 2244 gen_helper_mte_check_zva(tcg_rt, cpu_env, 2245 tcg_constant_i32(desc), cpu_reg(s, rt)); 2246 } else { 2247 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 2248 } 2249 gen_helper_dc_zva(cpu_env, tcg_rt); 2250 return; 2251 case ARM_CP_DC_GVA: 2252 { 2253 TCGv_i64 clean_addr, tag; 2254 2255 /* 2256 * DC_GVA, like DC_ZVA, requires that we supply the original 2257 * pointer for an invalid page. Probe that address first. 2258 */ 2259 tcg_rt = cpu_reg(s, rt); 2260 clean_addr = clean_data_tbi(s, tcg_rt); 2261 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 2262 2263 if (s->ata) { 2264 /* Extract the tag from the register to match STZGM. */ 2265 tag = tcg_temp_new_i64(); 2266 tcg_gen_shri_i64(tag, tcg_rt, 56); 2267 gen_helper_stzgm_tags(cpu_env, clean_addr, tag); 2268 } 2269 } 2270 return; 2271 case ARM_CP_DC_GZVA: 2272 { 2273 TCGv_i64 clean_addr, tag; 2274 2275 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2276 tcg_rt = cpu_reg(s, rt); 2277 clean_addr = clean_data_tbi(s, tcg_rt); 2278 gen_helper_dc_zva(cpu_env, clean_addr); 2279 2280 if (s->ata) { 2281 /* Extract the tag from the register to match STZGM. */ 2282 tag = tcg_temp_new_i64(); 2283 tcg_gen_shri_i64(tag, tcg_rt, 56); 2284 gen_helper_stzgm_tags(cpu_env, clean_addr, tag); 2285 } 2286 } 2287 return; 2288 default: 2289 g_assert_not_reached(); 2290 } 2291 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2292 return; 2293 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2294 return; 2295 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2296 return; 2297 } 2298 2299 if (ri->type & ARM_CP_IO) { 2300 /* I/O operations must end the TB here (whether read or write) */ 2301 need_exit_tb = translator_io_start(&s->base); 2302 } 2303 2304 tcg_rt = cpu_reg(s, rt); 2305 2306 if (isread) { 2307 if (ri->type & ARM_CP_CONST) { 2308 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2309 } else if (ri->readfn) { 2310 if (!tcg_ri) { 2311 tcg_ri = gen_lookup_cp_reg(key); 2312 } 2313 gen_helper_get_cp_reg64(tcg_rt, cpu_env, tcg_ri); 2314 } else { 2315 tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset); 2316 } 2317 } else { 2318 if (ri->type & ARM_CP_CONST) { 2319 /* If not forbidden by access permissions, treat as WI */ 2320 return; 2321 } else if (ri->writefn) { 2322 if (!tcg_ri) { 2323 tcg_ri = gen_lookup_cp_reg(key); 2324 } 2325 gen_helper_set_cp_reg64(cpu_env, tcg_ri, tcg_rt); 2326 } else { 2327 tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset); 2328 } 2329 } 2330 2331 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2332 /* 2333 * A write to any coprocessor register that ends a TB 2334 * must rebuild the hflags for the next TB. 2335 */ 2336 gen_rebuild_hflags(s); 2337 /* 2338 * We default to ending the TB on a coprocessor register write, 2339 * but allow this to be suppressed by the register definition 2340 * (usually only necessary to work around guest bugs). 
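 * For example, a write to SCTLR_EL1 or TCR_EL1 may change state that
 * the cached hflags (and therefore the translated code) depend on, so
 * the next instruction must begin a new TB.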
2341 */ 2342 need_exit_tb = true; 2343 } 2344 if (need_exit_tb) { 2345 s->base.is_jmp = DISAS_UPDATE_EXIT; 2346 } 2347 } 2348 2349 static bool trans_SYS(DisasContext *s, arg_SYS *a) 2350 { 2351 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); 2352 return true; 2353 } 2354 2355 static bool trans_SVC(DisasContext *s, arg_i *a) 2356 { 2357 /* 2358 * For SVC, HVC and SMC we advance the single-step state 2359 * machine before taking the exception. This is architecturally 2360 * mandated, to ensure that single-stepping a system call 2361 * instruction works properly. 2362 */ 2363 uint32_t syndrome = syn_aa64_svc(a->imm); 2364 if (s->fgt_svc) { 2365 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2366 return true; 2367 } 2368 gen_ss_advance(s); 2369 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2370 return true; 2371 } 2372 2373 static bool trans_HVC(DisasContext *s, arg_i *a) 2374 { 2375 if (s->current_el == 0) { 2376 unallocated_encoding(s); 2377 return true; 2378 } 2379 /* 2380 * The pre HVC helper handles cases when HVC gets trapped 2381 * as an undefined insn by runtime configuration. 2382 */ 2383 gen_a64_update_pc(s, 0); 2384 gen_helper_pre_hvc(cpu_env); 2385 /* Architecture requires ss advance before we do the actual work */ 2386 gen_ss_advance(s); 2387 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), 2); 2388 return true; 2389 } 2390 2391 static bool trans_SMC(DisasContext *s, arg_i *a) 2392 { 2393 if (s->current_el == 0) { 2394 unallocated_encoding(s); 2395 return true; 2396 } 2397 gen_a64_update_pc(s, 0); 2398 gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(a->imm))); 2399 /* Architecture requires ss advance before we do the actual work */ 2400 gen_ss_advance(s); 2401 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); 2402 return true; 2403 } 2404 2405 static bool trans_BRK(DisasContext *s, arg_i *a) 2406 { 2407 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); 2408 return true; 2409 } 2410 2411 static bool trans_HLT(DisasContext *s, arg_i *a) 2412 { 2413 /* 2414 * HLT. This has two purposes. 2415 * Architecturally, it is an external halting debug instruction. 2416 * Since QEMU doesn't implement external debug, we treat this as 2417 * it is required for halting debug disabled: it will UNDEF. 2418 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2419 */ 2420 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { 2421 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2422 } else { 2423 unallocated_encoding(s); 2424 } 2425 return true; 2426 } 2427 2428 /* 2429 * Load/Store exclusive instructions are implemented by remembering 2430 * the value/address loaded, and seeing if these are the same 2431 * when the store is performed. This is not actually the architecturally 2432 * mandated semantics, but it works for typical guest code sequences 2433 * and avoids having to monitor regular stores. 2434 * 2435 * The store exclusive uses the atomic cmpxchg primitives to avoid 2436 * races in multi-threaded linux-user and when MTTCG softmmu is 2437 * enabled. 
2438 */ 2439 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, 2440 int size, bool is_pair) 2441 { 2442 int idx = get_mem_index(s); 2443 TCGv_i64 dirty_addr, clean_addr; 2444 MemOp memop = check_atomic_align(s, rn, size + is_pair); 2445 2446 s->is_ldex = true; 2447 dirty_addr = cpu_reg_sp(s, rn); 2448 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); 2449 2450 g_assert(size <= 3); 2451 if (is_pair) { 2452 g_assert(size >= 2); 2453 if (size == 2) { 2454 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2455 if (s->be_data == MO_LE) { 2456 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2457 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2458 } else { 2459 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2460 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2461 } 2462 } else { 2463 TCGv_i128 t16 = tcg_temp_new_i128(); 2464 2465 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); 2466 2467 if (s->be_data == MO_LE) { 2468 tcg_gen_extr_i128_i64(cpu_exclusive_val, 2469 cpu_exclusive_high, t16); 2470 } else { 2471 tcg_gen_extr_i128_i64(cpu_exclusive_high, 2472 cpu_exclusive_val, t16); 2473 } 2474 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2475 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2476 } 2477 } else { 2478 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2479 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2480 } 2481 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); 2482 } 2483 2484 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2485 int rn, int size, int is_pair) 2486 { 2487 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2488 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2489 * [addr] = {Rt}; 2490 * if (is_pair) { 2491 * [addr + datasize] = {Rt2}; 2492 * } 2493 * {Rd} = 0; 2494 * } else { 2495 * {Rd} = 1; 2496 * } 2497 * env->exclusive_addr = -1; 2498 */ 2499 TCGLabel *fail_label = gen_new_label(); 2500 TCGLabel *done_label = gen_new_label(); 2501 TCGv_i64 tmp, clean_addr; 2502 MemOp memop; 2503 2504 /* 2505 * FIXME: We are out of spec here. We have recorded only the address 2506 * from load_exclusive, not the entire range, and we assume that the 2507 * size of the access on both sides match. The architecture allows the 2508 * store to be smaller than the load, so long as the stored bytes are 2509 * within the range recorded by the load. 2510 */ 2511 2512 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ 2513 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); 2514 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); 2515 2516 /* 2517 * The write, and any associated faults, only happen if the virtual 2518 * and physical addresses pass the exclusive monitor check. These 2519 * faults are exceedingly unlikely, because normally the guest uses 2520 * the exact same address register for the load_exclusive, and we 2521 * would have recognized these faults there. 2522 * 2523 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an 2524 * unaligned 4-byte write within the range of an aligned 8-byte load. 2525 * With LSE2, the store would need to cross a 16-byte boundary when the 2526 * load did not, which would mean the store is outside the range 2527 * recorded for the monitor, which would have failed a corrected monitor 2528 * check above. 
For now, we assume no size change and retain the 2529 * MO_ALIGN to let tcg know what we checked in the load_exclusive. 2530 * 2531 * It is possible to trigger an MTE fault, by performing the load with 2532 * a virtual address with a valid tag and performing the store with the 2533 * same virtual address and a different invalid tag. 2534 */ 2535 memop = size + is_pair; 2536 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { 2537 memop |= MO_ALIGN; 2538 } 2539 memop = finalize_memop(s, memop); 2540 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2541 2542 tmp = tcg_temp_new_i64(); 2543 if (is_pair) { 2544 if (size == 2) { 2545 if (s->be_data == MO_LE) { 2546 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2547 } else { 2548 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2549 } 2550 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2551 cpu_exclusive_val, tmp, 2552 get_mem_index(s), memop); 2553 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2554 } else { 2555 TCGv_i128 t16 = tcg_temp_new_i128(); 2556 TCGv_i128 c16 = tcg_temp_new_i128(); 2557 TCGv_i64 a, b; 2558 2559 if (s->be_data == MO_LE) { 2560 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2561 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2562 cpu_exclusive_high); 2563 } else { 2564 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2565 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 2566 cpu_exclusive_val); 2567 } 2568 2569 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 2570 get_mem_index(s), memop); 2571 2572 a = tcg_temp_new_i64(); 2573 b = tcg_temp_new_i64(); 2574 if (s->be_data == MO_LE) { 2575 tcg_gen_extr_i128_i64(a, b, t16); 2576 } else { 2577 tcg_gen_extr_i128_i64(b, a, t16); 2578 } 2579 2580 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 2581 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 2582 tcg_gen_or_i64(tmp, a, b); 2583 2584 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 2585 } 2586 } else { 2587 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 2588 cpu_reg(s, rt), get_mem_index(s), memop); 2589 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2590 } 2591 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 2592 tcg_gen_br(done_label); 2593 2594 gen_set_label(fail_label); 2595 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 2596 gen_set_label(done_label); 2597 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2598 } 2599 2600 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 2601 int rn, int size) 2602 { 2603 TCGv_i64 tcg_rs = cpu_reg(s, rs); 2604 TCGv_i64 tcg_rt = cpu_reg(s, rt); 2605 int memidx = get_mem_index(s); 2606 TCGv_i64 clean_addr; 2607 MemOp memop; 2608 2609 if (rn == 31) { 2610 gen_check_sp_alignment(s); 2611 } 2612 memop = check_atomic_align(s, rn, size); 2613 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2614 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, 2615 memidx, memop); 2616 } 2617 2618 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 2619 int rn, int size) 2620 { 2621 TCGv_i64 s1 = cpu_reg(s, rs); 2622 TCGv_i64 s2 = cpu_reg(s, rs + 1); 2623 TCGv_i64 t1 = cpu_reg(s, rt); 2624 TCGv_i64 t2 = cpu_reg(s, rt + 1); 2625 TCGv_i64 clean_addr; 2626 int memidx = get_mem_index(s); 2627 MemOp memop; 2628 2629 if (rn == 31) { 2630 gen_check_sp_alignment(s); 2631 } 2632 2633 /* This is a single atomic access, despite the "pair". 
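 * E.g. for the 32-bit form the two registers are concatenated and a
 * single 8-byte cmpxchg is issued; the 64-bit form uses one 16-byte
 * cmpxchg.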
*/ 2634 memop = check_atomic_align(s, rn, size + 1); 2635 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2636 2637 if (size == 2) { 2638 TCGv_i64 cmp = tcg_temp_new_i64(); 2639 TCGv_i64 val = tcg_temp_new_i64(); 2640 2641 if (s->be_data == MO_LE) { 2642 tcg_gen_concat32_i64(val, t1, t2); 2643 tcg_gen_concat32_i64(cmp, s1, s2); 2644 } else { 2645 tcg_gen_concat32_i64(val, t2, t1); 2646 tcg_gen_concat32_i64(cmp, s2, s1); 2647 } 2648 2649 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); 2650 2651 if (s->be_data == MO_LE) { 2652 tcg_gen_extr32_i64(s1, s2, cmp); 2653 } else { 2654 tcg_gen_extr32_i64(s2, s1, cmp); 2655 } 2656 } else { 2657 TCGv_i128 cmp = tcg_temp_new_i128(); 2658 TCGv_i128 val = tcg_temp_new_i128(); 2659 2660 if (s->be_data == MO_LE) { 2661 tcg_gen_concat_i64_i128(val, t1, t2); 2662 tcg_gen_concat_i64_i128(cmp, s1, s2); 2663 } else { 2664 tcg_gen_concat_i64_i128(val, t2, t1); 2665 tcg_gen_concat_i64_i128(cmp, s2, s1); 2666 } 2667 2668 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); 2669 2670 if (s->be_data == MO_LE) { 2671 tcg_gen_extr_i128_i64(s1, s2, cmp); 2672 } else { 2673 tcg_gen_extr_i128_i64(s2, s1, cmp); 2674 } 2675 } 2676 } 2677 2678 /* 2679 * Compute the ISS.SF bit for syndrome information if an exception 2680 * is taken on a load or store. This indicates whether the instruction 2681 * is accessing a 32-bit or 64-bit register. This logic is derived 2682 * from the ARMv8 specs for LDR (Shared decode for all encodings). 2683 */ 2684 static bool ldst_iss_sf(int size, bool sign, bool ext) 2685 { 2686 2687 if (sign) { 2688 /* 2689 * Signed loads are 64 bit results if we are not going to 2690 * do a zero-extend from 32 to 64 after the load. 2691 * (For a store, sign and ext are always false.) 2692 */ 2693 return !ext; 2694 } else { 2695 /* Unsigned loads/stores work at the specified size */ 2696 return size == MO_64; 2697 } 2698 } 2699 2700 static bool trans_STXR(DisasContext *s, arg_stxr *a) 2701 { 2702 if (a->rn == 31) { 2703 gen_check_sp_alignment(s); 2704 } 2705 if (a->lasr) { 2706 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2707 } 2708 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); 2709 return true; 2710 } 2711 2712 static bool trans_LDXR(DisasContext *s, arg_stxr *a) 2713 { 2714 if (a->rn == 31) { 2715 gen_check_sp_alignment(s); 2716 } 2717 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); 2718 if (a->lasr) { 2719 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2720 } 2721 return true; 2722 } 2723 2724 static bool trans_STLR(DisasContext *s, arg_stlr *a) 2725 { 2726 TCGv_i64 clean_addr; 2727 MemOp memop; 2728 bool iss_sf = ldst_iss_sf(a->sz, false, false); 2729 2730 /* 2731 * StoreLORelease is the same as Store-Release for QEMU, but 2732 * needs the feature-test. 2733 */ 2734 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 2735 return false; 2736 } 2737 /* Generate ISS for non-exclusive accesses including LASR. 
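 * Note also that the barrier below is emitted before the store itself;
 * that ordering is what provides the Store-Release semantics (prior
 * accesses complete before the releasing store becomes visible).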
*/ 2738 if (a->rn == 31) { 2739 gen_check_sp_alignment(s); 2740 } 2741 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2742 memop = check_ordered_align(s, a->rn, 0, true, a->sz); 2743 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 2744 true, a->rn != 31, memop); 2745 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, 2746 iss_sf, a->lasr); 2747 return true; 2748 } 2749 2750 static bool trans_LDAR(DisasContext *s, arg_stlr *a) 2751 { 2752 TCGv_i64 clean_addr; 2753 MemOp memop; 2754 bool iss_sf = ldst_iss_sf(a->sz, false, false); 2755 2756 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 2757 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 2758 return false; 2759 } 2760 /* Generate ISS for non-exclusive accesses including LASR. */ 2761 if (a->rn == 31) { 2762 gen_check_sp_alignment(s); 2763 } 2764 memop = check_ordered_align(s, a->rn, 0, false, a->sz); 2765 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 2766 false, a->rn != 31, memop); 2767 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, 2768 a->rt, iss_sf, a->lasr); 2769 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2770 return true; 2771 } 2772 2773 static bool trans_STXP(DisasContext *s, arg_stxr *a) 2774 { 2775 if (a->rn == 31) { 2776 gen_check_sp_alignment(s); 2777 } 2778 if (a->lasr) { 2779 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2780 } 2781 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); 2782 return true; 2783 } 2784 2785 static bool trans_LDXP(DisasContext *s, arg_stxr *a) 2786 { 2787 if (a->rn == 31) { 2788 gen_check_sp_alignment(s); 2789 } 2790 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); 2791 if (a->lasr) { 2792 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2793 } 2794 return true; 2795 } 2796 2797 static bool trans_CASP(DisasContext *s, arg_CASP *a) 2798 { 2799 if (!dc_isar_feature(aa64_atomics, s)) { 2800 return false; 2801 } 2802 if (((a->rt | a->rs) & 1) != 0) { 2803 return false; 2804 } 2805 2806 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); 2807 return true; 2808 } 2809 2810 static bool trans_CAS(DisasContext *s, arg_CAS *a) 2811 { 2812 if (!dc_isar_feature(aa64_atomics, s)) { 2813 return false; 2814 } 2815 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); 2816 return true; 2817 } 2818 2819 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) 2820 { 2821 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); 2822 TCGv_i64 tcg_rt = cpu_reg(s, a->rt); 2823 TCGv_i64 clean_addr = tcg_temp_new_i64(); 2824 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 2825 2826 gen_pc_plus_diff(s, clean_addr, a->imm); 2827 do_gpr_ld(s, tcg_rt, clean_addr, memop, 2828 false, true, a->rt, iss_sf, false); 2829 return true; 2830 } 2831 2832 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) 2833 { 2834 /* Load register (literal), vector version */ 2835 TCGv_i64 clean_addr; 2836 MemOp memop; 2837 2838 if (!fp_access_check(s)) { 2839 return true; 2840 } 2841 memop = finalize_memop_asimd(s, a->sz); 2842 clean_addr = tcg_temp_new_i64(); 2843 gen_pc_plus_diff(s, clean_addr, a->imm); 2844 do_fp_ld(s, a->rt, clean_addr, memop); 2845 return true; 2846 } 2847 2848 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, 2849 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 2850 uint64_t offset, bool is_store, MemOp mop) 2851 { 2852 if (a->rn == 31) { 2853 gen_check_sp_alignment(s); 2854 } 2855 2856 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 2857 if (!a->p) { 2858 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 2859 } 2860 2861 *clean_addr = gen_mte_checkN(s, 
*dirty_addr, is_store, 2862 (a->w || a->rn != 31), 2 << a->sz, mop); 2863 } 2864 2865 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, 2866 TCGv_i64 dirty_addr, uint64_t offset) 2867 { 2868 if (a->w) { 2869 if (a->p) { 2870 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 2871 } 2872 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 2873 } 2874 } 2875 2876 static bool trans_STP(DisasContext *s, arg_ldstpair *a) 2877 { 2878 uint64_t offset = a->imm << a->sz; 2879 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 2880 MemOp mop = finalize_memop(s, a->sz); 2881 2882 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 2883 tcg_rt = cpu_reg(s, a->rt); 2884 tcg_rt2 = cpu_reg(s, a->rt2); 2885 /* 2886 * We built mop above for the single logical access -- rebuild it 2887 * now for the paired operation. 2888 * 2889 * With LSE2, non-sign-extending pairs are treated atomically if 2890 * aligned, and if unaligned one of the pair will be completely 2891 * within a 16-byte block and that element will be atomic. 2892 * Otherwise each element is separately atomic. 2893 * In all cases, issue one operation with the correct atomicity. 2894 */ 2895 mop = a->sz + 1; 2896 if (s->align_mem) { 2897 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 2898 } 2899 mop = finalize_memop_pair(s, mop); 2900 if (a->sz == 2) { 2901 TCGv_i64 tmp = tcg_temp_new_i64(); 2902 2903 if (s->be_data == MO_LE) { 2904 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); 2905 } else { 2906 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); 2907 } 2908 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); 2909 } else { 2910 TCGv_i128 tmp = tcg_temp_new_i128(); 2911 2912 if (s->be_data == MO_LE) { 2913 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 2914 } else { 2915 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 2916 } 2917 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 2918 } 2919 op_addr_ldstpair_post(s, a, dirty_addr, offset); 2920 return true; 2921 } 2922 2923 static bool trans_LDP(DisasContext *s, arg_ldstpair *a) 2924 { 2925 uint64_t offset = a->imm << a->sz; 2926 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 2927 MemOp mop = finalize_memop(s, a->sz); 2928 2929 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 2930 tcg_rt = cpu_reg(s, a->rt); 2931 tcg_rt2 = cpu_reg(s, a->rt2); 2932 2933 /* 2934 * We built mop above for the single logical access -- rebuild it 2935 * now for the paired operation. 2936 * 2937 * With LSE2, non-sign-extending pairs are treated atomically if 2938 * aligned, and if unaligned one of the pair will be completely 2939 * within a 16-byte block and that element will be atomic. 2940 * Otherwise each element is separately atomic. 2941 * In all cases, issue one operation with the correct atomicity. 2942 * 2943 * This treats sign-extending loads like zero-extending loads, 2944 * since that reuses the most code below. 2945 */ 2946 mop = a->sz + 1; 2947 if (s->align_mem) { 2948 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 2949 } 2950 mop = finalize_memop_pair(s, mop); 2951 if (a->sz == 2) { 2952 int o2 = s->be_data == MO_LE ? 
32 : 0; 2953 int o1 = o2 ^ 32; 2954 2955 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); 2956 if (a->sign) { 2957 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); 2958 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); 2959 } else { 2960 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); 2961 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); 2962 } 2963 } else { 2964 TCGv_i128 tmp = tcg_temp_new_i128(); 2965 2966 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); 2967 if (s->be_data == MO_LE) { 2968 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); 2969 } else { 2970 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); 2971 } 2972 } 2973 op_addr_ldstpair_post(s, a, dirty_addr, offset); 2974 return true; 2975 } 2976 2977 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) 2978 { 2979 uint64_t offset = a->imm << a->sz; 2980 TCGv_i64 clean_addr, dirty_addr; 2981 MemOp mop; 2982 2983 if (!fp_access_check(s)) { 2984 return true; 2985 } 2986 2987 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 2988 mop = finalize_memop_asimd(s, a->sz); 2989 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 2990 do_fp_st(s, a->rt, clean_addr, mop); 2991 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 2992 do_fp_st(s, a->rt2, clean_addr, mop); 2993 op_addr_ldstpair_post(s, a, dirty_addr, offset); 2994 return true; 2995 } 2996 2997 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) 2998 { 2999 uint64_t offset = a->imm << a->sz; 3000 TCGv_i64 clean_addr, dirty_addr; 3001 MemOp mop; 3002 3003 if (!fp_access_check(s)) { 3004 return true; 3005 } 3006 3007 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3008 mop = finalize_memop_asimd(s, a->sz); 3009 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3010 do_fp_ld(s, a->rt, clean_addr, mop); 3011 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3012 do_fp_ld(s, a->rt2, clean_addr, mop); 3013 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3014 return true; 3015 } 3016 3017 static bool trans_STGP(DisasContext *s, arg_ldstpair *a) 3018 { 3019 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3020 uint64_t offset = a->imm << LOG2_TAG_GRANULE; 3021 MemOp mop; 3022 TCGv_i128 tmp; 3023 3024 /* STGP only comes in one size. */ 3025 tcg_debug_assert(a->sz == MO_64); 3026 3027 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3028 return false; 3029 } 3030 3031 if (a->rn == 31) { 3032 gen_check_sp_alignment(s); 3033 } 3034 3035 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3036 if (!a->p) { 3037 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3038 } 3039 3040 clean_addr = clean_data_tbi(s, dirty_addr); 3041 tcg_rt = cpu_reg(s, a->rt); 3042 tcg_rt2 = cpu_reg(s, a->rt2); 3043 3044 /* 3045 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, 3046 * and one tag operation. We implement it as one single aligned 16-byte 3047 * memory operation for convenience. Note that the alignment ensures 3048 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 3049 */ 3050 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); 3051 3052 tmp = tcg_temp_new_i128(); 3053 if (s->be_data == MO_LE) { 3054 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3055 } else { 3056 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3057 } 3058 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3059 3060 /* Perform the tag store, if tag access enabled. 
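 * Sketch of the overall effect: "STGP x0, x1, [x2]" stores 16 data
 * bytes at [x2] and, when ATA is set, writes the allocation tag taken
 * from the x2 address for that granule; with ATA clear only the data
 * store above is performed.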
*/ 3061 if (s->ata) { 3062 if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3063 gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr); 3064 } else { 3065 gen_helper_stg(cpu_env, dirty_addr, dirty_addr); 3066 } 3067 } 3068 3069 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3070 return true; 3071 } 3072 3073 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, 3074 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3075 uint64_t offset, bool is_store, MemOp mop) 3076 { 3077 int memidx; 3078 3079 if (a->rn == 31) { 3080 gen_check_sp_alignment(s); 3081 } 3082 3083 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3084 if (!a->p) { 3085 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3086 } 3087 memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); 3088 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, 3089 a->w || a->rn != 31, 3090 mop, a->unpriv, memidx); 3091 } 3092 3093 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, 3094 TCGv_i64 dirty_addr, uint64_t offset) 3095 { 3096 if (a->w) { 3097 if (a->p) { 3098 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3099 } 3100 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3101 } 3102 } 3103 3104 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) 3105 { 3106 bool iss_sf, iss_valid = !a->w; 3107 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3108 int memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); 3109 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3110 3111 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3112 3113 tcg_rt = cpu_reg(s, a->rt); 3114 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3115 3116 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, 3117 iss_valid, a->rt, iss_sf, false); 3118 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3119 return true; 3120 } 3121 3122 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) 3123 { 3124 bool iss_sf, iss_valid = !a->w; 3125 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3126 int memidx = a->unpriv ? 
get_a64_user_mem_index(s) : get_mem_index(s); 3127 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3128 3129 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3130 3131 tcg_rt = cpu_reg(s, a->rt); 3132 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3133 3134 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, 3135 a->ext, memidx, iss_valid, a->rt, iss_sf, false); 3136 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3137 return true; 3138 } 3139 3140 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) 3141 { 3142 TCGv_i64 clean_addr, dirty_addr; 3143 MemOp mop; 3144 3145 if (!fp_access_check(s)) { 3146 return true; 3147 } 3148 mop = finalize_memop_asimd(s, a->sz); 3149 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3150 do_fp_st(s, a->rt, clean_addr, mop); 3151 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3152 return true; 3153 } 3154 3155 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) 3156 { 3157 TCGv_i64 clean_addr, dirty_addr; 3158 MemOp mop; 3159 3160 if (!fp_access_check(s)) { 3161 return true; 3162 } 3163 mop = finalize_memop_asimd(s, a->sz); 3164 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3165 do_fp_ld(s, a->rt, clean_addr, mop); 3166 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3167 return true; 3168 } 3169 3170 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, 3171 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3172 bool is_store, MemOp memop) 3173 { 3174 TCGv_i64 tcg_rm; 3175 3176 if (a->rn == 31) { 3177 gen_check_sp_alignment(s); 3178 } 3179 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3180 3181 tcg_rm = read_cpu_reg(s, a->rm, 1); 3182 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); 3183 3184 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); 3185 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); 3186 } 3187 3188 static bool trans_LDR(DisasContext *s, arg_ldst *a) 3189 { 3190 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3191 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3192 MemOp memop; 3193 3194 if (extract32(a->opt, 1, 1) == 0) { 3195 return false; 3196 } 3197 3198 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3199 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3200 tcg_rt = cpu_reg(s, a->rt); 3201 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3202 a->ext, true, a->rt, iss_sf, false); 3203 return true; 3204 } 3205 3206 static bool trans_STR(DisasContext *s, arg_ldst *a) 3207 { 3208 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3209 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3210 MemOp memop; 3211 3212 if (extract32(a->opt, 1, 1) == 0) { 3213 return false; 3214 } 3215 3216 memop = finalize_memop(s, a->sz); 3217 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3218 tcg_rt = cpu_reg(s, a->rt); 3219 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); 3220 return true; 3221 } 3222 3223 static bool trans_LDR_v(DisasContext *s, arg_ldst *a) 3224 { 3225 TCGv_i64 clean_addr, dirty_addr; 3226 MemOp memop; 3227 3228 if (extract32(a->opt, 1, 1) == 0) { 3229 return false; 3230 } 3231 3232 if (!fp_access_check(s)) { 3233 return true; 3234 } 3235 3236 memop = finalize_memop_asimd(s, a->sz); 3237 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3238 do_fp_ld(s, a->rt, clean_addr, memop); 3239 return true; 3240 } 3241 3242 static bool trans_STR_v(DisasContext *s, arg_ldst *a) 3243 { 3244 TCGv_i64 clean_addr, dirty_addr; 3245 MemOp memop; 3246 3247 if (extract32(a->opt, 1, 1) == 
0) { 3248 return false; 3249 } 3250 3251 if (!fp_access_check(s)) { 3252 return true; 3253 } 3254 3255 memop = finalize_memop_asimd(s, a->sz); 3256 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3257 do_fp_st(s, a->rt, clean_addr, memop); 3258 return true; 3259 } 3260 3261 3262 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, 3263 int sign, bool invert) 3264 { 3265 MemOp mop = a->sz | sign; 3266 TCGv_i64 clean_addr, tcg_rs, tcg_rt; 3267 3268 if (a->rn == 31) { 3269 gen_check_sp_alignment(s); 3270 } 3271 mop = check_atomic_align(s, a->rn, mop); 3272 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3273 a->rn != 31, mop); 3274 tcg_rs = read_cpu_reg(s, a->rs, true); 3275 tcg_rt = cpu_reg(s, a->rt); 3276 if (invert) { 3277 tcg_gen_not_i64(tcg_rs, tcg_rs); 3278 } 3279 /* 3280 * The tcg atomic primitives are all full barriers. Therefore we 3281 * can ignore the Acquire and Release bits of this instruction. 3282 */ 3283 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3284 3285 if (mop & MO_SIGN) { 3286 switch (a->sz) { 3287 case MO_8: 3288 tcg_gen_ext8u_i64(tcg_rt, tcg_rt); 3289 break; 3290 case MO_16: 3291 tcg_gen_ext16u_i64(tcg_rt, tcg_rt); 3292 break; 3293 case MO_32: 3294 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3295 break; 3296 case MO_64: 3297 break; 3298 default: 3299 g_assert_not_reached(); 3300 } 3301 } 3302 return true; 3303 } 3304 3305 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) 3306 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) 3307 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) 3308 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) 3309 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) 3310 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) 3311 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 3312 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) 3313 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) 3314 3315 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) 3316 { 3317 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3318 TCGv_i64 clean_addr; 3319 MemOp mop; 3320 3321 if (!dc_isar_feature(aa64_atomics, s) || 3322 !dc_isar_feature(aa64_rcpc_8_3, s)) { 3323 return false; 3324 } 3325 if (a->rn == 31) { 3326 gen_check_sp_alignment(s); 3327 } 3328 mop = check_atomic_align(s, a->rn, a->sz); 3329 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3330 a->rn != 31, mop); 3331 /* 3332 * LDAPR* are a special case because they are a simple load, not a 3333 * fetch-and-do-something op. 3334 * The architectural consistency requirements here are weaker than 3335 * full load-acquire (we only need "load-acquire processor consistent"), 3336 * but we choose to implement them as full LDAQ. 
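 * So, illustratively, "LDAPR w0, [x1]" is expanded as a plain load
 * followed by the same TCG_BAR_LDAQ barrier that LDAR gets; the extra
 * ordering is architecturally permitted, just not required.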
3337 */ 3338 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, 3339 true, a->rt, iss_sf, true); 3340 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3341 return true; 3342 } 3343 3344 static bool trans_LDRA(DisasContext *s, arg_LDRA *a) 3345 { 3346 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3347 MemOp memop; 3348 3349 /* Load with pointer authentication */ 3350 if (!dc_isar_feature(aa64_pauth, s)) { 3351 return false; 3352 } 3353 3354 if (a->rn == 31) { 3355 gen_check_sp_alignment(s); 3356 } 3357 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3358 3359 if (s->pauth_active) { 3360 if (!a->m) { 3361 gen_helper_autda_combined(dirty_addr, cpu_env, dirty_addr, 3362 tcg_constant_i64(0)); 3363 } else { 3364 gen_helper_autdb_combined(dirty_addr, cpu_env, dirty_addr, 3365 tcg_constant_i64(0)); 3366 } 3367 } 3368 3369 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3370 3371 memop = finalize_memop(s, MO_64); 3372 3373 /* Note that "clean" and "dirty" here refer to TBI not PAC. */ 3374 clean_addr = gen_mte_check1(s, dirty_addr, false, 3375 a->w || a->rn != 31, memop); 3376 3377 tcg_rt = cpu_reg(s, a->rt); 3378 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3379 /* extend */ false, /* iss_valid */ !a->w, 3380 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); 3381 3382 if (a->w) { 3383 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3384 } 3385 return true; 3386 } 3387 3388 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3389 { 3390 TCGv_i64 clean_addr, dirty_addr; 3391 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); 3392 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3393 3394 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3395 return false; 3396 } 3397 3398 if (a->rn == 31) { 3399 gen_check_sp_alignment(s); 3400 } 3401 3402 mop = check_ordered_align(s, a->rn, a->imm, false, mop); 3403 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3404 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3405 clean_addr = clean_data_tbi(s, dirty_addr); 3406 3407 /* 3408 * Load-AcquirePC semantics; we implement as the slightly more 3409 * restrictive Load-Acquire. 
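 * (Providing stronger ordering than the architecture requires is
 * always permissible; it merely costs an extra barrier on weakly
 * ordered hosts.)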
3410 */ 3411 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, 3412 a->rt, iss_sf, true); 3413 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3414 return true; 3415 } 3416 3417 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3418 { 3419 TCGv_i64 clean_addr, dirty_addr; 3420 MemOp mop = a->sz; 3421 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3422 3423 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3424 return false; 3425 } 3426 3427 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3428 3429 if (a->rn == 31) { 3430 gen_check_sp_alignment(s); 3431 } 3432 3433 mop = check_ordered_align(s, a->rn, a->imm, true, mop); 3434 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3435 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3436 clean_addr = clean_data_tbi(s, dirty_addr); 3437 3438 /* Store-Release semantics */ 3439 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3440 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); 3441 return true; 3442 } 3443 3444 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) 3445 { 3446 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3447 MemOp endian, align, mop; 3448 3449 int total; /* total bytes */ 3450 int elements; /* elements per vector */ 3451 int r; 3452 int size = a->sz; 3453 3454 if (!a->p && a->rm != 0) { 3455 /* For non-postindexed accesses the Rm field must be 0 */ 3456 return false; 3457 } 3458 if (size == 3 && !a->q && a->selem != 1) { 3459 return false; 3460 } 3461 if (!fp_access_check(s)) { 3462 return true; 3463 } 3464 3465 if (a->rn == 31) { 3466 gen_check_sp_alignment(s); 3467 } 3468 3469 /* For our purposes, bytes are always little-endian. */ 3470 endian = s->be_data; 3471 if (size == 0) { 3472 endian = MO_LE; 3473 } 3474 3475 total = a->rpt * a->selem * (a->q ? 16 : 8); 3476 tcg_rn = cpu_reg_sp(s, a->rn); 3477 3478 /* 3479 * Issue the MTE check vs the logical repeat count, before we 3480 * promote consecutive little-endian elements below. 3481 */ 3482 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, 3483 finalize_memop_asimd(s, size)); 3484 3485 /* 3486 * Consecutive little-endian elements from a single register 3487 * can be promoted to a larger little-endian operation. 3488 */ 3489 align = MO_ALIGN; 3490 if (a->selem == 1 && endian == MO_LE) { 3491 align = pow2_align(size); 3492 size = 3; 3493 } 3494 if (!s->align_mem) { 3495 align = 0; 3496 } 3497 mop = endian | size | align; 3498 3499 elements = (a->q ? 16 : 8) >> size; 3500 tcg_ebytes = tcg_constant_i64(1 << size); 3501 for (r = 0; r < a->rpt; r++) { 3502 int e; 3503 for (e = 0; e < elements; e++) { 3504 int xs; 3505 for (xs = 0; xs < a->selem; xs++) { 3506 int tt = (a->rt + r + xs) % 32; 3507 do_vec_ld(s, tt, e, clean_addr, mop); 3508 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3509 } 3510 } 3511 } 3512 3513 /* 3514 * For non-quad operations, setting a slice of the low 64 bits of 3515 * the register clears the high 64 bits (in the ARM ARM pseudocode 3516 * this is implicit in the fact that 'rval' is a 64 bit wide 3517 * variable). For quad operations, we might still need to zero 3518 * the high bits of SVE. 
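 * (clear_vec_high() below does exactly that; with a->q set it only
 * has an effect when the SVE vector length exceeds 128 bits.)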
3519 */ 3520 for (r = 0; r < a->rpt * a->selem; r++) { 3521 int tt = (a->rt + r) % 32; 3522 clear_vec_high(s, a->q, tt); 3523 } 3524 3525 if (a->p) { 3526 if (a->rm == 31) { 3527 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3528 } else { 3529 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3530 } 3531 } 3532 return true; 3533 } 3534 3535 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) 3536 { 3537 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3538 MemOp endian, align, mop; 3539 3540 int total; /* total bytes */ 3541 int elements; /* elements per vector */ 3542 int r; 3543 int size = a->sz; 3544 3545 if (!a->p && a->rm != 0) { 3546 /* For non-postindexed accesses the Rm field must be 0 */ 3547 return false; 3548 } 3549 if (size == 3 && !a->q && a->selem != 1) { 3550 return false; 3551 } 3552 if (!fp_access_check(s)) { 3553 return true; 3554 } 3555 3556 if (a->rn == 31) { 3557 gen_check_sp_alignment(s); 3558 } 3559 3560 /* For our purposes, bytes are always little-endian. */ 3561 endian = s->be_data; 3562 if (size == 0) { 3563 endian = MO_LE; 3564 } 3565 3566 total = a->rpt * a->selem * (a->q ? 16 : 8); 3567 tcg_rn = cpu_reg_sp(s, a->rn); 3568 3569 /* 3570 * Issue the MTE check vs the logical repeat count, before we 3571 * promote consecutive little-endian elements below. 3572 */ 3573 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, 3574 finalize_memop_asimd(s, size)); 3575 3576 /* 3577 * Consecutive little-endian elements from a single register 3578 * can be promoted to a larger little-endian operation. 3579 */ 3580 align = MO_ALIGN; 3581 if (a->selem == 1 && endian == MO_LE) { 3582 align = pow2_align(size); 3583 size = 3; 3584 } 3585 if (!s->align_mem) { 3586 align = 0; 3587 } 3588 mop = endian | size | align; 3589 3590 elements = (a->q ? 
16 : 8) >> size; 3591 tcg_ebytes = tcg_constant_i64(1 << size); 3592 for (r = 0; r < a->rpt; r++) { 3593 int e; 3594 for (e = 0; e < elements; e++) { 3595 int xs; 3596 for (xs = 0; xs < a->selem; xs++) { 3597 int tt = (a->rt + r + xs) % 32; 3598 do_vec_st(s, tt, e, clean_addr, mop); 3599 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3600 } 3601 } 3602 } 3603 3604 if (a->p) { 3605 if (a->rm == 31) { 3606 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3607 } else { 3608 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3609 } 3610 } 3611 return true; 3612 } 3613 3614 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) 3615 { 3616 int xs, total, rt; 3617 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3618 MemOp mop; 3619 3620 if (!a->p && a->rm != 0) { 3621 return false; 3622 } 3623 if (!fp_access_check(s)) { 3624 return true; 3625 } 3626 3627 if (a->rn == 31) { 3628 gen_check_sp_alignment(s); 3629 } 3630 3631 total = a->selem << a->scale; 3632 tcg_rn = cpu_reg_sp(s, a->rn); 3633 3634 mop = finalize_memop_asimd(s, a->scale); 3635 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, 3636 total, mop); 3637 3638 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3639 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3640 do_vec_st(s, rt, a->index, clean_addr, mop); 3641 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3642 } 3643 3644 if (a->p) { 3645 if (a->rm == 31) { 3646 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3647 } else { 3648 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3649 } 3650 } 3651 return true; 3652 } 3653 3654 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) 3655 { 3656 int xs, total, rt; 3657 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3658 MemOp mop; 3659 3660 if (!a->p && a->rm != 0) { 3661 return false; 3662 } 3663 if (!fp_access_check(s)) { 3664 return true; 3665 } 3666 3667 if (a->rn == 31) { 3668 gen_check_sp_alignment(s); 3669 } 3670 3671 total = a->selem << a->scale; 3672 tcg_rn = cpu_reg_sp(s, a->rn); 3673 3674 mop = finalize_memop_asimd(s, a->scale); 3675 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3676 total, mop); 3677 3678 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3679 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3680 do_vec_ld(s, rt, a->index, clean_addr, mop); 3681 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3682 } 3683 3684 if (a->p) { 3685 if (a->rm == 31) { 3686 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3687 } else { 3688 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3689 } 3690 } 3691 return true; 3692 } 3693 3694 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) 3695 { 3696 int xs, total, rt; 3697 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3698 MemOp mop; 3699 3700 if (!a->p && a->rm != 0) { 3701 return false; 3702 } 3703 if (!fp_access_check(s)) { 3704 return true; 3705 } 3706 3707 if (a->rn == 31) { 3708 gen_check_sp_alignment(s); 3709 } 3710 3711 total = a->selem << a->scale; 3712 tcg_rn = cpu_reg_sp(s, a->rn); 3713 3714 mop = finalize_memop_asimd(s, a->scale); 3715 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3716 total, mop); 3717 3718 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3719 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3720 /* Load and replicate to all elements */ 3721 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 3722 3723 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 3724 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), 3725 (a->q + 
1) * 8, vec_full_reg_size(s), tcg_tmp); 3726 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3727 } 3728 3729 if (a->p) { 3730 if (a->rm == 31) { 3731 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3732 } else { 3733 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3734 } 3735 } 3736 return true; 3737 } 3738 3739 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) 3740 { 3741 TCGv_i64 addr, clean_addr, tcg_rt; 3742 int size = 4 << s->dcz_blocksize; 3743 3744 if (!dc_isar_feature(aa64_mte, s)) { 3745 return false; 3746 } 3747 if (s->current_el == 0) { 3748 return false; 3749 } 3750 3751 if (a->rn == 31) { 3752 gen_check_sp_alignment(s); 3753 } 3754 3755 addr = read_cpu_reg_sp(s, a->rn, true); 3756 tcg_gen_addi_i64(addr, addr, a->imm); 3757 tcg_rt = cpu_reg(s, a->rt); 3758 3759 if (s->ata) { 3760 gen_helper_stzgm_tags(cpu_env, addr, tcg_rt); 3761 } 3762 /* 3763 * The non-tags portion of STZGM is mostly like DC_ZVA, 3764 * except the alignment happens before the access. 3765 */ 3766 clean_addr = clean_data_tbi(s, addr); 3767 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 3768 gen_helper_dc_zva(cpu_env, clean_addr); 3769 return true; 3770 } 3771 3772 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) 3773 { 3774 TCGv_i64 addr, clean_addr, tcg_rt; 3775 3776 if (!dc_isar_feature(aa64_mte, s)) { 3777 return false; 3778 } 3779 if (s->current_el == 0) { 3780 return false; 3781 } 3782 3783 if (a->rn == 31) { 3784 gen_check_sp_alignment(s); 3785 } 3786 3787 addr = read_cpu_reg_sp(s, a->rn, true); 3788 tcg_gen_addi_i64(addr, addr, a->imm); 3789 tcg_rt = cpu_reg(s, a->rt); 3790 3791 if (s->ata) { 3792 gen_helper_stgm(cpu_env, addr, tcg_rt); 3793 } else { 3794 MMUAccessType acc = MMU_DATA_STORE; 3795 int size = 4 << s->gm_blocksize; 3796 3797 clean_addr = clean_data_tbi(s, addr); 3798 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 3799 gen_probe_access(s, clean_addr, acc, size); 3800 } 3801 return true; 3802 } 3803 3804 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) 3805 { 3806 TCGv_i64 addr, clean_addr, tcg_rt; 3807 3808 if (!dc_isar_feature(aa64_mte, s)) { 3809 return false; 3810 } 3811 if (s->current_el == 0) { 3812 return false; 3813 } 3814 3815 if (a->rn == 31) { 3816 gen_check_sp_alignment(s); 3817 } 3818 3819 addr = read_cpu_reg_sp(s, a->rn, true); 3820 tcg_gen_addi_i64(addr, addr, a->imm); 3821 tcg_rt = cpu_reg(s, a->rt); 3822 3823 if (s->ata) { 3824 gen_helper_ldgm(tcg_rt, cpu_env, addr); 3825 } else { 3826 MMUAccessType acc = MMU_DATA_LOAD; 3827 int size = 4 << s->gm_blocksize; 3828 3829 clean_addr = clean_data_tbi(s, addr); 3830 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 3831 gen_probe_access(s, clean_addr, acc, size); 3832 /* The result tags are zeros. */ 3833 tcg_gen_movi_i64(tcg_rt, 0); 3834 } 3835 return true; 3836 } 3837 3838 static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) 3839 { 3840 TCGv_i64 addr, clean_addr, tcg_rt; 3841 3842 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3843 return false; 3844 } 3845 3846 if (a->rn == 31) { 3847 gen_check_sp_alignment(s); 3848 } 3849 3850 addr = read_cpu_reg_sp(s, a->rn, true); 3851 if (!a->p) { 3852 /* pre-index or signed offset */ 3853 tcg_gen_addi_i64(addr, addr, a->imm); 3854 } 3855 3856 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 3857 tcg_rt = cpu_reg(s, a->rt); 3858 if (s->ata) { 3859 gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt); 3860 } else { 3861 /* 3862 * Tag access disabled: we must check for aborts on the load 3863 * load from [rn+offset], and then insert a 0 tag into rt. 
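 * (gen_address_with_allocation_tag0() simply clears the 4-bit tag
 * field in bits [59:56] of rt.)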
3864 */ 3865 clean_addr = clean_data_tbi(s, addr); 3866 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 3867 gen_address_with_allocation_tag0(tcg_rt, tcg_rt); 3868 } 3869 3870 if (a->w) { 3871 /* pre-index or post-index */ 3872 if (a->p) { 3873 /* post-index */ 3874 tcg_gen_addi_i64(addr, addr, a->imm); 3875 } 3876 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 3877 } 3878 return true; 3879 } 3880 3881 static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) 3882 { 3883 TCGv_i64 addr, tcg_rt; 3884 3885 if (a->rn == 31) { 3886 gen_check_sp_alignment(s); 3887 } 3888 3889 addr = read_cpu_reg_sp(s, a->rn, true); 3890 if (!a->p) { 3891 /* pre-index or signed offset */ 3892 tcg_gen_addi_i64(addr, addr, a->imm); 3893 } 3894 tcg_rt = cpu_reg_sp(s, a->rt); 3895 if (!s->ata) { 3896 /* 3897 * For STG and ST2G, we need to check alignment and probe memory. 3898 * TODO: For STZG and STZ2G, we could rely on the stores below, 3899 * at least for system mode; user-only won't enforce alignment. 3900 */ 3901 if (is_pair) { 3902 gen_helper_st2g_stub(cpu_env, addr); 3903 } else { 3904 gen_helper_stg_stub(cpu_env, addr); 3905 } 3906 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3907 if (is_pair) { 3908 gen_helper_st2g_parallel(cpu_env, addr, tcg_rt); 3909 } else { 3910 gen_helper_stg_parallel(cpu_env, addr, tcg_rt); 3911 } 3912 } else { 3913 if (is_pair) { 3914 gen_helper_st2g(cpu_env, addr, tcg_rt); 3915 } else { 3916 gen_helper_stg(cpu_env, addr, tcg_rt); 3917 } 3918 } 3919 3920 if (is_zero) { 3921 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 3922 TCGv_i64 zero64 = tcg_constant_i64(0); 3923 TCGv_i128 zero128 = tcg_temp_new_i128(); 3924 int mem_index = get_mem_index(s); 3925 MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN); 3926 3927 tcg_gen_concat_i64_i128(zero128, zero64, zero64); 3928 3929 /* This is 1 or 2 atomic 16-byte operations. */ 3930 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 3931 if (is_pair) { 3932 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 3933 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 3934 } 3935 } 3936 3937 if (a->w) { 3938 /* pre-index or post-index */ 3939 if (a->p) { 3940 /* post-index */ 3941 tcg_gen_addi_i64(addr, addr, a->imm); 3942 } 3943 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 3944 } 3945 return true; 3946 } 3947 3948 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) 3949 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) 3950 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) 3951 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) 3952 3953 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); 3954 3955 static bool gen_rri(DisasContext *s, arg_rri_sf *a, 3956 bool rd_sp, bool rn_sp, ArithTwoOp *fn) 3957 { 3958 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); 3959 TCGv_i64 tcg_rd = rd_sp ? cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); 3960 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); 3961 3962 fn(tcg_rd, tcg_rn, tcg_imm); 3963 if (!a->sf) { 3964 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 3965 } 3966 return true; 3967 } 3968 3969 /* 3970 * PC-rel. addressing 3971 */ 3972 3973 static bool trans_ADR(DisasContext *s, arg_ri *a) 3974 { 3975 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); 3976 return true; 3977 } 3978 3979 static bool trans_ADRP(DisasContext *s, arg_ri *a) 3980 { 3981 int64_t offset = (int64_t)a->imm << 12; 3982 3983 /* The page offset is ok for CF_PCREL. 
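 * E.g. ADRP computes (PC & ~0xfff) + (imm << 12); subtracting the low
 * twelve bits of pc_curr below folds the page alignment of the PC into
 * the offset so that gen_pc_plus_diff() can be reused unchanged.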
*/ 3984 offset -= s->pc_curr & 0xfff; 3985 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); 3986 return true; 3987 } 3988 3989 /* 3990 * Add/subtract (immediate) 3991 */ 3992 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) 3993 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) 3994 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) 3995 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) 3996 3997 /* 3998 * Add/subtract (immediate, with tags) 3999 */ 4000 4001 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, 4002 bool sub_op) 4003 { 4004 TCGv_i64 tcg_rn, tcg_rd; 4005 int imm; 4006 4007 imm = a->uimm6 << LOG2_TAG_GRANULE; 4008 if (sub_op) { 4009 imm = -imm; 4010 } 4011 4012 tcg_rn = cpu_reg_sp(s, a->rn); 4013 tcg_rd = cpu_reg_sp(s, a->rd); 4014 4015 if (s->ata) { 4016 gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn, 4017 tcg_constant_i32(imm), 4018 tcg_constant_i32(a->uimm4)); 4019 } else { 4020 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4021 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4022 } 4023 return true; 4024 } 4025 4026 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) 4027 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) 4028 4029 /* The input should be a value in the bottom e bits (with higher 4030 * bits zero); returns that value replicated into every element 4031 * of size e in a 64 bit integer. 4032 */ 4033 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4034 { 4035 assert(e != 0); 4036 while (e < 64) { 4037 mask |= mask << e; 4038 e *= 2; 4039 } 4040 return mask; 4041 } 4042 4043 /* 4044 * Logical (immediate) 4045 */ 4046 4047 /* 4048 * Simplified variant of pseudocode DecodeBitMasks() for the case where we 4049 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4050 * value (ie should cause a guest UNDEF exception), and true if they are 4051 * valid, in which case the decoded bit pattern is written to result. 4052 */ 4053 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 4054 unsigned int imms, unsigned int immr) 4055 { 4056 uint64_t mask; 4057 unsigned e, levels, s, r; 4058 int len; 4059 4060 assert(immn < 2 && imms < 64 && immr < 64); 4061 4062 /* The bit patterns we create here are 64 bit patterns which 4063 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 4064 * 64 bits each. Each element contains the same value: a run 4065 * of between 1 and e-1 non-zero bits, rotated within the 4066 * element by between 0 and e-1 bits. 4067 * 4068 * The element size and run length are encoded into immn (1 bit) 4069 * and imms (6 bits) as follows: 4070 * 64 bit elements: immn = 1, imms = <length of run - 1> 4071 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 4072 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 4073 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 4074 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 4075 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 4076 * Notice that immn = 0, imms = 11111x is the only combination 4077 * not covered by one of the above options; this is reserved. 4078 * Further, <length of run - 1> all-ones is a reserved pattern. 4079 * 4080 * In all cases the rotation is by immr % e (and immr is 6 bits). 
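 *
 * For example (an illustrative decode following the rules above):
 * immn = 0, imms = 0b000011, immr = 1 selects 32-bit elements holding a
 * run of four ones rotated right by one, i.e. each element is 0x80000007
 * and the resulting 64-bit mask is 0x8000000780000007.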
4081 */ 4082 4083 /* First determine the element size */ 4084 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 4085 if (len < 1) { 4086 /* This is the immn == 0, imms == 0x11111x case */ 4087 return false; 4088 } 4089 e = 1 << len; 4090 4091 levels = e - 1; 4092 s = imms & levels; 4093 r = immr & levels; 4094 4095 if (s == levels) { 4096 /* <length of run - 1> mustn't be all-ones. */ 4097 return false; 4098 } 4099 4100 /* Create the value of one element: s+1 set bits rotated 4101 * by r within the element (which is e bits wide)... 4102 */ 4103 mask = MAKE_64BIT_MASK(0, s + 1); 4104 if (r) { 4105 mask = (mask >> r) | (mask << (e - r)); 4106 mask &= MAKE_64BIT_MASK(0, e); 4107 } 4108 /* ...then replicate the element over the whole 64 bit value */ 4109 mask = bitfield_replicate(mask, e); 4110 *result = mask; 4111 return true; 4112 } 4113 4114 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc, 4115 void (*fn)(TCGv_i64, TCGv_i64, int64_t)) 4116 { 4117 TCGv_i64 tcg_rd, tcg_rn; 4118 uint64_t imm; 4119 4120 /* Some immediate field values are reserved. */ 4121 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 4122 extract32(a->dbm, 0, 6), 4123 extract32(a->dbm, 6, 6))) { 4124 return false; 4125 } 4126 if (!a->sf) { 4127 imm &= 0xffffffffull; 4128 } 4129 4130 tcg_rd = set_cc ? cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd); 4131 tcg_rn = cpu_reg(s, a->rn); 4132 4133 fn(tcg_rd, tcg_rn, imm); 4134 if (set_cc) { 4135 gen_logic_CC(a->sf, tcg_rd); 4136 } 4137 if (!a->sf) { 4138 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4139 } 4140 return true; 4141 } 4142 4143 TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64) 4144 TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64) 4145 TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64) 4146 TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64) 4147 4148 /* 4149 * Move wide (immediate) 4150 */ 4151 4152 static bool trans_MOVZ(DisasContext *s, arg_movw *a) 4153 { 4154 int pos = a->hw << 4; 4155 tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos); 4156 return true; 4157 } 4158 4159 static bool trans_MOVN(DisasContext *s, arg_movw *a) 4160 { 4161 int pos = a->hw << 4; 4162 uint64_t imm = a->imm; 4163 4164 imm = ~(imm << pos); 4165 if (!a->sf) { 4166 imm = (uint32_t)imm; 4167 } 4168 tcg_gen_movi_i64(cpu_reg(s, a->rd), imm); 4169 return true; 4170 } 4171 4172 static bool trans_MOVK(DisasContext *s, arg_movw *a) 4173 { 4174 int pos = a->hw << 4; 4175 TCGv_i64 tcg_rd, tcg_im; 4176 4177 tcg_rd = cpu_reg(s, a->rd); 4178 tcg_im = tcg_constant_i64(a->imm); 4179 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16); 4180 if (!a->sf) { 4181 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4182 } 4183 return true; 4184 } 4185 4186 /* 4187 * Bitfield 4188 */ 4189 4190 static bool trans_SBFM(DisasContext *s, arg_SBFM *a) 4191 { 4192 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4193 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4194 unsigned int bitsize = a->sf ? 64 : 32; 4195 unsigned int ri = a->immr; 4196 unsigned int si = a->imms; 4197 unsigned int pos, len; 4198 4199 if (si >= ri) { 4200 /* Wd<s-r:0> = Wn<s:r> */ 4201 len = (si - ri) + 1; 4202 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len); 4203 if (!a->sf) { 4204 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4205 } 4206 } else { 4207 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4208 len = si + 1; 4209 pos = (bitsize - ri) & (bitsize - 1); 4210 4211 if (len < ri) { 4212 /* 4213 * Sign extend the destination field from len to fill the 4214 * balance of the word. Let the deposit below insert all 4215 * of those sign bits. 
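 * For example (illustrative): SBFIZ Wd, Wn, #8, #4 decodes to ri = 24,
 * si = 3 and reaches this path; the 4-bit field is sign-extended and its
 * low 24 bits are then deposited at bit 8 by the deposit below.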
4216 */ 4217 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); 4218 len = ri; 4219 } 4220 4221 /* 4222 * We start with zero, and we haven't modified any bits outside 4223 * bitsize, therefore no final zero-extension is unneeded for !sf. 4224 */ 4225 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4226 } 4227 return true; 4228 } 4229 4230 static bool trans_UBFM(DisasContext *s, arg_UBFM *a) 4231 { 4232 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4233 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4234 unsigned int bitsize = a->sf ? 64 : 32; 4235 unsigned int ri = a->immr; 4236 unsigned int si = a->imms; 4237 unsigned int pos, len; 4238 4239 tcg_rd = cpu_reg(s, a->rd); 4240 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4241 4242 if (si >= ri) { 4243 /* Wd<s-r:0> = Wn<s:r> */ 4244 len = (si - ri) + 1; 4245 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); 4246 } else { 4247 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4248 len = si + 1; 4249 pos = (bitsize - ri) & (bitsize - 1); 4250 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4251 } 4252 return true; 4253 } 4254 4255 static bool trans_BFM(DisasContext *s, arg_BFM *a) 4256 { 4257 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4258 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4259 unsigned int bitsize = a->sf ? 64 : 32; 4260 unsigned int ri = a->immr; 4261 unsigned int si = a->imms; 4262 unsigned int pos, len; 4263 4264 tcg_rd = cpu_reg(s, a->rd); 4265 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4266 4267 if (si >= ri) { 4268 /* Wd<s-r:0> = Wn<s:r> */ 4269 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 4270 len = (si - ri) + 1; 4271 pos = 0; 4272 } else { 4273 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4274 len = si + 1; 4275 pos = (bitsize - ri) & (bitsize - 1); 4276 } 4277 4278 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4279 if (!a->sf) { 4280 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4281 } 4282 return true; 4283 } 4284 4285 static bool trans_EXTR(DisasContext *s, arg_extract *a) 4286 { 4287 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4288 4289 tcg_rd = cpu_reg(s, a->rd); 4290 4291 if (unlikely(a->imm == 0)) { 4292 /* 4293 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4294 * so an extract from bit 0 is a special case. 4295 */ 4296 if (a->sf) { 4297 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm)); 4298 } else { 4299 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm)); 4300 } 4301 } else { 4302 tcg_rm = cpu_reg(s, a->rm); 4303 tcg_rn = cpu_reg(s, a->rn); 4304 4305 if (a->sf) { 4306 /* Specialization to ROR happens in EXTRACT2. */ 4307 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm); 4308 } else { 4309 TCGv_i32 t0 = tcg_temp_new_i32(); 4310 4311 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4312 if (a->rm == a->rn) { 4313 tcg_gen_rotri_i32(t0, t0, a->imm); 4314 } else { 4315 TCGv_i32 t1 = tcg_temp_new_i32(); 4316 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4317 tcg_gen_extract2_i32(t0, t0, t1, a->imm); 4318 } 4319 tcg_gen_extu_i32_i64(tcg_rd, t0); 4320 } 4321 } 4322 return true; 4323 } 4324 4325 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 4326 * Note that it is the caller's responsibility to ensure that the 4327 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 4328 * mandated semantics for out of range shifts. 
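 * (For the register-controlled shift instructions this is done by
 * handle_shift_reg below, which masks the amount with 31 or 63 before
 * calling here.)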
4329 */ 4330 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 4331 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 4332 { 4333 switch (shift_type) { 4334 case A64_SHIFT_TYPE_LSL: 4335 tcg_gen_shl_i64(dst, src, shift_amount); 4336 break; 4337 case A64_SHIFT_TYPE_LSR: 4338 tcg_gen_shr_i64(dst, src, shift_amount); 4339 break; 4340 case A64_SHIFT_TYPE_ASR: 4341 if (!sf) { 4342 tcg_gen_ext32s_i64(dst, src); 4343 } 4344 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 4345 break; 4346 case A64_SHIFT_TYPE_ROR: 4347 if (sf) { 4348 tcg_gen_rotr_i64(dst, src, shift_amount); 4349 } else { 4350 TCGv_i32 t0, t1; 4351 t0 = tcg_temp_new_i32(); 4352 t1 = tcg_temp_new_i32(); 4353 tcg_gen_extrl_i64_i32(t0, src); 4354 tcg_gen_extrl_i64_i32(t1, shift_amount); 4355 tcg_gen_rotr_i32(t0, t0, t1); 4356 tcg_gen_extu_i32_i64(dst, t0); 4357 } 4358 break; 4359 default: 4360 assert(FALSE); /* all shift types should be handled */ 4361 break; 4362 } 4363 4364 if (!sf) { /* zero extend final result */ 4365 tcg_gen_ext32u_i64(dst, dst); 4366 } 4367 } 4368 4369 /* Shift a TCGv src by immediate, put result in dst. 4370 * The shift amount must be in range (this should always be true as the 4371 * relevant instructions will UNDEF on bad shift immediates). 4372 */ 4373 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 4374 enum a64_shift_type shift_type, unsigned int shift_i) 4375 { 4376 assert(shift_i < (sf ? 64 : 32)); 4377 4378 if (shift_i == 0) { 4379 tcg_gen_mov_i64(dst, src); 4380 } else { 4381 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 4382 } 4383 } 4384 4385 /* Logical (shifted register) 4386 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 4387 * +----+-----+-----------+-------+---+------+--------+------+------+ 4388 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd | 4389 * +----+-----+-----------+-------+---+------+--------+------+------+ 4390 */ 4391 static void disas_logic_reg(DisasContext *s, uint32_t insn) 4392 { 4393 TCGv_i64 tcg_rd, tcg_rn, tcg_rm; 4394 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd; 4395 4396 sf = extract32(insn, 31, 1); 4397 opc = extract32(insn, 29, 2); 4398 shift_type = extract32(insn, 22, 2); 4399 invert = extract32(insn, 21, 1); 4400 rm = extract32(insn, 16, 5); 4401 shift_amount = extract32(insn, 10, 6); 4402 rn = extract32(insn, 5, 5); 4403 rd = extract32(insn, 0, 5); 4404 4405 if (!sf && (shift_amount & (1 << 5))) { 4406 unallocated_encoding(s); 4407 return; 4408 } 4409 4410 tcg_rd = cpu_reg(s, rd); 4411 4412 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) { 4413 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for 4414 * register-register MOV and MVN, so it is worth special casing. 
4415 */ 4416 tcg_rm = cpu_reg(s, rm); 4417 if (invert) { 4418 tcg_gen_not_i64(tcg_rd, tcg_rm); 4419 if (!sf) { 4420 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4421 } 4422 } else { 4423 if (sf) { 4424 tcg_gen_mov_i64(tcg_rd, tcg_rm); 4425 } else { 4426 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 4427 } 4428 } 4429 return; 4430 } 4431 4432 tcg_rm = read_cpu_reg(s, rm, sf); 4433 4434 if (shift_amount) { 4435 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount); 4436 } 4437 4438 tcg_rn = cpu_reg(s, rn); 4439 4440 switch (opc | (invert << 2)) { 4441 case 0: /* AND */ 4442 case 3: /* ANDS */ 4443 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm); 4444 break; 4445 case 1: /* ORR */ 4446 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm); 4447 break; 4448 case 2: /* EOR */ 4449 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm); 4450 break; 4451 case 4: /* BIC */ 4452 case 7: /* BICS */ 4453 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm); 4454 break; 4455 case 5: /* ORN */ 4456 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm); 4457 break; 4458 case 6: /* EON */ 4459 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm); 4460 break; 4461 default: 4462 assert(FALSE); 4463 break; 4464 } 4465 4466 if (!sf) { 4467 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4468 } 4469 4470 if (opc == 3) { 4471 gen_logic_CC(sf, tcg_rd); 4472 } 4473 } 4474 4475 /* 4476 * Add/subtract (extended register) 4477 * 4478 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0| 4479 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 4480 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd | 4481 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 4482 * 4483 * sf: 0 -> 32bit, 1 -> 64bit 4484 * op: 0 -> add , 1 -> sub 4485 * S: 1 -> set flags 4486 * opt: 00 4487 * option: extension type (see DecodeRegExtend) 4488 * imm3: optional shift to Rm 4489 * 4490 * Rd = Rn + LSL(extend(Rm), amount) 4491 */ 4492 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn) 4493 { 4494 int rd = extract32(insn, 0, 5); 4495 int rn = extract32(insn, 5, 5); 4496 int imm3 = extract32(insn, 10, 3); 4497 int option = extract32(insn, 13, 3); 4498 int rm = extract32(insn, 16, 5); 4499 int opt = extract32(insn, 22, 2); 4500 bool setflags = extract32(insn, 29, 1); 4501 bool sub_op = extract32(insn, 30, 1); 4502 bool sf = extract32(insn, 31, 1); 4503 4504 TCGv_i64 tcg_rm, tcg_rn; /* temps */ 4505 TCGv_i64 tcg_rd; 4506 TCGv_i64 tcg_result; 4507 4508 if (imm3 > 4 || opt != 0) { 4509 unallocated_encoding(s); 4510 return; 4511 } 4512 4513 /* non-flag setting ops may use SP */ 4514 if (!setflags) { 4515 tcg_rd = cpu_reg_sp(s, rd); 4516 } else { 4517 tcg_rd = cpu_reg(s, rd); 4518 } 4519 tcg_rn = read_cpu_reg_sp(s, rn, sf); 4520 4521 tcg_rm = read_cpu_reg(s, rm, sf); 4522 ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3); 4523 4524 tcg_result = tcg_temp_new_i64(); 4525 4526 if (!setflags) { 4527 if (sub_op) { 4528 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 4529 } else { 4530 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 4531 } 4532 } else { 4533 if (sub_op) { 4534 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 4535 } else { 4536 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 4537 } 4538 } 4539 4540 if (sf) { 4541 tcg_gen_mov_i64(tcg_rd, tcg_result); 4542 } else { 4543 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 4544 } 4545 } 4546 4547 /* 4548 * Add/subtract (shifted register) 4549 * 4550 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 4551 * +--+--+--+-----------+-----+--+-------+---------+------+------+ 4552 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd | 4553 * 
+--+--+--+-----------+-----+--+-------+---------+------+------+ 4554 * 4555 * sf: 0 -> 32bit, 1 -> 64bit 4556 * op: 0 -> add , 1 -> sub 4557 * S: 1 -> set flags 4558 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED 4559 * imm6: Shift amount to apply to Rm before the add/sub 4560 */ 4561 static void disas_add_sub_reg(DisasContext *s, uint32_t insn) 4562 { 4563 int rd = extract32(insn, 0, 5); 4564 int rn = extract32(insn, 5, 5); 4565 int imm6 = extract32(insn, 10, 6); 4566 int rm = extract32(insn, 16, 5); 4567 int shift_type = extract32(insn, 22, 2); 4568 bool setflags = extract32(insn, 29, 1); 4569 bool sub_op = extract32(insn, 30, 1); 4570 bool sf = extract32(insn, 31, 1); 4571 4572 TCGv_i64 tcg_rd = cpu_reg(s, rd); 4573 TCGv_i64 tcg_rn, tcg_rm; 4574 TCGv_i64 tcg_result; 4575 4576 if ((shift_type == 3) || (!sf && (imm6 > 31))) { 4577 unallocated_encoding(s); 4578 return; 4579 } 4580 4581 tcg_rn = read_cpu_reg(s, rn, sf); 4582 tcg_rm = read_cpu_reg(s, rm, sf); 4583 4584 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6); 4585 4586 tcg_result = tcg_temp_new_i64(); 4587 4588 if (!setflags) { 4589 if (sub_op) { 4590 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 4591 } else { 4592 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 4593 } 4594 } else { 4595 if (sub_op) { 4596 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 4597 } else { 4598 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 4599 } 4600 } 4601 4602 if (sf) { 4603 tcg_gen_mov_i64(tcg_rd, tcg_result); 4604 } else { 4605 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 4606 } 4607 } 4608 4609 /* Data-processing (3 source) 4610 * 4611 * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0 4612 * +--+------+-----------+------+------+----+------+------+------+ 4613 * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd | 4614 * +--+------+-----------+------+------+----+------+------+------+ 4615 */ 4616 static void disas_data_proc_3src(DisasContext *s, uint32_t insn) 4617 { 4618 int rd = extract32(insn, 0, 5); 4619 int rn = extract32(insn, 5, 5); 4620 int ra = extract32(insn, 10, 5); 4621 int rm = extract32(insn, 16, 5); 4622 int op_id = (extract32(insn, 29, 3) << 4) | 4623 (extract32(insn, 21, 3) << 1) | 4624 extract32(insn, 15, 1); 4625 bool sf = extract32(insn, 31, 1); 4626 bool is_sub = extract32(op_id, 0, 1); 4627 bool is_high = extract32(op_id, 2, 1); 4628 bool is_signed = false; 4629 TCGv_i64 tcg_op1; 4630 TCGv_i64 tcg_op2; 4631 TCGv_i64 tcg_tmp; 4632 4633 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */ 4634 switch (op_id) { 4635 case 0x42: /* SMADDL */ 4636 case 0x43: /* SMSUBL */ 4637 case 0x44: /* SMULH */ 4638 is_signed = true; 4639 break; 4640 case 0x0: /* MADD (32bit) */ 4641 case 0x1: /* MSUB (32bit) */ 4642 case 0x40: /* MADD (64bit) */ 4643 case 0x41: /* MSUB (64bit) */ 4644 case 0x4a: /* UMADDL */ 4645 case 0x4b: /* UMSUBL */ 4646 case 0x4c: /* UMULH */ 4647 break; 4648 default: 4649 unallocated_encoding(s); 4650 return; 4651 } 4652 4653 if (is_high) { 4654 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */ 4655 TCGv_i64 tcg_rd = cpu_reg(s, rd); 4656 TCGv_i64 tcg_rn = cpu_reg(s, rn); 4657 TCGv_i64 tcg_rm = cpu_reg(s, rm); 4658 4659 if (is_signed) { 4660 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 4661 } else { 4662 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 4663 } 4664 return; 4665 } 4666 4667 tcg_op1 = tcg_temp_new_i64(); 4668 tcg_op2 = tcg_temp_new_i64(); 4669 tcg_tmp = tcg_temp_new_i64(); 4670 4671 if (op_id < 0x42) { 4672 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn)); 4673 
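/* op_id < 0x42 covers MADD/MSUB of either width, which use the source registers unmodified; the widening forms handled below extend their 32-bit sources first. */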
tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm)); 4674 } else { 4675 if (is_signed) { 4676 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn)); 4677 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm)); 4678 } else { 4679 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn)); 4680 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm)); 4681 } 4682 } 4683 4684 if (ra == 31 && !is_sub) { 4685 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 4686 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2); 4687 } else { 4688 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 4689 if (is_sub) { 4690 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 4691 } else { 4692 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 4693 } 4694 } 4695 4696 if (!sf) { 4697 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd)); 4698 } 4699 } 4700 4701 /* Add/subtract (with carry) 4702 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0 4703 * +--+--+--+------------------------+------+-------------+------+-----+ 4704 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd | 4705 * +--+--+--+------------------------+------+-------------+------+-----+ 4706 */ 4707 4708 static void disas_adc_sbc(DisasContext *s, uint32_t insn) 4709 { 4710 unsigned int sf, op, setflags, rm, rn, rd; 4711 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 4712 4713 sf = extract32(insn, 31, 1); 4714 op = extract32(insn, 30, 1); 4715 setflags = extract32(insn, 29, 1); 4716 rm = extract32(insn, 16, 5); 4717 rn = extract32(insn, 5, 5); 4718 rd = extract32(insn, 0, 5); 4719 4720 tcg_rd = cpu_reg(s, rd); 4721 tcg_rn = cpu_reg(s, rn); 4722 4723 if (op) { 4724 tcg_y = tcg_temp_new_i64(); 4725 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm)); 4726 } else { 4727 tcg_y = cpu_reg(s, rm); 4728 } 4729 4730 if (setflags) { 4731 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y); 4732 } else { 4733 gen_adc(sf, tcg_rd, tcg_rn, tcg_y); 4734 } 4735 } 4736 4737 /* 4738 * Rotate right into flags 4739 * 31 30 29 21 15 10 5 4 0 4740 * +--+--+--+-----------------+--------+-----------+------+--+------+ 4741 * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask | 4742 * +--+--+--+-----------------+--------+-----------+------+--+------+ 4743 */ 4744 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn) 4745 { 4746 int mask = extract32(insn, 0, 4); 4747 int o2 = extract32(insn, 4, 1); 4748 int rn = extract32(insn, 5, 5); 4749 int imm6 = extract32(insn, 15, 6); 4750 int sf_op_s = extract32(insn, 29, 3); 4751 TCGv_i64 tcg_rn; 4752 TCGv_i32 nzcv; 4753 4754 if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) { 4755 unallocated_encoding(s); 4756 return; 4757 } 4758 4759 tcg_rn = read_cpu_reg(s, rn, 1); 4760 tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6); 4761 4762 nzcv = tcg_temp_new_i32(); 4763 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 4764 4765 if (mask & 8) { /* N */ 4766 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 4767 } 4768 if (mask & 4) { /* Z */ 4769 tcg_gen_not_i32(cpu_ZF, nzcv); 4770 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 4771 } 4772 if (mask & 2) { /* C */ 4773 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 4774 } 4775 if (mask & 1) { /* V */ 4776 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 4777 } 4778 } 4779 4780 /* 4781 * Evaluate into flags 4782 * 31 30 29 21 15 14 10 5 4 0 4783 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 4784 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 | Rn |o3| mask | 4785 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 4786 */ 4787 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn) 4788 { 4789 int 
o3_mask = extract32(insn, 0, 5); 4790 int rn = extract32(insn, 5, 5); 4791 int o2 = extract32(insn, 15, 6); 4792 int sz = extract32(insn, 14, 1); 4793 int sf_op_s = extract32(insn, 29, 3); 4794 TCGv_i32 tmp; 4795 int shift; 4796 4797 if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd || 4798 !dc_isar_feature(aa64_condm_4, s)) { 4799 unallocated_encoding(s); 4800 return; 4801 } 4802 shift = sz ? 16 : 24; /* SETF16 or SETF8 */ 4803 4804 tmp = tcg_temp_new_i32(); 4805 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 4806 tcg_gen_shli_i32(cpu_NF, tmp, shift); 4807 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 4808 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 4809 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 4810 } 4811 4812 /* Conditional compare (immediate / register) 4813 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 4814 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 4815 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv | 4816 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 4817 * [1] y [0] [0] 4818 */ 4819 static void disas_cc(DisasContext *s, uint32_t insn) 4820 { 4821 unsigned int sf, op, y, cond, rn, nzcv, is_imm; 4822 TCGv_i32 tcg_t0, tcg_t1, tcg_t2; 4823 TCGv_i64 tcg_tmp, tcg_y, tcg_rn; 4824 DisasCompare c; 4825 4826 if (!extract32(insn, 29, 1)) { 4827 unallocated_encoding(s); 4828 return; 4829 } 4830 if (insn & (1 << 10 | 1 << 4)) { 4831 unallocated_encoding(s); 4832 return; 4833 } 4834 sf = extract32(insn, 31, 1); 4835 op = extract32(insn, 30, 1); 4836 is_imm = extract32(insn, 11, 1); 4837 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */ 4838 cond = extract32(insn, 12, 4); 4839 rn = extract32(insn, 5, 5); 4840 nzcv = extract32(insn, 0, 4); 4841 4842 /* Set T0 = !COND. */ 4843 tcg_t0 = tcg_temp_new_i32(); 4844 arm_test_cc(&c, cond); 4845 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 4846 4847 /* Load the arguments for the new comparison. */ 4848 if (is_imm) { 4849 tcg_y = tcg_temp_new_i64(); 4850 tcg_gen_movi_i64(tcg_y, y); 4851 } else { 4852 tcg_y = cpu_reg(s, y); 4853 } 4854 tcg_rn = cpu_reg(s, rn); 4855 4856 /* Set the flags for the new comparison. */ 4857 tcg_tmp = tcg_temp_new_i64(); 4858 if (op) { 4859 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y); 4860 } else { 4861 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y); 4862 } 4863 4864 /* If COND was false, force the flags to #nzcv. Compute two masks 4865 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 4866 * For tcg hosts that support ANDC, we can make do with just T1. 4867 * In either case, allow the tcg optimizer to delete any unused mask. 
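 * For example, when the N bit of #nzcv is set, NF |= T1 forces NF
 * negative if COND failed and leaves it unchanged if COND held.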
4868 */ 4869 tcg_t1 = tcg_temp_new_i32(); 4870 tcg_t2 = tcg_temp_new_i32(); 4871 tcg_gen_neg_i32(tcg_t1, tcg_t0); 4872 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 4873 4874 if (nzcv & 8) { /* N */ 4875 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 4876 } else { 4877 if (TCG_TARGET_HAS_andc_i32) { 4878 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 4879 } else { 4880 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 4881 } 4882 } 4883 if (nzcv & 4) { /* Z */ 4884 if (TCG_TARGET_HAS_andc_i32) { 4885 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 4886 } else { 4887 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 4888 } 4889 } else { 4890 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 4891 } 4892 if (nzcv & 2) { /* C */ 4893 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 4894 } else { 4895 if (TCG_TARGET_HAS_andc_i32) { 4896 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 4897 } else { 4898 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 4899 } 4900 } 4901 if (nzcv & 1) { /* V */ 4902 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 4903 } else { 4904 if (TCG_TARGET_HAS_andc_i32) { 4905 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 4906 } else { 4907 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 4908 } 4909 } 4910 } 4911 4912 /* Conditional select 4913 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0 4914 * +----+----+---+-----------------+------+------+-----+------+------+ 4915 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd | 4916 * +----+----+---+-----------------+------+------+-----+------+------+ 4917 */ 4918 static void disas_cond_select(DisasContext *s, uint32_t insn) 4919 { 4920 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd; 4921 TCGv_i64 tcg_rd, zero; 4922 DisasCompare64 c; 4923 4924 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) { 4925 /* S == 1 or op2<1> == 1 */ 4926 unallocated_encoding(s); 4927 return; 4928 } 4929 sf = extract32(insn, 31, 1); 4930 else_inv = extract32(insn, 30, 1); 4931 rm = extract32(insn, 16, 5); 4932 cond = extract32(insn, 12, 4); 4933 else_inc = extract32(insn, 10, 1); 4934 rn = extract32(insn, 5, 5); 4935 rd = extract32(insn, 0, 5); 4936 4937 tcg_rd = cpu_reg(s, rd); 4938 4939 a64_test_cc(&c, cond); 4940 zero = tcg_constant_i64(0); 4941 4942 if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) { 4943 /* CSET & CSETM. 
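 * (CSET Rd, cond is the alias of CSINC Rd, ZR, ZR, invert(cond);
 * CSETM is the corresponding CSINV alias, producing 0 or -1.)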
*/ 4944 if (else_inv) { 4945 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), 4946 tcg_rd, c.value, zero); 4947 } else { 4948 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), 4949 tcg_rd, c.value, zero); 4950 } 4951 } else { 4952 TCGv_i64 t_true = cpu_reg(s, rn); 4953 TCGv_i64 t_false = read_cpu_reg(s, rm, 1); 4954 if (else_inv && else_inc) { 4955 tcg_gen_neg_i64(t_false, t_false); 4956 } else if (else_inv) { 4957 tcg_gen_not_i64(t_false, t_false); 4958 } else if (else_inc) { 4959 tcg_gen_addi_i64(t_false, t_false, 1); 4960 } 4961 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 4962 } 4963 4964 if (!sf) { 4965 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4966 } 4967 } 4968 4969 static void handle_clz(DisasContext *s, unsigned int sf, 4970 unsigned int rn, unsigned int rd) 4971 { 4972 TCGv_i64 tcg_rd, tcg_rn; 4973 tcg_rd = cpu_reg(s, rd); 4974 tcg_rn = cpu_reg(s, rn); 4975 4976 if (sf) { 4977 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 4978 } else { 4979 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 4980 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 4981 tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32); 4982 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 4983 } 4984 } 4985 4986 static void handle_cls(DisasContext *s, unsigned int sf, 4987 unsigned int rn, unsigned int rd) 4988 { 4989 TCGv_i64 tcg_rd, tcg_rn; 4990 tcg_rd = cpu_reg(s, rd); 4991 tcg_rn = cpu_reg(s, rn); 4992 4993 if (sf) { 4994 tcg_gen_clrsb_i64(tcg_rd, tcg_rn); 4995 } else { 4996 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 4997 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 4998 tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32); 4999 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5000 } 5001 } 5002 5003 static void handle_rbit(DisasContext *s, unsigned int sf, 5004 unsigned int rn, unsigned int rd) 5005 { 5006 TCGv_i64 tcg_rd, tcg_rn; 5007 tcg_rd = cpu_reg(s, rd); 5008 tcg_rn = cpu_reg(s, rn); 5009 5010 if (sf) { 5011 gen_helper_rbit64(tcg_rd, tcg_rn); 5012 } else { 5013 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5014 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5015 gen_helper_rbit(tcg_tmp32, tcg_tmp32); 5016 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5017 } 5018 } 5019 5020 /* REV with sf==1, opcode==3 ("REV64") */ 5021 static void handle_rev64(DisasContext *s, unsigned int sf, 5022 unsigned int rn, unsigned int rd) 5023 { 5024 if (!sf) { 5025 unallocated_encoding(s); 5026 return; 5027 } 5028 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn)); 5029 } 5030 5031 /* REV with sf==0, opcode==2 5032 * REV32 (sf==1, opcode==2) 5033 */ 5034 static void handle_rev32(DisasContext *s, unsigned int sf, 5035 unsigned int rn, unsigned int rd) 5036 { 5037 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5038 TCGv_i64 tcg_rn = cpu_reg(s, rn); 5039 5040 if (sf) { 5041 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 5042 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 5043 } else { 5044 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 5045 } 5046 } 5047 5048 /* REV16 (opcode==1) */ 5049 static void handle_rev16(DisasContext *s, unsigned int sf, 5050 unsigned int rn, unsigned int rd) 5051 { 5052 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5053 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 5054 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 5055 TCGv_i64 mask = tcg_constant_i64(sf ? 
0x00ff00ff00ff00ffull : 0x00ff00ff); 5056 5057 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 5058 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 5059 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 5060 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 5061 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 5062 } 5063 5064 /* Data-processing (1 source) 5065 * 31 30 29 28 21 20 16 15 10 9 5 4 0 5066 * +----+---+---+-----------------+---------+--------+------+------+ 5067 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd | 5068 * +----+---+---+-----------------+---------+--------+------+------+ 5069 */ 5070 static void disas_data_proc_1src(DisasContext *s, uint32_t insn) 5071 { 5072 unsigned int sf, opcode, opcode2, rn, rd; 5073 TCGv_i64 tcg_rd; 5074 5075 if (extract32(insn, 29, 1)) { 5076 unallocated_encoding(s); 5077 return; 5078 } 5079 5080 sf = extract32(insn, 31, 1); 5081 opcode = extract32(insn, 10, 6); 5082 opcode2 = extract32(insn, 16, 5); 5083 rn = extract32(insn, 5, 5); 5084 rd = extract32(insn, 0, 5); 5085 5086 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7)) 5087 5088 switch (MAP(sf, opcode2, opcode)) { 5089 case MAP(0, 0x00, 0x00): /* RBIT */ 5090 case MAP(1, 0x00, 0x00): 5091 handle_rbit(s, sf, rn, rd); 5092 break; 5093 case MAP(0, 0x00, 0x01): /* REV16 */ 5094 case MAP(1, 0x00, 0x01): 5095 handle_rev16(s, sf, rn, rd); 5096 break; 5097 case MAP(0, 0x00, 0x02): /* REV/REV32 */ 5098 case MAP(1, 0x00, 0x02): 5099 handle_rev32(s, sf, rn, rd); 5100 break; 5101 case MAP(1, 0x00, 0x03): /* REV64 */ 5102 handle_rev64(s, sf, rn, rd); 5103 break; 5104 case MAP(0, 0x00, 0x04): /* CLZ */ 5105 case MAP(1, 0x00, 0x04): 5106 handle_clz(s, sf, rn, rd); 5107 break; 5108 case MAP(0, 0x00, 0x05): /* CLS */ 5109 case MAP(1, 0x00, 0x05): 5110 handle_cls(s, sf, rn, rd); 5111 break; 5112 case MAP(1, 0x01, 0x00): /* PACIA */ 5113 if (s->pauth_active) { 5114 tcg_rd = cpu_reg(s, rd); 5115 gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5116 } else if (!dc_isar_feature(aa64_pauth, s)) { 5117 goto do_unallocated; 5118 } 5119 break; 5120 case MAP(1, 0x01, 0x01): /* PACIB */ 5121 if (s->pauth_active) { 5122 tcg_rd = cpu_reg(s, rd); 5123 gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5124 } else if (!dc_isar_feature(aa64_pauth, s)) { 5125 goto do_unallocated; 5126 } 5127 break; 5128 case MAP(1, 0x01, 0x02): /* PACDA */ 5129 if (s->pauth_active) { 5130 tcg_rd = cpu_reg(s, rd); 5131 gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5132 } else if (!dc_isar_feature(aa64_pauth, s)) { 5133 goto do_unallocated; 5134 } 5135 break; 5136 case MAP(1, 0x01, 0x03): /* PACDB */ 5137 if (s->pauth_active) { 5138 tcg_rd = cpu_reg(s, rd); 5139 gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5140 } else if (!dc_isar_feature(aa64_pauth, s)) { 5141 goto do_unallocated; 5142 } 5143 break; 5144 case MAP(1, 0x01, 0x04): /* AUTIA */ 5145 if (s->pauth_active) { 5146 tcg_rd = cpu_reg(s, rd); 5147 gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5148 } else if (!dc_isar_feature(aa64_pauth, s)) { 5149 goto do_unallocated; 5150 } 5151 break; 5152 case MAP(1, 0x01, 0x05): /* AUTIB */ 5153 if (s->pauth_active) { 5154 tcg_rd = cpu_reg(s, rd); 5155 gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5156 } else if (!dc_isar_feature(aa64_pauth, s)) { 5157 goto do_unallocated; 5158 } 5159 break; 5160 case MAP(1, 0x01, 0x06): /* AUTDA */ 5161 if (s->pauth_active) { 5162 tcg_rd = cpu_reg(s, rd); 5163 gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5164 } else if 
(!dc_isar_feature(aa64_pauth, s)) { 5165 goto do_unallocated; 5166 } 5167 break; 5168 case MAP(1, 0x01, 0x07): /* AUTDB */ 5169 if (s->pauth_active) { 5170 tcg_rd = cpu_reg(s, rd); 5171 gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5172 } else if (!dc_isar_feature(aa64_pauth, s)) { 5173 goto do_unallocated; 5174 } 5175 break; 5176 case MAP(1, 0x01, 0x08): /* PACIZA */ 5177 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5178 goto do_unallocated; 5179 } else if (s->pauth_active) { 5180 tcg_rd = cpu_reg(s, rd); 5181 gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5182 } 5183 break; 5184 case MAP(1, 0x01, 0x09): /* PACIZB */ 5185 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5186 goto do_unallocated; 5187 } else if (s->pauth_active) { 5188 tcg_rd = cpu_reg(s, rd); 5189 gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5190 } 5191 break; 5192 case MAP(1, 0x01, 0x0a): /* PACDZA */ 5193 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5194 goto do_unallocated; 5195 } else if (s->pauth_active) { 5196 tcg_rd = cpu_reg(s, rd); 5197 gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5198 } 5199 break; 5200 case MAP(1, 0x01, 0x0b): /* PACDZB */ 5201 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5202 goto do_unallocated; 5203 } else if (s->pauth_active) { 5204 tcg_rd = cpu_reg(s, rd); 5205 gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5206 } 5207 break; 5208 case MAP(1, 0x01, 0x0c): /* AUTIZA */ 5209 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5210 goto do_unallocated; 5211 } else if (s->pauth_active) { 5212 tcg_rd = cpu_reg(s, rd); 5213 gen_helper_autia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5214 } 5215 break; 5216 case MAP(1, 0x01, 0x0d): /* AUTIZB */ 5217 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5218 goto do_unallocated; 5219 } else if (s->pauth_active) { 5220 tcg_rd = cpu_reg(s, rd); 5221 gen_helper_autib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5222 } 5223 break; 5224 case MAP(1, 0x01, 0x0e): /* AUTDZA */ 5225 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5226 goto do_unallocated; 5227 } else if (s->pauth_active) { 5228 tcg_rd = cpu_reg(s, rd); 5229 gen_helper_autda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5230 } 5231 break; 5232 case MAP(1, 0x01, 0x0f): /* AUTDZB */ 5233 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5234 goto do_unallocated; 5235 } else if (s->pauth_active) { 5236 tcg_rd = cpu_reg(s, rd); 5237 gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5238 } 5239 break; 5240 case MAP(1, 0x01, 0x10): /* XPACI */ 5241 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5242 goto do_unallocated; 5243 } else if (s->pauth_active) { 5244 tcg_rd = cpu_reg(s, rd); 5245 gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd); 5246 } 5247 break; 5248 case MAP(1, 0x01, 0x11): /* XPACD */ 5249 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5250 goto do_unallocated; 5251 } else if (s->pauth_active) { 5252 tcg_rd = cpu_reg(s, rd); 5253 gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd); 5254 } 5255 break; 5256 default: 5257 do_unallocated: 5258 unallocated_encoding(s); 5259 break; 5260 } 5261 5262 #undef MAP 5263 } 5264 5265 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf, 5266 unsigned int rm, unsigned int rn, unsigned int rd) 5267 { 5268 TCGv_i64 tcg_n, tcg_m, tcg_rd; 5269 tcg_rd = cpu_reg(s, rd); 5270 5271 if (!sf && is_signed) { 5272 tcg_n = tcg_temp_new_i64(); 5273 tcg_m = tcg_temp_new_i64(); 5274 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn)); 
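/* (Both operands are sign-extended so that the 64-bit divide helper below returns the correct signed 32-bit quotient, e.g. -6 / 2 = -3.) */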
5275 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm)); 5276 } else { 5277 tcg_n = read_cpu_reg(s, rn, sf); 5278 tcg_m = read_cpu_reg(s, rm, sf); 5279 } 5280 5281 if (is_signed) { 5282 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 5283 } else { 5284 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 5285 } 5286 5287 if (!sf) { /* zero extend final result */ 5288 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5289 } 5290 } 5291 5292 /* LSLV, LSRV, ASRV, RORV */ 5293 static void handle_shift_reg(DisasContext *s, 5294 enum a64_shift_type shift_type, unsigned int sf, 5295 unsigned int rm, unsigned int rn, unsigned int rd) 5296 { 5297 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 5298 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5299 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 5300 5301 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31); 5302 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift); 5303 } 5304 5305 /* CRC32[BHWX], CRC32C[BHWX] */ 5306 static void handle_crc32(DisasContext *s, 5307 unsigned int sf, unsigned int sz, bool crc32c, 5308 unsigned int rm, unsigned int rn, unsigned int rd) 5309 { 5310 TCGv_i64 tcg_acc, tcg_val; 5311 TCGv_i32 tcg_bytes; 5312 5313 if (!dc_isar_feature(aa64_crc32, s) 5314 || (sf == 1 && sz != 3) 5315 || (sf == 0 && sz == 3)) { 5316 unallocated_encoding(s); 5317 return; 5318 } 5319 5320 if (sz == 3) { 5321 tcg_val = cpu_reg(s, rm); 5322 } else { 5323 uint64_t mask; 5324 switch (sz) { 5325 case 0: 5326 mask = 0xFF; 5327 break; 5328 case 1: 5329 mask = 0xFFFF; 5330 break; 5331 case 2: 5332 mask = 0xFFFFFFFF; 5333 break; 5334 default: 5335 g_assert_not_reached(); 5336 } 5337 tcg_val = tcg_temp_new_i64(); 5338 tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask); 5339 } 5340 5341 tcg_acc = cpu_reg(s, rn); 5342 tcg_bytes = tcg_constant_i32(1 << sz); 5343 5344 if (crc32c) { 5345 gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 5346 } else { 5347 gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 5348 } 5349 } 5350 5351 /* Data-processing (2 source) 5352 * 31 30 29 28 21 20 16 15 10 9 5 4 0 5353 * +----+---+---+-----------------+------+--------+------+------+ 5354 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd | 5355 * +----+---+---+-----------------+------+--------+------+------+ 5356 */ 5357 static void disas_data_proc_2src(DisasContext *s, uint32_t insn) 5358 { 5359 unsigned int sf, rm, opcode, rn, rd, setflag; 5360 sf = extract32(insn, 31, 1); 5361 setflag = extract32(insn, 29, 1); 5362 rm = extract32(insn, 16, 5); 5363 opcode = extract32(insn, 10, 6); 5364 rn = extract32(insn, 5, 5); 5365 rd = extract32(insn, 0, 5); 5366 5367 if (setflag && opcode != 0) { 5368 unallocated_encoding(s); 5369 return; 5370 } 5371 5372 switch (opcode) { 5373 case 0: /* SUBP(S) */ 5374 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5375 goto do_unallocated; 5376 } else { 5377 TCGv_i64 tcg_n, tcg_m, tcg_d; 5378 5379 tcg_n = read_cpu_reg_sp(s, rn, true); 5380 tcg_m = read_cpu_reg_sp(s, rm, true); 5381 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 5382 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 5383 tcg_d = cpu_reg(s, rd); 5384 5385 if (setflag) { 5386 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 5387 } else { 5388 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 5389 } 5390 } 5391 break; 5392 case 2: /* UDIV */ 5393 handle_div(s, false, sf, rm, rn, rd); 5394 break; 5395 case 3: /* SDIV */ 5396 handle_div(s, true, sf, rm, rn, rd); 5397 break; 5398 case 4: /* IRG */ 5399 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5400 goto do_unallocated; 5401 } 5402 if (s->ata) { 5403 
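/* MTE enabled: helper_irg inserts a new (pseudo-)random allocation tag into the Rn address, honouring the exclusion set formed from GCR_EL1 and the Rm operand. */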
gen_helper_irg(cpu_reg_sp(s, rd), cpu_env, 5404 cpu_reg_sp(s, rn), cpu_reg(s, rm)); 5405 } else { 5406 gen_address_with_allocation_tag0(cpu_reg_sp(s, rd), 5407 cpu_reg_sp(s, rn)); 5408 } 5409 break; 5410 case 5: /* GMI */ 5411 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5412 goto do_unallocated; 5413 } else { 5414 TCGv_i64 t = tcg_temp_new_i64(); 5415 5416 tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4); 5417 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 5418 tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t); 5419 } 5420 break; 5421 case 8: /* LSLV */ 5422 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); 5423 break; 5424 case 9: /* LSRV */ 5425 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd); 5426 break; 5427 case 10: /* ASRV */ 5428 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd); 5429 break; 5430 case 11: /* RORV */ 5431 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd); 5432 break; 5433 case 12: /* PACGA */ 5434 if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) { 5435 goto do_unallocated; 5436 } 5437 gen_helper_pacga(cpu_reg(s, rd), cpu_env, 5438 cpu_reg(s, rn), cpu_reg_sp(s, rm)); 5439 break; 5440 case 16: 5441 case 17: 5442 case 18: 5443 case 19: 5444 case 20: 5445 case 21: 5446 case 22: 5447 case 23: /* CRC32 */ 5448 { 5449 int sz = extract32(opcode, 0, 2); 5450 bool crc32c = extract32(opcode, 2, 1); 5451 handle_crc32(s, sf, sz, crc32c, rm, rn, rd); 5452 break; 5453 } 5454 default: 5455 do_unallocated: 5456 unallocated_encoding(s); 5457 break; 5458 } 5459 } 5460 5461 /* 5462 * Data processing - register 5463 * 31 30 29 28 25 21 20 16 10 0 5464 * +--+---+--+---+-------+-----+-------+-------+---------+ 5465 * | |op0| |op1| 1 0 1 | op2 | | op3 | | 5466 * +--+---+--+---+-------+-----+-------+-------+---------+ 5467 */ 5468 static void disas_data_proc_reg(DisasContext *s, uint32_t insn) 5469 { 5470 int op0 = extract32(insn, 30, 1); 5471 int op1 = extract32(insn, 28, 1); 5472 int op2 = extract32(insn, 21, 4); 5473 int op3 = extract32(insn, 10, 6); 5474 5475 if (!op1) { 5476 if (op2 & 8) { 5477 if (op2 & 1) { 5478 /* Add/sub (extended register) */ 5479 disas_add_sub_ext_reg(s, insn); 5480 } else { 5481 /* Add/sub (shifted register) */ 5482 disas_add_sub_reg(s, insn); 5483 } 5484 } else { 5485 /* Logical (shifted register) */ 5486 disas_logic_reg(s, insn); 5487 } 5488 return; 5489 } 5490 5491 switch (op2) { 5492 case 0x0: 5493 switch (op3) { 5494 case 0x00: /* Add/subtract (with carry) */ 5495 disas_adc_sbc(s, insn); 5496 break; 5497 5498 case 0x01: /* Rotate right into flags */ 5499 case 0x21: 5500 disas_rotate_right_into_flags(s, insn); 5501 break; 5502 5503 case 0x02: /* Evaluate into flags */ 5504 case 0x12: 5505 case 0x22: 5506 case 0x32: 5507 disas_evaluate_into_flags(s, insn); 5508 break; 5509 5510 default: 5511 goto do_unallocated; 5512 } 5513 break; 5514 5515 case 0x2: /* Conditional compare */ 5516 disas_cc(s, insn); /* both imm and reg forms */ 5517 break; 5518 5519 case 0x4: /* Conditional select */ 5520 disas_cond_select(s, insn); 5521 break; 5522 5523 case 0x6: /* Data-processing */ 5524 if (op0) { /* (1 source) */ 5525 disas_data_proc_1src(s, insn); 5526 } else { /* (2 source) */ 5527 disas_data_proc_2src(s, insn); 5528 } 5529 break; 5530 case 0x8 ... 
0xf: /* (3 source) */ 5531 disas_data_proc_3src(s, insn); 5532 break; 5533 5534 default: 5535 do_unallocated: 5536 unallocated_encoding(s); 5537 break; 5538 } 5539 } 5540 5541 static void handle_fp_compare(DisasContext *s, int size, 5542 unsigned int rn, unsigned int rm, 5543 bool cmp_with_zero, bool signal_all_nans) 5544 { 5545 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 5546 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 5547 5548 if (size == MO_64) { 5549 TCGv_i64 tcg_vn, tcg_vm; 5550 5551 tcg_vn = read_fp_dreg(s, rn); 5552 if (cmp_with_zero) { 5553 tcg_vm = tcg_constant_i64(0); 5554 } else { 5555 tcg_vm = read_fp_dreg(s, rm); 5556 } 5557 if (signal_all_nans) { 5558 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5559 } else { 5560 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5561 } 5562 } else { 5563 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 5564 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 5565 5566 read_vec_element_i32(s, tcg_vn, rn, 0, size); 5567 if (cmp_with_zero) { 5568 tcg_gen_movi_i32(tcg_vm, 0); 5569 } else { 5570 read_vec_element_i32(s, tcg_vm, rm, 0, size); 5571 } 5572 5573 switch (size) { 5574 case MO_32: 5575 if (signal_all_nans) { 5576 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5577 } else { 5578 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5579 } 5580 break; 5581 case MO_16: 5582 if (signal_all_nans) { 5583 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5584 } else { 5585 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5586 } 5587 break; 5588 default: 5589 g_assert_not_reached(); 5590 } 5591 } 5592 5593 gen_set_nzcv(tcg_flags); 5594 } 5595 5596 /* Floating point compare 5597 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0 5598 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 5599 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 | 5600 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 5601 */ 5602 static void disas_fp_compare(DisasContext *s, uint32_t insn) 5603 { 5604 unsigned int mos, type, rm, op, rn, opc, op2r; 5605 int size; 5606 5607 mos = extract32(insn, 29, 3); 5608 type = extract32(insn, 22, 2); 5609 rm = extract32(insn, 16, 5); 5610 op = extract32(insn, 14, 2); 5611 rn = extract32(insn, 5, 5); 5612 opc = extract32(insn, 3, 2); 5613 op2r = extract32(insn, 0, 3); 5614 5615 if (mos || op || op2r) { 5616 unallocated_encoding(s); 5617 return; 5618 } 5619 5620 switch (type) { 5621 case 0: 5622 size = MO_32; 5623 break; 5624 case 1: 5625 size = MO_64; 5626 break; 5627 case 3: 5628 size = MO_16; 5629 if (dc_isar_feature(aa64_fp16, s)) { 5630 break; 5631 } 5632 /* fallthru */ 5633 default: 5634 unallocated_encoding(s); 5635 return; 5636 } 5637 5638 if (!fp_access_check(s)) { 5639 return; 5640 } 5641 5642 handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2); 5643 } 5644 5645 /* Floating point conditional compare 5646 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 5647 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 5648 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv | 5649 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 5650 */ 5651 static void disas_fp_ccomp(DisasContext *s, uint32_t insn) 5652 { 5653 unsigned int mos, type, rm, cond, rn, op, nzcv; 5654 TCGLabel *label_continue = NULL; 5655 int size; 5656 5657 mos = extract32(insn, 29, 3); 5658 type = extract32(insn, 22, 2); 5659 rm = extract32(insn, 16, 5); 5660 cond 
= extract32(insn, 12, 4); 5661 rn = extract32(insn, 5, 5); 5662 op = extract32(insn, 4, 1); 5663 nzcv = extract32(insn, 0, 4); 5664 5665 if (mos) { 5666 unallocated_encoding(s); 5667 return; 5668 } 5669 5670 switch (type) { 5671 case 0: 5672 size = MO_32; 5673 break; 5674 case 1: 5675 size = MO_64; 5676 break; 5677 case 3: 5678 size = MO_16; 5679 if (dc_isar_feature(aa64_fp16, s)) { 5680 break; 5681 } 5682 /* fallthru */ 5683 default: 5684 unallocated_encoding(s); 5685 return; 5686 } 5687 5688 if (!fp_access_check(s)) { 5689 return; 5690 } 5691 5692 if (cond < 0x0e) { /* not always */ 5693 TCGLabel *label_match = gen_new_label(); 5694 label_continue = gen_new_label(); 5695 arm_gen_test_cc(cond, label_match); 5696 /* nomatch: */ 5697 gen_set_nzcv(tcg_constant_i64(nzcv << 28)); 5698 tcg_gen_br(label_continue); 5699 gen_set_label(label_match); 5700 } 5701 5702 handle_fp_compare(s, size, rn, rm, false, op); 5703 5704 if (cond < 0x0e) { 5705 gen_set_label(label_continue); 5706 } 5707 } 5708 5709 /* Floating point conditional select 5710 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 5711 * +---+---+---+-----------+------+---+------+------+-----+------+------+ 5712 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd | 5713 * +---+---+---+-----------+------+---+------+------+-----+------+------+ 5714 */ 5715 static void disas_fp_csel(DisasContext *s, uint32_t insn) 5716 { 5717 unsigned int mos, type, rm, cond, rn, rd; 5718 TCGv_i64 t_true, t_false; 5719 DisasCompare64 c; 5720 MemOp sz; 5721 5722 mos = extract32(insn, 29, 3); 5723 type = extract32(insn, 22, 2); 5724 rm = extract32(insn, 16, 5); 5725 cond = extract32(insn, 12, 4); 5726 rn = extract32(insn, 5, 5); 5727 rd = extract32(insn, 0, 5); 5728 5729 if (mos) { 5730 unallocated_encoding(s); 5731 return; 5732 } 5733 5734 switch (type) { 5735 case 0: 5736 sz = MO_32; 5737 break; 5738 case 1: 5739 sz = MO_64; 5740 break; 5741 case 3: 5742 sz = MO_16; 5743 if (dc_isar_feature(aa64_fp16, s)) { 5744 break; 5745 } 5746 /* fallthru */ 5747 default: 5748 unallocated_encoding(s); 5749 return; 5750 } 5751 5752 if (!fp_access_check(s)) { 5753 return; 5754 } 5755 5756 /* Zero extend sreg & hreg inputs to 64 bits now. */ 5757 t_true = tcg_temp_new_i64(); 5758 t_false = tcg_temp_new_i64(); 5759 read_vec_element(s, t_true, rn, 0, sz); 5760 read_vec_element(s, t_false, rm, 0, sz); 5761 5762 a64_test_cc(&c, cond); 5763 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 5764 t_true, t_false); 5765 5766 /* Note that sregs & hregs write back zeros to the high bits, 5767 and we've already done the zero-extension. 
*/ 5768 write_fp_dreg(s, rd, t_true); 5769 } 5770 5771 /* Floating-point data-processing (1 source) - half precision */ 5772 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) 5773 { 5774 TCGv_ptr fpst = NULL; 5775 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 5776 TCGv_i32 tcg_res = tcg_temp_new_i32(); 5777 5778 switch (opcode) { 5779 case 0x0: /* FMOV */ 5780 tcg_gen_mov_i32(tcg_res, tcg_op); 5781 break; 5782 case 0x1: /* FABS */ 5783 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 5784 break; 5785 case 0x2: /* FNEG */ 5786 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 5787 break; 5788 case 0x3: /* FSQRT */ 5789 fpst = fpstatus_ptr(FPST_FPCR_F16); 5790 gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); 5791 break; 5792 case 0x8: /* FRINTN */ 5793 case 0x9: /* FRINTP */ 5794 case 0xa: /* FRINTM */ 5795 case 0xb: /* FRINTZ */ 5796 case 0xc: /* FRINTA */ 5797 { 5798 TCGv_i32 tcg_rmode; 5799 5800 fpst = fpstatus_ptr(FPST_FPCR_F16); 5801 tcg_rmode = gen_set_rmode(opcode & 7, fpst); 5802 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 5803 gen_restore_rmode(tcg_rmode, fpst); 5804 break; 5805 } 5806 case 0xe: /* FRINTX */ 5807 fpst = fpstatus_ptr(FPST_FPCR_F16); 5808 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst); 5809 break; 5810 case 0xf: /* FRINTI */ 5811 fpst = fpstatus_ptr(FPST_FPCR_F16); 5812 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 5813 break; 5814 default: 5815 g_assert_not_reached(); 5816 } 5817 5818 write_fp_sreg(s, rd, tcg_res); 5819 } 5820 5821 /* Floating-point data-processing (1 source) - single precision */ 5822 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) 5823 { 5824 void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr); 5825 TCGv_i32 tcg_op, tcg_res; 5826 TCGv_ptr fpst; 5827 int rmode = -1; 5828 5829 tcg_op = read_fp_sreg(s, rn); 5830 tcg_res = tcg_temp_new_i32(); 5831 5832 switch (opcode) { 5833 case 0x0: /* FMOV */ 5834 tcg_gen_mov_i32(tcg_res, tcg_op); 5835 goto done; 5836 case 0x1: /* FABS */ 5837 gen_helper_vfp_abss(tcg_res, tcg_op); 5838 goto done; 5839 case 0x2: /* FNEG */ 5840 gen_helper_vfp_negs(tcg_res, tcg_op); 5841 goto done; 5842 case 0x3: /* FSQRT */ 5843 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); 5844 goto done; 5845 case 0x6: /* BFCVT */ 5846 gen_fpst = gen_helper_bfcvt; 5847 break; 5848 case 0x8: /* FRINTN */ 5849 case 0x9: /* FRINTP */ 5850 case 0xa: /* FRINTM */ 5851 case 0xb: /* FRINTZ */ 5852 case 0xc: /* FRINTA */ 5853 rmode = opcode & 7; 5854 gen_fpst = gen_helper_rints; 5855 break; 5856 case 0xe: /* FRINTX */ 5857 gen_fpst = gen_helper_rints_exact; 5858 break; 5859 case 0xf: /* FRINTI */ 5860 gen_fpst = gen_helper_rints; 5861 break; 5862 case 0x10: /* FRINT32Z */ 5863 rmode = FPROUNDING_ZERO; 5864 gen_fpst = gen_helper_frint32_s; 5865 break; 5866 case 0x11: /* FRINT32X */ 5867 gen_fpst = gen_helper_frint32_s; 5868 break; 5869 case 0x12: /* FRINT64Z */ 5870 rmode = FPROUNDING_ZERO; 5871 gen_fpst = gen_helper_frint64_s; 5872 break; 5873 case 0x13: /* FRINT64X */ 5874 gen_fpst = gen_helper_frint64_s; 5875 break; 5876 default: 5877 g_assert_not_reached(); 5878 } 5879 5880 fpst = fpstatus_ptr(FPST_FPCR); 5881 if (rmode >= 0) { 5882 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); 5883 gen_fpst(tcg_res, tcg_op, fpst); 5884 gen_restore_rmode(tcg_rmode, fpst); 5885 } else { 5886 gen_fpst(tcg_res, tcg_op, fpst); 5887 } 5888 5889 done: 5890 write_fp_sreg(s, rd, tcg_res); 5891 } 5892 5893 /* Floating-point data-processing (1 source) - double precision */ 5894 static void handle_fp_1src_double(DisasContext *s, int 
opcode, int rd, int rn) 5895 { 5896 void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr); 5897 TCGv_i64 tcg_op, tcg_res; 5898 TCGv_ptr fpst; 5899 int rmode = -1; 5900 5901 switch (opcode) { 5902 case 0x0: /* FMOV */ 5903 gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0); 5904 return; 5905 } 5906 5907 tcg_op = read_fp_dreg(s, rn); 5908 tcg_res = tcg_temp_new_i64(); 5909 5910 switch (opcode) { 5911 case 0x1: /* FABS */ 5912 gen_helper_vfp_absd(tcg_res, tcg_op); 5913 goto done; 5914 case 0x2: /* FNEG */ 5915 gen_helper_vfp_negd(tcg_res, tcg_op); 5916 goto done; 5917 case 0x3: /* FSQRT */ 5918 gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env); 5919 goto done; 5920 case 0x8: /* FRINTN */ 5921 case 0x9: /* FRINTP */ 5922 case 0xa: /* FRINTM */ 5923 case 0xb: /* FRINTZ */ 5924 case 0xc: /* FRINTA */ 5925 rmode = opcode & 7; 5926 gen_fpst = gen_helper_rintd; 5927 break; 5928 case 0xe: /* FRINTX */ 5929 gen_fpst = gen_helper_rintd_exact; 5930 break; 5931 case 0xf: /* FRINTI */ 5932 gen_fpst = gen_helper_rintd; 5933 break; 5934 case 0x10: /* FRINT32Z */ 5935 rmode = FPROUNDING_ZERO; 5936 gen_fpst = gen_helper_frint32_d; 5937 break; 5938 case 0x11: /* FRINT32X */ 5939 gen_fpst = gen_helper_frint32_d; 5940 break; 5941 case 0x12: /* FRINT64Z */ 5942 rmode = FPROUNDING_ZERO; 5943 gen_fpst = gen_helper_frint64_d; 5944 break; 5945 case 0x13: /* FRINT64X */ 5946 gen_fpst = gen_helper_frint64_d; 5947 break; 5948 default: 5949 g_assert_not_reached(); 5950 } 5951 5952 fpst = fpstatus_ptr(FPST_FPCR); 5953 if (rmode >= 0) { 5954 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); 5955 gen_fpst(tcg_res, tcg_op, fpst); 5956 gen_restore_rmode(tcg_rmode, fpst); 5957 } else { 5958 gen_fpst(tcg_res, tcg_op, fpst); 5959 } 5960 5961 done: 5962 write_fp_dreg(s, rd, tcg_res); 5963 } 5964 5965 static void handle_fp_fcvt(DisasContext *s, int opcode, 5966 int rd, int rn, int dtype, int ntype) 5967 { 5968 switch (ntype) { 5969 case 0x0: 5970 { 5971 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 5972 if (dtype == 1) { 5973 /* Single to double */ 5974 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 5975 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env); 5976 write_fp_dreg(s, rd, tcg_rd); 5977 } else { 5978 /* Single to half */ 5979 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 5980 TCGv_i32 ahp = get_ahp_flag(); 5981 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 5982 5983 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp); 5984 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 5985 write_fp_sreg(s, rd, tcg_rd); 5986 } 5987 break; 5988 } 5989 case 0x1: 5990 { 5991 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 5992 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 5993 if (dtype == 0) { 5994 /* Double to single */ 5995 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env); 5996 } else { 5997 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 5998 TCGv_i32 ahp = get_ahp_flag(); 5999 /* Double to half */ 6000 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 6001 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 6002 } 6003 write_fp_sreg(s, rd, tcg_rd); 6004 break; 6005 } 6006 case 0x3: 6007 { 6008 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 6009 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR); 6010 TCGv_i32 tcg_ahp = get_ahp_flag(); 6011 tcg_gen_ext16u_i32(tcg_rn, tcg_rn); 6012 if (dtype == 0) { 6013 /* Half to single */ 6014 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6015 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 6016 write_fp_sreg(s, rd, tcg_rd); 6017 } else { 6018 /* Half to double */ 6019 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 6020 
gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 6021 write_fp_dreg(s, rd, tcg_rd); 6022 } 6023 break; 6024 } 6025 default: 6026 g_assert_not_reached(); 6027 } 6028 } 6029 6030 /* Floating point data-processing (1 source) 6031 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0 6032 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 6033 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd | 6034 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 6035 */ 6036 static void disas_fp_1src(DisasContext *s, uint32_t insn) 6037 { 6038 int mos = extract32(insn, 29, 3); 6039 int type = extract32(insn, 22, 2); 6040 int opcode = extract32(insn, 15, 6); 6041 int rn = extract32(insn, 5, 5); 6042 int rd = extract32(insn, 0, 5); 6043 6044 if (mos) { 6045 goto do_unallocated; 6046 } 6047 6048 switch (opcode) { 6049 case 0x4: case 0x5: case 0x7: 6050 { 6051 /* FCVT between half, single and double precision */ 6052 int dtype = extract32(opcode, 0, 2); 6053 if (type == 2 || dtype == type) { 6054 goto do_unallocated; 6055 } 6056 if (!fp_access_check(s)) { 6057 return; 6058 } 6059 6060 handle_fp_fcvt(s, opcode, rd, rn, dtype, type); 6061 break; 6062 } 6063 6064 case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */ 6065 if (type > 1 || !dc_isar_feature(aa64_frint, s)) { 6066 goto do_unallocated; 6067 } 6068 /* fall through */ 6069 case 0x0 ... 0x3: 6070 case 0x8 ... 0xc: 6071 case 0xe ... 0xf: 6072 /* 32-to-32 and 64-to-64 ops */ 6073 switch (type) { 6074 case 0: 6075 if (!fp_access_check(s)) { 6076 return; 6077 } 6078 handle_fp_1src_single(s, opcode, rd, rn); 6079 break; 6080 case 1: 6081 if (!fp_access_check(s)) { 6082 return; 6083 } 6084 handle_fp_1src_double(s, opcode, rd, rn); 6085 break; 6086 case 3: 6087 if (!dc_isar_feature(aa64_fp16, s)) { 6088 goto do_unallocated; 6089 } 6090 6091 if (!fp_access_check(s)) { 6092 return; 6093 } 6094 handle_fp_1src_half(s, opcode, rd, rn); 6095 break; 6096 default: 6097 goto do_unallocated; 6098 } 6099 break; 6100 6101 case 0x6: 6102 switch (type) { 6103 case 1: /* BFCVT */ 6104 if (!dc_isar_feature(aa64_bf16, s)) { 6105 goto do_unallocated; 6106 } 6107 if (!fp_access_check(s)) { 6108 return; 6109 } 6110 handle_fp_1src_single(s, opcode, rd, rn); 6111 break; 6112 default: 6113 goto do_unallocated; 6114 } 6115 break; 6116 6117 default: 6118 do_unallocated: 6119 unallocated_encoding(s); 6120 break; 6121 } 6122 } 6123 6124 /* Floating-point data-processing (2 source) - single precision */ 6125 static void handle_fp_2src_single(DisasContext *s, int opcode, 6126 int rd, int rn, int rm) 6127 { 6128 TCGv_i32 tcg_op1; 6129 TCGv_i32 tcg_op2; 6130 TCGv_i32 tcg_res; 6131 TCGv_ptr fpst; 6132 6133 tcg_res = tcg_temp_new_i32(); 6134 fpst = fpstatus_ptr(FPST_FPCR); 6135 tcg_op1 = read_fp_sreg(s, rn); 6136 tcg_op2 = read_fp_sreg(s, rm); 6137 6138 switch (opcode) { 6139 case 0x0: /* FMUL */ 6140 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 6141 break; 6142 case 0x1: /* FDIV */ 6143 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); 6144 break; 6145 case 0x2: /* FADD */ 6146 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); 6147 break; 6148 case 0x3: /* FSUB */ 6149 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 6150 break; 6151 case 0x4: /* FMAX */ 6152 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); 6153 break; 6154 case 0x5: /* FMIN */ 6155 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); 6156 break; 6157 case 0x6: /* FMAXNM */ 6158 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); 
6159 break; 6160 case 0x7: /* FMINNM */ 6161 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); 6162 break; 6163 case 0x8: /* FNMUL */ 6164 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 6165 gen_helper_vfp_negs(tcg_res, tcg_res); 6166 break; 6167 } 6168 6169 write_fp_sreg(s, rd, tcg_res); 6170 } 6171 6172 /* Floating-point data-processing (2 source) - double precision */ 6173 static void handle_fp_2src_double(DisasContext *s, int opcode, 6174 int rd, int rn, int rm) 6175 { 6176 TCGv_i64 tcg_op1; 6177 TCGv_i64 tcg_op2; 6178 TCGv_i64 tcg_res; 6179 TCGv_ptr fpst; 6180 6181 tcg_res = tcg_temp_new_i64(); 6182 fpst = fpstatus_ptr(FPST_FPCR); 6183 tcg_op1 = read_fp_dreg(s, rn); 6184 tcg_op2 = read_fp_dreg(s, rm); 6185 6186 switch (opcode) { 6187 case 0x0: /* FMUL */ 6188 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 6189 break; 6190 case 0x1: /* FDIV */ 6191 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); 6192 break; 6193 case 0x2: /* FADD */ 6194 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); 6195 break; 6196 case 0x3: /* FSUB */ 6197 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 6198 break; 6199 case 0x4: /* FMAX */ 6200 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); 6201 break; 6202 case 0x5: /* FMIN */ 6203 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); 6204 break; 6205 case 0x6: /* FMAXNM */ 6206 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); 6207 break; 6208 case 0x7: /* FMINNM */ 6209 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); 6210 break; 6211 case 0x8: /* FNMUL */ 6212 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 6213 gen_helper_vfp_negd(tcg_res, tcg_res); 6214 break; 6215 } 6216 6217 write_fp_dreg(s, rd, tcg_res); 6218 } 6219 6220 /* Floating-point data-processing (2 source) - half precision */ 6221 static void handle_fp_2src_half(DisasContext *s, int opcode, 6222 int rd, int rn, int rm) 6223 { 6224 TCGv_i32 tcg_op1; 6225 TCGv_i32 tcg_op2; 6226 TCGv_i32 tcg_res; 6227 TCGv_ptr fpst; 6228 6229 tcg_res = tcg_temp_new_i32(); 6230 fpst = fpstatus_ptr(FPST_FPCR_F16); 6231 tcg_op1 = read_fp_hreg(s, rn); 6232 tcg_op2 = read_fp_hreg(s, rm); 6233 6234 switch (opcode) { 6235 case 0x0: /* FMUL */ 6236 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 6237 break; 6238 case 0x1: /* FDIV */ 6239 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); 6240 break; 6241 case 0x2: /* FADD */ 6242 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 6243 break; 6244 case 0x3: /* FSUB */ 6245 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 6246 break; 6247 case 0x4: /* FMAX */ 6248 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 6249 break; 6250 case 0x5: /* FMIN */ 6251 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 6252 break; 6253 case 0x6: /* FMAXNM */ 6254 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 6255 break; 6256 case 0x7: /* FMINNM */ 6257 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 6258 break; 6259 case 0x8: /* FNMUL */ 6260 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 6261 tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000); 6262 break; 6263 default: 6264 g_assert_not_reached(); 6265 } 6266 6267 write_fp_sreg(s, rd, tcg_res); 6268 } 6269 6270 /* Floating point data-processing (2 source) 6271 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 6272 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 6273 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd | 6274 * 
+---+---+---+-----------+------+---+------+--------+-----+------+------+ 6275 */ 6276 static void disas_fp_2src(DisasContext *s, uint32_t insn) 6277 { 6278 int mos = extract32(insn, 29, 3); 6279 int type = extract32(insn, 22, 2); 6280 int rd = extract32(insn, 0, 5); 6281 int rn = extract32(insn, 5, 5); 6282 int rm = extract32(insn, 16, 5); 6283 int opcode = extract32(insn, 12, 4); 6284 6285 if (opcode > 8 || mos) { 6286 unallocated_encoding(s); 6287 return; 6288 } 6289 6290 switch (type) { 6291 case 0: 6292 if (!fp_access_check(s)) { 6293 return; 6294 } 6295 handle_fp_2src_single(s, opcode, rd, rn, rm); 6296 break; 6297 case 1: 6298 if (!fp_access_check(s)) { 6299 return; 6300 } 6301 handle_fp_2src_double(s, opcode, rd, rn, rm); 6302 break; 6303 case 3: 6304 if (!dc_isar_feature(aa64_fp16, s)) { 6305 unallocated_encoding(s); 6306 return; 6307 } 6308 if (!fp_access_check(s)) { 6309 return; 6310 } 6311 handle_fp_2src_half(s, opcode, rd, rn, rm); 6312 break; 6313 default: 6314 unallocated_encoding(s); 6315 } 6316 } 6317 6318 /* Floating-point data-processing (3 source) - single precision */ 6319 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, 6320 int rd, int rn, int rm, int ra) 6321 { 6322 TCGv_i32 tcg_op1, tcg_op2, tcg_op3; 6323 TCGv_i32 tcg_res = tcg_temp_new_i32(); 6324 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6325 6326 tcg_op1 = read_fp_sreg(s, rn); 6327 tcg_op2 = read_fp_sreg(s, rm); 6328 tcg_op3 = read_fp_sreg(s, ra); 6329 6330 /* These are fused multiply-add, and must be done as one 6331 * floating point operation with no rounding between the 6332 * multiplication and addition steps. 6333 * NB that doing the negations here as separate steps is 6334 * correct : an input NaN should come out with its sign bit 6335 * flipped if it is a negated-input. 6336 */ 6337 if (o1 == true) { 6338 gen_helper_vfp_negs(tcg_op3, tcg_op3); 6339 } 6340 6341 if (o0 != o1) { 6342 gen_helper_vfp_negs(tcg_op1, tcg_op1); 6343 } 6344 6345 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6346 6347 write_fp_sreg(s, rd, tcg_res); 6348 } 6349 6350 /* Floating-point data-processing (3 source) - double precision */ 6351 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, 6352 int rd, int rn, int rm, int ra) 6353 { 6354 TCGv_i64 tcg_op1, tcg_op2, tcg_op3; 6355 TCGv_i64 tcg_res = tcg_temp_new_i64(); 6356 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6357 6358 tcg_op1 = read_fp_dreg(s, rn); 6359 tcg_op2 = read_fp_dreg(s, rm); 6360 tcg_op3 = read_fp_dreg(s, ra); 6361 6362 /* These are fused multiply-add, and must be done as one 6363 * floating point operation with no rounding between the 6364 * multiplication and addition steps. 6365 * NB that doing the negations here as separate steps is 6366 * correct : an input NaN should come out with its sign bit 6367 * flipped if it is a negated-input. 
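 * In terms of the A64 instructions, o1 selects negation of the addend (Ra) and o0 != o1 selects negation of the product, which is how FMSUB, FNMADD and FNMSUB are all expressed by the two conditional negations below.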
6368 */ 6369 if (o1 == true) { 6370 gen_helper_vfp_negd(tcg_op3, tcg_op3); 6371 } 6372 6373 if (o0 != o1) { 6374 gen_helper_vfp_negd(tcg_op1, tcg_op1); 6375 } 6376 6377 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6378 6379 write_fp_dreg(s, rd, tcg_res); 6380 } 6381 6382 /* Floating-point data-processing (3 source) - half precision */ 6383 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, 6384 int rd, int rn, int rm, int ra) 6385 { 6386 TCGv_i32 tcg_op1, tcg_op2, tcg_op3; 6387 TCGv_i32 tcg_res = tcg_temp_new_i32(); 6388 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16); 6389 6390 tcg_op1 = read_fp_hreg(s, rn); 6391 tcg_op2 = read_fp_hreg(s, rm); 6392 tcg_op3 = read_fp_hreg(s, ra); 6393 6394 /* These are fused multiply-add, and must be done as one 6395 * floating point operation with no rounding between the 6396 * multiplication and addition steps. 6397 * NB that doing the negations here as separate steps is 6398 * correct : an input NaN should come out with its sign bit 6399 * flipped if it is a negated-input. 6400 */ 6401 if (o1 == true) { 6402 tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000); 6403 } 6404 6405 if (o0 != o1) { 6406 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); 6407 } 6408 6409 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6410 6411 write_fp_sreg(s, rd, tcg_res); 6412 } 6413 6414 /* Floating point data-processing (3 source) 6415 * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0 6416 * +---+---+---+-----------+------+----+------+----+------+------+------+ 6417 * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd | 6418 * +---+---+---+-----------+------+----+------+----+------+------+------+ 6419 */ 6420 static void disas_fp_3src(DisasContext *s, uint32_t insn) 6421 { 6422 int mos = extract32(insn, 29, 3); 6423 int type = extract32(insn, 22, 2); 6424 int rd = extract32(insn, 0, 5); 6425 int rn = extract32(insn, 5, 5); 6426 int ra = extract32(insn, 10, 5); 6427 int rm = extract32(insn, 16, 5); 6428 bool o0 = extract32(insn, 15, 1); 6429 bool o1 = extract32(insn, 21, 1); 6430 6431 if (mos) { 6432 unallocated_encoding(s); 6433 return; 6434 } 6435 6436 switch (type) { 6437 case 0: 6438 if (!fp_access_check(s)) { 6439 return; 6440 } 6441 handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra); 6442 break; 6443 case 1: 6444 if (!fp_access_check(s)) { 6445 return; 6446 } 6447 handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra); 6448 break; 6449 case 3: 6450 if (!dc_isar_feature(aa64_fp16, s)) { 6451 unallocated_encoding(s); 6452 return; 6453 } 6454 if (!fp_access_check(s)) { 6455 return; 6456 } 6457 handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra); 6458 break; 6459 default: 6460 unallocated_encoding(s); 6461 } 6462 } 6463 6464 /* Floating point immediate 6465 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0 6466 * +---+---+---+-----------+------+---+------------+-------+------+------+ 6467 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd | 6468 * +---+---+---+-----------+------+---+------------+-------+------+------+ 6469 */ 6470 static void disas_fp_imm(DisasContext *s, uint32_t insn) 6471 { 6472 int rd = extract32(insn, 0, 5); 6473 int imm5 = extract32(insn, 5, 5); 6474 int imm8 = extract32(insn, 13, 8); 6475 int type = extract32(insn, 22, 2); 6476 int mos = extract32(insn, 29, 3); 6477 uint64_t imm; 6478 MemOp sz; 6479 6480 if (mos || imm5) { 6481 unallocated_encoding(s); 6482 return; 6483 } 6484 6485 switch (type) { 6486 case 0: 6487 sz = MO_32; 6488 break; 6489 case 1: 6490 sz = MO_64; 6491 break; 6492 case 3: 6493 sz = 
MO_16; 6494 if (dc_isar_feature(aa64_fp16, s)) { 6495 break; 6496 } 6497 /* fallthru */ 6498 default: 6499 unallocated_encoding(s); 6500 return; 6501 } 6502 6503 if (!fp_access_check(s)) { 6504 return; 6505 } 6506 6507 imm = vfp_expand_imm(sz, imm8); 6508 write_fp_dreg(s, rd, tcg_constant_i64(imm)); 6509 } 6510 6511 /* Handle floating point <=> fixed point conversions. Note that we can 6512 * also deal with fp <=> integer conversions as a special case (scale == 64) 6513 * OPTME: consider handling that special case specially or at least skipping 6514 * the call to scalbn in the helpers for zero shifts. 6515 */ 6516 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, 6517 bool itof, int rmode, int scale, int sf, int type) 6518 { 6519 bool is_signed = !(opcode & 1); 6520 TCGv_ptr tcg_fpstatus; 6521 TCGv_i32 tcg_shift, tcg_single; 6522 TCGv_i64 tcg_double; 6523 6524 tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR); 6525 6526 tcg_shift = tcg_constant_i32(64 - scale); 6527 6528 if (itof) { 6529 TCGv_i64 tcg_int = cpu_reg(s, rn); 6530 if (!sf) { 6531 TCGv_i64 tcg_extend = tcg_temp_new_i64(); 6532 6533 if (is_signed) { 6534 tcg_gen_ext32s_i64(tcg_extend, tcg_int); 6535 } else { 6536 tcg_gen_ext32u_i64(tcg_extend, tcg_int); 6537 } 6538 6539 tcg_int = tcg_extend; 6540 } 6541 6542 switch (type) { 6543 case 1: /* float64 */ 6544 tcg_double = tcg_temp_new_i64(); 6545 if (is_signed) { 6546 gen_helper_vfp_sqtod(tcg_double, tcg_int, 6547 tcg_shift, tcg_fpstatus); 6548 } else { 6549 gen_helper_vfp_uqtod(tcg_double, tcg_int, 6550 tcg_shift, tcg_fpstatus); 6551 } 6552 write_fp_dreg(s, rd, tcg_double); 6553 break; 6554 6555 case 0: /* float32 */ 6556 tcg_single = tcg_temp_new_i32(); 6557 if (is_signed) { 6558 gen_helper_vfp_sqtos(tcg_single, tcg_int, 6559 tcg_shift, tcg_fpstatus); 6560 } else { 6561 gen_helper_vfp_uqtos(tcg_single, tcg_int, 6562 tcg_shift, tcg_fpstatus); 6563 } 6564 write_fp_sreg(s, rd, tcg_single); 6565 break; 6566 6567 case 3: /* float16 */ 6568 tcg_single = tcg_temp_new_i32(); 6569 if (is_signed) { 6570 gen_helper_vfp_sqtoh(tcg_single, tcg_int, 6571 tcg_shift, tcg_fpstatus); 6572 } else { 6573 gen_helper_vfp_uqtoh(tcg_single, tcg_int, 6574 tcg_shift, tcg_fpstatus); 6575 } 6576 write_fp_sreg(s, rd, tcg_single); 6577 break; 6578 6579 default: 6580 g_assert_not_reached(); 6581 } 6582 } else { 6583 TCGv_i64 tcg_int = cpu_reg(s, rd); 6584 TCGv_i32 tcg_rmode; 6585 6586 if (extract32(opcode, 2, 1)) { 6587 /* There are too many rounding modes to all fit into rmode, 6588 * so FCVTA[US] is a special case. 
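 * (Bit 2 of the opcode distinguishes FCVTA[US]; for it we force the rounding mode to FPROUNDING_TIEAWAY just below.)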
6589 */ 6590 rmode = FPROUNDING_TIEAWAY; 6591 } 6592 6593 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 6594 6595 switch (type) { 6596 case 1: /* float64 */ 6597 tcg_double = read_fp_dreg(s, rn); 6598 if (is_signed) { 6599 if (!sf) { 6600 gen_helper_vfp_tosld(tcg_int, tcg_double, 6601 tcg_shift, tcg_fpstatus); 6602 } else { 6603 gen_helper_vfp_tosqd(tcg_int, tcg_double, 6604 tcg_shift, tcg_fpstatus); 6605 } 6606 } else { 6607 if (!sf) { 6608 gen_helper_vfp_tould(tcg_int, tcg_double, 6609 tcg_shift, tcg_fpstatus); 6610 } else { 6611 gen_helper_vfp_touqd(tcg_int, tcg_double, 6612 tcg_shift, tcg_fpstatus); 6613 } 6614 } 6615 if (!sf) { 6616 tcg_gen_ext32u_i64(tcg_int, tcg_int); 6617 } 6618 break; 6619 6620 case 0: /* float32 */ 6621 tcg_single = read_fp_sreg(s, rn); 6622 if (sf) { 6623 if (is_signed) { 6624 gen_helper_vfp_tosqs(tcg_int, tcg_single, 6625 tcg_shift, tcg_fpstatus); 6626 } else { 6627 gen_helper_vfp_touqs(tcg_int, tcg_single, 6628 tcg_shift, tcg_fpstatus); 6629 } 6630 } else { 6631 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 6632 if (is_signed) { 6633 gen_helper_vfp_tosls(tcg_dest, tcg_single, 6634 tcg_shift, tcg_fpstatus); 6635 } else { 6636 gen_helper_vfp_touls(tcg_dest, tcg_single, 6637 tcg_shift, tcg_fpstatus); 6638 } 6639 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 6640 } 6641 break; 6642 6643 case 3: /* float16 */ 6644 tcg_single = read_fp_sreg(s, rn); 6645 if (sf) { 6646 if (is_signed) { 6647 gen_helper_vfp_tosqh(tcg_int, tcg_single, 6648 tcg_shift, tcg_fpstatus); 6649 } else { 6650 gen_helper_vfp_touqh(tcg_int, tcg_single, 6651 tcg_shift, tcg_fpstatus); 6652 } 6653 } else { 6654 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 6655 if (is_signed) { 6656 gen_helper_vfp_toslh(tcg_dest, tcg_single, 6657 tcg_shift, tcg_fpstatus); 6658 } else { 6659 gen_helper_vfp_toulh(tcg_dest, tcg_single, 6660 tcg_shift, tcg_fpstatus); 6661 } 6662 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 6663 } 6664 break; 6665 6666 default: 6667 g_assert_not_reached(); 6668 } 6669 6670 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 6671 } 6672 } 6673 6674 /* Floating point <-> fixed point conversions 6675 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 6676 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 6677 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd | 6678 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 6679 */ 6680 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn) 6681 { 6682 int rd = extract32(insn, 0, 5); 6683 int rn = extract32(insn, 5, 5); 6684 int scale = extract32(insn, 10, 6); 6685 int opcode = extract32(insn, 16, 3); 6686 int rmode = extract32(insn, 19, 2); 6687 int type = extract32(insn, 22, 2); 6688 bool sbit = extract32(insn, 29, 1); 6689 bool sf = extract32(insn, 31, 1); 6690 bool itof; 6691 6692 if (sbit || (!sf && scale < 32)) { 6693 unallocated_encoding(s); 6694 return; 6695 } 6696 6697 switch (type) { 6698 case 0: /* float32 */ 6699 case 1: /* float64 */ 6700 break; 6701 case 3: /* float16 */ 6702 if (dc_isar_feature(aa64_fp16, s)) { 6703 break; 6704 } 6705 /* fallthru */ 6706 default: 6707 unallocated_encoding(s); 6708 return; 6709 } 6710 6711 switch ((rmode << 3) | opcode) { 6712 case 0x2: /* SCVTF */ 6713 case 0x3: /* UCVTF */ 6714 itof = true; 6715 break; 6716 case 0x18: /* FCVTZS */ 6717 case 0x19: /* FCVTZU */ 6718 itof = false; 6719 break; 6720 default: 6721 unallocated_encoding(s); 6722 return; 6723 } 6724 6725 if (!fp_access_check(s)) { 6726 return; 6727 } 6728 6729 handle_fpfpcvt(s, rd, 
rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type); 6730 } 6731 6732 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) 6733 { 6734 /* FMOV: gpr to or from float, double, or top half of quad fp reg, 6735 * without conversion. 6736 */ 6737 6738 if (itof) { 6739 TCGv_i64 tcg_rn = cpu_reg(s, rn); 6740 TCGv_i64 tmp; 6741 6742 switch (type) { 6743 case 0: 6744 /* 32 bit */ 6745 tmp = tcg_temp_new_i64(); 6746 tcg_gen_ext32u_i64(tmp, tcg_rn); 6747 write_fp_dreg(s, rd, tmp); 6748 break; 6749 case 1: 6750 /* 64 bit */ 6751 write_fp_dreg(s, rd, tcg_rn); 6752 break; 6753 case 2: 6754 /* 64 bit to top half. */ 6755 tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd)); 6756 clear_vec_high(s, true, rd); 6757 break; 6758 case 3: 6759 /* 16 bit */ 6760 tmp = tcg_temp_new_i64(); 6761 tcg_gen_ext16u_i64(tmp, tcg_rn); 6762 write_fp_dreg(s, rd, tmp); 6763 break; 6764 default: 6765 g_assert_not_reached(); 6766 } 6767 } else { 6768 TCGv_i64 tcg_rd = cpu_reg(s, rd); 6769 6770 switch (type) { 6771 case 0: 6772 /* 32 bit */ 6773 tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32)); 6774 break; 6775 case 1: 6776 /* 64 bit */ 6777 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64)); 6778 break; 6779 case 2: 6780 /* 64 bits from top half */ 6781 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn)); 6782 break; 6783 case 3: 6784 /* 16 bit */ 6785 tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16)); 6786 break; 6787 default: 6788 g_assert_not_reached(); 6789 } 6790 } 6791 } 6792 6793 static void handle_fjcvtzs(DisasContext *s, int rd, int rn) 6794 { 6795 TCGv_i64 t = read_fp_dreg(s, rn); 6796 TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR); 6797 6798 gen_helper_fjcvtzs(t, t, fpstatus); 6799 6800 tcg_gen_ext32u_i64(cpu_reg(s, rd), t); 6801 tcg_gen_extrh_i64_i32(cpu_ZF, t); 6802 tcg_gen_movi_i32(cpu_CF, 0); 6803 tcg_gen_movi_i32(cpu_NF, 0); 6804 tcg_gen_movi_i32(cpu_VF, 0); 6805 } 6806 6807 /* Floating point <-> integer conversions 6808 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 6809 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 6810 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd | 6811 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 6812 */ 6813 static void disas_fp_int_conv(DisasContext *s, uint32_t insn) 6814 { 6815 int rd = extract32(insn, 0, 5); 6816 int rn = extract32(insn, 5, 5); 6817 int opcode = extract32(insn, 16, 3); 6818 int rmode = extract32(insn, 19, 2); 6819 int type = extract32(insn, 22, 2); 6820 bool sbit = extract32(insn, 29, 1); 6821 bool sf = extract32(insn, 31, 1); 6822 bool itof = false; 6823 6824 if (sbit) { 6825 goto do_unallocated; 6826 } 6827 6828 switch (opcode) { 6829 case 2: /* SCVTF */ 6830 case 3: /* UCVTF */ 6831 itof = true; 6832 /* fallthru */ 6833 case 4: /* FCVTAS */ 6834 case 5: /* FCVTAU */ 6835 if (rmode != 0) { 6836 goto do_unallocated; 6837 } 6838 /* fallthru */ 6839 case 0: /* FCVT[NPMZ]S */ 6840 case 1: /* FCVT[NPMZ]U */ 6841 switch (type) { 6842 case 0: /* float32 */ 6843 case 1: /* float64 */ 6844 break; 6845 case 3: /* float16 */ 6846 if (!dc_isar_feature(aa64_fp16, s)) { 6847 goto do_unallocated; 6848 } 6849 break; 6850 default: 6851 goto do_unallocated; 6852 } 6853 if (!fp_access_check(s)) { 6854 return; 6855 } 6856 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type); 6857 break; 6858 6859 default: 6860 switch (sf << 7 | type << 5 | rmode << 3 | opcode) { 6861 case 0b01100110: /* FMOV half <-> 32-bit int 
*/ 6862 case 0b01100111: 6863 case 0b11100110: /* FMOV half <-> 64-bit int */ 6864 case 0b11100111: 6865 if (!dc_isar_feature(aa64_fp16, s)) { 6866 goto do_unallocated; 6867 } 6868 /* fallthru */ 6869 case 0b00000110: /* FMOV 32-bit */ 6870 case 0b00000111: 6871 case 0b10100110: /* FMOV 64-bit */ 6872 case 0b10100111: 6873 case 0b11001110: /* FMOV top half of 128-bit */ 6874 case 0b11001111: 6875 if (!fp_access_check(s)) { 6876 return; 6877 } 6878 itof = opcode & 1; 6879 handle_fmov(s, rd, rn, type, itof); 6880 break; 6881 6882 case 0b00111110: /* FJCVTZS */ 6883 if (!dc_isar_feature(aa64_jscvt, s)) { 6884 goto do_unallocated; 6885 } else if (fp_access_check(s)) { 6886 handle_fjcvtzs(s, rd, rn); 6887 } 6888 break; 6889 6890 default: 6891 do_unallocated: 6892 unallocated_encoding(s); 6893 return; 6894 } 6895 break; 6896 } 6897 } 6898 6899 /* FP-specific subcases of table C3-6 (SIMD and FP data processing) 6900 * 31 30 29 28 25 24 0 6901 * +---+---+---+---------+-----------------------------+ 6902 * | | 0 | | 1 1 1 1 | | 6903 * +---+---+---+---------+-----------------------------+ 6904 */ 6905 static void disas_data_proc_fp(DisasContext *s, uint32_t insn) 6906 { 6907 if (extract32(insn, 24, 1)) { 6908 /* Floating point data-processing (3 source) */ 6909 disas_fp_3src(s, insn); 6910 } else if (extract32(insn, 21, 1) == 0) { 6911 /* Floating point to fixed point conversions */ 6912 disas_fp_fixed_conv(s, insn); 6913 } else { 6914 switch (extract32(insn, 10, 2)) { 6915 case 1: 6916 /* Floating point conditional compare */ 6917 disas_fp_ccomp(s, insn); 6918 break; 6919 case 2: 6920 /* Floating point data-processing (2 source) */ 6921 disas_fp_2src(s, insn); 6922 break; 6923 case 3: 6924 /* Floating point conditional select */ 6925 disas_fp_csel(s, insn); 6926 break; 6927 case 0: 6928 switch (ctz32(extract32(insn, 12, 4))) { 6929 case 0: /* [15:12] == xxx1 */ 6930 /* Floating point immediate */ 6931 disas_fp_imm(s, insn); 6932 break; 6933 case 1: /* [15:12] == xx10 */ 6934 /* Floating point compare */ 6935 disas_fp_compare(s, insn); 6936 break; 6937 case 2: /* [15:12] == x100 */ 6938 /* Floating point data-processing (1 source) */ 6939 disas_fp_1src(s, insn); 6940 break; 6941 case 3: /* [15:12] == 1000 */ 6942 unallocated_encoding(s); 6943 break; 6944 default: /* [15:12] == 0000 */ 6945 /* Floating point <-> integer conversions */ 6946 disas_fp_int_conv(s, insn); 6947 break; 6948 } 6949 break; 6950 } 6951 } 6952 } 6953 6954 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right, 6955 int pos) 6956 { 6957 /* Extract 64 bits from the middle of two concatenated 64 bit 6958 * vector register slices left:right. The extracted bits start 6959 * at 'pos' bits into the right (least significant) side. 6960 * We return the result in tcg_right, and guarantee not to 6961 * trash tcg_left. 
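 * Concretely the result is (left:right) >> pos, computed as (right >> pos) | (left << (64 - pos)); pos must be strictly between 0 and 64, as the assert below checks.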
6962 */ 6963 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 6964 assert(pos > 0 && pos < 64); 6965 6966 tcg_gen_shri_i64(tcg_right, tcg_right, pos); 6967 tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos); 6968 tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp); 6969 } 6970 6971 /* EXT 6972 * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0 6973 * +---+---+-------------+-----+---+------+---+------+---+------+------+ 6974 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd | 6975 * +---+---+-------------+-----+---+------+---+------+---+------+------+ 6976 */ 6977 static void disas_simd_ext(DisasContext *s, uint32_t insn) 6978 { 6979 int is_q = extract32(insn, 30, 1); 6980 int op2 = extract32(insn, 22, 2); 6981 int imm4 = extract32(insn, 11, 4); 6982 int rm = extract32(insn, 16, 5); 6983 int rn = extract32(insn, 5, 5); 6984 int rd = extract32(insn, 0, 5); 6985 int pos = imm4 << 3; 6986 TCGv_i64 tcg_resl, tcg_resh; 6987 6988 if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) { 6989 unallocated_encoding(s); 6990 return; 6991 } 6992 6993 if (!fp_access_check(s)) { 6994 return; 6995 } 6996 6997 tcg_resh = tcg_temp_new_i64(); 6998 tcg_resl = tcg_temp_new_i64(); 6999 7000 /* Vd gets bits starting at pos bits into Vm:Vn. This is 7001 * either extracting 128 bits from a 128:128 concatenation, or 7002 * extracting 64 bits from a 64:64 concatenation. 7003 */ 7004 if (!is_q) { 7005 read_vec_element(s, tcg_resl, rn, 0, MO_64); 7006 if (pos != 0) { 7007 read_vec_element(s, tcg_resh, rm, 0, MO_64); 7008 do_ext64(s, tcg_resh, tcg_resl, pos); 7009 } 7010 } else { 7011 TCGv_i64 tcg_hh; 7012 typedef struct { 7013 int reg; 7014 int elt; 7015 } EltPosns; 7016 EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} }; 7017 EltPosns *elt = eltposns; 7018 7019 if (pos >= 64) { 7020 elt++; 7021 pos -= 64; 7022 } 7023 7024 read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64); 7025 elt++; 7026 read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64); 7027 elt++; 7028 if (pos != 0) { 7029 do_ext64(s, tcg_resh, tcg_resl, pos); 7030 tcg_hh = tcg_temp_new_i64(); 7031 read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64); 7032 do_ext64(s, tcg_hh, tcg_resh, pos); 7033 } 7034 } 7035 7036 write_vec_element(s, tcg_resl, rd, 0, MO_64); 7037 if (is_q) { 7038 write_vec_element(s, tcg_resh, rd, 1, MO_64); 7039 } 7040 clear_vec_high(s, is_q, rd); 7041 } 7042 7043 /* TBL/TBX 7044 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0 7045 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ 7046 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd | 7047 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ 7048 */ 7049 static void disas_simd_tb(DisasContext *s, uint32_t insn) 7050 { 7051 int op2 = extract32(insn, 22, 2); 7052 int is_q = extract32(insn, 30, 1); 7053 int rm = extract32(insn, 16, 5); 7054 int rn = extract32(insn, 5, 5); 7055 int rd = extract32(insn, 0, 5); 7056 int is_tbx = extract32(insn, 12, 1); 7057 int len = (extract32(insn, 13, 2) + 1) * 16; 7058 7059 if (op2 != 0) { 7060 unallocated_encoding(s); 7061 return; 7062 } 7063 7064 if (!fp_access_check(s)) { 7065 return; 7066 } 7067 7068 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 7069 vec_full_reg_offset(s, rm), cpu_env, 7070 is_q ? 
16 : 8, vec_full_reg_size(s), 7071 (len << 6) | (is_tbx << 5) | rn, 7072 gen_helper_simd_tblx); 7073 } 7074 7075 /* ZIP/UZP/TRN 7076 * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 7077 * +---+---+-------------+------+---+------+---+------------------+------+ 7078 * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd | 7079 * +---+---+-------------+------+---+------+---+------------------+------+ 7080 */ 7081 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) 7082 { 7083 int rd = extract32(insn, 0, 5); 7084 int rn = extract32(insn, 5, 5); 7085 int rm = extract32(insn, 16, 5); 7086 int size = extract32(insn, 22, 2); 7087 /* opc field bits [1:0] indicate ZIP/UZP/TRN; 7088 * bit 2 indicates 1 vs 2 variant of the insn. 7089 */ 7090 int opcode = extract32(insn, 12, 2); 7091 bool part = extract32(insn, 14, 1); 7092 bool is_q = extract32(insn, 30, 1); 7093 int esize = 8 << size; 7094 int i; 7095 int datasize = is_q ? 128 : 64; 7096 int elements = datasize / esize; 7097 TCGv_i64 tcg_res[2], tcg_ele; 7098 7099 if (opcode == 0 || (size == 3 && !is_q)) { 7100 unallocated_encoding(s); 7101 return; 7102 } 7103 7104 if (!fp_access_check(s)) { 7105 return; 7106 } 7107 7108 tcg_res[0] = tcg_temp_new_i64(); 7109 tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL; 7110 tcg_ele = tcg_temp_new_i64(); 7111 7112 for (i = 0; i < elements; i++) { 7113 int o, w; 7114 7115 switch (opcode) { 7116 case 1: /* UZP1/2 */ 7117 { 7118 int midpoint = elements / 2; 7119 if (i < midpoint) { 7120 read_vec_element(s, tcg_ele, rn, 2 * i + part, size); 7121 } else { 7122 read_vec_element(s, tcg_ele, rm, 7123 2 * (i - midpoint) + part, size); 7124 } 7125 break; 7126 } 7127 case 2: /* TRN1/2 */ 7128 if (i & 1) { 7129 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size); 7130 } else { 7131 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size); 7132 } 7133 break; 7134 case 3: /* ZIP1/2 */ 7135 { 7136 int base = part * elements / 2; 7137 if (i & 1) { 7138 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size); 7139 } else { 7140 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size); 7141 } 7142 break; 7143 } 7144 default: 7145 g_assert_not_reached(); 7146 } 7147 7148 w = (i * esize) / 64; 7149 o = (i * esize) % 64; 7150 if (o == 0) { 7151 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 7152 } else { 7153 tcg_gen_shli_i64(tcg_ele, tcg_ele, o); 7154 tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele); 7155 } 7156 } 7157 7158 for (i = 0; i <= is_q; ++i) { 7159 write_vec_element(s, tcg_res[i], rd, i, MO_64); 7160 } 7161 clear_vec_high(s, is_q, rd); 7162 } 7163 7164 /* 7165 * do_reduction_op helper 7166 * 7167 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 7168 * important for correct NaN propagation that we do these 7169 * operations in exactly the order specified by the pseudocode. 7170 * 7171 * This is a recursive function, TCG temps should be freed by the 7172 * calling function once it is done with the values. 7173 */ 7174 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn, 7175 int esize, int size, int vmap, TCGv_ptr fpst) 7176 { 7177 if (esize == size) { 7178 int element; 7179 MemOp msize = esize == 16 ? 
MO_16 : MO_32; 7180 TCGv_i32 tcg_elem; 7181 7182 /* We should have one register left here */ 7183 assert(ctpop8(vmap) == 1); 7184 element = ctz32(vmap); 7185 assert(element < 8); 7186 7187 tcg_elem = tcg_temp_new_i32(); 7188 read_vec_element_i32(s, tcg_elem, rn, element, msize); 7189 return tcg_elem; 7190 } else { 7191 int bits = size / 2; 7192 int shift = ctpop8(vmap) / 2; 7193 int vmap_lo = (vmap >> shift) & vmap; 7194 int vmap_hi = (vmap & ~vmap_lo); 7195 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 7196 7197 tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst); 7198 tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst); 7199 tcg_res = tcg_temp_new_i32(); 7200 7201 switch (fpopcode) { 7202 case 0x0c: /* fmaxnmv half-precision */ 7203 gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst); 7204 break; 7205 case 0x0f: /* fmaxv half-precision */ 7206 gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst); 7207 break; 7208 case 0x1c: /* fminnmv half-precision */ 7209 gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst); 7210 break; 7211 case 0x1f: /* fminv half-precision */ 7212 gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst); 7213 break; 7214 case 0x2c: /* fmaxnmv */ 7215 gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst); 7216 break; 7217 case 0x2f: /* fmaxv */ 7218 gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst); 7219 break; 7220 case 0x3c: /* fminnmv */ 7221 gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst); 7222 break; 7223 case 0x3f: /* fminv */ 7224 gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst); 7225 break; 7226 default: 7227 g_assert_not_reached(); 7228 } 7229 return tcg_res; 7230 } 7231 } 7232 7233 /* AdvSIMD across lanes 7234 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 7235 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 7236 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | 7237 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 7238 */ 7239 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) 7240 { 7241 int rd = extract32(insn, 0, 5); 7242 int rn = extract32(insn, 5, 5); 7243 int size = extract32(insn, 22, 2); 7244 int opcode = extract32(insn, 12, 5); 7245 bool is_q = extract32(insn, 30, 1); 7246 bool is_u = extract32(insn, 29, 1); 7247 bool is_fp = false; 7248 bool is_min = false; 7249 int esize; 7250 int elements; 7251 int i; 7252 TCGv_i64 tcg_res, tcg_elt; 7253 7254 switch (opcode) { 7255 case 0x1b: /* ADDV */ 7256 if (is_u) { 7257 unallocated_encoding(s); 7258 return; 7259 } 7260 /* fall through */ 7261 case 0x3: /* SADDLV, UADDLV */ 7262 case 0xa: /* SMAXV, UMAXV */ 7263 case 0x1a: /* SMINV, UMINV */ 7264 if (size == 3 || (size == 2 && !is_q)) { 7265 unallocated_encoding(s); 7266 return; 7267 } 7268 break; 7269 case 0xc: /* FMAXNMV, FMINNMV */ 7270 case 0xf: /* FMAXV, FMINV */ 7271 /* Bit 1 of size field encodes min vs max and the actual size 7272 * depends on the encoding of the U bit. If not set (and FP16 7273 * enabled) then we do half-precision float instead of single 7274 * precision. 
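 * (So U == 0 requires FP16 and gives 16-bit elements, while U == 1 with size<0> == 0 and Q == 1 gives 32-bit elements; there is no double-precision variant of these across-lanes ops.)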
7275 */ 7276 is_min = extract32(size, 1, 1); 7277 is_fp = true; 7278 if (!is_u && dc_isar_feature(aa64_fp16, s)) { 7279 size = 1; 7280 } else if (!is_u || !is_q || extract32(size, 0, 1)) { 7281 unallocated_encoding(s); 7282 return; 7283 } else { 7284 size = 2; 7285 } 7286 break; 7287 default: 7288 unallocated_encoding(s); 7289 return; 7290 } 7291 7292 if (!fp_access_check(s)) { 7293 return; 7294 } 7295 7296 esize = 8 << size; 7297 elements = (is_q ? 128 : 64) / esize; 7298 7299 tcg_res = tcg_temp_new_i64(); 7300 tcg_elt = tcg_temp_new_i64(); 7301 7302 /* These instructions operate across all lanes of a vector 7303 * to produce a single result. We can guarantee that a 64 7304 * bit intermediate is sufficient: 7305 * + for [US]ADDLV the maximum element size is 32 bits, and 7306 * the result type is 64 bits 7307 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the 7308 * same as the element size, which is 32 bits at most 7309 * For the integer operations we can choose to work at 64 7310 * or 32 bits and truncate at the end; for simplicity 7311 * we use 64 bits always. The floating point 7312 * ops do require 32 bit intermediates, though. 7313 */ 7314 if (!is_fp) { 7315 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN)); 7316 7317 for (i = 1; i < elements; i++) { 7318 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN)); 7319 7320 switch (opcode) { 7321 case 0x03: /* SADDLV / UADDLV */ 7322 case 0x1b: /* ADDV */ 7323 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt); 7324 break; 7325 case 0x0a: /* SMAXV / UMAXV */ 7326 if (is_u) { 7327 tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt); 7328 } else { 7329 tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt); 7330 } 7331 break; 7332 case 0x1a: /* SMINV / UMINV */ 7333 if (is_u) { 7334 tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt); 7335 } else { 7336 tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt); 7337 } 7338 break; 7339 default: 7340 g_assert_not_reached(); 7341 } 7342 7343 } 7344 } else { 7345 /* Floating point vector reduction ops which work across 32 7346 * bit (single) or 16 bit (half-precision) intermediates. 7347 * Note that correct NaN propagation requires that we do these 7348 * operations in exactly the order specified by the pseudocode. 7349 */ 7350 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 7351 int fpopcode = opcode | is_min << 4 | is_u << 5; 7352 int vmap = (1 << elements) - 1; 7353 TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize, 7354 (is_q ? 
128 : 64), vmap, fpst); 7355 tcg_gen_extu_i32_i64(tcg_res, tcg_res32); 7356 } 7357 7358 /* Now truncate the result to the width required for the final output */ 7359 if (opcode == 0x03) { 7360 /* SADDLV, UADDLV: result is 2*esize */ 7361 size++; 7362 } 7363 7364 switch (size) { 7365 case 0: 7366 tcg_gen_ext8u_i64(tcg_res, tcg_res); 7367 break; 7368 case 1: 7369 tcg_gen_ext16u_i64(tcg_res, tcg_res); 7370 break; 7371 case 2: 7372 tcg_gen_ext32u_i64(tcg_res, tcg_res); 7373 break; 7374 case 3: 7375 break; 7376 default: 7377 g_assert_not_reached(); 7378 } 7379 7380 write_fp_dreg(s, rd, tcg_res); 7381 } 7382 7383 /* DUP (Element, Vector) 7384 * 7385 * 31 30 29 21 20 16 15 10 9 5 4 0 7386 * +---+---+-------------------+--------+-------------+------+------+ 7387 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | 7388 * +---+---+-------------------+--------+-------------+------+------+ 7389 * 7390 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7391 */ 7392 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, 7393 int imm5) 7394 { 7395 int size = ctz32(imm5); 7396 int index; 7397 7398 if (size > 3 || (size == 3 && !is_q)) { 7399 unallocated_encoding(s); 7400 return; 7401 } 7402 7403 if (!fp_access_check(s)) { 7404 return; 7405 } 7406 7407 index = imm5 >> (size + 1); 7408 tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd), 7409 vec_reg_offset(s, rn, index, size), 7410 is_q ? 16 : 8, vec_full_reg_size(s)); 7411 } 7412 7413 /* DUP (element, scalar) 7414 * 31 21 20 16 15 10 9 5 4 0 7415 * +-----------------------+--------+-------------+------+------+ 7416 * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | 7417 * +-----------------------+--------+-------------+------+------+ 7418 */ 7419 static void handle_simd_dupes(DisasContext *s, int rd, int rn, 7420 int imm5) 7421 { 7422 int size = ctz32(imm5); 7423 int index; 7424 TCGv_i64 tmp; 7425 7426 if (size > 3) { 7427 unallocated_encoding(s); 7428 return; 7429 } 7430 7431 if (!fp_access_check(s)) { 7432 return; 7433 } 7434 7435 index = imm5 >> (size + 1); 7436 7437 /* This instruction just extracts the specified element and 7438 * zero-extends it into the bottom of the destination register. 7439 */ 7440 tmp = tcg_temp_new_i64(); 7441 read_vec_element(s, tmp, rn, index, size); 7442 write_fp_dreg(s, rd, tmp); 7443 } 7444 7445 /* DUP (General) 7446 * 7447 * 31 30 29 21 20 16 15 10 9 5 4 0 7448 * +---+---+-------------------+--------+-------------+------+------+ 7449 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd | 7450 * +---+---+-------------------+--------+-------------+------+------+ 7451 * 7452 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7453 */ 7454 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn, 7455 int imm5) 7456 { 7457 int size = ctz32(imm5); 7458 uint32_t dofs, oprsz, maxsz; 7459 7460 if (size > 3 || ((size == 3) && !is_q)) { 7461 unallocated_encoding(s); 7462 return; 7463 } 7464 7465 if (!fp_access_check(s)) { 7466 return; 7467 } 7468 7469 dofs = vec_full_reg_offset(s, rd); 7470 oprsz = is_q ? 
16 : 8; 7471 maxsz = vec_full_reg_size(s); 7472 7473 tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn)); 7474 } 7475 7476 /* INS (Element) 7477 * 7478 * 31 21 20 16 15 14 11 10 9 5 4 0 7479 * +-----------------------+--------+------------+---+------+------+ 7480 * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 7481 * +-----------------------+--------+------------+---+------+------+ 7482 * 7483 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7484 * index: encoded in imm5<4:size+1> 7485 */ 7486 static void handle_simd_inse(DisasContext *s, int rd, int rn, 7487 int imm4, int imm5) 7488 { 7489 int size = ctz32(imm5); 7490 int src_index, dst_index; 7491 TCGv_i64 tmp; 7492 7493 if (size > 3) { 7494 unallocated_encoding(s); 7495 return; 7496 } 7497 7498 if (!fp_access_check(s)) { 7499 return; 7500 } 7501 7502 dst_index = extract32(imm5, 1+size, 5); 7503 src_index = extract32(imm4, size, 4); 7504 7505 tmp = tcg_temp_new_i64(); 7506 7507 read_vec_element(s, tmp, rn, src_index, size); 7508 write_vec_element(s, tmp, rd, dst_index, size); 7509 7510 /* INS is considered a 128-bit write for SVE. */ 7511 clear_vec_high(s, true, rd); 7512 } 7513 7514 7515 /* INS (General) 7516 * 7517 * 31 21 20 16 15 10 9 5 4 0 7518 * +-----------------------+--------+-------------+------+------+ 7519 * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd | 7520 * +-----------------------+--------+-------------+------+------+ 7521 * 7522 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7523 * index: encoded in imm5<4:size+1> 7524 */ 7525 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5) 7526 { 7527 int size = ctz32(imm5); 7528 int idx; 7529 7530 if (size > 3) { 7531 unallocated_encoding(s); 7532 return; 7533 } 7534 7535 if (!fp_access_check(s)) { 7536 return; 7537 } 7538 7539 idx = extract32(imm5, 1 + size, 4 - size); 7540 write_vec_element(s, cpu_reg(s, rn), rd, idx, size); 7541 7542 /* INS is considered a 128-bit write for SVE. */ 7543 clear_vec_high(s, true, rd); 7544 } 7545 7546 /* 7547 * UMOV (General) 7548 * SMOV (General) 7549 * 7550 * 31 30 29 21 20 16 15 12 10 9 5 4 0 7551 * +---+---+-------------------+--------+-------------+------+------+ 7552 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd | 7553 * +---+---+-------------------+--------+-------------+------+------+ 7554 * 7555 * U: unsigned when set 7556 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7557 */ 7558 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, 7559 int rn, int rd, int imm5) 7560 { 7561 int size = ctz32(imm5); 7562 int element; 7563 TCGv_i64 tcg_rd; 7564 7565 /* Check for UnallocatedEncodings */ 7566 if (is_signed) { 7567 if (size > 2 || (size == 2 && !is_q)) { 7568 unallocated_encoding(s); 7569 return; 7570 } 7571 } else { 7572 if (size > 3 7573 || (size < 3 && is_q) 7574 || (size == 3 && !is_q)) { 7575 unallocated_encoding(s); 7576 return; 7577 } 7578 } 7579 7580 if (!fp_access_check(s)) { 7581 return; 7582 } 7583 7584 element = extract32(imm5, 1+size, 4); 7585 7586 tcg_rd = cpu_reg(s, rd); 7587 read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? 
MO_SIGN : 0)); 7588 if (is_signed && !is_q) { 7589 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7590 } 7591 } 7592 7593 /* AdvSIMD copy 7594 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 7595 * +---+---+----+-----------------+------+---+------+---+------+------+ 7596 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 7597 * +---+---+----+-----------------+------+---+------+---+------+------+ 7598 */ 7599 static void disas_simd_copy(DisasContext *s, uint32_t insn) 7600 { 7601 int rd = extract32(insn, 0, 5); 7602 int rn = extract32(insn, 5, 5); 7603 int imm4 = extract32(insn, 11, 4); 7604 int op = extract32(insn, 29, 1); 7605 int is_q = extract32(insn, 30, 1); 7606 int imm5 = extract32(insn, 16, 5); 7607 7608 if (op) { 7609 if (is_q) { 7610 /* INS (element) */ 7611 handle_simd_inse(s, rd, rn, imm4, imm5); 7612 } else { 7613 unallocated_encoding(s); 7614 } 7615 } else { 7616 switch (imm4) { 7617 case 0: 7618 /* DUP (element - vector) */ 7619 handle_simd_dupe(s, is_q, rd, rn, imm5); 7620 break; 7621 case 1: 7622 /* DUP (general) */ 7623 handle_simd_dupg(s, is_q, rd, rn, imm5); 7624 break; 7625 case 3: 7626 if (is_q) { 7627 /* INS (general) */ 7628 handle_simd_insg(s, rd, rn, imm5); 7629 } else { 7630 unallocated_encoding(s); 7631 } 7632 break; 7633 case 5: 7634 case 7: 7635 /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */ 7636 handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5); 7637 break; 7638 default: 7639 unallocated_encoding(s); 7640 break; 7641 } 7642 } 7643 } 7644 7645 /* AdvSIMD modified immediate 7646 * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0 7647 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ 7648 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd | 7649 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ 7650 * 7651 * There are a number of operations that can be carried out here: 7652 * MOVI - move (shifted) imm into register 7653 * MVNI - move inverted (shifted) imm into register 7654 * ORR - bitwise OR of (shifted) imm with register 7655 * BIC - bitwise clear of (shifted) imm with register 7656 * With ARMv8.2 we also have: 7657 * FMOV half-precision 7658 */ 7659 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) 7660 { 7661 int rd = extract32(insn, 0, 5); 7662 int cmode = extract32(insn, 12, 4); 7663 int o2 = extract32(insn, 11, 1); 7664 uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5); 7665 bool is_neg = extract32(insn, 29, 1); 7666 bool is_q = extract32(insn, 30, 1); 7667 uint64_t imm = 0; 7668 7669 if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) { 7670 /* Check for FMOV (vector, immediate) - half-precision */ 7671 if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) { 7672 unallocated_encoding(s); 7673 return; 7674 } 7675 } 7676 7677 if (!fp_access_check(s)) { 7678 return; 7679 } 7680 7681 if (cmode == 15 && o2 && !is_neg) { 7682 /* FMOV (vector, immediate) - half-precision */ 7683 imm = vfp_expand_imm(MO_16, abcdefgh); 7684 /* now duplicate across the lanes */ 7685 imm = dup_const(MO_16, imm); 7686 } else { 7687 imm = asimd_imm_const(abcdefgh, cmode, is_neg); 7688 } 7689 7690 if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) { 7691 /* MOVI or MVNI, with MVNI negation handled above. */ 7692 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8, 7693 vec_full_reg_size(s), imm); 7694 } else { 7695 /* ORR or BIC, with BIC negation to AND handled above. 
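asimd_imm_const() has already inverted the immediate for the is_neg forms, so BIC becomes a vector AND with that constant and ORR a plain vector OR.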
*/ 7696 if (is_neg) { 7697 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64); 7698 } else { 7699 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64); 7700 } 7701 } 7702 } 7703 7704 /* AdvSIMD scalar copy 7705 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 7706 * +-----+----+-----------------+------+---+------+---+------+------+ 7707 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 7708 * +-----+----+-----------------+------+---+------+---+------+------+ 7709 */ 7710 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn) 7711 { 7712 int rd = extract32(insn, 0, 5); 7713 int rn = extract32(insn, 5, 5); 7714 int imm4 = extract32(insn, 11, 4); 7715 int imm5 = extract32(insn, 16, 5); 7716 int op = extract32(insn, 29, 1); 7717 7718 if (op != 0 || imm4 != 0) { 7719 unallocated_encoding(s); 7720 return; 7721 } 7722 7723 /* DUP (element, scalar) */ 7724 handle_simd_dupes(s, rd, rn, imm5); 7725 } 7726 7727 /* AdvSIMD scalar pairwise 7728 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 7729 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 7730 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | 7731 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 7732 */ 7733 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) 7734 { 7735 int u = extract32(insn, 29, 1); 7736 int size = extract32(insn, 22, 2); 7737 int opcode = extract32(insn, 12, 5); 7738 int rn = extract32(insn, 5, 5); 7739 int rd = extract32(insn, 0, 5); 7740 TCGv_ptr fpst; 7741 7742 /* For some ops (the FP ones), size[1] is part of the encoding. 7743 * For ADDP strictly it is not but size[1] is always 1 for valid 7744 * encodings. 7745 */ 7746 opcode |= (extract32(size, 1, 1) << 5); 7747 7748 switch (opcode) { 7749 case 0x3b: /* ADDP */ 7750 if (u || size != 3) { 7751 unallocated_encoding(s); 7752 return; 7753 } 7754 if (!fp_access_check(s)) { 7755 return; 7756 } 7757 7758 fpst = NULL; 7759 break; 7760 case 0xc: /* FMAXNMP */ 7761 case 0xd: /* FADDP */ 7762 case 0xf: /* FMAXP */ 7763 case 0x2c: /* FMINNMP */ 7764 case 0x2f: /* FMINP */ 7765 /* FP op, size[0] is 32 or 64 bit*/ 7766 if (!u) { 7767 if (!dc_isar_feature(aa64_fp16, s)) { 7768 unallocated_encoding(s); 7769 return; 7770 } else { 7771 size = MO_16; 7772 } 7773 } else { 7774 size = extract32(size, 0, 1) ? MO_64 : MO_32; 7775 } 7776 7777 if (!fp_access_check(s)) { 7778 return; 7779 } 7780 7781 fpst = fpstatus_ptr(size == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); 7782 break; 7783 default: 7784 unallocated_encoding(s); 7785 return; 7786 } 7787 7788 if (size == MO_64) { 7789 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 7790 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 7791 TCGv_i64 tcg_res = tcg_temp_new_i64(); 7792 7793 read_vec_element(s, tcg_op1, rn, 0, MO_64); 7794 read_vec_element(s, tcg_op2, rn, 1, MO_64); 7795 7796 switch (opcode) { 7797 case 0x3b: /* ADDP */ 7798 tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2); 7799 break; 7800 case 0xc: /* FMAXNMP */ 7801 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); 7802 break; 7803 case 0xd: /* FADDP */ 7804 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); 7805 break; 7806 case 0xf: /* FMAXP */ 7807 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); 7808 break; 7809 case 0x2c: /* FMINNMP */ 7810 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); 7811 break; 7812 case 0x2f: /* FMINP */ 7813 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); 7814 break; 7815 default: 7816 g_assert_not_reached(); 7817 } 7818 7819 write_fp_dreg(s, rd, tcg_res); 7820 } else { 7821 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 7822 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 7823 TCGv_i32 tcg_res = tcg_temp_new_i32(); 7824 7825 read_vec_element_i32(s, tcg_op1, rn, 0, size); 7826 read_vec_element_i32(s, tcg_op2, rn, 1, size); 7827 7828 if (size == MO_16) { 7829 switch (opcode) { 7830 case 0xc: /* FMAXNMP */ 7831 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 7832 break; 7833 case 0xd: /* FADDP */ 7834 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 7835 break; 7836 case 0xf: /* FMAXP */ 7837 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 7838 break; 7839 case 0x2c: /* FMINNMP */ 7840 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 7841 break; 7842 case 0x2f: /* FMINP */ 7843 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 7844 break; 7845 default: 7846 g_assert_not_reached(); 7847 } 7848 } else { 7849 switch (opcode) { 7850 case 0xc: /* FMAXNMP */ 7851 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); 7852 break; 7853 case 0xd: /* FADDP */ 7854 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); 7855 break; 7856 case 0xf: /* FMAXP */ 7857 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); 7858 break; 7859 case 0x2c: /* FMINNMP */ 7860 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); 7861 break; 7862 case 0x2f: /* FMINP */ 7863 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); 7864 break; 7865 default: 7866 g_assert_not_reached(); 7867 } 7868 } 7869 7870 write_fp_sreg(s, rd, tcg_res); 7871 } 7872 } 7873 7874 /* 7875 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate) 7876 * 7877 * This handles the common shift logic and is used by both 7878 * the vector and scalar code.
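 * When rounding is requested the constant 1 << (shift - 1) is added before shifting; for 64-bit elements that sum can overflow 64 bits, so a 65-bit intermediate is built with tcg_gen_add2_i64.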
7879 */ 7880 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src, 7881 TCGv_i64 tcg_rnd, bool accumulate, 7882 bool is_u, int size, int shift) 7883 { 7884 bool extended_result = false; 7885 bool round = tcg_rnd != NULL; 7886 int ext_lshift = 0; 7887 TCGv_i64 tcg_src_hi; 7888 7889 if (round && size == 3) { 7890 extended_result = true; 7891 ext_lshift = 64 - shift; 7892 tcg_src_hi = tcg_temp_new_i64(); 7893 } else if (shift == 64) { 7894 if (!accumulate && is_u) { 7895 /* result is zero */ 7896 tcg_gen_movi_i64(tcg_res, 0); 7897 return; 7898 } 7899 } 7900 7901 /* Deal with the rounding step */ 7902 if (round) { 7903 if (extended_result) { 7904 TCGv_i64 tcg_zero = tcg_constant_i64(0); 7905 if (!is_u) { 7906 /* take care of sign extending tcg_res */ 7907 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63); 7908 tcg_gen_add2_i64(tcg_src, tcg_src_hi, 7909 tcg_src, tcg_src_hi, 7910 tcg_rnd, tcg_zero); 7911 } else { 7912 tcg_gen_add2_i64(tcg_src, tcg_src_hi, 7913 tcg_src, tcg_zero, 7914 tcg_rnd, tcg_zero); 7915 } 7916 } else { 7917 tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd); 7918 } 7919 } 7920 7921 /* Now do the shift right */ 7922 if (round && extended_result) { 7923 /* extended case, >64 bit precision required */ 7924 if (ext_lshift == 0) { 7925 /* special case, only high bits matter */ 7926 tcg_gen_mov_i64(tcg_src, tcg_src_hi); 7927 } else { 7928 tcg_gen_shri_i64(tcg_src, tcg_src, shift); 7929 tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift); 7930 tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi); 7931 } 7932 } else { 7933 if (is_u) { 7934 if (shift == 64) { 7935 /* essentially shifting in 64 zeros */ 7936 tcg_gen_movi_i64(tcg_src, 0); 7937 } else { 7938 tcg_gen_shri_i64(tcg_src, tcg_src, shift); 7939 } 7940 } else { 7941 if (shift == 64) { 7942 /* effectively extending the sign-bit */ 7943 tcg_gen_sari_i64(tcg_src, tcg_src, 63); 7944 } else { 7945 tcg_gen_sari_i64(tcg_src, tcg_src, shift); 7946 } 7947 } 7948 } 7949 7950 if (accumulate) { 7951 tcg_gen_add_i64(tcg_res, tcg_res, tcg_src); 7952 } else { 7953 tcg_gen_mov_i64(tcg_res, tcg_src); 7954 } 7955 } 7956 7957 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ 7958 static void handle_scalar_simd_shri(DisasContext *s, 7959 bool is_u, int immh, int immb, 7960 int opcode, int rn, int rd) 7961 { 7962 const int size = 3; 7963 int immhb = immh << 3 | immb; 7964 int shift = 2 * (8 << size) - immhb; 7965 bool accumulate = false; 7966 bool round = false; 7967 bool insert = false; 7968 TCGv_i64 tcg_rn; 7969 TCGv_i64 tcg_rd; 7970 TCGv_i64 tcg_round; 7971 7972 if (!extract32(immh, 3, 1)) { 7973 unallocated_encoding(s); 7974 return; 7975 } 7976 7977 if (!fp_access_check(s)) { 7978 return; 7979 } 7980 7981 switch (opcode) { 7982 case 0x02: /* SSRA / USRA (accumulate) */ 7983 accumulate = true; 7984 break; 7985 case 0x04: /* SRSHR / URSHR (rounding) */ 7986 round = true; 7987 break; 7988 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 7989 accumulate = round = true; 7990 break; 7991 case 0x08: /* SRI */ 7992 insert = true; 7993 break; 7994 } 7995 7996 if (round) { 7997 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 7998 } else { 7999 tcg_round = NULL; 8000 } 8001 8002 tcg_rn = read_fp_dreg(s, rn); 8003 tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); 8004 8005 if (insert) { 8006 /* shift count same as element size is valid but does nothing; 8007 * special case to avoid potential shift by 64. 
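 * (SRI with shift == esize architecturally inserts no bits, so the 64-bit result is just the original Rd value.)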
8008 */ 8009 int esize = 8 << size; 8010 if (shift != esize) { 8011 tcg_gen_shri_i64(tcg_rn, tcg_rn, shift); 8012 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift); 8013 } 8014 } else { 8015 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 8016 accumulate, is_u, size, shift); 8017 } 8018 8019 write_fp_dreg(s, rd, tcg_rd); 8020 } 8021 8022 /* SHL/SLI - Scalar shift left */ 8023 static void handle_scalar_simd_shli(DisasContext *s, bool insert, 8024 int immh, int immb, int opcode, 8025 int rn, int rd) 8026 { 8027 int size = 32 - clz32(immh) - 1; 8028 int immhb = immh << 3 | immb; 8029 int shift = immhb - (8 << size); 8030 TCGv_i64 tcg_rn; 8031 TCGv_i64 tcg_rd; 8032 8033 if (!extract32(immh, 3, 1)) { 8034 unallocated_encoding(s); 8035 return; 8036 } 8037 8038 if (!fp_access_check(s)) { 8039 return; 8040 } 8041 8042 tcg_rn = read_fp_dreg(s, rn); 8043 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); 8044 8045 if (insert) { 8046 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift); 8047 } else { 8048 tcg_gen_shli_i64(tcg_rd, tcg_rn, shift); 8049 } 8050 8051 write_fp_dreg(s, rd, tcg_rd); 8052 } 8053 8054 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with 8055 * (signed/unsigned) narrowing */ 8056 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, 8057 bool is_u_shift, bool is_u_narrow, 8058 int immh, int immb, int opcode, 8059 int rn, int rd) 8060 { 8061 int immhb = immh << 3 | immb; 8062 int size = 32 - clz32(immh) - 1; 8063 int esize = 8 << size; 8064 int shift = (2 * esize) - immhb; 8065 int elements = is_scalar ? 1 : (64 / esize); 8066 bool round = extract32(opcode, 0, 1); 8067 MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN); 8068 TCGv_i64 tcg_rn, tcg_rd, tcg_round; 8069 TCGv_i32 tcg_rd_narrowed; 8070 TCGv_i64 tcg_final; 8071 8072 static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = { 8073 { gen_helper_neon_narrow_sat_s8, 8074 gen_helper_neon_unarrow_sat8 }, 8075 { gen_helper_neon_narrow_sat_s16, 8076 gen_helper_neon_unarrow_sat16 }, 8077 { gen_helper_neon_narrow_sat_s32, 8078 gen_helper_neon_unarrow_sat32 }, 8079 { NULL, NULL }, 8080 }; 8081 static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = { 8082 gen_helper_neon_narrow_sat_u8, 8083 gen_helper_neon_narrow_sat_u16, 8084 gen_helper_neon_narrow_sat_u32, 8085 NULL 8086 }; 8087 NeonGenNarrowEnvFn *narrowfn; 8088 8089 int i; 8090 8091 assert(size < 4); 8092 8093 if (extract32(immh, 3, 1)) { 8094 unallocated_encoding(s); 8095 return; 8096 } 8097 8098 if (!fp_access_check(s)) { 8099 return; 8100 } 8101 8102 if (is_u_shift) { 8103 narrowfn = unsigned_narrow_fns[size]; 8104 } else { 8105 narrowfn = signed_narrow_fns[size][is_u_narrow ? 
1 : 0]; 8106 } 8107 8108 tcg_rn = tcg_temp_new_i64(); 8109 tcg_rd = tcg_temp_new_i64(); 8110 tcg_rd_narrowed = tcg_temp_new_i32(); 8111 tcg_final = tcg_temp_new_i64(); 8112 8113 if (round) { 8114 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 8115 } else { 8116 tcg_round = NULL; 8117 } 8118 8119 for (i = 0; i < elements; i++) { 8120 read_vec_element(s, tcg_rn, rn, i, ldop); 8121 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 8122 false, is_u_shift, size+1, shift); 8123 narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd); 8124 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed); 8125 if (i == 0) { 8126 tcg_gen_mov_i64(tcg_final, tcg_rd); 8127 } else { 8128 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); 8129 } 8130 } 8131 8132 if (!is_q) { 8133 write_vec_element(s, tcg_final, rd, 0, MO_64); 8134 } else { 8135 write_vec_element(s, tcg_final, rd, 1, MO_64); 8136 } 8137 clear_vec_high(s, is_q, rd); 8138 } 8139 8140 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */ 8141 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, 8142 bool src_unsigned, bool dst_unsigned, 8143 int immh, int immb, int rn, int rd) 8144 { 8145 int immhb = immh << 3 | immb; 8146 int size = 32 - clz32(immh) - 1; 8147 int shift = immhb - (8 << size); 8148 int pass; 8149 8150 assert(immh != 0); 8151 assert(!(scalar && is_q)); 8152 8153 if (!scalar) { 8154 if (!is_q && extract32(immh, 3, 1)) { 8155 unallocated_encoding(s); 8156 return; 8157 } 8158 8159 /* Since we use the variable-shift helpers we must 8160 * replicate the shift count into each element of 8161 * the tcg_shift value. 8162 */ 8163 switch (size) { 8164 case 0: 8165 shift |= shift << 8; 8166 /* fall through */ 8167 case 1: 8168 shift |= shift << 16; 8169 break; 8170 case 2: 8171 case 3: 8172 break; 8173 default: 8174 g_assert_not_reached(); 8175 } 8176 } 8177 8178 if (!fp_access_check(s)) { 8179 return; 8180 } 8181 8182 if (size == 3) { 8183 TCGv_i64 tcg_shift = tcg_constant_i64(shift); 8184 static NeonGenTwo64OpEnvFn * const fns[2][2] = { 8185 { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 }, 8186 { NULL, gen_helper_neon_qshl_u64 }, 8187 }; 8188 NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned]; 8189 int maxpass = is_q ? 2 : 1; 8190 8191 for (pass = 0; pass < maxpass; pass++) { 8192 TCGv_i64 tcg_op = tcg_temp_new_i64(); 8193 8194 read_vec_element(s, tcg_op, rn, pass, MO_64); 8195 genfn(tcg_op, cpu_env, tcg_op, tcg_shift); 8196 write_vec_element(s, tcg_op, rd, pass, MO_64); 8197 } 8198 clear_vec_high(s, is_q, rd); 8199 } else { 8200 TCGv_i32 tcg_shift = tcg_constant_i32(shift); 8201 static NeonGenTwoOpEnvFn * const fns[2][2][3] = { 8202 { 8203 { gen_helper_neon_qshl_s8, 8204 gen_helper_neon_qshl_s16, 8205 gen_helper_neon_qshl_s32 }, 8206 { gen_helper_neon_qshlu_s8, 8207 gen_helper_neon_qshlu_s16, 8208 gen_helper_neon_qshlu_s32 } 8209 }, { 8210 { NULL, NULL, NULL }, 8211 { gen_helper_neon_qshl_u8, 8212 gen_helper_neon_qshl_u16, 8213 gen_helper_neon_qshl_u32 } 8214 } 8215 }; 8216 NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size]; 8217 MemOp memop = scalar ? size : MO_32; 8218 int maxpass = scalar ? 1 : is_q ? 
4 : 2; 8219 8220 for (pass = 0; pass < maxpass; pass++) { 8221 TCGv_i32 tcg_op = tcg_temp_new_i32(); 8222 8223 read_vec_element_i32(s, tcg_op, rn, pass, memop); 8224 genfn(tcg_op, cpu_env, tcg_op, tcg_shift); 8225 if (scalar) { 8226 switch (size) { 8227 case 0: 8228 tcg_gen_ext8u_i32(tcg_op, tcg_op); 8229 break; 8230 case 1: 8231 tcg_gen_ext16u_i32(tcg_op, tcg_op); 8232 break; 8233 case 2: 8234 break; 8235 default: 8236 g_assert_not_reached(); 8237 } 8238 write_fp_sreg(s, rd, tcg_op); 8239 } else { 8240 write_vec_element_i32(s, tcg_op, rd, pass, MO_32); 8241 } 8242 } 8243 8244 if (!scalar) { 8245 clear_vec_high(s, is_q, rd); 8246 } 8247 } 8248 } 8249 8250 /* Common vector code for handling integer to FP conversion */ 8251 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, 8252 int elements, int is_signed, 8253 int fracbits, int size) 8254 { 8255 TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 8256 TCGv_i32 tcg_shift = NULL; 8257 8258 MemOp mop = size | (is_signed ? MO_SIGN : 0); 8259 int pass; 8260 8261 if (fracbits || size == MO_64) { 8262 tcg_shift = tcg_constant_i32(fracbits); 8263 } 8264 8265 if (size == MO_64) { 8266 TCGv_i64 tcg_int64 = tcg_temp_new_i64(); 8267 TCGv_i64 tcg_double = tcg_temp_new_i64(); 8268 8269 for (pass = 0; pass < elements; pass++) { 8270 read_vec_element(s, tcg_int64, rn, pass, mop); 8271 8272 if (is_signed) { 8273 gen_helper_vfp_sqtod(tcg_double, tcg_int64, 8274 tcg_shift, tcg_fpst); 8275 } else { 8276 gen_helper_vfp_uqtod(tcg_double, tcg_int64, 8277 tcg_shift, tcg_fpst); 8278 } 8279 if (elements == 1) { 8280 write_fp_dreg(s, rd, tcg_double); 8281 } else { 8282 write_vec_element(s, tcg_double, rd, pass, MO_64); 8283 } 8284 } 8285 } else { 8286 TCGv_i32 tcg_int32 = tcg_temp_new_i32(); 8287 TCGv_i32 tcg_float = tcg_temp_new_i32(); 8288 8289 for (pass = 0; pass < elements; pass++) { 8290 read_vec_element_i32(s, tcg_int32, rn, pass, mop); 8291 8292 switch (size) { 8293 case MO_32: 8294 if (fracbits) { 8295 if (is_signed) { 8296 gen_helper_vfp_sltos(tcg_float, tcg_int32, 8297 tcg_shift, tcg_fpst); 8298 } else { 8299 gen_helper_vfp_ultos(tcg_float, tcg_int32, 8300 tcg_shift, tcg_fpst); 8301 } 8302 } else { 8303 if (is_signed) { 8304 gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst); 8305 } else { 8306 gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst); 8307 } 8308 } 8309 break; 8310 case MO_16: 8311 if (fracbits) { 8312 if (is_signed) { 8313 gen_helper_vfp_sltoh(tcg_float, tcg_int32, 8314 tcg_shift, tcg_fpst); 8315 } else { 8316 gen_helper_vfp_ultoh(tcg_float, tcg_int32, 8317 tcg_shift, tcg_fpst); 8318 } 8319 } else { 8320 if (is_signed) { 8321 gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst); 8322 } else { 8323 gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst); 8324 } 8325 } 8326 break; 8327 default: 8328 g_assert_not_reached(); 8329 } 8330 8331 if (elements == 1) { 8332 write_fp_sreg(s, rd, tcg_float); 8333 } else { 8334 write_vec_element_i32(s, tcg_float, rd, pass, size); 8335 } 8336 } 8337 } 8338 8339 clear_vec_high(s, elements << size == 16, rd); 8340 } 8341 8342 /* UCVTF/SCVTF - Integer to FP conversion */ 8343 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, 8344 bool is_q, bool is_u, 8345 int immh, int immb, int opcode, 8346 int rn, int rd) 8347 { 8348 int size, elements, fracbits; 8349 int immhb = immh << 3 | immb; 8350 8351 if (immh & 8) { 8352 size = MO_64; 8353 if (!is_scalar && !is_q) { 8354 unallocated_encoding(s); 8355 return; 8356 } 8357 } else if (immh & 4) { 8358 size 
= MO_32; 8359 } else if (immh & 2) { 8360 size = MO_16; 8361 if (!dc_isar_feature(aa64_fp16, s)) { 8362 unallocated_encoding(s); 8363 return; 8364 } 8365 } else { 8366 /* immh == 0 would be a failure of the decode logic */ 8367 g_assert(immh == 1); 8368 unallocated_encoding(s); 8369 return; 8370 } 8371 8372 if (is_scalar) { 8373 elements = 1; 8374 } else { 8375 elements = (8 << is_q) >> size; 8376 } 8377 fracbits = (16 << size) - immhb; 8378 8379 if (!fp_access_check(s)) { 8380 return; 8381 } 8382 8383 handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size); 8384 } 8385 8386 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */ 8387 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, 8388 bool is_q, bool is_u, 8389 int immh, int immb, int rn, int rd) 8390 { 8391 int immhb = immh << 3 | immb; 8392 int pass, size, fracbits; 8393 TCGv_ptr tcg_fpstatus; 8394 TCGv_i32 tcg_rmode, tcg_shift; 8395 8396 if (immh & 0x8) { 8397 size = MO_64; 8398 if (!is_scalar && !is_q) { 8399 unallocated_encoding(s); 8400 return; 8401 } 8402 } else if (immh & 0x4) { 8403 size = MO_32; 8404 } else if (immh & 0x2) { 8405 size = MO_16; 8406 if (!dc_isar_feature(aa64_fp16, s)) { 8407 unallocated_encoding(s); 8408 return; 8409 } 8410 } else { 8411 /* Should have split out AdvSIMD modified immediate earlier. */ 8412 assert(immh == 1); 8413 unallocated_encoding(s); 8414 return; 8415 } 8416 8417 if (!fp_access_check(s)) { 8418 return; 8419 } 8420 8421 assert(!(is_scalar && is_q)); 8422 8423 tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 8424 tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus); 8425 fracbits = (16 << size) - immhb; 8426 tcg_shift = tcg_constant_i32(fracbits); 8427 8428 if (size == MO_64) { 8429 int maxpass = is_scalar ? 1 : 2; 8430 8431 for (pass = 0; pass < maxpass; pass++) { 8432 TCGv_i64 tcg_op = tcg_temp_new_i64(); 8433 8434 read_vec_element(s, tcg_op, rn, pass, MO_64); 8435 if (is_u) { 8436 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 8437 } else { 8438 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 8439 } 8440 write_vec_element(s, tcg_op, rd, pass, MO_64); 8441 } 8442 clear_vec_high(s, is_q, rd); 8443 } else { 8444 void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 8445 int maxpass = is_scalar ? 
1 : ((8 << is_q) >> size); 8446 8447 switch (size) { 8448 case MO_16: 8449 if (is_u) { 8450 fn = gen_helper_vfp_touhh; 8451 } else { 8452 fn = gen_helper_vfp_toshh; 8453 } 8454 break; 8455 case MO_32: 8456 if (is_u) { 8457 fn = gen_helper_vfp_touls; 8458 } else { 8459 fn = gen_helper_vfp_tosls; 8460 } 8461 break; 8462 default: 8463 g_assert_not_reached(); 8464 } 8465 8466 for (pass = 0; pass < maxpass; pass++) { 8467 TCGv_i32 tcg_op = tcg_temp_new_i32(); 8468 8469 read_vec_element_i32(s, tcg_op, rn, pass, size); 8470 fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 8471 if (is_scalar) { 8472 write_fp_sreg(s, rd, tcg_op); 8473 } else { 8474 write_vec_element_i32(s, tcg_op, rd, pass, size); 8475 } 8476 } 8477 if (!is_scalar) { 8478 clear_vec_high(s, is_q, rd); 8479 } 8480 } 8481 8482 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 8483 } 8484 8485 /* AdvSIMD scalar shift by immediate 8486 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 8487 * +-----+---+-------------+------+------+--------+---+------+------+ 8488 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | 8489 * +-----+---+-------------+------+------+--------+---+------+------+ 8490 * 8491 * This is the scalar version so it works on a fixed sized registers 8492 */ 8493 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) 8494 { 8495 int rd = extract32(insn, 0, 5); 8496 int rn = extract32(insn, 5, 5); 8497 int opcode = extract32(insn, 11, 5); 8498 int immb = extract32(insn, 16, 3); 8499 int immh = extract32(insn, 19, 4); 8500 bool is_u = extract32(insn, 29, 1); 8501 8502 if (immh == 0) { 8503 unallocated_encoding(s); 8504 return; 8505 } 8506 8507 switch (opcode) { 8508 case 0x08: /* SRI */ 8509 if (!is_u) { 8510 unallocated_encoding(s); 8511 return; 8512 } 8513 /* fall through */ 8514 case 0x00: /* SSHR / USHR */ 8515 case 0x02: /* SSRA / USRA */ 8516 case 0x04: /* SRSHR / URSHR */ 8517 case 0x06: /* SRSRA / URSRA */ 8518 handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd); 8519 break; 8520 case 0x0a: /* SHL / SLI */ 8521 handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd); 8522 break; 8523 case 0x1c: /* SCVTF, UCVTF */ 8524 handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb, 8525 opcode, rn, rd); 8526 break; 8527 case 0x10: /* SQSHRUN, SQSHRUN2 */ 8528 case 0x11: /* SQRSHRUN, SQRSHRUN2 */ 8529 if (!is_u) { 8530 unallocated_encoding(s); 8531 return; 8532 } 8533 handle_vec_simd_sqshrn(s, true, false, false, true, 8534 immh, immb, opcode, rn, rd); 8535 break; 8536 case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */ 8537 case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */ 8538 handle_vec_simd_sqshrn(s, true, false, is_u, is_u, 8539 immh, immb, opcode, rn, rd); 8540 break; 8541 case 0xc: /* SQSHLU */ 8542 if (!is_u) { 8543 unallocated_encoding(s); 8544 return; 8545 } 8546 handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd); 8547 break; 8548 case 0xe: /* SQSHL, UQSHL */ 8549 handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd); 8550 break; 8551 case 0x1f: /* FCVTZS, FCVTZU */ 8552 handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd); 8553 break; 8554 default: 8555 unallocated_encoding(s); 8556 break; 8557 } 8558 } 8559 8560 /* AdvSIMD scalar three different 8561 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 8562 * +-----+---+-----------+------+---+------+--------+-----+------+------+ 8563 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | 8564 * +-----+---+-----------+------+---+------+--------+-----+------+------+ 8565 */ 8566 
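/*
 * Only the signed saturating doubling multiply ops (SQDMLAL, SQDMLSL,
 * SQDMULL) are allocated in this group; they operate on the low element
 * only, widening 16->32 or 32->64.  The "doubling" is implemented below
 * as a plain multiply followed by a saturating add of the product to
 * itself.
 */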
static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) 8567 { 8568 bool is_u = extract32(insn, 29, 1); 8569 int size = extract32(insn, 22, 2); 8570 int opcode = extract32(insn, 12, 4); 8571 int rm = extract32(insn, 16, 5); 8572 int rn = extract32(insn, 5, 5); 8573 int rd = extract32(insn, 0, 5); 8574 8575 if (is_u) { 8576 unallocated_encoding(s); 8577 return; 8578 } 8579 8580 switch (opcode) { 8581 case 0x9: /* SQDMLAL, SQDMLAL2 */ 8582 case 0xb: /* SQDMLSL, SQDMLSL2 */ 8583 case 0xd: /* SQDMULL, SQDMULL2 */ 8584 if (size == 0 || size == 3) { 8585 unallocated_encoding(s); 8586 return; 8587 } 8588 break; 8589 default: 8590 unallocated_encoding(s); 8591 return; 8592 } 8593 8594 if (!fp_access_check(s)) { 8595 return; 8596 } 8597 8598 if (size == 2) { 8599 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 8600 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 8601 TCGv_i64 tcg_res = tcg_temp_new_i64(); 8602 8603 read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN); 8604 read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN); 8605 8606 tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2); 8607 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res); 8608 8609 switch (opcode) { 8610 case 0xd: /* SQDMULL, SQDMULL2 */ 8611 break; 8612 case 0xb: /* SQDMLSL, SQDMLSL2 */ 8613 tcg_gen_neg_i64(tcg_res, tcg_res); 8614 /* fall through */ 8615 case 0x9: /* SQDMLAL, SQDMLAL2 */ 8616 read_vec_element(s, tcg_op1, rd, 0, MO_64); 8617 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, 8618 tcg_res, tcg_op1); 8619 break; 8620 default: 8621 g_assert_not_reached(); 8622 } 8623 8624 write_fp_dreg(s, rd, tcg_res); 8625 } else { 8626 TCGv_i32 tcg_op1 = read_fp_hreg(s, rn); 8627 TCGv_i32 tcg_op2 = read_fp_hreg(s, rm); 8628 TCGv_i64 tcg_res = tcg_temp_new_i64(); 8629 8630 gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2); 8631 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res); 8632 8633 switch (opcode) { 8634 case 0xd: /* SQDMULL, SQDMULL2 */ 8635 break; 8636 case 0xb: /* SQDMLSL, SQDMLSL2 */ 8637 gen_helper_neon_negl_u32(tcg_res, tcg_res); 8638 /* fall through */ 8639 case 0x9: /* SQDMLAL, SQDMLAL2 */ 8640 { 8641 TCGv_i64 tcg_op3 = tcg_temp_new_i64(); 8642 read_vec_element(s, tcg_op3, rd, 0, MO_32); 8643 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, 8644 tcg_res, tcg_op3); 8645 break; 8646 } 8647 default: 8648 g_assert_not_reached(); 8649 } 8650 8651 tcg_gen_ext32u_i64(tcg_res, tcg_res); 8652 write_fp_dreg(s, rd, tcg_res); 8653 } 8654 } 8655 8656 static void handle_3same_64(DisasContext *s, int opcode, bool u, 8657 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) 8658 { 8659 /* Handle 64x64->64 opcodes which are shared between the scalar 8660 * and vector 3-same groups. We cover every opcode where size == 3 8661 * is valid in either the three-reg-same (integer, not pairwise) 8662 * or scalar-three-reg-same groups. 8663 */ 8664 TCGCond cond; 8665 8666 switch (opcode) { 8667 case 0x1: /* SQADD */ 8668 if (u) { 8669 gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 8670 } else { 8671 gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 8672 } 8673 break; 8674 case 0x5: /* SQSUB */ 8675 if (u) { 8676 gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 8677 } else { 8678 gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 8679 } 8680 break; 8681 case 0x6: /* CMGT, CMHI */ 8682 cond = u ? TCG_COND_GTU : TCG_COND_GT; 8683 do_cmop: 8684 /* 64 bit integer comparison, result = test ? -1 : 0. 
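     * tcg_gen_negsetcond_i64() computes -(Rn cond Rm), i.e. all-ones when
     * the condition holds and zero otherwise, which is exactly the AdvSIMD
     * compare result.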
*/ 8685 tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_rm); 8686 break; 8687 case 0x7: /* CMGE, CMHS */ 8688 cond = u ? TCG_COND_GEU : TCG_COND_GE; 8689 goto do_cmop; 8690 case 0x11: /* CMTST, CMEQ */ 8691 if (u) { 8692 cond = TCG_COND_EQ; 8693 goto do_cmop; 8694 } 8695 gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm); 8696 break; 8697 case 0x8: /* SSHL, USHL */ 8698 if (u) { 8699 gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm); 8700 } else { 8701 gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm); 8702 } 8703 break; 8704 case 0x9: /* SQSHL, UQSHL */ 8705 if (u) { 8706 gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 8707 } else { 8708 gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 8709 } 8710 break; 8711 case 0xa: /* SRSHL, URSHL */ 8712 if (u) { 8713 gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm); 8714 } else { 8715 gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm); 8716 } 8717 break; 8718 case 0xb: /* SQRSHL, UQRSHL */ 8719 if (u) { 8720 gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 8721 } else { 8722 gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 8723 } 8724 break; 8725 case 0x10: /* ADD, SUB */ 8726 if (u) { 8727 tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm); 8728 } else { 8729 tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm); 8730 } 8731 break; 8732 default: 8733 g_assert_not_reached(); 8734 } 8735 } 8736 8737 /* Handle the 3-same-operands float operations; shared by the scalar 8738 * and vector encodings. The caller must filter out any encodings 8739 * not allocated for the encoding it is dealing with. 8740 */ 8741 static void handle_3same_float(DisasContext *s, int size, int elements, 8742 int fpopcode, int rd, int rn, int rm) 8743 { 8744 int pass; 8745 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 8746 8747 for (pass = 0; pass < elements; pass++) { 8748 if (size) { 8749 /* Double */ 8750 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 8751 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 8752 TCGv_i64 tcg_res = tcg_temp_new_i64(); 8753 8754 read_vec_element(s, tcg_op1, rn, pass, MO_64); 8755 read_vec_element(s, tcg_op2, rm, pass, MO_64); 8756 8757 switch (fpopcode) { 8758 case 0x39: /* FMLS */ 8759 /* As usual for ARM, separate negation for fused multiply-add */ 8760 gen_helper_vfp_negd(tcg_op1, tcg_op1); 8761 /* fall through */ 8762 case 0x19: /* FMLA */ 8763 read_vec_element(s, tcg_res, rd, pass, MO_64); 8764 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, 8765 tcg_res, fpst); 8766 break; 8767 case 0x18: /* FMAXNM */ 8768 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); 8769 break; 8770 case 0x1a: /* FADD */ 8771 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); 8772 break; 8773 case 0x1b: /* FMULX */ 8774 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst); 8775 break; 8776 case 0x1c: /* FCMEQ */ 8777 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8778 break; 8779 case 0x1e: /* FMAX */ 8780 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); 8781 break; 8782 case 0x1f: /* FRECPS */ 8783 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8784 break; 8785 case 0x38: /* FMINNM */ 8786 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); 8787 break; 8788 case 0x3a: /* FSUB */ 8789 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 8790 break; 8791 case 0x3e: /* FMIN */ 8792 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); 8793 break; 8794 case 0x3f: /* FRSQRTS */ 8795 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8796 break; 8797 case 0x5b: /* FMUL */ 8798 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 8799 break; 8800 case 0x5c: /* FCMGE */ 
8801 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8802 break; 8803 case 0x5d: /* FACGE */ 8804 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8805 break; 8806 case 0x5f: /* FDIV */ 8807 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); 8808 break; 8809 case 0x7a: /* FABD */ 8810 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 8811 gen_helper_vfp_absd(tcg_res, tcg_res); 8812 break; 8813 case 0x7c: /* FCMGT */ 8814 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8815 break; 8816 case 0x7d: /* FACGT */ 8817 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); 8818 break; 8819 default: 8820 g_assert_not_reached(); 8821 } 8822 8823 write_vec_element(s, tcg_res, rd, pass, MO_64); 8824 } else { 8825 /* Single */ 8826 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 8827 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 8828 TCGv_i32 tcg_res = tcg_temp_new_i32(); 8829 8830 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); 8831 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); 8832 8833 switch (fpopcode) { 8834 case 0x39: /* FMLS */ 8835 /* As usual for ARM, separate negation for fused multiply-add */ 8836 gen_helper_vfp_negs(tcg_op1, tcg_op1); 8837 /* fall through */ 8838 case 0x19: /* FMLA */ 8839 read_vec_element_i32(s, tcg_res, rd, pass, MO_32); 8840 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, 8841 tcg_res, fpst); 8842 break; 8843 case 0x1a: /* FADD */ 8844 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); 8845 break; 8846 case 0x1b: /* FMULX */ 8847 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst); 8848 break; 8849 case 0x1c: /* FCMEQ */ 8850 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst); 8851 break; 8852 case 0x1e: /* FMAX */ 8853 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); 8854 break; 8855 case 0x1f: /* FRECPS */ 8856 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); 8857 break; 8858 case 0x18: /* FMAXNM */ 8859 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); 8860 break; 8861 case 0x38: /* FMINNM */ 8862 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); 8863 break; 8864 case 0x3a: /* FSUB */ 8865 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 8866 break; 8867 case 0x3e: /* FMIN */ 8868 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); 8869 break; 8870 case 0x3f: /* FRSQRTS */ 8871 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); 8872 break; 8873 case 0x5b: /* FMUL */ 8874 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 8875 break; 8876 case 0x5c: /* FCMGE */ 8877 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst); 8878 break; 8879 case 0x5d: /* FACGE */ 8880 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst); 8881 break; 8882 case 0x5f: /* FDIV */ 8883 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); 8884 break; 8885 case 0x7a: /* FABD */ 8886 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 8887 gen_helper_vfp_abss(tcg_res, tcg_res); 8888 break; 8889 case 0x7c: /* FCMGT */ 8890 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); 8891 break; 8892 case 0x7d: /* FACGT */ 8893 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); 8894 break; 8895 default: 8896 g_assert_not_reached(); 8897 } 8898 8899 if (elements == 1) { 8900 /* scalar single so clear high part */ 8901 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 8902 8903 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res); 8904 write_vec_element(s, tcg_tmp, rd, pass, MO_64); 8905 } else { 8906 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 8907 } 8908 } 8909 } 8910 8911 clear_vec_high(s, elements * (size ? 
8 : 4) > 8, rd); 8912 } 8913 8914 /* AdvSIMD scalar three same 8915 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 8916 * +-----+---+-----------+------+---+------+--------+---+------+------+ 8917 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | 8918 * +-----+---+-----------+------+---+------+--------+---+------+------+ 8919 */ 8920 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) 8921 { 8922 int rd = extract32(insn, 0, 5); 8923 int rn = extract32(insn, 5, 5); 8924 int opcode = extract32(insn, 11, 5); 8925 int rm = extract32(insn, 16, 5); 8926 int size = extract32(insn, 22, 2); 8927 bool u = extract32(insn, 29, 1); 8928 TCGv_i64 tcg_rd; 8929 8930 if (opcode >= 0x18) { 8931 /* Floating point: U, size[1] and opcode indicate operation */ 8932 int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6); 8933 switch (fpopcode) { 8934 case 0x1b: /* FMULX */ 8935 case 0x1f: /* FRECPS */ 8936 case 0x3f: /* FRSQRTS */ 8937 case 0x5d: /* FACGE */ 8938 case 0x7d: /* FACGT */ 8939 case 0x1c: /* FCMEQ */ 8940 case 0x5c: /* FCMGE */ 8941 case 0x7c: /* FCMGT */ 8942 case 0x7a: /* FABD */ 8943 break; 8944 default: 8945 unallocated_encoding(s); 8946 return; 8947 } 8948 8949 if (!fp_access_check(s)) { 8950 return; 8951 } 8952 8953 handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm); 8954 return; 8955 } 8956 8957 switch (opcode) { 8958 case 0x1: /* SQADD, UQADD */ 8959 case 0x5: /* SQSUB, UQSUB */ 8960 case 0x9: /* SQSHL, UQSHL */ 8961 case 0xb: /* SQRSHL, UQRSHL */ 8962 break; 8963 case 0x8: /* SSHL, USHL */ 8964 case 0xa: /* SRSHL, URSHL */ 8965 case 0x6: /* CMGT, CMHI */ 8966 case 0x7: /* CMGE, CMHS */ 8967 case 0x11: /* CMTST, CMEQ */ 8968 case 0x10: /* ADD, SUB (vector) */ 8969 if (size != 3) { 8970 unallocated_encoding(s); 8971 return; 8972 } 8973 break; 8974 case 0x16: /* SQDMULH, SQRDMULH (vector) */ 8975 if (size != 1 && size != 2) { 8976 unallocated_encoding(s); 8977 return; 8978 } 8979 break; 8980 default: 8981 unallocated_encoding(s); 8982 return; 8983 } 8984 8985 if (!fp_access_check(s)) { 8986 return; 8987 } 8988 8989 tcg_rd = tcg_temp_new_i64(); 8990 8991 if (size == 3) { 8992 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 8993 TCGv_i64 tcg_rm = read_fp_dreg(s, rm); 8994 8995 handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm); 8996 } else { 8997 /* Do a single operation on the lowest element in the vector. 8998 * We use the standard Neon helpers and rely on 0 OP 0 == 0 with 8999 * no side effects for all these operations. 9000 * OPTME: special-purpose helpers would avoid doing some 9001 * unnecessary work in the helper for the 8 and 16 bit cases. 
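     * (The 32-bit helper result is zero-extended to 64 bits and written
     *  back with write_fp_dreg() below, which also zeroes the unused high
     *  part of the destination register as scalar ops require.)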
9002 */ 9003 NeonGenTwoOpEnvFn *genenvfn; 9004 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 9005 TCGv_i32 tcg_rm = tcg_temp_new_i32(); 9006 TCGv_i32 tcg_rd32 = tcg_temp_new_i32(); 9007 9008 read_vec_element_i32(s, tcg_rn, rn, 0, size); 9009 read_vec_element_i32(s, tcg_rm, rm, 0, size); 9010 9011 switch (opcode) { 9012 case 0x1: /* SQADD, UQADD */ 9013 { 9014 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9015 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, 9016 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, 9017 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, 9018 }; 9019 genenvfn = fns[size][u]; 9020 break; 9021 } 9022 case 0x5: /* SQSUB, UQSUB */ 9023 { 9024 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9025 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, 9026 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, 9027 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 }, 9028 }; 9029 genenvfn = fns[size][u]; 9030 break; 9031 } 9032 case 0x9: /* SQSHL, UQSHL */ 9033 { 9034 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9035 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, 9036 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, 9037 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, 9038 }; 9039 genenvfn = fns[size][u]; 9040 break; 9041 } 9042 case 0xb: /* SQRSHL, UQRSHL */ 9043 { 9044 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9045 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, 9046 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, 9047 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, 9048 }; 9049 genenvfn = fns[size][u]; 9050 break; 9051 } 9052 case 0x16: /* SQDMULH, SQRDMULH */ 9053 { 9054 static NeonGenTwoOpEnvFn * const fns[2][2] = { 9055 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, 9056 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, 9057 }; 9058 assert(size == 1 || size == 2); 9059 genenvfn = fns[size - 1][u]; 9060 break; 9061 } 9062 default: 9063 g_assert_not_reached(); 9064 } 9065 9066 genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm); 9067 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32); 9068 } 9069 9070 write_fp_dreg(s, rd, tcg_rd); 9071 } 9072 9073 /* AdvSIMD scalar three same FP16 9074 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 9075 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ 9076 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | 9077 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ 9078 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400 9079 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400 9080 */ 9081 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, 9082 uint32_t insn) 9083 { 9084 int rd = extract32(insn, 0, 5); 9085 int rn = extract32(insn, 5, 5); 9086 int opcode = extract32(insn, 11, 3); 9087 int rm = extract32(insn, 16, 5); 9088 bool u = extract32(insn, 29, 1); 9089 bool a = extract32(insn, 23, 1); 9090 int fpopcode = opcode | (a << 3) | (u << 4); 9091 TCGv_ptr fpst; 9092 TCGv_i32 tcg_op1; 9093 TCGv_i32 tcg_op2; 9094 TCGv_i32 tcg_res; 9095 9096 switch (fpopcode) { 9097 case 0x03: /* FMULX */ 9098 case 0x04: /* FCMEQ (reg) */ 9099 case 0x07: /* FRECPS */ 9100 case 0x0f: /* FRSQRTS */ 9101 case 0x14: /* FCMGE (reg) */ 9102 case 0x15: /* FACGE */ 9103 case 0x1a: /* FABD */ 9104 case 0x1c: /* FCMGT (reg) */ 9105 case 0x1d: /* FACGT */ 9106 break; 9107 default: 9108 unallocated_encoding(s); 9109 return; 9110 } 9111 9112 if (!dc_isar_feature(aa64_fp16, s)) { 9113 
unallocated_encoding(s); 9114 } 9115 9116 if (!fp_access_check(s)) { 9117 return; 9118 } 9119 9120 fpst = fpstatus_ptr(FPST_FPCR_F16); 9121 9122 tcg_op1 = read_fp_hreg(s, rn); 9123 tcg_op2 = read_fp_hreg(s, rm); 9124 tcg_res = tcg_temp_new_i32(); 9125 9126 switch (fpopcode) { 9127 case 0x03: /* FMULX */ 9128 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); 9129 break; 9130 case 0x04: /* FCMEQ (reg) */ 9131 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9132 break; 9133 case 0x07: /* FRECPS */ 9134 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9135 break; 9136 case 0x0f: /* FRSQRTS */ 9137 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9138 break; 9139 case 0x14: /* FCMGE (reg) */ 9140 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9141 break; 9142 case 0x15: /* FACGE */ 9143 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9144 break; 9145 case 0x1a: /* FABD */ 9146 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 9147 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff); 9148 break; 9149 case 0x1c: /* FCMGT (reg) */ 9150 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9151 break; 9152 case 0x1d: /* FACGT */ 9153 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9154 break; 9155 default: 9156 g_assert_not_reached(); 9157 } 9158 9159 write_fp_sreg(s, rd, tcg_res); 9160 } 9161 9162 /* AdvSIMD scalar three same extra 9163 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 9164 * +-----+---+-----------+------+---+------+---+--------+---+----+----+ 9165 * | 0 1 | U | 1 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd | 9166 * +-----+---+-----------+------+---+------+---+--------+---+----+----+ 9167 */ 9168 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s, 9169 uint32_t insn) 9170 { 9171 int rd = extract32(insn, 0, 5); 9172 int rn = extract32(insn, 5, 5); 9173 int opcode = extract32(insn, 11, 4); 9174 int rm = extract32(insn, 16, 5); 9175 int size = extract32(insn, 22, 2); 9176 bool u = extract32(insn, 29, 1); 9177 TCGv_i32 ele1, ele2, ele3; 9178 TCGv_i64 res; 9179 bool feature; 9180 9181 switch (u * 16 + opcode) { 9182 case 0x10: /* SQRDMLAH (vector) */ 9183 case 0x11: /* SQRDMLSH (vector) */ 9184 if (size != 1 && size != 2) { 9185 unallocated_encoding(s); 9186 return; 9187 } 9188 feature = dc_isar_feature(aa64_rdm, s); 9189 break; 9190 default: 9191 unallocated_encoding(s); 9192 return; 9193 } 9194 if (!feature) { 9195 unallocated_encoding(s); 9196 return; 9197 } 9198 if (!fp_access_check(s)) { 9199 return; 9200 } 9201 9202 /* Do a single operation on the lowest element in the vector. 9203 * We use the standard Neon helpers and rely on 0 OP 0 == 0 9204 * with no side effects for all these operations. 9205 * OPTME: special-purpose helpers would avoid doing some 9206 * unnecessary work in the helper for the 16 bit cases. 
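     * (Each helper below takes the accumulator as its final argument, so
     *  the whole SQRDMLAH/SQRDMLSH operation, including the saturating
     *  accumulate into Rd, happens inside the helper.)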
9207 */ 9208 ele1 = tcg_temp_new_i32(); 9209 ele2 = tcg_temp_new_i32(); 9210 ele3 = tcg_temp_new_i32(); 9211 9212 read_vec_element_i32(s, ele1, rn, 0, size); 9213 read_vec_element_i32(s, ele2, rm, 0, size); 9214 read_vec_element_i32(s, ele3, rd, 0, size); 9215 9216 switch (opcode) { 9217 case 0x0: /* SQRDMLAH */ 9218 if (size == 1) { 9219 gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3); 9220 } else { 9221 gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3); 9222 } 9223 break; 9224 case 0x1: /* SQRDMLSH */ 9225 if (size == 1) { 9226 gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3); 9227 } else { 9228 gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3); 9229 } 9230 break; 9231 default: 9232 g_assert_not_reached(); 9233 } 9234 9235 res = tcg_temp_new_i64(); 9236 tcg_gen_extu_i32_i64(res, ele3); 9237 write_fp_dreg(s, rd, res); 9238 } 9239 9240 static void handle_2misc_64(DisasContext *s, int opcode, bool u, 9241 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, 9242 TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus) 9243 { 9244 /* Handle 64->64 opcodes which are shared between the scalar and 9245 * vector 2-reg-misc groups. We cover every integer opcode where size == 3 9246 * is valid in either group and also the double-precision fp ops. 9247 * The caller only need provide tcg_rmode and tcg_fpstatus if the op 9248 * requires them. 9249 */ 9250 TCGCond cond; 9251 9252 switch (opcode) { 9253 case 0x4: /* CLS, CLZ */ 9254 if (u) { 9255 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 9256 } else { 9257 tcg_gen_clrsb_i64(tcg_rd, tcg_rn); 9258 } 9259 break; 9260 case 0x5: /* NOT */ 9261 /* This opcode is shared with CNT and RBIT but we have earlier 9262 * enforced that size == 3 if and only if this is the NOT insn. 9263 */ 9264 tcg_gen_not_i64(tcg_rd, tcg_rn); 9265 break; 9266 case 0x7: /* SQABS, SQNEG */ 9267 if (u) { 9268 gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn); 9269 } else { 9270 gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn); 9271 } 9272 break; 9273 case 0xa: /* CMLT */ 9274 cond = TCG_COND_LT; 9275 do_cmop: 9276 /* 64 bit integer comparison against zero, result is test ? -1 : 0. */ 9277 tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0)); 9278 break; 9279 case 0x8: /* CMGT, CMGE */ 9280 cond = u ? TCG_COND_GE : TCG_COND_GT; 9281 goto do_cmop; 9282 case 0x9: /* CMEQ, CMLE */ 9283 cond = u ? 
TCG_COND_LE : TCG_COND_EQ; 9284 goto do_cmop; 9285 case 0xb: /* ABS, NEG */ 9286 if (u) { 9287 tcg_gen_neg_i64(tcg_rd, tcg_rn); 9288 } else { 9289 tcg_gen_abs_i64(tcg_rd, tcg_rn); 9290 } 9291 break; 9292 case 0x2f: /* FABS */ 9293 gen_helper_vfp_absd(tcg_rd, tcg_rn); 9294 break; 9295 case 0x6f: /* FNEG */ 9296 gen_helper_vfp_negd(tcg_rd, tcg_rn); 9297 break; 9298 case 0x7f: /* FSQRT */ 9299 gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env); 9300 break; 9301 case 0x1a: /* FCVTNS */ 9302 case 0x1b: /* FCVTMS */ 9303 case 0x1c: /* FCVTAS */ 9304 case 0x3a: /* FCVTPS */ 9305 case 0x3b: /* FCVTZS */ 9306 gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 9307 break; 9308 case 0x5a: /* FCVTNU */ 9309 case 0x5b: /* FCVTMU */ 9310 case 0x5c: /* FCVTAU */ 9311 case 0x7a: /* FCVTPU */ 9312 case 0x7b: /* FCVTZU */ 9313 gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 9314 break; 9315 case 0x18: /* FRINTN */ 9316 case 0x19: /* FRINTM */ 9317 case 0x38: /* FRINTP */ 9318 case 0x39: /* FRINTZ */ 9319 case 0x58: /* FRINTA */ 9320 case 0x79: /* FRINTI */ 9321 gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus); 9322 break; 9323 case 0x59: /* FRINTX */ 9324 gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus); 9325 break; 9326 case 0x1e: /* FRINT32Z */ 9327 case 0x5e: /* FRINT32X */ 9328 gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus); 9329 break; 9330 case 0x1f: /* FRINT64Z */ 9331 case 0x5f: /* FRINT64X */ 9332 gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus); 9333 break; 9334 default: 9335 g_assert_not_reached(); 9336 } 9337 } 9338 9339 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, 9340 bool is_scalar, bool is_u, bool is_q, 9341 int size, int rn, int rd) 9342 { 9343 bool is_double = (size == MO_64); 9344 TCGv_ptr fpst; 9345 9346 if (!fp_access_check(s)) { 9347 return; 9348 } 9349 9350 fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 9351 9352 if (is_double) { 9353 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9354 TCGv_i64 tcg_zero = tcg_constant_i64(0); 9355 TCGv_i64 tcg_res = tcg_temp_new_i64(); 9356 NeonGenTwoDoubleOpFn *genfn; 9357 bool swap = false; 9358 int pass; 9359 9360 switch (opcode) { 9361 case 0x2e: /* FCMLT (zero) */ 9362 swap = true; 9363 /* fallthrough */ 9364 case 0x2c: /* FCMGT (zero) */ 9365 genfn = gen_helper_neon_cgt_f64; 9366 break; 9367 case 0x2d: /* FCMEQ (zero) */ 9368 genfn = gen_helper_neon_ceq_f64; 9369 break; 9370 case 0x6d: /* FCMLE (zero) */ 9371 swap = true; 9372 /* fall through */ 9373 case 0x6c: /* FCMGE (zero) */ 9374 genfn = gen_helper_neon_cge_f64; 9375 break; 9376 default: 9377 g_assert_not_reached(); 9378 } 9379 9380 for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { 9381 read_vec_element(s, tcg_op, rn, pass, MO_64); 9382 if (swap) { 9383 genfn(tcg_res, tcg_zero, tcg_op, fpst); 9384 } else { 9385 genfn(tcg_res, tcg_op, tcg_zero, fpst); 9386 } 9387 write_vec_element(s, tcg_res, rd, pass, MO_64); 9388 } 9389 9390 clear_vec_high(s, !is_scalar, rd); 9391 } else { 9392 TCGv_i32 tcg_op = tcg_temp_new_i32(); 9393 TCGv_i32 tcg_zero = tcg_constant_i32(0); 9394 TCGv_i32 tcg_res = tcg_temp_new_i32(); 9395 NeonGenTwoSingleOpFn *genfn; 9396 bool swap = false; 9397 int pass, maxpasses; 9398 9399 if (size == MO_16) { 9400 switch (opcode) { 9401 case 0x2e: /* FCMLT (zero) */ 9402 swap = true; 9403 /* fall through */ 9404 case 0x2c: /* FCMGT (zero) */ 9405 genfn = gen_helper_advsimd_cgt_f16; 9406 break; 9407 case 0x2d: /* FCMEQ (zero) */ 9408 genfn = gen_helper_advsimd_ceq_f16; 9409 break; 9410 case 0x6d: /* FCMLE (zero) */ 9411 swap = true; 9412 /* fall through */ 9413 case 0x6c: /* FCMGE (zero) */ 9414 genfn = gen_helper_advsimd_cge_f16; 9415 break; 9416 default: 9417 g_assert_not_reached(); 9418 } 9419 } else { 9420 switch (opcode) { 9421 case 0x2e: /* FCMLT (zero) */ 9422 swap = true; 9423 /* fall through */ 9424 case 0x2c: /* FCMGT (zero) */ 9425 genfn = gen_helper_neon_cgt_f32; 9426 break; 9427 case 0x2d: /* FCMEQ (zero) */ 9428 genfn = gen_helper_neon_ceq_f32; 9429 break; 9430 case 0x6d: /* FCMLE (zero) */ 9431 swap = true; 9432 /* fall through */ 9433 case 0x6c: /* FCMGE (zero) */ 9434 genfn = gen_helper_neon_cge_f32; 9435 break; 9436 default: 9437 g_assert_not_reached(); 9438 } 9439 } 9440 9441 if (is_scalar) { 9442 maxpasses = 1; 9443 } else { 9444 int vector_size = 8 << is_q; 9445 maxpasses = vector_size >> size; 9446 } 9447 9448 for (pass = 0; pass < maxpasses; pass++) { 9449 read_vec_element_i32(s, tcg_op, rn, pass, size); 9450 if (swap) { 9451 genfn(tcg_res, tcg_zero, tcg_op, fpst); 9452 } else { 9453 genfn(tcg_res, tcg_op, tcg_zero, fpst); 9454 } 9455 if (is_scalar) { 9456 write_fp_sreg(s, rd, tcg_res); 9457 } else { 9458 write_vec_element_i32(s, tcg_res, rd, pass, size); 9459 } 9460 } 9461 9462 if (!is_scalar) { 9463 clear_vec_high(s, is_q, rd); 9464 } 9465 } 9466 } 9467 9468 static void handle_2misc_reciprocal(DisasContext *s, int opcode, 9469 bool is_scalar, bool is_u, bool is_q, 9470 int size, int rn, int rd) 9471 { 9472 bool is_double = (size == 3); 9473 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9474 9475 if (is_double) { 9476 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9477 TCGv_i64 tcg_res = tcg_temp_new_i64(); 9478 int pass; 9479 9480 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 9481 read_vec_element(s, tcg_op, rn, pass, MO_64); 9482 switch (opcode) { 9483 case 0x3d: /* FRECPE */ 9484 gen_helper_recpe_f64(tcg_res, tcg_op, fpst); 9485 break; 9486 case 0x3f: /* FRECPX */ 9487 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst); 9488 break; 9489 case 0x7d: /* FRSQRTE */ 9490 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst); 9491 break; 9492 default: 9493 g_assert_not_reached(); 9494 } 9495 write_vec_element(s, tcg_res, rd, pass, MO_64); 9496 } 9497 clear_vec_high(s, !is_scalar, rd); 9498 } else { 9499 TCGv_i32 tcg_op = tcg_temp_new_i32(); 9500 TCGv_i32 tcg_res = tcg_temp_new_i32(); 9501 int pass, maxpasses; 9502 9503 if (is_scalar) { 9504 maxpasses = 1; 9505 } else { 9506 maxpasses = is_q ? 
4 : 2; 9507 } 9508 9509 for (pass = 0; pass < maxpasses; pass++) { 9510 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 9511 9512 switch (opcode) { 9513 case 0x3c: /* URECPE */ 9514 gen_helper_recpe_u32(tcg_res, tcg_op); 9515 break; 9516 case 0x3d: /* FRECPE */ 9517 gen_helper_recpe_f32(tcg_res, tcg_op, fpst); 9518 break; 9519 case 0x3f: /* FRECPX */ 9520 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst); 9521 break; 9522 case 0x7d: /* FRSQRTE */ 9523 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst); 9524 break; 9525 default: 9526 g_assert_not_reached(); 9527 } 9528 9529 if (is_scalar) { 9530 write_fp_sreg(s, rd, tcg_res); 9531 } else { 9532 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 9533 } 9534 } 9535 if (!is_scalar) { 9536 clear_vec_high(s, is_q, rd); 9537 } 9538 } 9539 } 9540 9541 static void handle_2misc_narrow(DisasContext *s, bool scalar, 9542 int opcode, bool u, bool is_q, 9543 int size, int rn, int rd) 9544 { 9545 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element 9546 * in the source becomes a size element in the destination). 9547 */ 9548 int pass; 9549 TCGv_i32 tcg_res[2]; 9550 int destelt = is_q ? 2 : 0; 9551 int passes = scalar ? 1 : 2; 9552 9553 if (scalar) { 9554 tcg_res[1] = tcg_constant_i32(0); 9555 } 9556 9557 for (pass = 0; pass < passes; pass++) { 9558 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9559 NeonGenNarrowFn *genfn = NULL; 9560 NeonGenNarrowEnvFn *genenvfn = NULL; 9561 9562 if (scalar) { 9563 read_vec_element(s, tcg_op, rn, pass, size + 1); 9564 } else { 9565 read_vec_element(s, tcg_op, rn, pass, MO_64); 9566 } 9567 tcg_res[pass] = tcg_temp_new_i32(); 9568 9569 switch (opcode) { 9570 case 0x12: /* XTN, SQXTUN */ 9571 { 9572 static NeonGenNarrowFn * const xtnfns[3] = { 9573 gen_helper_neon_narrow_u8, 9574 gen_helper_neon_narrow_u16, 9575 tcg_gen_extrl_i64_i32, 9576 }; 9577 static NeonGenNarrowEnvFn * const sqxtunfns[3] = { 9578 gen_helper_neon_unarrow_sat8, 9579 gen_helper_neon_unarrow_sat16, 9580 gen_helper_neon_unarrow_sat32, 9581 }; 9582 if (u) { 9583 genenvfn = sqxtunfns[size]; 9584 } else { 9585 genfn = xtnfns[size]; 9586 } 9587 break; 9588 } 9589 case 0x14: /* SQXTN, UQXTN */ 9590 { 9591 static NeonGenNarrowEnvFn * const fns[3][2] = { 9592 { gen_helper_neon_narrow_sat_s8, 9593 gen_helper_neon_narrow_sat_u8 }, 9594 { gen_helper_neon_narrow_sat_s16, 9595 gen_helper_neon_narrow_sat_u16 }, 9596 { gen_helper_neon_narrow_sat_s32, 9597 gen_helper_neon_narrow_sat_u32 }, 9598 }; 9599 genenvfn = fns[size][u]; 9600 break; 9601 } 9602 case 0x16: /* FCVTN, FCVTN2 */ 9603 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ 9604 if (size == 2) { 9605 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env); 9606 } else { 9607 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 9608 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 9609 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9610 TCGv_i32 ahp = get_ahp_flag(); 9611 9612 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op); 9613 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 9614 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 9615 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16); 9616 } 9617 break; 9618 case 0x36: /* BFCVTN, BFCVTN2 */ 9619 { 9620 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9621 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst); 9622 } 9623 break; 9624 case 0x56: /* FCVTXN, FCVTXN2 */ 9625 /* 64 bit to 32 bit float conversion 9626 * with von Neumann rounding (round to odd) 9627 */ 9628 assert(size == 2); 9629 gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env); 9630 break; 9631 
default: 9632 g_assert_not_reached(); 9633 } 9634 9635 if (genfn) { 9636 genfn(tcg_res[pass], tcg_op); 9637 } else if (genenvfn) { 9638 genenvfn(tcg_res[pass], cpu_env, tcg_op); 9639 } 9640 } 9641 9642 for (pass = 0; pass < 2; pass++) { 9643 write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32); 9644 } 9645 clear_vec_high(s, is_q, rd); 9646 } 9647 9648 /* Remaining saturating accumulating ops */ 9649 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, 9650 bool is_q, int size, int rn, int rd) 9651 { 9652 bool is_double = (size == 3); 9653 9654 if (is_double) { 9655 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 9656 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9657 int pass; 9658 9659 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 9660 read_vec_element(s, tcg_rn, rn, pass, MO_64); 9661 read_vec_element(s, tcg_rd, rd, pass, MO_64); 9662 9663 if (is_u) { /* USQADD */ 9664 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd); 9665 } else { /* SUQADD */ 9666 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd); 9667 } 9668 write_vec_element(s, tcg_rd, rd, pass, MO_64); 9669 } 9670 clear_vec_high(s, !is_scalar, rd); 9671 } else { 9672 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 9673 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 9674 int pass, maxpasses; 9675 9676 if (is_scalar) { 9677 maxpasses = 1; 9678 } else { 9679 maxpasses = is_q ? 4 : 2; 9680 } 9681 9682 for (pass = 0; pass < maxpasses; pass++) { 9683 if (is_scalar) { 9684 read_vec_element_i32(s, tcg_rn, rn, pass, size); 9685 read_vec_element_i32(s, tcg_rd, rd, pass, size); 9686 } else { 9687 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32); 9688 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32); 9689 } 9690 9691 if (is_u) { /* USQADD */ 9692 switch (size) { 9693 case 0: 9694 gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd); 9695 break; 9696 case 1: 9697 gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd); 9698 break; 9699 case 2: 9700 gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd); 9701 break; 9702 default: 9703 g_assert_not_reached(); 9704 } 9705 } else { /* SUQADD */ 9706 switch (size) { 9707 case 0: 9708 gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd); 9709 break; 9710 case 1: 9711 gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd); 9712 break; 9713 case 2: 9714 gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd); 9715 break; 9716 default: 9717 g_assert_not_reached(); 9718 } 9719 } 9720 9721 if (is_scalar) { 9722 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64); 9723 } 9724 write_vec_element_i32(s, tcg_rd, rd, pass, MO_32); 9725 } 9726 clear_vec_high(s, is_q, rd); 9727 } 9728 } 9729 9730 /* AdvSIMD scalar two reg misc 9731 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 9732 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 9733 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 9734 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 9735 */ 9736 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) 9737 { 9738 int rd = extract32(insn, 0, 5); 9739 int rn = extract32(insn, 5, 5); 9740 int opcode = extract32(insn, 12, 5); 9741 int size = extract32(insn, 22, 2); 9742 bool u = extract32(insn, 29, 1); 9743 bool is_fcvt = false; 9744 int rmode; 9745 TCGv_i32 tcg_rmode; 9746 TCGv_ptr tcg_fpstatus; 9747 9748 switch (opcode) { 9749 case 0x3: /* USQADD / SUQADD*/ 9750 if (!fp_access_check(s)) { 9751 return; 9752 } 9753 handle_2misc_satacc(s, true, u, false, size, rn, rd); 
9754 return; 9755 case 0x7: /* SQABS / SQNEG */ 9756 break; 9757 case 0xa: /* CMLT */ 9758 if (u) { 9759 unallocated_encoding(s); 9760 return; 9761 } 9762 /* fall through */ 9763 case 0x8: /* CMGT, CMGE */ 9764 case 0x9: /* CMEQ, CMLE */ 9765 case 0xb: /* ABS, NEG */ 9766 if (size != 3) { 9767 unallocated_encoding(s); 9768 return; 9769 } 9770 break; 9771 case 0x12: /* SQXTUN */ 9772 if (!u) { 9773 unallocated_encoding(s); 9774 return; 9775 } 9776 /* fall through */ 9777 case 0x14: /* SQXTN, UQXTN */ 9778 if (size == 3) { 9779 unallocated_encoding(s); 9780 return; 9781 } 9782 if (!fp_access_check(s)) { 9783 return; 9784 } 9785 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd); 9786 return; 9787 case 0xc ... 0xf: 9788 case 0x16 ... 0x1d: 9789 case 0x1f: 9790 /* Floating point: U, size[1] and opcode indicate operation; 9791 * size[0] indicates single or double precision. 9792 */ 9793 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 9794 size = extract32(size, 0, 1) ? 3 : 2; 9795 switch (opcode) { 9796 case 0x2c: /* FCMGT (zero) */ 9797 case 0x2d: /* FCMEQ (zero) */ 9798 case 0x2e: /* FCMLT (zero) */ 9799 case 0x6c: /* FCMGE (zero) */ 9800 case 0x6d: /* FCMLE (zero) */ 9801 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd); 9802 return; 9803 case 0x1d: /* SCVTF */ 9804 case 0x5d: /* UCVTF */ 9805 { 9806 bool is_signed = (opcode == 0x1d); 9807 if (!fp_access_check(s)) { 9808 return; 9809 } 9810 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size); 9811 return; 9812 } 9813 case 0x3d: /* FRECPE */ 9814 case 0x3f: /* FRECPX */ 9815 case 0x7d: /* FRSQRTE */ 9816 if (!fp_access_check(s)) { 9817 return; 9818 } 9819 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd); 9820 return; 9821 case 0x1a: /* FCVTNS */ 9822 case 0x1b: /* FCVTMS */ 9823 case 0x3a: /* FCVTPS */ 9824 case 0x3b: /* FCVTZS */ 9825 case 0x5a: /* FCVTNU */ 9826 case 0x5b: /* FCVTMU */ 9827 case 0x7a: /* FCVTPU */ 9828 case 0x7b: /* FCVTZU */ 9829 is_fcvt = true; 9830 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 9831 break; 9832 case 0x1c: /* FCVTAS */ 9833 case 0x5c: /* FCVTAU */ 9834 /* TIEAWAY doesn't fit in the usual rounding mode encoding */ 9835 is_fcvt = true; 9836 rmode = FPROUNDING_TIEAWAY; 9837 break; 9838 case 0x56: /* FCVTXN, FCVTXN2 */ 9839 if (size == 2) { 9840 unallocated_encoding(s); 9841 return; 9842 } 9843 if (!fp_access_check(s)) { 9844 return; 9845 } 9846 handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd); 9847 return; 9848 default: 9849 unallocated_encoding(s); 9850 return; 9851 } 9852 break; 9853 default: 9854 unallocated_encoding(s); 9855 return; 9856 } 9857 9858 if (!fp_access_check(s)) { 9859 return; 9860 } 9861 9862 if (is_fcvt) { 9863 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 9864 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 9865 } else { 9866 tcg_fpstatus = NULL; 9867 tcg_rmode = NULL; 9868 } 9869 9870 if (size == 3) { 9871 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 9872 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9873 9874 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus); 9875 write_fp_dreg(s, rd, tcg_rd); 9876 } else { 9877 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 9878 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 9879 9880 read_vec_element_i32(s, tcg_rn, rn, 0, size); 9881 9882 switch (opcode) { 9883 case 0x7: /* SQABS, SQNEG */ 9884 { 9885 NeonGenOneOpEnvFn *genfn; 9886 static NeonGenOneOpEnvFn * const fns[3][2] = { 9887 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 9888 { gen_helper_neon_qabs_s16, 
gen_helper_neon_qneg_s16 }, 9889 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, 9890 }; 9891 genfn = fns[size][u]; 9892 genfn(tcg_rd, cpu_env, tcg_rn); 9893 break; 9894 } 9895 case 0x1a: /* FCVTNS */ 9896 case 0x1b: /* FCVTMS */ 9897 case 0x1c: /* FCVTAS */ 9898 case 0x3a: /* FCVTPS */ 9899 case 0x3b: /* FCVTZS */ 9900 gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0), 9901 tcg_fpstatus); 9902 break; 9903 case 0x5a: /* FCVTNU */ 9904 case 0x5b: /* FCVTMU */ 9905 case 0x5c: /* FCVTAU */ 9906 case 0x7a: /* FCVTPU */ 9907 case 0x7b: /* FCVTZU */ 9908 gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0), 9909 tcg_fpstatus); 9910 break; 9911 default: 9912 g_assert_not_reached(); 9913 } 9914 9915 write_fp_sreg(s, rd, tcg_rd); 9916 } 9917 9918 if (is_fcvt) { 9919 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 9920 } 9921 } 9922 9923 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */ 9924 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, 9925 int immh, int immb, int opcode, int rn, int rd) 9926 { 9927 int size = 32 - clz32(immh) - 1; 9928 int immhb = immh << 3 | immb; 9929 int shift = 2 * (8 << size) - immhb; 9930 GVecGen2iFn *gvec_fn; 9931 9932 if (extract32(immh, 3, 1) && !is_q) { 9933 unallocated_encoding(s); 9934 return; 9935 } 9936 tcg_debug_assert(size <= 3); 9937 9938 if (!fp_access_check(s)) { 9939 return; 9940 } 9941 9942 switch (opcode) { 9943 case 0x02: /* SSRA / USRA (accumulate) */ 9944 gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra; 9945 break; 9946 9947 case 0x08: /* SRI */ 9948 gvec_fn = gen_gvec_sri; 9949 break; 9950 9951 case 0x00: /* SSHR / USHR */ 9952 if (is_u) { 9953 if (shift == 8 << size) { 9954 /* Shift count the same size as element size produces zero. */ 9955 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd), 9956 is_q ? 16 : 8, vec_full_reg_size(s), 0); 9957 return; 9958 } 9959 gvec_fn = tcg_gen_gvec_shri; 9960 } else { 9961 /* Shift count the same size as element size produces all sign. */ 9962 if (shift == 8 << size) { 9963 shift -= 1; 9964 } 9965 gvec_fn = tcg_gen_gvec_sari; 9966 } 9967 break; 9968 9969 case 0x04: /* SRSHR / URSHR (rounding) */ 9970 gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr; 9971 break; 9972 9973 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 9974 gvec_fn = is_u ? 
gen_gvec_ursra : gen_gvec_srsra; 9975 break; 9976 9977 default: 9978 g_assert_not_reached(); 9979 } 9980 9981 gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size); 9982 } 9983 9984 /* SHL/SLI - Vector shift left */ 9985 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, 9986 int immh, int immb, int opcode, int rn, int rd) 9987 { 9988 int size = 32 - clz32(immh) - 1; 9989 int immhb = immh << 3 | immb; 9990 int shift = immhb - (8 << size); 9991 9992 /* Range of size is limited by decode: immh is a non-zero 4 bit field */ 9993 assert(size >= 0 && size <= 3); 9994 9995 if (extract32(immh, 3, 1) && !is_q) { 9996 unallocated_encoding(s); 9997 return; 9998 } 9999 10000 if (!fp_access_check(s)) { 10001 return; 10002 } 10003 10004 if (insert) { 10005 gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size); 10006 } else { 10007 gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size); 10008 } 10009 } 10010 10011 /* USHLL/SHLL - Vector shift left with widening */ 10012 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, 10013 int immh, int immb, int opcode, int rn, int rd) 10014 { 10015 int size = 32 - clz32(immh) - 1; 10016 int immhb = immh << 3 | immb; 10017 int shift = immhb - (8 << size); 10018 int dsize = 64; 10019 int esize = 8 << size; 10020 int elements = dsize/esize; 10021 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 10022 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 10023 int i; 10024 10025 if (size >= 3) { 10026 unallocated_encoding(s); 10027 return; 10028 } 10029 10030 if (!fp_access_check(s)) { 10031 return; 10032 } 10033 10034 /* For the LL variants the store is larger than the load, 10035 * so if rd == rn we would overwrite parts of our input. 10036 * So load everything right now and use shifts in the main loop. 10037 */ 10038 read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64); 10039 10040 for (i = 0; i < elements; i++) { 10041 tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize); 10042 ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0); 10043 tcg_gen_shli_i64(tcg_rd, tcg_rd, shift); 10044 write_vec_element(s, tcg_rd, rd, i, size + 1); 10045 } 10046 } 10047 10048 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */ 10049 static void handle_vec_simd_shrn(DisasContext *s, bool is_q, 10050 int immh, int immb, int opcode, int rn, int rd) 10051 { 10052 int immhb = immh << 3 | immb; 10053 int size = 32 - clz32(immh) - 1; 10054 int dsize = 64; 10055 int esize = 8 << size; 10056 int elements = dsize/esize; 10057 int shift = (2 * esize) - immhb; 10058 bool round = extract32(opcode, 0, 1); 10059 TCGv_i64 tcg_rn, tcg_rd, tcg_final; 10060 TCGv_i64 tcg_round; 10061 int i; 10062 10063 if (extract32(immh, 3, 1)) { 10064 unallocated_encoding(s); 10065 return; 10066 } 10067 10068 if (!fp_access_check(s)) { 10069 return; 10070 } 10071 10072 tcg_rn = tcg_temp_new_i64(); 10073 tcg_rd = tcg_temp_new_i64(); 10074 tcg_final = tcg_temp_new_i64(); 10075 read_vec_element(s, tcg_final, rd, is_q ? 
1 : 0, MO_64); 10076 10077 if (round) { 10078 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 10079 } else { 10080 tcg_round = NULL; 10081 } 10082 10083 for (i = 0; i < elements; i++) { 10084 read_vec_element(s, tcg_rn, rn, i, size+1); 10085 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 10086 false, true, size+1, shift); 10087 10088 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); 10089 } 10090 10091 if (!is_q) { 10092 write_vec_element(s, tcg_final, rd, 0, MO_64); 10093 } else { 10094 write_vec_element(s, tcg_final, rd, 1, MO_64); 10095 } 10096 10097 clear_vec_high(s, is_q, rd); 10098 } 10099 10100 10101 /* AdvSIMD shift by immediate 10102 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 10103 * +---+---+---+-------------+------+------+--------+---+------+------+ 10104 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | 10105 * +---+---+---+-------------+------+------+--------+---+------+------+ 10106 */ 10107 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) 10108 { 10109 int rd = extract32(insn, 0, 5); 10110 int rn = extract32(insn, 5, 5); 10111 int opcode = extract32(insn, 11, 5); 10112 int immb = extract32(insn, 16, 3); 10113 int immh = extract32(insn, 19, 4); 10114 bool is_u = extract32(insn, 29, 1); 10115 bool is_q = extract32(insn, 30, 1); 10116 10117 /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */ 10118 assert(immh != 0); 10119 10120 switch (opcode) { 10121 case 0x08: /* SRI */ 10122 if (!is_u) { 10123 unallocated_encoding(s); 10124 return; 10125 } 10126 /* fall through */ 10127 case 0x00: /* SSHR / USHR */ 10128 case 0x02: /* SSRA / USRA (accumulate) */ 10129 case 0x04: /* SRSHR / URSHR (rounding) */ 10130 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 10131 handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd); 10132 break; 10133 case 0x0a: /* SHL / SLI */ 10134 handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd); 10135 break; 10136 case 0x10: /* SHRN */ 10137 case 0x11: /* RSHRN / SQRSHRUN */ 10138 if (is_u) { 10139 handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb, 10140 opcode, rn, rd); 10141 } else { 10142 handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd); 10143 } 10144 break; 10145 case 0x12: /* SQSHRN / UQSHRN */ 10146 case 0x13: /* SQRSHRN / UQRSHRN */ 10147 handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, 10148 opcode, rn, rd); 10149 break; 10150 case 0x14: /* SSHLL / USHLL */ 10151 handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); 10152 break; 10153 case 0x1c: /* SCVTF / UCVTF */ 10154 handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, 10155 opcode, rn, rd); 10156 break; 10157 case 0xc: /* SQSHLU */ 10158 if (!is_u) { 10159 unallocated_encoding(s); 10160 return; 10161 } 10162 handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd); 10163 break; 10164 case 0xe: /* SQSHL, UQSHL */ 10165 handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd); 10166 break; 10167 case 0x1f: /* FCVTZS/ FCVTZU */ 10168 handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd); 10169 return; 10170 default: 10171 unallocated_encoding(s); 10172 return; 10173 } 10174 } 10175 10176 /* Generate code to do a "long" addition or subtraction, ie one done in 10177 * TCGv_i64 on vector lanes twice the width specified by size. 
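 * Sizes 0 and 1 use the paired Neon helpers (16-bit and 32-bit lanes packed
 * into the i64 respectively); size 2 degenerates to a plain 64-bit add/sub.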
10178 */ 10179 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res, 10180 TCGv_i64 tcg_op1, TCGv_i64 tcg_op2) 10181 { 10182 static NeonGenTwo64OpFn * const fns[3][2] = { 10183 { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 }, 10184 { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 }, 10185 { tcg_gen_add_i64, tcg_gen_sub_i64 }, 10186 }; 10187 NeonGenTwo64OpFn *genfn; 10188 assert(size < 3); 10189 10190 genfn = fns[size][is_sub]; 10191 genfn(tcg_res, tcg_op1, tcg_op2); 10192 } 10193 10194 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, 10195 int opcode, int rd, int rn, int rm) 10196 { 10197 /* 3-reg-different widening insns: 64 x 64 -> 128 */ 10198 TCGv_i64 tcg_res[2]; 10199 int pass, accop; 10200 10201 tcg_res[0] = tcg_temp_new_i64(); 10202 tcg_res[1] = tcg_temp_new_i64(); 10203 10204 /* Does this op do an adding accumulate, a subtracting accumulate, 10205 * or no accumulate at all? 10206 */ 10207 switch (opcode) { 10208 case 5: 10209 case 8: 10210 case 9: 10211 accop = 1; 10212 break; 10213 case 10: 10214 case 11: 10215 accop = -1; 10216 break; 10217 default: 10218 accop = 0; 10219 break; 10220 } 10221 10222 if (accop != 0) { 10223 read_vec_element(s, tcg_res[0], rd, 0, MO_64); 10224 read_vec_element(s, tcg_res[1], rd, 1, MO_64); 10225 } 10226 10227 /* size == 2 means two 32x32->64 operations; this is worth special 10228 * casing because we can generally handle it inline. 10229 */ 10230 if (size == 2) { 10231 for (pass = 0; pass < 2; pass++) { 10232 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10233 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 10234 TCGv_i64 tcg_passres; 10235 MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN); 10236 10237 int elt = pass + is_q * 2; 10238 10239 read_vec_element(s, tcg_op1, rn, elt, memop); 10240 read_vec_element(s, tcg_op2, rm, elt, memop); 10241 10242 if (accop == 0) { 10243 tcg_passres = tcg_res[pass]; 10244 } else { 10245 tcg_passres = tcg_temp_new_i64(); 10246 } 10247 10248 switch (opcode) { 10249 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 10250 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2); 10251 break; 10252 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 10253 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2); 10254 break; 10255 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 10256 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 10257 { 10258 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64(); 10259 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64(); 10260 10261 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2); 10262 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1); 10263 tcg_gen_movcond_i64(is_u ? 
TCG_COND_GEU : TCG_COND_GE, 10264 tcg_passres, 10265 tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2); 10266 break; 10267 } 10268 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 10269 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 10270 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ 10271 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); 10272 break; 10273 case 9: /* SQDMLAL, SQDMLAL2 */ 10274 case 11: /* SQDMLSL, SQDMLSL2 */ 10275 case 13: /* SQDMULL, SQDMULL2 */ 10276 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); 10277 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env, 10278 tcg_passres, tcg_passres); 10279 break; 10280 default: 10281 g_assert_not_reached(); 10282 } 10283 10284 if (opcode == 9 || opcode == 11) { 10285 /* saturating accumulate ops */ 10286 if (accop < 0) { 10287 tcg_gen_neg_i64(tcg_passres, tcg_passres); 10288 } 10289 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env, 10290 tcg_res[pass], tcg_passres); 10291 } else if (accop > 0) { 10292 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 10293 } else if (accop < 0) { 10294 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 10295 } 10296 } 10297 } else { 10298 /* size 0 or 1, generally helper functions */ 10299 for (pass = 0; pass < 2; pass++) { 10300 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 10301 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 10302 TCGv_i64 tcg_passres; 10303 int elt = pass + is_q * 2; 10304 10305 read_vec_element_i32(s, tcg_op1, rn, elt, MO_32); 10306 read_vec_element_i32(s, tcg_op2, rm, elt, MO_32); 10307 10308 if (accop == 0) { 10309 tcg_passres = tcg_res[pass]; 10310 } else { 10311 tcg_passres = tcg_temp_new_i64(); 10312 } 10313 10314 switch (opcode) { 10315 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 10316 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 10317 { 10318 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64(); 10319 static NeonGenWidenFn * const widenfns[2][2] = { 10320 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, 10321 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, 10322 }; 10323 NeonGenWidenFn *widenfn = widenfns[size][is_u]; 10324 10325 widenfn(tcg_op2_64, tcg_op2); 10326 widenfn(tcg_passres, tcg_op1); 10327 gen_neon_addl(size, (opcode == 2), tcg_passres, 10328 tcg_passres, tcg_op2_64); 10329 break; 10330 } 10331 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 10332 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 10333 if (size == 0) { 10334 if (is_u) { 10335 gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2); 10336 } else { 10337 gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2); 10338 } 10339 } else { 10340 if (is_u) { 10341 gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2); 10342 } else { 10343 gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2); 10344 } 10345 } 10346 break; 10347 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 10348 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 10349 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ 10350 if (size == 0) { 10351 if (is_u) { 10352 gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2); 10353 } else { 10354 gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2); 10355 } 10356 } else { 10357 if (is_u) { 10358 gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2); 10359 } else { 10360 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); 10361 } 10362 } 10363 break; 10364 case 9: /* SQDMLAL, SQDMLAL2 */ 10365 case 11: /* SQDMLSL, SQDMLSL2 */ 10366 case 13: /* SQDMULL, SQDMULL2 */ 10367 assert(size == 1); 10368 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); 10369 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, 10370 
tcg_passres, tcg_passres); 10371 break; 10372 default: 10373 g_assert_not_reached(); 10374 } 10375 10376 if (accop != 0) { 10377 if (opcode == 9 || opcode == 11) { 10378 /* saturating accumulate ops */ 10379 if (accop < 0) { 10380 gen_helper_neon_negl_u32(tcg_passres, tcg_passres); 10381 } 10382 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env, 10383 tcg_res[pass], 10384 tcg_passres); 10385 } else { 10386 gen_neon_addl(size, (accop < 0), tcg_res[pass], 10387 tcg_res[pass], tcg_passres); 10388 } 10389 } 10390 } 10391 } 10392 10393 write_vec_element(s, tcg_res[0], rd, 0, MO_64); 10394 write_vec_element(s, tcg_res[1], rd, 1, MO_64); 10395 } 10396 10397 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, 10398 int opcode, int rd, int rn, int rm) 10399 { 10400 TCGv_i64 tcg_res[2]; 10401 int part = is_q ? 2 : 0; 10402 int pass; 10403 10404 for (pass = 0; pass < 2; pass++) { 10405 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10406 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 10407 TCGv_i64 tcg_op2_wide = tcg_temp_new_i64(); 10408 static NeonGenWidenFn * const widenfns[3][2] = { 10409 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, 10410 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, 10411 { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 }, 10412 }; 10413 NeonGenWidenFn *widenfn = widenfns[size][is_u]; 10414 10415 read_vec_element(s, tcg_op1, rn, pass, MO_64); 10416 read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32); 10417 widenfn(tcg_op2_wide, tcg_op2); 10418 tcg_res[pass] = tcg_temp_new_i64(); 10419 gen_neon_addl(size, (opcode == 3), 10420 tcg_res[pass], tcg_op1, tcg_op2_wide); 10421 } 10422 10423 for (pass = 0; pass < 2; pass++) { 10424 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 10425 } 10426 } 10427 10428 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in) 10429 { 10430 tcg_gen_addi_i64(in, in, 1U << 31); 10431 tcg_gen_extrh_i64_i32(res, in); 10432 } 10433 10434 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, 10435 int opcode, int rd, int rn, int rm) 10436 { 10437 TCGv_i32 tcg_res[2]; 10438 int part = is_q ? 
2 : 0; 10439 int pass; 10440 10441 for (pass = 0; pass < 2; pass++) { 10442 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10443 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 10444 TCGv_i64 tcg_wideres = tcg_temp_new_i64(); 10445 static NeonGenNarrowFn * const narrowfns[3][2] = { 10446 { gen_helper_neon_narrow_high_u8, 10447 gen_helper_neon_narrow_round_high_u8 }, 10448 { gen_helper_neon_narrow_high_u16, 10449 gen_helper_neon_narrow_round_high_u16 }, 10450 { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 }, 10451 }; 10452 NeonGenNarrowFn *gennarrow = narrowfns[size][is_u]; 10453 10454 read_vec_element(s, tcg_op1, rn, pass, MO_64); 10455 read_vec_element(s, tcg_op2, rm, pass, MO_64); 10456 10457 gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2); 10458 10459 tcg_res[pass] = tcg_temp_new_i32(); 10460 gennarrow(tcg_res[pass], tcg_wideres); 10461 } 10462 10463 for (pass = 0; pass < 2; pass++) { 10464 write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32); 10465 } 10466 clear_vec_high(s, is_q, rd); 10467 } 10468 10469 /* AdvSIMD three different 10470 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 10471 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 10472 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | 10473 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 10474 */ 10475 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) 10476 { 10477 /* Instructions in this group fall into three basic classes 10478 * (in each case with the operation working on each element in 10479 * the input vectors): 10480 * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra 10481 * 128 bit input) 10482 * (2) wide 64 x 128 -> 128 10483 * (3) narrowing 128 x 128 -> 64 10484 * Here we do initial decode, catch unallocated cases and 10485 * dispatch to separate functions for each class. 10486 */ 10487 int is_q = extract32(insn, 30, 1); 10488 int is_u = extract32(insn, 29, 1); 10489 int size = extract32(insn, 22, 2); 10490 int opcode = extract32(insn, 12, 4); 10491 int rm = extract32(insn, 16, 5); 10492 int rn = extract32(insn, 5, 5); 10493 int rd = extract32(insn, 0, 5); 10494 10495 switch (opcode) { 10496 case 1: /* SADDW, SADDW2, UADDW, UADDW2 */ 10497 case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */ 10498 /* 64 x 128 -> 128 */ 10499 if (size == 3) { 10500 unallocated_encoding(s); 10501 return; 10502 } 10503 if (!fp_access_check(s)) { 10504 return; 10505 } 10506 handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm); 10507 break; 10508 case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */ 10509 case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */ 10510 /* 128 x 128 -> 64 */ 10511 if (size == 3) { 10512 unallocated_encoding(s); 10513 return; 10514 } 10515 if (!fp_access_check(s)) { 10516 return; 10517 } 10518 handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm); 10519 break; 10520 case 14: /* PMULL, PMULL2 */ 10521 if (is_u) { 10522 unallocated_encoding(s); 10523 return; 10524 } 10525 switch (size) { 10526 case 0: /* PMULL.P8 */ 10527 if (!fp_access_check(s)) { 10528 return; 10529 } 10530 /* The Q field specifies lo/hi half input for this insn. */ 10531 gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, 10532 gen_helper_neon_pmull_h); 10533 break; 10534 10535 case 3: /* PMULL.P64 */ 10536 if (!dc_isar_feature(aa64_pmull, s)) { 10537 unallocated_encoding(s); 10538 return; 10539 } 10540 if (!fp_access_check(s)) { 10541 return; 10542 } 10543 /* The Q field specifies lo/hi half input for this insn. 
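* (PMULL takes the low 64 bits of Vn/Vm, PMULL2 the high 64 bits.)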
*/ 10544 gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, 10545 gen_helper_gvec_pmull_q); 10546 break; 10547 10548 default: 10549 unallocated_encoding(s); 10550 break; 10551 } 10552 return; 10553 case 9: /* SQDMLAL, SQDMLAL2 */ 10554 case 11: /* SQDMLSL, SQDMLSL2 */ 10555 case 13: /* SQDMULL, SQDMULL2 */ 10556 if (is_u || size == 0) { 10557 unallocated_encoding(s); 10558 return; 10559 } 10560 /* fall through */ 10561 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 10562 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 10563 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 10564 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 10565 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 10566 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 10567 case 12: /* SMULL, SMULL2, UMULL, UMULL2 */ 10568 /* 64 x 64 -> 128 */ 10569 if (size == 3) { 10570 unallocated_encoding(s); 10571 return; 10572 } 10573 if (!fp_access_check(s)) { 10574 return; 10575 } 10576 10577 handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm); 10578 break; 10579 default: 10580 /* opcode 15 not allocated */ 10581 unallocated_encoding(s); 10582 break; 10583 } 10584 } 10585 10586 /* Logic op (opcode == 3) subgroup of C3.6.16. */ 10587 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) 10588 { 10589 int rd = extract32(insn, 0, 5); 10590 int rn = extract32(insn, 5, 5); 10591 int rm = extract32(insn, 16, 5); 10592 int size = extract32(insn, 22, 2); 10593 bool is_u = extract32(insn, 29, 1); 10594 bool is_q = extract32(insn, 30, 1); 10595 10596 if (!fp_access_check(s)) { 10597 return; 10598 } 10599 10600 switch (size + 4 * is_u) { 10601 case 0: /* AND */ 10602 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0); 10603 return; 10604 case 1: /* BIC */ 10605 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0); 10606 return; 10607 case 2: /* ORR */ 10608 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0); 10609 return; 10610 case 3: /* ORN */ 10611 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0); 10612 return; 10613 case 4: /* EOR */ 10614 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0); 10615 return; 10616 10617 case 5: /* BSL bitwise select */ 10618 gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0); 10619 return; 10620 case 6: /* BIT, bitwise insert if true */ 10621 gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0); 10622 return; 10623 case 7: /* BIF, bitwise insert if false */ 10624 gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0); 10625 return; 10626 10627 default: 10628 g_assert_not_reached(); 10629 } 10630 } 10631 10632 /* Pairwise op subgroup of C3.6.16. 10633 * 10634 * This is called directly or via the handle_3same_float for float pairwise 10635 * operations where the opcode and size are calculated differently. 10636 */ 10637 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, 10638 int size, int rn, int rm, int rd) 10639 { 10640 TCGv_ptr fpst; 10641 int pass; 10642 10643 /* Floating point operations need fpst */ 10644 if (opcode >= 0x58) { 10645 fpst = fpstatus_ptr(FPST_FPCR); 10646 } else { 10647 fpst = NULL; 10648 } 10649 10650 if (!fp_access_check(s)) { 10651 return; 10652 } 10653 10654 /* These operations work on the concatenated rm:rn, with each pair of 10655 * adjacent elements being operated on to produce an element in the result. 
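* E.g. ADDP Vd.4S, Vn.4S, Vm.4S gives Vd = { Vn[1]+Vn[0], Vn[3]+Vn[2], Vm[1]+Vm[0], Vm[3]+Vm[2] }.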
10656 */ 10657 if (size == 3) { 10658 TCGv_i64 tcg_res[2]; 10659 10660 for (pass = 0; pass < 2; pass++) { 10661 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10662 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 10663 int passreg = (pass == 0) ? rn : rm; 10664 10665 read_vec_element(s, tcg_op1, passreg, 0, MO_64); 10666 read_vec_element(s, tcg_op2, passreg, 1, MO_64); 10667 tcg_res[pass] = tcg_temp_new_i64(); 10668 10669 switch (opcode) { 10670 case 0x17: /* ADDP */ 10671 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); 10672 break; 10673 case 0x58: /* FMAXNMP */ 10674 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10675 break; 10676 case 0x5a: /* FADDP */ 10677 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10678 break; 10679 case 0x5e: /* FMAXP */ 10680 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10681 break; 10682 case 0x78: /* FMINNMP */ 10683 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10684 break; 10685 case 0x7e: /* FMINP */ 10686 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10687 break; 10688 default: 10689 g_assert_not_reached(); 10690 } 10691 } 10692 10693 for (pass = 0; pass < 2; pass++) { 10694 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 10695 } 10696 } else { 10697 int maxpass = is_q ? 4 : 2; 10698 TCGv_i32 tcg_res[4]; 10699 10700 for (pass = 0; pass < maxpass; pass++) { 10701 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 10702 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 10703 NeonGenTwoOpFn *genfn = NULL; 10704 int passreg = pass < (maxpass / 2) ? rn : rm; 10705 int passelt = (is_q && (pass & 1)) ? 2 : 0; 10706 10707 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32); 10708 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32); 10709 tcg_res[pass] = tcg_temp_new_i32(); 10710 10711 switch (opcode) { 10712 case 0x17: /* ADDP */ 10713 { 10714 static NeonGenTwoOpFn * const fns[3] = { 10715 gen_helper_neon_padd_u8, 10716 gen_helper_neon_padd_u16, 10717 tcg_gen_add_i32, 10718 }; 10719 genfn = fns[size]; 10720 break; 10721 } 10722 case 0x14: /* SMAXP, UMAXP */ 10723 { 10724 static NeonGenTwoOpFn * const fns[3][2] = { 10725 { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 }, 10726 { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 }, 10727 { tcg_gen_smax_i32, tcg_gen_umax_i32 }, 10728 }; 10729 genfn = fns[size][u]; 10730 break; 10731 } 10732 case 0x15: /* SMINP, UMINP */ 10733 { 10734 static NeonGenTwoOpFn * const fns[3][2] = { 10735 { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 }, 10736 { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 }, 10737 { tcg_gen_smin_i32, tcg_gen_umin_i32 }, 10738 }; 10739 genfn = fns[size][u]; 10740 break; 10741 } 10742 /* The FP operations are all on single floats (32 bit) */ 10743 case 0x58: /* FMAXNMP */ 10744 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10745 break; 10746 case 0x5a: /* FADDP */ 10747 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10748 break; 10749 case 0x5e: /* FMAXP */ 10750 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10751 break; 10752 case 0x78: /* FMINNMP */ 10753 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10754 break; 10755 case 0x7e: /* FMINP */ 10756 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst); 10757 break; 10758 default: 10759 g_assert_not_reached(); 10760 } 10761 10762 /* FP ops called directly, otherwise call now */ 10763 if (genfn) { 10764 genfn(tcg_res[pass], tcg_op1, tcg_op2); 10765 } 10766 } 10767 10768 for (pass = 0; pass < maxpass; pass++) { 10769 
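/* The results were buffered in tcg_res[], so writing rd here is safe even if it overlaps rn or rm. */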
write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); 10770 } 10771 clear_vec_high(s, is_q, rd); 10772 } 10773 } 10774 10775 /* Floating point op subgroup of C3.6.16. */ 10776 static void disas_simd_3same_float(DisasContext *s, uint32_t insn) 10777 { 10778 /* For floating point ops, the U, size[1] and opcode bits 10779 * together indicate the operation. size[0] indicates single 10780 * or double. 10781 */ 10782 int fpopcode = extract32(insn, 11, 5) 10783 | (extract32(insn, 23, 1) << 5) 10784 | (extract32(insn, 29, 1) << 6); 10785 int is_q = extract32(insn, 30, 1); 10786 int size = extract32(insn, 22, 1); 10787 int rm = extract32(insn, 16, 5); 10788 int rn = extract32(insn, 5, 5); 10789 int rd = extract32(insn, 0, 5); 10790 10791 int datasize = is_q ? 128 : 64; 10792 int esize = 32 << size; 10793 int elements = datasize / esize; 10794 10795 if (size == 1 && !is_q) { 10796 unallocated_encoding(s); 10797 return; 10798 } 10799 10800 switch (fpopcode) { 10801 case 0x58: /* FMAXNMP */ 10802 case 0x5a: /* FADDP */ 10803 case 0x5e: /* FMAXP */ 10804 case 0x78: /* FMINNMP */ 10805 case 0x7e: /* FMINP */ 10806 if (size && !is_q) { 10807 unallocated_encoding(s); 10808 return; 10809 } 10810 handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32, 10811 rn, rm, rd); 10812 return; 10813 case 0x1b: /* FMULX */ 10814 case 0x1f: /* FRECPS */ 10815 case 0x3f: /* FRSQRTS */ 10816 case 0x5d: /* FACGE */ 10817 case 0x7d: /* FACGT */ 10818 case 0x19: /* FMLA */ 10819 case 0x39: /* FMLS */ 10820 case 0x18: /* FMAXNM */ 10821 case 0x1a: /* FADD */ 10822 case 0x1c: /* FCMEQ */ 10823 case 0x1e: /* FMAX */ 10824 case 0x38: /* FMINNM */ 10825 case 0x3a: /* FSUB */ 10826 case 0x3e: /* FMIN */ 10827 case 0x5b: /* FMUL */ 10828 case 0x5c: /* FCMGE */ 10829 case 0x5f: /* FDIV */ 10830 case 0x7a: /* FABD */ 10831 case 0x7c: /* FCMGT */ 10832 if (!fp_access_check(s)) { 10833 return; 10834 } 10835 handle_3same_float(s, size, elements, fpopcode, rd, rn, rm); 10836 return; 10837 10838 case 0x1d: /* FMLAL */ 10839 case 0x3d: /* FMLSL */ 10840 case 0x59: /* FMLAL2 */ 10841 case 0x79: /* FMLSL2 */ 10842 if (size & 1 || !dc_isar_feature(aa64_fhm, s)) { 10843 unallocated_encoding(s); 10844 return; 10845 } 10846 if (fp_access_check(s)) { 10847 int is_s = extract32(insn, 23, 1); 10848 int is_2 = extract32(insn, 29, 1); 10849 int data = (is_2 << 1) | is_s; 10850 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 10851 vec_full_reg_offset(s, rn), 10852 vec_full_reg_offset(s, rm), cpu_env, 10853 is_q ? 16 : 8, vec_full_reg_size(s), 10854 data, gen_helper_gvec_fmlal_a64); 10855 } 10856 return; 10857 10858 default: 10859 unallocated_encoding(s); 10860 return; 10861 } 10862 } 10863 10864 /* Integer op subgroup of C3.6.16. 
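* (i.e. the 'three same' encodings that are not in the logic, pairwise or floating-point subgroups).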
*/ 10865 static void disas_simd_3same_int(DisasContext *s, uint32_t insn) 10866 { 10867 int is_q = extract32(insn, 30, 1); 10868 int u = extract32(insn, 29, 1); 10869 int size = extract32(insn, 22, 2); 10870 int opcode = extract32(insn, 11, 5); 10871 int rm = extract32(insn, 16, 5); 10872 int rn = extract32(insn, 5, 5); 10873 int rd = extract32(insn, 0, 5); 10874 int pass; 10875 TCGCond cond; 10876 10877 switch (opcode) { 10878 case 0x13: /* MUL, PMUL */ 10879 if (u && size != 0) { 10880 unallocated_encoding(s); 10881 return; 10882 } 10883 /* fall through */ 10884 case 0x0: /* SHADD, UHADD */ 10885 case 0x2: /* SRHADD, URHADD */ 10886 case 0x4: /* SHSUB, UHSUB */ 10887 case 0xc: /* SMAX, UMAX */ 10888 case 0xd: /* SMIN, UMIN */ 10889 case 0xe: /* SABD, UABD */ 10890 case 0xf: /* SABA, UABA */ 10891 case 0x12: /* MLA, MLS */ 10892 if (size == 3) { 10893 unallocated_encoding(s); 10894 return; 10895 } 10896 break; 10897 case 0x16: /* SQDMULH, SQRDMULH */ 10898 if (size == 0 || size == 3) { 10899 unallocated_encoding(s); 10900 return; 10901 } 10902 break; 10903 default: 10904 if (size == 3 && !is_q) { 10905 unallocated_encoding(s); 10906 return; 10907 } 10908 break; 10909 } 10910 10911 if (!fp_access_check(s)) { 10912 return; 10913 } 10914 10915 switch (opcode) { 10916 case 0x01: /* SQADD, UQADD */ 10917 if (u) { 10918 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size); 10919 } else { 10920 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size); 10921 } 10922 return; 10923 case 0x05: /* SQSUB, UQSUB */ 10924 if (u) { 10925 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size); 10926 } else { 10927 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size); 10928 } 10929 return; 10930 case 0x08: /* SSHL, USHL */ 10931 if (u) { 10932 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size); 10933 } else { 10934 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size); 10935 } 10936 return; 10937 case 0x0c: /* SMAX, UMAX */ 10938 if (u) { 10939 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size); 10940 } else { 10941 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size); 10942 } 10943 return; 10944 case 0x0d: /* SMIN, UMIN */ 10945 if (u) { 10946 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size); 10947 } else { 10948 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size); 10949 } 10950 return; 10951 case 0xe: /* SABD, UABD */ 10952 if (u) { 10953 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size); 10954 } else { 10955 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size); 10956 } 10957 return; 10958 case 0xf: /* SABA, UABA */ 10959 if (u) { 10960 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size); 10961 } else { 10962 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size); 10963 } 10964 return; 10965 case 0x10: /* ADD, SUB */ 10966 if (u) { 10967 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size); 10968 } else { 10969 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size); 10970 } 10971 return; 10972 case 0x13: /* MUL, PMUL */ 10973 if (!u) { /* MUL */ 10974 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size); 10975 } else { /* PMUL */ 10976 gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b); 10977 } 10978 return; 10979 case 0x12: /* MLA, MLS */ 10980 if (u) { 10981 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size); 10982 } else { 10983 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size); 10984 } 10985 return; 10986 case 0x16: /* SQDMULH, SQRDMULH */ 10987 { 10988 static gen_helper_gvec_3_ptr * const fns[2][2] = { 
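/* indexed as fns[size - 1][u]: row 0 is the 16-bit helpers, row 1 the 32-bit helpers; u selects the rounding (SQRDMULH) form */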
10989 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h }, 10990 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s }, 10991 }; 10992 gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]); 10993 } 10994 return; 10995 case 0x11: 10996 if (!u) { /* CMTST */ 10997 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size); 10998 return; 10999 } 11000 /* else CMEQ */ 11001 cond = TCG_COND_EQ; 11002 goto do_gvec_cmp; 11003 case 0x06: /* CMGT, CMHI */ 11004 cond = u ? TCG_COND_GTU : TCG_COND_GT; 11005 goto do_gvec_cmp; 11006 case 0x07: /* CMGE, CMHS */ 11007 cond = u ? TCG_COND_GEU : TCG_COND_GE; 11008 do_gvec_cmp: 11009 tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd), 11010 vec_full_reg_offset(s, rn), 11011 vec_full_reg_offset(s, rm), 11012 is_q ? 16 : 8, vec_full_reg_size(s)); 11013 return; 11014 } 11015 11016 if (size == 3) { 11017 assert(is_q); 11018 for (pass = 0; pass < 2; pass++) { 11019 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11020 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 11021 TCGv_i64 tcg_res = tcg_temp_new_i64(); 11022 11023 read_vec_element(s, tcg_op1, rn, pass, MO_64); 11024 read_vec_element(s, tcg_op2, rm, pass, MO_64); 11025 11026 handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2); 11027 11028 write_vec_element(s, tcg_res, rd, pass, MO_64); 11029 } 11030 } else { 11031 for (pass = 0; pass < (is_q ? 4 : 2); pass++) { 11032 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11033 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11034 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11035 NeonGenTwoOpFn *genfn = NULL; 11036 NeonGenTwoOpEnvFn *genenvfn = NULL; 11037 11038 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); 11039 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); 11040 11041 switch (opcode) { 11042 case 0x0: /* SHADD, UHADD */ 11043 { 11044 static NeonGenTwoOpFn * const fns[3][2] = { 11045 { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 }, 11046 { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 }, 11047 { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 }, 11048 }; 11049 genfn = fns[size][u]; 11050 break; 11051 } 11052 case 0x2: /* SRHADD, URHADD */ 11053 { 11054 static NeonGenTwoOpFn * const fns[3][2] = { 11055 { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 }, 11056 { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 }, 11057 { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 }, 11058 }; 11059 genfn = fns[size][u]; 11060 break; 11061 } 11062 case 0x4: /* SHSUB, UHSUB */ 11063 { 11064 static NeonGenTwoOpFn * const fns[3][2] = { 11065 { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 }, 11066 { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 }, 11067 { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 }, 11068 }; 11069 genfn = fns[size][u]; 11070 break; 11071 } 11072 case 0x9: /* SQSHL, UQSHL */ 11073 { 11074 static NeonGenTwoOpEnvFn * const fns[3][2] = { 11075 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, 11076 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, 11077 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, 11078 }; 11079 genenvfn = fns[size][u]; 11080 break; 11081 } 11082 case 0xa: /* SRSHL, URSHL */ 11083 { 11084 static NeonGenTwoOpFn * const fns[3][2] = { 11085 { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 }, 11086 { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 }, 11087 { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 }, 11088 }; 11089 genfn = fns[size][u]; 11090 break; 11091 } 11092 case 0xb: /* SQRSHL, UQRSHL */ 11093 { 11094 static NeonGenTwoOpEnvFn * const fns[3][2] = { 11095 { gen_helper_neon_qrshl_s8, 
gen_helper_neon_qrshl_u8 }, 11096 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, 11097 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, 11098 }; 11099 genenvfn = fns[size][u]; 11100 break; 11101 } 11102 default: 11103 g_assert_not_reached(); 11104 } 11105 11106 if (genenvfn) { 11107 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2); 11108 } else { 11109 genfn(tcg_res, tcg_op1, tcg_op2); 11110 } 11111 11112 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 11113 } 11114 } 11115 clear_vec_high(s, is_q, rd); 11116 } 11117 11118 /* AdvSIMD three same 11119 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 11120 * +---+---+---+-----------+------+---+------+--------+---+------+------+ 11121 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | 11122 * +---+---+---+-----------+------+---+------+--------+---+------+------+ 11123 */ 11124 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn) 11125 { 11126 int opcode = extract32(insn, 11, 5); 11127 11128 switch (opcode) { 11129 case 0x3: /* logic ops */ 11130 disas_simd_3same_logic(s, insn); 11131 break; 11132 case 0x17: /* ADDP */ 11133 case 0x14: /* SMAXP, UMAXP */ 11134 case 0x15: /* SMINP, UMINP */ 11135 { 11136 /* Pairwise operations */ 11137 int is_q = extract32(insn, 30, 1); 11138 int u = extract32(insn, 29, 1); 11139 int size = extract32(insn, 22, 2); 11140 int rm = extract32(insn, 16, 5); 11141 int rn = extract32(insn, 5, 5); 11142 int rd = extract32(insn, 0, 5); 11143 if (opcode == 0x17) { 11144 if (u || (size == 3 && !is_q)) { 11145 unallocated_encoding(s); 11146 return; 11147 } 11148 } else { 11149 if (size == 3) { 11150 unallocated_encoding(s); 11151 return; 11152 } 11153 } 11154 handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd); 11155 break; 11156 } 11157 case 0x18 ... 0x31: 11158 /* floating point ops, sz[1] and U are part of opcode */ 11159 disas_simd_3same_float(s, insn); 11160 break; 11161 default: 11162 disas_simd_3same_int(s, insn); 11163 break; 11164 } 11165 } 11166 11167 /* 11168 * Advanced SIMD three same (ARMv8.2 FP16 variants) 11169 * 11170 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 11171 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ 11172 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | 11173 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ 11174 * 11175 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE 11176 * (register), FACGE, FABD, FCMGT (register) and FACGT. 11177 * 11178 */ 11179 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) 11180 { 11181 int opcode = extract32(insn, 11, 3); 11182 int u = extract32(insn, 29, 1); 11183 int a = extract32(insn, 23, 1); 11184 int is_q = extract32(insn, 30, 1); 11185 int rm = extract32(insn, 16, 5); 11186 int rn = extract32(insn, 5, 5); 11187 int rd = extract32(insn, 0, 5); 11188 /* 11189 * For these floating point ops, the U, a and opcode bits 11190 * together indicate the operation. 11191 */ 11192 int fpopcode = opcode | (a << 3) | (u << 4); 11193 int datasize = is_q ? 
128 : 64; 11194 int elements = datasize / 16; 11195 bool pairwise; 11196 TCGv_ptr fpst; 11197 int pass; 11198 11199 switch (fpopcode) { 11200 case 0x0: /* FMAXNM */ 11201 case 0x1: /* FMLA */ 11202 case 0x2: /* FADD */ 11203 case 0x3: /* FMULX */ 11204 case 0x4: /* FCMEQ */ 11205 case 0x6: /* FMAX */ 11206 case 0x7: /* FRECPS */ 11207 case 0x8: /* FMINNM */ 11208 case 0x9: /* FMLS */ 11209 case 0xa: /* FSUB */ 11210 case 0xe: /* FMIN */ 11211 case 0xf: /* FRSQRTS */ 11212 case 0x13: /* FMUL */ 11213 case 0x14: /* FCMGE */ 11214 case 0x15: /* FACGE */ 11215 case 0x17: /* FDIV */ 11216 case 0x1a: /* FABD */ 11217 case 0x1c: /* FCMGT */ 11218 case 0x1d: /* FACGT */ 11219 pairwise = false; 11220 break; 11221 case 0x10: /* FMAXNMP */ 11222 case 0x12: /* FADDP */ 11223 case 0x16: /* FMAXP */ 11224 case 0x18: /* FMINNMP */ 11225 case 0x1e: /* FMINP */ 11226 pairwise = true; 11227 break; 11228 default: 11229 unallocated_encoding(s); 11230 return; 11231 } 11232 11233 if (!dc_isar_feature(aa64_fp16, s)) { 11234 unallocated_encoding(s); 11235 return; 11236 } 11237 11238 if (!fp_access_check(s)) { 11239 return; 11240 } 11241 11242 fpst = fpstatus_ptr(FPST_FPCR_F16); 11243 11244 if (pairwise) { 11245 int maxpass = is_q ? 8 : 4; 11246 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11247 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11248 TCGv_i32 tcg_res[8]; 11249 11250 for (pass = 0; pass < maxpass; pass++) { 11251 int passreg = pass < (maxpass / 2) ? rn : rm; 11252 int passelt = (pass << 1) & (maxpass - 1); 11253 11254 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16); 11255 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16); 11256 tcg_res[pass] = tcg_temp_new_i32(); 11257 11258 switch (fpopcode) { 11259 case 0x10: /* FMAXNMP */ 11260 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2, 11261 fpst); 11262 break; 11263 case 0x12: /* FADDP */ 11264 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11265 break; 11266 case 0x16: /* FMAXP */ 11267 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11268 break; 11269 case 0x18: /* FMINNMP */ 11270 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2, 11271 fpst); 11272 break; 11273 case 0x1e: /* FMINP */ 11274 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11275 break; 11276 default: 11277 g_assert_not_reached(); 11278 } 11279 } 11280 11281 for (pass = 0; pass < maxpass; pass++) { 11282 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16); 11283 } 11284 } else { 11285 for (pass = 0; pass < elements; pass++) { 11286 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11287 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11288 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11289 11290 read_vec_element_i32(s, tcg_op1, rn, pass, MO_16); 11291 read_vec_element_i32(s, tcg_op2, rm, pass, MO_16); 11292 11293 switch (fpopcode) { 11294 case 0x0: /* FMAXNM */ 11295 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 11296 break; 11297 case 0x1: /* FMLA */ 11298 read_vec_element_i32(s, tcg_res, rd, pass, MO_16); 11299 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, 11300 fpst); 11301 break; 11302 case 0x2: /* FADD */ 11303 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 11304 break; 11305 case 0x3: /* FMULX */ 11306 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); 11307 break; 11308 case 0x4: /* FCMEQ */ 11309 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11310 break; 11311 case 0x6: /* FMAX */ 11312 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 11313 break; 11314 
case 0x7: /* FRECPS */ 11315 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11316 break; 11317 case 0x8: /* FMINNM */ 11318 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 11319 break; 11320 case 0x9: /* FMLS */ 11321 /* As usual for ARM, separate negation for fused multiply-add */ 11322 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); 11323 read_vec_element_i32(s, tcg_res, rd, pass, MO_16); 11324 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, 11325 fpst); 11326 break; 11327 case 0xa: /* FSUB */ 11328 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 11329 break; 11330 case 0xe: /* FMIN */ 11331 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 11332 break; 11333 case 0xf: /* FRSQRTS */ 11334 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11335 break; 11336 case 0x13: /* FMUL */ 11337 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 11338 break; 11339 case 0x14: /* FCMGE */ 11340 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11341 break; 11342 case 0x15: /* FACGE */ 11343 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11344 break; 11345 case 0x17: /* FDIV */ 11346 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); 11347 break; 11348 case 0x1a: /* FABD */ 11349 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 11350 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff); 11351 break; 11352 case 0x1c: /* FCMGT */ 11353 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11354 break; 11355 case 0x1d: /* FACGT */ 11356 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11357 break; 11358 default: 11359 g_assert_not_reached(); 11360 } 11361 11362 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 11363 } 11364 } 11365 11366 clear_vec_high(s, is_q, rd); 11367 } 11368 11369 /* AdvSIMD three same extra 11370 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 11371 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ 11372 * | 0 | Q | U | 0 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd | 11373 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ 11374 */ 11375 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) 11376 { 11377 int rd = extract32(insn, 0, 5); 11378 int rn = extract32(insn, 5, 5); 11379 int opcode = extract32(insn, 11, 4); 11380 int rm = extract32(insn, 16, 5); 11381 int size = extract32(insn, 22, 2); 11382 bool u = extract32(insn, 29, 1); 11383 bool is_q = extract32(insn, 30, 1); 11384 bool feature; 11385 int rot; 11386 11387 switch (u * 16 + opcode) { 11388 case 0x10: /* SQRDMLAH (vector) */ 11389 case 0x11: /* SQRDMLSH (vector) */ 11390 if (size != 1 && size != 2) { 11391 unallocated_encoding(s); 11392 return; 11393 } 11394 feature = dc_isar_feature(aa64_rdm, s); 11395 break; 11396 case 0x02: /* SDOT (vector) */ 11397 case 0x12: /* UDOT (vector) */ 11398 if (size != MO_32) { 11399 unallocated_encoding(s); 11400 return; 11401 } 11402 feature = dc_isar_feature(aa64_dp, s); 11403 break; 11404 case 0x03: /* USDOT */ 11405 if (size != MO_32) { 11406 unallocated_encoding(s); 11407 return; 11408 } 11409 feature = dc_isar_feature(aa64_i8mm, s); 11410 break; 11411 case 0x04: /* SMMLA */ 11412 case 0x14: /* UMMLA */ 11413 case 0x05: /* USMMLA */ 11414 if (!is_q || size != MO_32) { 11415 unallocated_encoding(s); 11416 return; 11417 } 11418 feature = dc_isar_feature(aa64_i8mm, s); 11419 break; 11420 case 0x18: /* FCMLA, #0 */ 11421 case 0x19: /* FCMLA, #90 */ 11422 case 0x1a: /* FCMLA, #180 */ 11423 case 
0x1b: /* FCMLA, #270 */ 11424 case 0x1c: /* FCADD, #90 */ 11425 case 0x1e: /* FCADD, #270 */ 11426 if (size == 0 11427 || (size == 1 && !dc_isar_feature(aa64_fp16, s)) 11428 || (size == 3 && !is_q)) { 11429 unallocated_encoding(s); 11430 return; 11431 } 11432 feature = dc_isar_feature(aa64_fcma, s); 11433 break; 11434 case 0x1d: /* BFMMLA */ 11435 if (size != MO_16 || !is_q) { 11436 unallocated_encoding(s); 11437 return; 11438 } 11439 feature = dc_isar_feature(aa64_bf16, s); 11440 break; 11441 case 0x1f: 11442 switch (size) { 11443 case 1: /* BFDOT */ 11444 case 3: /* BFMLAL{B,T} */ 11445 feature = dc_isar_feature(aa64_bf16, s); 11446 break; 11447 default: 11448 unallocated_encoding(s); 11449 return; 11450 } 11451 break; 11452 default: 11453 unallocated_encoding(s); 11454 return; 11455 } 11456 if (!feature) { 11457 unallocated_encoding(s); 11458 return; 11459 } 11460 if (!fp_access_check(s)) { 11461 return; 11462 } 11463 11464 switch (opcode) { 11465 case 0x0: /* SQRDMLAH (vector) */ 11466 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size); 11467 return; 11468 11469 case 0x1: /* SQRDMLSH (vector) */ 11470 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size); 11471 return; 11472 11473 case 0x2: /* SDOT / UDOT */ 11474 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, 11475 u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b); 11476 return; 11477 11478 case 0x3: /* USDOT */ 11479 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b); 11480 return; 11481 11482 case 0x04: /* SMMLA, UMMLA */ 11483 gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, 11484 u ? gen_helper_gvec_ummla_b 11485 : gen_helper_gvec_smmla_b); 11486 return; 11487 case 0x05: /* USMMLA */ 11488 gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b); 11489 return; 11490 11491 case 0x8: /* FCMLA, #0 */ 11492 case 0x9: /* FCMLA, #90 */ 11493 case 0xa: /* FCMLA, #180 */ 11494 case 0xb: /* FCMLA, #270 */ 11495 rot = extract32(opcode, 0, 2); 11496 switch (size) { 11497 case 1: 11498 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot, 11499 gen_helper_gvec_fcmlah); 11500 break; 11501 case 2: 11502 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot, 11503 gen_helper_gvec_fcmlas); 11504 break; 11505 case 3: 11506 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot, 11507 gen_helper_gvec_fcmlad); 11508 break; 11509 default: 11510 g_assert_not_reached(); 11511 } 11512 return; 11513 11514 case 0xc: /* FCADD, #90 */ 11515 case 0xe: /* FCADD, #270 */ 11516 rot = extract32(opcode, 1, 1); 11517 switch (size) { 11518 case 1: 11519 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, 11520 gen_helper_gvec_fcaddh); 11521 break; 11522 case 2: 11523 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, 11524 gen_helper_gvec_fcadds); 11525 break; 11526 case 3: 11527 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, 11528 gen_helper_gvec_fcaddd); 11529 break; 11530 default: 11531 g_assert_not_reached(); 11532 } 11533 return; 11534 11535 case 0xd: /* BFMMLA */ 11536 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla); 11537 return; 11538 case 0xf: 11539 switch (size) { 11540 case 1: /* BFDOT */ 11541 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot); 11542 break; 11543 case 3: /* BFMLAL{B,T} */ 11544 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q, 11545 gen_helper_gvec_bfmlal); 11546 break; 11547 default: 11548 g_assert_not_reached(); 11549 } 11550 return; 11551 11552 default: 11553 g_assert_not_reached(); 11554 } 11555 } 11556 11557 static void 
handle_2misc_widening(DisasContext *s, int opcode, bool is_q, 11558 int size, int rn, int rd) 11559 { 11560 /* Handle 2-reg-misc ops which are widening (so each size element 11561 * in the source becomes a 2*size element in the destination. 11562 * The only instruction like this is FCVTL. 11563 */ 11564 int pass; 11565 11566 if (size == 3) { 11567 /* 32 -> 64 bit fp conversion */ 11568 TCGv_i64 tcg_res[2]; 11569 int srcelt = is_q ? 2 : 0; 11570 11571 for (pass = 0; pass < 2; pass++) { 11572 TCGv_i32 tcg_op = tcg_temp_new_i32(); 11573 tcg_res[pass] = tcg_temp_new_i64(); 11574 11575 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); 11576 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env); 11577 } 11578 for (pass = 0; pass < 2; pass++) { 11579 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11580 } 11581 } else { 11582 /* 16 -> 32 bit fp conversion */ 11583 int srcelt = is_q ? 4 : 0; 11584 TCGv_i32 tcg_res[4]; 11585 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 11586 TCGv_i32 ahp = get_ahp_flag(); 11587 11588 for (pass = 0; pass < 4; pass++) { 11589 tcg_res[pass] = tcg_temp_new_i32(); 11590 11591 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); 11592 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], 11593 fpst, ahp); 11594 } 11595 for (pass = 0; pass < 4; pass++) { 11596 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); 11597 } 11598 } 11599 } 11600 11601 static void handle_rev(DisasContext *s, int opcode, bool u, 11602 bool is_q, int size, int rn, int rd) 11603 { 11604 int op = (opcode << 1) | u; 11605 int opsz = op + size; 11606 int grp_size = 3 - opsz; 11607 int dsize = is_q ? 128 : 64; 11608 int i; 11609 11610 if (opsz >= 3) { 11611 unallocated_encoding(s); 11612 return; 11613 } 11614 11615 if (!fp_access_check(s)) { 11616 return; 11617 } 11618 11619 if (size == 0) { 11620 /* Special case bytes, use bswap op on each group of elements */ 11621 int groups = dsize / (8 << grp_size); 11622 11623 for (i = 0; i < groups; i++) { 11624 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 11625 11626 read_vec_element(s, tcg_tmp, rn, i, grp_size); 11627 switch (grp_size) { 11628 case MO_16: 11629 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 11630 break; 11631 case MO_32: 11632 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 11633 break; 11634 case MO_64: 11635 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp); 11636 break; 11637 default: 11638 g_assert_not_reached(); 11639 } 11640 write_vec_element(s, tcg_tmp, rd, i, grp_size); 11641 } 11642 clear_vec_high(s, is_q, rd); 11643 } else { 11644 int revmask = (1 << grp_size) - 1; 11645 int esize = 8 << size; 11646 int elements = dsize / esize; 11647 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 11648 TCGv_i64 tcg_rd[2]; 11649 11650 for (i = 0; i < 2; i++) { 11651 tcg_rd[i] = tcg_temp_new_i64(); 11652 tcg_gen_movi_i64(tcg_rd[i], 0); 11653 } 11654 11655 for (i = 0; i < elements; i++) { 11656 int e_rev = (i & 0xf) ^ revmask; 11657 int w = (e_rev * esize) / 64; 11658 int o = (e_rev * esize) % 64; 11659 11660 read_vec_element(s, tcg_rn, rn, i, size); 11661 tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize); 11662 } 11663 11664 for (i = 0; i < 2; i++) { 11665 write_vec_element(s, tcg_rd[i], rd, i, MO_64); 11666 } 11667 clear_vec_high(s, true, rd); 11668 } 11669 } 11670 11671 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, 11672 bool is_q, int size, int rn, int rd) 11673 { 11674 /* Implement the pairwise operations from 2-misc: 11675 * SADDLP, UADDLP, SADALP, UADALP. 
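* E.g. SADDLP Vd.4S, Vn.8H sets Vd[i] = Vn[2*i] + Vn[2*i+1], with each sum sign-extended to 32 bits.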
11676 * These all add pairs of elements in the input to produce a 11677 * double-width result element in the output (possibly accumulating). 11678 */ 11679 bool accum = (opcode == 0x6); 11680 int maxpass = is_q ? 2 : 1; 11681 int pass; 11682 TCGv_i64 tcg_res[2]; 11683 11684 if (size == 2) { 11685 /* 32 + 32 -> 64 op */ 11686 MemOp memop = size + (u ? 0 : MO_SIGN); 11687 11688 for (pass = 0; pass < maxpass; pass++) { 11689 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11690 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 11691 11692 tcg_res[pass] = tcg_temp_new_i64(); 11693 11694 read_vec_element(s, tcg_op1, rn, pass * 2, memop); 11695 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop); 11696 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); 11697 if (accum) { 11698 read_vec_element(s, tcg_op1, rd, pass, MO_64); 11699 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1); 11700 } 11701 } 11702 } else { 11703 for (pass = 0; pass < maxpass; pass++) { 11704 TCGv_i64 tcg_op = tcg_temp_new_i64(); 11705 NeonGenOne64OpFn *genfn; 11706 static NeonGenOne64OpFn * const fns[2][2] = { 11707 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, 11708 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, 11709 }; 11710 11711 genfn = fns[size][u]; 11712 11713 tcg_res[pass] = tcg_temp_new_i64(); 11714 11715 read_vec_element(s, tcg_op, rn, pass, MO_64); 11716 genfn(tcg_res[pass], tcg_op); 11717 11718 if (accum) { 11719 read_vec_element(s, tcg_op, rd, pass, MO_64); 11720 if (size == 0) { 11721 gen_helper_neon_addl_u16(tcg_res[pass], 11722 tcg_res[pass], tcg_op); 11723 } else { 11724 gen_helper_neon_addl_u32(tcg_res[pass], 11725 tcg_res[pass], tcg_op); 11726 } 11727 } 11728 } 11729 } 11730 if (!is_q) { 11731 tcg_res[1] = tcg_constant_i64(0); 11732 } 11733 for (pass = 0; pass < 2; pass++) { 11734 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11735 } 11736 } 11737 11738 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) 11739 { 11740 /* Implement SHLL and SHLL2 */ 11741 int pass; 11742 int part = is_q ? 
2 : 0; 11743 TCGv_i64 tcg_res[2]; 11744 11745 for (pass = 0; pass < 2; pass++) { 11746 static NeonGenWidenFn * const widenfns[3] = { 11747 gen_helper_neon_widen_u8, 11748 gen_helper_neon_widen_u16, 11749 tcg_gen_extu_i32_i64, 11750 }; 11751 NeonGenWidenFn *widenfn = widenfns[size]; 11752 TCGv_i32 tcg_op = tcg_temp_new_i32(); 11753 11754 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32); 11755 tcg_res[pass] = tcg_temp_new_i64(); 11756 widenfn(tcg_res[pass], tcg_op); 11757 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size); 11758 } 11759 11760 for (pass = 0; pass < 2; pass++) { 11761 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11762 } 11763 } 11764 11765 /* AdvSIMD two reg misc 11766 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 11767 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 11768 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 11769 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 11770 */ 11771 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) 11772 { 11773 int size = extract32(insn, 22, 2); 11774 int opcode = extract32(insn, 12, 5); 11775 bool u = extract32(insn, 29, 1); 11776 bool is_q = extract32(insn, 30, 1); 11777 int rn = extract32(insn, 5, 5); 11778 int rd = extract32(insn, 0, 5); 11779 bool need_fpstatus = false; 11780 int rmode = -1; 11781 TCGv_i32 tcg_rmode; 11782 TCGv_ptr tcg_fpstatus; 11783 11784 switch (opcode) { 11785 case 0x0: /* REV64, REV32 */ 11786 case 0x1: /* REV16 */ 11787 handle_rev(s, opcode, u, is_q, size, rn, rd); 11788 return; 11789 case 0x5: /* CNT, NOT, RBIT */ 11790 if (u && size == 0) { 11791 /* NOT */ 11792 break; 11793 } else if (u && size == 1) { 11794 /* RBIT */ 11795 break; 11796 } else if (!u && size == 0) { 11797 /* CNT */ 11798 break; 11799 } 11800 unallocated_encoding(s); 11801 return; 11802 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */ 11803 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */ 11804 if (size == 3) { 11805 unallocated_encoding(s); 11806 return; 11807 } 11808 if (!fp_access_check(s)) { 11809 return; 11810 } 11811 11812 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd); 11813 return; 11814 case 0x4: /* CLS, CLZ */ 11815 if (size == 3) { 11816 unallocated_encoding(s); 11817 return; 11818 } 11819 break; 11820 case 0x2: /* SADDLP, UADDLP */ 11821 case 0x6: /* SADALP, UADALP */ 11822 if (size == 3) { 11823 unallocated_encoding(s); 11824 return; 11825 } 11826 if (!fp_access_check(s)) { 11827 return; 11828 } 11829 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd); 11830 return; 11831 case 0x13: /* SHLL, SHLL2 */ 11832 if (u == 0 || size == 3) { 11833 unallocated_encoding(s); 11834 return; 11835 } 11836 if (!fp_access_check(s)) { 11837 return; 11838 } 11839 handle_shll(s, is_q, size, rn, rd); 11840 return; 11841 case 0xa: /* CMLT */ 11842 if (u == 1) { 11843 unallocated_encoding(s); 11844 return; 11845 } 11846 /* fall through */ 11847 case 0x8: /* CMGT, CMGE */ 11848 case 0x9: /* CMEQ, CMLE */ 11849 case 0xb: /* ABS, NEG */ 11850 if (size == 3 && !is_q) { 11851 unallocated_encoding(s); 11852 return; 11853 } 11854 break; 11855 case 0x3: /* SUQADD, USQADD */ 11856 if (size == 3 && !is_q) { 11857 unallocated_encoding(s); 11858 return; 11859 } 11860 if (!fp_access_check(s)) { 11861 return; 11862 } 11863 handle_2misc_satacc(s, false, u, is_q, size, rn, rd); 11864 return; 11865 case 0x7: /* SQABS, SQNEG */ 11866 if (size == 3 && !is_q) { 11867 unallocated_encoding(s); 11868 return; 11869 } 11870 break; 11871 case 
0xc ... 0xf: 11872 case 0x16 ... 0x1f: 11873 { 11874 /* Floating point: U, size[1] and opcode indicate operation; 11875 * size[0] indicates single or double precision. 11876 */ 11877 int is_double = extract32(size, 0, 1); 11878 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 11879 size = is_double ? 3 : 2; 11880 switch (opcode) { 11881 case 0x2f: /* FABS */ 11882 case 0x6f: /* FNEG */ 11883 if (size == 3 && !is_q) { 11884 unallocated_encoding(s); 11885 return; 11886 } 11887 break; 11888 case 0x1d: /* SCVTF */ 11889 case 0x5d: /* UCVTF */ 11890 { 11891 bool is_signed = (opcode == 0x1d) ? true : false; 11892 int elements = is_double ? 2 : is_q ? 4 : 2; 11893 if (is_double && !is_q) { 11894 unallocated_encoding(s); 11895 return; 11896 } 11897 if (!fp_access_check(s)) { 11898 return; 11899 } 11900 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size); 11901 return; 11902 } 11903 case 0x2c: /* FCMGT (zero) */ 11904 case 0x2d: /* FCMEQ (zero) */ 11905 case 0x2e: /* FCMLT (zero) */ 11906 case 0x6c: /* FCMGE (zero) */ 11907 case 0x6d: /* FCMLE (zero) */ 11908 if (size == 3 && !is_q) { 11909 unallocated_encoding(s); 11910 return; 11911 } 11912 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd); 11913 return; 11914 case 0x7f: /* FSQRT */ 11915 if (size == 3 && !is_q) { 11916 unallocated_encoding(s); 11917 return; 11918 } 11919 break; 11920 case 0x1a: /* FCVTNS */ 11921 case 0x1b: /* FCVTMS */ 11922 case 0x3a: /* FCVTPS */ 11923 case 0x3b: /* FCVTZS */ 11924 case 0x5a: /* FCVTNU */ 11925 case 0x5b: /* FCVTMU */ 11926 case 0x7a: /* FCVTPU */ 11927 case 0x7b: /* FCVTZU */ 11928 need_fpstatus = true; 11929 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 11930 if (size == 3 && !is_q) { 11931 unallocated_encoding(s); 11932 return; 11933 } 11934 break; 11935 case 0x5c: /* FCVTAU */ 11936 case 0x1c: /* FCVTAS */ 11937 need_fpstatus = true; 11938 rmode = FPROUNDING_TIEAWAY; 11939 if (size == 3 && !is_q) { 11940 unallocated_encoding(s); 11941 return; 11942 } 11943 break; 11944 case 0x3c: /* URECPE */ 11945 if (size == 3) { 11946 unallocated_encoding(s); 11947 return; 11948 } 11949 /* fall through */ 11950 case 0x3d: /* FRECPE */ 11951 case 0x7d: /* FRSQRTE */ 11952 if (size == 3 && !is_q) { 11953 unallocated_encoding(s); 11954 return; 11955 } 11956 if (!fp_access_check(s)) { 11957 return; 11958 } 11959 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd); 11960 return; 11961 case 0x56: /* FCVTXN, FCVTXN2 */ 11962 if (size == 2) { 11963 unallocated_encoding(s); 11964 return; 11965 } 11966 /* fall through */ 11967 case 0x16: /* FCVTN, FCVTN2 */ 11968 /* handle_2misc_narrow does a 2*size -> size operation, but these 11969 * instructions encode the source size rather than dest size. 
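* Hence the size - 1 passed to handle_2misc_narrow() below.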
11970 */ 11971 if (!fp_access_check(s)) { 11972 return; 11973 } 11974 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 11975 return; 11976 case 0x36: /* BFCVTN, BFCVTN2 */ 11977 if (!dc_isar_feature(aa64_bf16, s) || size != 2) { 11978 unallocated_encoding(s); 11979 return; 11980 } 11981 if (!fp_access_check(s)) { 11982 return; 11983 } 11984 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 11985 return; 11986 case 0x17: /* FCVTL, FCVTL2 */ 11987 if (!fp_access_check(s)) { 11988 return; 11989 } 11990 handle_2misc_widening(s, opcode, is_q, size, rn, rd); 11991 return; 11992 case 0x18: /* FRINTN */ 11993 case 0x19: /* FRINTM */ 11994 case 0x38: /* FRINTP */ 11995 case 0x39: /* FRINTZ */ 11996 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 11997 /* fall through */ 11998 case 0x59: /* FRINTX */ 11999 case 0x79: /* FRINTI */ 12000 need_fpstatus = true; 12001 if (size == 3 && !is_q) { 12002 unallocated_encoding(s); 12003 return; 12004 } 12005 break; 12006 case 0x58: /* FRINTA */ 12007 rmode = FPROUNDING_TIEAWAY; 12008 need_fpstatus = true; 12009 if (size == 3 && !is_q) { 12010 unallocated_encoding(s); 12011 return; 12012 } 12013 break; 12014 case 0x7c: /* URSQRTE */ 12015 if (size == 3) { 12016 unallocated_encoding(s); 12017 return; 12018 } 12019 break; 12020 case 0x1e: /* FRINT32Z */ 12021 case 0x1f: /* FRINT64Z */ 12022 rmode = FPROUNDING_ZERO; 12023 /* fall through */ 12024 case 0x5e: /* FRINT32X */ 12025 case 0x5f: /* FRINT64X */ 12026 need_fpstatus = true; 12027 if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) { 12028 unallocated_encoding(s); 12029 return; 12030 } 12031 break; 12032 default: 12033 unallocated_encoding(s); 12034 return; 12035 } 12036 break; 12037 } 12038 default: 12039 unallocated_encoding(s); 12040 return; 12041 } 12042 12043 if (!fp_access_check(s)) { 12044 return; 12045 } 12046 12047 if (need_fpstatus || rmode >= 0) { 12048 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 12049 } else { 12050 tcg_fpstatus = NULL; 12051 } 12052 if (rmode >= 0) { 12053 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 12054 } else { 12055 tcg_rmode = NULL; 12056 } 12057 12058 switch (opcode) { 12059 case 0x5: 12060 if (u && size == 0) { /* NOT */ 12061 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0); 12062 return; 12063 } 12064 break; 12065 case 0x8: /* CMGT, CMGE */ 12066 if (u) { 12067 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size); 12068 } else { 12069 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size); 12070 } 12071 return; 12072 case 0x9: /* CMEQ, CMLE */ 12073 if (u) { 12074 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size); 12075 } else { 12076 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size); 12077 } 12078 return; 12079 case 0xa: /* CMLT */ 12080 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size); 12081 return; 12082 case 0xb: 12083 if (u) { /* ABS, NEG */ 12084 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); 12085 } else { 12086 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size); 12087 } 12088 return; 12089 } 12090 12091 if (size == 3) { 12092 /* All 64-bit element operations can be shared with scalar 2misc */ 12093 int pass; 12094 12095 /* Coverity claims (size == 3 && !is_q) has been eliminated 12096 * from all paths leading to here. 
12097 */ 12098 tcg_debug_assert(is_q); 12099 for (pass = 0; pass < 2; pass++) { 12100 TCGv_i64 tcg_op = tcg_temp_new_i64(); 12101 TCGv_i64 tcg_res = tcg_temp_new_i64(); 12102 12103 read_vec_element(s, tcg_op, rn, pass, MO_64); 12104 12105 handle_2misc_64(s, opcode, u, tcg_res, tcg_op, 12106 tcg_rmode, tcg_fpstatus); 12107 12108 write_vec_element(s, tcg_res, rd, pass, MO_64); 12109 } 12110 } else { 12111 int pass; 12112 12113 for (pass = 0; pass < (is_q ? 4 : 2); pass++) { 12114 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12115 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12116 12117 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 12118 12119 if (size == 2) { 12120 /* Special cases for 32 bit elements */ 12121 switch (opcode) { 12122 case 0x4: /* CLS */ 12123 if (u) { 12124 tcg_gen_clzi_i32(tcg_res, tcg_op, 32); 12125 } else { 12126 tcg_gen_clrsb_i32(tcg_res, tcg_op); 12127 } 12128 break; 12129 case 0x7: /* SQABS, SQNEG */ 12130 if (u) { 12131 gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op); 12132 } else { 12133 gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op); 12134 } 12135 break; 12136 case 0x2f: /* FABS */ 12137 gen_helper_vfp_abss(tcg_res, tcg_op); 12138 break; 12139 case 0x6f: /* FNEG */ 12140 gen_helper_vfp_negs(tcg_res, tcg_op); 12141 break; 12142 case 0x7f: /* FSQRT */ 12143 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); 12144 break; 12145 case 0x1a: /* FCVTNS */ 12146 case 0x1b: /* FCVTMS */ 12147 case 0x1c: /* FCVTAS */ 12148 case 0x3a: /* FCVTPS */ 12149 case 0x3b: /* FCVTZS */ 12150 gen_helper_vfp_tosls(tcg_res, tcg_op, 12151 tcg_constant_i32(0), tcg_fpstatus); 12152 break; 12153 case 0x5a: /* FCVTNU */ 12154 case 0x5b: /* FCVTMU */ 12155 case 0x5c: /* FCVTAU */ 12156 case 0x7a: /* FCVTPU */ 12157 case 0x7b: /* FCVTZU */ 12158 gen_helper_vfp_touls(tcg_res, tcg_op, 12159 tcg_constant_i32(0), tcg_fpstatus); 12160 break; 12161 case 0x18: /* FRINTN */ 12162 case 0x19: /* FRINTM */ 12163 case 0x38: /* FRINTP */ 12164 case 0x39: /* FRINTZ */ 12165 case 0x58: /* FRINTA */ 12166 case 0x79: /* FRINTI */ 12167 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus); 12168 break; 12169 case 0x59: /* FRINTX */ 12170 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus); 12171 break; 12172 case 0x7c: /* URSQRTE */ 12173 gen_helper_rsqrte_u32(tcg_res, tcg_op); 12174 break; 12175 case 0x1e: /* FRINT32Z */ 12176 case 0x5e: /* FRINT32X */ 12177 gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus); 12178 break; 12179 case 0x1f: /* FRINT64Z */ 12180 case 0x5f: /* FRINT64X */ 12181 gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus); 12182 break; 12183 default: 12184 g_assert_not_reached(); 12185 } 12186 } else { 12187 /* Use helpers for 8 and 16 bit elements */ 12188 switch (opcode) { 12189 case 0x5: /* CNT, RBIT */ 12190 /* For these two insns size is part of the opcode specifier 12191 * (handled earlier); they always operate on byte elements. 
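                     * (CNT counts the set bits in each byte; RBIT reverses
                     * the bit order within each byte.)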
12192 */ 12193 if (u) { 12194 gen_helper_neon_rbit_u8(tcg_res, tcg_op); 12195 } else { 12196 gen_helper_neon_cnt_u8(tcg_res, tcg_op); 12197 } 12198 break; 12199 case 0x7: /* SQABS, SQNEG */ 12200 { 12201 NeonGenOneOpEnvFn *genfn; 12202 static NeonGenOneOpEnvFn * const fns[2][2] = { 12203 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 12204 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 12205 }; 12206 genfn = fns[size][u]; 12207 genfn(tcg_res, cpu_env, tcg_op); 12208 break; 12209 } 12210 case 0x4: /* CLS, CLZ */ 12211 if (u) { 12212 if (size == 0) { 12213 gen_helper_neon_clz_u8(tcg_res, tcg_op); 12214 } else { 12215 gen_helper_neon_clz_u16(tcg_res, tcg_op); 12216 } 12217 } else { 12218 if (size == 0) { 12219 gen_helper_neon_cls_s8(tcg_res, tcg_op); 12220 } else { 12221 gen_helper_neon_cls_s16(tcg_res, tcg_op); 12222 } 12223 } 12224 break; 12225 default: 12226 g_assert_not_reached(); 12227 } 12228 } 12229 12230 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 12231 } 12232 } 12233 clear_vec_high(s, is_q, rd); 12234 12235 if (tcg_rmode) { 12236 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 12237 } 12238 } 12239 12240 /* AdvSIMD [scalar] two register miscellaneous (FP16) 12241 * 12242 * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0 12243 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 12244 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd | 12245 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 12246 * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00 12247 * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800 12248 * 12249 * This actually covers two groups where scalar access is governed by 12250 * bit 28. A bunch of the instructions (float to integral) only exist 12251 * in the vector form and are un-allocated for the scalar decode. Also 12252 * in the scalar decode Q is always 1. 12253 */ 12254 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) 12255 { 12256 int fpop, opcode, a, u; 12257 int rn, rd; 12258 bool is_q; 12259 bool is_scalar; 12260 bool only_in_vector = false; 12261 12262 int pass; 12263 TCGv_i32 tcg_rmode = NULL; 12264 TCGv_ptr tcg_fpstatus = NULL; 12265 bool need_fpst = true; 12266 int rmode = -1; 12267 12268 if (!dc_isar_feature(aa64_fp16, s)) { 12269 unallocated_encoding(s); 12270 return; 12271 } 12272 12273 rd = extract32(insn, 0, 5); 12274 rn = extract32(insn, 5, 5); 12275 12276 a = extract32(insn, 23, 1); 12277 u = extract32(insn, 29, 1); 12278 is_scalar = extract32(insn, 28, 1); 12279 is_q = extract32(insn, 30, 1); 12280 12281 opcode = extract32(insn, 12, 5); 12282 fpop = deposit32(opcode, 5, 1, a); 12283 fpop = deposit32(fpop, 6, 1, u); 12284 12285 switch (fpop) { 12286 case 0x1d: /* SCVTF */ 12287 case 0x5d: /* UCVTF */ 12288 { 12289 int elements; 12290 12291 if (is_scalar) { 12292 elements = 1; 12293 } else { 12294 elements = (is_q ? 
8 : 4); 12295 } 12296 12297 if (!fp_access_check(s)) { 12298 return; 12299 } 12300 handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16); 12301 return; 12302 } 12303 break; 12304 case 0x2c: /* FCMGT (zero) */ 12305 case 0x2d: /* FCMEQ (zero) */ 12306 case 0x2e: /* FCMLT (zero) */ 12307 case 0x6c: /* FCMGE (zero) */ 12308 case 0x6d: /* FCMLE (zero) */ 12309 handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd); 12310 return; 12311 case 0x3d: /* FRECPE */ 12312 case 0x3f: /* FRECPX */ 12313 break; 12314 case 0x18: /* FRINTN */ 12315 only_in_vector = true; 12316 rmode = FPROUNDING_TIEEVEN; 12317 break; 12318 case 0x19: /* FRINTM */ 12319 only_in_vector = true; 12320 rmode = FPROUNDING_NEGINF; 12321 break; 12322 case 0x38: /* FRINTP */ 12323 only_in_vector = true; 12324 rmode = FPROUNDING_POSINF; 12325 break; 12326 case 0x39: /* FRINTZ */ 12327 only_in_vector = true; 12328 rmode = FPROUNDING_ZERO; 12329 break; 12330 case 0x58: /* FRINTA */ 12331 only_in_vector = true; 12332 rmode = FPROUNDING_TIEAWAY; 12333 break; 12334 case 0x59: /* FRINTX */ 12335 case 0x79: /* FRINTI */ 12336 only_in_vector = true; 12337 /* current rounding mode */ 12338 break; 12339 case 0x1a: /* FCVTNS */ 12340 rmode = FPROUNDING_TIEEVEN; 12341 break; 12342 case 0x1b: /* FCVTMS */ 12343 rmode = FPROUNDING_NEGINF; 12344 break; 12345 case 0x1c: /* FCVTAS */ 12346 rmode = FPROUNDING_TIEAWAY; 12347 break; 12348 case 0x3a: /* FCVTPS */ 12349 rmode = FPROUNDING_POSINF; 12350 break; 12351 case 0x3b: /* FCVTZS */ 12352 rmode = FPROUNDING_ZERO; 12353 break; 12354 case 0x5a: /* FCVTNU */ 12355 rmode = FPROUNDING_TIEEVEN; 12356 break; 12357 case 0x5b: /* FCVTMU */ 12358 rmode = FPROUNDING_NEGINF; 12359 break; 12360 case 0x5c: /* FCVTAU */ 12361 rmode = FPROUNDING_TIEAWAY; 12362 break; 12363 case 0x7a: /* FCVTPU */ 12364 rmode = FPROUNDING_POSINF; 12365 break; 12366 case 0x7b: /* FCVTZU */ 12367 rmode = FPROUNDING_ZERO; 12368 break; 12369 case 0x2f: /* FABS */ 12370 case 0x6f: /* FNEG */ 12371 need_fpst = false; 12372 break; 12373 case 0x7d: /* FRSQRTE */ 12374 case 0x7f: /* FSQRT (vector) */ 12375 break; 12376 default: 12377 unallocated_encoding(s); 12378 return; 12379 } 12380 12381 12382 /* Check additional constraints for the scalar encoding */ 12383 if (is_scalar) { 12384 if (!is_q) { 12385 unallocated_encoding(s); 12386 return; 12387 } 12388 /* FRINTxx is only in the vector form */ 12389 if (only_in_vector) { 12390 unallocated_encoding(s); 12391 return; 12392 } 12393 } 12394 12395 if (!fp_access_check(s)) { 12396 return; 12397 } 12398 12399 if (rmode >= 0 || need_fpst) { 12400 tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16); 12401 } 12402 12403 if (rmode >= 0) { 12404 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 12405 } 12406 12407 if (is_scalar) { 12408 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 12409 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12410 12411 switch (fpop) { 12412 case 0x1a: /* FCVTNS */ 12413 case 0x1b: /* FCVTMS */ 12414 case 0x1c: /* FCVTAS */ 12415 case 0x3a: /* FCVTPS */ 12416 case 0x3b: /* FCVTZS */ 12417 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 12418 break; 12419 case 0x3d: /* FRECPE */ 12420 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 12421 break; 12422 case 0x3f: /* FRECPX */ 12423 gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus); 12424 break; 12425 case 0x5a: /* FCVTNU */ 12426 case 0x5b: /* FCVTMU */ 12427 case 0x5c: /* FCVTAU */ 12428 case 0x7a: /* FCVTPU */ 12429 case 0x7b: /* FCVTZU */ 12430 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 12431 
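            /*
             * The FCVT* cases above all rely on the rounding mode that was
             * installed into tcg_fpstatus by gen_set_rmode() earlier; the
             * helpers themselves simply convert in the prevailing mode.
             */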
break; 12432 case 0x6f: /* FNEG */ 12433 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 12434 break; 12435 case 0x7d: /* FRSQRTE */ 12436 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 12437 break; 12438 default: 12439 g_assert_not_reached(); 12440 } 12441 12442 /* limit any sign extension going on */ 12443 tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff); 12444 write_fp_sreg(s, rd, tcg_res); 12445 } else { 12446 for (pass = 0; pass < (is_q ? 8 : 4); pass++) { 12447 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12448 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12449 12450 read_vec_element_i32(s, tcg_op, rn, pass, MO_16); 12451 12452 switch (fpop) { 12453 case 0x1a: /* FCVTNS */ 12454 case 0x1b: /* FCVTMS */ 12455 case 0x1c: /* FCVTAS */ 12456 case 0x3a: /* FCVTPS */ 12457 case 0x3b: /* FCVTZS */ 12458 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 12459 break; 12460 case 0x3d: /* FRECPE */ 12461 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 12462 break; 12463 case 0x5a: /* FCVTNU */ 12464 case 0x5b: /* FCVTMU */ 12465 case 0x5c: /* FCVTAU */ 12466 case 0x7a: /* FCVTPU */ 12467 case 0x7b: /* FCVTZU */ 12468 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 12469 break; 12470 case 0x18: /* FRINTN */ 12471 case 0x19: /* FRINTM */ 12472 case 0x38: /* FRINTP */ 12473 case 0x39: /* FRINTZ */ 12474 case 0x58: /* FRINTA */ 12475 case 0x79: /* FRINTI */ 12476 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus); 12477 break; 12478 case 0x59: /* FRINTX */ 12479 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus); 12480 break; 12481 case 0x2f: /* FABS */ 12482 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 12483 break; 12484 case 0x6f: /* FNEG */ 12485 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 12486 break; 12487 case 0x7d: /* FRSQRTE */ 12488 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 12489 break; 12490 case 0x7f: /* FSQRT */ 12491 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus); 12492 break; 12493 default: 12494 g_assert_not_reached(); 12495 } 12496 12497 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 12498 } 12499 12500 clear_vec_high(s, is_q, rd); 12501 } 12502 12503 if (tcg_rmode) { 12504 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 12505 } 12506 } 12507 12508 /* AdvSIMD scalar x indexed element 12509 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 12510 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ 12511 * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | 12512 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ 12513 * AdvSIMD vector x indexed element 12514 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 12515 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ 12516 * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | 12517 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ 12518 */ 12519 static void disas_simd_indexed(DisasContext *s, uint32_t insn) 12520 { 12521 /* This encoding has two kinds of instruction: 12522 * normal, where we perform elt x idxelt => elt for each 12523 * element in the vector 12524 * long, where we perform elt x idxelt and generate a result of 12525 * double the width of the input element 12526 * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs). 
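     * For example, SMULL multiplies the low half of the source vector by
     * the indexed element and SMULL2 the high half, each pair of inputs
     * producing a double-width result.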
12527 */ 12528 bool is_scalar = extract32(insn, 28, 1); 12529 bool is_q = extract32(insn, 30, 1); 12530 bool u = extract32(insn, 29, 1); 12531 int size = extract32(insn, 22, 2); 12532 int l = extract32(insn, 21, 1); 12533 int m = extract32(insn, 20, 1); 12534 /* Note that the Rm field here is only 4 bits, not 5 as it usually is */ 12535 int rm = extract32(insn, 16, 4); 12536 int opcode = extract32(insn, 12, 4); 12537 int h = extract32(insn, 11, 1); 12538 int rn = extract32(insn, 5, 5); 12539 int rd = extract32(insn, 0, 5); 12540 bool is_long = false; 12541 int is_fp = 0; 12542 bool is_fp16 = false; 12543 int index; 12544 TCGv_ptr fpst; 12545 12546 switch (16 * u + opcode) { 12547 case 0x08: /* MUL */ 12548 case 0x10: /* MLA */ 12549 case 0x14: /* MLS */ 12550 if (is_scalar) { 12551 unallocated_encoding(s); 12552 return; 12553 } 12554 break; 12555 case 0x02: /* SMLAL, SMLAL2 */ 12556 case 0x12: /* UMLAL, UMLAL2 */ 12557 case 0x06: /* SMLSL, SMLSL2 */ 12558 case 0x16: /* UMLSL, UMLSL2 */ 12559 case 0x0a: /* SMULL, SMULL2 */ 12560 case 0x1a: /* UMULL, UMULL2 */ 12561 if (is_scalar) { 12562 unallocated_encoding(s); 12563 return; 12564 } 12565 is_long = true; 12566 break; 12567 case 0x03: /* SQDMLAL, SQDMLAL2 */ 12568 case 0x07: /* SQDMLSL, SQDMLSL2 */ 12569 case 0x0b: /* SQDMULL, SQDMULL2 */ 12570 is_long = true; 12571 break; 12572 case 0x0c: /* SQDMULH */ 12573 case 0x0d: /* SQRDMULH */ 12574 break; 12575 case 0x01: /* FMLA */ 12576 case 0x05: /* FMLS */ 12577 case 0x09: /* FMUL */ 12578 case 0x19: /* FMULX */ 12579 is_fp = 1; 12580 break; 12581 case 0x1d: /* SQRDMLAH */ 12582 case 0x1f: /* SQRDMLSH */ 12583 if (!dc_isar_feature(aa64_rdm, s)) { 12584 unallocated_encoding(s); 12585 return; 12586 } 12587 break; 12588 case 0x0e: /* SDOT */ 12589 case 0x1e: /* UDOT */ 12590 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) { 12591 unallocated_encoding(s); 12592 return; 12593 } 12594 break; 12595 case 0x0f: 12596 switch (size) { 12597 case 0: /* SUDOT */ 12598 case 2: /* USDOT */ 12599 if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) { 12600 unallocated_encoding(s); 12601 return; 12602 } 12603 size = MO_32; 12604 break; 12605 case 1: /* BFDOT */ 12606 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { 12607 unallocated_encoding(s); 12608 return; 12609 } 12610 size = MO_32; 12611 break; 12612 case 3: /* BFMLAL{B,T} */ 12613 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { 12614 unallocated_encoding(s); 12615 return; 12616 } 12617 /* can't set is_fp without other incorrect size checks */ 12618 size = MO_16; 12619 break; 12620 default: 12621 unallocated_encoding(s); 12622 return; 12623 } 12624 break; 12625 case 0x11: /* FCMLA #0 */ 12626 case 0x13: /* FCMLA #90 */ 12627 case 0x15: /* FCMLA #180 */ 12628 case 0x17: /* FCMLA #270 */ 12629 if (is_scalar || !dc_isar_feature(aa64_fcma, s)) { 12630 unallocated_encoding(s); 12631 return; 12632 } 12633 is_fp = 2; 12634 break; 12635 case 0x00: /* FMLAL */ 12636 case 0x04: /* FMLSL */ 12637 case 0x18: /* FMLAL2 */ 12638 case 0x1c: /* FMLSL2 */ 12639 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) { 12640 unallocated_encoding(s); 12641 return; 12642 } 12643 size = MO_16; 12644 /* is_fp, but we pass cpu_env not fp_status. 
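         * Leaving is_fp at 0 means fpst stays NULL below; the FMLAL/FMLSL
         * gvec call passes cpu_env as its pointer argument instead.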
*/ 12645 break; 12646 default: 12647 unallocated_encoding(s); 12648 return; 12649 } 12650 12651 switch (is_fp) { 12652 case 1: /* normal fp */ 12653 /* convert insn encoded size to MemOp size */ 12654 switch (size) { 12655 case 0: /* half-precision */ 12656 size = MO_16; 12657 is_fp16 = true; 12658 break; 12659 case MO_32: /* single precision */ 12660 case MO_64: /* double precision */ 12661 break; 12662 default: 12663 unallocated_encoding(s); 12664 return; 12665 } 12666 break; 12667 12668 case 2: /* complex fp */ 12669 /* Each indexable element is a complex pair. */ 12670 size += 1; 12671 switch (size) { 12672 case MO_32: 12673 if (h && !is_q) { 12674 unallocated_encoding(s); 12675 return; 12676 } 12677 is_fp16 = true; 12678 break; 12679 case MO_64: 12680 break; 12681 default: 12682 unallocated_encoding(s); 12683 return; 12684 } 12685 break; 12686 12687 default: /* integer */ 12688 switch (size) { 12689 case MO_8: 12690 case MO_64: 12691 unallocated_encoding(s); 12692 return; 12693 } 12694 break; 12695 } 12696 if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) { 12697 unallocated_encoding(s); 12698 return; 12699 } 12700 12701 /* Given MemOp size, adjust register and indexing. */ 12702 switch (size) { 12703 case MO_16: 12704 index = h << 2 | l << 1 | m; 12705 break; 12706 case MO_32: 12707 index = h << 1 | l; 12708 rm |= m << 4; 12709 break; 12710 case MO_64: 12711 if (l || !is_q) { 12712 unallocated_encoding(s); 12713 return; 12714 } 12715 index = h; 12716 rm |= m << 4; 12717 break; 12718 default: 12719 g_assert_not_reached(); 12720 } 12721 12722 if (!fp_access_check(s)) { 12723 return; 12724 } 12725 12726 if (is_fp) { 12727 fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 12728 } else { 12729 fpst = NULL; 12730 } 12731 12732 switch (16 * u + opcode) { 12733 case 0x0e: /* SDOT */ 12734 case 0x1e: /* UDOT */ 12735 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 12736 u ? gen_helper_gvec_udot_idx_b 12737 : gen_helper_gvec_sdot_idx_b); 12738 return; 12739 case 0x0f: 12740 switch (extract32(insn, 22, 2)) { 12741 case 0: /* SUDOT */ 12742 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 12743 gen_helper_gvec_sudot_idx_b); 12744 return; 12745 case 1: /* BFDOT */ 12746 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 12747 gen_helper_gvec_bfdot_idx); 12748 return; 12749 case 2: /* USDOT */ 12750 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 12751 gen_helper_gvec_usdot_idx_b); 12752 return; 12753 case 3: /* BFMLAL{B,T} */ 12754 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q, 12755 gen_helper_gvec_bfmlal_idx); 12756 return; 12757 } 12758 g_assert_not_reached(); 12759 case 0x11: /* FCMLA #0 */ 12760 case 0x13: /* FCMLA #90 */ 12761 case 0x15: /* FCMLA #180 */ 12762 case 0x17: /* FCMLA #270 */ 12763 { 12764 int rot = extract32(insn, 13, 2); 12765 int data = (index << 2) | rot; 12766 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 12767 vec_full_reg_offset(s, rn), 12768 vec_full_reg_offset(s, rm), 12769 vec_full_reg_offset(s, rd), fpst, 12770 is_q ? 16 : 8, vec_full_reg_size(s), data, 12771 size == MO_64 12772 ? 
gen_helper_gvec_fcmlas_idx 12773 : gen_helper_gvec_fcmlah_idx); 12774 } 12775 return; 12776 12777 case 0x00: /* FMLAL */ 12778 case 0x04: /* FMLSL */ 12779 case 0x18: /* FMLAL2 */ 12780 case 0x1c: /* FMLSL2 */ 12781 { 12782 int is_s = extract32(opcode, 2, 1); 12783 int is_2 = u; 12784 int data = (index << 2) | (is_2 << 1) | is_s; 12785 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 12786 vec_full_reg_offset(s, rn), 12787 vec_full_reg_offset(s, rm), cpu_env, 12788 is_q ? 16 : 8, vec_full_reg_size(s), 12789 data, gen_helper_gvec_fmlal_idx_a64); 12790 } 12791 return; 12792 12793 case 0x08: /* MUL */ 12794 if (!is_long && !is_scalar) { 12795 static gen_helper_gvec_3 * const fns[3] = { 12796 gen_helper_gvec_mul_idx_h, 12797 gen_helper_gvec_mul_idx_s, 12798 gen_helper_gvec_mul_idx_d, 12799 }; 12800 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 12801 vec_full_reg_offset(s, rn), 12802 vec_full_reg_offset(s, rm), 12803 is_q ? 16 : 8, vec_full_reg_size(s), 12804 index, fns[size - 1]); 12805 return; 12806 } 12807 break; 12808 12809 case 0x10: /* MLA */ 12810 if (!is_long && !is_scalar) { 12811 static gen_helper_gvec_4 * const fns[3] = { 12812 gen_helper_gvec_mla_idx_h, 12813 gen_helper_gvec_mla_idx_s, 12814 gen_helper_gvec_mla_idx_d, 12815 }; 12816 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 12817 vec_full_reg_offset(s, rn), 12818 vec_full_reg_offset(s, rm), 12819 vec_full_reg_offset(s, rd), 12820 is_q ? 16 : 8, vec_full_reg_size(s), 12821 index, fns[size - 1]); 12822 return; 12823 } 12824 break; 12825 12826 case 0x14: /* MLS */ 12827 if (!is_long && !is_scalar) { 12828 static gen_helper_gvec_4 * const fns[3] = { 12829 gen_helper_gvec_mls_idx_h, 12830 gen_helper_gvec_mls_idx_s, 12831 gen_helper_gvec_mls_idx_d, 12832 }; 12833 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 12834 vec_full_reg_offset(s, rn), 12835 vec_full_reg_offset(s, rm), 12836 vec_full_reg_offset(s, rd), 12837 is_q ? 16 : 8, vec_full_reg_size(s), 12838 index, fns[size - 1]); 12839 return; 12840 } 12841 break; 12842 } 12843 12844 if (size == 3) { 12845 TCGv_i64 tcg_idx = tcg_temp_new_i64(); 12846 int pass; 12847 12848 assert(is_fp && is_q && !is_long); 12849 12850 read_vec_element(s, tcg_idx, rm, index, MO_64); 12851 12852 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 12853 TCGv_i64 tcg_op = tcg_temp_new_i64(); 12854 TCGv_i64 tcg_res = tcg_temp_new_i64(); 12855 12856 read_vec_element(s, tcg_op, rn, pass, MO_64); 12857 12858 switch (16 * u + opcode) { 12859 case 0x05: /* FMLS */ 12860 /* As usual for ARM, separate negation for fused multiply-add */ 12861 gen_helper_vfp_negd(tcg_op, tcg_op); 12862 /* fall through */ 12863 case 0x01: /* FMLA */ 12864 read_vec_element(s, tcg_res, rd, pass, MO_64); 12865 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst); 12866 break; 12867 case 0x09: /* FMUL */ 12868 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst); 12869 break; 12870 case 0x19: /* FMULX */ 12871 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst); 12872 break; 12873 default: 12874 g_assert_not_reached(); 12875 } 12876 12877 write_vec_element(s, tcg_res, rd, pass, MO_64); 12878 } 12879 12880 clear_vec_high(s, !is_scalar, rd); 12881 } else if (!is_long) { 12882 /* 32 bit floating point, or 16 or 32 bit integer. 12883 * For the 16 bit scalar case we use the usual Neon helpers and 12884 * rely on the fact that 0 op 0 == 0 with no side effects. 12885 */ 12886 TCGv_i32 tcg_idx = tcg_temp_new_i32(); 12887 int pass, maxpasses; 12888 12889 if (is_scalar) { 12890 maxpasses = 1; 12891 } else { 12892 maxpasses = is_q ? 
4 : 2; 12893 } 12894 12895 read_vec_element_i32(s, tcg_idx, rm, index, size); 12896 12897 if (size == 1 && !is_scalar) { 12898 /* The simplest way to handle the 16x16 indexed ops is to duplicate 12899 * the index into both halves of the 32 bit tcg_idx and then use 12900 * the usual Neon helpers. 12901 */ 12902 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); 12903 } 12904 12905 for (pass = 0; pass < maxpasses; pass++) { 12906 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12907 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12908 12909 read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32); 12910 12911 switch (16 * u + opcode) { 12912 case 0x08: /* MUL */ 12913 case 0x10: /* MLA */ 12914 case 0x14: /* MLS */ 12915 { 12916 static NeonGenTwoOpFn * const fns[2][2] = { 12917 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, 12918 { tcg_gen_add_i32, tcg_gen_sub_i32 }, 12919 }; 12920 NeonGenTwoOpFn *genfn; 12921 bool is_sub = opcode == 0x4; 12922 12923 if (size == 1) { 12924 gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx); 12925 } else { 12926 tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx); 12927 } 12928 if (opcode == 0x8) { 12929 break; 12930 } 12931 read_vec_element_i32(s, tcg_op, rd, pass, MO_32); 12932 genfn = fns[size - 1][is_sub]; 12933 genfn(tcg_res, tcg_op, tcg_res); 12934 break; 12935 } 12936 case 0x05: /* FMLS */ 12937 case 0x01: /* FMLA */ 12938 read_vec_element_i32(s, tcg_res, rd, pass, 12939 is_scalar ? size : MO_32); 12940 switch (size) { 12941 case 1: 12942 if (opcode == 0x5) { 12943 /* As usual for ARM, separate negation for fused 12944 * multiply-add */ 12945 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000); 12946 } 12947 if (is_scalar) { 12948 gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx, 12949 tcg_res, fpst); 12950 } else { 12951 gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx, 12952 tcg_res, fpst); 12953 } 12954 break; 12955 case 2: 12956 if (opcode == 0x5) { 12957 /* As usual for ARM, separate negation for 12958 * fused multiply-add */ 12959 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000); 12960 } 12961 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, 12962 tcg_res, fpst); 12963 break; 12964 default: 12965 g_assert_not_reached(); 12966 } 12967 break; 12968 case 0x09: /* FMUL */ 12969 switch (size) { 12970 case 1: 12971 if (is_scalar) { 12972 gen_helper_advsimd_mulh(tcg_res, tcg_op, 12973 tcg_idx, fpst); 12974 } else { 12975 gen_helper_advsimd_mul2h(tcg_res, tcg_op, 12976 tcg_idx, fpst); 12977 } 12978 break; 12979 case 2: 12980 gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst); 12981 break; 12982 default: 12983 g_assert_not_reached(); 12984 } 12985 break; 12986 case 0x19: /* FMULX */ 12987 switch (size) { 12988 case 1: 12989 if (is_scalar) { 12990 gen_helper_advsimd_mulxh(tcg_res, tcg_op, 12991 tcg_idx, fpst); 12992 } else { 12993 gen_helper_advsimd_mulx2h(tcg_res, tcg_op, 12994 tcg_idx, fpst); 12995 } 12996 break; 12997 case 2: 12998 gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst); 12999 break; 13000 default: 13001 g_assert_not_reached(); 13002 } 13003 break; 13004 case 0x0c: /* SQDMULH */ 13005 if (size == 1) { 13006 gen_helper_neon_qdmulh_s16(tcg_res, cpu_env, 13007 tcg_op, tcg_idx); 13008 } else { 13009 gen_helper_neon_qdmulh_s32(tcg_res, cpu_env, 13010 tcg_op, tcg_idx); 13011 } 13012 break; 13013 case 0x0d: /* SQRDMULH */ 13014 if (size == 1) { 13015 gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env, 13016 tcg_op, tcg_idx); 13017 } else { 13018 gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env, 13019 tcg_op, tcg_idx); 13020 } 13021 break; 13022 case 0x1d: /* 
SQRDMLAH */ 13023 read_vec_element_i32(s, tcg_res, rd, pass, 13024 is_scalar ? size : MO_32); 13025 if (size == 1) { 13026 gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env, 13027 tcg_op, tcg_idx, tcg_res); 13028 } else { 13029 gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env, 13030 tcg_op, tcg_idx, tcg_res); 13031 } 13032 break; 13033 case 0x1f: /* SQRDMLSH */ 13034 read_vec_element_i32(s, tcg_res, rd, pass, 13035 is_scalar ? size : MO_32); 13036 if (size == 1) { 13037 gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env, 13038 tcg_op, tcg_idx, tcg_res); 13039 } else { 13040 gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env, 13041 tcg_op, tcg_idx, tcg_res); 13042 } 13043 break; 13044 default: 13045 g_assert_not_reached(); 13046 } 13047 13048 if (is_scalar) { 13049 write_fp_sreg(s, rd, tcg_res); 13050 } else { 13051 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 13052 } 13053 } 13054 13055 clear_vec_high(s, is_q, rd); 13056 } else { 13057 /* long ops: 16x16->32 or 32x32->64 */ 13058 TCGv_i64 tcg_res[2]; 13059 int pass; 13060 bool satop = extract32(opcode, 0, 1); 13061 MemOp memop = MO_32; 13062 13063 if (satop || !u) { 13064 memop |= MO_SIGN; 13065 } 13066 13067 if (size == 2) { 13068 TCGv_i64 tcg_idx = tcg_temp_new_i64(); 13069 13070 read_vec_element(s, tcg_idx, rm, index, memop); 13071 13072 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 13073 TCGv_i64 tcg_op = tcg_temp_new_i64(); 13074 TCGv_i64 tcg_passres; 13075 int passelt; 13076 13077 if (is_scalar) { 13078 passelt = 0; 13079 } else { 13080 passelt = pass + (is_q * 2); 13081 } 13082 13083 read_vec_element(s, tcg_op, rn, passelt, memop); 13084 13085 tcg_res[pass] = tcg_temp_new_i64(); 13086 13087 if (opcode == 0xa || opcode == 0xb) { 13088 /* Non-accumulating ops */ 13089 tcg_passres = tcg_res[pass]; 13090 } else { 13091 tcg_passres = tcg_temp_new_i64(); 13092 } 13093 13094 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx); 13095 13096 if (satop) { 13097 /* saturating, doubling */ 13098 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env, 13099 tcg_passres, tcg_passres); 13100 } 13101 13102 if (opcode == 0xa || opcode == 0xb) { 13103 continue; 13104 } 13105 13106 /* Accumulating op: handle accumulate step */ 13107 read_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13108 13109 switch (opcode) { 13110 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 13111 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 13112 break; 13113 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 13114 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 13115 break; 13116 case 0x7: /* SQDMLSL, SQDMLSL2 */ 13117 tcg_gen_neg_i64(tcg_passres, tcg_passres); 13118 /* fall through */ 13119 case 0x3: /* SQDMLAL, SQDMLAL2 */ 13120 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env, 13121 tcg_res[pass], 13122 tcg_passres); 13123 break; 13124 default: 13125 g_assert_not_reached(); 13126 } 13127 } 13128 13129 clear_vec_high(s, !is_scalar, rd); 13130 } else { 13131 TCGv_i32 tcg_idx = tcg_temp_new_i32(); 13132 13133 assert(size == 1); 13134 read_vec_element_i32(s, tcg_idx, rm, index, size); 13135 13136 if (!is_scalar) { 13137 /* The simplest way to handle the 16x16 indexed ops is to 13138 * duplicate the index into both halves of the 32 bit tcg_idx 13139 * and then use the usual Neon helpers. 13140 */ 13141 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); 13142 } 13143 13144 for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { 13145 TCGv_i32 tcg_op = tcg_temp_new_i32(); 13146 TCGv_i64 tcg_passres; 13147 13148 if (is_scalar) { 13149 read_vec_element_i32(s, tcg_op, rn, pass, size); 13150 } else { 13151 read_vec_element_i32(s, tcg_op, rn, 13152 pass + (is_q * 2), MO_32); 13153 } 13154 13155 tcg_res[pass] = tcg_temp_new_i64(); 13156 13157 if (opcode == 0xa || opcode == 0xb) { 13158 /* Non-accumulating ops */ 13159 tcg_passres = tcg_res[pass]; 13160 } else { 13161 tcg_passres = tcg_temp_new_i64(); 13162 } 13163 13164 if (memop & MO_SIGN) { 13165 gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx); 13166 } else { 13167 gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx); 13168 } 13169 if (satop) { 13170 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, 13171 tcg_passres, tcg_passres); 13172 } 13173 13174 if (opcode == 0xa || opcode == 0xb) { 13175 continue; 13176 } 13177 13178 /* Accumulating op: handle accumulate step */ 13179 read_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13180 13181 switch (opcode) { 13182 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 13183 gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass], 13184 tcg_passres); 13185 break; 13186 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 13187 gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass], 13188 tcg_passres); 13189 break; 13190 case 0x7: /* SQDMLSL, SQDMLSL2 */ 13191 gen_helper_neon_negl_u32(tcg_passres, tcg_passres); 13192 /* fall through */ 13193 case 0x3: /* SQDMLAL, SQDMLAL2 */ 13194 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env, 13195 tcg_res[pass], 13196 tcg_passres); 13197 break; 13198 default: 13199 g_assert_not_reached(); 13200 } 13201 } 13202 13203 if (is_scalar) { 13204 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]); 13205 } 13206 } 13207 13208 if (is_scalar) { 13209 tcg_res[1] = tcg_constant_i64(0); 13210 } 13211 13212 for (pass = 0; pass < 2; pass++) { 13213 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13214 } 13215 } 13216 } 13217 13218 /* Crypto AES 13219 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 13220 * +-----------------+------+-----------+--------+-----+------+------+ 13221 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | 13222 * +-----------------+------+-----------+--------+-----+------+------+ 13223 */ 13224 static void disas_crypto_aes(DisasContext *s, uint32_t insn) 13225 { 13226 int size = extract32(insn, 22, 2); 13227 int opcode = extract32(insn, 12, 5); 13228 int rn = extract32(insn, 5, 5); 13229 int rd = extract32(insn, 0, 5); 13230 gen_helper_gvec_2 *genfn2 = NULL; 13231 gen_helper_gvec_3 *genfn3 = NULL; 13232 13233 if (!dc_isar_feature(aa64_aes, s) || size != 0) { 13234 unallocated_encoding(s); 13235 return; 13236 } 13237 13238 switch (opcode) { 13239 case 0x4: /* AESE */ 13240 genfn3 = gen_helper_crypto_aese; 13241 break; 13242 case 0x6: /* AESMC */ 13243 genfn2 = gen_helper_crypto_aesmc; 13244 break; 13245 case 0x5: /* AESD */ 13246 genfn3 = gen_helper_crypto_aesd; 13247 break; 13248 case 0x7: /* AESIMC */ 13249 genfn2 = gen_helper_crypto_aesimc; 13250 break; 13251 default: 13252 unallocated_encoding(s); 13253 return; 13254 } 13255 13256 if (!fp_access_check(s)) { 13257 return; 13258 } 13259 if (genfn2) { 13260 gen_gvec_op2_ool(s, true, rd, rn, 0, genfn2); 13261 } else { 13262 gen_gvec_op3_ool(s, true, rd, rd, rn, 0, genfn3); 13263 } 13264 } 13265 13266 /* Crypto three-reg SHA 13267 * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 13268 * +-----------------+------+---+------+---+--------+-----+------+------+ 13269 * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | 
opcode | 0 0 | Rn | Rd | 13270 * +-----------------+------+---+------+---+--------+-----+------+------+ 13271 */ 13272 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) 13273 { 13274 int size = extract32(insn, 22, 2); 13275 int opcode = extract32(insn, 12, 3); 13276 int rm = extract32(insn, 16, 5); 13277 int rn = extract32(insn, 5, 5); 13278 int rd = extract32(insn, 0, 5); 13279 gen_helper_gvec_3 *genfn; 13280 bool feature; 13281 13282 if (size != 0) { 13283 unallocated_encoding(s); 13284 return; 13285 } 13286 13287 switch (opcode) { 13288 case 0: /* SHA1C */ 13289 genfn = gen_helper_crypto_sha1c; 13290 feature = dc_isar_feature(aa64_sha1, s); 13291 break; 13292 case 1: /* SHA1P */ 13293 genfn = gen_helper_crypto_sha1p; 13294 feature = dc_isar_feature(aa64_sha1, s); 13295 break; 13296 case 2: /* SHA1M */ 13297 genfn = gen_helper_crypto_sha1m; 13298 feature = dc_isar_feature(aa64_sha1, s); 13299 break; 13300 case 3: /* SHA1SU0 */ 13301 genfn = gen_helper_crypto_sha1su0; 13302 feature = dc_isar_feature(aa64_sha1, s); 13303 break; 13304 case 4: /* SHA256H */ 13305 genfn = gen_helper_crypto_sha256h; 13306 feature = dc_isar_feature(aa64_sha256, s); 13307 break; 13308 case 5: /* SHA256H2 */ 13309 genfn = gen_helper_crypto_sha256h2; 13310 feature = dc_isar_feature(aa64_sha256, s); 13311 break; 13312 case 6: /* SHA256SU1 */ 13313 genfn = gen_helper_crypto_sha256su1; 13314 feature = dc_isar_feature(aa64_sha256, s); 13315 break; 13316 default: 13317 unallocated_encoding(s); 13318 return; 13319 } 13320 13321 if (!feature) { 13322 unallocated_encoding(s); 13323 return; 13324 } 13325 13326 if (!fp_access_check(s)) { 13327 return; 13328 } 13329 gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn); 13330 } 13331 13332 /* Crypto two-reg SHA 13333 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 13334 * +-----------------+------+-----------+--------+-----+------+------+ 13335 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | 13336 * +-----------------+------+-----------+--------+-----+------+------+ 13337 */ 13338 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) 13339 { 13340 int size = extract32(insn, 22, 2); 13341 int opcode = extract32(insn, 12, 5); 13342 int rn = extract32(insn, 5, 5); 13343 int rd = extract32(insn, 0, 5); 13344 gen_helper_gvec_2 *genfn; 13345 bool feature; 13346 13347 if (size != 0) { 13348 unallocated_encoding(s); 13349 return; 13350 } 13351 13352 switch (opcode) { 13353 case 0: /* SHA1H */ 13354 feature = dc_isar_feature(aa64_sha1, s); 13355 genfn = gen_helper_crypto_sha1h; 13356 break; 13357 case 1: /* SHA1SU1 */ 13358 feature = dc_isar_feature(aa64_sha1, s); 13359 genfn = gen_helper_crypto_sha1su1; 13360 break; 13361 case 2: /* SHA256SU0 */ 13362 feature = dc_isar_feature(aa64_sha256, s); 13363 genfn = gen_helper_crypto_sha256su0; 13364 break; 13365 default: 13366 unallocated_encoding(s); 13367 return; 13368 } 13369 13370 if (!feature) { 13371 unallocated_encoding(s); 13372 return; 13373 } 13374 13375 if (!fp_access_check(s)) { 13376 return; 13377 } 13378 gen_gvec_op2_ool(s, true, rd, rn, 0, genfn); 13379 } 13380 13381 static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 13382 { 13383 tcg_gen_rotli_i64(d, m, 1); 13384 tcg_gen_xor_i64(d, d, n); 13385 } 13386 13387 static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m) 13388 { 13389 tcg_gen_rotli_vec(vece, d, m, 1); 13390 tcg_gen_xor_vec(vece, d, d, n); 13391 } 13392 13393 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 13394 uint32_t rm_ofs, 
uint32_t opr_sz, uint32_t max_sz) 13395 { 13396 static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; 13397 static const GVecGen3 op = { 13398 .fni8 = gen_rax1_i64, 13399 .fniv = gen_rax1_vec, 13400 .opt_opc = vecop_list, 13401 .fno = gen_helper_crypto_rax1, 13402 .vece = MO_64, 13403 }; 13404 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op); 13405 } 13406 13407 /* Crypto three-reg SHA512 13408 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 13409 * +-----------------------+------+---+---+-----+--------+------+------+ 13410 * | 1 1 0 0 1 1 1 0 0 1 1 | Rm | 1 | O | 0 0 | opcode | Rn | Rd | 13411 * +-----------------------+------+---+---+-----+--------+------+------+ 13412 */ 13413 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) 13414 { 13415 int opcode = extract32(insn, 10, 2); 13416 int o = extract32(insn, 14, 1); 13417 int rm = extract32(insn, 16, 5); 13418 int rn = extract32(insn, 5, 5); 13419 int rd = extract32(insn, 0, 5); 13420 bool feature; 13421 gen_helper_gvec_3 *oolfn = NULL; 13422 GVecGen3Fn *gvecfn = NULL; 13423 13424 if (o == 0) { 13425 switch (opcode) { 13426 case 0: /* SHA512H */ 13427 feature = dc_isar_feature(aa64_sha512, s); 13428 oolfn = gen_helper_crypto_sha512h; 13429 break; 13430 case 1: /* SHA512H2 */ 13431 feature = dc_isar_feature(aa64_sha512, s); 13432 oolfn = gen_helper_crypto_sha512h2; 13433 break; 13434 case 2: /* SHA512SU1 */ 13435 feature = dc_isar_feature(aa64_sha512, s); 13436 oolfn = gen_helper_crypto_sha512su1; 13437 break; 13438 case 3: /* RAX1 */ 13439 feature = dc_isar_feature(aa64_sha3, s); 13440 gvecfn = gen_gvec_rax1; 13441 break; 13442 default: 13443 g_assert_not_reached(); 13444 } 13445 } else { 13446 switch (opcode) { 13447 case 0: /* SM3PARTW1 */ 13448 feature = dc_isar_feature(aa64_sm3, s); 13449 oolfn = gen_helper_crypto_sm3partw1; 13450 break; 13451 case 1: /* SM3PARTW2 */ 13452 feature = dc_isar_feature(aa64_sm3, s); 13453 oolfn = gen_helper_crypto_sm3partw2; 13454 break; 13455 case 2: /* SM4EKEY */ 13456 feature = dc_isar_feature(aa64_sm4, s); 13457 oolfn = gen_helper_crypto_sm4ekey; 13458 break; 13459 default: 13460 unallocated_encoding(s); 13461 return; 13462 } 13463 } 13464 13465 if (!feature) { 13466 unallocated_encoding(s); 13467 return; 13468 } 13469 13470 if (!fp_access_check(s)) { 13471 return; 13472 } 13473 13474 if (oolfn) { 13475 gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); 13476 } else { 13477 gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64); 13478 } 13479 } 13480 13481 /* Crypto two-reg SHA512 13482 * 31 12 11 10 9 5 4 0 13483 * +-----------------------------------------+--------+------+------+ 13484 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode | Rn | Rd | 13485 * +-----------------------------------------+--------+------+------+ 13486 */ 13487 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) 13488 { 13489 int opcode = extract32(insn, 10, 2); 13490 int rn = extract32(insn, 5, 5); 13491 int rd = extract32(insn, 0, 5); 13492 bool feature; 13493 13494 switch (opcode) { 13495 case 0: /* SHA512SU0 */ 13496 feature = dc_isar_feature(aa64_sha512, s); 13497 break; 13498 case 1: /* SM4E */ 13499 feature = dc_isar_feature(aa64_sm4, s); 13500 break; 13501 default: 13502 unallocated_encoding(s); 13503 return; 13504 } 13505 13506 if (!feature) { 13507 unallocated_encoding(s); 13508 return; 13509 } 13510 13511 if (!fp_access_check(s)) { 13512 return; 13513 } 13514 13515 switch (opcode) { 13516 case 0: /* SHA512SU0 */ 13517 gen_gvec_op2_ool(s, true, rd, rn, 0, 
gen_helper_crypto_sha512su0); 13518 break; 13519 case 1: /* SM4E */ 13520 gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e); 13521 break; 13522 default: 13523 g_assert_not_reached(); 13524 } 13525 } 13526 13527 /* Crypto four-register 13528 * 31 23 22 21 20 16 15 14 10 9 5 4 0 13529 * +-------------------+-----+------+---+------+------+------+ 13530 * | 1 1 0 0 1 1 1 0 0 | Op0 | Rm | 0 | Ra | Rn | Rd | 13531 * +-------------------+-----+------+---+------+------+------+ 13532 */ 13533 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn) 13534 { 13535 int op0 = extract32(insn, 21, 2); 13536 int rm = extract32(insn, 16, 5); 13537 int ra = extract32(insn, 10, 5); 13538 int rn = extract32(insn, 5, 5); 13539 int rd = extract32(insn, 0, 5); 13540 bool feature; 13541 13542 switch (op0) { 13543 case 0: /* EOR3 */ 13544 case 1: /* BCAX */ 13545 feature = dc_isar_feature(aa64_sha3, s); 13546 break; 13547 case 2: /* SM3SS1 */ 13548 feature = dc_isar_feature(aa64_sm3, s); 13549 break; 13550 default: 13551 unallocated_encoding(s); 13552 return; 13553 } 13554 13555 if (!feature) { 13556 unallocated_encoding(s); 13557 return; 13558 } 13559 13560 if (!fp_access_check(s)) { 13561 return; 13562 } 13563 13564 if (op0 < 2) { 13565 TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2]; 13566 int pass; 13567 13568 tcg_op1 = tcg_temp_new_i64(); 13569 tcg_op2 = tcg_temp_new_i64(); 13570 tcg_op3 = tcg_temp_new_i64(); 13571 tcg_res[0] = tcg_temp_new_i64(); 13572 tcg_res[1] = tcg_temp_new_i64(); 13573 13574 for (pass = 0; pass < 2; pass++) { 13575 read_vec_element(s, tcg_op1, rn, pass, MO_64); 13576 read_vec_element(s, tcg_op2, rm, pass, MO_64); 13577 read_vec_element(s, tcg_op3, ra, pass, MO_64); 13578 13579 if (op0 == 0) { 13580 /* EOR3 */ 13581 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3); 13582 } else { 13583 /* BCAX */ 13584 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3); 13585 } 13586 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); 13587 } 13588 write_vec_element(s, tcg_res[0], rd, 0, MO_64); 13589 write_vec_element(s, tcg_res[1], rd, 1, MO_64); 13590 } else { 13591 TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero; 13592 13593 tcg_op1 = tcg_temp_new_i32(); 13594 tcg_op2 = tcg_temp_new_i32(); 13595 tcg_op3 = tcg_temp_new_i32(); 13596 tcg_res = tcg_temp_new_i32(); 13597 tcg_zero = tcg_constant_i32(0); 13598 13599 read_vec_element_i32(s, tcg_op1, rn, 3, MO_32); 13600 read_vec_element_i32(s, tcg_op2, rm, 3, MO_32); 13601 read_vec_element_i32(s, tcg_op3, ra, 3, MO_32); 13602 13603 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 13604 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 13605 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 13606 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 13607 13608 write_vec_element_i32(s, tcg_zero, rd, 0, MO_32); 13609 write_vec_element_i32(s, tcg_zero, rd, 1, MO_32); 13610 write_vec_element_i32(s, tcg_zero, rd, 2, MO_32); 13611 write_vec_element_i32(s, tcg_res, rd, 3, MO_32); 13612 } 13613 } 13614 13615 /* Crypto XAR 13616 * 31 21 20 16 15 10 9 5 4 0 13617 * +-----------------------+------+--------+------+------+ 13618 * | 1 1 0 0 1 1 1 0 1 0 0 | Rm | imm6 | Rn | Rd | 13619 * +-----------------------+------+--------+------+------+ 13620 */ 13621 static void disas_crypto_xar(DisasContext *s, uint32_t insn) 13622 { 13623 int rm = extract32(insn, 16, 5); 13624 int imm6 = extract32(insn, 10, 6); 13625 int rn = extract32(insn, 5, 5); 13626 int rd = extract32(insn, 0, 5); 13627 13628 if (!dc_isar_feature(aa64_sha3, s)) { 13629 unallocated_encoding(s); 13630 return; 
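    /*
     * As an informal reminder of what gen_gvec_xar() computes below:
     * for each 64-bit element, d = ror64(n ^ m, imm6), which degenerates
     * to a plain EOR when imm6 is zero.
     */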
13631 } 13632 13633 if (!fp_access_check(s)) { 13634 return; 13635 } 13636 13637 gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd), 13638 vec_full_reg_offset(s, rn), 13639 vec_full_reg_offset(s, rm), imm6, 16, 13640 vec_full_reg_size(s)); 13641 } 13642 13643 /* Crypto three-reg imm2 13644 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 13645 * +-----------------------+------+-----+------+--------+------+------+ 13646 * | 1 1 0 0 1 1 1 0 0 1 0 | Rm | 1 0 | imm2 | opcode | Rn | Rd | 13647 * +-----------------------+------+-----+------+--------+------+------+ 13648 */ 13649 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) 13650 { 13651 static gen_helper_gvec_3 * const fns[4] = { 13652 gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b, 13653 gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b, 13654 }; 13655 int opcode = extract32(insn, 10, 2); 13656 int imm2 = extract32(insn, 12, 2); 13657 int rm = extract32(insn, 16, 5); 13658 int rn = extract32(insn, 5, 5); 13659 int rd = extract32(insn, 0, 5); 13660 13661 if (!dc_isar_feature(aa64_sm3, s)) { 13662 unallocated_encoding(s); 13663 return; 13664 } 13665 13666 if (!fp_access_check(s)) { 13667 return; 13668 } 13669 13670 gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]); 13671 } 13672 13673 /* C3.6 Data processing - SIMD, inc Crypto 13674 * 13675 * As the decode gets a little complex we are using a table based 13676 * approach for this part of the decode. 13677 */ 13678 static const AArch64DecodeTable data_proc_simd[] = { 13679 /* pattern , mask , fn */ 13680 { 0x0e200400, 0x9f200400, disas_simd_three_reg_same }, 13681 { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra }, 13682 { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff }, 13683 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, 13684 { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes }, 13685 { 0x0e000400, 0x9fe08400, disas_simd_copy }, 13686 { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */ 13687 /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */ 13688 { 0x0f000400, 0x9ff80400, disas_simd_mod_imm }, 13689 { 0x0f000400, 0x9f800400, disas_simd_shift_imm }, 13690 { 0x0e000000, 0xbf208c00, disas_simd_tb }, 13691 { 0x0e000800, 0xbf208c00, disas_simd_zip_trn }, 13692 { 0x2e000000, 0xbf208400, disas_simd_ext }, 13693 { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same }, 13694 { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra }, 13695 { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff }, 13696 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, 13697 { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise }, 13698 { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy }, 13699 { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */ 13700 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, 13701 { 0x4e280800, 0xff3e0c00, disas_crypto_aes }, 13702 { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha }, 13703 { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha }, 13704 { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 }, 13705 { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 }, 13706 { 0xce000000, 0xff808000, disas_crypto_four_reg }, 13707 { 0xce800000, 0xffe00000, disas_crypto_xar }, 13708 { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 }, 13709 { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 }, 13710 { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 }, 13711 { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 }, 13712 { 0x00000000, 
0x00000000, NULL } 13713 }; 13714 13715 static void disas_data_proc_simd(DisasContext *s, uint32_t insn) 13716 { 13717 /* Note that this is called with all non-FP cases from 13718 * table C3-6 so it must UNDEF for entries not specifically 13719 * allocated to instructions in that table. 13720 */ 13721 AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn); 13722 if (fn) { 13723 fn(s, insn); 13724 } else { 13725 unallocated_encoding(s); 13726 } 13727 } 13728 13729 /* C3.6 Data processing - SIMD and floating point */ 13730 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn) 13731 { 13732 if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) { 13733 disas_data_proc_fp(s, insn); 13734 } else { 13735 /* SIMD, including crypto */ 13736 disas_data_proc_simd(s, insn); 13737 } 13738 } 13739 13740 static bool trans_OK(DisasContext *s, arg_OK *a) 13741 { 13742 return true; 13743 } 13744 13745 static bool trans_FAIL(DisasContext *s, arg_OK *a) 13746 { 13747 s->is_nonstreaming = true; 13748 return true; 13749 } 13750 13751 /** 13752 * is_guarded_page: 13753 * @env: The cpu environment 13754 * @s: The DisasContext 13755 * 13756 * Return true if the page is guarded. 13757 */ 13758 static bool is_guarded_page(CPUARMState *env, DisasContext *s) 13759 { 13760 uint64_t addr = s->base.pc_first; 13761 #ifdef CONFIG_USER_ONLY 13762 return page_get_flags(addr) & PAGE_BTI; 13763 #else 13764 CPUTLBEntryFull *full; 13765 void *host; 13766 int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx); 13767 int flags; 13768 13769 /* 13770 * We test this immediately after reading an insn, which means 13771 * that the TLB entry must be present and valid, and thus this 13772 * access will never raise an exception. 13773 */ 13774 flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx, 13775 false, &host, &full, 0); 13776 assert(!(flags & TLB_INVALID_MASK)); 13777 13778 return full->guarded; 13779 #endif 13780 } 13781 13782 /** 13783 * btype_destination_ok: 13784 * @insn: The instruction at the branch destination 13785 * @bt: SCTLR_ELx.BT 13786 * @btype: PSTATE.BTYPE, and is non-zero 13787 * 13788 * On a guarded page, there are a limited number of insns 13789 * that may be present at the branch target: 13790 * - branch target identifiers, 13791 * - paciasp, pacibsp, 13792 * - BRK insn 13793 * - HLT insn 13794 * Anything else causes a Branch Target Exception. 13795 * 13796 * Return true if the branch is compatible, false to raise BTITRAP. 13797 */ 13798 static bool btype_destination_ok(uint32_t insn, bool bt, int btype) 13799 { 13800 if ((insn & 0xfffff01fu) == 0xd503201fu) { 13801 /* HINT space */ 13802 switch (extract32(insn, 5, 7)) { 13803 case 0b011001: /* PACIASP */ 13804 case 0b011011: /* PACIBSP */ 13805 /* 13806 * If SCTLR_ELx.BT, then PACI*SP are not compatible 13807 * with btype == 3. Otherwise all btype are ok. 13808 */ 13809 return !bt || btype != 3; 13810 case 0b100000: /* BTI */ 13811 /* Not compatible with any btype. */ 13812 return false; 13813 case 0b100010: /* BTI c */ 13814 /* Not compatible with btype == 3 */ 13815 return btype != 3; 13816 case 0b100100: /* BTI j */ 13817 /* Not compatible with btype == 2 */ 13818 return btype != 2; 13819 case 0b100110: /* BTI jc */ 13820 /* Compatible with any btype. */ 13821 return true; 13822 } 13823 } else { 13824 switch (insn & 0xffe0001fu) { 13825 case 0xd4200000u: /* BRK */ 13826 case 0xd4400000u: /* HLT */ 13827 /* Give priority to the breakpoint exception. 
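         * A BRK or HLT at the branch target is therefore reported as
         * compatible, so that its own exception is taken rather than a
         * Branch Target Exception.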
*/ 13828 return true; 13829 } 13830 } 13831 return false; 13832 } 13833 13834 /* C3.1 A64 instruction index by encoding */ 13835 static void disas_a64_legacy(DisasContext *s, uint32_t insn) 13836 { 13837 switch (extract32(insn, 25, 4)) { 13838 case 0x5: 13839 case 0xd: /* Data processing - register */ 13840 disas_data_proc_reg(s, insn); 13841 break; 13842 case 0x7: 13843 case 0xf: /* Data processing - SIMD and floating point */ 13844 disas_data_proc_simd_fp(s, insn); 13845 break; 13846 default: 13847 unallocated_encoding(s); 13848 break; 13849 } 13850 } 13851 13852 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, 13853 CPUState *cpu) 13854 { 13855 DisasContext *dc = container_of(dcbase, DisasContext, base); 13856 CPUARMState *env = cpu->env_ptr; 13857 ARMCPU *arm_cpu = env_archcpu(env); 13858 CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb); 13859 int bound, core_mmu_idx; 13860 13861 dc->isar = &arm_cpu->isar; 13862 dc->condjmp = 0; 13863 dc->pc_save = dc->base.pc_first; 13864 dc->aarch64 = true; 13865 dc->thumb = false; 13866 dc->sctlr_b = 0; 13867 dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE; 13868 dc->condexec_mask = 0; 13869 dc->condexec_cond = 0; 13870 core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX); 13871 dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx); 13872 dc->tbii = EX_TBFLAG_A64(tb_flags, TBII); 13873 dc->tbid = EX_TBFLAG_A64(tb_flags, TBID); 13874 dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA); 13875 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); 13876 #if !defined(CONFIG_USER_ONLY) 13877 dc->user = (dc->current_el == 0); 13878 #endif 13879 dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); 13880 dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); 13881 dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL); 13882 dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE); 13883 dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC); 13884 dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET); 13885 dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL); 13886 dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL); 13887 dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16; 13888 dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16; 13889 dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE); 13890 dc->bt = EX_TBFLAG_A64(tb_flags, BT); 13891 dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE); 13892 dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV); 13893 dc->ata = EX_TBFLAG_A64(tb_flags, ATA); 13894 dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE); 13895 dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE); 13896 dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM); 13897 dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA); 13898 dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING); 13899 dc->naa = EX_TBFLAG_A64(tb_flags, NAA); 13900 dc->vec_len = 0; 13901 dc->vec_stride = 0; 13902 dc->cp_regs = arm_cpu->cp_regs; 13903 dc->features = env->features; 13904 dc->dcz_blocksize = arm_cpu->dcz_blocksize; 13905 dc->gm_blocksize = arm_cpu->gm_blocksize; 13906 13907 #ifdef CONFIG_USER_ONLY 13908 /* In sve_probe_page, we assume TBI is enabled. */ 13909 tcg_debug_assert(dc->tbid & 1); 13910 #endif 13911 13912 dc->lse2 = dc_isar_feature(aa64_lse2, dc); 13913 13914 /* Single step state. 
The code-generation logic here is: 13915 * SS_ACTIVE == 0: 13916 * generate code with no special handling for single-stepping (except 13917 * that anything that can make us go to SS_ACTIVE == 1 must end the TB; 13918 * this happens anyway because those changes are all system register or 13919 * PSTATE writes). 13920 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending) 13921 * emit code for one insn 13922 * emit code to clear PSTATE.SS 13923 * emit code to generate software step exception for completed step 13924 * end TB (as usual for having generated an exception) 13925 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending) 13926 * emit code to generate a software step exception 13927 * end the TB 13928 */ 13929 dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE); 13930 dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS); 13931 dc->is_ldex = false; 13932 13933 /* Bound the number of insns to execute to those left on the page. */ 13934 bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4; 13935 13936 /* If architectural single step active, limit to 1. */ 13937 if (dc->ss_active) { 13938 bound = 1; 13939 } 13940 dc->base.max_insns = MIN(dc->base.max_insns, bound); 13941 } 13942 13943 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) 13944 { 13945 } 13946 13947 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) 13948 { 13949 DisasContext *dc = container_of(dcbase, DisasContext, base); 13950 target_ulong pc_arg = dc->base.pc_next; 13951 13952 if (tb_cflags(dcbase->tb) & CF_PCREL) { 13953 pc_arg &= ~TARGET_PAGE_MASK; 13954 } 13955 tcg_gen_insn_start(pc_arg, 0, 0); 13956 dc->insn_start = tcg_last_op(); 13957 } 13958 13959 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) 13960 { 13961 DisasContext *s = container_of(dcbase, DisasContext, base); 13962 CPUARMState *env = cpu->env_ptr; 13963 uint64_t pc = s->base.pc_next; 13964 uint32_t insn; 13965 13966 /* Singlestep exceptions have the highest priority. */ 13967 if (s->ss_active && !s->pstate_ss) { 13968 /* Singlestep state is Active-pending. 13969 * If we're in this state at the start of a TB then either 13970 * a) we just took an exception to an EL which is being debugged 13971 * and this is the first insn in the exception handler 13972 * b) debug exceptions were masked and we just unmasked them 13973 * without changing EL (eg by clearing PSTATE.D) 13974 * In either case we're going to take a swstep exception in the 13975 * "did not step an insn" case, and so the syndrome ISV and EX 13976 * bits should be zero. 13977 */ 13978 assert(s->base.num_insns == 1); 13979 gen_swstep_exception(s, 0, 0); 13980 s->base.is_jmp = DISAS_NORETURN; 13981 s->base.pc_next = pc + 4; 13982 return; 13983 } 13984 13985 if (pc & 3) { 13986 /* 13987 * PC alignment fault. This has priority over the instruction abort 13988 * that we would receive from a translation fault via arm_ldl_code. 13989 * This should only be possible after an indirect branch, at the 13990 * start of the TB. 13991 */ 13992 assert(s->base.num_insns == 1); 13993 gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc)); 13994 s->base.is_jmp = DISAS_NORETURN; 13995 s->base.pc_next = QEMU_ALIGN_UP(pc, 4); 13996 return; 13997 } 13998 13999 s->pc_curr = pc; 14000 insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b); 14001 s->insn = insn; 14002 s->base.pc_next = pc + 4; 14003 14004 s->fp_access_checked = false; 14005 s->sve_access_checked = false; 14006 14007 if (s->pstate_il) { 14008 /* 14009 * Illegal execution state. 

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /*
             * At the first insn of the TB, compute s->guarded_page.
             * We delayed computing this until successfully reading
             * the first insn of the TB, above. This (mostly) ensures
             * that the softmmu tlb entry has been populated, and the
             * page table GP bit is available.
             *
             * Note that we need to compute this even if btype == 0,
             * because this value is used for BR instructions later
             * where ENV is not available.
             */
            s->guarded_page = is_guarded_page(env, s);

            /* First insn can have btype set to non-zero. */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above most
             * everything else. This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             */
            if (s->btype != 0
                && s->guarded_page
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
                return;
            }
        } else {
            /* Not the first insn: btype must be 0. */
            tcg_debug_assert(s->btype == 0);
        }
    }

    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }

    if (!disas_a64(s, insn) &&
        !disas_sme(s, insn) &&
        !disas_sve(s, insn)) {
        disas_a64_legacy(s, insn);
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}
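
/*
 * Emit the end-of-TB code: when single-stepping is active this raises the
 * software step exception, otherwise it chooses between chaining to the
 * next TB (gen_goto_tb), exiting to the main loop, jumping via the TB
 * lookup helper, or calling the WFI/WFE/YIELD helpers, according to
 * dc->base.is_jmp.
 */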
static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            gen_step_complete_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(cpu_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
            gen_helper_wfi(cpu_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}

static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
                                 CPUState *cpu, FILE *logfile)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
    target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
    .disas_log          = aarch64_tr_disas_log,
};
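
/*
 * The ops table above is the AArch64 entry point into the common
 * translator: gen_intermediate_code() in translate.c selects it when the
 * TB flags indicate AArch64 state and passes it to translator_loop(),
 * which drives the init_disas_context / insn_start / translate_insn /
 * tb_stop hooks for each translation block.
 */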