/*
 * AArch64 translation
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "exec/exec-all.h"
#include "translate.h"
#include "translate-a64.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "semihosting/semihost.h"
#include "cpregs.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Helpers for extracting complex instruction fields
 */

/*
 * For load/store with an unsigned 12 bit immediate scaled by the element
 * size. The input has the immediate field in bits [14:3] and the element
 * size in [2:0].
 */
static int uimm_scaled(DisasContext *s, int x)
{
    unsigned imm = x >> 3;
    unsigned scale = extract32(x, 0, 3);
    return imm << scale;
}
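
/*
 * Worked example of the field layout above: x == 0x23 encodes imm == 4
 * and scale == 3, so the byte offset returned is 4 << 3 == 32.
 */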

/* For load/store memory tags: scale offset by LOG2_TAG_GRANULE */
static int scale_by_log2_tag_granule(DisasContext *s, int x)
{
    return x << LOG2_TAG_GRANULE;
}

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* initialize TCG globals.  */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(tcg_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(tcg_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(tcg_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 load/store insns which
 * have an "unprivileged load/store" variant. Those insns access
 * EL0 if executed from an EL which has control over EL0 (usually
 * EL1) but behave like normal loads and stores if executed from
 * elsewhere (eg EL3).
 *
 * @unpriv : true for the unprivileged encoding; false for the
 *           normal encoding (in which case we will return the same
 *           thing as get_mem_index()).
 */
static int get_a64_user_mem_index(DisasContext *s, bool unpriv)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (unpriv && s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), tcg_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype.  */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55.  */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}
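
/*
 * Illustration of the two-range case above: with tbi == 1 (TBI0 set,
 * TBI1 clear), an address with bit 55 clear ends up with bits [63:56]
 * cleared, since the AND cannot set bits that the extension left zero,
 * while an address with bit 55 set keeps its original top byte because
 * the AND with the all-ones extension preserves it.
 */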

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing. Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register. But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(tcg_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access. This probes a single
 * address, the exact one specified. The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      MemOp memop, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(memop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, MemOp memop)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int total_size, MemOp single_mop)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, ALIGN, memop_alignment_bits(single_mop));
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, tcg_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

/*
 * Generate the special alignment check that applies to AccType_ATOMIC
 * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
 * naturally aligned, but it must not cross a 16-byte boundary.
 * See AArch64.CheckAlignment().
 */
static void check_lse2_align(DisasContext *s, int rn, int imm,
                             bool is_write, MemOp mop)
{
    TCGv_i32 tmp;
    TCGv_i64 addr;
    TCGLabel *over_label;
    MMUAccessType type;
    int mmu_idx;

    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
    tcg_gen_addi_i32(tmp, tmp, imm & 15);
    tcg_gen_andi_i32(tmp, tmp, 15);
    tcg_gen_addi_i32(tmp, tmp, memop_size(mop));

    over_label = gen_new_label();
    tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);

    addr = tcg_temp_new_i64();
    tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);

    type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
    mmu_idx = get_mem_index(s);
    gen_helper_unaligned_access(tcg_env, addr, tcg_constant_i32(type),
                                tcg_constant_i32(mmu_idx));

    gen_set_label(over_label);
}
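
/*
 * For example, an 8-byte access whose effective address has 0xc in its low
 * four bits computes tmp == 12 + 8 == 20 above, which exceeds 16, so the
 * access crosses a 16-byte boundary and the unaligned-access helper is
 * invoked; the same access at a low-nibble offset of 0x8 yields exactly 16
 * and is allowed.
 */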

/* Handle the alignment check for AccType_ATOMIC instructions.  */
static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }

    /*
     * If size == MO_128, this is a LDXP, and the operation is single-copy
     * atomic for each doubleword, not the entire quadword; it still must
     * be quadword aligned.
     */
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (dc_isar_feature(aa64_lse2, s)) {
        check_lse2_align(s, rn, 0, true, mop);
    } else {
        mop |= MO_ALIGN;
    }
    return finalize_memop(s, mop);
}

/* Handle the alignment check for AccType_ORDERED instructions. */
static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
                                 bool is_write, MemOp mop)
{
    MemOp size = mop & MO_SIZE;

    if (size == MO_8) {
        return mop;
    }
    if (size == MO_128) {
        return finalize_memop_atom(s, MO_128 | MO_ALIGN,
                                   MO_ATOM_IFALIGN_PAIR);
    }
    if (!dc_isar_feature(aa64_lse2, s)) {
        mop |= MO_ALIGN;
    } else if (!s->naa) {
        check_lse2_align(s, rn, imm, is_write, mop);
    }
    return finalize_memop(s, mop);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly. The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(tcg_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(tcg_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(tcg_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments. For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path. A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register in cases
 * where changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, tcg_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, tcg_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, tcg_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, tcg_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}

/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function.
 */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper. */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper. */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 4-operand operation using an out-of-line helper. */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand operation using an out-of-line helper that takes
 * a pointer to the CPU env.
 */
static void gen_gvec_op4_env(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data,
                             gen_helper_gvec_4_ptr *fn)
{
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       tcg_env,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 result, flag, tmp;
    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    tcg_gen_movi_i64(tmp, 0);
    tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    gen_set_NZ64(result);

    tcg_gen_xor_i64(flag, result, t0);
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_andc_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);

    tcg_gen_mov_i64(dest, result);
}
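
/*
 * Note on the overflow computation above: signed overflow can only occur
 * when the two addends have the same sign and the result's sign differs,
 * which is exactly when bit 63 of (result ^ t0) & ~(t0 ^ t1) is set; the
 * high half of that value becomes cpu_VF, whose sign bit is the V flag.
 */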

static void gen_add32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp = tcg_temp_new_i32();

    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_add64_CC(dest, t0, t1);
    } else {
        gen_add32_CC(dest, t0, t1);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub64_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 64 bit arithmetic */
    TCGv_i64 result, flag, tmp;

    result = tcg_temp_new_i64();
    flag = tcg_temp_new_i64();
    tcg_gen_sub_i64(result, t0, t1);

    gen_set_NZ64(result);

    tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
    tcg_gen_extrl_i64_i32(cpu_CF, flag);

    tcg_gen_xor_i64(flag, result, t0);
    tmp = tcg_temp_new_i64();
    tcg_gen_xor_i64(tmp, t0, t1);
    tcg_gen_and_i64(flag, flag, tmp);
    tcg_gen_extrh_i64_i32(cpu_VF, flag);
    tcg_gen_mov_i64(dest, result);
}

static void gen_sub32_CC(TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    /* 32 bit arithmetic */
    TCGv_i32 t0_32 = tcg_temp_new_i32();
    TCGv_i32 t1_32 = tcg_temp_new_i32();
    TCGv_i32 tmp;

    tcg_gen_extrl_i64_i32(t0_32, t0);
    tcg_gen_extrl_i64_i32(t1_32, t1);
    tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0_32, t1_32);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_gen_extu_i32_i64(dest, cpu_NF);
}

static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        gen_sub64_CC(dest, t0, t1);
    } else {
        gen_sub32_CC(dest, t0, t1);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();

    tcg_gen_ld_i64(tmplo, tcg_env, fp_reg_offset(s, srcidx, MO_64));

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i64 tmphi = tcg_temp_new_i64();
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_ld_i64(tmphi, tcg_env, fp_reg_hi_offset(s, srcidx));
        tcg_gen_concat_i64_i128(t16, tmplo, tmphi);

        tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
    }
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;

    if ((mop & MO_SIZE) < MO_128) {
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        TCGv_i128 t16 = tcg_temp_new_i128();

        tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);

        tmphi = tcg_temp_new_i64();
        tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
    }

    tcg_gen_st_i64(tmplo, tcg_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, tcg_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, tcg_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, tcg_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, tcg_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, tcg_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, tcg_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = -1;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = 1;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Check that SVE access is enabled. If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        bool ret;

        assert(dc_isar_feature(aa64_sme, s));
        ret = sme_sm_enabled_check(s);
        s->sve_access_checked = (ret ? 1 : -1);
        return ret;
    }
    if (s->sve_excp_el) {
        /* Assert that we only raise one exception per instruction.  */
        assert(!s->sve_access_checked);
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        s->sve_access_checked = -1;
        return false;
    }
    s->sve_access_checked = 1;
    return fp_access_check(s);
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority. This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        bool ret = sme_access_check(s);
        s->fp_access_checked = (ret ? 1 : -1);
        return ret;
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * Expanders for AdvSIMD translation functions.
 */

static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data,
                            gen_helper_gvec_2 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn);
    }
    return true;
}

static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn);
    }
    return true;
}

static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz);
    }
    return true;
}

static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
{
    if (a->esz == MO_8) {
        return false;
    }
    return do_gvec_fn3_no64(s, a, fn);
}

static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
{
    if (!a->q && a->esz == MO_64) {
        return false;
    }
    if (fp_access_check(s)) {
        gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz);
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    tcg_gen_ext_i64(tcg_out, tcg_in, extsize | (is_signed ? MO_SIGN : 0));
    tcg_gen_shli_i64(tcg_out, tcg_out, shift);
}
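
/*
 * For example, option == 2 with shift == 2 (UXTW #2) zero-extends the low
 * 32 bits of tcg_in and then shifts the result left by two bits.
 */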

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is
 * intended to be used when the relevant bits for decode are too
 * awkwardly placed and switch/if based logic would be confusing and
 * deeply nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

static bool trans_B(DisasContext *s, arg_i *a)
{
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}

static bool trans_BL(DisasContext *s, arg_i *a)
{
    gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    reset_btype(s);
    gen_goto_tb(s, 0, a->imm);
    return true;
}


static bool trans_CBZ(DisasContext *s, arg_cbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = read_cpu_reg(s, a->rt, a->sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_TBZ(DisasContext *s, arg_tbz *a)
{
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, a->rt), 1ULL << a->bitpos);

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(a->nz ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, a->imm);
    return true;
}

static bool trans_B_cond(DisasContext *s, arg_B_cond *a)
{
    /* BC.cond is only present with FEAT_HBC */
    if (a->c && !dc_isar_feature(aa64_hbc, s)) {
        return false;
    }
    reset_btype(s);
    if (a->cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(a->cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, a->imm);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, a->imm);
    }
    return true;
}

static void set_btype_for_br(DisasContext *s, int rn)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BR to {x16,x17} or !guard -> 1, else 3.  */
        if (rn == 16 || rn == 17) {
            set_btype(s, 1);
        } else {
            TCGv_i64 pc = tcg_temp_new_i64();
            gen_pc_plus_diff(s, pc, 0);
            gen_helper_guarded_page_br(tcg_env, pc);
            s->btype = -1;
        }
    }
}

static void set_btype_for_blr(DisasContext *s)
{
    if (dc_isar_feature(aa64_bti, s)) {
        /* BLR sets BTYPE to 2, regardless of source guarded page.  */
        set_btype(s, 2);
    }
}

static bool trans_BR(DisasContext *s, arg_r *a)
{
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLR(DisasContext *s, arg_r *a)
{
    TCGv_i64 dst = cpu_reg(s, a->rn);
    TCGv_i64 lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RET(DisasContext *s, arg_r *a)
{
    gen_a64_set_pc(s, cpu_reg(s, a->rn));
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static TCGv_i64 auth_branch_target(DisasContext *s, TCGv_i64 dst,
                                   TCGv_i64 modifier, bool use_key_a)
{
    TCGv_i64 truedst;
    /*
     * Return the branch target for a BRAA/RETA/etc, which is either
     * just the destination dst, or that value with the pauth check
     * done and the code removed from the high bits.
     */
    if (!s->pauth_active) {
        return dst;
    }

    truedst = tcg_temp_new_i64();
    if (use_key_a) {
        gen_helper_autia_combined(truedst, tcg_env, dst, modifier);
    } else {
        gen_helper_autib_combined(truedst, tcg_env, dst, modifier);
    }
    return truedst;
}

static bool trans_BRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    set_btype_for_br(s, a->rn);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRAZ(DisasContext *s, arg_braz *a)
{
    TCGv_i64 dst, lr;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }

    dst = auth_branch_target(s, cpu_reg(s, a->rn), tcg_constant_i64(0), !a->m);
    lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_RETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    dst = auth_branch_target(s, cpu_reg(s, 30), cpu_X[31], !a->m);
    gen_a64_set_pc(s, dst);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
    gen_a64_set_pc(s, dst);
    set_btype_for_br(s, a->rn);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_BLRA(DisasContext *s, arg_bra *a)
{
    TCGv_i64 dst, lr;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    dst = auth_branch_target(s, cpu_reg(s, a->rn), cpu_reg_sp(s, a->rm), !a->m);
    lr = cpu_reg(s, 30);
    if (dst == lr) {
        TCGv_i64 tmp = tcg_temp_new_i64();
        tcg_gen_mov_i64(tmp, dst);
        dst = tmp;
    }
    gen_pc_plus_diff(s, lr, curr_insn_len(s));
    gen_a64_set_pc(s, dst);
    set_btype_for_blr(s);
    s->base.is_jmp = DISAS_JUMP;
    return true;
}

static bool trans_ERET(DisasContext *s, arg_ERET *a)
{
    TCGv_i64 dst;

    if (s->current_el == 0) {
        return false;
    }
    if (s->trap_eret) {
        gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(0), 2);
        return true;
    }
    dst = tcg_temp_new_i64();
    tcg_gen_ld_i64(dst, tcg_env,
                   offsetof(CPUARMState, elr_el[s->current_el]));

    translator_io_start(&s->base);

    gen_helper_exception_return(tcg_env, dst);
    /* Must exit loop to check un-masked IRQs */
    s->base.is_jmp = DISAS_EXIT;
    return true;
}

static bool trans_ERETA(DisasContext *s, arg_reta *a)
{
    TCGv_i64 dst;

    if (!dc_isar_feature(aa64_pauth, s)) {
        return false;
    }
    if (s->current_el == 0) {
        return false;
    }
    /* The FGT trap takes precedence over an auth trap.  */
*/ 1715 if (s->trap_eret) { 1716 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(a->m ? 3 : 2), 2); 1717 return true; 1718 } 1719 dst = tcg_temp_new_i64(); 1720 tcg_gen_ld_i64(dst, tcg_env, 1721 offsetof(CPUARMState, elr_el[s->current_el])); 1722 1723 dst = auth_branch_target(s, dst, cpu_X[31], !a->m); 1724 1725 translator_io_start(&s->base); 1726 1727 gen_helper_exception_return(tcg_env, dst); 1728 /* Must exit loop to check un-masked IRQs */ 1729 s->base.is_jmp = DISAS_EXIT; 1730 return true; 1731 } 1732 trans_NOP(DisasContext * s,arg_NOP * a)1733 static bool trans_NOP(DisasContext *s, arg_NOP *a) 1734 { 1735 return true; 1736 } 1737 trans_YIELD(DisasContext * s,arg_YIELD * a)1738 static bool trans_YIELD(DisasContext *s, arg_YIELD *a) 1739 { 1740 /* 1741 * When running in MTTCG we don't generate jumps to the yield and 1742 * WFE helpers as it won't affect the scheduling of other vCPUs. 1743 * If we wanted to more completely model WFE/SEV so we don't busy 1744 * spin unnecessarily we would need to do something more involved. 1745 */ 1746 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1747 s->base.is_jmp = DISAS_YIELD; 1748 } 1749 return true; 1750 } 1751 trans_WFI(DisasContext * s,arg_WFI * a)1752 static bool trans_WFI(DisasContext *s, arg_WFI *a) 1753 { 1754 s->base.is_jmp = DISAS_WFI; 1755 return true; 1756 } 1757 trans_WFE(DisasContext * s,arg_WFI * a)1758 static bool trans_WFE(DisasContext *s, arg_WFI *a) 1759 { 1760 /* 1761 * When running in MTTCG we don't generate jumps to the yield and 1762 * WFE helpers as it won't affect the scheduling of other vCPUs. 1763 * If we wanted to more completely model WFE/SEV so we don't busy 1764 * spin unnecessarily we would need to do something more involved. 1765 */ 1766 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1767 s->base.is_jmp = DISAS_WFE; 1768 } 1769 return true; 1770 } 1771 trans_WFIT(DisasContext * s,arg_WFIT * a)1772 static bool trans_WFIT(DisasContext *s, arg_WFIT *a) 1773 { 1774 if (!dc_isar_feature(aa64_wfxt, s)) { 1775 return false; 1776 } 1777 1778 /* 1779 * Because we need to pass the register value to the helper, 1780 * it's easier to emit the code now, unlike trans_WFI which 1781 * defers it to aarch64_tr_tb_stop(). That means we need to 1782 * check ss_active so that single-stepping a WFIT doesn't halt. 1783 */ 1784 if (s->ss_active) { 1785 /* Act like a NOP under architectural singlestep */ 1786 return true; 1787 } 1788 1789 gen_a64_update_pc(s, 4); 1790 gen_helper_wfit(tcg_env, cpu_reg(s, a->rd)); 1791 /* Go back to the main loop to check for interrupts */ 1792 s->base.is_jmp = DISAS_EXIT; 1793 return true; 1794 } 1795 trans_WFET(DisasContext * s,arg_WFET * a)1796 static bool trans_WFET(DisasContext *s, arg_WFET *a) 1797 { 1798 if (!dc_isar_feature(aa64_wfxt, s)) { 1799 return false; 1800 } 1801 1802 /* 1803 * We rely here on our WFE implementation being a NOP, so we 1804 * don't need to do anything different to handle the WFET timeout 1805 * from what trans_WFE does. 
1806 */ 1807 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { 1808 s->base.is_jmp = DISAS_WFE; 1809 } 1810 return true; 1811 } 1812 trans_XPACLRI(DisasContext * s,arg_XPACLRI * a)1813 static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) 1814 { 1815 if (s->pauth_active) { 1816 gen_helper_xpaci(cpu_X[30], tcg_env, cpu_X[30]); 1817 } 1818 return true; 1819 } 1820 trans_PACIA1716(DisasContext * s,arg_PACIA1716 * a)1821 static bool trans_PACIA1716(DisasContext *s, arg_PACIA1716 *a) 1822 { 1823 if (s->pauth_active) { 1824 gen_helper_pacia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1825 } 1826 return true; 1827 } 1828 trans_PACIB1716(DisasContext * s,arg_PACIB1716 * a)1829 static bool trans_PACIB1716(DisasContext *s, arg_PACIB1716 *a) 1830 { 1831 if (s->pauth_active) { 1832 gen_helper_pacib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1833 } 1834 return true; 1835 } 1836 trans_AUTIA1716(DisasContext * s,arg_AUTIA1716 * a)1837 static bool trans_AUTIA1716(DisasContext *s, arg_AUTIA1716 *a) 1838 { 1839 if (s->pauth_active) { 1840 gen_helper_autia(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1841 } 1842 return true; 1843 } 1844 trans_AUTIB1716(DisasContext * s,arg_AUTIB1716 * a)1845 static bool trans_AUTIB1716(DisasContext *s, arg_AUTIB1716 *a) 1846 { 1847 if (s->pauth_active) { 1848 gen_helper_autib(cpu_X[17], tcg_env, cpu_X[17], cpu_X[16]); 1849 } 1850 return true; 1851 } 1852 trans_ESB(DisasContext * s,arg_ESB * a)1853 static bool trans_ESB(DisasContext *s, arg_ESB *a) 1854 { 1855 /* Without RAS, we must implement this as NOP. */ 1856 if (dc_isar_feature(aa64_ras, s)) { 1857 /* 1858 * QEMU does not have a source of physical SErrors, 1859 * so we are only concerned with virtual SErrors. 1860 * The pseudocode in the ARM for this case is 1861 * if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then 1862 * AArch64.vESBOperation(); 1863 * Most of the condition can be evaluated at translation time. 1864 * Test for EL2 present, and defer test for SEL2 to runtime. 
1865 */ 1866 if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) { 1867 gen_helper_vesb(tcg_env); 1868 } 1869 } 1870 return true; 1871 } 1872 trans_PACIAZ(DisasContext * s,arg_PACIAZ * a)1873 static bool trans_PACIAZ(DisasContext *s, arg_PACIAZ *a) 1874 { 1875 if (s->pauth_active) { 1876 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1877 } 1878 return true; 1879 } 1880 trans_PACIASP(DisasContext * s,arg_PACIASP * a)1881 static bool trans_PACIASP(DisasContext *s, arg_PACIASP *a) 1882 { 1883 if (s->pauth_active) { 1884 gen_helper_pacia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1885 } 1886 return true; 1887 } 1888 trans_PACIBZ(DisasContext * s,arg_PACIBZ * a)1889 static bool trans_PACIBZ(DisasContext *s, arg_PACIBZ *a) 1890 { 1891 if (s->pauth_active) { 1892 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1893 } 1894 return true; 1895 } 1896 trans_PACIBSP(DisasContext * s,arg_PACIBSP * a)1897 static bool trans_PACIBSP(DisasContext *s, arg_PACIBSP *a) 1898 { 1899 if (s->pauth_active) { 1900 gen_helper_pacib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1901 } 1902 return true; 1903 } 1904 trans_AUTIAZ(DisasContext * s,arg_AUTIAZ * a)1905 static bool trans_AUTIAZ(DisasContext *s, arg_AUTIAZ *a) 1906 { 1907 if (s->pauth_active) { 1908 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1909 } 1910 return true; 1911 } 1912 trans_AUTIASP(DisasContext * s,arg_AUTIASP * a)1913 static bool trans_AUTIASP(DisasContext *s, arg_AUTIASP *a) 1914 { 1915 if (s->pauth_active) { 1916 gen_helper_autia(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1917 } 1918 return true; 1919 } 1920 trans_AUTIBZ(DisasContext * s,arg_AUTIBZ * a)1921 static bool trans_AUTIBZ(DisasContext *s, arg_AUTIBZ *a) 1922 { 1923 if (s->pauth_active) { 1924 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], tcg_constant_i64(0)); 1925 } 1926 return true; 1927 } 1928 trans_AUTIBSP(DisasContext * s,arg_AUTIBSP * a)1929 static bool trans_AUTIBSP(DisasContext *s, arg_AUTIBSP *a) 1930 { 1931 if (s->pauth_active) { 1932 gen_helper_autib(cpu_X[30], tcg_env, cpu_X[30], cpu_X[31]); 1933 } 1934 return true; 1935 } 1936 trans_CLREX(DisasContext * s,arg_CLREX * a)1937 static bool trans_CLREX(DisasContext *s, arg_CLREX *a) 1938 { 1939 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 1940 return true; 1941 } 1942 trans_DSB_DMB(DisasContext * s,arg_DSB_DMB * a)1943 static bool trans_DSB_DMB(DisasContext *s, arg_DSB_DMB *a) 1944 { 1945 /* We handle DSB and DMB the same way */ 1946 TCGBar bar; 1947 1948 switch (a->types) { 1949 case 1: /* MBReqTypes_Reads */ 1950 bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST; 1951 break; 1952 case 2: /* MBReqTypes_Writes */ 1953 bar = TCG_BAR_SC | TCG_MO_ST_ST; 1954 break; 1955 default: /* MBReqTypes_All */ 1956 bar = TCG_BAR_SC | TCG_MO_ALL; 1957 break; 1958 } 1959 tcg_gen_mb(bar); 1960 return true; 1961 } 1962 trans_ISB(DisasContext * s,arg_ISB * a)1963 static bool trans_ISB(DisasContext *s, arg_ISB *a) 1964 { 1965 /* 1966 * We need to break the TB after this insn to execute 1967 * self-modifying code correctly and also to take 1968 * any pending interrupts immediately. 1969 */ 1970 reset_btype(s); 1971 gen_goto_tb(s, 0, 4); 1972 return true; 1973 } 1974 trans_SB(DisasContext * s,arg_SB * a)1975 static bool trans_SB(DisasContext *s, arg_SB *a) 1976 { 1977 if (!dc_isar_feature(aa64_sb, s)) { 1978 return false; 1979 } 1980 /* 1981 * TODO: There is no speculation barrier opcode for TCG; 1982 * MB and end the TB instead. 
1983 */ 1984 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); 1985 gen_goto_tb(s, 0, 4); 1986 return true; 1987 } 1988 trans_CFINV(DisasContext * s,arg_CFINV * a)1989 static bool trans_CFINV(DisasContext *s, arg_CFINV *a) 1990 { 1991 if (!dc_isar_feature(aa64_condm_4, s)) { 1992 return false; 1993 } 1994 tcg_gen_xori_i32(cpu_CF, cpu_CF, 1); 1995 return true; 1996 } 1997 trans_XAFLAG(DisasContext * s,arg_XAFLAG * a)1998 static bool trans_XAFLAG(DisasContext *s, arg_XAFLAG *a) 1999 { 2000 TCGv_i32 z; 2001 2002 if (!dc_isar_feature(aa64_condm_5, s)) { 2003 return false; 2004 } 2005 2006 z = tcg_temp_new_i32(); 2007 2008 tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0); 2009 2010 /* 2011 * (!C & !Z) << 31 2012 * (!(C | Z)) << 31 2013 * ~((C | Z) << 31) 2014 * ~-(C | Z) 2015 * (C | Z) - 1 2016 */ 2017 tcg_gen_or_i32(cpu_NF, cpu_CF, z); 2018 tcg_gen_subi_i32(cpu_NF, cpu_NF, 1); 2019 2020 /* !(Z & C) */ 2021 tcg_gen_and_i32(cpu_ZF, z, cpu_CF); 2022 tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1); 2023 2024 /* (!C & Z) << 31 -> -(Z & ~C) */ 2025 tcg_gen_andc_i32(cpu_VF, z, cpu_CF); 2026 tcg_gen_neg_i32(cpu_VF, cpu_VF); 2027 2028 /* C | Z */ 2029 tcg_gen_or_i32(cpu_CF, cpu_CF, z); 2030 2031 return true; 2032 } 2033 trans_AXFLAG(DisasContext * s,arg_AXFLAG * a)2034 static bool trans_AXFLAG(DisasContext *s, arg_AXFLAG *a) 2035 { 2036 if (!dc_isar_feature(aa64_condm_5, s)) { 2037 return false; 2038 } 2039 2040 tcg_gen_sari_i32(cpu_VF, cpu_VF, 31); /* V ? -1 : 0 */ 2041 tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF); /* C & !V */ 2042 2043 /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */ 2044 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF); 2045 2046 tcg_gen_movi_i32(cpu_NF, 0); 2047 tcg_gen_movi_i32(cpu_VF, 0); 2048 2049 return true; 2050 } 2051 trans_MSR_i_UAO(DisasContext * s,arg_i * a)2052 static bool trans_MSR_i_UAO(DisasContext *s, arg_i *a) 2053 { 2054 if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) { 2055 return false; 2056 } 2057 if (a->imm & 1) { 2058 set_pstate_bits(PSTATE_UAO); 2059 } else { 2060 clear_pstate_bits(PSTATE_UAO); 2061 } 2062 gen_rebuild_hflags(s); 2063 s->base.is_jmp = DISAS_TOO_MANY; 2064 return true; 2065 } 2066 trans_MSR_i_PAN(DisasContext * s,arg_i * a)2067 static bool trans_MSR_i_PAN(DisasContext *s, arg_i *a) 2068 { 2069 if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) { 2070 return false; 2071 } 2072 if (a->imm & 1) { 2073 set_pstate_bits(PSTATE_PAN); 2074 } else { 2075 clear_pstate_bits(PSTATE_PAN); 2076 } 2077 gen_rebuild_hflags(s); 2078 s->base.is_jmp = DISAS_TOO_MANY; 2079 return true; 2080 } 2081 trans_MSR_i_SPSEL(DisasContext * s,arg_i * a)2082 static bool trans_MSR_i_SPSEL(DisasContext *s, arg_i *a) 2083 { 2084 if (s->current_el == 0) { 2085 return false; 2086 } 2087 gen_helper_msr_i_spsel(tcg_env, tcg_constant_i32(a->imm & PSTATE_SP)); 2088 s->base.is_jmp = DISAS_TOO_MANY; 2089 return true; 2090 } 2091 trans_MSR_i_SBSS(DisasContext * s,arg_i * a)2092 static bool trans_MSR_i_SBSS(DisasContext *s, arg_i *a) 2093 { 2094 if (!dc_isar_feature(aa64_ssbs, s)) { 2095 return false; 2096 } 2097 if (a->imm & 1) { 2098 set_pstate_bits(PSTATE_SSBS); 2099 } else { 2100 clear_pstate_bits(PSTATE_SSBS); 2101 } 2102 /* Don't need to rebuild hflags since SSBS is a nop */ 2103 s->base.is_jmp = DISAS_TOO_MANY; 2104 return true; 2105 } 2106 trans_MSR_i_DIT(DisasContext * s,arg_i * a)2107 static bool trans_MSR_i_DIT(DisasContext *s, arg_i *a) 2108 { 2109 if (!dc_isar_feature(aa64_dit, s)) { 2110 return false; 2111 } 2112 if (a->imm & 1) { 2113 set_pstate_bits(PSTATE_DIT); 2114 } else { 2115 
clear_pstate_bits(PSTATE_DIT); 2116 } 2117 /* There's no need to rebuild hflags because DIT is a nop */ 2118 s->base.is_jmp = DISAS_TOO_MANY; 2119 return true; 2120 } 2121 trans_MSR_i_TCO(DisasContext * s,arg_i * a)2122 static bool trans_MSR_i_TCO(DisasContext *s, arg_i *a) 2123 { 2124 if (dc_isar_feature(aa64_mte, s)) { 2125 /* Full MTE is enabled -- set the TCO bit as directed. */ 2126 if (a->imm & 1) { 2127 set_pstate_bits(PSTATE_TCO); 2128 } else { 2129 clear_pstate_bits(PSTATE_TCO); 2130 } 2131 gen_rebuild_hflags(s); 2132 /* Many factors, including TCO, go into MTE_ACTIVE. */ 2133 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2134 return true; 2135 } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { 2136 /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ 2137 return true; 2138 } else { 2139 /* Insn not present */ 2140 return false; 2141 } 2142 } 2143 trans_MSR_i_DAIFSET(DisasContext * s,arg_i * a)2144 static bool trans_MSR_i_DAIFSET(DisasContext *s, arg_i *a) 2145 { 2146 gen_helper_msr_i_daifset(tcg_env, tcg_constant_i32(a->imm)); 2147 s->base.is_jmp = DISAS_TOO_MANY; 2148 return true; 2149 } 2150 trans_MSR_i_DAIFCLEAR(DisasContext * s,arg_i * a)2151 static bool trans_MSR_i_DAIFCLEAR(DisasContext *s, arg_i *a) 2152 { 2153 gen_helper_msr_i_daifclear(tcg_env, tcg_constant_i32(a->imm)); 2154 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2155 s->base.is_jmp = DISAS_UPDATE_EXIT; 2156 return true; 2157 } 2158 trans_MSR_i_ALLINT(DisasContext * s,arg_i * a)2159 static bool trans_MSR_i_ALLINT(DisasContext *s, arg_i *a) 2160 { 2161 if (!dc_isar_feature(aa64_nmi, s) || s->current_el == 0) { 2162 return false; 2163 } 2164 2165 if (a->imm == 0) { 2166 clear_pstate_bits(PSTATE_ALLINT); 2167 } else if (s->current_el > 1) { 2168 set_pstate_bits(PSTATE_ALLINT); 2169 } else { 2170 gen_helper_msr_set_allint_el1(tcg_env); 2171 } 2172 2173 /* Exit the cpu loop to re-evaluate pending IRQs. */ 2174 s->base.is_jmp = DISAS_UPDATE_EXIT; 2175 return true; 2176 } 2177 trans_MSR_i_SVCR(DisasContext * s,arg_MSR_i_SVCR * a)2178 static bool trans_MSR_i_SVCR(DisasContext *s, arg_MSR_i_SVCR *a) 2179 { 2180 if (!dc_isar_feature(aa64_sme, s) || a->mask == 0) { 2181 return false; 2182 } 2183 if (sme_access_check(s)) { 2184 int old = s->pstate_sm | (s->pstate_za << 1); 2185 int new = a->imm * 3; 2186 2187 if ((old ^ new) & a->mask) { 2188 /* At least one bit changes. 
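             * a->imm is 0 or 1, so 'new' is either 0b00 or 0b11, covering
             * both SM and ZA; a->mask then selects which of those bits this
             * particular MSR form actually writes.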
*/ 2189 gen_helper_set_svcr(tcg_env, tcg_constant_i32(new), 2190 tcg_constant_i32(a->mask)); 2191 s->base.is_jmp = DISAS_TOO_MANY; 2192 } 2193 } 2194 return true; 2195 } 2196 gen_get_nzcv(TCGv_i64 tcg_rt)2197 static void gen_get_nzcv(TCGv_i64 tcg_rt) 2198 { 2199 TCGv_i32 tmp = tcg_temp_new_i32(); 2200 TCGv_i32 nzcv = tcg_temp_new_i32(); 2201 2202 /* build bit 31, N */ 2203 tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31)); 2204 /* build bit 30, Z */ 2205 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0); 2206 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1); 2207 /* build bit 29, C */ 2208 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1); 2209 /* build bit 28, V */ 2210 tcg_gen_shri_i32(tmp, cpu_VF, 31); 2211 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1); 2212 /* generate result */ 2213 tcg_gen_extu_i32_i64(tcg_rt, nzcv); 2214 } 2215 gen_set_nzcv(TCGv_i64 tcg_rt)2216 static void gen_set_nzcv(TCGv_i64 tcg_rt) 2217 { 2218 TCGv_i32 nzcv = tcg_temp_new_i32(); 2219 2220 /* take NZCV from R[t] */ 2221 tcg_gen_extrl_i64_i32(nzcv, tcg_rt); 2222 2223 /* bit 31, N */ 2224 tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); 2225 /* bit 30, Z */ 2226 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30)); 2227 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0); 2228 /* bit 29, C */ 2229 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29)); 2230 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29); 2231 /* bit 28, V */ 2232 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28)); 2233 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3); 2234 } 2235 gen_sysreg_undef(DisasContext * s,bool isread,uint8_t op0,uint8_t op1,uint8_t op2,uint8_t crn,uint8_t crm,uint8_t rt)2236 static void gen_sysreg_undef(DisasContext *s, bool isread, 2237 uint8_t op0, uint8_t op1, uint8_t op2, 2238 uint8_t crn, uint8_t crm, uint8_t rt) 2239 { 2240 /* 2241 * Generate code to emit an UNDEF with correct syndrome 2242 * information for a failed system register access. 2243 * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases, 2244 * but if FEAT_IDST is implemented then read accesses to registers 2245 * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP 2246 * syndrome. 2247 */ 2248 uint32_t syndrome; 2249 2250 if (isread && dc_isar_feature(aa64_ids, s) && 2251 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 2252 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2253 } else { 2254 syndrome = syn_uncategorized(); 2255 } 2256 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 2257 } 2258 2259 /* MRS - move from system register 2260 * MSR (register) - move to system register 2261 * SYS 2262 * SYSL 2263 * These are all essentially the same insn in 'read' and 'write' 2264 * versions, with varying op0 fields. 
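 * (op0 == 1 encodes SYS/SYSL; op0 == 2 or 3 encodes the MRS/MSR
 * register forms proper.)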
2265 */ handle_sys(DisasContext * s,bool isread,unsigned int op0,unsigned int op1,unsigned int op2,unsigned int crn,unsigned int crm,unsigned int rt)2266 static void handle_sys(DisasContext *s, bool isread, 2267 unsigned int op0, unsigned int op1, unsigned int op2, 2268 unsigned int crn, unsigned int crm, unsigned int rt) 2269 { 2270 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2271 crn, crm, op0, op1, op2); 2272 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 2273 bool need_exit_tb = false; 2274 bool nv_trap_to_el2 = false; 2275 bool nv_redirect_reg = false; 2276 bool skip_fp_access_checks = false; 2277 bool nv2_mem_redirect = false; 2278 TCGv_ptr tcg_ri = NULL; 2279 TCGv_i64 tcg_rt; 2280 uint32_t syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 2281 2282 if (crn == 11 || crn == 15) { 2283 /* 2284 * Check for TIDCP trap, which must take precedence over 2285 * the UNDEF for "no such register" etc. 2286 */ 2287 switch (s->current_el) { 2288 case 0: 2289 if (dc_isar_feature(aa64_tidcp1, s)) { 2290 gen_helper_tidcp_el0(tcg_env, tcg_constant_i32(syndrome)); 2291 } 2292 break; 2293 case 1: 2294 gen_helper_tidcp_el1(tcg_env, tcg_constant_i32(syndrome)); 2295 break; 2296 } 2297 } 2298 2299 if (!ri) { 2300 /* Unknown register; this might be a guest error or a QEMU 2301 * unimplemented feature. 2302 */ 2303 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 2304 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 2305 isread ? "read" : "write", op0, op1, crn, crm, op2); 2306 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2307 return; 2308 } 2309 2310 if (s->nv2 && ri->nv2_redirect_offset) { 2311 /* 2312 * Some registers always redirect to memory; some only do so if 2313 * HCR_EL2.NV1 is 0, and some only if NV1 is 1 (these come in 2314 * pairs which share an offset; see the table in R_CSRPQ). 2315 */ 2316 if (ri->nv2_redirect_offset & NV2_REDIR_NV1) { 2317 nv2_mem_redirect = s->nv1; 2318 } else if (ri->nv2_redirect_offset & NV2_REDIR_NO_NV1) { 2319 nv2_mem_redirect = !s->nv1; 2320 } else { 2321 nv2_mem_redirect = true; 2322 } 2323 } 2324 2325 /* Check access permissions */ 2326 if (!cp_access_ok(s->current_el, ri, isread)) { 2327 /* 2328 * FEAT_NV/NV2 handling does not do the usual FP access checks 2329 * for registers only accessible at EL2 (though it *does* do them 2330 * for registers accessible at EL1). 2331 */ 2332 skip_fp_access_checks = true; 2333 if (s->nv2 && (ri->type & ARM_CP_NV2_REDIRECT)) { 2334 /* 2335 * This is one of the few EL2 registers which should redirect 2336 * to the equivalent EL1 register. We do that after running 2337 * the EL2 register's accessfn. 2338 */ 2339 nv_redirect_reg = true; 2340 assert(!nv2_mem_redirect); 2341 } else if (nv2_mem_redirect) { 2342 /* 2343 * NV2 redirect-to-memory takes precedence over trap to EL2 or 2344 * UNDEF to EL1. 2345 */ 2346 } else if (s->nv && arm_cpreg_traps_in_nv(ri)) { 2347 /* 2348 * This register / instruction exists and is an EL2 register, so 2349 * we must trap to EL2 if accessed in nested virtualization EL1 2350 * instead of UNDEFing. We'll do that after the usual access checks. 2351 * (This makes a difference only for a couple of registers like 2352 * VSTTBR_EL2 where the "UNDEF if NonSecure" should take priority 2353 * over the trap-to-EL2. 
Most trapped-by-FEAT_NV registers have 2354 * an accessfn which does nothing when called from EL1, because 2355 * the trap-to-EL3 controls which would apply to that register 2356 * at EL2 don't take priority over the FEAT_NV trap-to-EL2.) 2357 */ 2358 nv_trap_to_el2 = true; 2359 } else { 2360 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 2361 return; 2362 } 2363 } 2364 2365 if (ri->accessfn || (ri->fgt && s->fgt_active)) { 2366 /* Emit code to perform further access permissions checks at 2367 * runtime; this may result in an exception. 2368 */ 2369 gen_a64_update_pc(s, 0); 2370 tcg_ri = tcg_temp_new_ptr(); 2371 gen_helper_access_check_cp_reg(tcg_ri, tcg_env, 2372 tcg_constant_i32(key), 2373 tcg_constant_i32(syndrome), 2374 tcg_constant_i32(isread)); 2375 } else if (ri->type & ARM_CP_RAISES_EXC) { 2376 /* 2377 * The readfn or writefn might raise an exception; 2378 * synchronize the CPU state in case it does. 2379 */ 2380 gen_a64_update_pc(s, 0); 2381 } 2382 2383 if (!skip_fp_access_checks) { 2384 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2385 return; 2386 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2387 return; 2388 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2389 return; 2390 } 2391 } 2392 2393 if (nv_trap_to_el2) { 2394 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2395 return; 2396 } 2397 2398 if (nv_redirect_reg) { 2399 /* 2400 * FEAT_NV2 redirection of an EL2 register to an EL1 register. 2401 * Conveniently in all cases the encoding of the EL1 register is 2402 * identical to the EL2 register except that opc1 is 0. 2403 * Get the reginfo for the EL1 register to use for the actual access. 2404 * We don't use the EL1 register's access function, and 2405 * fine-grained-traps on EL1 also do not apply here. 2406 */ 2407 key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 2408 crn, crm, op0, 0, op2); 2409 ri = get_arm_cp_reginfo(s->cp_regs, key); 2410 assert(ri); 2411 assert(cp_access_ok(s->current_el, ri, isread)); 2412 /* 2413 * We might not have done an update_pc earlier, so check we don't 2414 * need it. We could support this in future if necessary. 2415 */ 2416 assert(!(ri->type & ARM_CP_RAISES_EXC)); 2417 } 2418 2419 if (nv2_mem_redirect) { 2420 /* 2421 * This system register is being redirected into an EL2 memory access. 2422 * This means it is not an IO operation, doesn't change hflags, 2423 * and need not end the TB, because it has no side effects. 2424 * 2425 * The access is 64-bit single copy atomic, guaranteed aligned because 2426 * of the definition of VCNR_EL2. Its endianness depends on 2427 * SCTLR_EL2.EE, not on the data endianness of EL1. 2428 * It is done under either the EL2 translation regime or the EL2&0 2429 * translation regime, depending on HCR_EL2.E2H. It behaves as if 2430 * PSTATE.PAN is 0. 2431 */ 2432 TCGv_i64 ptr = tcg_temp_new_i64(); 2433 MemOp mop = MO_64 | MO_ALIGN | MO_ATOM_IFALIGN; 2434 ARMMMUIdx armmemidx = s->nv2_mem_e20 ? ARMMMUIdx_E20_2 : ARMMMUIdx_E2; 2435 int memidx = arm_to_core_mmu_idx(armmemidx); 2436 uint32_t syn; 2437 2438 mop |= (s->nv2_mem_be ? 
MO_BE : MO_LE); 2439 2440 tcg_gen_ld_i64(ptr, tcg_env, offsetof(CPUARMState, cp15.vncr_el2)); 2441 tcg_gen_addi_i64(ptr, ptr, 2442 (ri->nv2_redirect_offset & ~NV2_REDIR_FLAG_MASK)); 2443 tcg_rt = cpu_reg(s, rt); 2444 2445 syn = syn_data_abort_vncr(0, !isread, 0); 2446 disas_set_insn_syndrome(s, syn); 2447 if (isread) { 2448 tcg_gen_qemu_ld_i64(tcg_rt, ptr, memidx, mop); 2449 } else { 2450 tcg_gen_qemu_st_i64(tcg_rt, ptr, memidx, mop); 2451 } 2452 return; 2453 } 2454 2455 /* Handle special cases first */ 2456 switch (ri->type & ARM_CP_SPECIAL_MASK) { 2457 case 0: 2458 break; 2459 case ARM_CP_NOP: 2460 return; 2461 case ARM_CP_NZCV: 2462 tcg_rt = cpu_reg(s, rt); 2463 if (isread) { 2464 gen_get_nzcv(tcg_rt); 2465 } else { 2466 gen_set_nzcv(tcg_rt); 2467 } 2468 return; 2469 case ARM_CP_CURRENTEL: 2470 { 2471 /* 2472 * Reads as current EL value from pstate, which is 2473 * guaranteed to be constant by the tb flags. 2474 * For nested virt we should report EL2. 2475 */ 2476 int el = s->nv ? 2 : s->current_el; 2477 tcg_rt = cpu_reg(s, rt); 2478 tcg_gen_movi_i64(tcg_rt, el << 2); 2479 return; 2480 } 2481 case ARM_CP_DC_ZVA: 2482 /* Writes clear the aligned block of memory which rt points into. */ 2483 if (s->mte_active[0]) { 2484 int desc = 0; 2485 2486 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 2487 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 2488 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 2489 2490 tcg_rt = tcg_temp_new_i64(); 2491 gen_helper_mte_check_zva(tcg_rt, tcg_env, 2492 tcg_constant_i32(desc), cpu_reg(s, rt)); 2493 } else { 2494 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 2495 } 2496 gen_helper_dc_zva(tcg_env, tcg_rt); 2497 return; 2498 case ARM_CP_DC_GVA: 2499 { 2500 TCGv_i64 clean_addr, tag; 2501 2502 /* 2503 * DC_GVA, like DC_ZVA, requires that we supply the original 2504 * pointer for an invalid page. Probe that address first. 2505 */ 2506 tcg_rt = cpu_reg(s, rt); 2507 clean_addr = clean_data_tbi(s, tcg_rt); 2508 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 2509 2510 if (s->ata[0]) { 2511 /* Extract the tag from the register to match STZGM. */ 2512 tag = tcg_temp_new_i64(); 2513 tcg_gen_shri_i64(tag, tcg_rt, 56); 2514 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2515 } 2516 } 2517 return; 2518 case ARM_CP_DC_GZVA: 2519 { 2520 TCGv_i64 clean_addr, tag; 2521 2522 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2523 tcg_rt = cpu_reg(s, rt); 2524 clean_addr = clean_data_tbi(s, tcg_rt); 2525 gen_helper_dc_zva(tcg_env, clean_addr); 2526 2527 if (s->ata[0]) { 2528 /* Extract the tag from the register to match STZGM. 
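             * The allocation tag sits in bits [59:56] of the pointer, so
             * the shift right by 56 below leaves it in the low bits of
             * 'tag', which is the form the stzgm_tags helper expects.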
*/ 2529 tag = tcg_temp_new_i64(); 2530 tcg_gen_shri_i64(tag, tcg_rt, 56); 2531 gen_helper_stzgm_tags(tcg_env, clean_addr, tag); 2532 } 2533 } 2534 return; 2535 default: 2536 g_assert_not_reached(); 2537 } 2538 2539 if (ri->type & ARM_CP_IO) { 2540 /* I/O operations must end the TB here (whether read or write) */ 2541 need_exit_tb = translator_io_start(&s->base); 2542 } 2543 2544 tcg_rt = cpu_reg(s, rt); 2545 2546 if (isread) { 2547 if (ri->type & ARM_CP_CONST) { 2548 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2549 } else if (ri->readfn) { 2550 if (!tcg_ri) { 2551 tcg_ri = gen_lookup_cp_reg(key); 2552 } 2553 gen_helper_get_cp_reg64(tcg_rt, tcg_env, tcg_ri); 2554 } else { 2555 tcg_gen_ld_i64(tcg_rt, tcg_env, ri->fieldoffset); 2556 } 2557 } else { 2558 if (ri->type & ARM_CP_CONST) { 2559 /* If not forbidden by access permissions, treat as WI */ 2560 return; 2561 } else if (ri->writefn) { 2562 if (!tcg_ri) { 2563 tcg_ri = gen_lookup_cp_reg(key); 2564 } 2565 gen_helper_set_cp_reg64(tcg_env, tcg_ri, tcg_rt); 2566 } else { 2567 tcg_gen_st_i64(tcg_rt, tcg_env, ri->fieldoffset); 2568 } 2569 } 2570 2571 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2572 /* 2573 * A write to any coprocessor register that ends a TB 2574 * must rebuild the hflags for the next TB. 2575 */ 2576 gen_rebuild_hflags(s); 2577 /* 2578 * We default to ending the TB on a coprocessor register write, 2579 * but allow this to be suppressed by the register definition 2580 * (usually only necessary to work around guest bugs). 2581 */ 2582 need_exit_tb = true; 2583 } 2584 if (need_exit_tb) { 2585 s->base.is_jmp = DISAS_UPDATE_EXIT; 2586 } 2587 } 2588 trans_SYS(DisasContext * s,arg_SYS * a)2589 static bool trans_SYS(DisasContext *s, arg_SYS *a) 2590 { 2591 handle_sys(s, a->l, a->op0, a->op1, a->op2, a->crn, a->crm, a->rt); 2592 return true; 2593 } 2594 trans_SVC(DisasContext * s,arg_i * a)2595 static bool trans_SVC(DisasContext *s, arg_i *a) 2596 { 2597 /* 2598 * For SVC, HVC and SMC we advance the single-step state 2599 * machine before taking the exception. This is architecturally 2600 * mandated, to ensure that single-stepping a system call 2601 * instruction works properly. 2602 */ 2603 uint32_t syndrome = syn_aa64_svc(a->imm); 2604 if (s->fgt_svc) { 2605 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2606 return true; 2607 } 2608 gen_ss_advance(s); 2609 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2610 return true; 2611 } 2612 trans_HVC(DisasContext * s,arg_i * a)2613 static bool trans_HVC(DisasContext *s, arg_i *a) 2614 { 2615 int target_el = s->current_el == 3 ? 3 : 2; 2616 2617 if (s->current_el == 0) { 2618 unallocated_encoding(s); 2619 return true; 2620 } 2621 /* 2622 * The pre HVC helper handles cases when HVC gets trapped 2623 * as an undefined insn by runtime configuration. 
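     * (e.g. SCR_EL3.HCE being clear when EL3 is implemented).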
2624 */ 2625 gen_a64_update_pc(s, 0); 2626 gen_helper_pre_hvc(tcg_env); 2627 /* Architecture requires ss advance before we do the actual work */ 2628 gen_ss_advance(s); 2629 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(a->imm), target_el); 2630 return true; 2631 } 2632 trans_SMC(DisasContext * s,arg_i * a)2633 static bool trans_SMC(DisasContext *s, arg_i *a) 2634 { 2635 if (s->current_el == 0) { 2636 unallocated_encoding(s); 2637 return true; 2638 } 2639 gen_a64_update_pc(s, 0); 2640 gen_helper_pre_smc(tcg_env, tcg_constant_i32(syn_aa64_smc(a->imm))); 2641 /* Architecture requires ss advance before we do the actual work */ 2642 gen_ss_advance(s); 2643 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(a->imm), 3); 2644 return true; 2645 } 2646 trans_BRK(DisasContext * s,arg_i * a)2647 static bool trans_BRK(DisasContext *s, arg_i *a) 2648 { 2649 gen_exception_bkpt_insn(s, syn_aa64_bkpt(a->imm)); 2650 return true; 2651 } 2652 trans_HLT(DisasContext * s,arg_i * a)2653 static bool trans_HLT(DisasContext *s, arg_i *a) 2654 { 2655 /* 2656 * HLT. This has two purposes. 2657 * Architecturally, it is an external halting debug instruction. 2658 * Since QEMU doesn't implement external debug, we treat this as 2659 * it is required for halting debug disabled: it will UNDEF. 2660 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2661 */ 2662 if (semihosting_enabled(s->current_el == 0) && a->imm == 0xf000) { 2663 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2664 } else { 2665 unallocated_encoding(s); 2666 } 2667 return true; 2668 } 2669 2670 /* 2671 * Load/Store exclusive instructions are implemented by remembering 2672 * the value/address loaded, and seeing if these are the same 2673 * when the store is performed. This is not actually the architecturally 2674 * mandated semantics, but it works for typical guest code sequences 2675 * and avoids having to monitor regular stores. 2676 * 2677 * The store exclusive uses the atomic cmpxchg primitives to avoid 2678 * races in multi-threaded linux-user and when MTTCG softmmu is 2679 * enabled. 
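 *
 * One consequence of the cmpxchg approach is that a store-exclusive can
 * succeed even if another CPU has meanwhile written the same value back
 * to the location (the ABA case), where a real exclusive monitor would
 * fail; this falls under the "works for typical guest code" caveat above.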
2680 */ gen_load_exclusive(DisasContext * s,int rt,int rt2,int rn,int size,bool is_pair)2681 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn, 2682 int size, bool is_pair) 2683 { 2684 int idx = get_mem_index(s); 2685 TCGv_i64 dirty_addr, clean_addr; 2686 MemOp memop = check_atomic_align(s, rn, size + is_pair); 2687 2688 s->is_ldex = true; 2689 dirty_addr = cpu_reg_sp(s, rn); 2690 clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop); 2691 2692 g_assert(size <= 3); 2693 if (is_pair) { 2694 g_assert(size >= 2); 2695 if (size == 2) { 2696 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2697 if (s->be_data == MO_LE) { 2698 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2699 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2700 } else { 2701 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2702 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2703 } 2704 } else { 2705 TCGv_i128 t16 = tcg_temp_new_i128(); 2706 2707 tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop); 2708 2709 if (s->be_data == MO_LE) { 2710 tcg_gen_extr_i128_i64(cpu_exclusive_val, 2711 cpu_exclusive_high, t16); 2712 } else { 2713 tcg_gen_extr_i128_i64(cpu_exclusive_high, 2714 cpu_exclusive_val, t16); 2715 } 2716 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2717 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2718 } 2719 } else { 2720 tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop); 2721 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2722 } 2723 tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr); 2724 } 2725 gen_store_exclusive(DisasContext * s,int rd,int rt,int rt2,int rn,int size,int is_pair)2726 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2727 int rn, int size, int is_pair) 2728 { 2729 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2730 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2731 * [addr] = {Rt}; 2732 * if (is_pair) { 2733 * [addr + datasize] = {Rt2}; 2734 * } 2735 * {Rd} = 0; 2736 * } else { 2737 * {Rd} = 1; 2738 * } 2739 * env->exclusive_addr = -1; 2740 */ 2741 TCGLabel *fail_label = gen_new_label(); 2742 TCGLabel *done_label = gen_new_label(); 2743 TCGv_i64 tmp, clean_addr; 2744 MemOp memop; 2745 2746 /* 2747 * FIXME: We are out of spec here. We have recorded only the address 2748 * from load_exclusive, not the entire range, and we assume that the 2749 * size of the access on both sides match. The architecture allows the 2750 * store to be smaller than the load, so long as the stored bytes are 2751 * within the range recorded by the load. 2752 */ 2753 2754 /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */ 2755 clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); 2756 tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label); 2757 2758 /* 2759 * The write, and any associated faults, only happen if the virtual 2760 * and physical addresses pass the exclusive monitor check. These 2761 * faults are exceedingly unlikely, because normally the guest uses 2762 * the exact same address register for the load_exclusive, and we 2763 * would have recognized these faults there. 2764 * 2765 * It is possible to trigger an alignment fault pre-LSE2, e.g. with an 2766 * unaligned 4-byte write within the range of an aligned 8-byte load. 
2767 * With LSE2, the store would need to cross a 16-byte boundary when the 2768 * load did not, which would mean the store is outside the range 2769 * recorded for the monitor, which would have failed a corrected monitor 2770 * check above. For now, we assume no size change and retain the 2771 * MO_ALIGN to let tcg know what we checked in the load_exclusive. 2772 * 2773 * It is possible to trigger an MTE fault, by performing the load with 2774 * a virtual address with a valid tag and performing the store with the 2775 * same virtual address and a different invalid tag. 2776 */ 2777 memop = size + is_pair; 2778 if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) { 2779 memop |= MO_ALIGN; 2780 } 2781 memop = finalize_memop(s, memop); 2782 gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2783 2784 tmp = tcg_temp_new_i64(); 2785 if (is_pair) { 2786 if (size == 2) { 2787 if (s->be_data == MO_LE) { 2788 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2789 } else { 2790 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2791 } 2792 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2793 cpu_exclusive_val, tmp, 2794 get_mem_index(s), memop); 2795 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2796 } else { 2797 TCGv_i128 t16 = tcg_temp_new_i128(); 2798 TCGv_i128 c16 = tcg_temp_new_i128(); 2799 TCGv_i64 a, b; 2800 2801 if (s->be_data == MO_LE) { 2802 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2803 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2804 cpu_exclusive_high); 2805 } else { 2806 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2807 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 2808 cpu_exclusive_val); 2809 } 2810 2811 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 2812 get_mem_index(s), memop); 2813 2814 a = tcg_temp_new_i64(); 2815 b = tcg_temp_new_i64(); 2816 if (s->be_data == MO_LE) { 2817 tcg_gen_extr_i128_i64(a, b, t16); 2818 } else { 2819 tcg_gen_extr_i128_i64(b, a, t16); 2820 } 2821 2822 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 2823 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 2824 tcg_gen_or_i64(tmp, a, b); 2825 2826 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 2827 } 2828 } else { 2829 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 2830 cpu_reg(s, rt), get_mem_index(s), memop); 2831 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2832 } 2833 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 2834 tcg_gen_br(done_label); 2835 2836 gen_set_label(fail_label); 2837 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 2838 gen_set_label(done_label); 2839 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2840 } 2841 gen_compare_and_swap(DisasContext * s,int rs,int rt,int rn,int size)2842 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 2843 int rn, int size) 2844 { 2845 TCGv_i64 tcg_rs = cpu_reg(s, rs); 2846 TCGv_i64 tcg_rt = cpu_reg(s, rt); 2847 int memidx = get_mem_index(s); 2848 TCGv_i64 clean_addr; 2849 MemOp memop; 2850 2851 if (rn == 31) { 2852 gen_check_sp_alignment(s); 2853 } 2854 memop = check_atomic_align(s, rn, size); 2855 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2856 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, 2857 memidx, memop); 2858 } 2859 gen_compare_and_swap_pair(DisasContext * s,int rs,int rt,int rn,int size)2860 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 2861 int rn, int size) 2862 { 2863 TCGv_i64 s1 = cpu_reg(s, rs); 2864 TCGv_i64 s2 = cpu_reg(s, rs + 1); 2865 
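    /*
     * For CASP, {Xs, Xs+1} hold the compare values and receive the data
     * returned from memory, while {Xt, Xt+1} supply the values stored on
     * a successful comparison.
     */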
TCGv_i64 t1 = cpu_reg(s, rt); 2866 TCGv_i64 t2 = cpu_reg(s, rt + 1); 2867 TCGv_i64 clean_addr; 2868 int memidx = get_mem_index(s); 2869 MemOp memop; 2870 2871 if (rn == 31) { 2872 gen_check_sp_alignment(s); 2873 } 2874 2875 /* This is a single atomic access, despite the "pair". */ 2876 memop = check_atomic_align(s, rn, size + 1); 2877 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop); 2878 2879 if (size == 2) { 2880 TCGv_i64 cmp = tcg_temp_new_i64(); 2881 TCGv_i64 val = tcg_temp_new_i64(); 2882 2883 if (s->be_data == MO_LE) { 2884 tcg_gen_concat32_i64(val, t1, t2); 2885 tcg_gen_concat32_i64(cmp, s1, s2); 2886 } else { 2887 tcg_gen_concat32_i64(val, t2, t1); 2888 tcg_gen_concat32_i64(cmp, s2, s1); 2889 } 2890 2891 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop); 2892 2893 if (s->be_data == MO_LE) { 2894 tcg_gen_extr32_i64(s1, s2, cmp); 2895 } else { 2896 tcg_gen_extr32_i64(s2, s1, cmp); 2897 } 2898 } else { 2899 TCGv_i128 cmp = tcg_temp_new_i128(); 2900 TCGv_i128 val = tcg_temp_new_i128(); 2901 2902 if (s->be_data == MO_LE) { 2903 tcg_gen_concat_i64_i128(val, t1, t2); 2904 tcg_gen_concat_i64_i128(cmp, s1, s2); 2905 } else { 2906 tcg_gen_concat_i64_i128(val, t2, t1); 2907 tcg_gen_concat_i64_i128(cmp, s2, s1); 2908 } 2909 2910 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop); 2911 2912 if (s->be_data == MO_LE) { 2913 tcg_gen_extr_i128_i64(s1, s2, cmp); 2914 } else { 2915 tcg_gen_extr_i128_i64(s2, s1, cmp); 2916 } 2917 } 2918 } 2919 2920 /* 2921 * Compute the ISS.SF bit for syndrome information if an exception 2922 * is taken on a load or store. This indicates whether the instruction 2923 * is accessing a 32-bit or 64-bit register. This logic is derived 2924 * from the ARMv8 specs for LDR (Shared decode for all encodings). 2925 */ ldst_iss_sf(int size,bool sign,bool ext)2926 static bool ldst_iss_sf(int size, bool sign, bool ext) 2927 { 2928 2929 if (sign) { 2930 /* 2931 * Signed loads are 64 bit results if we are not going to 2932 * do a zero-extend from 32 to 64 after the load. 2933 * (For a store, sign and ext are always false.) 2934 */ 2935 return !ext; 2936 } else { 2937 /* Unsigned loads/stores work at the specified size */ 2938 return size == MO_64; 2939 } 2940 } 2941 trans_STXR(DisasContext * s,arg_stxr * a)2942 static bool trans_STXR(DisasContext *s, arg_stxr *a) 2943 { 2944 if (a->rn == 31) { 2945 gen_check_sp_alignment(s); 2946 } 2947 if (a->lasr) { 2948 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2949 } 2950 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, false); 2951 return true; 2952 } 2953 trans_LDXR(DisasContext * s,arg_stxr * a)2954 static bool trans_LDXR(DisasContext *s, arg_stxr *a) 2955 { 2956 if (a->rn == 31) { 2957 gen_check_sp_alignment(s); 2958 } 2959 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, false); 2960 if (a->lasr) { 2961 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2962 } 2963 return true; 2964 } 2965 trans_STLR(DisasContext * s,arg_stlr * a)2966 static bool trans_STLR(DisasContext *s, arg_stlr *a) 2967 { 2968 TCGv_i64 clean_addr; 2969 MemOp memop; 2970 bool iss_sf = ldst_iss_sf(a->sz, false, false); 2971 2972 /* 2973 * StoreLORelease is the same as Store-Release for QEMU, but 2974 * needs the feature-test. 2975 */ 2976 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 2977 return false; 2978 } 2979 /* Generate ISS for non-exclusive accesses including LASR. 
*/ 2980 if (a->rn == 31) { 2981 gen_check_sp_alignment(s); 2982 } 2983 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2984 memop = check_ordered_align(s, a->rn, 0, true, a->sz); 2985 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 2986 true, a->rn != 31, memop); 2987 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, memop, true, a->rt, 2988 iss_sf, a->lasr); 2989 return true; 2990 } 2991 trans_LDAR(DisasContext * s,arg_stlr * a)2992 static bool trans_LDAR(DisasContext *s, arg_stlr *a) 2993 { 2994 TCGv_i64 clean_addr; 2995 MemOp memop; 2996 bool iss_sf = ldst_iss_sf(a->sz, false, false); 2997 2998 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 2999 if (!a->lasr && !dc_isar_feature(aa64_lor, s)) { 3000 return false; 3001 } 3002 /* Generate ISS for non-exclusive accesses including LASR. */ 3003 if (a->rn == 31) { 3004 gen_check_sp_alignment(s); 3005 } 3006 memop = check_ordered_align(s, a->rn, 0, false, a->sz); 3007 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), 3008 false, a->rn != 31, memop); 3009 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, memop, false, true, 3010 a->rt, iss_sf, a->lasr); 3011 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3012 return true; 3013 } 3014 trans_STXP(DisasContext * s,arg_stxr * a)3015 static bool trans_STXP(DisasContext *s, arg_stxr *a) 3016 { 3017 if (a->rn == 31) { 3018 gen_check_sp_alignment(s); 3019 } 3020 if (a->lasr) { 3021 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3022 } 3023 gen_store_exclusive(s, a->rs, a->rt, a->rt2, a->rn, a->sz, true); 3024 return true; 3025 } 3026 trans_LDXP(DisasContext * s,arg_stxr * a)3027 static bool trans_LDXP(DisasContext *s, arg_stxr *a) 3028 { 3029 if (a->rn == 31) { 3030 gen_check_sp_alignment(s); 3031 } 3032 gen_load_exclusive(s, a->rt, a->rt2, a->rn, a->sz, true); 3033 if (a->lasr) { 3034 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3035 } 3036 return true; 3037 } 3038 trans_CASP(DisasContext * s,arg_CASP * a)3039 static bool trans_CASP(DisasContext *s, arg_CASP *a) 3040 { 3041 if (!dc_isar_feature(aa64_atomics, s)) { 3042 return false; 3043 } 3044 if (((a->rt | a->rs) & 1) != 0) { 3045 return false; 3046 } 3047 3048 gen_compare_and_swap_pair(s, a->rs, a->rt, a->rn, a->sz); 3049 return true; 3050 } 3051 trans_CAS(DisasContext * s,arg_CAS * a)3052 static bool trans_CAS(DisasContext *s, arg_CAS *a) 3053 { 3054 if (!dc_isar_feature(aa64_atomics, s)) { 3055 return false; 3056 } 3057 gen_compare_and_swap(s, a->rs, a->rt, a->rn, a->sz); 3058 return true; 3059 } 3060 trans_LD_lit(DisasContext * s,arg_ldlit * a)3061 static bool trans_LD_lit(DisasContext *s, arg_ldlit *a) 3062 { 3063 bool iss_sf = ldst_iss_sf(a->sz, a->sign, false); 3064 TCGv_i64 tcg_rt = cpu_reg(s, a->rt); 3065 TCGv_i64 clean_addr = tcg_temp_new_i64(); 3066 MemOp memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3067 3068 gen_pc_plus_diff(s, clean_addr, a->imm); 3069 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3070 false, true, a->rt, iss_sf, false); 3071 return true; 3072 } 3073 trans_LD_lit_v(DisasContext * s,arg_ldlit * a)3074 static bool trans_LD_lit_v(DisasContext *s, arg_ldlit *a) 3075 { 3076 /* Load register (literal), vector version */ 3077 TCGv_i64 clean_addr; 3078 MemOp memop; 3079 3080 if (!fp_access_check(s)) { 3081 return true; 3082 } 3083 memop = finalize_memop_asimd(s, a->sz); 3084 clean_addr = tcg_temp_new_i64(); 3085 gen_pc_plus_diff(s, clean_addr, a->imm); 3086 do_fp_ld(s, a->rt, clean_addr, memop); 3087 return true; 3088 } 3089 op_addr_ldstpair_pre(DisasContext * s,arg_ldstpair * a,TCGv_i64 * clean_addr,TCGv_i64 * dirty_addr,uint64_t offset,bool 
is_store,MemOp mop)3090 static void op_addr_ldstpair_pre(DisasContext *s, arg_ldstpair *a, 3091 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3092 uint64_t offset, bool is_store, MemOp mop) 3093 { 3094 if (a->rn == 31) { 3095 gen_check_sp_alignment(s); 3096 } 3097 3098 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3099 if (!a->p) { 3100 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3101 } 3102 3103 *clean_addr = gen_mte_checkN(s, *dirty_addr, is_store, 3104 (a->w || a->rn != 31), 2 << a->sz, mop); 3105 } 3106 op_addr_ldstpair_post(DisasContext * s,arg_ldstpair * a,TCGv_i64 dirty_addr,uint64_t offset)3107 static void op_addr_ldstpair_post(DisasContext *s, arg_ldstpair *a, 3108 TCGv_i64 dirty_addr, uint64_t offset) 3109 { 3110 if (a->w) { 3111 if (a->p) { 3112 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3113 } 3114 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3115 } 3116 } 3117 trans_STP(DisasContext * s,arg_ldstpair * a)3118 static bool trans_STP(DisasContext *s, arg_ldstpair *a) 3119 { 3120 uint64_t offset = a->imm << a->sz; 3121 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3122 MemOp mop = finalize_memop(s, a->sz); 3123 3124 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3125 tcg_rt = cpu_reg(s, a->rt); 3126 tcg_rt2 = cpu_reg(s, a->rt2); 3127 /* 3128 * We built mop above for the single logical access -- rebuild it 3129 * now for the paired operation. 3130 * 3131 * With LSE2, non-sign-extending pairs are treated atomically if 3132 * aligned, and if unaligned one of the pair will be completely 3133 * within a 16-byte block and that element will be atomic. 3134 * Otherwise each element is separately atomic. 3135 * In all cases, issue one operation with the correct atomicity. 3136 */ 3137 mop = a->sz + 1; 3138 if (s->align_mem) { 3139 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3140 } 3141 mop = finalize_memop_pair(s, mop); 3142 if (a->sz == 2) { 3143 TCGv_i64 tmp = tcg_temp_new_i64(); 3144 3145 if (s->be_data == MO_LE) { 3146 tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2); 3147 } else { 3148 tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt); 3149 } 3150 tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop); 3151 } else { 3152 TCGv_i128 tmp = tcg_temp_new_i128(); 3153 3154 if (s->be_data == MO_LE) { 3155 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3156 } else { 3157 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3158 } 3159 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3160 } 3161 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3162 return true; 3163 } 3164 trans_LDP(DisasContext * s,arg_ldstpair * a)3165 static bool trans_LDP(DisasContext *s, arg_ldstpair *a) 3166 { 3167 uint64_t offset = a->imm << a->sz; 3168 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3169 MemOp mop = finalize_memop(s, a->sz); 3170 3171 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3172 tcg_rt = cpu_reg(s, a->rt); 3173 tcg_rt2 = cpu_reg(s, a->rt2); 3174 3175 /* 3176 * We built mop above for the single logical access -- rebuild it 3177 * now for the paired operation. 3178 * 3179 * With LSE2, non-sign-extending pairs are treated atomically if 3180 * aligned, and if unaligned one of the pair will be completely 3181 * within a 16-byte block and that element will be atomic. 3182 * Otherwise each element is separately atomic. 3183 * In all cases, issue one operation with the correct atomicity. 3184 * 3185 * This treats sign-extending loads like zero-extending loads, 3186 * since that reuses the most code below. 
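     *
     * For the 32-bit (a->sz == 2) case the single 64-bit load below lands
     * in Rt and the two halves are then extracted from it; Rt2's half is
     * taken first so that the in-place extraction into Rt does not
     * clobber the source value.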
3187 */ 3188 mop = a->sz + 1; 3189 if (s->align_mem) { 3190 mop |= (a->sz == 2 ? MO_ALIGN_4 : MO_ALIGN_8); 3191 } 3192 mop = finalize_memop_pair(s, mop); 3193 if (a->sz == 2) { 3194 int o2 = s->be_data == MO_LE ? 32 : 0; 3195 int o1 = o2 ^ 32; 3196 3197 tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop); 3198 if (a->sign) { 3199 tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32); 3200 tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32); 3201 } else { 3202 tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32); 3203 tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32); 3204 } 3205 } else { 3206 TCGv_i128 tmp = tcg_temp_new_i128(); 3207 3208 tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop); 3209 if (s->be_data == MO_LE) { 3210 tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp); 3211 } else { 3212 tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp); 3213 } 3214 } 3215 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3216 return true; 3217 } 3218 trans_STP_v(DisasContext * s,arg_ldstpair * a)3219 static bool trans_STP_v(DisasContext *s, arg_ldstpair *a) 3220 { 3221 uint64_t offset = a->imm << a->sz; 3222 TCGv_i64 clean_addr, dirty_addr; 3223 MemOp mop; 3224 3225 if (!fp_access_check(s)) { 3226 return true; 3227 } 3228 3229 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3230 mop = finalize_memop_asimd(s, a->sz); 3231 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, true, mop); 3232 do_fp_st(s, a->rt, clean_addr, mop); 3233 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3234 do_fp_st(s, a->rt2, clean_addr, mop); 3235 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3236 return true; 3237 } 3238 trans_LDP_v(DisasContext * s,arg_ldstpair * a)3239 static bool trans_LDP_v(DisasContext *s, arg_ldstpair *a) 3240 { 3241 uint64_t offset = a->imm << a->sz; 3242 TCGv_i64 clean_addr, dirty_addr; 3243 MemOp mop; 3244 3245 if (!fp_access_check(s)) { 3246 return true; 3247 } 3248 3249 /* LSE2 does not merge FP pairs; leave these as separate operations. */ 3250 mop = finalize_memop_asimd(s, a->sz); 3251 op_addr_ldstpair_pre(s, a, &clean_addr, &dirty_addr, offset, false, mop); 3252 do_fp_ld(s, a->rt, clean_addr, mop); 3253 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << a->sz); 3254 do_fp_ld(s, a->rt2, clean_addr, mop); 3255 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3256 return true; 3257 } 3258 trans_STGP(DisasContext * s,arg_ldstpair * a)3259 static bool trans_STGP(DisasContext *s, arg_ldstpair *a) 3260 { 3261 TCGv_i64 clean_addr, dirty_addr, tcg_rt, tcg_rt2; 3262 uint64_t offset = a->imm << LOG2_TAG_GRANULE; 3263 MemOp mop; 3264 TCGv_i128 tmp; 3265 3266 /* STGP only comes in one size. */ 3267 tcg_debug_assert(a->sz == MO_64); 3268 3269 if (!dc_isar_feature(aa64_mte_insn_reg, s)) { 3270 return false; 3271 } 3272 3273 if (a->rn == 31) { 3274 gen_check_sp_alignment(s); 3275 } 3276 3277 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3278 if (!a->p) { 3279 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3280 } 3281 3282 clean_addr = clean_data_tbi(s, dirty_addr); 3283 tcg_rt = cpu_reg(s, a->rt); 3284 tcg_rt2 = cpu_reg(s, a->rt2); 3285 3286 /* 3287 * STGP is defined as two 8-byte memory operations, aligned to TAG_GRANULE, 3288 * and one tag operation. We implement it as one single aligned 16-byte 3289 * memory operation for convenience. Note that the alignment ensures 3290 * MO_ATOM_IFALIGN_PAIR produces 8-byte atomicity for the memory store. 
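     * The tag store further down passes dirty_addr twice: once as the
     * address of the granule and once as the register supplying the
     * allocation tag (bits [59:56]), since STGP writes the tag held in
     * its own address register.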
3291 */ 3292 mop = finalize_memop_atom(s, MO_128 | MO_ALIGN, MO_ATOM_IFALIGN_PAIR); 3293 3294 tmp = tcg_temp_new_i128(); 3295 if (s->be_data == MO_LE) { 3296 tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2); 3297 } else { 3298 tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt); 3299 } 3300 tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); 3301 3302 /* Perform the tag store, if tag access enabled. */ 3303 if (s->ata[0]) { 3304 if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3305 gen_helper_stg_parallel(tcg_env, dirty_addr, dirty_addr); 3306 } else { 3307 gen_helper_stg(tcg_env, dirty_addr, dirty_addr); 3308 } 3309 } 3310 3311 op_addr_ldstpair_post(s, a, dirty_addr, offset); 3312 return true; 3313 } 3314 op_addr_ldst_imm_pre(DisasContext * s,arg_ldst_imm * a,TCGv_i64 * clean_addr,TCGv_i64 * dirty_addr,uint64_t offset,bool is_store,MemOp mop)3315 static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, 3316 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3317 uint64_t offset, bool is_store, MemOp mop) 3318 { 3319 int memidx; 3320 3321 if (a->rn == 31) { 3322 gen_check_sp_alignment(s); 3323 } 3324 3325 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3326 if (!a->p) { 3327 tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); 3328 } 3329 memidx = get_a64_user_mem_index(s, a->unpriv); 3330 *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, 3331 a->w || a->rn != 31, 3332 mop, a->unpriv, memidx); 3333 } 3334 op_addr_ldst_imm_post(DisasContext * s,arg_ldst_imm * a,TCGv_i64 dirty_addr,uint64_t offset)3335 static void op_addr_ldst_imm_post(DisasContext *s, arg_ldst_imm *a, 3336 TCGv_i64 dirty_addr, uint64_t offset) 3337 { 3338 if (a->w) { 3339 if (a->p) { 3340 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3341 } 3342 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3343 } 3344 } 3345 trans_STR_i(DisasContext * s,arg_ldst_imm * a)3346 static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) 3347 { 3348 bool iss_sf, iss_valid = !a->w; 3349 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3350 int memidx = get_a64_user_mem_index(s, a->unpriv); 3351 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3352 3353 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3354 3355 tcg_rt = cpu_reg(s, a->rt); 3356 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3357 3358 do_gpr_st_memidx(s, tcg_rt, clean_addr, mop, memidx, 3359 iss_valid, a->rt, iss_sf, false); 3360 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3361 return true; 3362 } 3363 trans_LDR_i(DisasContext * s,arg_ldst_imm * a)3364 static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) 3365 { 3366 bool iss_sf, iss_valid = !a->w; 3367 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3368 int memidx = get_a64_user_mem_index(s, a->unpriv); 3369 MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3370 3371 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3372 3373 tcg_rt = cpu_reg(s, a->rt); 3374 iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3375 3376 do_gpr_ld_memidx(s, tcg_rt, clean_addr, mop, 3377 a->ext, memidx, iss_valid, a->rt, iss_sf, false); 3378 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3379 return true; 3380 } 3381 trans_STR_v_i(DisasContext * s,arg_ldst_imm * a)3382 static bool trans_STR_v_i(DisasContext *s, arg_ldst_imm *a) 3383 { 3384 TCGv_i64 clean_addr, dirty_addr; 3385 MemOp mop; 3386 3387 if (!fp_access_check(s)) { 3388 return true; 3389 } 3390 mop = finalize_memop_asimd(s, a->sz); 3391 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); 3392 do_fp_st(s, 
a->rt, clean_addr, mop); 3393 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3394 return true; 3395 } 3396 trans_LDR_v_i(DisasContext * s,arg_ldst_imm * a)3397 static bool trans_LDR_v_i(DisasContext *s, arg_ldst_imm *a) 3398 { 3399 TCGv_i64 clean_addr, dirty_addr; 3400 MemOp mop; 3401 3402 if (!fp_access_check(s)) { 3403 return true; 3404 } 3405 mop = finalize_memop_asimd(s, a->sz); 3406 op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); 3407 do_fp_ld(s, a->rt, clean_addr, mop); 3408 op_addr_ldst_imm_post(s, a, dirty_addr, a->imm); 3409 return true; 3410 } 3411 op_addr_ldst_pre(DisasContext * s,arg_ldst * a,TCGv_i64 * clean_addr,TCGv_i64 * dirty_addr,bool is_store,MemOp memop)3412 static void op_addr_ldst_pre(DisasContext *s, arg_ldst *a, 3413 TCGv_i64 *clean_addr, TCGv_i64 *dirty_addr, 3414 bool is_store, MemOp memop) 3415 { 3416 TCGv_i64 tcg_rm; 3417 3418 if (a->rn == 31) { 3419 gen_check_sp_alignment(s); 3420 } 3421 *dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3422 3423 tcg_rm = read_cpu_reg(s, a->rm, 1); 3424 ext_and_shift_reg(tcg_rm, tcg_rm, a->opt, a->s ? a->sz : 0); 3425 3426 tcg_gen_add_i64(*dirty_addr, *dirty_addr, tcg_rm); 3427 *clean_addr = gen_mte_check1(s, *dirty_addr, is_store, true, memop); 3428 } 3429 trans_LDR(DisasContext * s,arg_ldst * a)3430 static bool trans_LDR(DisasContext *s, arg_ldst *a) 3431 { 3432 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3433 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3434 MemOp memop; 3435 3436 if (extract32(a->opt, 1, 1) == 0) { 3437 return false; 3438 } 3439 3440 memop = finalize_memop(s, a->sz + a->sign * MO_SIGN); 3441 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3442 tcg_rt = cpu_reg(s, a->rt); 3443 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3444 a->ext, true, a->rt, iss_sf, false); 3445 return true; 3446 } 3447 trans_STR(DisasContext * s,arg_ldst * a)3448 static bool trans_STR(DisasContext *s, arg_ldst *a) 3449 { 3450 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3451 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3452 MemOp memop; 3453 3454 if (extract32(a->opt, 1, 1) == 0) { 3455 return false; 3456 } 3457 3458 memop = finalize_memop(s, a->sz); 3459 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3460 tcg_rt = cpu_reg(s, a->rt); 3461 do_gpr_st(s, tcg_rt, clean_addr, memop, true, a->rt, iss_sf, false); 3462 return true; 3463 } 3464 trans_LDR_v(DisasContext * s,arg_ldst * a)3465 static bool trans_LDR_v(DisasContext *s, arg_ldst *a) 3466 { 3467 TCGv_i64 clean_addr, dirty_addr; 3468 MemOp memop; 3469 3470 if (extract32(a->opt, 1, 1) == 0) { 3471 return false; 3472 } 3473 3474 if (!fp_access_check(s)) { 3475 return true; 3476 } 3477 3478 memop = finalize_memop_asimd(s, a->sz); 3479 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, false, memop); 3480 do_fp_ld(s, a->rt, clean_addr, memop); 3481 return true; 3482 } 3483 trans_STR_v(DisasContext * s,arg_ldst * a)3484 static bool trans_STR_v(DisasContext *s, arg_ldst *a) 3485 { 3486 TCGv_i64 clean_addr, dirty_addr; 3487 MemOp memop; 3488 3489 if (extract32(a->opt, 1, 1) == 0) { 3490 return false; 3491 } 3492 3493 if (!fp_access_check(s)) { 3494 return true; 3495 } 3496 3497 memop = finalize_memop_asimd(s, a->sz); 3498 op_addr_ldst_pre(s, a, &clean_addr, &dirty_addr, true, memop); 3499 do_fp_st(s, a->rt, clean_addr, memop); 3500 return true; 3501 } 3502 3503 do_atomic_ld(DisasContext * s,arg_atomic * a,AtomicThreeOpFn * fn,int sign,bool invert)3504 static bool do_atomic_ld(DisasContext *s, arg_atomic *a, AtomicThreeOpFn *fn, 3505 int 
sign, bool invert) 3506 { 3507 MemOp mop = a->sz | sign; 3508 TCGv_i64 clean_addr, tcg_rs, tcg_rt; 3509 3510 if (a->rn == 31) { 3511 gen_check_sp_alignment(s); 3512 } 3513 mop = check_atomic_align(s, a->rn, mop); 3514 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3515 a->rn != 31, mop); 3516 tcg_rs = read_cpu_reg(s, a->rs, true); 3517 tcg_rt = cpu_reg(s, a->rt); 3518 if (invert) { 3519 tcg_gen_not_i64(tcg_rs, tcg_rs); 3520 } 3521 /* 3522 * The tcg atomic primitives are all full barriers. Therefore we 3523 * can ignore the Acquire and Release bits of this instruction. 3524 */ 3525 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3526 3527 if (mop & MO_SIGN) { 3528 switch (a->sz) { 3529 case MO_8: 3530 tcg_gen_ext8u_i64(tcg_rt, tcg_rt); 3531 break; 3532 case MO_16: 3533 tcg_gen_ext16u_i64(tcg_rt, tcg_rt); 3534 break; 3535 case MO_32: 3536 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3537 break; 3538 case MO_64: 3539 break; 3540 default: 3541 g_assert_not_reached(); 3542 } 3543 } 3544 return true; 3545 } 3546 3547 TRANS_FEAT(LDADD, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_add_i64, 0, false) 3548 TRANS_FEAT(LDCLR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_and_i64, 0, true) 3549 TRANS_FEAT(LDEOR, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_xor_i64, 0, false) 3550 TRANS_FEAT(LDSET, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_or_i64, 0, false) TRANS_FEAT(LDSMAX,aa64_atomics,do_atomic_ld,a,tcg_gen_atomic_fetch_smax_i64,MO_SIGN,false)3551 TRANS_FEAT(LDSMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smax_i64, MO_SIGN, false) 3552 TRANS_FEAT(LDSMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_smin_i64, MO_SIGN, false) 3553 TRANS_FEAT(LDUMAX, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umax_i64, 0, false) 3554 TRANS_FEAT(LDUMIN, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_fetch_umin_i64, 0, false) 3555 TRANS_FEAT(SWP, aa64_atomics, do_atomic_ld, a, tcg_gen_atomic_xchg_i64, 0, false) 3556 3557 static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) 3558 { 3559 bool iss_sf = ldst_iss_sf(a->sz, false, false); 3560 TCGv_i64 clean_addr; 3561 MemOp mop; 3562 3563 if (!dc_isar_feature(aa64_atomics, s) || 3564 !dc_isar_feature(aa64_rcpc_8_3, s)) { 3565 return false; 3566 } 3567 if (a->rn == 31) { 3568 gen_check_sp_alignment(s); 3569 } 3570 mop = check_ordered_align(s, a->rn, 0, false, a->sz); 3571 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, 3572 a->rn != 31, mop); 3573 /* 3574 * LDAPR* are a special case because they are a simple load, not a 3575 * fetch-and-do-something op. 3576 * The architectural consistency requirements here are weaker than 3577 * full load-acquire (we only need "load-acquire processor consistent"), 3578 * but we choose to implement them as full LDAQ. 
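     * The acquire ordering is provided by the TCG_MO_ALL | TCG_BAR_LDAQ
     * barrier emitted immediately after the load below.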
3579 */ 3580 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, false, 3581 true, a->rt, iss_sf, true); 3582 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3583 return true; 3584 } 3585 trans_LDRA(DisasContext * s,arg_LDRA * a)3586 static bool trans_LDRA(DisasContext *s, arg_LDRA *a) 3587 { 3588 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3589 MemOp memop; 3590 3591 /* Load with pointer authentication */ 3592 if (!dc_isar_feature(aa64_pauth, s)) { 3593 return false; 3594 } 3595 3596 if (a->rn == 31) { 3597 gen_check_sp_alignment(s); 3598 } 3599 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3600 3601 if (s->pauth_active) { 3602 if (!a->m) { 3603 gen_helper_autda_combined(dirty_addr, tcg_env, dirty_addr, 3604 tcg_constant_i64(0)); 3605 } else { 3606 gen_helper_autdb_combined(dirty_addr, tcg_env, dirty_addr, 3607 tcg_constant_i64(0)); 3608 } 3609 } 3610 3611 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3612 3613 memop = finalize_memop(s, MO_64); 3614 3615 /* Note that "clean" and "dirty" here refer to TBI not PAC. */ 3616 clean_addr = gen_mte_check1(s, dirty_addr, false, 3617 a->w || a->rn != 31, memop); 3618 3619 tcg_rt = cpu_reg(s, a->rt); 3620 do_gpr_ld(s, tcg_rt, clean_addr, memop, 3621 /* extend */ false, /* iss_valid */ !a->w, 3622 /* iss_srt */ a->rt, /* iss_sf */ true, /* iss_ar */ false); 3623 3624 if (a->w) { 3625 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), dirty_addr); 3626 } 3627 return true; 3628 } 3629 trans_LDAPR_i(DisasContext * s,arg_ldapr_stlr_i * a)3630 static bool trans_LDAPR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3631 { 3632 TCGv_i64 clean_addr, dirty_addr; 3633 MemOp mop = a->sz | (a->sign ? MO_SIGN : 0); 3634 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3635 3636 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3637 return false; 3638 } 3639 3640 if (a->rn == 31) { 3641 gen_check_sp_alignment(s); 3642 } 3643 3644 mop = check_ordered_align(s, a->rn, a->imm, false, mop); 3645 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3646 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3647 clean_addr = clean_data_tbi(s, dirty_addr); 3648 3649 /* 3650 * Load-AcquirePC semantics; we implement as the slightly more 3651 * restrictive Load-Acquire. 
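     * (These are the unscaled-immediate LDAPUR* forms introduced by
     * FEAT_LRCPC2, hence the aa64_rcpc_8_4 check above.)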
3652 */ 3653 do_gpr_ld(s, cpu_reg(s, a->rt), clean_addr, mop, a->ext, true, 3654 a->rt, iss_sf, true); 3655 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3656 return true; 3657 } 3658 trans_STLR_i(DisasContext * s,arg_ldapr_stlr_i * a)3659 static bool trans_STLR_i(DisasContext *s, arg_ldapr_stlr_i *a) 3660 { 3661 TCGv_i64 clean_addr, dirty_addr; 3662 MemOp mop = a->sz; 3663 bool iss_sf = ldst_iss_sf(a->sz, a->sign, a->ext); 3664 3665 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3666 return false; 3667 } 3668 3669 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3670 3671 if (a->rn == 31) { 3672 gen_check_sp_alignment(s); 3673 } 3674 3675 mop = check_ordered_align(s, a->rn, a->imm, true, mop); 3676 dirty_addr = read_cpu_reg_sp(s, a->rn, 1); 3677 tcg_gen_addi_i64(dirty_addr, dirty_addr, a->imm); 3678 clean_addr = clean_data_tbi(s, dirty_addr); 3679 3680 /* Store-Release semantics */ 3681 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3682 do_gpr_st(s, cpu_reg(s, a->rt), clean_addr, mop, true, a->rt, iss_sf, true); 3683 return true; 3684 } 3685 trans_LD_mult(DisasContext * s,arg_ldst_mult * a)3686 static bool trans_LD_mult(DisasContext *s, arg_ldst_mult *a) 3687 { 3688 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3689 MemOp endian, align, mop; 3690 3691 int total; /* total bytes */ 3692 int elements; /* elements per vector */ 3693 int r; 3694 int size = a->sz; 3695 3696 if (!a->p && a->rm != 0) { 3697 /* For non-postindexed accesses the Rm field must be 0 */ 3698 return false; 3699 } 3700 if (size == 3 && !a->q && a->selem != 1) { 3701 return false; 3702 } 3703 if (!fp_access_check(s)) { 3704 return true; 3705 } 3706 3707 if (a->rn == 31) { 3708 gen_check_sp_alignment(s); 3709 } 3710 3711 /* For our purposes, bytes are always little-endian. */ 3712 endian = s->be_data; 3713 if (size == 0) { 3714 endian = MO_LE; 3715 } 3716 3717 total = a->rpt * a->selem * (a->q ? 16 : 8); 3718 tcg_rn = cpu_reg_sp(s, a->rn); 3719 3720 /* 3721 * Issue the MTE check vs the logical repeat count, before we 3722 * promote consecutive little-endian elements below. 3723 */ 3724 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, total, 3725 finalize_memop_asimd(s, size)); 3726 3727 /* 3728 * Consecutive little-endian elements from a single register 3729 * can be promoted to a larger little-endian operation. 3730 */ 3731 align = MO_ALIGN; 3732 if (a->selem == 1 && endian == MO_LE) { 3733 align = pow2_align(size); 3734 size = 3; 3735 } 3736 if (!s->align_mem) { 3737 align = 0; 3738 } 3739 mop = endian | size | align; 3740 3741 elements = (a->q ? 16 : 8) >> size; 3742 tcg_ebytes = tcg_constant_i64(1 << size); 3743 for (r = 0; r < a->rpt; r++) { 3744 int e; 3745 for (e = 0; e < elements; e++) { 3746 int xs; 3747 for (xs = 0; xs < a->selem; xs++) { 3748 int tt = (a->rt + r + xs) % 32; 3749 do_vec_ld(s, tt, e, clean_addr, mop); 3750 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3751 } 3752 } 3753 } 3754 3755 /* 3756 * For non-quad operations, setting a slice of the low 64 bits of 3757 * the register clears the high 64 bits (in the ARM ARM pseudocode 3758 * this is implicit in the fact that 'rval' is a 64 bit wide 3759 * variable). For quad operations, we might still need to zero 3760 * the high bits of SVE. 
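     * For example, "ld1 {v0.8b}, [x0]" (non-quad) must finish with bits
     * [127:64] of v0 zeroed, while "ld1 {v0.16b}, [x0]" (quad) writes
     * all 128 bits and only the SVE bits above them still need clearing.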
3761 */ 3762 for (r = 0; r < a->rpt * a->selem; r++) { 3763 int tt = (a->rt + r) % 32; 3764 clear_vec_high(s, a->q, tt); 3765 } 3766 3767 if (a->p) { 3768 if (a->rm == 31) { 3769 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3770 } else { 3771 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3772 } 3773 } 3774 return true; 3775 } 3776 trans_ST_mult(DisasContext * s,arg_ldst_mult * a)3777 static bool trans_ST_mult(DisasContext *s, arg_ldst_mult *a) 3778 { 3779 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3780 MemOp endian, align, mop; 3781 3782 int total; /* total bytes */ 3783 int elements; /* elements per vector */ 3784 int r; 3785 int size = a->sz; 3786 3787 if (!a->p && a->rm != 0) { 3788 /* For non-postindexed accesses the Rm field must be 0 */ 3789 return false; 3790 } 3791 if (size == 3 && !a->q && a->selem != 1) { 3792 return false; 3793 } 3794 if (!fp_access_check(s)) { 3795 return true; 3796 } 3797 3798 if (a->rn == 31) { 3799 gen_check_sp_alignment(s); 3800 } 3801 3802 /* For our purposes, bytes are always little-endian. */ 3803 endian = s->be_data; 3804 if (size == 0) { 3805 endian = MO_LE; 3806 } 3807 3808 total = a->rpt * a->selem * (a->q ? 16 : 8); 3809 tcg_rn = cpu_reg_sp(s, a->rn); 3810 3811 /* 3812 * Issue the MTE check vs the logical repeat count, before we 3813 * promote consecutive little-endian elements below. 3814 */ 3815 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, total, 3816 finalize_memop_asimd(s, size)); 3817 3818 /* 3819 * Consecutive little-endian elements from a single register 3820 * can be promoted to a larger little-endian operation. 3821 */ 3822 align = MO_ALIGN; 3823 if (a->selem == 1 && endian == MO_LE) { 3824 align = pow2_align(size); 3825 size = 3; 3826 } 3827 if (!s->align_mem) { 3828 align = 0; 3829 } 3830 mop = endian | size | align; 3831 3832 elements = (a->q ? 
16 : 8) >> size; 3833 tcg_ebytes = tcg_constant_i64(1 << size); 3834 for (r = 0; r < a->rpt; r++) { 3835 int e; 3836 for (e = 0; e < elements; e++) { 3837 int xs; 3838 for (xs = 0; xs < a->selem; xs++) { 3839 int tt = (a->rt + r + xs) % 32; 3840 do_vec_st(s, tt, e, clean_addr, mop); 3841 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3842 } 3843 } 3844 } 3845 3846 if (a->p) { 3847 if (a->rm == 31) { 3848 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3849 } else { 3850 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3851 } 3852 } 3853 return true; 3854 } 3855 trans_ST_single(DisasContext * s,arg_ldst_single * a)3856 static bool trans_ST_single(DisasContext *s, arg_ldst_single *a) 3857 { 3858 int xs, total, rt; 3859 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3860 MemOp mop; 3861 3862 if (!a->p && a->rm != 0) { 3863 return false; 3864 } 3865 if (!fp_access_check(s)) { 3866 return true; 3867 } 3868 3869 if (a->rn == 31) { 3870 gen_check_sp_alignment(s); 3871 } 3872 3873 total = a->selem << a->scale; 3874 tcg_rn = cpu_reg_sp(s, a->rn); 3875 3876 mop = finalize_memop_asimd(s, a->scale); 3877 clean_addr = gen_mte_checkN(s, tcg_rn, true, a->p || a->rn != 31, 3878 total, mop); 3879 3880 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3881 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3882 do_vec_st(s, rt, a->index, clean_addr, mop); 3883 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3884 } 3885 3886 if (a->p) { 3887 if (a->rm == 31) { 3888 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3889 } else { 3890 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3891 } 3892 } 3893 return true; 3894 } 3895 trans_LD_single(DisasContext * s,arg_ldst_single * a)3896 static bool trans_LD_single(DisasContext *s, arg_ldst_single *a) 3897 { 3898 int xs, total, rt; 3899 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3900 MemOp mop; 3901 3902 if (!a->p && a->rm != 0) { 3903 return false; 3904 } 3905 if (!fp_access_check(s)) { 3906 return true; 3907 } 3908 3909 if (a->rn == 31) { 3910 gen_check_sp_alignment(s); 3911 } 3912 3913 total = a->selem << a->scale; 3914 tcg_rn = cpu_reg_sp(s, a->rn); 3915 3916 mop = finalize_memop_asimd(s, a->scale); 3917 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3918 total, mop); 3919 3920 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3921 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3922 do_vec_ld(s, rt, a->index, clean_addr, mop); 3923 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3924 } 3925 3926 if (a->p) { 3927 if (a->rm == 31) { 3928 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3929 } else { 3930 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3931 } 3932 } 3933 return true; 3934 } 3935 trans_LD_single_repl(DisasContext * s,arg_LD_single_repl * a)3936 static bool trans_LD_single_repl(DisasContext *s, arg_LD_single_repl *a) 3937 { 3938 int xs, total, rt; 3939 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3940 MemOp mop; 3941 3942 if (!a->p && a->rm != 0) { 3943 return false; 3944 } 3945 if (!fp_access_check(s)) { 3946 return true; 3947 } 3948 3949 if (a->rn == 31) { 3950 gen_check_sp_alignment(s); 3951 } 3952 3953 total = a->selem << a->scale; 3954 tcg_rn = cpu_reg_sp(s, a->rn); 3955 3956 mop = finalize_memop_asimd(s, a->scale); 3957 clean_addr = gen_mte_checkN(s, tcg_rn, false, a->p || a->rn != 31, 3958 total, mop); 3959 3960 tcg_ebytes = tcg_constant_i64(1 << a->scale); 3961 for (xs = 0, rt = a->rt; xs < a->selem; xs++, rt = (rt + 1) % 32) { 3962 /* Load and replicate to all elements */ 3963 TCGv_i64 tcg_tmp = 
tcg_temp_new_i64(); 3964 3965 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 3966 tcg_gen_gvec_dup_i64(a->scale, vec_full_reg_offset(s, rt), 3967 (a->q + 1) * 8, vec_full_reg_size(s), tcg_tmp); 3968 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3969 } 3970 3971 if (a->p) { 3972 if (a->rm == 31) { 3973 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3974 } else { 3975 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, a->rm)); 3976 } 3977 } 3978 return true; 3979 } 3980 trans_STZGM(DisasContext * s,arg_ldst_tag * a)3981 static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) 3982 { 3983 TCGv_i64 addr, clean_addr, tcg_rt; 3984 int size = 4 << s->dcz_blocksize; 3985 3986 if (!dc_isar_feature(aa64_mte, s)) { 3987 return false; 3988 } 3989 if (s->current_el == 0) { 3990 return false; 3991 } 3992 3993 if (a->rn == 31) { 3994 gen_check_sp_alignment(s); 3995 } 3996 3997 addr = read_cpu_reg_sp(s, a->rn, true); 3998 tcg_gen_addi_i64(addr, addr, a->imm); 3999 tcg_rt = cpu_reg(s, a->rt); 4000 4001 if (s->ata[0]) { 4002 gen_helper_stzgm_tags(tcg_env, addr, tcg_rt); 4003 } 4004 /* 4005 * The non-tags portion of STZGM is mostly like DC_ZVA, 4006 * except the alignment happens before the access. 4007 */ 4008 clean_addr = clean_data_tbi(s, addr); 4009 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4010 gen_helper_dc_zva(tcg_env, clean_addr); 4011 return true; 4012 } 4013 trans_STGM(DisasContext * s,arg_ldst_tag * a)4014 static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) 4015 { 4016 TCGv_i64 addr, clean_addr, tcg_rt; 4017 4018 if (!dc_isar_feature(aa64_mte, s)) { 4019 return false; 4020 } 4021 if (s->current_el == 0) { 4022 return false; 4023 } 4024 4025 if (a->rn == 31) { 4026 gen_check_sp_alignment(s); 4027 } 4028 4029 addr = read_cpu_reg_sp(s, a->rn, true); 4030 tcg_gen_addi_i64(addr, addr, a->imm); 4031 tcg_rt = cpu_reg(s, a->rt); 4032 4033 if (s->ata[0]) { 4034 gen_helper_stgm(tcg_env, addr, tcg_rt); 4035 } else { 4036 MMUAccessType acc = MMU_DATA_STORE; 4037 int size = 4 << s->gm_blocksize; 4038 4039 clean_addr = clean_data_tbi(s, addr); 4040 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4041 gen_probe_access(s, clean_addr, acc, size); 4042 } 4043 return true; 4044 } 4045 trans_LDGM(DisasContext * s,arg_ldst_tag * a)4046 static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) 4047 { 4048 TCGv_i64 addr, clean_addr, tcg_rt; 4049 4050 if (!dc_isar_feature(aa64_mte, s)) { 4051 return false; 4052 } 4053 if (s->current_el == 0) { 4054 return false; 4055 } 4056 4057 if (a->rn == 31) { 4058 gen_check_sp_alignment(s); 4059 } 4060 4061 addr = read_cpu_reg_sp(s, a->rn, true); 4062 tcg_gen_addi_i64(addr, addr, a->imm); 4063 tcg_rt = cpu_reg(s, a->rt); 4064 4065 if (s->ata[0]) { 4066 gen_helper_ldgm(tcg_rt, tcg_env, addr); 4067 } else { 4068 MMUAccessType acc = MMU_DATA_LOAD; 4069 int size = 4 << s->gm_blocksize; 4070 4071 clean_addr = clean_data_tbi(s, addr); 4072 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4073 gen_probe_access(s, clean_addr, acc, size); 4074 /* The result tags are zeros. 
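         * Without allocation tag storage to read, the probe above only
         * checks the access for faults; rt is simply cleared below.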
         */
        tcg_gen_movi_i64(tcg_rt, 0);
    }
    return true;
}

static bool trans_LDG(DisasContext *s, arg_ldst_tag *a)
{
    TCGv_i64 addr, clean_addr, tcg_rt;

    if (!dc_isar_feature(aa64_mte_insn_reg, s)) {
        return false;
    }

    if (a->rn == 31) {
        gen_check_sp_alignment(s);
    }

    addr = read_cpu_reg_sp(s, a->rn, true);
    if (!a->p) {
        /* pre-index or signed offset */
        tcg_gen_addi_i64(addr, addr, a->imm);
    }

    tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
    tcg_rt = cpu_reg(s, a->rt);
    if (s->ata[0]) {
        gen_helper_ldg(tcg_rt, tcg_env, addr, tcg_rt);
    } else {
        /*
         * Tag access disabled: we must check for aborts on the load
         * from [rn+offset], and then insert a 0 tag into rt.
         */
        clean_addr = clean_data_tbi(s, addr);
        gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
        gen_address_with_allocation_tag0(tcg_rt, tcg_rt);
    }

    if (a->w) {
        /* pre-index or post-index */
        if (a->p) {
            /* post-index */
            tcg_gen_addi_i64(addr, addr, a->imm);
        }
        tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr);
    }
    return true;
}

static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair)
{
    TCGv_i64 addr, tcg_rt;

    if (a->rn == 31) {
        gen_check_sp_alignment(s);
    }

    addr = read_cpu_reg_sp(s, a->rn, true);
    if (!a->p) {
        /* pre-index or signed offset */
        tcg_gen_addi_i64(addr, addr, a->imm);
    }
    tcg_rt = cpu_reg_sp(s, a->rt);
    if (!s->ata[0]) {
        /*
         * For STG and ST2G, we need to check alignment and probe memory.
         * TODO: For STZG and STZ2G, we could rely on the stores below,
         * at least for system mode; user-only won't enforce alignment.
         */
        if (is_pair) {
            gen_helper_st2g_stub(tcg_env, addr);
        } else {
            gen_helper_stg_stub(tcg_env, addr);
        }
    } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
        if (is_pair) {
            gen_helper_st2g_parallel(tcg_env, addr, tcg_rt);
        } else {
            gen_helper_stg_parallel(tcg_env, addr, tcg_rt);
        }
    } else {
        if (is_pair) {
            gen_helper_st2g(tcg_env, addr, tcg_rt);
        } else {
            gen_helper_stg(tcg_env, addr, tcg_rt);
        }
    }

    if (is_zero) {
        TCGv_i64 clean_addr = clean_data_tbi(s, addr);
        TCGv_i64 zero64 = tcg_constant_i64(0);
        TCGv_i128 zero128 = tcg_temp_new_i128();
        int mem_index = get_mem_index(s);
        MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);

        tcg_gen_concat_i64_i128(zero128, zero64, zero64);

        /* This is 1 or 2 atomic 16-byte operations.
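         * (one 16-byte granule zeroed for STZG, two for STZ2G)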
*/ 4172 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4173 if (is_pair) { 4174 tcg_gen_addi_i64(clean_addr, clean_addr, 16); 4175 tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop); 4176 } 4177 } 4178 4179 if (a->w) { 4180 /* pre-index or post-index */ 4181 if (a->p) { 4182 /* post-index */ 4183 tcg_gen_addi_i64(addr, addr, a->imm); 4184 } 4185 tcg_gen_mov_i64(cpu_reg_sp(s, a->rn), addr); 4186 } 4187 return true; 4188 } 4189 4190 TRANS_FEAT(STG, aa64_mte_insn_reg, do_STG, a, false, false) 4191 TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) 4192 TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) 4193 TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) 4194 4195 typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); 4196 do_SET(DisasContext * s,arg_set * a,bool is_epilogue,bool is_setg,SetFn fn)4197 static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, 4198 bool is_setg, SetFn fn) 4199 { 4200 int memidx; 4201 uint32_t syndrome, desc = 0; 4202 4203 if (is_setg && !dc_isar_feature(aa64_mte, s)) { 4204 return false; 4205 } 4206 4207 /* 4208 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4209 * us to pull this check before the CheckMOPSEnabled() test 4210 * (which we do in the helper function) 4211 */ 4212 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4213 a->rd == 31 || a->rn == 31) { 4214 return false; 4215 } 4216 4217 memidx = get_a64_user_mem_index(s, a->unpriv); 4218 4219 /* 4220 * We pass option_a == true, matching our implementation; 4221 * we pass wrong_option == false: helper function may set that bit. 4222 */ 4223 syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, 4224 is_epilogue, false, true, a->rd, a->rs, a->rn); 4225 4226 if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { 4227 /* We may need to do MTE tag checking, so assemble the descriptor */ 4228 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 4229 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 4230 desc = FIELD_DP32(desc, MTEDESC, WRITE, true); 4231 /* SIZEM1 and ALIGN we leave 0 (byte write) */ 4232 } 4233 /* The helper function always needs the memidx even with MTE disabled */ 4234 desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); 4235 4236 /* 4237 * The helper needs the register numbers, but since they're in 4238 * the syndrome anyway, we let it extract them from there rather 4239 * than passing in an extra three integer arguments. 
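     * (This is the same syndrome value that would be reported for a
     * MOPS exception, which is why the register numbers are in it.)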
4240 */ 4241 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); 4242 return true; 4243 } 4244 4245 TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) 4246 TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) 4247 TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) 4248 TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) 4249 TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) 4250 TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) 4251 4252 typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); 4253 do_CPY(DisasContext * s,arg_cpy * a,bool is_epilogue,CpyFn fn)4254 static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) 4255 { 4256 int rmemidx, wmemidx; 4257 uint32_t syndrome, rdesc = 0, wdesc = 0; 4258 bool wunpriv = extract32(a->options, 0, 1); 4259 bool runpriv = extract32(a->options, 1, 1); 4260 4261 /* 4262 * UNPREDICTABLE cases: we choose to UNDEF, which allows 4263 * us to pull this check before the CheckMOPSEnabled() test 4264 * (which we do in the helper function) 4265 */ 4266 if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || 4267 a->rd == 31 || a->rs == 31 || a->rn == 31) { 4268 return false; 4269 } 4270 4271 rmemidx = get_a64_user_mem_index(s, runpriv); 4272 wmemidx = get_a64_user_mem_index(s, wunpriv); 4273 4274 /* 4275 * We pass option_a == true, matching our implementation; 4276 * we pass wrong_option == false: helper function may set that bit. 4277 */ 4278 syndrome = syn_mop(false, false, a->options, is_epilogue, 4279 false, true, a->rd, a->rs, a->rn); 4280 4281 /* If we need to do MTE tag checking, assemble the descriptors */ 4282 if (s->mte_active[runpriv]) { 4283 rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); 4284 rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); 4285 } 4286 if (s->mte_active[wunpriv]) { 4287 wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); 4288 wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); 4289 wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); 4290 } 4291 /* The helper function needs these parts of the descriptor regardless */ 4292 rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); 4293 wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); 4294 4295 /* 4296 * The helper needs the register numbers, but since they're in 4297 * the syndrome anyway, we let it extract them from there rather 4298 * than passing in an extra three integer arguments. 4299 */ 4300 fn(tcg_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), 4301 tcg_constant_i32(rdesc)); 4302 return true; 4303 } 4304 4305 TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) 4306 TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) 4307 TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) 4308 TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) 4309 TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) 4310 TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) 4311 4312 typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); 4313 gen_rri(DisasContext * s,arg_rri_sf * a,bool rd_sp,bool rn_sp,ArithTwoOp * fn)4314 static bool gen_rri(DisasContext *s, arg_rri_sf *a, 4315 bool rd_sp, bool rn_sp, ArithTwoOp *fn) 4316 { 4317 TCGv_i64 tcg_rn = rn_sp ? cpu_reg_sp(s, a->rn) : cpu_reg(s, a->rn); 4318 TCGv_i64 tcg_rd = rd_sp ? 
cpu_reg_sp(s, a->rd) : cpu_reg(s, a->rd); 4319 TCGv_i64 tcg_imm = tcg_constant_i64(a->imm); 4320 4321 fn(tcg_rd, tcg_rn, tcg_imm); 4322 if (!a->sf) { 4323 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4324 } 4325 return true; 4326 } 4327 4328 /* 4329 * PC-rel. addressing 4330 */ 4331 trans_ADR(DisasContext * s,arg_ri * a)4332 static bool trans_ADR(DisasContext *s, arg_ri *a) 4333 { 4334 gen_pc_plus_diff(s, cpu_reg(s, a->rd), a->imm); 4335 return true; 4336 } 4337 trans_ADRP(DisasContext * s,arg_ri * a)4338 static bool trans_ADRP(DisasContext *s, arg_ri *a) 4339 { 4340 int64_t offset = (int64_t)a->imm << 12; 4341 4342 /* The page offset is ok for CF_PCREL. */ 4343 offset -= s->pc_curr & 0xfff; 4344 gen_pc_plus_diff(s, cpu_reg(s, a->rd), offset); 4345 return true; 4346 } 4347 4348 /* 4349 * Add/subtract (immediate) 4350 */ 4351 TRANS(ADD_i, gen_rri, a, 1, 1, tcg_gen_add_i64) 4352 TRANS(SUB_i, gen_rri, a, 1, 1, tcg_gen_sub_i64) 4353 TRANS(ADDS_i, gen_rri, a, 0, 1, a->sf ? gen_add64_CC : gen_add32_CC) 4354 TRANS(SUBS_i, gen_rri, a, 0, 1, a->sf ? gen_sub64_CC : gen_sub32_CC) 4355 4356 /* 4357 * Add/subtract (immediate, with tags) 4358 */ 4359 gen_add_sub_imm_with_tags(DisasContext * s,arg_rri_tag * a,bool sub_op)4360 static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, 4361 bool sub_op) 4362 { 4363 TCGv_i64 tcg_rn, tcg_rd; 4364 int imm; 4365 4366 imm = a->uimm6 << LOG2_TAG_GRANULE; 4367 if (sub_op) { 4368 imm = -imm; 4369 } 4370 4371 tcg_rn = cpu_reg_sp(s, a->rn); 4372 tcg_rd = cpu_reg_sp(s, a->rd); 4373 4374 if (s->ata[0]) { 4375 gen_helper_addsubg(tcg_rd, tcg_env, tcg_rn, 4376 tcg_constant_i32(imm), 4377 tcg_constant_i32(a->uimm4)); 4378 } else { 4379 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4380 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4381 } 4382 return true; 4383 } 4384 TRANS_FEAT(ADDG_i,aa64_mte_insn_reg,gen_add_sub_imm_with_tags,a,false)4385 TRANS_FEAT(ADDG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, false) 4386 TRANS_FEAT(SUBG_i, aa64_mte_insn_reg, gen_add_sub_imm_with_tags, a, true) 4387 4388 /* The input should be a value in the bottom e bits (with higher 4389 * bits zero); returns that value replicated into every element 4390 * of size e in a 64 bit integer. 4391 */ 4392 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4393 { 4394 assert(e != 0); 4395 while (e < 64) { 4396 mask |= mask << e; 4397 e *= 2; 4398 } 4399 return mask; 4400 } 4401 4402 /* 4403 * Logical (immediate) 4404 */ 4405 4406 /* 4407 * Simplified variant of pseudocode DecodeBitMasks() for the case where we 4408 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4409 * value (ie should cause a guest UNDEF exception), and true if they are 4410 * valid, in which case the decoded bit pattern is written to result. 4411 */ logic_imm_decode_wmask(uint64_t * result,unsigned int immn,unsigned int imms,unsigned int immr)4412 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 4413 unsigned int imms, unsigned int immr) 4414 { 4415 uint64_t mask; 4416 unsigned e, levels, s, r; 4417 int len; 4418 4419 assert(immn < 2 && imms < 64 && immr < 64); 4420 4421 /* The bit patterns we create here are 64 bit patterns which 4422 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 4423 * 64 bits each. Each element contains the same value: a run 4424 * of between 1 and e-1 non-zero bits, rotated within the 4425 * element by between 0 and e-1 bits. 
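     * For example, immn=0, imms=0b100101, immr=0b000011 (encoded as
     * described below) selects 16 bit elements each holding a run of
     * 6 ones rotated right by 3, i.e. 0xe007, and so decodes to the
     * 64 bit value 0xe007e007e007e007.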
4426 * 4427 * The element size and run length are encoded into immn (1 bit) 4428 * and imms (6 bits) as follows: 4429 * 64 bit elements: immn = 1, imms = <length of run - 1> 4430 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 4431 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 4432 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 4433 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 4434 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 4435 * Notice that immn = 0, imms = 11111x is the only combination 4436 * not covered by one of the above options; this is reserved. 4437 * Further, <length of run - 1> all-ones is a reserved pattern. 4438 * 4439 * In all cases the rotation is by immr % e (and immr is 6 bits). 4440 */ 4441 4442 /* First determine the element size */ 4443 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 4444 if (len < 1) { 4445 /* This is the immn == 0, imms == 0x11111x case */ 4446 return false; 4447 } 4448 e = 1 << len; 4449 4450 levels = e - 1; 4451 s = imms & levels; 4452 r = immr & levels; 4453 4454 if (s == levels) { 4455 /* <length of run - 1> mustn't be all-ones. */ 4456 return false; 4457 } 4458 4459 /* Create the value of one element: s+1 set bits rotated 4460 * by r within the element (which is e bits wide)... 4461 */ 4462 mask = MAKE_64BIT_MASK(0, s + 1); 4463 if (r) { 4464 mask = (mask >> r) | (mask << (e - r)); 4465 mask &= MAKE_64BIT_MASK(0, e); 4466 } 4467 /* ...then replicate the element over the whole 64 bit value */ 4468 mask = bitfield_replicate(mask, e); 4469 *result = mask; 4470 return true; 4471 } 4472 gen_rri_log(DisasContext * s,arg_rri_log * a,bool set_cc,void (* fn)(TCGv_i64,TCGv_i64,int64_t))4473 static bool gen_rri_log(DisasContext *s, arg_rri_log *a, bool set_cc, 4474 void (*fn)(TCGv_i64, TCGv_i64, int64_t)) 4475 { 4476 TCGv_i64 tcg_rd, tcg_rn; 4477 uint64_t imm; 4478 4479 /* Some immediate field values are reserved. */ 4480 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1), 4481 extract32(a->dbm, 0, 6), 4482 extract32(a->dbm, 6, 6))) { 4483 return false; 4484 } 4485 if (!a->sf) { 4486 imm &= 0xffffffffull; 4487 } 4488 4489 tcg_rd = set_cc ? 
        cpu_reg(s, a->rd) : cpu_reg_sp(s, a->rd);
    tcg_rn = cpu_reg(s, a->rn);

    fn(tcg_rd, tcg_rn, imm);
    if (set_cc) {
        gen_logic_CC(a->sf, tcg_rd);
    }
    if (!a->sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
    return true;
}

TRANS(AND_i, gen_rri_log, a, false, tcg_gen_andi_i64)
TRANS(ORR_i, gen_rri_log, a, false, tcg_gen_ori_i64)
TRANS(EOR_i, gen_rri_log, a, false, tcg_gen_xori_i64)
TRANS(ANDS_i, gen_rri_log, a, true, tcg_gen_andi_i64)

/*
 * Move wide (immediate)
 */

static bool trans_MOVZ(DisasContext *s, arg_movw *a)
{
    int pos = a->hw << 4;
    tcg_gen_movi_i64(cpu_reg(s, a->rd), (uint64_t)a->imm << pos);
    return true;
}

static bool trans_MOVN(DisasContext *s, arg_movw *a)
{
    int pos = a->hw << 4;
    uint64_t imm = a->imm;

    imm = ~(imm << pos);
    if (!a->sf) {
        imm = (uint32_t)imm;
    }
    tcg_gen_movi_i64(cpu_reg(s, a->rd), imm);
    return true;
}

static bool trans_MOVK(DisasContext *s, arg_movw *a)
{
    int pos = a->hw << 4;
    TCGv_i64 tcg_rd, tcg_im;

    tcg_rd = cpu_reg(s, a->rd);
    tcg_im = tcg_constant_i64(a->imm);
    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_im, pos, 16);
    if (!a->sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
    return true;
}

/*
 * Bitfield
 */

static bool trans_SBFM(DisasContext *s, arg_SBFM *a)
{
    TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
    TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
    unsigned int bitsize = a->sf ? 64 : 32;
    unsigned int ri = a->immr;
    unsigned int si = a->imms;
    unsigned int pos, len;

    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        len = (si - ri) + 1;
        tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
        if (!a->sf) {
            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
        }
    } else {
        /* Wd<32+s-r,32-r> = Wn<s:0> */
        len = si + 1;
        pos = (bitsize - ri) & (bitsize - 1);

        if (len < ri) {
            /*
             * Sign extend the destination field from len to fill the
             * balance of the word. Let the deposit below insert all
             * of those sign bits.
             */
            tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
            len = ri;
        }

        /*
         * We start with zero, and we haven't modified any bits outside
         * bitsize, therefore no final zero-extension is needed for !sf.
         */
        tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
    }
    return true;
}

static bool trans_UBFM(DisasContext *s, arg_UBFM *a)
{
    TCGv_i64 tcg_rd = cpu_reg(s, a->rd);
    TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1);
    unsigned int bitsize = a->sf ?
64 : 32; 4594 unsigned int ri = a->immr; 4595 unsigned int si = a->imms; 4596 unsigned int pos, len; 4597 4598 tcg_rd = cpu_reg(s, a->rd); 4599 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4600 4601 if (si >= ri) { 4602 /* Wd<s-r:0> = Wn<s:r> */ 4603 len = (si - ri) + 1; 4604 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); 4605 } else { 4606 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4607 len = si + 1; 4608 pos = (bitsize - ri) & (bitsize - 1); 4609 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4610 } 4611 return true; 4612 } 4613 trans_BFM(DisasContext * s,arg_BFM * a)4614 static bool trans_BFM(DisasContext *s, arg_BFM *a) 4615 { 4616 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4617 TCGv_i64 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4618 unsigned int bitsize = a->sf ? 64 : 32; 4619 unsigned int ri = a->immr; 4620 unsigned int si = a->imms; 4621 unsigned int pos, len; 4622 4623 tcg_rd = cpu_reg(s, a->rd); 4624 tcg_tmp = read_cpu_reg(s, a->rn, 1); 4625 4626 if (si >= ri) { 4627 /* Wd<s-r:0> = Wn<s:r> */ 4628 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 4629 len = (si - ri) + 1; 4630 pos = 0; 4631 } else { 4632 /* Wd<32+s-r,32-r> = Wn<s:0> */ 4633 len = si + 1; 4634 pos = (bitsize - ri) & (bitsize - 1); 4635 } 4636 4637 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4638 if (!a->sf) { 4639 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4640 } 4641 return true; 4642 } 4643 trans_EXTR(DisasContext * s,arg_extract * a)4644 static bool trans_EXTR(DisasContext *s, arg_extract *a) 4645 { 4646 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4647 4648 tcg_rd = cpu_reg(s, a->rd); 4649 4650 if (unlikely(a->imm == 0)) { 4651 /* 4652 * tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4653 * so an extract from bit 0 is a special case. 4654 */ 4655 if (a->sf) { 4656 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, a->rm)); 4657 } else { 4658 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, a->rm)); 4659 } 4660 } else { 4661 tcg_rm = cpu_reg(s, a->rm); 4662 tcg_rn = cpu_reg(s, a->rn); 4663 4664 if (a->sf) { 4665 /* Specialization to ROR happens in EXTRACT2. */ 4666 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, a->imm); 4667 } else { 4668 TCGv_i32 t0 = tcg_temp_new_i32(); 4669 4670 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4671 if (a->rm == a->rn) { 4672 tcg_gen_rotri_i32(t0, t0, a->imm); 4673 } else { 4674 TCGv_i32 t1 = tcg_temp_new_i32(); 4675 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4676 tcg_gen_extract2_i32(t0, t0, t1, a->imm); 4677 } 4678 tcg_gen_extu_i32_i64(tcg_rd, t0); 4679 } 4680 } 4681 return true; 4682 } 4683 trans_TBL_TBX(DisasContext * s,arg_TBL_TBX * a)4684 static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX *a) 4685 { 4686 if (fp_access_check(s)) { 4687 int len = (a->len + 1) * 16; 4688 4689 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd), 4690 vec_full_reg_offset(s, a->rm), tcg_env, 4691 a->q ? 16 : 8, vec_full_reg_size(s), 4692 (len << 6) | (a->tbx << 5) | a->rn, 4693 gen_helper_simd_tblx); 4694 } 4695 return true; 4696 } 4697 4698 typedef int simd_permute_idx_fn(int i, int part, int elements); 4699 do_simd_permute(DisasContext * s,arg_qrrr_e * a,simd_permute_idx_fn * fn,int part)4700 static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a, 4701 simd_permute_idx_fn *fn, int part) 4702 { 4703 MemOp esz = a->esz; 4704 int datasize = a->q ? 16 : 8; 4705 int elements = datasize >> esz; 4706 TCGv_i64 tcg_res[2], tcg_ele; 4707 4708 if (esz == MO_64 && !a->q) { 4709 return false; 4710 } 4711 if (!fp_access_check(s)) { 4712 return true; 4713 } 4714 4715 tcg_res[0] = tcg_temp_new_i64(); 4716 tcg_res[1] = a->q ? 
tcg_temp_new_i64() : NULL; 4717 tcg_ele = tcg_temp_new_i64(); 4718 4719 for (int i = 0; i < elements; i++) { 4720 int o, w, idx; 4721 4722 idx = fn(i, part, elements); 4723 read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn), 4724 idx & (elements - 1), esz); 4725 4726 w = (i << (esz + 3)) / 64; 4727 o = (i << (esz + 3)) % 64; 4728 if (o == 0) { 4729 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 4730 } else { 4731 tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz); 4732 } 4733 } 4734 4735 for (int i = a->q; i >= 0; --i) { 4736 write_vec_element(s, tcg_res[i], a->rd, i, MO_64); 4737 } 4738 clear_vec_high(s, a->q, a->rd); 4739 return true; 4740 } 4741 permute_load_uzp(int i,int part,int elements)4742 static int permute_load_uzp(int i, int part, int elements) 4743 { 4744 return 2 * i + part; 4745 } 4746 4747 TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0) 4748 TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1) 4749 permute_load_trn(int i,int part,int elements)4750 static int permute_load_trn(int i, int part, int elements) 4751 { 4752 return (i & 1) * elements + (i & ~1) + part; 4753 } 4754 4755 TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0) 4756 TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1) 4757 permute_load_zip(int i,int part,int elements)4758 static int permute_load_zip(int i, int part, int elements) 4759 { 4760 return (i & 1) * elements + ((part * elements + i) >> 1); 4761 } 4762 4763 TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0) 4764 TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1) 4765 4766 /* 4767 * Cryptographic AES, SHA, SHA512 4768 */ 4769 4770 TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese) 4771 TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd) 4772 TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc) 4773 TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc) 4774 4775 TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c) 4776 TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p) 4777 TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m) 4778 TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0) 4779 4780 TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h) 4781 TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2) 4782 TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1) 4783 4784 TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h) 4785 TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1) 4786 TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0) 4787 4788 TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h) 4789 TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2) 4790 TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1) TRANS_FEAT(RAX1,aa64_sha3,do_gvec_fn3,a,gen_gvec_rax1)4791 TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1) 4792 TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1) 4793 TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2) 4794 TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey) 4795 4796 TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, 
gen_helper_crypto_sha512su0) 4797 TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e) 4798 4799 TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3) 4800 TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax) 4801 4802 static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) 4803 { 4804 if (!dc_isar_feature(aa64_sm3, s)) { 4805 return false; 4806 } 4807 if (fp_access_check(s)) { 4808 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 4809 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 4810 TCGv_i32 tcg_op3 = tcg_temp_new_i32(); 4811 TCGv_i32 tcg_res = tcg_temp_new_i32(); 4812 unsigned vsz, dofs; 4813 4814 read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32); 4815 read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32); 4816 read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32); 4817 4818 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 4819 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 4820 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 4821 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 4822 4823 /* Clear the whole register first, then store bits [127:96]. */ 4824 vsz = vec_full_reg_size(s); 4825 dofs = vec_full_reg_offset(s, a->rd); 4826 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 4827 write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32); 4828 } 4829 return true; 4830 } 4831 do_crypto3i(DisasContext * s,arg_crypto3i * a,gen_helper_gvec_3 * fn)4832 static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn) 4833 { 4834 if (fp_access_check(s)) { 4835 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn); 4836 } 4837 return true; 4838 } TRANS_FEAT(SM3TT1A,aa64_sm3,do_crypto3i,a,gen_helper_crypto_sm3tt1a)4839 TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a) 4840 TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b) 4841 TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a) 4842 TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b) 4843 4844 static bool trans_XAR(DisasContext *s, arg_XAR *a) 4845 { 4846 if (!dc_isar_feature(aa64_sha3, s)) { 4847 return false; 4848 } 4849 if (fp_access_check(s)) { 4850 gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd), 4851 vec_full_reg_offset(s, a->rn), 4852 vec_full_reg_offset(s, a->rm), a->imm, 16, 4853 vec_full_reg_size(s)); 4854 } 4855 return true; 4856 } 4857 4858 /* 4859 * Advanced SIMD copy 4860 */ 4861 decode_esz_idx(int imm,MemOp * pesz,unsigned * pidx)4862 static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx) 4863 { 4864 unsigned esz = ctz32(imm); 4865 if (esz <= MO_64) { 4866 *pesz = esz; 4867 *pidx = imm >> (esz + 1); 4868 return true; 4869 } 4870 return false; 4871 } 4872 trans_DUP_element_s(DisasContext * s,arg_DUP_element_s * a)4873 static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a) 4874 { 4875 MemOp esz; 4876 unsigned idx; 4877 4878 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4879 return false; 4880 } 4881 if (fp_access_check(s)) { 4882 /* 4883 * This instruction just extracts the specified element and 4884 * zero-extends it into the bottom of the destination register. 
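         * For example, "mov s0, v1.s[2]" (the scalar MOV alias of DUP)
         * copies element 2 of v1 into the low 32 bits of the destination
         * and zeroes every higher bit of the vector register.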
4885 */ 4886 TCGv_i64 tmp = tcg_temp_new_i64(); 4887 read_vec_element(s, tmp, a->rn, idx, esz); 4888 write_fp_dreg(s, a->rd, tmp); 4889 } 4890 return true; 4891 } 4892 trans_DUP_element_v(DisasContext * s,arg_DUP_element_v * a)4893 static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a) 4894 { 4895 MemOp esz; 4896 unsigned idx; 4897 4898 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4899 return false; 4900 } 4901 if (esz == MO_64 && !a->q) { 4902 return false; 4903 } 4904 if (fp_access_check(s)) { 4905 tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd), 4906 vec_reg_offset(s, a->rn, idx, esz), 4907 a->q ? 16 : 8, vec_full_reg_size(s)); 4908 } 4909 return true; 4910 } 4911 trans_DUP_general(DisasContext * s,arg_DUP_general * a)4912 static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a) 4913 { 4914 MemOp esz; 4915 unsigned idx; 4916 4917 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4918 return false; 4919 } 4920 if (esz == MO_64 && !a->q) { 4921 return false; 4922 } 4923 if (fp_access_check(s)) { 4924 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), 4925 a->q ? 16 : 8, vec_full_reg_size(s), 4926 cpu_reg(s, a->rn)); 4927 } 4928 return true; 4929 } 4930 do_smov_umov(DisasContext * s,arg_SMOV * a,MemOp is_signed)4931 static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed) 4932 { 4933 MemOp esz; 4934 unsigned idx; 4935 4936 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4937 return false; 4938 } 4939 if (is_signed) { 4940 if (esz == MO_64 || (esz == MO_32 && !a->q)) { 4941 return false; 4942 } 4943 } else { 4944 if (esz == MO_64 ? !a->q : a->q) { 4945 return false; 4946 } 4947 } 4948 if (fp_access_check(s)) { 4949 TCGv_i64 tcg_rd = cpu_reg(s, a->rd); 4950 read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed); 4951 if (is_signed && !a->q) { 4952 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4953 } 4954 } 4955 return true; 4956 } 4957 TRANS(SMOV,do_smov_umov,a,MO_SIGN)4958 TRANS(SMOV, do_smov_umov, a, MO_SIGN) 4959 TRANS(UMOV, do_smov_umov, a, 0) 4960 4961 static bool trans_INS_general(DisasContext *s, arg_INS_general *a) 4962 { 4963 MemOp esz; 4964 unsigned idx; 4965 4966 if (!decode_esz_idx(a->imm, &esz, &idx)) { 4967 return false; 4968 } 4969 if (fp_access_check(s)) { 4970 write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz); 4971 clear_vec_high(s, true, a->rd); 4972 } 4973 return true; 4974 } 4975 trans_INS_element(DisasContext * s,arg_INS_element * a)4976 static bool trans_INS_element(DisasContext *s, arg_INS_element *a) 4977 { 4978 MemOp esz; 4979 unsigned didx, sidx; 4980 4981 if (!decode_esz_idx(a->di, &esz, &didx)) { 4982 return false; 4983 } 4984 sidx = a->si >> esz; 4985 if (fp_access_check(s)) { 4986 TCGv_i64 tmp = tcg_temp_new_i64(); 4987 4988 read_vec_element(s, tmp, a->rn, sidx, esz); 4989 write_vec_element(s, tmp, a->rd, didx, esz); 4990 4991 /* INS is considered a 128-bit write for SVE. 
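         * That is, bits beyond 127 of the destination Z register are
         * zeroed even though only a single element is modified.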
*/ 4992 clear_vec_high(s, true, a->rd); 4993 } 4994 return true; 4995 } 4996 4997 /* 4998 * Advanced SIMD three same 4999 */ 5000 5001 typedef struct FPScalar { 5002 void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5003 void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 5004 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); 5005 } FPScalar; 5006 do_fp3_scalar(DisasContext * s,arg_rrr_e * a,const FPScalar * f)5007 static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) 5008 { 5009 switch (a->esz) { 5010 case MO_64: 5011 if (fp_access_check(s)) { 5012 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5013 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5014 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 5015 write_fp_dreg(s, a->rd, t0); 5016 } 5017 break; 5018 case MO_32: 5019 if (fp_access_check(s)) { 5020 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 5021 TCGv_i32 t1 = read_fp_sreg(s, a->rm); 5022 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 5023 write_fp_sreg(s, a->rd, t0); 5024 } 5025 break; 5026 case MO_16: 5027 if (!dc_isar_feature(aa64_fp16, s)) { 5028 return false; 5029 } 5030 if (fp_access_check(s)) { 5031 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 5032 TCGv_i32 t1 = read_fp_hreg(s, a->rm); 5033 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); 5034 write_fp_sreg(s, a->rd, t0); 5035 } 5036 break; 5037 default: 5038 return false; 5039 } 5040 return true; 5041 } 5042 5043 static const FPScalar f_scalar_fadd = { 5044 gen_helper_vfp_addh, 5045 gen_helper_vfp_adds, 5046 gen_helper_vfp_addd, 5047 }; 5048 TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) 5049 5050 static const FPScalar f_scalar_fsub = { 5051 gen_helper_vfp_subh, 5052 gen_helper_vfp_subs, 5053 gen_helper_vfp_subd, 5054 }; 5055 TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) 5056 5057 static const FPScalar f_scalar_fdiv = { 5058 gen_helper_vfp_divh, 5059 gen_helper_vfp_divs, 5060 gen_helper_vfp_divd, 5061 }; 5062 TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) 5063 5064 static const FPScalar f_scalar_fmul = { 5065 gen_helper_vfp_mulh, 5066 gen_helper_vfp_muls, 5067 gen_helper_vfp_muld, 5068 }; 5069 TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) 5070 5071 static const FPScalar f_scalar_fmax = { 5072 gen_helper_advsimd_maxh, 5073 gen_helper_vfp_maxs, 5074 gen_helper_vfp_maxd, 5075 }; 5076 TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) 5077 5078 static const FPScalar f_scalar_fmin = { 5079 gen_helper_advsimd_minh, 5080 gen_helper_vfp_mins, 5081 gen_helper_vfp_mind, 5082 }; 5083 TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) 5084 5085 static const FPScalar f_scalar_fmaxnm = { 5086 gen_helper_advsimd_maxnumh, 5087 gen_helper_vfp_maxnums, 5088 gen_helper_vfp_maxnumd, 5089 }; 5090 TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) 5091 5092 static const FPScalar f_scalar_fminnm = { 5093 gen_helper_advsimd_minnumh, 5094 gen_helper_vfp_minnums, 5095 gen_helper_vfp_minnumd, 5096 }; 5097 TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) 5098 5099 static const FPScalar f_scalar_fmulx = { 5100 gen_helper_advsimd_mulxh, 5101 gen_helper_vfp_mulxs, 5102 gen_helper_vfp_mulxd, 5103 }; 5104 TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) 5105 gen_fnmul_h(TCGv_i32 d,TCGv_i32 n,TCGv_i32 m,TCGv_ptr s)5106 static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5107 { 5108 gen_helper_vfp_mulh(d, n, m, s); 5109 gen_vfp_negh(d, d); 5110 } 5111 gen_fnmul_s(TCGv_i32 d,TCGv_i32 n,TCGv_i32 m,TCGv_ptr s)5112 static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5113 { 5114 gen_helper_vfp_muls(d, n, m, s); 5115 
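    /* FNMUL produces -(n * m): negate the product just computed. */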
gen_vfp_negs(d, d); 5116 } 5117 gen_fnmul_d(TCGv_i64 d,TCGv_i64 n,TCGv_i64 m,TCGv_ptr s)5118 static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5119 { 5120 gen_helper_vfp_muld(d, n, m, s); 5121 gen_vfp_negd(d, d); 5122 } 5123 5124 static const FPScalar f_scalar_fnmul = { 5125 gen_fnmul_h, 5126 gen_fnmul_s, 5127 gen_fnmul_d, 5128 }; 5129 TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) 5130 5131 static const FPScalar f_scalar_fcmeq = { 5132 gen_helper_advsimd_ceq_f16, 5133 gen_helper_neon_ceq_f32, 5134 gen_helper_neon_ceq_f64, 5135 }; 5136 TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) 5137 5138 static const FPScalar f_scalar_fcmge = { 5139 gen_helper_advsimd_cge_f16, 5140 gen_helper_neon_cge_f32, 5141 gen_helper_neon_cge_f64, 5142 }; 5143 TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) 5144 5145 static const FPScalar f_scalar_fcmgt = { 5146 gen_helper_advsimd_cgt_f16, 5147 gen_helper_neon_cgt_f32, 5148 gen_helper_neon_cgt_f64, 5149 }; 5150 TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) 5151 5152 static const FPScalar f_scalar_facge = { 5153 gen_helper_advsimd_acge_f16, 5154 gen_helper_neon_acge_f32, 5155 gen_helper_neon_acge_f64, 5156 }; 5157 TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) 5158 5159 static const FPScalar f_scalar_facgt = { 5160 gen_helper_advsimd_acgt_f16, 5161 gen_helper_neon_acgt_f32, 5162 gen_helper_neon_acgt_f64, 5163 }; 5164 TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) 5165 gen_fabd_h(TCGv_i32 d,TCGv_i32 n,TCGv_i32 m,TCGv_ptr s)5166 static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5167 { 5168 gen_helper_vfp_subh(d, n, m, s); 5169 gen_vfp_absh(d, d); 5170 } 5171 gen_fabd_s(TCGv_i32 d,TCGv_i32 n,TCGv_i32 m,TCGv_ptr s)5172 static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) 5173 { 5174 gen_helper_vfp_subs(d, n, m, s); 5175 gen_vfp_abss(d, d); 5176 } 5177 gen_fabd_d(TCGv_i64 d,TCGv_i64 n,TCGv_i64 m,TCGv_ptr s)5178 static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) 5179 { 5180 gen_helper_vfp_subd(d, n, m, s); 5181 gen_vfp_absd(d, d); 5182 } 5183 5184 static const FPScalar f_scalar_fabd = { 5185 gen_fabd_h, 5186 gen_fabd_s, 5187 gen_fabd_d, 5188 }; 5189 TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) 5190 5191 static const FPScalar f_scalar_frecps = { 5192 gen_helper_recpsf_f16, 5193 gen_helper_recpsf_f32, 5194 gen_helper_recpsf_f64, 5195 }; 5196 TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) 5197 5198 static const FPScalar f_scalar_frsqrts = { 5199 gen_helper_rsqrtsf_f16, 5200 gen_helper_rsqrtsf_f32, 5201 gen_helper_rsqrtsf_f64, 5202 }; 5203 TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) 5204 do_satacc_s(DisasContext * s,arg_rrr_e * a,MemOp sgn_n,MemOp sgn_m,void (* gen_bhs)(TCGv_i64,TCGv_i64,TCGv_i64,TCGv_i64,MemOp),void (* gen_d)(TCGv_i64,TCGv_i64,TCGv_i64,TCGv_i64))5205 static bool do_satacc_s(DisasContext *s, arg_rrr_e *a, 5206 MemOp sgn_n, MemOp sgn_m, 5207 void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp), 5208 void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) 5209 { 5210 TCGv_i64 t0, t1, t2, qc; 5211 MemOp esz = a->esz; 5212 5213 if (!fp_access_check(s)) { 5214 return true; 5215 } 5216 5217 t0 = tcg_temp_new_i64(); 5218 t1 = tcg_temp_new_i64(); 5219 t2 = tcg_temp_new_i64(); 5220 qc = tcg_temp_new_i64(); 5221 read_vec_element(s, t1, a->rn, 0, esz | sgn_n); 5222 read_vec_element(s, t2, a->rm, 0, esz | sgn_m); 5223 tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5224 5225 if (esz == MO_64) { 5226 gen_d(t0, qc, t1, t2); 5227 } else 
{ 5228 gen_bhs(t0, qc, t1, t2, esz); 5229 tcg_gen_ext_i64(t0, t0, esz); 5230 } 5231 5232 write_fp_dreg(s, a->rd, t0); 5233 tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); 5234 return true; 5235 } 5236 TRANS(SQADD_s,do_satacc_s,a,MO_SIGN,MO_SIGN,gen_sqadd_bhs,gen_sqadd_d)5237 TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d) 5238 TRANS(SQSUB_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d) 5239 TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d) 5240 TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d) 5241 TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d) 5242 TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d) 5243 5244 static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a, 5245 void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64)) 5246 { 5247 if (fp_access_check(s)) { 5248 TCGv_i64 t0 = tcg_temp_new_i64(); 5249 TCGv_i64 t1 = tcg_temp_new_i64(); 5250 5251 read_vec_element(s, t0, a->rn, 0, MO_64); 5252 read_vec_element(s, t1, a->rm, 0, MO_64); 5253 fn(t0, t0, t1); 5254 write_fp_dreg(s, a->rd, t0); 5255 } 5256 return true; 5257 } 5258 5259 TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64) 5260 TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64) 5261 TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64) 5262 TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64) 5263 TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64) 5264 TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64) 5265 5266 typedef struct ENVScalar2 { 5267 NeonGenTwoOpEnvFn *gen_bhs[3]; 5268 NeonGenTwo64OpEnvFn *gen_d; 5269 } ENVScalar2; 5270 do_env_scalar2(DisasContext * s,arg_rrr_e * a,const ENVScalar2 * f)5271 static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f) 5272 { 5273 if (!fp_access_check(s)) { 5274 return true; 5275 } 5276 if (a->esz == MO_64) { 5277 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5278 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5279 f->gen_d(t0, tcg_env, t0, t1); 5280 write_fp_dreg(s, a->rd, t0); 5281 } else { 5282 TCGv_i32 t0 = tcg_temp_new_i32(); 5283 TCGv_i32 t1 = tcg_temp_new_i32(); 5284 5285 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5286 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5287 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 5288 write_fp_sreg(s, a->rd, t0); 5289 } 5290 return true; 5291 } 5292 5293 static const ENVScalar2 f_scalar_sqshl = { 5294 { gen_helper_neon_qshl_s8, 5295 gen_helper_neon_qshl_s16, 5296 gen_helper_neon_qshl_s32 }, 5297 gen_helper_neon_qshl_s64, 5298 }; 5299 TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl) 5300 5301 static const ENVScalar2 f_scalar_uqshl = { 5302 { gen_helper_neon_qshl_u8, 5303 gen_helper_neon_qshl_u16, 5304 gen_helper_neon_qshl_u32 }, 5305 gen_helper_neon_qshl_u64, 5306 }; 5307 TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl) 5308 5309 static const ENVScalar2 f_scalar_sqrshl = { 5310 { gen_helper_neon_qrshl_s8, 5311 gen_helper_neon_qrshl_s16, 5312 gen_helper_neon_qrshl_s32 }, 5313 gen_helper_neon_qrshl_s64, 5314 }; 5315 TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl) 5316 5317 static const ENVScalar2 f_scalar_uqrshl = { 5318 { gen_helper_neon_qrshl_u8, 5319 gen_helper_neon_qrshl_u16, 5320 gen_helper_neon_qrshl_u32 }, 5321 gen_helper_neon_qrshl_u64, 5322 }; 5323 TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl) 5324 do_env_scalar2_hs(DisasContext * s,arg_rrr_e * a,const ENVScalar2 * f)5325 static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a, 5326 const ENVScalar2 *f) 5327 { 5328 if (a->esz == MO_16 
|| a->esz == MO_32) { 5329 return do_env_scalar2(s, a, f); 5330 } 5331 return false; 5332 } 5333 5334 static const ENVScalar2 f_scalar_sqdmulh = { 5335 { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 } 5336 }; 5337 TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh) 5338 5339 static const ENVScalar2 f_scalar_sqrdmulh = { 5340 { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 } 5341 }; 5342 TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh) 5343 5344 typedef struct ENVScalar3 { 5345 NeonGenThreeOpEnvFn *gen_hs[2]; 5346 } ENVScalar3; 5347 do_env_scalar3_hs(DisasContext * s,arg_rrr_e * a,const ENVScalar3 * f)5348 static bool do_env_scalar3_hs(DisasContext *s, arg_rrr_e *a, 5349 const ENVScalar3 *f) 5350 { 5351 TCGv_i32 t0, t1, t2; 5352 5353 if (a->esz != MO_16 && a->esz != MO_32) { 5354 return false; 5355 } 5356 if (!fp_access_check(s)) { 5357 return true; 5358 } 5359 5360 t0 = tcg_temp_new_i32(); 5361 t1 = tcg_temp_new_i32(); 5362 t2 = tcg_temp_new_i32(); 5363 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 5364 read_vec_element_i32(s, t1, a->rm, 0, a->esz); 5365 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 5366 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 5367 write_fp_sreg(s, a->rd, t0); 5368 return true; 5369 } 5370 5371 static const ENVScalar3 f_scalar_sqrdmlah = { 5372 { gen_helper_neon_qrdmlah_s16, gen_helper_neon_qrdmlah_s32 } 5373 }; 5374 TRANS_FEAT(SQRDMLAH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlah) 5375 5376 static const ENVScalar3 f_scalar_sqrdmlsh = { 5377 { gen_helper_neon_qrdmlsh_s16, gen_helper_neon_qrdmlsh_s32 } 5378 }; 5379 TRANS_FEAT(SQRDMLSH_s, aa64_rdm, do_env_scalar3_hs, a, &f_scalar_sqrdmlsh) 5380 do_cmop_d(DisasContext * s,arg_rrr_e * a,TCGCond cond)5381 static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond) 5382 { 5383 if (fp_access_check(s)) { 5384 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 5385 TCGv_i64 t1 = read_fp_dreg(s, a->rm); 5386 tcg_gen_negsetcond_i64(cond, t0, t0, t1); 5387 write_fp_dreg(s, a->rd, t0); 5388 } 5389 return true; 5390 } 5391 TRANS(CMGT_s,do_cmop_d,a,TCG_COND_GT)5392 TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT) 5393 TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU) 5394 TRANS(CMGE_s, do_cmop_d, a, TCG_COND_GE) 5395 TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) 5396 TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) 5397 TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) 5398 5399 static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data, 5400 gen_helper_gvec_3_ptr * const fns[3]) 5401 { 5402 MemOp esz = a->esz; 5403 5404 switch (esz) { 5405 case MO_64: 5406 if (!a->q) { 5407 return false; 5408 } 5409 break; 5410 case MO_32: 5411 break; 5412 case MO_16: 5413 if (!dc_isar_feature(aa64_fp16, s)) { 5414 return false; 5415 } 5416 break; 5417 default: 5418 return false; 5419 } 5420 if (fp_access_check(s)) { 5421 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 5422 esz == MO_16, data, fns[esz - 1]); 5423 } 5424 return true; 5425 } 5426 5427 static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { 5428 gen_helper_gvec_fadd_h, 5429 gen_helper_gvec_fadd_s, 5430 gen_helper_gvec_fadd_d, 5431 }; 5432 TRANS(FADD_v, do_fp3_vector, a, 0, f_vector_fadd) 5433 5434 static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = { 5435 gen_helper_gvec_fsub_h, 5436 gen_helper_gvec_fsub_s, 5437 gen_helper_gvec_fsub_d, 5438 }; 5439 TRANS(FSUB_v, do_fp3_vector, a, 0, f_vector_fsub) 5440 5441 static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = { 5442 gen_helper_gvec_fdiv_h, 5443 gen_helper_gvec_fdiv_s, 5444 
gen_helper_gvec_fdiv_d, 5445 }; 5446 TRANS(FDIV_v, do_fp3_vector, a, 0, f_vector_fdiv) 5447 5448 static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = { 5449 gen_helper_gvec_fmul_h, 5450 gen_helper_gvec_fmul_s, 5451 gen_helper_gvec_fmul_d, 5452 }; 5453 TRANS(FMUL_v, do_fp3_vector, a, 0, f_vector_fmul) 5454 5455 static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { 5456 gen_helper_gvec_fmax_h, 5457 gen_helper_gvec_fmax_s, 5458 gen_helper_gvec_fmax_d, 5459 }; 5460 TRANS(FMAX_v, do_fp3_vector, a, 0, f_vector_fmax) 5461 5462 static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { 5463 gen_helper_gvec_fmin_h, 5464 gen_helper_gvec_fmin_s, 5465 gen_helper_gvec_fmin_d, 5466 }; 5467 TRANS(FMIN_v, do_fp3_vector, a, 0, f_vector_fmin) 5468 5469 static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { 5470 gen_helper_gvec_fmaxnum_h, 5471 gen_helper_gvec_fmaxnum_s, 5472 gen_helper_gvec_fmaxnum_d, 5473 }; 5474 TRANS(FMAXNM_v, do_fp3_vector, a, 0, f_vector_fmaxnm) 5475 5476 static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = { 5477 gen_helper_gvec_fminnum_h, 5478 gen_helper_gvec_fminnum_s, 5479 gen_helper_gvec_fminnum_d, 5480 }; 5481 TRANS(FMINNM_v, do_fp3_vector, a, 0, f_vector_fminnm) 5482 5483 static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = { 5484 gen_helper_gvec_fmulx_h, 5485 gen_helper_gvec_fmulx_s, 5486 gen_helper_gvec_fmulx_d, 5487 }; 5488 TRANS(FMULX_v, do_fp3_vector, a, 0, f_vector_fmulx) 5489 5490 static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = { 5491 gen_helper_gvec_vfma_h, 5492 gen_helper_gvec_vfma_s, 5493 gen_helper_gvec_vfma_d, 5494 }; 5495 TRANS(FMLA_v, do_fp3_vector, a, 0, f_vector_fmla) 5496 5497 static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { 5498 gen_helper_gvec_vfms_h, 5499 gen_helper_gvec_vfms_s, 5500 gen_helper_gvec_vfms_d, 5501 }; 5502 TRANS(FMLS_v, do_fp3_vector, a, 0, f_vector_fmls) 5503 5504 static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { 5505 gen_helper_gvec_fceq_h, 5506 gen_helper_gvec_fceq_s, 5507 gen_helper_gvec_fceq_d, 5508 }; 5509 TRANS(FCMEQ_v, do_fp3_vector, a, 0, f_vector_fcmeq) 5510 5511 static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = { 5512 gen_helper_gvec_fcge_h, 5513 gen_helper_gvec_fcge_s, 5514 gen_helper_gvec_fcge_d, 5515 }; 5516 TRANS(FCMGE_v, do_fp3_vector, a, 0, f_vector_fcmge) 5517 5518 static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = { 5519 gen_helper_gvec_fcgt_h, 5520 gen_helper_gvec_fcgt_s, 5521 gen_helper_gvec_fcgt_d, 5522 }; 5523 TRANS(FCMGT_v, do_fp3_vector, a, 0, f_vector_fcmgt) 5524 5525 static gen_helper_gvec_3_ptr * const f_vector_facge[3] = { 5526 gen_helper_gvec_facge_h, 5527 gen_helper_gvec_facge_s, 5528 gen_helper_gvec_facge_d, 5529 }; 5530 TRANS(FACGE_v, do_fp3_vector, a, 0, f_vector_facge) 5531 5532 static gen_helper_gvec_3_ptr * const f_vector_facgt[3] = { 5533 gen_helper_gvec_facgt_h, 5534 gen_helper_gvec_facgt_s, 5535 gen_helper_gvec_facgt_d, 5536 }; 5537 TRANS(FACGT_v, do_fp3_vector, a, 0, f_vector_facgt) 5538 5539 static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { 5540 gen_helper_gvec_fabd_h, 5541 gen_helper_gvec_fabd_s, 5542 gen_helper_gvec_fabd_d, 5543 }; 5544 TRANS(FABD_v, do_fp3_vector, a, 0, f_vector_fabd) 5545 5546 static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { 5547 gen_helper_gvec_recps_h, 5548 gen_helper_gvec_recps_s, 5549 gen_helper_gvec_recps_d, 5550 }; 5551 TRANS(FRECPS_v, do_fp3_vector, a, 0, f_vector_frecps) 5552 5553 static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { 5554 gen_helper_gvec_rsqrts_h, 5555 
gen_helper_gvec_rsqrts_s, 5556 gen_helper_gvec_rsqrts_d, 5557 }; 5558 TRANS(FRSQRTS_v, do_fp3_vector, a, 0, f_vector_frsqrts) 5559 5560 static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { 5561 gen_helper_gvec_faddp_h, 5562 gen_helper_gvec_faddp_s, 5563 gen_helper_gvec_faddp_d, 5564 }; 5565 TRANS(FADDP_v, do_fp3_vector, a, 0, f_vector_faddp) 5566 5567 static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { 5568 gen_helper_gvec_fmaxp_h, 5569 gen_helper_gvec_fmaxp_s, 5570 gen_helper_gvec_fmaxp_d, 5571 }; 5572 TRANS(FMAXP_v, do_fp3_vector, a, 0, f_vector_fmaxp) 5573 5574 static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { 5575 gen_helper_gvec_fminp_h, 5576 gen_helper_gvec_fminp_s, 5577 gen_helper_gvec_fminp_d, 5578 }; 5579 TRANS(FMINP_v, do_fp3_vector, a, 0, f_vector_fminp) 5580 5581 static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { 5582 gen_helper_gvec_fmaxnump_h, 5583 gen_helper_gvec_fmaxnump_s, 5584 gen_helper_gvec_fmaxnump_d, 5585 }; 5586 TRANS(FMAXNMP_v, do_fp3_vector, a, 0, f_vector_fmaxnmp) 5587 5588 static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = { 5589 gen_helper_gvec_fminnump_h, 5590 gen_helper_gvec_fminnump_s, 5591 gen_helper_gvec_fminnump_d, 5592 }; 5593 TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp) 5594 do_fmlal(DisasContext * s,arg_qrrr_e * a,bool is_s,bool is_2)5595 static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) 5596 { 5597 if (fp_access_check(s)) { 5598 int data = (is_2 << 1) | is_s; 5599 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 5600 vec_full_reg_offset(s, a->rn), 5601 vec_full_reg_offset(s, a->rm), tcg_env, 5602 a->q ? 16 : 8, vec_full_reg_size(s), 5603 data, gen_helper_gvec_fmlal_a64); 5604 } 5605 return true; 5606 } 5607 TRANS_FEAT(FMLAL_v,aa64_fhm,do_fmlal,a,false,false)5608 TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false) 5609 TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false) 5610 TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true) 5611 TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true) 5612 5613 TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp) 5614 TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp) 5615 TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp) 5616 TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp) 5617 TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp) 5618 5619 TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and) 5620 TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc) 5621 TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or) 5622 TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc) 5623 TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor) 5624 5625 static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c) 5626 { 5627 if (fp_access_check(s)) { 5628 gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0); 5629 } 5630 return true; 5631 } 5632 5633 TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm) 5634 TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd) 5635 TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn) 5636 TRANS(SQADD_v,do_gvec_fn3,a,gen_gvec_sqadd_qc)5637 TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc) 5638 TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc) 5639 TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc) 5640 TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc) 5641 TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc) 5642 TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc) 5643 5644 TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl) 5645 TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl) 5646 TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl) 
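/*
 * The remaining shifts in this group are the rounding form URSHL and
 * the saturating forms SQSHL, UQSHL, SQRSHL and UQRSHL; the saturating
 * expanders also set FPSR.QC when the result saturates.
 */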
5647 TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl) 5648 TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl) 5649 TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl) 5650 TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl) 5651 TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl) 5652 5653 TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add) 5654 TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub) 5655 TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd) 5656 TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd) 5657 TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub) 5658 TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub) 5659 TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd) 5660 TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd) 5661 TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax) 5662 TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax) 5663 TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin) 5664 TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin) 5665 TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba) 5666 TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba) 5667 TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd) 5668 TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd) 5669 TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul) 5670 TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b) 5671 TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla) 5672 TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls) 5673 5674 static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond) 5675 { 5676 if (a->esz == MO_64 && !a->q) { 5677 return false; 5678 } 5679 if (fp_access_check(s)) { 5680 tcg_gen_gvec_cmp(cond, a->esz, 5681 vec_full_reg_offset(s, a->rd), 5682 vec_full_reg_offset(s, a->rn), 5683 vec_full_reg_offset(s, a->rm), 5684 a->q ? 16 : 8, vec_full_reg_size(s)); 5685 } 5686 return true; 5687 } 5688 TRANS(CMGT_v,do_cmop_v,a,TCG_COND_GT)5689 TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT) 5690 TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU) 5691 TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE) 5692 TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU) 5693 TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ) 5694 TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst) 5695 5696 TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc) 5697 TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc) 5698 TRANS_FEAT(SQRDMLAH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlah_qc) 5699 TRANS_FEAT(SQRDMLSH_v, aa64_rdm, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmlsh_qc) 5700 5701 static bool do_dot_vector(DisasContext *s, arg_qrrr_e *a, 5702 gen_helper_gvec_4 *fn) 5703 { 5704 if (fp_access_check(s)) { 5705 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 5706 } 5707 return true; 5708 } 5709 do_dot_vector_env(DisasContext * s,arg_qrrr_e * a,gen_helper_gvec_4_ptr * fn)5710 static bool do_dot_vector_env(DisasContext *s, arg_qrrr_e *a, 5711 gen_helper_gvec_4_ptr *fn) 5712 { 5713 if (fp_access_check(s)) { 5714 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, 0, fn); 5715 } 5716 return true; 5717 } 5718 TRANS_FEAT(SDOT_v,aa64_dp,do_dot_vector,a,gen_helper_gvec_sdot_b)5719 TRANS_FEAT(SDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_sdot_b) 5720 TRANS_FEAT(UDOT_v, aa64_dp, do_dot_vector, a, gen_helper_gvec_udot_b) 5721 TRANS_FEAT(USDOT_v, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usdot_b) 5722 TRANS_FEAT(BFDOT_v, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfdot) 5723 TRANS_FEAT(BFMMLA, aa64_bf16, do_dot_vector_env, a, gen_helper_gvec_bfmmla) 5724 TRANS_FEAT(SMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_smmla_b) 5725 TRANS_FEAT(UMMLA, 
aa64_i8mm, do_dot_vector, a, gen_helper_gvec_ummla_b) 5726 TRANS_FEAT(USMMLA, aa64_i8mm, do_dot_vector, a, gen_helper_gvec_usmmla_b) 5727 5728 static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a) 5729 { 5730 if (!dc_isar_feature(aa64_bf16, s)) { 5731 return false; 5732 } 5733 if (fp_access_check(s)) { 5734 /* Q bit selects BFMLALB vs BFMLALT. */ 5735 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, false, a->q, 5736 gen_helper_gvec_bfmlal); 5737 } 5738 return true; 5739 } 5740 5741 static gen_helper_gvec_3_ptr * const f_vector_fcadd[3] = { 5742 gen_helper_gvec_fcaddh, 5743 gen_helper_gvec_fcadds, 5744 gen_helper_gvec_fcaddd, 5745 }; 5746 TRANS_FEAT(FCADD_90, aa64_fcma, do_fp3_vector, a, 0, f_vector_fcadd) 5747 TRANS_FEAT(FCADD_270, aa64_fcma, do_fp3_vector, a, 1, f_vector_fcadd) 5748 trans_FCMLA_v(DisasContext * s,arg_FCMLA_v * a)5749 static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a) 5750 { 5751 gen_helper_gvec_4_ptr *fn; 5752 5753 if (!dc_isar_feature(aa64_fcma, s)) { 5754 return false; 5755 } 5756 switch (a->esz) { 5757 case MO_64: 5758 if (!a->q) { 5759 return false; 5760 } 5761 fn = gen_helper_gvec_fcmlad; 5762 break; 5763 case MO_32: 5764 fn = gen_helper_gvec_fcmlas; 5765 break; 5766 case MO_16: 5767 if (!dc_isar_feature(aa64_fp16, s)) { 5768 return false; 5769 } 5770 fn = gen_helper_gvec_fcmlah; 5771 break; 5772 default: 5773 return false; 5774 } 5775 if (fp_access_check(s)) { 5776 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 5777 a->esz == MO_16, a->rot, fn); 5778 } 5779 return true; 5780 } 5781 5782 /* 5783 * Widening vector x vector/indexed. 5784 * 5785 * These read from the top or bottom half of a 128-bit vector. 5786 * After widening, optionally accumulate with a 128-bit vector. 5787 * Implement these inline, as the number of elements are limited 5788 * and the related SVE and SME operations on larger vectors use 5789 * even/odd elements instead of top/bottom half. 5790 * 5791 * If idx >= 0, operand 2 is indexed, otherwise vector. 5792 * If acc, operand 0 is loaded with rd. 5793 */ 5794 5795 /* For low half, iterating up. */ do_3op_widening(DisasContext * s,MemOp memop,int top,int rd,int rn,int rm,int idx,NeonGenTwo64OpFn * fn,bool acc)5796 static bool do_3op_widening(DisasContext *s, MemOp memop, int top, 5797 int rd, int rn, int rm, int idx, 5798 NeonGenTwo64OpFn *fn, bool acc) 5799 { 5800 TCGv_i64 tcg_op0 = tcg_temp_new_i64(); 5801 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 5802 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 5803 MemOp esz = memop & MO_SIZE; 5804 int half = 8 >> esz; 5805 int top_swap, top_half; 5806 5807 /* There are no 64x64->128 bit operations. */ 5808 if (esz >= MO_64) { 5809 return false; 5810 } 5811 if (!fp_access_check(s)) { 5812 return true; 5813 } 5814 5815 if (idx >= 0) { 5816 read_vec_element(s, tcg_op2, rm, idx, memop); 5817 } 5818 5819 /* 5820 * For top half inputs, iterate forward; backward for bottom half. 5821 * This means the store to the destination will not occur until 5822 * overlapping input inputs are consumed. 5823 * Use top_swap to conditionally invert the forward iteration index. 5824 */ 5825 top_swap = top ? 0 : half - 1; 5826 top_half = top ? 
half : 0; 5827 5828 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 5829 int elt = elt_fwd ^ top_swap; 5830 5831 read_vec_element(s, tcg_op1, rn, elt + top_half, memop); 5832 if (idx < 0) { 5833 read_vec_element(s, tcg_op2, rm, elt + top_half, memop); 5834 } 5835 if (acc) { 5836 read_vec_element(s, tcg_op0, rd, elt, memop + 1); 5837 } 5838 fn(tcg_op0, tcg_op1, tcg_op2); 5839 write_vec_element(s, tcg_op0, rd, elt, esz + 1); 5840 } 5841 clear_vec_high(s, 1, rd); 5842 return true; 5843 } 5844 gen_muladd_i64(TCGv_i64 d,TCGv_i64 n,TCGv_i64 m)5845 static void gen_muladd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5846 { 5847 TCGv_i64 t = tcg_temp_new_i64(); 5848 tcg_gen_mul_i64(t, n, m); 5849 tcg_gen_add_i64(d, d, t); 5850 } 5851 gen_mulsub_i64(TCGv_i64 d,TCGv_i64 n,TCGv_i64 m)5852 static void gen_mulsub_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5853 { 5854 TCGv_i64 t = tcg_temp_new_i64(); 5855 tcg_gen_mul_i64(t, n, m); 5856 tcg_gen_sub_i64(d, d, t); 5857 } 5858 5859 TRANS(SMULL_v, do_3op_widening, 5860 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5861 tcg_gen_mul_i64, false) 5862 TRANS(UMULL_v, do_3op_widening, 5863 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5864 tcg_gen_mul_i64, false) 5865 TRANS(SMLAL_v, do_3op_widening, 5866 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5867 gen_muladd_i64, true) 5868 TRANS(UMLAL_v, do_3op_widening, 5869 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5870 gen_muladd_i64, true) 5871 TRANS(SMLSL_v, do_3op_widening, 5872 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5873 gen_mulsub_i64, true) 5874 TRANS(UMLSL_v, do_3op_widening, 5875 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5876 gen_mulsub_i64, true) 5877 5878 TRANS(SMULL_vi, do_3op_widening, 5879 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5880 tcg_gen_mul_i64, false) 5881 TRANS(UMULL_vi, do_3op_widening, 5882 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 5883 tcg_gen_mul_i64, false) 5884 TRANS(SMLAL_vi, do_3op_widening, 5885 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5886 gen_muladd_i64, true) 5887 TRANS(UMLAL_vi, do_3op_widening, 5888 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 5889 gen_muladd_i64, true) 5890 TRANS(SMLSL_vi, do_3op_widening, 5891 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 5892 gen_mulsub_i64, true) 5893 TRANS(UMLSL_vi, do_3op_widening, 5894 a->esz, a->q, a->rd, a->rn, a->rm, a->idx, 5895 gen_mulsub_i64, true) 5896 gen_sabd_i64(TCGv_i64 d,TCGv_i64 n,TCGv_i64 m)5897 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5898 { 5899 TCGv_i64 t1 = tcg_temp_new_i64(); 5900 TCGv_i64 t2 = tcg_temp_new_i64(); 5901 5902 tcg_gen_sub_i64(t1, n, m); 5903 tcg_gen_sub_i64(t2, m, n); 5904 tcg_gen_movcond_i64(TCG_COND_GE, d, n, m, t1, t2); 5905 } 5906 gen_uabd_i64(TCGv_i64 d,TCGv_i64 n,TCGv_i64 m)5907 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5908 { 5909 TCGv_i64 t1 = tcg_temp_new_i64(); 5910 TCGv_i64 t2 = tcg_temp_new_i64(); 5911 5912 tcg_gen_sub_i64(t1, n, m); 5913 tcg_gen_sub_i64(t2, m, n); 5914 tcg_gen_movcond_i64(TCG_COND_GEU, d, n, m, t1, t2); 5915 } 5916 gen_saba_i64(TCGv_i64 d,TCGv_i64 n,TCGv_i64 m)5917 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5918 { 5919 TCGv_i64 t = tcg_temp_new_i64(); 5920 gen_sabd_i64(t, n, m); 5921 tcg_gen_add_i64(d, d, t); 5922 } 5923 gen_uaba_i64(TCGv_i64 d,TCGv_i64 n,TCGv_i64 m)5924 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5925 { 5926 TCGv_i64 t = tcg_temp_new_i64(); 5927 gen_uabd_i64(t, n, m); 5928 tcg_gen_add_i64(d, d, t); 5929 } 5930 5931 TRANS(SADDL_v, do_3op_widening, 5932 a->esz | 
MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5933 tcg_gen_add_i64, false) 5934 TRANS(UADDL_v, do_3op_widening, 5935 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5936 tcg_gen_add_i64, false) 5937 TRANS(SSUBL_v, do_3op_widening, 5938 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5939 tcg_gen_sub_i64, false) 5940 TRANS(USUBL_v, do_3op_widening, 5941 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5942 tcg_gen_sub_i64, false) 5943 TRANS(SABDL_v, do_3op_widening, 5944 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5945 gen_sabd_i64, false) 5946 TRANS(UABDL_v, do_3op_widening, 5947 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5948 gen_uabd_i64, false) 5949 TRANS(SABAL_v, do_3op_widening, 5950 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 5951 gen_saba_i64, true) 5952 TRANS(UABAL_v, do_3op_widening, 5953 a->esz, a->q, a->rd, a->rn, a->rm, -1, 5954 gen_uaba_i64, true) 5955 gen_sqdmull_h(TCGv_i64 d,TCGv_i64 n,TCGv_i64 m)5956 static void gen_sqdmull_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5957 { 5958 tcg_gen_mul_i64(d, n, m); 5959 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, d); 5960 } 5961 gen_sqdmull_s(TCGv_i64 d,TCGv_i64 n,TCGv_i64 m)5962 static void gen_sqdmull_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5963 { 5964 tcg_gen_mul_i64(d, n, m); 5965 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, d); 5966 } 5967 gen_sqdmlal_h(TCGv_i64 d,TCGv_i64 n,TCGv_i64 m)5968 static void gen_sqdmlal_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5969 { 5970 TCGv_i64 t = tcg_temp_new_i64(); 5971 5972 tcg_gen_mul_i64(t, n, m); 5973 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 5974 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 5975 } 5976 gen_sqdmlal_s(TCGv_i64 d,TCGv_i64 n,TCGv_i64 m)5977 static void gen_sqdmlal_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5978 { 5979 TCGv_i64 t = tcg_temp_new_i64(); 5980 5981 tcg_gen_mul_i64(t, n, m); 5982 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 5983 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 5984 } 5985 gen_sqdmlsl_h(TCGv_i64 d,TCGv_i64 n,TCGv_i64 m)5986 static void gen_sqdmlsl_h(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5987 { 5988 TCGv_i64 t = tcg_temp_new_i64(); 5989 5990 tcg_gen_mul_i64(t, n, m); 5991 gen_helper_neon_addl_saturate_s32(t, tcg_env, t, t); 5992 tcg_gen_neg_i64(t, t); 5993 gen_helper_neon_addl_saturate_s32(d, tcg_env, d, t); 5994 } 5995 gen_sqdmlsl_s(TCGv_i64 d,TCGv_i64 n,TCGv_i64 m)5996 static void gen_sqdmlsl_s(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 5997 { 5998 TCGv_i64 t = tcg_temp_new_i64(); 5999 6000 tcg_gen_mul_i64(t, n, m); 6001 gen_helper_neon_addl_saturate_s64(t, tcg_env, t, t); 6002 tcg_gen_neg_i64(t, t); 6003 gen_helper_neon_addl_saturate_s64(d, tcg_env, d, t); 6004 } 6005 6006 TRANS(SQDMULL_v, do_3op_widening, 6007 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6008 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6009 TRANS(SQDMLAL_v, do_3op_widening, 6010 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6011 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6012 TRANS(SQDMLSL_v, do_3op_widening, 6013 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, -1, 6014 a->esz == MO_16 ? gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6015 6016 TRANS(SQDMULL_vi, do_3op_widening, 6017 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6018 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6019 TRANS(SQDMLAL_vi, do_3op_widening, 6020 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6021 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6022 TRANS(SQDMLSL_vi, do_3op_widening, 6023 a->esz | MO_SIGN, a->q, a->rd, a->rn, a->rm, a->idx, 6024 a->esz == MO_16 ? 
gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6025 do_addsub_wide(DisasContext * s,arg_qrrr_e * a,MemOp sign,bool sub)6026 static bool do_addsub_wide(DisasContext *s, arg_qrrr_e *a, 6027 MemOp sign, bool sub) 6028 { 6029 TCGv_i64 tcg_op0, tcg_op1; 6030 MemOp esz = a->esz; 6031 int half = 8 >> esz; 6032 bool top = a->q; 6033 int top_swap = top ? 0 : half - 1; 6034 int top_half = top ? half : 0; 6035 6036 /* There are no 64x64->128 bit operations. */ 6037 if (esz >= MO_64) { 6038 return false; 6039 } 6040 if (!fp_access_check(s)) { 6041 return true; 6042 } 6043 tcg_op0 = tcg_temp_new_i64(); 6044 tcg_op1 = tcg_temp_new_i64(); 6045 6046 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6047 int elt = elt_fwd ^ top_swap; 6048 6049 read_vec_element(s, tcg_op1, a->rm, elt + top_half, esz | sign); 6050 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6051 if (sub) { 6052 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6053 } else { 6054 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6055 } 6056 write_vec_element(s, tcg_op0, a->rd, elt, esz + 1); 6057 } 6058 clear_vec_high(s, 1, a->rd); 6059 return true; 6060 } 6061 TRANS(SADDW,do_addsub_wide,a,MO_SIGN,false)6062 TRANS(SADDW, do_addsub_wide, a, MO_SIGN, false) 6063 TRANS(UADDW, do_addsub_wide, a, 0, false) 6064 TRANS(SSUBW, do_addsub_wide, a, MO_SIGN, true) 6065 TRANS(USUBW, do_addsub_wide, a, 0, true) 6066 6067 static bool do_addsub_highnarrow(DisasContext *s, arg_qrrr_e *a, 6068 bool sub, bool round) 6069 { 6070 TCGv_i64 tcg_op0, tcg_op1; 6071 MemOp esz = a->esz; 6072 int half = 8 >> esz; 6073 bool top = a->q; 6074 int ebits = 8 << esz; 6075 uint64_t rbit = 1ull << (ebits - 1); 6076 int top_swap, top_half; 6077 6078 /* There are no 128x128->64 bit operations. */ 6079 if (esz >= MO_64) { 6080 return false; 6081 } 6082 if (!fp_access_check(s)) { 6083 return true; 6084 } 6085 tcg_op0 = tcg_temp_new_i64(); 6086 tcg_op1 = tcg_temp_new_i64(); 6087 6088 /* 6089 * For top half inputs, iterate backward; forward for bottom half. 6090 * This means the store to the destination will not occur until 6091 * overlapping input inputs are consumed. 6092 */ 6093 top_swap = top ? half - 1 : 0; 6094 top_half = top ? half : 0; 6095 6096 for (int elt_fwd = 0; elt_fwd < half; ++elt_fwd) { 6097 int elt = elt_fwd ^ top_swap; 6098 6099 read_vec_element(s, tcg_op1, a->rm, elt, esz + 1); 6100 read_vec_element(s, tcg_op0, a->rn, elt, esz + 1); 6101 if (sub) { 6102 tcg_gen_sub_i64(tcg_op0, tcg_op0, tcg_op1); 6103 } else { 6104 tcg_gen_add_i64(tcg_op0, tcg_op0, tcg_op1); 6105 } 6106 if (round) { 6107 tcg_gen_addi_i64(tcg_op0, tcg_op0, rbit); 6108 } 6109 tcg_gen_shri_i64(tcg_op0, tcg_op0, ebits); 6110 write_vec_element(s, tcg_op0, a->rd, elt + top_half, esz); 6111 } 6112 clear_vec_high(s, top, a->rd); 6113 return true; 6114 } 6115 TRANS(ADDHN,do_addsub_highnarrow,a,false,false)6116 TRANS(ADDHN, do_addsub_highnarrow, a, false, false) 6117 TRANS(SUBHN, do_addsub_highnarrow, a, true, false) 6118 TRANS(RADDHN, do_addsub_highnarrow, a, false, true) 6119 TRANS(RSUBHN, do_addsub_highnarrow, a, true, true) 6120 6121 static bool do_pmull(DisasContext *s, arg_qrrr_e *a, gen_helper_gvec_3 *fn) 6122 { 6123 if (fp_access_check(s)) { 6124 /* The Q field specifies lo/hi half input for these insns. 
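 * With Q = 0 the helper operates on the low 64-bit halves of rn and rm
 * (PMULL); with Q = 1 it uses the high halves (PMULL2). Either way the
 * widened polynomial product fills all 128 bits of rd.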
*/ 6125 gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->q, fn); 6126 } 6127 return true; 6128 } 6129 TRANS(PMULL_p8,do_pmull,a,gen_helper_neon_pmull_h)6130 TRANS(PMULL_p8, do_pmull, a, gen_helper_neon_pmull_h) 6131 TRANS_FEAT(PMULL_p64, aa64_pmull, do_pmull, a, gen_helper_gvec_pmull_q) 6132 6133 /* 6134 * Advanced SIMD scalar/vector x indexed element 6135 */ 6136 6137 static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) 6138 { 6139 switch (a->esz) { 6140 case MO_64: 6141 if (fp_access_check(s)) { 6142 TCGv_i64 t0 = read_fp_dreg(s, a->rn); 6143 TCGv_i64 t1 = tcg_temp_new_i64(); 6144 6145 read_vec_element(s, t1, a->rm, a->idx, MO_64); 6146 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6147 write_fp_dreg(s, a->rd, t0); 6148 } 6149 break; 6150 case MO_32: 6151 if (fp_access_check(s)) { 6152 TCGv_i32 t0 = read_fp_sreg(s, a->rn); 6153 TCGv_i32 t1 = tcg_temp_new_i32(); 6154 6155 read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); 6156 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6157 write_fp_sreg(s, a->rd, t0); 6158 } 6159 break; 6160 case MO_16: 6161 if (!dc_isar_feature(aa64_fp16, s)) { 6162 return false; 6163 } 6164 if (fp_access_check(s)) { 6165 TCGv_i32 t0 = read_fp_hreg(s, a->rn); 6166 TCGv_i32 t1 = tcg_temp_new_i32(); 6167 6168 read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); 6169 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); 6170 write_fp_sreg(s, a->rd, t0); 6171 } 6172 break; 6173 default: 6174 g_assert_not_reached(); 6175 } 6176 return true; 6177 } 6178 6179 TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul) 6180 TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx) 6181 do_fmla_scalar_idx(DisasContext * s,arg_rrx_e * a,bool neg)6182 static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) 6183 { 6184 switch (a->esz) { 6185 case MO_64: 6186 if (fp_access_check(s)) { 6187 TCGv_i64 t0 = read_fp_dreg(s, a->rd); 6188 TCGv_i64 t1 = read_fp_dreg(s, a->rn); 6189 TCGv_i64 t2 = tcg_temp_new_i64(); 6190 6191 read_vec_element(s, t2, a->rm, a->idx, MO_64); 6192 if (neg) { 6193 gen_vfp_negd(t1, t1); 6194 } 6195 gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR)); 6196 write_fp_dreg(s, a->rd, t0); 6197 } 6198 break; 6199 case MO_32: 6200 if (fp_access_check(s)) { 6201 TCGv_i32 t0 = read_fp_sreg(s, a->rd); 6202 TCGv_i32 t1 = read_fp_sreg(s, a->rn); 6203 TCGv_i32 t2 = tcg_temp_new_i32(); 6204 6205 read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); 6206 if (neg) { 6207 gen_vfp_negs(t1, t1); 6208 } 6209 gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR)); 6210 write_fp_sreg(s, a->rd, t0); 6211 } 6212 break; 6213 case MO_16: 6214 if (!dc_isar_feature(aa64_fp16, s)) { 6215 return false; 6216 } 6217 if (fp_access_check(s)) { 6218 TCGv_i32 t0 = read_fp_hreg(s, a->rd); 6219 TCGv_i32 t1 = read_fp_hreg(s, a->rn); 6220 TCGv_i32 t2 = tcg_temp_new_i32(); 6221 6222 read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); 6223 if (neg) { 6224 gen_vfp_negh(t1, t1); 6225 } 6226 gen_helper_advsimd_muladdh(t0, t1, t2, t0, 6227 fpstatus_ptr(FPST_FPCR_F16)); 6228 write_fp_sreg(s, a->rd, t0); 6229 } 6230 break; 6231 default: 6232 g_assert_not_reached(); 6233 } 6234 return true; 6235 } 6236 TRANS(FMLA_si,do_fmla_scalar_idx,a,false)6237 TRANS(FMLA_si, do_fmla_scalar_idx, a, false) 6238 TRANS(FMLS_si, do_fmla_scalar_idx, a, true) 6239 6240 static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a, 6241 const ENVScalar2 *f) 6242 { 6243 if (a->esz < MO_16 || a->esz > MO_32) { 6244 return false; 6245 } 6246 if (fp_access_check(s)) { 6247 TCGv_i32 t0 
= tcg_temp_new_i32(); 6248 TCGv_i32 t1 = tcg_temp_new_i32(); 6249 6250 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6251 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6252 f->gen_bhs[a->esz](t0, tcg_env, t0, t1); 6253 write_fp_sreg(s, a->rd, t0); 6254 } 6255 return true; 6256 } 6257 6258 TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh) 6259 TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh) 6260 do_env_scalar3_idx_hs(DisasContext * s,arg_rrx_e * a,const ENVScalar3 * f)6261 static bool do_env_scalar3_idx_hs(DisasContext *s, arg_rrx_e *a, 6262 const ENVScalar3 *f) 6263 { 6264 if (a->esz < MO_16 || a->esz > MO_32) { 6265 return false; 6266 } 6267 if (fp_access_check(s)) { 6268 TCGv_i32 t0 = tcg_temp_new_i32(); 6269 TCGv_i32 t1 = tcg_temp_new_i32(); 6270 TCGv_i32 t2 = tcg_temp_new_i32(); 6271 6272 read_vec_element_i32(s, t0, a->rn, 0, a->esz); 6273 read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); 6274 read_vec_element_i32(s, t2, a->rd, 0, a->esz); 6275 f->gen_hs[a->esz - 1](t0, tcg_env, t0, t1, t2); 6276 write_fp_sreg(s, a->rd, t0); 6277 } 6278 return true; 6279 } 6280 6281 TRANS_FEAT(SQRDMLAH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlah) 6282 TRANS_FEAT(SQRDMLSH_si, aa64_rdm, do_env_scalar3_idx_hs, a, &f_scalar_sqrdmlsh) 6283 do_scalar_muladd_widening_idx(DisasContext * s,arg_rrx_e * a,NeonGenTwo64OpFn * fn,bool acc)6284 static bool do_scalar_muladd_widening_idx(DisasContext *s, arg_rrx_e *a, 6285 NeonGenTwo64OpFn *fn, bool acc) 6286 { 6287 if (fp_access_check(s)) { 6288 TCGv_i64 t0 = tcg_temp_new_i64(); 6289 TCGv_i64 t1 = tcg_temp_new_i64(); 6290 TCGv_i64 t2 = tcg_temp_new_i64(); 6291 unsigned vsz, dofs; 6292 6293 if (acc) { 6294 read_vec_element(s, t0, a->rd, 0, a->esz + 1); 6295 } 6296 read_vec_element(s, t1, a->rn, 0, a->esz | MO_SIGN); 6297 read_vec_element(s, t2, a->rm, a->idx, a->esz | MO_SIGN); 6298 fn(t0, t1, t2); 6299 6300 /* Clear the whole register first, then store scalar. */ 6301 vsz = vec_full_reg_size(s); 6302 dofs = vec_full_reg_offset(s, a->rd); 6303 tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); 6304 write_vec_element(s, t0, a->rd, 0, a->esz + 1); 6305 } 6306 return true; 6307 } 6308 6309 TRANS(SQDMULL_si, do_scalar_muladd_widening_idx, a, 6310 a->esz == MO_16 ? gen_sqdmull_h : gen_sqdmull_s, false) 6311 TRANS(SQDMLAL_si, do_scalar_muladd_widening_idx, a, 6312 a->esz == MO_16 ? gen_sqdmlal_h : gen_sqdmlal_s, true) 6313 TRANS(SQDMLSL_si, do_scalar_muladd_widening_idx, a, 6314 a->esz == MO_16 ? 
gen_sqdmlsl_h : gen_sqdmlsl_s, true) 6315 do_fp3_vector_idx(DisasContext * s,arg_qrrx_e * a,gen_helper_gvec_3_ptr * const fns[3])6316 static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6317 gen_helper_gvec_3_ptr * const fns[3]) 6318 { 6319 MemOp esz = a->esz; 6320 6321 switch (esz) { 6322 case MO_64: 6323 if (!a->q) { 6324 return false; 6325 } 6326 break; 6327 case MO_32: 6328 break; 6329 case MO_16: 6330 if (!dc_isar_feature(aa64_fp16, s)) { 6331 return false; 6332 } 6333 break; 6334 default: 6335 g_assert_not_reached(); 6336 } 6337 if (fp_access_check(s)) { 6338 gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, 6339 esz == MO_16, a->idx, fns[esz - 1]); 6340 } 6341 return true; 6342 } 6343 6344 static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = { 6345 gen_helper_gvec_fmul_idx_h, 6346 gen_helper_gvec_fmul_idx_s, 6347 gen_helper_gvec_fmul_idx_d, 6348 }; 6349 TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul) 6350 6351 static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = { 6352 gen_helper_gvec_fmulx_idx_h, 6353 gen_helper_gvec_fmulx_idx_s, 6354 gen_helper_gvec_fmulx_idx_d, 6355 }; TRANS(FMULX_vi,do_fp3_vector_idx,a,f_vector_idx_fmulx)6356 TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) 6357 6358 static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) 6359 { 6360 static gen_helper_gvec_4_ptr * const fns[3] = { 6361 gen_helper_gvec_fmla_idx_h, 6362 gen_helper_gvec_fmla_idx_s, 6363 gen_helper_gvec_fmla_idx_d, 6364 }; 6365 MemOp esz = a->esz; 6366 6367 switch (esz) { 6368 case MO_64: 6369 if (!a->q) { 6370 return false; 6371 } 6372 break; 6373 case MO_32: 6374 break; 6375 case MO_16: 6376 if (!dc_isar_feature(aa64_fp16, s)) { 6377 return false; 6378 } 6379 break; 6380 default: 6381 g_assert_not_reached(); 6382 } 6383 if (fp_access_check(s)) { 6384 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6385 esz == MO_16, (a->idx << 1) | neg, 6386 fns[esz - 1]); 6387 } 6388 return true; 6389 } 6390 TRANS(FMLA_vi,do_fmla_vector_idx,a,false)6391 TRANS(FMLA_vi, do_fmla_vector_idx, a, false) 6392 TRANS(FMLS_vi, do_fmla_vector_idx, a, true) 6393 6394 static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) 6395 { 6396 if (fp_access_check(s)) { 6397 int data = (a->idx << 2) | (is_2 << 1) | is_s; 6398 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), 6399 vec_full_reg_offset(s, a->rn), 6400 vec_full_reg_offset(s, a->rm), tcg_env, 6401 a->q ? 
16 : 8, vec_full_reg_size(s), 6402 data, gen_helper_gvec_fmlal_idx_a64); 6403 } 6404 return true; 6405 } 6406 TRANS_FEAT(FMLAL_vi,aa64_fhm,do_fmlal_idx,a,false,false)6407 TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false) 6408 TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false) 6409 TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true) 6410 TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true) 6411 6412 static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a, 6413 gen_helper_gvec_3 * const fns[2]) 6414 { 6415 assert(a->esz == MO_16 || a->esz == MO_32); 6416 if (fp_access_check(s)) { 6417 gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]); 6418 } 6419 return true; 6420 } 6421 6422 static gen_helper_gvec_3 * const f_vector_idx_mul[2] = { 6423 gen_helper_gvec_mul_idx_h, 6424 gen_helper_gvec_mul_idx_s, 6425 }; TRANS(MUL_vi,do_int3_vector_idx,a,f_vector_idx_mul)6426 TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul) 6427 6428 static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub) 6429 { 6430 static gen_helper_gvec_4 * const fns[2][2] = { 6431 { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h }, 6432 { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s }, 6433 }; 6434 6435 assert(a->esz == MO_16 || a->esz == MO_32); 6436 if (fp_access_check(s)) { 6437 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, 6438 a->idx, fns[a->esz - 1][sub]); 6439 } 6440 return true; 6441 } 6442 TRANS(MLA_vi,do_mla_vector_idx,a,false)6443 TRANS(MLA_vi, do_mla_vector_idx, a, false) 6444 TRANS(MLS_vi, do_mla_vector_idx, a, true) 6445 6446 static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a, 6447 gen_helper_gvec_4 * const fns[2]) 6448 { 6449 assert(a->esz == MO_16 || a->esz == MO_32); 6450 if (fp_access_check(s)) { 6451 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), 6452 vec_full_reg_offset(s, a->rn), 6453 vec_full_reg_offset(s, a->rm), 6454 offsetof(CPUARMState, vfp.qc), 6455 a->q ? 
16 : 8, vec_full_reg_size(s), 6456 a->idx, fns[a->esz - 1]); 6457 } 6458 return true; 6459 } 6460 6461 static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = { 6462 gen_helper_neon_sqdmulh_idx_h, 6463 gen_helper_neon_sqdmulh_idx_s, 6464 }; 6465 TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh) 6466 6467 static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = { 6468 gen_helper_neon_sqrdmulh_idx_h, 6469 gen_helper_neon_sqrdmulh_idx_s, 6470 }; 6471 TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh) 6472 6473 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlah[2] = { 6474 gen_helper_neon_sqrdmlah_idx_h, 6475 gen_helper_neon_sqrdmlah_idx_s, 6476 }; 6477 TRANS_FEAT(SQRDMLAH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6478 f_vector_idx_sqrdmlah) 6479 6480 static gen_helper_gvec_4 * const f_vector_idx_sqrdmlsh[2] = { 6481 gen_helper_neon_sqrdmlsh_idx_h, 6482 gen_helper_neon_sqrdmlsh_idx_s, 6483 }; TRANS_FEAT(SQRDMLSH_vi,aa64_rdm,do_int3_qc_vector_idx,a,f_vector_idx_sqrdmlsh)6484 TRANS_FEAT(SQRDMLSH_vi, aa64_rdm, do_int3_qc_vector_idx, a, 6485 f_vector_idx_sqrdmlsh) 6486 6487 static bool do_dot_vector_idx(DisasContext *s, arg_qrrx_e *a, 6488 gen_helper_gvec_4 *fn) 6489 { 6490 if (fp_access_check(s)) { 6491 gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6492 } 6493 return true; 6494 } 6495 do_dot_vector_idx_env(DisasContext * s,arg_qrrx_e * a,gen_helper_gvec_4_ptr * fn)6496 static bool do_dot_vector_idx_env(DisasContext *s, arg_qrrx_e *a, 6497 gen_helper_gvec_4_ptr *fn) 6498 { 6499 if (fp_access_check(s)) { 6500 gen_gvec_op4_env(s, a->q, a->rd, a->rn, a->rm, a->rd, a->idx, fn); 6501 } 6502 return true; 6503 } 6504 TRANS_FEAT(SDOT_vi,aa64_dp,do_dot_vector_idx,a,gen_helper_gvec_sdot_idx_b)6505 TRANS_FEAT(SDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_sdot_idx_b) 6506 TRANS_FEAT(UDOT_vi, aa64_dp, do_dot_vector_idx, a, gen_helper_gvec_udot_idx_b) 6507 TRANS_FEAT(SUDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6508 gen_helper_gvec_sudot_idx_b) 6509 TRANS_FEAT(USDOT_vi, aa64_i8mm, do_dot_vector_idx, a, 6510 gen_helper_gvec_usdot_idx_b) 6511 TRANS_FEAT(BFDOT_vi, aa64_bf16, do_dot_vector_idx_env, a, 6512 gen_helper_gvec_bfdot_idx) 6513 6514 static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a) 6515 { 6516 if (!dc_isar_feature(aa64_bf16, s)) { 6517 return false; 6518 } 6519 if (fp_access_check(s)) { 6520 /* Q bit selects BFMLALB vs BFMLALT. 
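 * BFMLALB (Q = 0) takes the even-numbered BF16 source elements and
 * BFMLALT (Q = 1) the odd-numbered ones; Q is packed with the element
 * index into the helper's data argument as (idx << 1) | q.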
*/ 6521 gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd, 0, 6522 (a->idx << 1) | a->q, 6523 gen_helper_gvec_bfmlal_idx); 6524 } 6525 return true; 6526 } 6527 trans_FCMLA_vi(DisasContext * s,arg_FCMLA_vi * a)6528 static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a) 6529 { 6530 gen_helper_gvec_4_ptr *fn; 6531 6532 if (!dc_isar_feature(aa64_fcma, s)) { 6533 return false; 6534 } 6535 switch (a->esz) { 6536 case MO_16: 6537 if (!dc_isar_feature(aa64_fp16, s)) { 6538 return false; 6539 } 6540 fn = gen_helper_gvec_fcmlah_idx; 6541 break; 6542 case MO_32: 6543 fn = gen_helper_gvec_fcmlas_idx; 6544 break; 6545 default: 6546 g_assert_not_reached(); 6547 } 6548 if (fp_access_check(s)) { 6549 gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, 6550 a->esz == MO_16, (a->idx << 2) | a->rot, fn); 6551 } 6552 return true; 6553 } 6554 6555 /* 6556 * Advanced SIMD scalar pairwise 6557 */ 6558 do_fp3_scalar_pair(DisasContext * s,arg_rr_e * a,const FPScalar * f)6559 static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) 6560 { 6561 switch (a->esz) { 6562 case MO_64: 6563 if (fp_access_check(s)) { 6564 TCGv_i64 t0 = tcg_temp_new_i64(); 6565 TCGv_i64 t1 = tcg_temp_new_i64(); 6566 6567 read_vec_element(s, t0, a->rn, 0, MO_64); 6568 read_vec_element(s, t1, a->rn, 1, MO_64); 6569 f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6570 write_fp_dreg(s, a->rd, t0); 6571 } 6572 break; 6573 case MO_32: 6574 if (fp_access_check(s)) { 6575 TCGv_i32 t0 = tcg_temp_new_i32(); 6576 TCGv_i32 t1 = tcg_temp_new_i32(); 6577 6578 read_vec_element_i32(s, t0, a->rn, 0, MO_32); 6579 read_vec_element_i32(s, t1, a->rn, 1, MO_32); 6580 f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); 6581 write_fp_sreg(s, a->rd, t0); 6582 } 6583 break; 6584 case MO_16: 6585 if (!dc_isar_feature(aa64_fp16, s)) { 6586 return false; 6587 } 6588 if (fp_access_check(s)) { 6589 TCGv_i32 t0 = tcg_temp_new_i32(); 6590 TCGv_i32 t1 = tcg_temp_new_i32(); 6591 6592 read_vec_element_i32(s, t0, a->rn, 0, MO_16); 6593 read_vec_element_i32(s, t1, a->rn, 1, MO_16); 6594 f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); 6595 write_fp_sreg(s, a->rd, t0); 6596 } 6597 break; 6598 default: 6599 g_assert_not_reached(); 6600 } 6601 return true; 6602 } 6603 6604 TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) 6605 TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) 6606 TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) 6607 TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, &f_scalar_fmaxnm) 6608 TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) 6609 trans_ADDP_s(DisasContext * s,arg_rr_e * a)6610 static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a) 6611 { 6612 if (fp_access_check(s)) { 6613 TCGv_i64 t0 = tcg_temp_new_i64(); 6614 TCGv_i64 t1 = tcg_temp_new_i64(); 6615 6616 read_vec_element(s, t0, a->rn, 0, MO_64); 6617 read_vec_element(s, t1, a->rn, 1, MO_64); 6618 tcg_gen_add_i64(t0, t0, t1); 6619 write_fp_dreg(s, a->rd, t0); 6620 } 6621 return true; 6622 } 6623 6624 /* 6625 * Floating-point conditional select 6626 */ 6627 trans_FCSEL(DisasContext * s,arg_FCSEL * a)6628 static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) 6629 { 6630 TCGv_i64 t_true, t_false; 6631 DisasCompare64 c; 6632 6633 switch (a->esz) { 6634 case MO_32: 6635 case MO_64: 6636 break; 6637 case MO_16: 6638 if (!dc_isar_feature(aa64_fp16, s)) { 6639 return false; 6640 } 6641 break; 6642 default: 6643 return false; 6644 } 6645 6646 if (!fp_access_check(s)) { 6647 return true; 6648 } 6649 6650 /* Zero extend sreg & hreg inputs to 64 bits now. 
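 * read_vec_element() zero-extends the MO_16 and MO_32 elements into the
 * 64-bit temporaries, so a single 64-bit movcond handles all three
 * element sizes.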
*/ 6651 t_true = tcg_temp_new_i64(); 6652 t_false = tcg_temp_new_i64(); 6653 read_vec_element(s, t_true, a->rn, 0, a->esz); 6654 read_vec_element(s, t_false, a->rm, 0, a->esz); 6655 6656 a64_test_cc(&c, a->cond); 6657 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 6658 t_true, t_false); 6659 6660 /* 6661 * Note that sregs & hregs write back zeros to the high bits, 6662 * and we've already done the zero-extension. 6663 */ 6664 write_fp_dreg(s, a->rd, t_true); 6665 return true; 6666 } 6667 6668 /* 6669 * Advanced SIMD Extract 6670 */ 6671 trans_EXT_d(DisasContext * s,arg_EXT_d * a)6672 static bool trans_EXT_d(DisasContext *s, arg_EXT_d *a) 6673 { 6674 if (fp_access_check(s)) { 6675 TCGv_i64 lo = read_fp_dreg(s, a->rn); 6676 if (a->imm != 0) { 6677 TCGv_i64 hi = read_fp_dreg(s, a->rm); 6678 tcg_gen_extract2_i64(lo, lo, hi, a->imm * 8); 6679 } 6680 write_fp_dreg(s, a->rd, lo); 6681 } 6682 return true; 6683 } 6684 trans_EXT_q(DisasContext * s,arg_EXT_q * a)6685 static bool trans_EXT_q(DisasContext *s, arg_EXT_q *a) 6686 { 6687 TCGv_i64 lo, hi; 6688 int pos = (a->imm & 7) * 8; 6689 int elt = a->imm >> 3; 6690 6691 if (!fp_access_check(s)) { 6692 return true; 6693 } 6694 6695 lo = tcg_temp_new_i64(); 6696 hi = tcg_temp_new_i64(); 6697 6698 read_vec_element(s, lo, a->rn, elt, MO_64); 6699 elt++; 6700 read_vec_element(s, hi, elt & 2 ? a->rm : a->rn, elt & 1, MO_64); 6701 elt++; 6702 6703 if (pos != 0) { 6704 TCGv_i64 hh = tcg_temp_new_i64(); 6705 tcg_gen_extract2_i64(lo, lo, hi, pos); 6706 read_vec_element(s, hh, a->rm, elt & 1, MO_64); 6707 tcg_gen_extract2_i64(hi, hi, hh, pos); 6708 } 6709 6710 write_vec_element(s, lo, a->rd, 0, MO_64); 6711 write_vec_element(s, hi, a->rd, 1, MO_64); 6712 clear_vec_high(s, true, a->rd); 6713 return true; 6714 } 6715 6716 /* 6717 * Floating-point data-processing (3 source) 6718 */ 6719 do_fmadd(DisasContext * s,arg_rrrr_e * a,bool neg_a,bool neg_n)6720 static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) 6721 { 6722 TCGv_ptr fpst; 6723 6724 /* 6725 * These are fused multiply-add. Note that doing the negations here 6726 * as separate steps is correct: an input NaN should come out with 6727 * its sign bit flipped if it is a negated-input. 
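 * Concretely the helper computes rd = rn * rm + ra after the optional
 * negations: FMADD negates nothing, FMSUB negates rn, FNMSUB negates
 * ra, and FNMADD negates both rn and ra.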
6728 */ 6729 switch (a->esz) { 6730 case MO_64: 6731 if (fp_access_check(s)) { 6732 TCGv_i64 tn = read_fp_dreg(s, a->rn); 6733 TCGv_i64 tm = read_fp_dreg(s, a->rm); 6734 TCGv_i64 ta = read_fp_dreg(s, a->ra); 6735 6736 if (neg_a) { 6737 gen_vfp_negd(ta, ta); 6738 } 6739 if (neg_n) { 6740 gen_vfp_negd(tn, tn); 6741 } 6742 fpst = fpstatus_ptr(FPST_FPCR); 6743 gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); 6744 write_fp_dreg(s, a->rd, ta); 6745 } 6746 break; 6747 6748 case MO_32: 6749 if (fp_access_check(s)) { 6750 TCGv_i32 tn = read_fp_sreg(s, a->rn); 6751 TCGv_i32 tm = read_fp_sreg(s, a->rm); 6752 TCGv_i32 ta = read_fp_sreg(s, a->ra); 6753 6754 if (neg_a) { 6755 gen_vfp_negs(ta, ta); 6756 } 6757 if (neg_n) { 6758 gen_vfp_negs(tn, tn); 6759 } 6760 fpst = fpstatus_ptr(FPST_FPCR); 6761 gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); 6762 write_fp_sreg(s, a->rd, ta); 6763 } 6764 break; 6765 6766 case MO_16: 6767 if (!dc_isar_feature(aa64_fp16, s)) { 6768 return false; 6769 } 6770 if (fp_access_check(s)) { 6771 TCGv_i32 tn = read_fp_hreg(s, a->rn); 6772 TCGv_i32 tm = read_fp_hreg(s, a->rm); 6773 TCGv_i32 ta = read_fp_hreg(s, a->ra); 6774 6775 if (neg_a) { 6776 gen_vfp_negh(ta, ta); 6777 } 6778 if (neg_n) { 6779 gen_vfp_negh(tn, tn); 6780 } 6781 fpst = fpstatus_ptr(FPST_FPCR_F16); 6782 gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); 6783 write_fp_sreg(s, a->rd, ta); 6784 } 6785 break; 6786 6787 default: 6788 return false; 6789 } 6790 return true; 6791 } 6792 TRANS(FMADD,do_fmadd,a,false,false)6793 TRANS(FMADD, do_fmadd, a, false, false) 6794 TRANS(FNMADD, do_fmadd, a, true, true) 6795 TRANS(FMSUB, do_fmadd, a, false, true) 6796 TRANS(FNMSUB, do_fmadd, a, true, false) 6797 6798 /* 6799 * Advanced SIMD Across Lanes 6800 */ 6801 6802 static bool do_int_reduction(DisasContext *s, arg_qrr_e *a, bool widen, 6803 MemOp src_sign, NeonGenTwo64OpFn *fn) 6804 { 6805 TCGv_i64 tcg_res, tcg_elt; 6806 MemOp src_mop = a->esz | src_sign; 6807 int elements = (a->q ? 16 : 8) >> a->esz; 6808 6809 /* Reject MO_64, and MO_32 without Q: a minimum of 4 elements. */ 6810 if (elements < 4) { 6811 return false; 6812 } 6813 if (!fp_access_check(s)) { 6814 return true; 6815 } 6816 6817 tcg_res = tcg_temp_new_i64(); 6818 tcg_elt = tcg_temp_new_i64(); 6819 6820 read_vec_element(s, tcg_res, a->rn, 0, src_mop); 6821 for (int i = 1; i < elements; i++) { 6822 read_vec_element(s, tcg_elt, a->rn, i, src_mop); 6823 fn(tcg_res, tcg_res, tcg_elt); 6824 } 6825 6826 tcg_gen_ext_i64(tcg_res, tcg_res, a->esz + widen); 6827 write_fp_dreg(s, a->rd, tcg_res); 6828 return true; 6829 } 6830 6831 TRANS(ADDV, do_int_reduction, a, false, 0, tcg_gen_add_i64) TRANS(SADDLV,do_int_reduction,a,true,MO_SIGN,tcg_gen_add_i64)6832 TRANS(SADDLV, do_int_reduction, a, true, MO_SIGN, tcg_gen_add_i64) 6833 TRANS(UADDLV, do_int_reduction, a, true, 0, tcg_gen_add_i64) 6834 TRANS(SMAXV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smax_i64) 6835 TRANS(UMAXV, do_int_reduction, a, false, 0, tcg_gen_umax_i64) 6836 TRANS(SMINV, do_int_reduction, a, false, MO_SIGN, tcg_gen_smin_i64) 6837 TRANS(UMINV, do_int_reduction, a, false, 0, tcg_gen_umin_i64) 6838 6839 /* 6840 * do_fp_reduction helper 6841 * 6842 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 6843 * important for correct NaN propagation that we do these 6844 * operations in exactly the order specified by the pseudocode. 6845 * 6846 * This is a recursive function. 
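 * The recursion splits the element range in half and combines the two
 * sub-results, so e.g. a four-element reduction computes
 * fn(fn(e[0], e[1]), fn(e[2], e[3])) rather than a linear fold.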
6847 */ 6848 static TCGv_i32 do_reduction_op(DisasContext *s, int rn, MemOp esz, 6849 int ebase, int ecount, TCGv_ptr fpst, 6850 NeonGenTwoSingleOpFn *fn) 6851 { 6852 if (ecount == 1) { 6853 TCGv_i32 tcg_elem = tcg_temp_new_i32(); 6854 read_vec_element_i32(s, tcg_elem, rn, ebase, esz); 6855 return tcg_elem; 6856 } else { 6857 int half = ecount >> 1; 6858 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 6859 6860 tcg_hi = do_reduction_op(s, rn, esz, ebase + half, half, fpst, fn); 6861 tcg_lo = do_reduction_op(s, rn, esz, ebase, half, fpst, fn); 6862 tcg_res = tcg_temp_new_i32(); 6863 6864 fn(tcg_res, tcg_lo, tcg_hi, fpst); 6865 return tcg_res; 6866 } 6867 } 6868 do_fp_reduction(DisasContext * s,arg_qrr_e * a,NeonGenTwoSingleOpFn * fn)6869 static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a, 6870 NeonGenTwoSingleOpFn *fn) 6871 { 6872 if (fp_access_check(s)) { 6873 MemOp esz = a->esz; 6874 int elts = (a->q ? 16 : 8) >> esz; 6875 TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 6876 TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst, fn); 6877 write_fp_sreg(s, a->rd, res); 6878 } 6879 return true; 6880 } 6881 TRANS_FEAT(FMAXNMV_h,aa64_fp16,do_fp_reduction,a,gen_helper_advsimd_maxnumh)6882 TRANS_FEAT(FMAXNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxnumh) 6883 TRANS_FEAT(FMINNMV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minnumh) 6884 TRANS_FEAT(FMAXV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_maxh) 6885 TRANS_FEAT(FMINV_h, aa64_fp16, do_fp_reduction, a, gen_helper_advsimd_minh) 6886 6887 TRANS(FMAXNMV_s, do_fp_reduction, a, gen_helper_vfp_maxnums) 6888 TRANS(FMINNMV_s, do_fp_reduction, a, gen_helper_vfp_minnums) 6889 TRANS(FMAXV_s, do_fp_reduction, a, gen_helper_vfp_maxs) 6890 TRANS(FMINV_s, do_fp_reduction, a, gen_helper_vfp_mins) 6891 6892 /* 6893 * Floating-point Immediate 6894 */ 6895 6896 static bool trans_FMOVI_s(DisasContext *s, arg_FMOVI_s *a) 6897 { 6898 switch (a->esz) { 6899 case MO_32: 6900 case MO_64: 6901 break; 6902 case MO_16: 6903 if (!dc_isar_feature(aa64_fp16, s)) { 6904 return false; 6905 } 6906 break; 6907 default: 6908 return false; 6909 } 6910 if (fp_access_check(s)) { 6911 uint64_t imm = vfp_expand_imm(a->esz, a->imm); 6912 write_fp_dreg(s, a->rd, tcg_constant_i64(imm)); 6913 } 6914 return true; 6915 } 6916 6917 /* 6918 * Advanced SIMD Modified Immediate 6919 */ 6920 trans_FMOVI_v_h(DisasContext * s,arg_FMOVI_v_h * a)6921 static bool trans_FMOVI_v_h(DisasContext *s, arg_FMOVI_v_h *a) 6922 { 6923 if (!dc_isar_feature(aa64_fp16, s)) { 6924 return false; 6925 } 6926 if (fp_access_check(s)) { 6927 tcg_gen_gvec_dup_imm(MO_16, vec_full_reg_offset(s, a->rd), 6928 a->q ? 16 : 8, vec_full_reg_size(s), 6929 vfp_expand_imm(MO_16, a->abcdefgh)); 6930 } 6931 return true; 6932 } 6933 gen_movi(unsigned vece,uint32_t dofs,uint32_t aofs,int64_t c,uint32_t oprsz,uint32_t maxsz)6934 static void gen_movi(unsigned vece, uint32_t dofs, uint32_t aofs, 6935 int64_t c, uint32_t oprsz, uint32_t maxsz) 6936 { 6937 tcg_gen_gvec_dup_imm(MO_64, dofs, oprsz, maxsz, c); 6938 } 6939 trans_Vimm(DisasContext * s,arg_Vimm * a)6940 static bool trans_Vimm(DisasContext *s, arg_Vimm *a) 6941 { 6942 GVecGen2iFn *fn; 6943 6944 /* Handle decode of cmode/op here between ORR/BIC/MOVI */ 6945 if ((a->cmode & 1) && a->cmode < 12) { 6946 /* For op=1, the imm will be inverted, so BIC becomes AND. */ 6947 fn = a->op ? 
tcg_gen_gvec_andi : tcg_gen_gvec_ori; 6948 } else { 6949 /* There is one unallocated cmode/op combination in this space */ 6950 if (a->cmode == 15 && a->op == 1 && a->q == 0) { 6951 return false; 6952 } 6953 fn = gen_movi; 6954 } 6955 6956 if (fp_access_check(s)) { 6957 uint64_t imm = asimd_imm_const(a->abcdefgh, a->cmode, a->op); 6958 gen_gvec_fn2i(s, a->q, a->rd, a->rd, imm, fn, MO_64); 6959 } 6960 return true; 6961 } 6962 6963 /* 6964 * Advanced SIMD Shift by Immediate 6965 */ 6966 do_vec_shift_imm(DisasContext * s,arg_qrri_e * a,GVecGen2iFn * fn)6967 static bool do_vec_shift_imm(DisasContext *s, arg_qrri_e *a, GVecGen2iFn *fn) 6968 { 6969 if (fp_access_check(s)) { 6970 gen_gvec_fn2i(s, a->q, a->rd, a->rn, a->imm, fn, a->esz); 6971 } 6972 return true; 6973 } 6974 6975 TRANS(SSHR_v, do_vec_shift_imm, a, gen_gvec_sshr) 6976 TRANS(USHR_v, do_vec_shift_imm, a, gen_gvec_ushr) 6977 TRANS(SSRA_v, do_vec_shift_imm, a, gen_gvec_ssra) 6978 TRANS(USRA_v, do_vec_shift_imm, a, gen_gvec_usra) 6979 TRANS(SRSHR_v, do_vec_shift_imm, a, gen_gvec_srshr) 6980 TRANS(URSHR_v, do_vec_shift_imm, a, gen_gvec_urshr) 6981 TRANS(SRSRA_v, do_vec_shift_imm, a, gen_gvec_srsra) 6982 TRANS(URSRA_v, do_vec_shift_imm, a, gen_gvec_ursra) 6983 TRANS(SRI_v, do_vec_shift_imm, a, gen_gvec_sri) 6984 TRANS(SHL_v, do_vec_shift_imm, a, tcg_gen_gvec_shli) 6985 TRANS(SLI_v, do_vec_shift_imm, a, gen_gvec_sli); TRANS(SQSHL_vi,do_vec_shift_imm,a,gen_neon_sqshli)6986 TRANS(SQSHL_vi, do_vec_shift_imm, a, gen_neon_sqshli) 6987 TRANS(UQSHL_vi, do_vec_shift_imm, a, gen_neon_uqshli) 6988 TRANS(SQSHLU_vi, do_vec_shift_imm, a, gen_neon_sqshlui) 6989 6990 static bool do_vec_shift_imm_wide(DisasContext *s, arg_qrri_e *a, bool is_u) 6991 { 6992 TCGv_i64 tcg_rn, tcg_rd; 6993 int esz = a->esz; 6994 int esize; 6995 6996 if (!fp_access_check(s)) { 6997 return true; 6998 } 6999 7000 /* 7001 * For the LL variants the store is larger than the load, 7002 * so if rd == rn we would overwrite parts of our input. 7003 * So load everything right now and use shifts in the main loop. 
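 * The whole 64-bit source half is read once into tcg_rn; each loop
 * iteration then extracts (or sign-extracts) one element from it, so
 * rd == rn aliasing cannot corrupt later iterations.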
7004 */ 7005 tcg_rd = tcg_temp_new_i64(); 7006 tcg_rn = tcg_temp_new_i64(); 7007 read_vec_element(s, tcg_rn, a->rn, a->q, MO_64); 7008 7009 esize = 8 << esz; 7010 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7011 if (is_u) { 7012 tcg_gen_extract_i64(tcg_rd, tcg_rn, i * esize, esize); 7013 } else { 7014 tcg_gen_sextract_i64(tcg_rd, tcg_rn, i * esize, esize); 7015 } 7016 tcg_gen_shli_i64(tcg_rd, tcg_rd, a->imm); 7017 write_vec_element(s, tcg_rd, a->rd, i, esz + 1); 7018 } 7019 clear_vec_high(s, true, a->rd); 7020 return true; 7021 } 7022 TRANS(SSHLL_v,do_vec_shift_imm_wide,a,false)7023 TRANS(SSHLL_v, do_vec_shift_imm_wide, a, false) 7024 TRANS(USHLL_v, do_vec_shift_imm_wide, a, true) 7025 7026 static void gen_sshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7027 { 7028 assert(shift >= 0 && shift <= 64); 7029 tcg_gen_sari_i64(dst, src, MIN(shift, 63)); 7030 } 7031 gen_ushr_d(TCGv_i64 dst,TCGv_i64 src,int64_t shift)7032 static void gen_ushr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7033 { 7034 assert(shift >= 0 && shift <= 64); 7035 if (shift == 64) { 7036 tcg_gen_movi_i64(dst, 0); 7037 } else { 7038 tcg_gen_shri_i64(dst, src, shift); 7039 } 7040 } 7041 gen_ssra_d(TCGv_i64 dst,TCGv_i64 src,int64_t shift)7042 static void gen_ssra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7043 { 7044 gen_sshr_d(src, src, shift); 7045 tcg_gen_add_i64(dst, dst, src); 7046 } 7047 gen_usra_d(TCGv_i64 dst,TCGv_i64 src,int64_t shift)7048 static void gen_usra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7049 { 7050 gen_ushr_d(src, src, shift); 7051 tcg_gen_add_i64(dst, dst, src); 7052 } 7053 gen_srshr_bhs(TCGv_i64 dst,TCGv_i64 src,int64_t shift)7054 static void gen_srshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7055 { 7056 assert(shift >= 0 && shift <= 32); 7057 if (shift) { 7058 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7059 tcg_gen_add_i64(dst, src, rnd); 7060 tcg_gen_sari_i64(dst, dst, shift); 7061 } else { 7062 tcg_gen_mov_i64(dst, src); 7063 } 7064 } 7065 gen_urshr_bhs(TCGv_i64 dst,TCGv_i64 src,int64_t shift)7066 static void gen_urshr_bhs(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7067 { 7068 assert(shift >= 0 && shift <= 32); 7069 if (shift) { 7070 TCGv_i64 rnd = tcg_constant_i64(1ull << (shift - 1)); 7071 tcg_gen_add_i64(dst, src, rnd); 7072 tcg_gen_shri_i64(dst, dst, shift); 7073 } else { 7074 tcg_gen_mov_i64(dst, src); 7075 } 7076 } 7077 gen_srshr_d(TCGv_i64 dst,TCGv_i64 src,int64_t shift)7078 static void gen_srshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7079 { 7080 assert(shift >= 0 && shift <= 64); 7081 if (shift == 0) { 7082 tcg_gen_mov_i64(dst, src); 7083 } else if (shift == 64) { 7084 /* Extension of sign bit (0,-1) plus sign bit (0,1) is zero. */ 7085 tcg_gen_movi_i64(dst, 0); 7086 } else { 7087 TCGv_i64 rnd = tcg_temp_new_i64(); 7088 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7089 tcg_gen_sari_i64(dst, src, shift); 7090 tcg_gen_add_i64(dst, dst, rnd); 7091 } 7092 } 7093 gen_urshr_d(TCGv_i64 dst,TCGv_i64 src,int64_t shift)7094 static void gen_urshr_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7095 { 7096 assert(shift >= 0 && shift <= 64); 7097 if (shift == 0) { 7098 tcg_gen_mov_i64(dst, src); 7099 } else if (shift == 64) { 7100 /* Rounding will propagate bit 63 into bit 64. 
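 * That is, (src + (1 << 63)) >> 64 equals src >> 63, so the rounded
 * unsigned shift by 64 reduces to extracting the old bit 63.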
*/ 7101 tcg_gen_shri_i64(dst, src, 63); 7102 } else { 7103 TCGv_i64 rnd = tcg_temp_new_i64(); 7104 tcg_gen_extract_i64(rnd, src, shift - 1, 1); 7105 tcg_gen_shri_i64(dst, src, shift); 7106 tcg_gen_add_i64(dst, dst, rnd); 7107 } 7108 } 7109 gen_srsra_d(TCGv_i64 dst,TCGv_i64 src,int64_t shift)7110 static void gen_srsra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7111 { 7112 gen_srshr_d(src, src, shift); 7113 tcg_gen_add_i64(dst, dst, src); 7114 } 7115 gen_ursra_d(TCGv_i64 dst,TCGv_i64 src,int64_t shift)7116 static void gen_ursra_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7117 { 7118 gen_urshr_d(src, src, shift); 7119 tcg_gen_add_i64(dst, dst, src); 7120 } 7121 gen_sri_d(TCGv_i64 dst,TCGv_i64 src,int64_t shift)7122 static void gen_sri_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7123 { 7124 /* If shift is 64, dst is unchanged. */ 7125 if (shift != 64) { 7126 tcg_gen_shri_i64(src, src, shift); 7127 tcg_gen_deposit_i64(dst, dst, src, 0, 64 - shift); 7128 } 7129 } 7130 gen_sli_d(TCGv_i64 dst,TCGv_i64 src,int64_t shift)7131 static void gen_sli_d(TCGv_i64 dst, TCGv_i64 src, int64_t shift) 7132 { 7133 tcg_gen_deposit_i64(dst, dst, src, shift, 64 - shift); 7134 } 7135 do_vec_shift_imm_narrow(DisasContext * s,arg_qrri_e * a,WideShiftImmFn * const fns[3],MemOp sign)7136 static bool do_vec_shift_imm_narrow(DisasContext *s, arg_qrri_e *a, 7137 WideShiftImmFn * const fns[3], MemOp sign) 7138 { 7139 TCGv_i64 tcg_rn, tcg_rd; 7140 int esz = a->esz; 7141 int esize; 7142 WideShiftImmFn *fn; 7143 7144 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7145 7146 if (!fp_access_check(s)) { 7147 return true; 7148 } 7149 7150 tcg_rn = tcg_temp_new_i64(); 7151 tcg_rd = tcg_temp_new_i64(); 7152 tcg_gen_movi_i64(tcg_rd, 0); 7153 7154 fn = fns[esz]; 7155 esize = 8 << esz; 7156 for (int i = 0, elements = 8 >> esz; i < elements; i++) { 7157 read_vec_element(s, tcg_rn, a->rn, i, (esz + 1) | sign); 7158 fn(tcg_rn, tcg_rn, a->imm); 7159 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, esize * i, esize); 7160 } 7161 7162 write_vec_element(s, tcg_rd, a->rd, a->q, MO_64); 7163 clear_vec_high(s, a->q, a->rd); 7164 return true; 7165 } 7166 gen_sqshrn_b(TCGv_i64 d,TCGv_i64 s,int64_t i)7167 static void gen_sqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7168 { 7169 tcg_gen_sari_i64(d, s, i); 7170 tcg_gen_ext16u_i64(d, d); 7171 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7172 } 7173 gen_sqshrn_h(TCGv_i64 d,TCGv_i64 s,int64_t i)7174 static void gen_sqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7175 { 7176 tcg_gen_sari_i64(d, s, i); 7177 tcg_gen_ext32u_i64(d, d); 7178 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7179 } 7180 gen_sqshrn_s(TCGv_i64 d,TCGv_i64 s,int64_t i)7181 static void gen_sqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7182 { 7183 gen_sshr_d(d, s, i); 7184 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7185 } 7186 gen_uqshrn_b(TCGv_i64 d,TCGv_i64 s,int64_t i)7187 static void gen_uqshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7188 { 7189 tcg_gen_shri_i64(d, s, i); 7190 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7191 } 7192 gen_uqshrn_h(TCGv_i64 d,TCGv_i64 s,int64_t i)7193 static void gen_uqshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7194 { 7195 tcg_gen_shri_i64(d, s, i); 7196 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7197 } 7198 gen_uqshrn_s(TCGv_i64 d,TCGv_i64 s,int64_t i)7199 static void gen_uqshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7200 { 7201 gen_ushr_d(d, s, i); 7202 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7203 } 7204 gen_sqshrun_b(TCGv_i64 d,TCGv_i64 s,int64_t i)7205 static void gen_sqshrun_b(TCGv_i64 d, 
TCGv_i64 s, int64_t i) 7206 { 7207 tcg_gen_sari_i64(d, s, i); 7208 tcg_gen_ext16u_i64(d, d); 7209 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7210 } 7211 gen_sqshrun_h(TCGv_i64 d,TCGv_i64 s,int64_t i)7212 static void gen_sqshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7213 { 7214 tcg_gen_sari_i64(d, s, i); 7215 tcg_gen_ext32u_i64(d, d); 7216 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7217 } 7218 gen_sqshrun_s(TCGv_i64 d,TCGv_i64 s,int64_t i)7219 static void gen_sqshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7220 { 7221 gen_sshr_d(d, s, i); 7222 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7223 } 7224 gen_sqrshrn_b(TCGv_i64 d,TCGv_i64 s,int64_t i)7225 static void gen_sqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7226 { 7227 gen_srshr_bhs(d, s, i); 7228 tcg_gen_ext16u_i64(d, d); 7229 gen_helper_neon_narrow_sat_s8(d, tcg_env, d); 7230 } 7231 gen_sqrshrn_h(TCGv_i64 d,TCGv_i64 s,int64_t i)7232 static void gen_sqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7233 { 7234 gen_srshr_bhs(d, s, i); 7235 tcg_gen_ext32u_i64(d, d); 7236 gen_helper_neon_narrow_sat_s16(d, tcg_env, d); 7237 } 7238 gen_sqrshrn_s(TCGv_i64 d,TCGv_i64 s,int64_t i)7239 static void gen_sqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7240 { 7241 gen_srshr_d(d, s, i); 7242 gen_helper_neon_narrow_sat_s32(d, tcg_env, d); 7243 } 7244 gen_uqrshrn_b(TCGv_i64 d,TCGv_i64 s,int64_t i)7245 static void gen_uqrshrn_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7246 { 7247 gen_urshr_bhs(d, s, i); 7248 gen_helper_neon_narrow_sat_u8(d, tcg_env, d); 7249 } 7250 gen_uqrshrn_h(TCGv_i64 d,TCGv_i64 s,int64_t i)7251 static void gen_uqrshrn_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7252 { 7253 gen_urshr_bhs(d, s, i); 7254 gen_helper_neon_narrow_sat_u16(d, tcg_env, d); 7255 } 7256 gen_uqrshrn_s(TCGv_i64 d,TCGv_i64 s,int64_t i)7257 static void gen_uqrshrn_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7258 { 7259 gen_urshr_d(d, s, i); 7260 gen_helper_neon_narrow_sat_u32(d, tcg_env, d); 7261 } 7262 gen_sqrshrun_b(TCGv_i64 d,TCGv_i64 s,int64_t i)7263 static void gen_sqrshrun_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7264 { 7265 gen_srshr_bhs(d, s, i); 7266 tcg_gen_ext16u_i64(d, d); 7267 gen_helper_neon_unarrow_sat8(d, tcg_env, d); 7268 } 7269 gen_sqrshrun_h(TCGv_i64 d,TCGv_i64 s,int64_t i)7270 static void gen_sqrshrun_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7271 { 7272 gen_srshr_bhs(d, s, i); 7273 tcg_gen_ext32u_i64(d, d); 7274 gen_helper_neon_unarrow_sat16(d, tcg_env, d); 7275 } 7276 gen_sqrshrun_s(TCGv_i64 d,TCGv_i64 s,int64_t i)7277 static void gen_sqrshrun_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7278 { 7279 gen_srshr_d(d, s, i); 7280 gen_helper_neon_unarrow_sat32(d, tcg_env, d); 7281 } 7282 7283 static WideShiftImmFn * const shrn_fns[] = { 7284 tcg_gen_shri_i64, 7285 tcg_gen_shri_i64, 7286 gen_ushr_d, 7287 }; 7288 TRANS(SHRN_v, do_vec_shift_imm_narrow, a, shrn_fns, 0) 7289 7290 static WideShiftImmFn * const rshrn_fns[] = { 7291 gen_urshr_bhs, 7292 gen_urshr_bhs, 7293 gen_urshr_d, 7294 }; 7295 TRANS(RSHRN_v, do_vec_shift_imm_narrow, a, rshrn_fns, 0) 7296 7297 static WideShiftImmFn * const sqshrn_fns[] = { 7298 gen_sqshrn_b, 7299 gen_sqshrn_h, 7300 gen_sqshrn_s, 7301 }; 7302 TRANS(SQSHRN_v, do_vec_shift_imm_narrow, a, sqshrn_fns, MO_SIGN) 7303 7304 static WideShiftImmFn * const uqshrn_fns[] = { 7305 gen_uqshrn_b, 7306 gen_uqshrn_h, 7307 gen_uqshrn_s, 7308 }; 7309 TRANS(UQSHRN_v, do_vec_shift_imm_narrow, a, uqshrn_fns, 0) 7310 7311 static WideShiftImmFn * const sqshrun_fns[] = { 7312 gen_sqshrun_b, 7313 gen_sqshrun_h, 7314 gen_sqshrun_s, 7315 }; 7316 TRANS(SQSHRUN_v, do_vec_shift_imm_narrow, a, 
sqshrun_fns, MO_SIGN) 7317 7318 static WideShiftImmFn * const sqrshrn_fns[] = { 7319 gen_sqrshrn_b, 7320 gen_sqrshrn_h, 7321 gen_sqrshrn_s, 7322 }; 7323 TRANS(SQRSHRN_v, do_vec_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN) 7324 7325 static WideShiftImmFn * const uqrshrn_fns[] = { 7326 gen_uqrshrn_b, 7327 gen_uqrshrn_h, 7328 gen_uqrshrn_s, 7329 }; 7330 TRANS(UQRSHRN_v, do_vec_shift_imm_narrow, a, uqrshrn_fns, 0) 7331 7332 static WideShiftImmFn * const sqrshrun_fns[] = { 7333 gen_sqrshrun_b, 7334 gen_sqrshrun_h, 7335 gen_sqrshrun_s, 7336 }; TRANS(SQRSHRUN_v,do_vec_shift_imm_narrow,a,sqrshrun_fns,MO_SIGN)7337 TRANS(SQRSHRUN_v, do_vec_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN) 7338 7339 /* 7340 * Advanced SIMD Scalar Shift by Immediate 7341 */ 7342 7343 static bool do_scalar_shift_imm(DisasContext *s, arg_rri_e *a, 7344 WideShiftImmFn *fn, bool accumulate, 7345 MemOp sign) 7346 { 7347 if (fp_access_check(s)) { 7348 TCGv_i64 rd = tcg_temp_new_i64(); 7349 TCGv_i64 rn = tcg_temp_new_i64(); 7350 7351 read_vec_element(s, rn, a->rn, 0, a->esz | sign); 7352 if (accumulate) { 7353 read_vec_element(s, rd, a->rd, 0, a->esz | sign); 7354 } 7355 fn(rd, rn, a->imm); 7356 write_fp_dreg(s, a->rd, rd); 7357 } 7358 return true; 7359 } 7360 7361 TRANS(SSHR_s, do_scalar_shift_imm, a, gen_sshr_d, false, 0) 7362 TRANS(USHR_s, do_scalar_shift_imm, a, gen_ushr_d, false, 0) 7363 TRANS(SSRA_s, do_scalar_shift_imm, a, gen_ssra_d, true, 0) 7364 TRANS(USRA_s, do_scalar_shift_imm, a, gen_usra_d, true, 0) 7365 TRANS(SRSHR_s, do_scalar_shift_imm, a, gen_srshr_d, false, 0) 7366 TRANS(URSHR_s, do_scalar_shift_imm, a, gen_urshr_d, false, 0) 7367 TRANS(SRSRA_s, do_scalar_shift_imm, a, gen_srsra_d, true, 0) 7368 TRANS(URSRA_s, do_scalar_shift_imm, a, gen_ursra_d, true, 0) 7369 TRANS(SRI_s, do_scalar_shift_imm, a, gen_sri_d, true, 0) 7370 7371 TRANS(SHL_s, do_scalar_shift_imm, a, tcg_gen_shli_i64, false, 0) 7372 TRANS(SLI_s, do_scalar_shift_imm, a, gen_sli_d, true, 0) 7373 trunc_i64_env_imm(TCGv_i64 d,TCGv_i64 s,int64_t i,NeonGenTwoOpEnvFn * fn)7374 static void trunc_i64_env_imm(TCGv_i64 d, TCGv_i64 s, int64_t i, 7375 NeonGenTwoOpEnvFn *fn) 7376 { 7377 TCGv_i32 t = tcg_temp_new_i32(); 7378 tcg_gen_extrl_i64_i32(t, s); 7379 fn(t, tcg_env, t, tcg_constant_i32(i)); 7380 tcg_gen_extu_i32_i64(d, t); 7381 } 7382 gen_sqshli_b(TCGv_i64 d,TCGv_i64 s,int64_t i)7383 static void gen_sqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7384 { 7385 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s8); 7386 } 7387 gen_sqshli_h(TCGv_i64 d,TCGv_i64 s,int64_t i)7388 static void gen_sqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7389 { 7390 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s16); 7391 } 7392 gen_sqshli_s(TCGv_i64 d,TCGv_i64 s,int64_t i)7393 static void gen_sqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7394 { 7395 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_s32); 7396 } 7397 gen_sqshli_d(TCGv_i64 d,TCGv_i64 s,int64_t i)7398 static void gen_sqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7399 { 7400 gen_helper_neon_qshl_s64(d, tcg_env, s, tcg_constant_i64(i)); 7401 } 7402 gen_uqshli_b(TCGv_i64 d,TCGv_i64 s,int64_t i)7403 static void gen_uqshli_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7404 { 7405 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u8); 7406 } 7407 gen_uqshli_h(TCGv_i64 d,TCGv_i64 s,int64_t i)7408 static void gen_uqshli_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7409 { 7410 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u16); 7411 } 7412 gen_uqshli_s(TCGv_i64 d,TCGv_i64 s,int64_t i)7413 static void gen_uqshli_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7414 
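/*
 * Like the other sub-64-bit SQSHL/UQSHL/SQSHLU immediate helpers, this
 * goes via the 32-bit Neon helper, which saturates and records any
 * saturation in FPSR.QC through tcg_env.
 */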
{ 7415 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshl_u32); 7416 } 7417 gen_uqshli_d(TCGv_i64 d,TCGv_i64 s,int64_t i)7418 static void gen_uqshli_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7419 { 7420 gen_helper_neon_qshl_u64(d, tcg_env, s, tcg_constant_i64(i)); 7421 } 7422 gen_sqshlui_b(TCGv_i64 d,TCGv_i64 s,int64_t i)7423 static void gen_sqshlui_b(TCGv_i64 d, TCGv_i64 s, int64_t i) 7424 { 7425 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s8); 7426 } 7427 gen_sqshlui_h(TCGv_i64 d,TCGv_i64 s,int64_t i)7428 static void gen_sqshlui_h(TCGv_i64 d, TCGv_i64 s, int64_t i) 7429 { 7430 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s16); 7431 } 7432 gen_sqshlui_s(TCGv_i64 d,TCGv_i64 s,int64_t i)7433 static void gen_sqshlui_s(TCGv_i64 d, TCGv_i64 s, int64_t i) 7434 { 7435 trunc_i64_env_imm(d, s, i, gen_helper_neon_qshlu_s32); 7436 } 7437 gen_sqshlui_d(TCGv_i64 d,TCGv_i64 s,int64_t i)7438 static void gen_sqshlui_d(TCGv_i64 d, TCGv_i64 s, int64_t i) 7439 { 7440 gen_helper_neon_qshlu_s64(d, tcg_env, s, tcg_constant_i64(i)); 7441 } 7442 7443 static WideShiftImmFn * const f_scalar_sqshli[] = { 7444 gen_sqshli_b, gen_sqshli_h, gen_sqshli_s, gen_sqshli_d 7445 }; 7446 7447 static WideShiftImmFn * const f_scalar_uqshli[] = { 7448 gen_uqshli_b, gen_uqshli_h, gen_uqshli_s, gen_uqshli_d 7449 }; 7450 7451 static WideShiftImmFn * const f_scalar_sqshlui[] = { 7452 gen_sqshlui_b, gen_sqshlui_h, gen_sqshlui_s, gen_sqshlui_d 7453 }; 7454 7455 /* Note that the helpers sign-extend their inputs, so don't do it here. */ 7456 TRANS(SQSHL_si, do_scalar_shift_imm, a, f_scalar_sqshli[a->esz], false, 0) 7457 TRANS(UQSHL_si, do_scalar_shift_imm, a, f_scalar_uqshli[a->esz], false, 0) 7458 TRANS(SQSHLU_si, do_scalar_shift_imm, a, f_scalar_sqshlui[a->esz], false, 0) 7459 do_scalar_shift_imm_narrow(DisasContext * s,arg_rri_e * a,WideShiftImmFn * const fns[3],MemOp sign,bool zext)7460 static bool do_scalar_shift_imm_narrow(DisasContext *s, arg_rri_e *a, 7461 WideShiftImmFn * const fns[3], 7462 MemOp sign, bool zext) 7463 { 7464 MemOp esz = a->esz; 7465 7466 tcg_debug_assert(esz >= MO_8 && esz <= MO_32); 7467 7468 if (fp_access_check(s)) { 7469 TCGv_i64 rd = tcg_temp_new_i64(); 7470 TCGv_i64 rn = tcg_temp_new_i64(); 7471 7472 read_vec_element(s, rn, a->rn, 0, (esz + 1) | sign); 7473 fns[esz](rd, rn, a->imm); 7474 if (zext) { 7475 tcg_gen_ext_i64(rd, rd, esz); 7476 } 7477 write_fp_dreg(s, a->rd, rd); 7478 } 7479 return true; 7480 } 7481 TRANS(SQSHRN_si,do_scalar_shift_imm_narrow,a,sqshrn_fns,MO_SIGN,true)7482 TRANS(SQSHRN_si, do_scalar_shift_imm_narrow, a, sqshrn_fns, MO_SIGN, true) 7483 TRANS(SQRSHRN_si, do_scalar_shift_imm_narrow, a, sqrshrn_fns, MO_SIGN, true) 7484 TRANS(UQSHRN_si, do_scalar_shift_imm_narrow, a, uqshrn_fns, 0, false) 7485 TRANS(UQRSHRN_si, do_scalar_shift_imm_narrow, a, uqrshrn_fns, 0, false) 7486 TRANS(SQSHRUN_si, do_scalar_shift_imm_narrow, a, sqshrun_fns, MO_SIGN, false) 7487 TRANS(SQRSHRUN_si, do_scalar_shift_imm_narrow, a, sqrshrun_fns, MO_SIGN, false) 7488 7489 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 7490 * Note that it is the caller's responsibility to ensure that the 7491 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 7492 * mandated semantics for out of range shifts. 
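 * (The variable-shift instructions LSLV/LSRV/ASRV/RORV satisfy this by
 * masking the shift register to 0..31 or 0..63 in handle_shift_reg
 * before calling here.)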
7493 */ 7494 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 7495 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 7496 { 7497 switch (shift_type) { 7498 case A64_SHIFT_TYPE_LSL: 7499 tcg_gen_shl_i64(dst, src, shift_amount); 7500 break; 7501 case A64_SHIFT_TYPE_LSR: 7502 tcg_gen_shr_i64(dst, src, shift_amount); 7503 break; 7504 case A64_SHIFT_TYPE_ASR: 7505 if (!sf) { 7506 tcg_gen_ext32s_i64(dst, src); 7507 } 7508 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 7509 break; 7510 case A64_SHIFT_TYPE_ROR: 7511 if (sf) { 7512 tcg_gen_rotr_i64(dst, src, shift_amount); 7513 } else { 7514 TCGv_i32 t0, t1; 7515 t0 = tcg_temp_new_i32(); 7516 t1 = tcg_temp_new_i32(); 7517 tcg_gen_extrl_i64_i32(t0, src); 7518 tcg_gen_extrl_i64_i32(t1, shift_amount); 7519 tcg_gen_rotr_i32(t0, t0, t1); 7520 tcg_gen_extu_i32_i64(dst, t0); 7521 } 7522 break; 7523 default: 7524 assert(FALSE); /* all shift types should be handled */ 7525 break; 7526 } 7527 7528 if (!sf) { /* zero extend final result */ 7529 tcg_gen_ext32u_i64(dst, dst); 7530 } 7531 } 7532 7533 /* Shift a TCGv src by immediate, put result in dst. 7534 * The shift amount must be in range (this should always be true as the 7535 * relevant instructions will UNDEF on bad shift immediates). 7536 */ shift_reg_imm(TCGv_i64 dst,TCGv_i64 src,int sf,enum a64_shift_type shift_type,unsigned int shift_i)7537 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 7538 enum a64_shift_type shift_type, unsigned int shift_i) 7539 { 7540 assert(shift_i < (sf ? 64 : 32)); 7541 7542 if (shift_i == 0) { 7543 tcg_gen_mov_i64(dst, src); 7544 } else { 7545 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 7546 } 7547 } 7548 7549 /* Logical (shifted register) 7550 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 7551 * +----+-----+-----------+-------+---+------+--------+------+------+ 7552 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd | 7553 * +----+-----+-----------+-------+---+------+--------+------+------+ 7554 */ disas_logic_reg(DisasContext * s,uint32_t insn)7555 static void disas_logic_reg(DisasContext *s, uint32_t insn) 7556 { 7557 TCGv_i64 tcg_rd, tcg_rn, tcg_rm; 7558 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd; 7559 7560 sf = extract32(insn, 31, 1); 7561 opc = extract32(insn, 29, 2); 7562 shift_type = extract32(insn, 22, 2); 7563 invert = extract32(insn, 21, 1); 7564 rm = extract32(insn, 16, 5); 7565 shift_amount = extract32(insn, 10, 6); 7566 rn = extract32(insn, 5, 5); 7567 rd = extract32(insn, 0, 5); 7568 7569 if (!sf && (shift_amount & (1 << 5))) { 7570 unallocated_encoding(s); 7571 return; 7572 } 7573 7574 tcg_rd = cpu_reg(s, rd); 7575 7576 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) { 7577 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for 7578 * register-register MOV and MVN, so it is worth special casing. 
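 * (MOV Xd, Xm assembles as ORR Xd, XZR, Xm, and MVN Wd, Wm as
 * ORN Wd, WZR, Wm.)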
7579 */ 7580 tcg_rm = cpu_reg(s, rm); 7581 if (invert) { 7582 tcg_gen_not_i64(tcg_rd, tcg_rm); 7583 if (!sf) { 7584 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7585 } 7586 } else { 7587 if (sf) { 7588 tcg_gen_mov_i64(tcg_rd, tcg_rm); 7589 } else { 7590 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 7591 } 7592 } 7593 return; 7594 } 7595 7596 tcg_rm = read_cpu_reg(s, rm, sf); 7597 7598 if (shift_amount) { 7599 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount); 7600 } 7601 7602 tcg_rn = cpu_reg(s, rn); 7603 7604 switch (opc | (invert << 2)) { 7605 case 0: /* AND */ 7606 case 3: /* ANDS */ 7607 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm); 7608 break; 7609 case 1: /* ORR */ 7610 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm); 7611 break; 7612 case 2: /* EOR */ 7613 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm); 7614 break; 7615 case 4: /* BIC */ 7616 case 7: /* BICS */ 7617 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm); 7618 break; 7619 case 5: /* ORN */ 7620 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm); 7621 break; 7622 case 6: /* EON */ 7623 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm); 7624 break; 7625 default: 7626 assert(FALSE); 7627 break; 7628 } 7629 7630 if (!sf) { 7631 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7632 } 7633 7634 if (opc == 3) { 7635 gen_logic_CC(sf, tcg_rd); 7636 } 7637 } 7638 7639 /* 7640 * Add/subtract (extended register) 7641 * 7642 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0| 7643 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 7644 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd | 7645 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 7646 * 7647 * sf: 0 -> 32bit, 1 -> 64bit 7648 * op: 0 -> add , 1 -> sub 7649 * S: 1 -> set flags 7650 * opt: 00 7651 * option: extension type (see DecodeRegExtend) 7652 * imm3: optional shift to Rm 7653 * 7654 * Rd = Rn + LSL(extend(Rm), amount) 7655 */ disas_add_sub_ext_reg(DisasContext * s,uint32_t insn)7656 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn) 7657 { 7658 int rd = extract32(insn, 0, 5); 7659 int rn = extract32(insn, 5, 5); 7660 int imm3 = extract32(insn, 10, 3); 7661 int option = extract32(insn, 13, 3); 7662 int rm = extract32(insn, 16, 5); 7663 int opt = extract32(insn, 22, 2); 7664 bool setflags = extract32(insn, 29, 1); 7665 bool sub_op = extract32(insn, 30, 1); 7666 bool sf = extract32(insn, 31, 1); 7667 7668 TCGv_i64 tcg_rm, tcg_rn; /* temps */ 7669 TCGv_i64 tcg_rd; 7670 TCGv_i64 tcg_result; 7671 7672 if (imm3 > 4 || opt != 0) { 7673 unallocated_encoding(s); 7674 return; 7675 } 7676 7677 /* non-flag setting ops may use SP */ 7678 if (!setflags) { 7679 tcg_rd = cpu_reg_sp(s, rd); 7680 } else { 7681 tcg_rd = cpu_reg(s, rd); 7682 } 7683 tcg_rn = read_cpu_reg_sp(s, rn, sf); 7684 7685 tcg_rm = read_cpu_reg(s, rm, sf); 7686 ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3); 7687 7688 tcg_result = tcg_temp_new_i64(); 7689 7690 if (!setflags) { 7691 if (sub_op) { 7692 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 7693 } else { 7694 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 7695 } 7696 } else { 7697 if (sub_op) { 7698 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 7699 } else { 7700 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 7701 } 7702 } 7703 7704 if (sf) { 7705 tcg_gen_mov_i64(tcg_rd, tcg_result); 7706 } else { 7707 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 7708 } 7709 } 7710 7711 /* 7712 * Add/subtract (shifted register) 7713 * 7714 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 7715 * +--+--+--+-----------+-----+--+-------+---------+------+------+ 7716 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn 
| Rd | 7717 * +--+--+--+-----------+-----+--+-------+---------+------+------+ 7718 * 7719 * sf: 0 -> 32bit, 1 -> 64bit 7720 * op: 0 -> add , 1 -> sub 7721 * S: 1 -> set flags 7722 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED 7723 * imm6: Shift amount to apply to Rm before the add/sub 7724 */ disas_add_sub_reg(DisasContext * s,uint32_t insn)7725 static void disas_add_sub_reg(DisasContext *s, uint32_t insn) 7726 { 7727 int rd = extract32(insn, 0, 5); 7728 int rn = extract32(insn, 5, 5); 7729 int imm6 = extract32(insn, 10, 6); 7730 int rm = extract32(insn, 16, 5); 7731 int shift_type = extract32(insn, 22, 2); 7732 bool setflags = extract32(insn, 29, 1); 7733 bool sub_op = extract32(insn, 30, 1); 7734 bool sf = extract32(insn, 31, 1); 7735 7736 TCGv_i64 tcg_rd = cpu_reg(s, rd); 7737 TCGv_i64 tcg_rn, tcg_rm; 7738 TCGv_i64 tcg_result; 7739 7740 if ((shift_type == 3) || (!sf && (imm6 > 31))) { 7741 unallocated_encoding(s); 7742 return; 7743 } 7744 7745 tcg_rn = read_cpu_reg(s, rn, sf); 7746 tcg_rm = read_cpu_reg(s, rm, sf); 7747 7748 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6); 7749 7750 tcg_result = tcg_temp_new_i64(); 7751 7752 if (!setflags) { 7753 if (sub_op) { 7754 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 7755 } else { 7756 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 7757 } 7758 } else { 7759 if (sub_op) { 7760 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 7761 } else { 7762 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 7763 } 7764 } 7765 7766 if (sf) { 7767 tcg_gen_mov_i64(tcg_rd, tcg_result); 7768 } else { 7769 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 7770 } 7771 } 7772 7773 /* Data-processing (3 source) 7774 * 7775 * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0 7776 * +--+------+-----------+------+------+----+------+------+------+ 7777 * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd | 7778 * +--+------+-----------+------+------+----+------+------+------+ 7779 */ disas_data_proc_3src(DisasContext * s,uint32_t insn)7780 static void disas_data_proc_3src(DisasContext *s, uint32_t insn) 7781 { 7782 int rd = extract32(insn, 0, 5); 7783 int rn = extract32(insn, 5, 5); 7784 int ra = extract32(insn, 10, 5); 7785 int rm = extract32(insn, 16, 5); 7786 int op_id = (extract32(insn, 29, 3) << 4) | 7787 (extract32(insn, 21, 3) << 1) | 7788 extract32(insn, 15, 1); 7789 bool sf = extract32(insn, 31, 1); 7790 bool is_sub = extract32(op_id, 0, 1); 7791 bool is_high = extract32(op_id, 2, 1); 7792 bool is_signed = false; 7793 TCGv_i64 tcg_op1; 7794 TCGv_i64 tcg_op2; 7795 TCGv_i64 tcg_tmp; 7796 7797 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */ 7798 switch (op_id) { 7799 case 0x42: /* SMADDL */ 7800 case 0x43: /* SMSUBL */ 7801 case 0x44: /* SMULH */ 7802 is_signed = true; 7803 break; 7804 case 0x0: /* MADD (32bit) */ 7805 case 0x1: /* MSUB (32bit) */ 7806 case 0x40: /* MADD (64bit) */ 7807 case 0x41: /* MSUB (64bit) */ 7808 case 0x4a: /* UMADDL */ 7809 case 0x4b: /* UMSUBL */ 7810 case 0x4c: /* UMULH */ 7811 break; 7812 default: 7813 unallocated_encoding(s); 7814 return; 7815 } 7816 7817 if (is_high) { 7818 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */ 7819 TCGv_i64 tcg_rd = cpu_reg(s, rd); 7820 TCGv_i64 tcg_rn = cpu_reg(s, rn); 7821 TCGv_i64 tcg_rm = cpu_reg(s, rm); 7822 7823 if (is_signed) { 7824 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 7825 } else { 7826 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 7827 } 7828 return; 7829 } 7830 7831 tcg_op1 = tcg_temp_new_i64(); 7832 tcg_op2 = tcg_temp_new_i64(); 7833 tcg_tmp 
= tcg_temp_new_i64(); 7834 7835 if (op_id < 0x42) { 7836 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn)); 7837 tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm)); 7838 } else { 7839 if (is_signed) { 7840 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn)); 7841 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm)); 7842 } else { 7843 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn)); 7844 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm)); 7845 } 7846 } 7847 7848 if (ra == 31 && !is_sub) { 7849 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 7850 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2); 7851 } else { 7852 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 7853 if (is_sub) { 7854 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 7855 } else { 7856 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 7857 } 7858 } 7859 7860 if (!sf) { 7861 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd)); 7862 } 7863 } 7864 7865 /* Add/subtract (with carry) 7866 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0 7867 * +--+--+--+------------------------+------+-------------+------+-----+ 7868 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd | 7869 * +--+--+--+------------------------+------+-------------+------+-----+ 7870 */ 7871 disas_adc_sbc(DisasContext * s,uint32_t insn)7872 static void disas_adc_sbc(DisasContext *s, uint32_t insn) 7873 { 7874 unsigned int sf, op, setflags, rm, rn, rd; 7875 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 7876 7877 sf = extract32(insn, 31, 1); 7878 op = extract32(insn, 30, 1); 7879 setflags = extract32(insn, 29, 1); 7880 rm = extract32(insn, 16, 5); 7881 rn = extract32(insn, 5, 5); 7882 rd = extract32(insn, 0, 5); 7883 7884 tcg_rd = cpu_reg(s, rd); 7885 tcg_rn = cpu_reg(s, rn); 7886 7887 if (op) { 7888 tcg_y = tcg_temp_new_i64(); 7889 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm)); 7890 } else { 7891 tcg_y = cpu_reg(s, rm); 7892 } 7893 7894 if (setflags) { 7895 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y); 7896 } else { 7897 gen_adc(sf, tcg_rd, tcg_rn, tcg_y); 7898 } 7899 } 7900 7901 /* 7902 * Rotate right into flags 7903 * 31 30 29 21 15 10 5 4 0 7904 * +--+--+--+-----------------+--------+-----------+------+--+------+ 7905 * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask | 7906 * +--+--+--+-----------------+--------+-----------+------+--+------+ 7907 */ disas_rotate_right_into_flags(DisasContext * s,uint32_t insn)7908 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn) 7909 { 7910 int mask = extract32(insn, 0, 4); 7911 int o2 = extract32(insn, 4, 1); 7912 int rn = extract32(insn, 5, 5); 7913 int imm6 = extract32(insn, 15, 6); 7914 int sf_op_s = extract32(insn, 29, 3); 7915 TCGv_i64 tcg_rn; 7916 TCGv_i32 nzcv; 7917 7918 if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) { 7919 unallocated_encoding(s); 7920 return; 7921 } 7922 7923 tcg_rn = read_cpu_reg(s, rn, 1); 7924 tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6); 7925 7926 nzcv = tcg_temp_new_i32(); 7927 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 7928 7929 if (mask & 8) { /* N */ 7930 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 7931 } 7932 if (mask & 4) { /* Z */ 7933 tcg_gen_not_i32(cpu_ZF, nzcv); 7934 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 7935 } 7936 if (mask & 2) { /* C */ 7937 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 7938 } 7939 if (mask & 1) { /* V */ 7940 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 7941 } 7942 } 7943 7944 /* 7945 * Evaluate into flags 7946 * 31 30 29 21 15 14 10 5 4 0 7947 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 7948 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz 
| 0 0 1 0 | Rn |o3| mask | 7949 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 7950 */ disas_evaluate_into_flags(DisasContext * s,uint32_t insn)7951 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn) 7952 { 7953 int o3_mask = extract32(insn, 0, 5); 7954 int rn = extract32(insn, 5, 5); 7955 int o2 = extract32(insn, 15, 6); 7956 int sz = extract32(insn, 14, 1); 7957 int sf_op_s = extract32(insn, 29, 3); 7958 TCGv_i32 tmp; 7959 int shift; 7960 7961 if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd || 7962 !dc_isar_feature(aa64_condm_4, s)) { 7963 unallocated_encoding(s); 7964 return; 7965 } 7966 shift = sz ? 16 : 24; /* SETF16 or SETF8 */ 7967 7968 tmp = tcg_temp_new_i32(); 7969 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 7970 tcg_gen_shli_i32(cpu_NF, tmp, shift); 7971 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 7972 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 7973 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 7974 } 7975 7976 /* Conditional compare (immediate / register) 7977 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 7978 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 7979 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv | 7980 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 7981 * [1] y [0] [0] 7982 */ disas_cc(DisasContext * s,uint32_t insn)7983 static void disas_cc(DisasContext *s, uint32_t insn) 7984 { 7985 unsigned int sf, op, y, cond, rn, nzcv, is_imm; 7986 TCGv_i32 tcg_t0, tcg_t1, tcg_t2; 7987 TCGv_i64 tcg_tmp, tcg_y, tcg_rn; 7988 DisasCompare c; 7989 7990 if (!extract32(insn, 29, 1)) { 7991 unallocated_encoding(s); 7992 return; 7993 } 7994 if (insn & (1 << 10 | 1 << 4)) { 7995 unallocated_encoding(s); 7996 return; 7997 } 7998 sf = extract32(insn, 31, 1); 7999 op = extract32(insn, 30, 1); 8000 is_imm = extract32(insn, 11, 1); 8001 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */ 8002 cond = extract32(insn, 12, 4); 8003 rn = extract32(insn, 5, 5); 8004 nzcv = extract32(insn, 0, 4); 8005 8006 /* Set T0 = !COND. */ 8007 tcg_t0 = tcg_temp_new_i32(); 8008 arm_test_cc(&c, cond); 8009 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 8010 8011 /* Load the arguments for the new comparison. */ 8012 if (is_imm) { 8013 tcg_y = tcg_temp_new_i64(); 8014 tcg_gen_movi_i64(tcg_y, y); 8015 } else { 8016 tcg_y = cpu_reg(s, y); 8017 } 8018 tcg_rn = cpu_reg(s, rn); 8019 8020 /* Set the flags for the new comparison. */ 8021 tcg_tmp = tcg_temp_new_i64(); 8022 if (op) { 8023 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y); 8024 } else { 8025 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y); 8026 } 8027 8028 /* If COND was false, force the flags to #nzcv. Compute two masks 8029 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 8030 * For tcg hosts that support ANDC, we can make do with just T1. 8031 * In either case, allow the tcg optimizer to delete any unused mask. 
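 * When COND is true, T1 is 0 and T2 is -1, so every OR/AND below is a
 * no-op and the flags computed by the comparison are kept; when COND is
 * false they force each flag to the corresponding #nzcv bit.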
8032 */ 8033 tcg_t1 = tcg_temp_new_i32(); 8034 tcg_t2 = tcg_temp_new_i32(); 8035 tcg_gen_neg_i32(tcg_t1, tcg_t0); 8036 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 8037 8038 if (nzcv & 8) { /* N */ 8039 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 8040 } else { 8041 if (TCG_TARGET_HAS_andc_i32) { 8042 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 8043 } else { 8044 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 8045 } 8046 } 8047 if (nzcv & 4) { /* Z */ 8048 if (TCG_TARGET_HAS_andc_i32) { 8049 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 8050 } else { 8051 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 8052 } 8053 } else { 8054 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 8055 } 8056 if (nzcv & 2) { /* C */ 8057 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 8058 } else { 8059 if (TCG_TARGET_HAS_andc_i32) { 8060 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 8061 } else { 8062 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 8063 } 8064 } 8065 if (nzcv & 1) { /* V */ 8066 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 8067 } else { 8068 if (TCG_TARGET_HAS_andc_i32) { 8069 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 8070 } else { 8071 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 8072 } 8073 } 8074 } 8075 8076 /* Conditional select 8077 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0 8078 * +----+----+---+-----------------+------+------+-----+------+------+ 8079 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd | 8080 * +----+----+---+-----------------+------+------+-----+------+------+ 8081 */ disas_cond_select(DisasContext * s,uint32_t insn)8082 static void disas_cond_select(DisasContext *s, uint32_t insn) 8083 { 8084 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd; 8085 TCGv_i64 tcg_rd, zero; 8086 DisasCompare64 c; 8087 8088 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) { 8089 /* S == 1 or op2<1> == 1 */ 8090 unallocated_encoding(s); 8091 return; 8092 } 8093 sf = extract32(insn, 31, 1); 8094 else_inv = extract32(insn, 30, 1); 8095 rm = extract32(insn, 16, 5); 8096 cond = extract32(insn, 12, 4); 8097 else_inc = extract32(insn, 10, 1); 8098 rn = extract32(insn, 5, 5); 8099 rd = extract32(insn, 0, 5); 8100 8101 tcg_rd = cpu_reg(s, rd); 8102 8103 a64_test_cc(&c, cond); 8104 zero = tcg_constant_i64(0); 8105 8106 if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) { 8107 /* CSET & CSETM. 
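 * (CSET is the alias of CSINC Rd, XZR, XZR, invert(cond) and CSETM the
 * matching CSINV alias, hence the inverted condition below.)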
*/ 8108 if (else_inv) { 8109 tcg_gen_negsetcond_i64(tcg_invert_cond(c.cond), 8110 tcg_rd, c.value, zero); 8111 } else { 8112 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), 8113 tcg_rd, c.value, zero); 8114 } 8115 } else { 8116 TCGv_i64 t_true = cpu_reg(s, rn); 8117 TCGv_i64 t_false = read_cpu_reg(s, rm, 1); 8118 if (else_inv && else_inc) { 8119 tcg_gen_neg_i64(t_false, t_false); 8120 } else if (else_inv) { 8121 tcg_gen_not_i64(t_false, t_false); 8122 } else if (else_inc) { 8123 tcg_gen_addi_i64(t_false, t_false, 1); 8124 } 8125 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 8126 } 8127 8128 if (!sf) { 8129 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8130 } 8131 } 8132 handle_clz(DisasContext * s,unsigned int sf,unsigned int rn,unsigned int rd)8133 static void handle_clz(DisasContext *s, unsigned int sf, 8134 unsigned int rn, unsigned int rd) 8135 { 8136 TCGv_i64 tcg_rd, tcg_rn; 8137 tcg_rd = cpu_reg(s, rd); 8138 tcg_rn = cpu_reg(s, rn); 8139 8140 if (sf) { 8141 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 8142 } else { 8143 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 8144 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 8145 tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32); 8146 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 8147 } 8148 } 8149 handle_cls(DisasContext * s,unsigned int sf,unsigned int rn,unsigned int rd)8150 static void handle_cls(DisasContext *s, unsigned int sf, 8151 unsigned int rn, unsigned int rd) 8152 { 8153 TCGv_i64 tcg_rd, tcg_rn; 8154 tcg_rd = cpu_reg(s, rd); 8155 tcg_rn = cpu_reg(s, rn); 8156 8157 if (sf) { 8158 tcg_gen_clrsb_i64(tcg_rd, tcg_rn); 8159 } else { 8160 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 8161 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 8162 tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32); 8163 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 8164 } 8165 } 8166 handle_rbit(DisasContext * s,unsigned int sf,unsigned int rn,unsigned int rd)8167 static void handle_rbit(DisasContext *s, unsigned int sf, 8168 unsigned int rn, unsigned int rd) 8169 { 8170 TCGv_i64 tcg_rd, tcg_rn; 8171 tcg_rd = cpu_reg(s, rd); 8172 tcg_rn = cpu_reg(s, rn); 8173 8174 if (sf) { 8175 gen_helper_rbit64(tcg_rd, tcg_rn); 8176 } else { 8177 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 8178 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 8179 gen_helper_rbit(tcg_tmp32, tcg_tmp32); 8180 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 8181 } 8182 } 8183 8184 /* REV with sf==1, opcode==3 ("REV64") */ handle_rev64(DisasContext * s,unsigned int sf,unsigned int rn,unsigned int rd)8185 static void handle_rev64(DisasContext *s, unsigned int sf, 8186 unsigned int rn, unsigned int rd) 8187 { 8188 if (!sf) { 8189 unallocated_encoding(s); 8190 return; 8191 } 8192 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn)); 8193 } 8194 8195 /* REV with sf==0, opcode==2 8196 * REV32 (sf==1, opcode==2) 8197 */ handle_rev32(DisasContext * s,unsigned int sf,unsigned int rn,unsigned int rd)8198 static void handle_rev32(DisasContext *s, unsigned int sf, 8199 unsigned int rn, unsigned int rd) 8200 { 8201 TCGv_i64 tcg_rd = cpu_reg(s, rd); 8202 TCGv_i64 tcg_rn = cpu_reg(s, rn); 8203 8204 if (sf) { 8205 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 8206 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 8207 } else { 8208 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 8209 } 8210 } 8211 8212 /* REV16 (opcode==1) */ handle_rev16(DisasContext * s,unsigned int sf,unsigned int rn,unsigned int rd)8213 static void handle_rev16(DisasContext *s, unsigned int sf, 8214 unsigned int rn, unsigned int rd) 8215 { 8216 TCGv_i64 tcg_rd = cpu_reg(s, rd); 8217 TCGv_i64 tcg_tmp = 
tcg_temp_new_i64(); 8218 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 8219 TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff); 8220 8221 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 8222 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 8223 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 8224 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 8225 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 8226 } 8227 8228 /* Data-processing (1 source) 8229 * 31 30 29 28 21 20 16 15 10 9 5 4 0 8230 * +----+---+---+-----------------+---------+--------+------+------+ 8231 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd | 8232 * +----+---+---+-----------------+---------+--------+------+------+ 8233 */ disas_data_proc_1src(DisasContext * s,uint32_t insn)8234 static void disas_data_proc_1src(DisasContext *s, uint32_t insn) 8235 { 8236 unsigned int sf, opcode, opcode2, rn, rd; 8237 TCGv_i64 tcg_rd; 8238 8239 if (extract32(insn, 29, 1)) { 8240 unallocated_encoding(s); 8241 return; 8242 } 8243 8244 sf = extract32(insn, 31, 1); 8245 opcode = extract32(insn, 10, 6); 8246 opcode2 = extract32(insn, 16, 5); 8247 rn = extract32(insn, 5, 5); 8248 rd = extract32(insn, 0, 5); 8249 8250 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7)) 8251 8252 switch (MAP(sf, opcode2, opcode)) { 8253 case MAP(0, 0x00, 0x00): /* RBIT */ 8254 case MAP(1, 0x00, 0x00): 8255 handle_rbit(s, sf, rn, rd); 8256 break; 8257 case MAP(0, 0x00, 0x01): /* REV16 */ 8258 case MAP(1, 0x00, 0x01): 8259 handle_rev16(s, sf, rn, rd); 8260 break; 8261 case MAP(0, 0x00, 0x02): /* REV/REV32 */ 8262 case MAP(1, 0x00, 0x02): 8263 handle_rev32(s, sf, rn, rd); 8264 break; 8265 case MAP(1, 0x00, 0x03): /* REV64 */ 8266 handle_rev64(s, sf, rn, rd); 8267 break; 8268 case MAP(0, 0x00, 0x04): /* CLZ */ 8269 case MAP(1, 0x00, 0x04): 8270 handle_clz(s, sf, rn, rd); 8271 break; 8272 case MAP(0, 0x00, 0x05): /* CLS */ 8273 case MAP(1, 0x00, 0x05): 8274 handle_cls(s, sf, rn, rd); 8275 break; 8276 case MAP(1, 0x01, 0x00): /* PACIA */ 8277 if (s->pauth_active) { 8278 tcg_rd = cpu_reg(s, rd); 8279 gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8280 } else if (!dc_isar_feature(aa64_pauth, s)) { 8281 goto do_unallocated; 8282 } 8283 break; 8284 case MAP(1, 0x01, 0x01): /* PACIB */ 8285 if (s->pauth_active) { 8286 tcg_rd = cpu_reg(s, rd); 8287 gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8288 } else if (!dc_isar_feature(aa64_pauth, s)) { 8289 goto do_unallocated; 8290 } 8291 break; 8292 case MAP(1, 0x01, 0x02): /* PACDA */ 8293 if (s->pauth_active) { 8294 tcg_rd = cpu_reg(s, rd); 8295 gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8296 } else if (!dc_isar_feature(aa64_pauth, s)) { 8297 goto do_unallocated; 8298 } 8299 break; 8300 case MAP(1, 0x01, 0x03): /* PACDB */ 8301 if (s->pauth_active) { 8302 tcg_rd = cpu_reg(s, rd); 8303 gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8304 } else if (!dc_isar_feature(aa64_pauth, s)) { 8305 goto do_unallocated; 8306 } 8307 break; 8308 case MAP(1, 0x01, 0x04): /* AUTIA */ 8309 if (s->pauth_active) { 8310 tcg_rd = cpu_reg(s, rd); 8311 gen_helper_autia(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8312 } else if (!dc_isar_feature(aa64_pauth, s)) { 8313 goto do_unallocated; 8314 } 8315 break; 8316 case MAP(1, 0x01, 0x05): /* AUTIB */ 8317 if (s->pauth_active) { 8318 tcg_rd = cpu_reg(s, rd); 8319 gen_helper_autib(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8320 } else if (!dc_isar_feature(aa64_pauth, s)) { 8321 goto do_unallocated; 8322 } 8323 break; 8324 case MAP(1, 0x01, 0x06): /* 
AUTDA */ 8325 if (s->pauth_active) { 8326 tcg_rd = cpu_reg(s, rd); 8327 gen_helper_autda(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8328 } else if (!dc_isar_feature(aa64_pauth, s)) { 8329 goto do_unallocated; 8330 } 8331 break; 8332 case MAP(1, 0x01, 0x07): /* AUTDB */ 8333 if (s->pauth_active) { 8334 tcg_rd = cpu_reg(s, rd); 8335 gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, cpu_reg_sp(s, rn)); 8336 } else if (!dc_isar_feature(aa64_pauth, s)) { 8337 goto do_unallocated; 8338 } 8339 break; 8340 case MAP(1, 0x01, 0x08): /* PACIZA */ 8341 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8342 goto do_unallocated; 8343 } else if (s->pauth_active) { 8344 tcg_rd = cpu_reg(s, rd); 8345 gen_helper_pacia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8346 } 8347 break; 8348 case MAP(1, 0x01, 0x09): /* PACIZB */ 8349 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8350 goto do_unallocated; 8351 } else if (s->pauth_active) { 8352 tcg_rd = cpu_reg(s, rd); 8353 gen_helper_pacib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8354 } 8355 break; 8356 case MAP(1, 0x01, 0x0a): /* PACDZA */ 8357 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8358 goto do_unallocated; 8359 } else if (s->pauth_active) { 8360 tcg_rd = cpu_reg(s, rd); 8361 gen_helper_pacda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8362 } 8363 break; 8364 case MAP(1, 0x01, 0x0b): /* PACDZB */ 8365 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8366 goto do_unallocated; 8367 } else if (s->pauth_active) { 8368 tcg_rd = cpu_reg(s, rd); 8369 gen_helper_pacdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8370 } 8371 break; 8372 case MAP(1, 0x01, 0x0c): /* AUTIZA */ 8373 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8374 goto do_unallocated; 8375 } else if (s->pauth_active) { 8376 tcg_rd = cpu_reg(s, rd); 8377 gen_helper_autia(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8378 } 8379 break; 8380 case MAP(1, 0x01, 0x0d): /* AUTIZB */ 8381 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8382 goto do_unallocated; 8383 } else if (s->pauth_active) { 8384 tcg_rd = cpu_reg(s, rd); 8385 gen_helper_autib(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8386 } 8387 break; 8388 case MAP(1, 0x01, 0x0e): /* AUTDZA */ 8389 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8390 goto do_unallocated; 8391 } else if (s->pauth_active) { 8392 tcg_rd = cpu_reg(s, rd); 8393 gen_helper_autda(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8394 } 8395 break; 8396 case MAP(1, 0x01, 0x0f): /* AUTDZB */ 8397 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8398 goto do_unallocated; 8399 } else if (s->pauth_active) { 8400 tcg_rd = cpu_reg(s, rd); 8401 gen_helper_autdb(tcg_rd, tcg_env, tcg_rd, tcg_constant_i64(0)); 8402 } 8403 break; 8404 case MAP(1, 0x01, 0x10): /* XPACI */ 8405 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8406 goto do_unallocated; 8407 } else if (s->pauth_active) { 8408 tcg_rd = cpu_reg(s, rd); 8409 gen_helper_xpaci(tcg_rd, tcg_env, tcg_rd); 8410 } 8411 break; 8412 case MAP(1, 0x01, 0x11): /* XPACD */ 8413 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 8414 goto do_unallocated; 8415 } else if (s->pauth_active) { 8416 tcg_rd = cpu_reg(s, rd); 8417 gen_helper_xpacd(tcg_rd, tcg_env, tcg_rd); 8418 } 8419 break; 8420 default: 8421 do_unallocated: 8422 unallocated_encoding(s); 8423 break; 8424 } 8425 8426 #undef MAP 8427 } 8428 handle_div(DisasContext * s,bool is_signed,unsigned int sf,unsigned int rm,unsigned int rn,unsigned int rd)8429 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf, 8430 unsigned int rm, 
unsigned int rn, unsigned int rd) 8431 { 8432 TCGv_i64 tcg_n, tcg_m, tcg_rd; 8433 tcg_rd = cpu_reg(s, rd); 8434 8435 if (!sf && is_signed) { 8436 tcg_n = tcg_temp_new_i64(); 8437 tcg_m = tcg_temp_new_i64(); 8438 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn)); 8439 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm)); 8440 } else { 8441 tcg_n = read_cpu_reg(s, rn, sf); 8442 tcg_m = read_cpu_reg(s, rm, sf); 8443 } 8444 8445 if (is_signed) { 8446 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 8447 } else { 8448 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 8449 } 8450 8451 if (!sf) { /* zero extend final result */ 8452 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8453 } 8454 } 8455 8456 /* LSLV, LSRV, ASRV, RORV */ handle_shift_reg(DisasContext * s,enum a64_shift_type shift_type,unsigned int sf,unsigned int rm,unsigned int rn,unsigned int rd)8457 static void handle_shift_reg(DisasContext *s, 8458 enum a64_shift_type shift_type, unsigned int sf, 8459 unsigned int rm, unsigned int rn, unsigned int rd) 8460 { 8461 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 8462 TCGv_i64 tcg_rd = cpu_reg(s, rd); 8463 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 8464 8465 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31); 8466 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift); 8467 } 8468 8469 /* CRC32[BHWX], CRC32C[BHWX] */ handle_crc32(DisasContext * s,unsigned int sf,unsigned int sz,bool crc32c,unsigned int rm,unsigned int rn,unsigned int rd)8470 static void handle_crc32(DisasContext *s, 8471 unsigned int sf, unsigned int sz, bool crc32c, 8472 unsigned int rm, unsigned int rn, unsigned int rd) 8473 { 8474 TCGv_i64 tcg_acc, tcg_val; 8475 TCGv_i32 tcg_bytes; 8476 8477 if (!dc_isar_feature(aa64_crc32, s) 8478 || (sf == 1 && sz != 3) 8479 || (sf == 0 && sz == 3)) { 8480 unallocated_encoding(s); 8481 return; 8482 } 8483 8484 if (sz == 3) { 8485 tcg_val = cpu_reg(s, rm); 8486 } else { 8487 uint64_t mask; 8488 switch (sz) { 8489 case 0: 8490 mask = 0xFF; 8491 break; 8492 case 1: 8493 mask = 0xFFFF; 8494 break; 8495 case 2: 8496 mask = 0xFFFFFFFF; 8497 break; 8498 default: 8499 g_assert_not_reached(); 8500 } 8501 tcg_val = tcg_temp_new_i64(); 8502 tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask); 8503 } 8504 8505 tcg_acc = cpu_reg(s, rn); 8506 tcg_bytes = tcg_constant_i32(1 << sz); 8507 8508 if (crc32c) { 8509 gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 8510 } else { 8511 gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 8512 } 8513 } 8514 8515 /* Data-processing (2 source) 8516 * 31 30 29 28 21 20 16 15 10 9 5 4 0 8517 * +----+---+---+-----------------+------+--------+------+------+ 8518 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd | 8519 * +----+---+---+-----------------+------+--------+------+------+ 8520 */ disas_data_proc_2src(DisasContext * s,uint32_t insn)8521 static void disas_data_proc_2src(DisasContext *s, uint32_t insn) 8522 { 8523 unsigned int sf, rm, opcode, rn, rd, setflag; 8524 sf = extract32(insn, 31, 1); 8525 setflag = extract32(insn, 29, 1); 8526 rm = extract32(insn, 16, 5); 8527 opcode = extract32(insn, 10, 6); 8528 rn = extract32(insn, 5, 5); 8529 rd = extract32(insn, 0, 5); 8530 8531 if (setflag && opcode != 0) { 8532 unallocated_encoding(s); 8533 return; 8534 } 8535 8536 switch (opcode) { 8537 case 0: /* SUBP(S) */ 8538 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 8539 goto do_unallocated; 8540 } else { 8541 TCGv_i64 tcg_n, tcg_m, tcg_d; 8542 8543 tcg_n = read_cpu_reg_sp(s, rn, true); 8544 tcg_m = read_cpu_reg_sp(s, rm, true); 8545 tcg_gen_sextract_i64(tcg_n, 
tcg_n, 0, 56); 8546 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 8547 tcg_d = cpu_reg(s, rd); 8548 8549 if (setflag) { 8550 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 8551 } else { 8552 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 8553 } 8554 } 8555 break; 8556 case 2: /* UDIV */ 8557 handle_div(s, false, sf, rm, rn, rd); 8558 break; 8559 case 3: /* SDIV */ 8560 handle_div(s, true, sf, rm, rn, rd); 8561 break; 8562 case 4: /* IRG */ 8563 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 8564 goto do_unallocated; 8565 } 8566 if (s->ata[0]) { 8567 gen_helper_irg(cpu_reg_sp(s, rd), tcg_env, 8568 cpu_reg_sp(s, rn), cpu_reg(s, rm)); 8569 } else { 8570 gen_address_with_allocation_tag0(cpu_reg_sp(s, rd), 8571 cpu_reg_sp(s, rn)); 8572 } 8573 break; 8574 case 5: /* GMI */ 8575 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 8576 goto do_unallocated; 8577 } else { 8578 TCGv_i64 t = tcg_temp_new_i64(); 8579 8580 tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4); 8581 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 8582 tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t); 8583 } 8584 break; 8585 case 8: /* LSLV */ 8586 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); 8587 break; 8588 case 9: /* LSRV */ 8589 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd); 8590 break; 8591 case 10: /* ASRV */ 8592 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd); 8593 break; 8594 case 11: /* RORV */ 8595 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd); 8596 break; 8597 case 12: /* PACGA */ 8598 if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) { 8599 goto do_unallocated; 8600 } 8601 gen_helper_pacga(cpu_reg(s, rd), tcg_env, 8602 cpu_reg(s, rn), cpu_reg_sp(s, rm)); 8603 break; 8604 case 16: 8605 case 17: 8606 case 18: 8607 case 19: 8608 case 20: 8609 case 21: 8610 case 22: 8611 case 23: /* CRC32 */ 8612 { 8613 int sz = extract32(opcode, 0, 2); 8614 bool crc32c = extract32(opcode, 2, 1); 8615 handle_crc32(s, sf, sz, crc32c, rm, rn, rd); 8616 break; 8617 } 8618 default: 8619 do_unallocated: 8620 unallocated_encoding(s); 8621 break; 8622 } 8623 } 8624 8625 /* 8626 * Data processing - register 8627 * 31 30 29 28 25 21 20 16 10 0 8628 * +--+---+--+---+-------+-----+-------+-------+---------+ 8629 * | |op0| |op1| 1 0 1 | op2 | | op3 | | 8630 * +--+---+--+---+-------+-----+-------+-------+---------+ 8631 */ disas_data_proc_reg(DisasContext * s,uint32_t insn)8632 static void disas_data_proc_reg(DisasContext *s, uint32_t insn) 8633 { 8634 int op0 = extract32(insn, 30, 1); 8635 int op1 = extract32(insn, 28, 1); 8636 int op2 = extract32(insn, 21, 4); 8637 int op3 = extract32(insn, 10, 6); 8638 8639 if (!op1) { 8640 if (op2 & 8) { 8641 if (op2 & 1) { 8642 /* Add/sub (extended register) */ 8643 disas_add_sub_ext_reg(s, insn); 8644 } else { 8645 /* Add/sub (shifted register) */ 8646 disas_add_sub_reg(s, insn); 8647 } 8648 } else { 8649 /* Logical (shifted register) */ 8650 disas_logic_reg(s, insn); 8651 } 8652 return; 8653 } 8654 8655 switch (op2) { 8656 case 0x0: 8657 switch (op3) { 8658 case 0x00: /* Add/subtract (with carry) */ 8659 disas_adc_sbc(s, insn); 8660 break; 8661 8662 case 0x01: /* Rotate right into flags */ 8663 case 0x21: 8664 disas_rotate_right_into_flags(s, insn); 8665 break; 8666 8667 case 0x02: /* Evaluate into flags */ 8668 case 0x12: 8669 case 0x22: 8670 case 0x32: 8671 disas_evaluate_into_flags(s, insn); 8672 break; 8673 8674 default: 8675 goto do_unallocated; 8676 } 8677 break; 8678 8679 case 0x2: /* Conditional compare */ 8680 disas_cc(s, insn); /* both imm and reg forms */ 8681 break; 
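    /*
     * Remaining op2 groups: 0x4 is conditional select, 0x6 splits on op0
     * into the one-source and two-source groups, and 0x8-0xf are the
     * three-source multiply/accumulate forms.
     */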
8682 8683 case 0x4: /* Conditional select */ 8684 disas_cond_select(s, insn); 8685 break; 8686 8687 case 0x6: /* Data-processing */ 8688 if (op0) { /* (1 source) */ 8689 disas_data_proc_1src(s, insn); 8690 } else { /* (2 source) */ 8691 disas_data_proc_2src(s, insn); 8692 } 8693 break; 8694 case 0x8 ... 0xf: /* (3 source) */ 8695 disas_data_proc_3src(s, insn); 8696 break; 8697 8698 default: 8699 do_unallocated: 8700 unallocated_encoding(s); 8701 break; 8702 } 8703 } 8704 handle_fp_compare(DisasContext * s,int size,unsigned int rn,unsigned int rm,bool cmp_with_zero,bool signal_all_nans)8705 static void handle_fp_compare(DisasContext *s, int size, 8706 unsigned int rn, unsigned int rm, 8707 bool cmp_with_zero, bool signal_all_nans) 8708 { 8709 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 8710 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 8711 8712 if (size == MO_64) { 8713 TCGv_i64 tcg_vn, tcg_vm; 8714 8715 tcg_vn = read_fp_dreg(s, rn); 8716 if (cmp_with_zero) { 8717 tcg_vm = tcg_constant_i64(0); 8718 } else { 8719 tcg_vm = read_fp_dreg(s, rm); 8720 } 8721 if (signal_all_nans) { 8722 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 8723 } else { 8724 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 8725 } 8726 } else { 8727 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 8728 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 8729 8730 read_vec_element_i32(s, tcg_vn, rn, 0, size); 8731 if (cmp_with_zero) { 8732 tcg_gen_movi_i32(tcg_vm, 0); 8733 } else { 8734 read_vec_element_i32(s, tcg_vm, rm, 0, size); 8735 } 8736 8737 switch (size) { 8738 case MO_32: 8739 if (signal_all_nans) { 8740 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 8741 } else { 8742 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 8743 } 8744 break; 8745 case MO_16: 8746 if (signal_all_nans) { 8747 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 8748 } else { 8749 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 8750 } 8751 break; 8752 default: 8753 g_assert_not_reached(); 8754 } 8755 } 8756 8757 gen_set_nzcv(tcg_flags); 8758 } 8759 8760 /* Floating point compare 8761 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0 8762 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 8763 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 | 8764 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 8765 */ disas_fp_compare(DisasContext * s,uint32_t insn)8766 static void disas_fp_compare(DisasContext *s, uint32_t insn) 8767 { 8768 unsigned int mos, type, rm, op, rn, opc, op2r; 8769 int size; 8770 8771 mos = extract32(insn, 29, 3); 8772 type = extract32(insn, 22, 2); 8773 rm = extract32(insn, 16, 5); 8774 op = extract32(insn, 14, 2); 8775 rn = extract32(insn, 5, 5); 8776 opc = extract32(insn, 3, 2); 8777 op2r = extract32(insn, 0, 3); 8778 8779 if (mos || op || op2r) { 8780 unallocated_encoding(s); 8781 return; 8782 } 8783 8784 switch (type) { 8785 case 0: 8786 size = MO_32; 8787 break; 8788 case 1: 8789 size = MO_64; 8790 break; 8791 case 3: 8792 size = MO_16; 8793 if (dc_isar_feature(aa64_fp16, s)) { 8794 break; 8795 } 8796 /* fallthru */ 8797 default: 8798 unallocated_encoding(s); 8799 return; 8800 } 8801 8802 if (!fp_access_check(s)) { 8803 return; 8804 } 8805 8806 handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2); 8807 } 8808 8809 /* Floating point conditional compare 8810 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 8811 * 
+---+---+---+-----------+------+---+------+------+-----+------+----+------+ 8812 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv | 8813 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 8814 */ disas_fp_ccomp(DisasContext * s,uint32_t insn)8815 static void disas_fp_ccomp(DisasContext *s, uint32_t insn) 8816 { 8817 unsigned int mos, type, rm, cond, rn, op, nzcv; 8818 TCGLabel *label_continue = NULL; 8819 int size; 8820 8821 mos = extract32(insn, 29, 3); 8822 type = extract32(insn, 22, 2); 8823 rm = extract32(insn, 16, 5); 8824 cond = extract32(insn, 12, 4); 8825 rn = extract32(insn, 5, 5); 8826 op = extract32(insn, 4, 1); 8827 nzcv = extract32(insn, 0, 4); 8828 8829 if (mos) { 8830 unallocated_encoding(s); 8831 return; 8832 } 8833 8834 switch (type) { 8835 case 0: 8836 size = MO_32; 8837 break; 8838 case 1: 8839 size = MO_64; 8840 break; 8841 case 3: 8842 size = MO_16; 8843 if (dc_isar_feature(aa64_fp16, s)) { 8844 break; 8845 } 8846 /* fallthru */ 8847 default: 8848 unallocated_encoding(s); 8849 return; 8850 } 8851 8852 if (!fp_access_check(s)) { 8853 return; 8854 } 8855 8856 if (cond < 0x0e) { /* not always */ 8857 TCGLabel *label_match = gen_new_label(); 8858 label_continue = gen_new_label(); 8859 arm_gen_test_cc(cond, label_match); 8860 /* nomatch: */ 8861 gen_set_nzcv(tcg_constant_i64(nzcv << 28)); 8862 tcg_gen_br(label_continue); 8863 gen_set_label(label_match); 8864 } 8865 8866 handle_fp_compare(s, size, rn, rm, false, op); 8867 8868 if (cond < 0x0e) { 8869 gen_set_label(label_continue); 8870 } 8871 } 8872 8873 /* Floating-point data-processing (1 source) - half precision */ handle_fp_1src_half(DisasContext * s,int opcode,int rd,int rn)8874 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) 8875 { 8876 TCGv_ptr fpst = NULL; 8877 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 8878 TCGv_i32 tcg_res = tcg_temp_new_i32(); 8879 8880 switch (opcode) { 8881 case 0x0: /* FMOV */ 8882 tcg_gen_mov_i32(tcg_res, tcg_op); 8883 break; 8884 case 0x1: /* FABS */ 8885 gen_vfp_absh(tcg_res, tcg_op); 8886 break; 8887 case 0x2: /* FNEG */ 8888 gen_vfp_negh(tcg_res, tcg_op); 8889 break; 8890 case 0x3: /* FSQRT */ 8891 fpst = fpstatus_ptr(FPST_FPCR_F16); 8892 gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); 8893 break; 8894 case 0x8: /* FRINTN */ 8895 case 0x9: /* FRINTP */ 8896 case 0xa: /* FRINTM */ 8897 case 0xb: /* FRINTZ */ 8898 case 0xc: /* FRINTA */ 8899 { 8900 TCGv_i32 tcg_rmode; 8901 8902 fpst = fpstatus_ptr(FPST_FPCR_F16); 8903 tcg_rmode = gen_set_rmode(opcode & 7, fpst); 8904 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 8905 gen_restore_rmode(tcg_rmode, fpst); 8906 break; 8907 } 8908 case 0xe: /* FRINTX */ 8909 fpst = fpstatus_ptr(FPST_FPCR_F16); 8910 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst); 8911 break; 8912 case 0xf: /* FRINTI */ 8913 fpst = fpstatus_ptr(FPST_FPCR_F16); 8914 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 8915 break; 8916 default: 8917 g_assert_not_reached(); 8918 } 8919 8920 write_fp_sreg(s, rd, tcg_res); 8921 } 8922 8923 /* Floating-point data-processing (1 source) - single precision */ handle_fp_1src_single(DisasContext * s,int opcode,int rd,int rn)8924 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) 8925 { 8926 void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr); 8927 TCGv_i32 tcg_op, tcg_res; 8928 TCGv_ptr fpst; 8929 int rmode = -1; 8930 8931 tcg_op = read_fp_sreg(s, rn); 8932 tcg_res = tcg_temp_new_i32(); 8933 8934 switch (opcode) { 8935 case 0x0: /* FMOV */ 8936 
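        /*
         * FMOV, FABS and FNEG are pure bit manipulations with no FP
         * status side effects, and FSQRT gets its status via tcg_env,
         * so these cases "goto done" without the shared fpst setup.
         */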
tcg_gen_mov_i32(tcg_res, tcg_op); 8937 goto done; 8938 case 0x1: /* FABS */ 8939 gen_vfp_abss(tcg_res, tcg_op); 8940 goto done; 8941 case 0x2: /* FNEG */ 8942 gen_vfp_negs(tcg_res, tcg_op); 8943 goto done; 8944 case 0x3: /* FSQRT */ 8945 gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); 8946 goto done; 8947 case 0x6: /* BFCVT */ 8948 gen_fpst = gen_helper_bfcvt; 8949 break; 8950 case 0x8: /* FRINTN */ 8951 case 0x9: /* FRINTP */ 8952 case 0xa: /* FRINTM */ 8953 case 0xb: /* FRINTZ */ 8954 case 0xc: /* FRINTA */ 8955 rmode = opcode & 7; 8956 gen_fpst = gen_helper_rints; 8957 break; 8958 case 0xe: /* FRINTX */ 8959 gen_fpst = gen_helper_rints_exact; 8960 break; 8961 case 0xf: /* FRINTI */ 8962 gen_fpst = gen_helper_rints; 8963 break; 8964 case 0x10: /* FRINT32Z */ 8965 rmode = FPROUNDING_ZERO; 8966 gen_fpst = gen_helper_frint32_s; 8967 break; 8968 case 0x11: /* FRINT32X */ 8969 gen_fpst = gen_helper_frint32_s; 8970 break; 8971 case 0x12: /* FRINT64Z */ 8972 rmode = FPROUNDING_ZERO; 8973 gen_fpst = gen_helper_frint64_s; 8974 break; 8975 case 0x13: /* FRINT64X */ 8976 gen_fpst = gen_helper_frint64_s; 8977 break; 8978 default: 8979 g_assert_not_reached(); 8980 } 8981 8982 fpst = fpstatus_ptr(FPST_FPCR); 8983 if (rmode >= 0) { 8984 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); 8985 gen_fpst(tcg_res, tcg_op, fpst); 8986 gen_restore_rmode(tcg_rmode, fpst); 8987 } else { 8988 gen_fpst(tcg_res, tcg_op, fpst); 8989 } 8990 8991 done: 8992 write_fp_sreg(s, rd, tcg_res); 8993 } 8994 8995 /* Floating-point data-processing (1 source) - double precision */ handle_fp_1src_double(DisasContext * s,int opcode,int rd,int rn)8996 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) 8997 { 8998 void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr); 8999 TCGv_i64 tcg_op, tcg_res; 9000 TCGv_ptr fpst; 9001 int rmode = -1; 9002 9003 switch (opcode) { 9004 case 0x0: /* FMOV */ 9005 gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0); 9006 return; 9007 } 9008 9009 tcg_op = read_fp_dreg(s, rn); 9010 tcg_res = tcg_temp_new_i64(); 9011 9012 switch (opcode) { 9013 case 0x1: /* FABS */ 9014 gen_vfp_absd(tcg_res, tcg_op); 9015 goto done; 9016 case 0x2: /* FNEG */ 9017 gen_vfp_negd(tcg_res, tcg_op); 9018 goto done; 9019 case 0x3: /* FSQRT */ 9020 gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env); 9021 goto done; 9022 case 0x8: /* FRINTN */ 9023 case 0x9: /* FRINTP */ 9024 case 0xa: /* FRINTM */ 9025 case 0xb: /* FRINTZ */ 9026 case 0xc: /* FRINTA */ 9027 rmode = opcode & 7; 9028 gen_fpst = gen_helper_rintd; 9029 break; 9030 case 0xe: /* FRINTX */ 9031 gen_fpst = gen_helper_rintd_exact; 9032 break; 9033 case 0xf: /* FRINTI */ 9034 gen_fpst = gen_helper_rintd; 9035 break; 9036 case 0x10: /* FRINT32Z */ 9037 rmode = FPROUNDING_ZERO; 9038 gen_fpst = gen_helper_frint32_d; 9039 break; 9040 case 0x11: /* FRINT32X */ 9041 gen_fpst = gen_helper_frint32_d; 9042 break; 9043 case 0x12: /* FRINT64Z */ 9044 rmode = FPROUNDING_ZERO; 9045 gen_fpst = gen_helper_frint64_d; 9046 break; 9047 case 0x13: /* FRINT64X */ 9048 gen_fpst = gen_helper_frint64_d; 9049 break; 9050 default: 9051 g_assert_not_reached(); 9052 } 9053 9054 fpst = fpstatus_ptr(FPST_FPCR); 9055 if (rmode >= 0) { 9056 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); 9057 gen_fpst(tcg_res, tcg_op, fpst); 9058 gen_restore_rmode(tcg_rmode, fpst); 9059 } else { 9060 gen_fpst(tcg_res, tcg_op, fpst); 9061 } 9062 9063 done: 9064 write_fp_dreg(s, rd, tcg_res); 9065 } 9066 handle_fp_fcvt(DisasContext * s,int opcode,int rd,int rn,int dtype,int ntype)9067 static void 
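/*
 * Reader's note (comment added, not in the original source): FCVT between
 * the scalar FP precisions. ntype is the source type and dtype the
 * destination type, both using the encoding 0 = single, 1 = double,
 * 3 = half; the caller has already rejected dtype == ntype and the
 * unallocated type value 2.
 */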
handle_fp_fcvt(DisasContext *s, int opcode, 9068 int rd, int rn, int dtype, int ntype) 9069 { 9070 switch (ntype) { 9071 case 0x0: 9072 { 9073 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 9074 if (dtype == 1) { 9075 /* Single to double */ 9076 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9077 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, tcg_env); 9078 write_fp_dreg(s, rd, tcg_rd); 9079 } else { 9080 /* Single to half */ 9081 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 9082 TCGv_i32 ahp = get_ahp_flag(); 9083 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9084 9085 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp); 9086 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 9087 write_fp_sreg(s, rd, tcg_rd); 9088 } 9089 break; 9090 } 9091 case 0x1: 9092 { 9093 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 9094 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 9095 if (dtype == 0) { 9096 /* Double to single */ 9097 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, tcg_env); 9098 } else { 9099 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9100 TCGv_i32 ahp = get_ahp_flag(); 9101 /* Double to half */ 9102 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 9103 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 9104 } 9105 write_fp_sreg(s, rd, tcg_rd); 9106 break; 9107 } 9108 case 0x3: 9109 { 9110 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 9111 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR); 9112 TCGv_i32 tcg_ahp = get_ahp_flag(); 9113 tcg_gen_ext16u_i32(tcg_rn, tcg_rn); 9114 if (dtype == 0) { 9115 /* Half to single */ 9116 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 9117 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 9118 write_fp_sreg(s, rd, tcg_rd); 9119 } else { 9120 /* Half to double */ 9121 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 9122 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 9123 write_fp_dreg(s, rd, tcg_rd); 9124 } 9125 break; 9126 } 9127 default: 9128 g_assert_not_reached(); 9129 } 9130 } 9131 9132 /* Floating point data-processing (1 source) 9133 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0 9134 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 9135 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd | 9136 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 9137 */ disas_fp_1src(DisasContext * s,uint32_t insn)9138 static void disas_fp_1src(DisasContext *s, uint32_t insn) 9139 { 9140 int mos = extract32(insn, 29, 3); 9141 int type = extract32(insn, 22, 2); 9142 int opcode = extract32(insn, 15, 6); 9143 int rn = extract32(insn, 5, 5); 9144 int rd = extract32(insn, 0, 5); 9145 9146 if (mos) { 9147 goto do_unallocated; 9148 } 9149 9150 switch (opcode) { 9151 case 0x4: case 0x5: case 0x7: 9152 { 9153 /* FCVT between half, single and double precision */ 9154 int dtype = extract32(opcode, 0, 2); 9155 if (type == 2 || dtype == type) { 9156 goto do_unallocated; 9157 } 9158 if (!fp_access_check(s)) { 9159 return; 9160 } 9161 9162 handle_fp_fcvt(s, opcode, rd, rn, dtype, type); 9163 break; 9164 } 9165 9166 case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */ 9167 if (type > 1 || !dc_isar_feature(aa64_frint, s)) { 9168 goto do_unallocated; 9169 } 9170 /* fall through */ 9171 case 0x0 ... 0x3: 9172 case 0x8 ... 0xc: 9173 case 0xe ... 
0xf: 9174 /* 32-to-32 and 64-to-64 ops */ 9175 switch (type) { 9176 case 0: 9177 if (!fp_access_check(s)) { 9178 return; 9179 } 9180 handle_fp_1src_single(s, opcode, rd, rn); 9181 break; 9182 case 1: 9183 if (!fp_access_check(s)) { 9184 return; 9185 } 9186 handle_fp_1src_double(s, opcode, rd, rn); 9187 break; 9188 case 3: 9189 if (!dc_isar_feature(aa64_fp16, s)) { 9190 goto do_unallocated; 9191 } 9192 9193 if (!fp_access_check(s)) { 9194 return; 9195 } 9196 handle_fp_1src_half(s, opcode, rd, rn); 9197 break; 9198 default: 9199 goto do_unallocated; 9200 } 9201 break; 9202 9203 case 0x6: 9204 switch (type) { 9205 case 1: /* BFCVT */ 9206 if (!dc_isar_feature(aa64_bf16, s)) { 9207 goto do_unallocated; 9208 } 9209 if (!fp_access_check(s)) { 9210 return; 9211 } 9212 handle_fp_1src_single(s, opcode, rd, rn); 9213 break; 9214 default: 9215 goto do_unallocated; 9216 } 9217 break; 9218 9219 default: 9220 do_unallocated: 9221 unallocated_encoding(s); 9222 break; 9223 } 9224 } 9225 9226 /* Handle floating point <=> fixed point conversions. Note that we can 9227 * also deal with fp <=> integer conversions as a special case (scale == 64) 9228 * OPTME: consider handling that special case specially or at least skipping 9229 * the call to scalbn in the helpers for zero shifts. 9230 */ handle_fpfpcvt(DisasContext * s,int rd,int rn,int opcode,bool itof,int rmode,int scale,int sf,int type)9231 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, 9232 bool itof, int rmode, int scale, int sf, int type) 9233 { 9234 bool is_signed = !(opcode & 1); 9235 TCGv_ptr tcg_fpstatus; 9236 TCGv_i32 tcg_shift, tcg_single; 9237 TCGv_i64 tcg_double; 9238 9239 tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR); 9240 9241 tcg_shift = tcg_constant_i32(64 - scale); 9242 9243 if (itof) { 9244 TCGv_i64 tcg_int = cpu_reg(s, rn); 9245 if (!sf) { 9246 TCGv_i64 tcg_extend = tcg_temp_new_i64(); 9247 9248 if (is_signed) { 9249 tcg_gen_ext32s_i64(tcg_extend, tcg_int); 9250 } else { 9251 tcg_gen_ext32u_i64(tcg_extend, tcg_int); 9252 } 9253 9254 tcg_int = tcg_extend; 9255 } 9256 9257 switch (type) { 9258 case 1: /* float64 */ 9259 tcg_double = tcg_temp_new_i64(); 9260 if (is_signed) { 9261 gen_helper_vfp_sqtod(tcg_double, tcg_int, 9262 tcg_shift, tcg_fpstatus); 9263 } else { 9264 gen_helper_vfp_uqtod(tcg_double, tcg_int, 9265 tcg_shift, tcg_fpstatus); 9266 } 9267 write_fp_dreg(s, rd, tcg_double); 9268 break; 9269 9270 case 0: /* float32 */ 9271 tcg_single = tcg_temp_new_i32(); 9272 if (is_signed) { 9273 gen_helper_vfp_sqtos(tcg_single, tcg_int, 9274 tcg_shift, tcg_fpstatus); 9275 } else { 9276 gen_helper_vfp_uqtos(tcg_single, tcg_int, 9277 tcg_shift, tcg_fpstatus); 9278 } 9279 write_fp_sreg(s, rd, tcg_single); 9280 break; 9281 9282 case 3: /* float16 */ 9283 tcg_single = tcg_temp_new_i32(); 9284 if (is_signed) { 9285 gen_helper_vfp_sqtoh(tcg_single, tcg_int, 9286 tcg_shift, tcg_fpstatus); 9287 } else { 9288 gen_helper_vfp_uqtoh(tcg_single, tcg_int, 9289 tcg_shift, tcg_fpstatus); 9290 } 9291 write_fp_sreg(s, rd, tcg_single); 9292 break; 9293 9294 default: 9295 g_assert_not_reached(); 9296 } 9297 } else { 9298 TCGv_i64 tcg_int = cpu_reg(s, rd); 9299 TCGv_i32 tcg_rmode; 9300 9301 if (extract32(opcode, 2, 1)) { 9302 /* There are too many rounding modes to all fit into rmode, 9303 * so FCVTA[US] is a special case. 
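             * (Reader's note, added: the instruction's rmode field is only
             * two bits wide, covering tie-even, +inf, -inf and zero, so
             * round-to-nearest with ties-away cannot be encoded there;
             * FCVTA[US] is instead identified by opcode bit 2 and the
             * rounding mode forced here.)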
9304 */ 9305 rmode = FPROUNDING_TIEAWAY; 9306 } 9307 9308 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 9309 9310 switch (type) { 9311 case 1: /* float64 */ 9312 tcg_double = read_fp_dreg(s, rn); 9313 if (is_signed) { 9314 if (!sf) { 9315 gen_helper_vfp_tosld(tcg_int, tcg_double, 9316 tcg_shift, tcg_fpstatus); 9317 } else { 9318 gen_helper_vfp_tosqd(tcg_int, tcg_double, 9319 tcg_shift, tcg_fpstatus); 9320 } 9321 } else { 9322 if (!sf) { 9323 gen_helper_vfp_tould(tcg_int, tcg_double, 9324 tcg_shift, tcg_fpstatus); 9325 } else { 9326 gen_helper_vfp_touqd(tcg_int, tcg_double, 9327 tcg_shift, tcg_fpstatus); 9328 } 9329 } 9330 if (!sf) { 9331 tcg_gen_ext32u_i64(tcg_int, tcg_int); 9332 } 9333 break; 9334 9335 case 0: /* float32 */ 9336 tcg_single = read_fp_sreg(s, rn); 9337 if (sf) { 9338 if (is_signed) { 9339 gen_helper_vfp_tosqs(tcg_int, tcg_single, 9340 tcg_shift, tcg_fpstatus); 9341 } else { 9342 gen_helper_vfp_touqs(tcg_int, tcg_single, 9343 tcg_shift, tcg_fpstatus); 9344 } 9345 } else { 9346 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 9347 if (is_signed) { 9348 gen_helper_vfp_tosls(tcg_dest, tcg_single, 9349 tcg_shift, tcg_fpstatus); 9350 } else { 9351 gen_helper_vfp_touls(tcg_dest, tcg_single, 9352 tcg_shift, tcg_fpstatus); 9353 } 9354 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 9355 } 9356 break; 9357 9358 case 3: /* float16 */ 9359 tcg_single = read_fp_sreg(s, rn); 9360 if (sf) { 9361 if (is_signed) { 9362 gen_helper_vfp_tosqh(tcg_int, tcg_single, 9363 tcg_shift, tcg_fpstatus); 9364 } else { 9365 gen_helper_vfp_touqh(tcg_int, tcg_single, 9366 tcg_shift, tcg_fpstatus); 9367 } 9368 } else { 9369 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 9370 if (is_signed) { 9371 gen_helper_vfp_toslh(tcg_dest, tcg_single, 9372 tcg_shift, tcg_fpstatus); 9373 } else { 9374 gen_helper_vfp_toulh(tcg_dest, tcg_single, 9375 tcg_shift, tcg_fpstatus); 9376 } 9377 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 9378 } 9379 break; 9380 9381 default: 9382 g_assert_not_reached(); 9383 } 9384 9385 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 9386 } 9387 } 9388 9389 /* Floating point <-> fixed point conversions 9390 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 9391 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 9392 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd | 9393 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 9394 */ disas_fp_fixed_conv(DisasContext * s,uint32_t insn)9395 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn) 9396 { 9397 int rd = extract32(insn, 0, 5); 9398 int rn = extract32(insn, 5, 5); 9399 int scale = extract32(insn, 10, 6); 9400 int opcode = extract32(insn, 16, 3); 9401 int rmode = extract32(insn, 19, 2); 9402 int type = extract32(insn, 22, 2); 9403 bool sbit = extract32(insn, 29, 1); 9404 bool sf = extract32(insn, 31, 1); 9405 bool itof; 9406 9407 if (sbit || (!sf && scale < 32)) { 9408 unallocated_encoding(s); 9409 return; 9410 } 9411 9412 switch (type) { 9413 case 0: /* float32 */ 9414 case 1: /* float64 */ 9415 break; 9416 case 3: /* float16 */ 9417 if (dc_isar_feature(aa64_fp16, s)) { 9418 break; 9419 } 9420 /* fallthru */ 9421 default: 9422 unallocated_encoding(s); 9423 return; 9424 } 9425 9426 switch ((rmode << 3) | opcode) { 9427 case 0x2: /* SCVTF */ 9428 case 0x3: /* UCVTF */ 9429 itof = true; 9430 break; 9431 case 0x18: /* FCVTZS */ 9432 case 0x19: /* FCVTZU */ 9433 itof = false; 9434 break; 9435 default: 9436 unallocated_encoding(s); 9437 return; 9438 } 9439 9440 if (!fp_access_check(s)) { 
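        /*
         * Reader's note, added: the call below always passes
         * FPROUNDING_ZERO -- FCVTZS/FCVTZU are defined to round towards
         * zero, and the itof (SCVTF/UCVTF) path in handle_fpfpcvt never
         * looks at its rmode argument.
         */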
9441 return; 9442 } 9443 9444 handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type); 9445 } 9446 handle_fmov(DisasContext * s,int rd,int rn,int type,bool itof)9447 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) 9448 { 9449 /* FMOV: gpr to or from float, double, or top half of quad fp reg, 9450 * without conversion. 9451 */ 9452 9453 if (itof) { 9454 TCGv_i64 tcg_rn = cpu_reg(s, rn); 9455 TCGv_i64 tmp; 9456 9457 switch (type) { 9458 case 0: 9459 /* 32 bit */ 9460 tmp = tcg_temp_new_i64(); 9461 tcg_gen_ext32u_i64(tmp, tcg_rn); 9462 write_fp_dreg(s, rd, tmp); 9463 break; 9464 case 1: 9465 /* 64 bit */ 9466 write_fp_dreg(s, rd, tcg_rn); 9467 break; 9468 case 2: 9469 /* 64 bit to top half. */ 9470 tcg_gen_st_i64(tcg_rn, tcg_env, fp_reg_hi_offset(s, rd)); 9471 clear_vec_high(s, true, rd); 9472 break; 9473 case 3: 9474 /* 16 bit */ 9475 tmp = tcg_temp_new_i64(); 9476 tcg_gen_ext16u_i64(tmp, tcg_rn); 9477 write_fp_dreg(s, rd, tmp); 9478 break; 9479 default: 9480 g_assert_not_reached(); 9481 } 9482 } else { 9483 TCGv_i64 tcg_rd = cpu_reg(s, rd); 9484 9485 switch (type) { 9486 case 0: 9487 /* 32 bit */ 9488 tcg_gen_ld32u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_32)); 9489 break; 9490 case 1: 9491 /* 64 bit */ 9492 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_64)); 9493 break; 9494 case 2: 9495 /* 64 bits from top half */ 9496 tcg_gen_ld_i64(tcg_rd, tcg_env, fp_reg_hi_offset(s, rn)); 9497 break; 9498 case 3: 9499 /* 16 bit */ 9500 tcg_gen_ld16u_i64(tcg_rd, tcg_env, fp_reg_offset(s, rn, MO_16)); 9501 break; 9502 default: 9503 g_assert_not_reached(); 9504 } 9505 } 9506 } 9507 handle_fjcvtzs(DisasContext * s,int rd,int rn)9508 static void handle_fjcvtzs(DisasContext *s, int rd, int rn) 9509 { 9510 TCGv_i64 t = read_fp_dreg(s, rn); 9511 TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR); 9512 9513 gen_helper_fjcvtzs(t, t, fpstatus); 9514 9515 tcg_gen_ext32u_i64(cpu_reg(s, rd), t); 9516 tcg_gen_extrh_i64_i32(cpu_ZF, t); 9517 tcg_gen_movi_i32(cpu_CF, 0); 9518 tcg_gen_movi_i32(cpu_NF, 0); 9519 tcg_gen_movi_i32(cpu_VF, 0); 9520 } 9521 9522 /* Floating point <-> integer conversions 9523 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 9524 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 9525 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd | 9526 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 9527 */ disas_fp_int_conv(DisasContext * s,uint32_t insn)9528 static void disas_fp_int_conv(DisasContext *s, uint32_t insn) 9529 { 9530 int rd = extract32(insn, 0, 5); 9531 int rn = extract32(insn, 5, 5); 9532 int opcode = extract32(insn, 16, 3); 9533 int rmode = extract32(insn, 19, 2); 9534 int type = extract32(insn, 22, 2); 9535 bool sbit = extract32(insn, 29, 1); 9536 bool sf = extract32(insn, 31, 1); 9537 bool itof = false; 9538 9539 if (sbit) { 9540 goto do_unallocated; 9541 } 9542 9543 switch (opcode) { 9544 case 2: /* SCVTF */ 9545 case 3: /* UCVTF */ 9546 itof = true; 9547 /* fallthru */ 9548 case 4: /* FCVTAS */ 9549 case 5: /* FCVTAU */ 9550 if (rmode != 0) { 9551 goto do_unallocated; 9552 } 9553 /* fallthru */ 9554 case 0: /* FCVT[NPMZ]S */ 9555 case 1: /* FCVT[NPMZ]U */ 9556 switch (type) { 9557 case 0: /* float32 */ 9558 case 1: /* float64 */ 9559 break; 9560 case 3: /* float16 */ 9561 if (!dc_isar_feature(aa64_fp16, s)) { 9562 goto do_unallocated; 9563 } 9564 break; 9565 default: 9566 goto do_unallocated; 9567 } 9568 if (!fp_access_check(s)) { 9569 return; 
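            /*
             * Reader's note, added: scale is fixed at 64 in the call below,
             * so handle_fpfpcvt's tcg_shift becomes 64 - 64 = 0 fractional
             * bits and the fixed-point helpers degenerate into plain
             * FP <-> integer conversions.
             */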
9570 } 9571 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type); 9572 break; 9573 9574 default: 9575 switch (sf << 7 | type << 5 | rmode << 3 | opcode) { 9576 case 0b01100110: /* FMOV half <-> 32-bit int */ 9577 case 0b01100111: 9578 case 0b11100110: /* FMOV half <-> 64-bit int */ 9579 case 0b11100111: 9580 if (!dc_isar_feature(aa64_fp16, s)) { 9581 goto do_unallocated; 9582 } 9583 /* fallthru */ 9584 case 0b00000110: /* FMOV 32-bit */ 9585 case 0b00000111: 9586 case 0b10100110: /* FMOV 64-bit */ 9587 case 0b10100111: 9588 case 0b11001110: /* FMOV top half of 128-bit */ 9589 case 0b11001111: 9590 if (!fp_access_check(s)) { 9591 return; 9592 } 9593 itof = opcode & 1; 9594 handle_fmov(s, rd, rn, type, itof); 9595 break; 9596 9597 case 0b00111110: /* FJCVTZS */ 9598 if (!dc_isar_feature(aa64_jscvt, s)) { 9599 goto do_unallocated; 9600 } else if (fp_access_check(s)) { 9601 handle_fjcvtzs(s, rd, rn); 9602 } 9603 break; 9604 9605 default: 9606 do_unallocated: 9607 unallocated_encoding(s); 9608 return; 9609 } 9610 break; 9611 } 9612 } 9613 9614 /* FP-specific subcases of table C3-6 (SIMD and FP data processing) 9615 * 31 30 29 28 25 24 0 9616 * +---+---+---+---------+-----------------------------+ 9617 * | | 0 | | 1 1 1 1 | | 9618 * +---+---+---+---------+-----------------------------+ 9619 */ disas_data_proc_fp(DisasContext * s,uint32_t insn)9620 static void disas_data_proc_fp(DisasContext *s, uint32_t insn) 9621 { 9622 if (extract32(insn, 24, 1)) { 9623 unallocated_encoding(s); /* in decodetree */ 9624 } else if (extract32(insn, 21, 1) == 0) { 9625 /* Floating point to fixed point conversions */ 9626 disas_fp_fixed_conv(s, insn); 9627 } else { 9628 switch (extract32(insn, 10, 2)) { 9629 case 1: 9630 /* Floating point conditional compare */ 9631 disas_fp_ccomp(s, insn); 9632 break; 9633 case 2: 9634 /* Floating point data-processing (2 source) */ 9635 unallocated_encoding(s); /* in decodetree */ 9636 break; 9637 case 3: 9638 /* Floating point conditional select */ 9639 unallocated_encoding(s); /* in decodetree */ 9640 break; 9641 case 0: 9642 switch (ctz32(extract32(insn, 12, 4))) { 9643 case 0: /* [15:12] == xxx1 */ 9644 /* Floating point immediate */ 9645 unallocated_encoding(s); /* in decodetree */ 9646 break; 9647 case 1: /* [15:12] == xx10 */ 9648 /* Floating point compare */ 9649 disas_fp_compare(s, insn); 9650 break; 9651 case 2: /* [15:12] == x100 */ 9652 /* Floating point data-processing (1 source) */ 9653 disas_fp_1src(s, insn); 9654 break; 9655 case 3: /* [15:12] == 1000 */ 9656 unallocated_encoding(s); 9657 break; 9658 default: /* [15:12] == 0000 */ 9659 /* Floating point <-> integer conversions */ 9660 disas_fp_int_conv(s, insn); 9661 break; 9662 } 9663 break; 9664 } 9665 } 9666 } 9667 9668 /* Common vector code for handling integer to FP conversion */ handle_simd_intfp_conv(DisasContext * s,int rd,int rn,int elements,int is_signed,int fracbits,int size)9669 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, 9670 int elements, int is_signed, 9671 int fracbits, int size) 9672 { 9673 TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 9674 TCGv_i32 tcg_shift = NULL; 9675 9676 MemOp mop = size | (is_signed ? 
MO_SIGN : 0); 9677 int pass; 9678 9679 if (fracbits || size == MO_64) { 9680 tcg_shift = tcg_constant_i32(fracbits); 9681 } 9682 9683 if (size == MO_64) { 9684 TCGv_i64 tcg_int64 = tcg_temp_new_i64(); 9685 TCGv_i64 tcg_double = tcg_temp_new_i64(); 9686 9687 for (pass = 0; pass < elements; pass++) { 9688 read_vec_element(s, tcg_int64, rn, pass, mop); 9689 9690 if (is_signed) { 9691 gen_helper_vfp_sqtod(tcg_double, tcg_int64, 9692 tcg_shift, tcg_fpst); 9693 } else { 9694 gen_helper_vfp_uqtod(tcg_double, tcg_int64, 9695 tcg_shift, tcg_fpst); 9696 } 9697 if (elements == 1) { 9698 write_fp_dreg(s, rd, tcg_double); 9699 } else { 9700 write_vec_element(s, tcg_double, rd, pass, MO_64); 9701 } 9702 } 9703 } else { 9704 TCGv_i32 tcg_int32 = tcg_temp_new_i32(); 9705 TCGv_i32 tcg_float = tcg_temp_new_i32(); 9706 9707 for (pass = 0; pass < elements; pass++) { 9708 read_vec_element_i32(s, tcg_int32, rn, pass, mop); 9709 9710 switch (size) { 9711 case MO_32: 9712 if (fracbits) { 9713 if (is_signed) { 9714 gen_helper_vfp_sltos(tcg_float, tcg_int32, 9715 tcg_shift, tcg_fpst); 9716 } else { 9717 gen_helper_vfp_ultos(tcg_float, tcg_int32, 9718 tcg_shift, tcg_fpst); 9719 } 9720 } else { 9721 if (is_signed) { 9722 gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst); 9723 } else { 9724 gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst); 9725 } 9726 } 9727 break; 9728 case MO_16: 9729 if (fracbits) { 9730 if (is_signed) { 9731 gen_helper_vfp_sltoh(tcg_float, tcg_int32, 9732 tcg_shift, tcg_fpst); 9733 } else { 9734 gen_helper_vfp_ultoh(tcg_float, tcg_int32, 9735 tcg_shift, tcg_fpst); 9736 } 9737 } else { 9738 if (is_signed) { 9739 gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst); 9740 } else { 9741 gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst); 9742 } 9743 } 9744 break; 9745 default: 9746 g_assert_not_reached(); 9747 } 9748 9749 if (elements == 1) { 9750 write_fp_sreg(s, rd, tcg_float); 9751 } else { 9752 write_vec_element_i32(s, tcg_float, rd, pass, size); 9753 } 9754 } 9755 } 9756 9757 clear_vec_high(s, elements << size == 16, rd); 9758 } 9759 9760 /* UCVTF/SCVTF - Integer to FP conversion */ handle_simd_shift_intfp_conv(DisasContext * s,bool is_scalar,bool is_q,bool is_u,int immh,int immb,int opcode,int rn,int rd)9761 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, 9762 bool is_q, bool is_u, 9763 int immh, int immb, int opcode, 9764 int rn, int rd) 9765 { 9766 int size, elements, fracbits; 9767 int immhb = immh << 3 | immb; 9768 9769 if (immh & 8) { 9770 size = MO_64; 9771 if (!is_scalar && !is_q) { 9772 unallocated_encoding(s); 9773 return; 9774 } 9775 } else if (immh & 4) { 9776 size = MO_32; 9777 } else if (immh & 2) { 9778 size = MO_16; 9779 if (!dc_isar_feature(aa64_fp16, s)) { 9780 unallocated_encoding(s); 9781 return; 9782 } 9783 } else { 9784 /* immh == 0 would be a failure of the decode logic */ 9785 g_assert(immh == 1); 9786 unallocated_encoding(s); 9787 return; 9788 } 9789 9790 if (is_scalar) { 9791 elements = 1; 9792 } else { 9793 elements = (8 << is_q) >> size; 9794 } 9795 fracbits = (16 << size) - immhb; 9796 9797 if (!fp_access_check(s)) { 9798 return; 9799 } 9800 9801 handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size); 9802 } 9803 9804 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */ handle_simd_shift_fpint_conv(DisasContext * s,bool is_scalar,bool is_q,bool is_u,int immh,int immb,int rn,int rd)9805 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, 9806 bool is_q, bool is_u, 9807 int immh, int immb, int rn, int 
rd) 9808 { 9809 int immhb = immh << 3 | immb; 9810 int pass, size, fracbits; 9811 TCGv_ptr tcg_fpstatus; 9812 TCGv_i32 tcg_rmode, tcg_shift; 9813 9814 if (immh & 0x8) { 9815 size = MO_64; 9816 if (!is_scalar && !is_q) { 9817 unallocated_encoding(s); 9818 return; 9819 } 9820 } else if (immh & 0x4) { 9821 size = MO_32; 9822 } else if (immh & 0x2) { 9823 size = MO_16; 9824 if (!dc_isar_feature(aa64_fp16, s)) { 9825 unallocated_encoding(s); 9826 return; 9827 } 9828 } else { 9829 /* Should have split out AdvSIMD modified immediate earlier. */ 9830 assert(immh == 1); 9831 unallocated_encoding(s); 9832 return; 9833 } 9834 9835 if (!fp_access_check(s)) { 9836 return; 9837 } 9838 9839 assert(!(is_scalar && is_q)); 9840 9841 tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 9842 tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus); 9843 fracbits = (16 << size) - immhb; 9844 tcg_shift = tcg_constant_i32(fracbits); 9845 9846 if (size == MO_64) { 9847 int maxpass = is_scalar ? 1 : 2; 9848 9849 for (pass = 0; pass < maxpass; pass++) { 9850 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9851 9852 read_vec_element(s, tcg_op, rn, pass, MO_64); 9853 if (is_u) { 9854 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 9855 } else { 9856 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 9857 } 9858 write_vec_element(s, tcg_op, rd, pass, MO_64); 9859 } 9860 clear_vec_high(s, is_q, rd); 9861 } else { 9862 void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); 9863 int maxpass = is_scalar ? 1 : ((8 << is_q) >> size); 9864 9865 switch (size) { 9866 case MO_16: 9867 if (is_u) { 9868 fn = gen_helper_vfp_touhh; 9869 } else { 9870 fn = gen_helper_vfp_toshh; 9871 } 9872 break; 9873 case MO_32: 9874 if (is_u) { 9875 fn = gen_helper_vfp_touls; 9876 } else { 9877 fn = gen_helper_vfp_tosls; 9878 } 9879 break; 9880 default: 9881 g_assert_not_reached(); 9882 } 9883 9884 for (pass = 0; pass < maxpass; pass++) { 9885 TCGv_i32 tcg_op = tcg_temp_new_i32(); 9886 9887 read_vec_element_i32(s, tcg_op, rn, pass, size); 9888 fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); 9889 if (is_scalar) { 9890 if (size == MO_16 && !is_u) { 9891 tcg_gen_ext16u_i32(tcg_op, tcg_op); 9892 } 9893 write_fp_sreg(s, rd, tcg_op); 9894 } else { 9895 write_vec_element_i32(s, tcg_op, rd, pass, size); 9896 } 9897 } 9898 if (!is_scalar) { 9899 clear_vec_high(s, is_q, rd); 9900 } 9901 } 9902 9903 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 9904 } 9905 9906 /* AdvSIMD scalar shift by immediate 9907 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 9908 * +-----+---+-------------+------+------+--------+---+------+------+ 9909 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | 9910 * +-----+---+-------------+------+------+--------+---+------+------+ 9911 * 9912 * This is the scalar version so it works on a fixed sized registers 9913 */ disas_simd_scalar_shift_imm(DisasContext * s,uint32_t insn)9914 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) 9915 { 9916 int rd = extract32(insn, 0, 5); 9917 int rn = extract32(insn, 5, 5); 9918 int opcode = extract32(insn, 11, 5); 9919 int immb = extract32(insn, 16, 3); 9920 int immh = extract32(insn, 19, 4); 9921 bool is_u = extract32(insn, 29, 1); 9922 9923 if (immh == 0) { 9924 unallocated_encoding(s); 9925 return; 9926 } 9927 9928 switch (opcode) { 9929 case 0x1c: /* SCVTF, UCVTF */ 9930 handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb, 9931 opcode, rn, rd); 9932 break; 9933 case 0x1f: /* FCVTZS, FCVTZU */ 9934 handle_simd_shift_fpint_conv(s, 
true, false, is_u, immh, immb, rn, rd); 9935 break; 9936 default: 9937 case 0x00: /* SSHR / USHR */ 9938 case 0x02: /* SSRA / USRA */ 9939 case 0x04: /* SRSHR / URSHR */ 9940 case 0x06: /* SRSRA / URSRA */ 9941 case 0x08: /* SRI */ 9942 case 0x0a: /* SHL / SLI */ 9943 case 0x0c: /* SQSHLU */ 9944 case 0x0e: /* SQSHL, UQSHL */ 9945 case 0x10: /* SQSHRUN */ 9946 case 0x11: /* SQRSHRUN */ 9947 case 0x12: /* SQSHRN, UQSHRN */ 9948 case 0x13: /* SQRSHRN, UQRSHRN */ 9949 unallocated_encoding(s); 9950 break; 9951 } 9952 } 9953 handle_2misc_64(DisasContext * s,int opcode,bool u,TCGv_i64 tcg_rd,TCGv_i64 tcg_rn,TCGv_i32 tcg_rmode,TCGv_ptr tcg_fpstatus)9954 static void handle_2misc_64(DisasContext *s, int opcode, bool u, 9955 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, 9956 TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus) 9957 { 9958 /* Handle 64->64 opcodes which are shared between the scalar and 9959 * vector 2-reg-misc groups. We cover every integer opcode where size == 3 9960 * is valid in either group and also the double-precision fp ops. 9961 * The caller only need provide tcg_rmode and tcg_fpstatus if the op 9962 * requires them. 9963 */ 9964 TCGCond cond; 9965 9966 switch (opcode) { 9967 case 0x4: /* CLS, CLZ */ 9968 if (u) { 9969 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 9970 } else { 9971 tcg_gen_clrsb_i64(tcg_rd, tcg_rn); 9972 } 9973 break; 9974 case 0x5: /* NOT */ 9975 /* This opcode is shared with CNT and RBIT but we have earlier 9976 * enforced that size == 3 if and only if this is the NOT insn. 9977 */ 9978 tcg_gen_not_i64(tcg_rd, tcg_rn); 9979 break; 9980 case 0x7: /* SQABS, SQNEG */ 9981 if (u) { 9982 gen_helper_neon_qneg_s64(tcg_rd, tcg_env, tcg_rn); 9983 } else { 9984 gen_helper_neon_qabs_s64(tcg_rd, tcg_env, tcg_rn); 9985 } 9986 break; 9987 case 0xa: /* CMLT */ 9988 cond = TCG_COND_LT; 9989 do_cmop: 9990 /* 64 bit integer comparison against zero, result is test ? -1 : 0. */ 9991 tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_constant_i64(0)); 9992 break; 9993 case 0x8: /* CMGT, CMGE */ 9994 cond = u ? TCG_COND_GE : TCG_COND_GT; 9995 goto do_cmop; 9996 case 0x9: /* CMEQ, CMLE */ 9997 cond = u ? 
TCG_COND_LE : TCG_COND_EQ; 9998 goto do_cmop; 9999 case 0xb: /* ABS, NEG */ 10000 if (u) { 10001 tcg_gen_neg_i64(tcg_rd, tcg_rn); 10002 } else { 10003 tcg_gen_abs_i64(tcg_rd, tcg_rn); 10004 } 10005 break; 10006 case 0x2f: /* FABS */ 10007 gen_vfp_absd(tcg_rd, tcg_rn); 10008 break; 10009 case 0x6f: /* FNEG */ 10010 gen_vfp_negd(tcg_rd, tcg_rn); 10011 break; 10012 case 0x7f: /* FSQRT */ 10013 gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env); 10014 break; 10015 case 0x1a: /* FCVTNS */ 10016 case 0x1b: /* FCVTMS */ 10017 case 0x1c: /* FCVTAS */ 10018 case 0x3a: /* FCVTPS */ 10019 case 0x3b: /* FCVTZS */ 10020 gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 10021 break; 10022 case 0x5a: /* FCVTNU */ 10023 case 0x5b: /* FCVTMU */ 10024 case 0x5c: /* FCVTAU */ 10025 case 0x7a: /* FCVTPU */ 10026 case 0x7b: /* FCVTZU */ 10027 gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 10028 break; 10029 case 0x18: /* FRINTN */ 10030 case 0x19: /* FRINTM */ 10031 case 0x38: /* FRINTP */ 10032 case 0x39: /* FRINTZ */ 10033 case 0x58: /* FRINTA */ 10034 case 0x79: /* FRINTI */ 10035 gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus); 10036 break; 10037 case 0x59: /* FRINTX */ 10038 gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus); 10039 break; 10040 case 0x1e: /* FRINT32Z */ 10041 case 0x5e: /* FRINT32X */ 10042 gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus); 10043 break; 10044 case 0x1f: /* FRINT64Z */ 10045 case 0x5f: /* FRINT64X */ 10046 gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus); 10047 break; 10048 default: 10049 g_assert_not_reached(); 10050 } 10051 } 10052 handle_2misc_fcmp_zero(DisasContext * s,int opcode,bool is_scalar,bool is_u,bool is_q,int size,int rn,int rd)10053 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, 10054 bool is_scalar, bool is_u, bool is_q, 10055 int size, int rn, int rd) 10056 { 10057 bool is_double = (size == MO_64); 10058 TCGv_ptr fpst; 10059 10060 if (!fp_access_check(s)) { 10061 return; 10062 } 10063 10064 fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 10065 10066 if (is_double) { 10067 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10068 TCGv_i64 tcg_zero = tcg_constant_i64(0); 10069 TCGv_i64 tcg_res = tcg_temp_new_i64(); 10070 NeonGenTwoDoubleOpFn *genfn; 10071 bool swap = false; 10072 int pass; 10073 10074 switch (opcode) { 10075 case 0x2e: /* FCMLT (zero) */ 10076 swap = true; 10077 /* fallthrough */ 10078 case 0x2c: /* FCMGT (zero) */ 10079 genfn = gen_helper_neon_cgt_f64; 10080 break; 10081 case 0x2d: /* FCMEQ (zero) */ 10082 genfn = gen_helper_neon_ceq_f64; 10083 break; 10084 case 0x6d: /* FCMLE (zero) */ 10085 swap = true; 10086 /* fall through */ 10087 case 0x6c: /* FCMGE (zero) */ 10088 genfn = gen_helper_neon_cge_f64; 10089 break; 10090 default: 10091 g_assert_not_reached(); 10092 } 10093 10094 for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { 10095 read_vec_element(s, tcg_op, rn, pass, MO_64); 10096 if (swap) { 10097 genfn(tcg_res, tcg_zero, tcg_op, fpst); 10098 } else { 10099 genfn(tcg_res, tcg_op, tcg_zero, fpst); 10100 } 10101 write_vec_element(s, tcg_res, rd, pass, MO_64); 10102 } 10103 10104 clear_vec_high(s, !is_scalar, rd); 10105 } else { 10106 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10107 TCGv_i32 tcg_zero = tcg_constant_i32(0); 10108 TCGv_i32 tcg_res = tcg_temp_new_i32(); 10109 NeonGenTwoSingleOpFn *genfn; 10110 bool swap = false; 10111 int pass, maxpasses; 10112 10113 if (size == MO_16) { 10114 switch (opcode) { 10115 case 0x2e: /* FCMLT (zero) */ 10116 swap = true; 10117 /* fall through */ 10118 case 0x2c: /* FCMGT (zero) */ 10119 genfn = gen_helper_advsimd_cgt_f16; 10120 break; 10121 case 0x2d: /* FCMEQ (zero) */ 10122 genfn = gen_helper_advsimd_ceq_f16; 10123 break; 10124 case 0x6d: /* FCMLE (zero) */ 10125 swap = true; 10126 /* fall through */ 10127 case 0x6c: /* FCMGE (zero) */ 10128 genfn = gen_helper_advsimd_cge_f16; 10129 break; 10130 default: 10131 g_assert_not_reached(); 10132 } 10133 } else { 10134 switch (opcode) { 10135 case 0x2e: /* FCMLT (zero) */ 10136 swap = true; 10137 /* fall through */ 10138 case 0x2c: /* FCMGT (zero) */ 10139 genfn = gen_helper_neon_cgt_f32; 10140 break; 10141 case 0x2d: /* FCMEQ (zero) */ 10142 genfn = gen_helper_neon_ceq_f32; 10143 break; 10144 case 0x6d: /* FCMLE (zero) */ 10145 swap = true; 10146 /* fall through */ 10147 case 0x6c: /* FCMGE (zero) */ 10148 genfn = gen_helper_neon_cge_f32; 10149 break; 10150 default: 10151 g_assert_not_reached(); 10152 } 10153 } 10154 10155 if (is_scalar) { 10156 maxpasses = 1; 10157 } else { 10158 int vector_size = 8 << is_q; 10159 maxpasses = vector_size >> size; 10160 } 10161 10162 for (pass = 0; pass < maxpasses; pass++) { 10163 read_vec_element_i32(s, tcg_op, rn, pass, size); 10164 if (swap) { 10165 genfn(tcg_res, tcg_zero, tcg_op, fpst); 10166 } else { 10167 genfn(tcg_res, tcg_op, tcg_zero, fpst); 10168 } 10169 if (is_scalar) { 10170 write_fp_sreg(s, rd, tcg_res); 10171 } else { 10172 write_vec_element_i32(s, tcg_res, rd, pass, size); 10173 } 10174 } 10175 10176 if (!is_scalar) { 10177 clear_vec_high(s, is_q, rd); 10178 } 10179 } 10180 } 10181 handle_2misc_reciprocal(DisasContext * s,int opcode,bool is_scalar,bool is_u,bool is_q,int size,int rn,int rd)10182 static void handle_2misc_reciprocal(DisasContext *s, int opcode, 10183 bool is_scalar, bool is_u, bool is_q, 10184 int size, int rn, int rd) 10185 { 10186 bool is_double = (size == 3); 10187 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10188 10189 if (is_double) { 10190 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10191 TCGv_i64 tcg_res = tcg_temp_new_i64(); 10192 int pass; 10193 10194 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 10195 read_vec_element(s, tcg_op, rn, pass, MO_64); 10196 switch (opcode) { 10197 case 0x3d: /* FRECPE */ 10198 gen_helper_recpe_f64(tcg_res, tcg_op, fpst); 10199 break; 10200 case 0x3f: /* FRECPX */ 10201 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst); 10202 break; 10203 case 0x7d: /* FRSQRTE */ 10204 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst); 10205 break; 10206 default: 10207 g_assert_not_reached(); 10208 } 10209 write_vec_element(s, tcg_res, rd, pass, MO_64); 10210 } 10211 clear_vec_high(s, !is_scalar, rd); 10212 } else { 10213 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10214 TCGv_i32 tcg_res = tcg_temp_new_i32(); 10215 int pass, maxpasses; 10216 10217 if (is_scalar) { 10218 maxpasses = 1; 10219 } else { 10220 maxpasses = is_q ? 
4 : 2; 10221 } 10222 10223 for (pass = 0; pass < maxpasses; pass++) { 10224 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 10225 10226 switch (opcode) { 10227 case 0x3c: /* URECPE */ 10228 gen_helper_recpe_u32(tcg_res, tcg_op); 10229 break; 10230 case 0x3d: /* FRECPE */ 10231 gen_helper_recpe_f32(tcg_res, tcg_op, fpst); 10232 break; 10233 case 0x3f: /* FRECPX */ 10234 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst); 10235 break; 10236 case 0x7d: /* FRSQRTE */ 10237 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst); 10238 break; 10239 default: 10240 g_assert_not_reached(); 10241 } 10242 10243 if (is_scalar) { 10244 write_fp_sreg(s, rd, tcg_res); 10245 } else { 10246 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 10247 } 10248 } 10249 if (!is_scalar) { 10250 clear_vec_high(s, is_q, rd); 10251 } 10252 } 10253 } 10254 handle_2misc_narrow(DisasContext * s,bool scalar,int opcode,bool u,bool is_q,int size,int rn,int rd)10255 static void handle_2misc_narrow(DisasContext *s, bool scalar, 10256 int opcode, bool u, bool is_q, 10257 int size, int rn, int rd) 10258 { 10259 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element 10260 * in the source becomes a size element in the destination). 10261 */ 10262 int pass; 10263 TCGv_i64 tcg_res[2]; 10264 int destelt = is_q ? 2 : 0; 10265 int passes = scalar ? 1 : 2; 10266 10267 if (scalar) { 10268 tcg_res[1] = tcg_constant_i64(0); 10269 } 10270 10271 for (pass = 0; pass < passes; pass++) { 10272 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10273 NeonGenOne64OpFn *genfn = NULL; 10274 NeonGenOne64OpEnvFn *genenvfn = NULL; 10275 10276 if (scalar) { 10277 read_vec_element(s, tcg_op, rn, pass, size + 1); 10278 } else { 10279 read_vec_element(s, tcg_op, rn, pass, MO_64); 10280 } 10281 tcg_res[pass] = tcg_temp_new_i64(); 10282 10283 switch (opcode) { 10284 case 0x12: /* XTN, SQXTUN */ 10285 { 10286 static NeonGenOne64OpFn * const xtnfns[3] = { 10287 gen_helper_neon_narrow_u8, 10288 gen_helper_neon_narrow_u16, 10289 tcg_gen_ext32u_i64, 10290 }; 10291 static NeonGenOne64OpEnvFn * const sqxtunfns[3] = { 10292 gen_helper_neon_unarrow_sat8, 10293 gen_helper_neon_unarrow_sat16, 10294 gen_helper_neon_unarrow_sat32, 10295 }; 10296 if (u) { 10297 genenvfn = sqxtunfns[size]; 10298 } else { 10299 genfn = xtnfns[size]; 10300 } 10301 break; 10302 } 10303 case 0x14: /* SQXTN, UQXTN */ 10304 { 10305 static NeonGenOne64OpEnvFn * const fns[3][2] = { 10306 { gen_helper_neon_narrow_sat_s8, 10307 gen_helper_neon_narrow_sat_u8 }, 10308 { gen_helper_neon_narrow_sat_s16, 10309 gen_helper_neon_narrow_sat_u16 }, 10310 { gen_helper_neon_narrow_sat_s32, 10311 gen_helper_neon_narrow_sat_u32 }, 10312 }; 10313 genenvfn = fns[size][u]; 10314 break; 10315 } 10316 case 0x16: /* FCVTN, FCVTN2 */ 10317 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ 10318 if (size == 2) { 10319 TCGv_i32 tmp = tcg_temp_new_i32(); 10320 gen_helper_vfp_fcvtsd(tmp, tcg_op, tcg_env); 10321 tcg_gen_extu_i32_i64(tcg_res[pass], tmp); 10322 } else { 10323 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 10324 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 10325 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10326 TCGv_i32 ahp = get_ahp_flag(); 10327 10328 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op); 10329 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 10330 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 10331 tcg_gen_deposit_i32(tcg_lo, tcg_lo, tcg_hi, 16, 16); 10332 tcg_gen_extu_i32_i64(tcg_res[pass], tcg_lo); 10333 } 10334 break; 10335 case 0x36: /* BFCVTN, BFCVTN2 */ 10336 { 10337 TCGv_ptr fpst = 
fpstatus_ptr(FPST_FPCR); 10338 TCGv_i32 tmp = tcg_temp_new_i32(); 10339 gen_helper_bfcvt_pair(tmp, tcg_op, fpst); 10340 tcg_gen_extu_i32_i64(tcg_res[pass], tmp); 10341 } 10342 break; 10343 case 0x56: /* FCVTXN, FCVTXN2 */ 10344 { 10345 /* 10346 * 64 bit to 32 bit float conversion 10347 * with von Neumann rounding (round to odd) 10348 */ 10349 TCGv_i32 tmp = tcg_temp_new_i32(); 10350 assert(size == 2); 10351 gen_helper_fcvtx_f64_to_f32(tmp, tcg_op, tcg_env); 10352 tcg_gen_extu_i32_i64(tcg_res[pass], tmp); 10353 } 10354 break; 10355 default: 10356 g_assert_not_reached(); 10357 } 10358 10359 if (genfn) { 10360 genfn(tcg_res[pass], tcg_op); 10361 } else if (genenvfn) { 10362 genenvfn(tcg_res[pass], tcg_env, tcg_op); 10363 } 10364 } 10365 10366 for (pass = 0; pass < 2; pass++) { 10367 write_vec_element(s, tcg_res[pass], rd, destelt + pass, MO_32); 10368 } 10369 clear_vec_high(s, is_q, rd); 10370 } 10371 10372 /* AdvSIMD scalar two reg misc 10373 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 10374 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 10375 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 10376 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 10377 */ disas_simd_scalar_two_reg_misc(DisasContext * s,uint32_t insn)10378 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) 10379 { 10380 int rd = extract32(insn, 0, 5); 10381 int rn = extract32(insn, 5, 5); 10382 int opcode = extract32(insn, 12, 5); 10383 int size = extract32(insn, 22, 2); 10384 bool u = extract32(insn, 29, 1); 10385 bool is_fcvt = false; 10386 int rmode; 10387 TCGv_i32 tcg_rmode; 10388 TCGv_ptr tcg_fpstatus; 10389 10390 switch (opcode) { 10391 case 0x7: /* SQABS / SQNEG */ 10392 break; 10393 case 0xa: /* CMLT */ 10394 if (u) { 10395 unallocated_encoding(s); 10396 return; 10397 } 10398 /* fall through */ 10399 case 0x8: /* CMGT, CMGE */ 10400 case 0x9: /* CMEQ, CMLE */ 10401 case 0xb: /* ABS, NEG */ 10402 if (size != 3) { 10403 unallocated_encoding(s); 10404 return; 10405 } 10406 break; 10407 case 0x12: /* SQXTUN */ 10408 if (!u) { 10409 unallocated_encoding(s); 10410 return; 10411 } 10412 /* fall through */ 10413 case 0x14: /* SQXTN, UQXTN */ 10414 if (size == 3) { 10415 unallocated_encoding(s); 10416 return; 10417 } 10418 if (!fp_access_check(s)) { 10419 return; 10420 } 10421 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd); 10422 return; 10423 case 0xc ... 0xf: 10424 case 0x16 ... 0x1d: 10425 case 0x1f: 10426 /* Floating point: U, size[1] and opcode indicate operation; 10427 * size[0] indicates single or double precision. 10428 */ 10429 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 10430 size = extract32(size, 0, 1) ? 
3 : 2; 10431 switch (opcode) { 10432 case 0x2c: /* FCMGT (zero) */ 10433 case 0x2d: /* FCMEQ (zero) */ 10434 case 0x2e: /* FCMLT (zero) */ 10435 case 0x6c: /* FCMGE (zero) */ 10436 case 0x6d: /* FCMLE (zero) */ 10437 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd); 10438 return; 10439 case 0x1d: /* SCVTF */ 10440 case 0x5d: /* UCVTF */ 10441 { 10442 bool is_signed = (opcode == 0x1d); 10443 if (!fp_access_check(s)) { 10444 return; 10445 } 10446 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size); 10447 return; 10448 } 10449 case 0x3d: /* FRECPE */ 10450 case 0x3f: /* FRECPX */ 10451 case 0x7d: /* FRSQRTE */ 10452 if (!fp_access_check(s)) { 10453 return; 10454 } 10455 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd); 10456 return; 10457 case 0x1a: /* FCVTNS */ 10458 case 0x1b: /* FCVTMS */ 10459 case 0x3a: /* FCVTPS */ 10460 case 0x3b: /* FCVTZS */ 10461 case 0x5a: /* FCVTNU */ 10462 case 0x5b: /* FCVTMU */ 10463 case 0x7a: /* FCVTPU */ 10464 case 0x7b: /* FCVTZU */ 10465 is_fcvt = true; 10466 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 10467 break; 10468 case 0x1c: /* FCVTAS */ 10469 case 0x5c: /* FCVTAU */ 10470 /* TIEAWAY doesn't fit in the usual rounding mode encoding */ 10471 is_fcvt = true; 10472 rmode = FPROUNDING_TIEAWAY; 10473 break; 10474 case 0x56: /* FCVTXN, FCVTXN2 */ 10475 if (size == 2) { 10476 unallocated_encoding(s); 10477 return; 10478 } 10479 if (!fp_access_check(s)) { 10480 return; 10481 } 10482 handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd); 10483 return; 10484 default: 10485 unallocated_encoding(s); 10486 return; 10487 } 10488 break; 10489 default: 10490 case 0x3: /* USQADD / SUQADD */ 10491 unallocated_encoding(s); 10492 return; 10493 } 10494 10495 if (!fp_access_check(s)) { 10496 return; 10497 } 10498 10499 if (is_fcvt) { 10500 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 10501 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 10502 } else { 10503 tcg_fpstatus = NULL; 10504 tcg_rmode = NULL; 10505 } 10506 10507 if (size == 3) { 10508 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 10509 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 10510 10511 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus); 10512 write_fp_dreg(s, rd, tcg_rd); 10513 } else { 10514 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 10515 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 10516 10517 read_vec_element_i32(s, tcg_rn, rn, 0, size); 10518 10519 switch (opcode) { 10520 case 0x7: /* SQABS, SQNEG */ 10521 { 10522 NeonGenOneOpEnvFn *genfn; 10523 static NeonGenOneOpEnvFn * const fns[3][2] = { 10524 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 10525 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 10526 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, 10527 }; 10528 genfn = fns[size][u]; 10529 genfn(tcg_rd, tcg_env, tcg_rn); 10530 break; 10531 } 10532 case 0x1a: /* FCVTNS */ 10533 case 0x1b: /* FCVTMS */ 10534 case 0x1c: /* FCVTAS */ 10535 case 0x3a: /* FCVTPS */ 10536 case 0x3b: /* FCVTZS */ 10537 gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0), 10538 tcg_fpstatus); 10539 break; 10540 case 0x5a: /* FCVTNU */ 10541 case 0x5b: /* FCVTMU */ 10542 case 0x5c: /* FCVTAU */ 10543 case 0x7a: /* FCVTPU */ 10544 case 0x7b: /* FCVTZU */ 10545 gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0), 10546 tcg_fpstatus); 10547 break; 10548 default: 10549 g_assert_not_reached(); 10550 } 10551 10552 write_fp_sreg(s, rd, tcg_rd); 10553 } 10554 10555 if (is_fcvt) { 10556 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 10557 } 10558 } 
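/*
 * Reader's note (comment added, not in the original source): both
 * two-reg-misc decoders (the scalar one above and the vector one further
 * below) fold U and size[1] into the opcode before dispatching the FP
 * cases, and later recover the rounding mode from the folded value:
 *
 *     opcode |= (size[1] << 5) | (U << 6);
 *     rmode   = opcode[5] | (opcode[0] << 1);
 *
 * For example, scalar FCVTZS (U=0, size=1x, opcode=0x1b) folds to 0x3b,
 * giving rmode = 1 | (1 << 1) = 3 (FPROUNDING_ZERO), while FCVTNS
 * (size=0x, opcode=0x1a) stays 0x1a and yields rmode 0
 * (FPROUNDING_TIEEVEN).
 */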
10559 10560 /* AdvSIMD shift by immediate 10561 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 10562 * +---+---+---+-------------+------+------+--------+---+------+------+ 10563 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | 10564 * +---+---+---+-------------+------+------+--------+---+------+------+ 10565 */ disas_simd_shift_imm(DisasContext * s,uint32_t insn)10566 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) 10567 { 10568 int rd = extract32(insn, 0, 5); 10569 int rn = extract32(insn, 5, 5); 10570 int opcode = extract32(insn, 11, 5); 10571 int immb = extract32(insn, 16, 3); 10572 int immh = extract32(insn, 19, 4); 10573 bool is_u = extract32(insn, 29, 1); 10574 bool is_q = extract32(insn, 30, 1); 10575 10576 if (immh == 0) { 10577 unallocated_encoding(s); 10578 return; 10579 } 10580 10581 switch (opcode) { 10582 case 0x1c: /* SCVTF / UCVTF */ 10583 handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, 10584 opcode, rn, rd); 10585 break; 10586 case 0x1f: /* FCVTZS/ FCVTZU */ 10587 handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd); 10588 return; 10589 default: 10590 case 0x00: /* SSHR / USHR */ 10591 case 0x02: /* SSRA / USRA (accumulate) */ 10592 case 0x04: /* SRSHR / URSHR (rounding) */ 10593 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 10594 case 0x08: /* SRI */ 10595 case 0x0a: /* SHL / SLI */ 10596 case 0x0c: /* SQSHLU */ 10597 case 0x0e: /* SQSHL, UQSHL */ 10598 case 0x10: /* SHRN / SQSHRUN */ 10599 case 0x11: /* RSHRN / SQRSHRUN */ 10600 case 0x12: /* SQSHRN / UQSHRN */ 10601 case 0x13: /* SQRSHRN / UQRSHRN */ 10602 case 0x14: /* SSHLL / USHLL */ 10603 unallocated_encoding(s); 10604 return; 10605 } 10606 } 10607 handle_2misc_widening(DisasContext * s,int opcode,bool is_q,int size,int rn,int rd)10608 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, 10609 int size, int rn, int rd) 10610 { 10611 /* Handle 2-reg-misc ops which are widening (so each size element 10612 * in the source becomes a 2*size element in the destination. 10613 * The only instruction like this is FCVTL. 10614 */ 10615 int pass; 10616 10617 if (size == 3) { 10618 /* 32 -> 64 bit fp conversion */ 10619 TCGv_i64 tcg_res[2]; 10620 int srcelt = is_q ? 2 : 0; 10621 10622 for (pass = 0; pass < 2; pass++) { 10623 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10624 tcg_res[pass] = tcg_temp_new_i64(); 10625 10626 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); 10627 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, tcg_env); 10628 } 10629 for (pass = 0; pass < 2; pass++) { 10630 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 10631 } 10632 } else { 10633 /* 16 -> 32 bit fp conversion */ 10634 int srcelt = is_q ? 4 : 0; 10635 TCGv_i32 tcg_res[4]; 10636 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10637 TCGv_i32 ahp = get_ahp_flag(); 10638 10639 for (pass = 0; pass < 4; pass++) { 10640 tcg_res[pass] = tcg_temp_new_i32(); 10641 10642 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); 10643 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], 10644 fpst, ahp); 10645 } 10646 for (pass = 0; pass < 4; pass++) { 10647 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); 10648 } 10649 } 10650 } 10651 handle_rev(DisasContext * s,int opcode,bool u,bool is_q,int size,int rn,int rd)10652 static void handle_rev(DisasContext *s, int opcode, bool u, 10653 bool is_q, int size, int rn, int rd) 10654 { 10655 int op = (opcode << 1) | u; 10656 int opsz = op + size; 10657 int grp_size = 3 - opsz; 10658 int dsize = is_q ? 
128 : 64; 10659 int i; 10660 10661 if (opsz >= 3) { 10662 unallocated_encoding(s); 10663 return; 10664 } 10665 10666 if (!fp_access_check(s)) { 10667 return; 10668 } 10669 10670 if (size == 0) { 10671 /* Special case bytes, use bswap op on each group of elements */ 10672 int groups = dsize / (8 << grp_size); 10673 10674 for (i = 0; i < groups; i++) { 10675 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 10676 10677 read_vec_element(s, tcg_tmp, rn, i, grp_size); 10678 switch (grp_size) { 10679 case MO_16: 10680 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 10681 break; 10682 case MO_32: 10683 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 10684 break; 10685 case MO_64: 10686 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp); 10687 break; 10688 default: 10689 g_assert_not_reached(); 10690 } 10691 write_vec_element(s, tcg_tmp, rd, i, grp_size); 10692 } 10693 clear_vec_high(s, is_q, rd); 10694 } else { 10695 int revmask = (1 << grp_size) - 1; 10696 int esize = 8 << size; 10697 int elements = dsize / esize; 10698 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 10699 TCGv_i64 tcg_rd[2]; 10700 10701 for (i = 0; i < 2; i++) { 10702 tcg_rd[i] = tcg_temp_new_i64(); 10703 tcg_gen_movi_i64(tcg_rd[i], 0); 10704 } 10705 10706 for (i = 0; i < elements; i++) { 10707 int e_rev = (i & 0xf) ^ revmask; 10708 int w = (e_rev * esize) / 64; 10709 int o = (e_rev * esize) % 64; 10710 10711 read_vec_element(s, tcg_rn, rn, i, size); 10712 tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize); 10713 } 10714 10715 for (i = 0; i < 2; i++) { 10716 write_vec_element(s, tcg_rd[i], rd, i, MO_64); 10717 } 10718 clear_vec_high(s, true, rd); 10719 } 10720 } 10721 handle_2misc_pairwise(DisasContext * s,int opcode,bool u,bool is_q,int size,int rn,int rd)10722 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, 10723 bool is_q, int size, int rn, int rd) 10724 { 10725 /* Implement the pairwise operations from 2-misc: 10726 * SADDLP, UADDLP, SADALP, UADALP. 10727 * These all add pairs of elements in the input to produce a 10728 * double-width result element in the output (possibly accumulating). 10729 */ 10730 bool accum = (opcode == 0x6); 10731 int maxpass = is_q ? 2 : 1; 10732 int pass; 10733 TCGv_i64 tcg_res[2]; 10734 10735 if (size == 2) { 10736 /* 32 + 32 -> 64 op */ 10737 MemOp memop = size + (u ? 
0 : MO_SIGN); 10738 10739 for (pass = 0; pass < maxpass; pass++) { 10740 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10741 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 10742 10743 tcg_res[pass] = tcg_temp_new_i64(); 10744 10745 read_vec_element(s, tcg_op1, rn, pass * 2, memop); 10746 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop); 10747 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); 10748 if (accum) { 10749 read_vec_element(s, tcg_op1, rd, pass, MO_64); 10750 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1); 10751 } 10752 } 10753 } else { 10754 for (pass = 0; pass < maxpass; pass++) { 10755 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10756 NeonGenOne64OpFn *genfn; 10757 static NeonGenOne64OpFn * const fns[2][2] = { 10758 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, 10759 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, 10760 }; 10761 10762 genfn = fns[size][u]; 10763 10764 tcg_res[pass] = tcg_temp_new_i64(); 10765 10766 read_vec_element(s, tcg_op, rn, pass, MO_64); 10767 genfn(tcg_res[pass], tcg_op); 10768 10769 if (accum) { 10770 read_vec_element(s, tcg_op, rd, pass, MO_64); 10771 if (size == 0) { 10772 gen_helper_neon_addl_u16(tcg_res[pass], 10773 tcg_res[pass], tcg_op); 10774 } else { 10775 gen_helper_neon_addl_u32(tcg_res[pass], 10776 tcg_res[pass], tcg_op); 10777 } 10778 } 10779 } 10780 } 10781 if (!is_q) { 10782 tcg_res[1] = tcg_constant_i64(0); 10783 } 10784 for (pass = 0; pass < 2; pass++) { 10785 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 10786 } 10787 } 10788 handle_shll(DisasContext * s,bool is_q,int size,int rn,int rd)10789 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) 10790 { 10791 /* Implement SHLL and SHLL2 */ 10792 int pass; 10793 int part = is_q ? 2 : 0; 10794 TCGv_i64 tcg_res[2]; 10795 10796 for (pass = 0; pass < 2; pass++) { 10797 static NeonGenWidenFn * const widenfns[3] = { 10798 gen_helper_neon_widen_u8, 10799 gen_helper_neon_widen_u16, 10800 tcg_gen_extu_i32_i64, 10801 }; 10802 NeonGenWidenFn *widenfn = widenfns[size]; 10803 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10804 10805 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32); 10806 tcg_res[pass] = tcg_temp_new_i64(); 10807 widenfn(tcg_res[pass], tcg_op); 10808 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size); 10809 } 10810 10811 for (pass = 0; pass < 2; pass++) { 10812 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 10813 } 10814 } 10815 10816 /* AdvSIMD two reg misc 10817 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 10818 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 10819 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 10820 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 10821 */ disas_simd_two_reg_misc(DisasContext * s,uint32_t insn)10822 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) 10823 { 10824 int size = extract32(insn, 22, 2); 10825 int opcode = extract32(insn, 12, 5); 10826 bool u = extract32(insn, 29, 1); 10827 bool is_q = extract32(insn, 30, 1); 10828 int rn = extract32(insn, 5, 5); 10829 int rd = extract32(insn, 0, 5); 10830 bool need_fpstatus = false; 10831 int rmode = -1; 10832 TCGv_i32 tcg_rmode; 10833 TCGv_ptr tcg_fpstatus; 10834 10835 switch (opcode) { 10836 case 0x0: /* REV64, REV32 */ 10837 case 0x1: /* REV16 */ 10838 handle_rev(s, opcode, u, is_q, size, rn, rd); 10839 return; 10840 case 0x5: /* CNT, NOT, RBIT */ 10841 if (u && size == 0) { 10842 /* NOT */ 10843 break; 10844 } else if (u && size == 1) { 10845 /* 
RBIT */ 10846 break; 10847 } else if (!u && size == 0) { 10848 /* CNT */ 10849 break; 10850 } 10851 unallocated_encoding(s); 10852 return; 10853 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */ 10854 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */ 10855 if (size == 3) { 10856 unallocated_encoding(s); 10857 return; 10858 } 10859 if (!fp_access_check(s)) { 10860 return; 10861 } 10862 10863 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd); 10864 return; 10865 case 0x4: /* CLS, CLZ */ 10866 if (size == 3) { 10867 unallocated_encoding(s); 10868 return; 10869 } 10870 break; 10871 case 0x2: /* SADDLP, UADDLP */ 10872 case 0x6: /* SADALP, UADALP */ 10873 if (size == 3) { 10874 unallocated_encoding(s); 10875 return; 10876 } 10877 if (!fp_access_check(s)) { 10878 return; 10879 } 10880 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd); 10881 return; 10882 case 0x13: /* SHLL, SHLL2 */ 10883 if (u == 0 || size == 3) { 10884 unallocated_encoding(s); 10885 return; 10886 } 10887 if (!fp_access_check(s)) { 10888 return; 10889 } 10890 handle_shll(s, is_q, size, rn, rd); 10891 return; 10892 case 0xa: /* CMLT */ 10893 if (u == 1) { 10894 unallocated_encoding(s); 10895 return; 10896 } 10897 /* fall through */ 10898 case 0x8: /* CMGT, CMGE */ 10899 case 0x9: /* CMEQ, CMLE */ 10900 case 0xb: /* ABS, NEG */ 10901 if (size == 3 && !is_q) { 10902 unallocated_encoding(s); 10903 return; 10904 } 10905 break; 10906 case 0x7: /* SQABS, SQNEG */ 10907 if (size == 3 && !is_q) { 10908 unallocated_encoding(s); 10909 return; 10910 } 10911 break; 10912 case 0xc ... 0xf: 10913 case 0x16 ... 0x1f: 10914 { 10915 /* Floating point: U, size[1] and opcode indicate operation; 10916 * size[0] indicates single or double precision. 10917 */ 10918 int is_double = extract32(size, 0, 1); 10919 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 10920 size = is_double ? 3 : 2; 10921 switch (opcode) { 10922 case 0x2f: /* FABS */ 10923 case 0x6f: /* FNEG */ 10924 if (size == 3 && !is_q) { 10925 unallocated_encoding(s); 10926 return; 10927 } 10928 break; 10929 case 0x1d: /* SCVTF */ 10930 case 0x5d: /* UCVTF */ 10931 { 10932 bool is_signed = (opcode == 0x1d) ? true : false; 10933 int elements = is_double ? 2 : is_q ? 
4 : 2; 10934 if (is_double && !is_q) { 10935 unallocated_encoding(s); 10936 return; 10937 } 10938 if (!fp_access_check(s)) { 10939 return; 10940 } 10941 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size); 10942 return; 10943 } 10944 case 0x2c: /* FCMGT (zero) */ 10945 case 0x2d: /* FCMEQ (zero) */ 10946 case 0x2e: /* FCMLT (zero) */ 10947 case 0x6c: /* FCMGE (zero) */ 10948 case 0x6d: /* FCMLE (zero) */ 10949 if (size == 3 && !is_q) { 10950 unallocated_encoding(s); 10951 return; 10952 } 10953 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd); 10954 return; 10955 case 0x7f: /* FSQRT */ 10956 if (size == 3 && !is_q) { 10957 unallocated_encoding(s); 10958 return; 10959 } 10960 break; 10961 case 0x1a: /* FCVTNS */ 10962 case 0x1b: /* FCVTMS */ 10963 case 0x3a: /* FCVTPS */ 10964 case 0x3b: /* FCVTZS */ 10965 case 0x5a: /* FCVTNU */ 10966 case 0x5b: /* FCVTMU */ 10967 case 0x7a: /* FCVTPU */ 10968 case 0x7b: /* FCVTZU */ 10969 need_fpstatus = true; 10970 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 10971 if (size == 3 && !is_q) { 10972 unallocated_encoding(s); 10973 return; 10974 } 10975 break; 10976 case 0x5c: /* FCVTAU */ 10977 case 0x1c: /* FCVTAS */ 10978 need_fpstatus = true; 10979 rmode = FPROUNDING_TIEAWAY; 10980 if (size == 3 && !is_q) { 10981 unallocated_encoding(s); 10982 return; 10983 } 10984 break; 10985 case 0x3c: /* URECPE */ 10986 if (size == 3) { 10987 unallocated_encoding(s); 10988 return; 10989 } 10990 /* fall through */ 10991 case 0x3d: /* FRECPE */ 10992 case 0x7d: /* FRSQRTE */ 10993 if (size == 3 && !is_q) { 10994 unallocated_encoding(s); 10995 return; 10996 } 10997 if (!fp_access_check(s)) { 10998 return; 10999 } 11000 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd); 11001 return; 11002 case 0x56: /* FCVTXN, FCVTXN2 */ 11003 if (size == 2) { 11004 unallocated_encoding(s); 11005 return; 11006 } 11007 /* fall through */ 11008 case 0x16: /* FCVTN, FCVTN2 */ 11009 /* handle_2misc_narrow does a 2*size -> size operation, but these 11010 * instructions encode the source size rather than dest size. 
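             * For example, FCVTN with sz == 1 narrows 64-bit source elements
             * to 32-bit results; size was rewritten to 3 above, so size - 1
             * below is MO_32, the destination element size that
             * handle_2misc_narrow expects.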
11011 */ 11012 if (!fp_access_check(s)) { 11013 return; 11014 } 11015 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 11016 return; 11017 case 0x36: /* BFCVTN, BFCVTN2 */ 11018 if (!dc_isar_feature(aa64_bf16, s) || size != 2) { 11019 unallocated_encoding(s); 11020 return; 11021 } 11022 if (!fp_access_check(s)) { 11023 return; 11024 } 11025 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 11026 return; 11027 case 0x17: /* FCVTL, FCVTL2 */ 11028 if (!fp_access_check(s)) { 11029 return; 11030 } 11031 handle_2misc_widening(s, opcode, is_q, size, rn, rd); 11032 return; 11033 case 0x18: /* FRINTN */ 11034 case 0x19: /* FRINTM */ 11035 case 0x38: /* FRINTP */ 11036 case 0x39: /* FRINTZ */ 11037 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 11038 /* fall through */ 11039 case 0x59: /* FRINTX */ 11040 case 0x79: /* FRINTI */ 11041 need_fpstatus = true; 11042 if (size == 3 && !is_q) { 11043 unallocated_encoding(s); 11044 return; 11045 } 11046 break; 11047 case 0x58: /* FRINTA */ 11048 rmode = FPROUNDING_TIEAWAY; 11049 need_fpstatus = true; 11050 if (size == 3 && !is_q) { 11051 unallocated_encoding(s); 11052 return; 11053 } 11054 break; 11055 case 0x7c: /* URSQRTE */ 11056 if (size == 3) { 11057 unallocated_encoding(s); 11058 return; 11059 } 11060 break; 11061 case 0x1e: /* FRINT32Z */ 11062 case 0x1f: /* FRINT64Z */ 11063 rmode = FPROUNDING_ZERO; 11064 /* fall through */ 11065 case 0x5e: /* FRINT32X */ 11066 case 0x5f: /* FRINT64X */ 11067 need_fpstatus = true; 11068 if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) { 11069 unallocated_encoding(s); 11070 return; 11071 } 11072 break; 11073 default: 11074 unallocated_encoding(s); 11075 return; 11076 } 11077 break; 11078 } 11079 default: 11080 case 0x3: /* SUQADD, USQADD */ 11081 unallocated_encoding(s); 11082 return; 11083 } 11084 11085 if (!fp_access_check(s)) { 11086 return; 11087 } 11088 11089 if (need_fpstatus || rmode >= 0) { 11090 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 11091 } else { 11092 tcg_fpstatus = NULL; 11093 } 11094 if (rmode >= 0) { 11095 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 11096 } else { 11097 tcg_rmode = NULL; 11098 } 11099 11100 switch (opcode) { 11101 case 0x5: 11102 if (u && size == 0) { /* NOT */ 11103 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0); 11104 return; 11105 } 11106 break; 11107 case 0x8: /* CMGT, CMGE */ 11108 if (u) { 11109 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size); 11110 } else { 11111 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size); 11112 } 11113 return; 11114 case 0x9: /* CMEQ, CMLE */ 11115 if (u) { 11116 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size); 11117 } else { 11118 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size); 11119 } 11120 return; 11121 case 0xa: /* CMLT */ 11122 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size); 11123 return; 11124 case 0xb: 11125 if (u) { /* ABS, NEG */ 11126 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); 11127 } else { 11128 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size); 11129 } 11130 return; 11131 } 11132 11133 if (size == 3) { 11134 /* All 64-bit element operations can be shared with scalar 2misc */ 11135 int pass; 11136 11137 /* Coverity claims (size == 3 && !is_q) has been eliminated 11138 * from all paths leading to here. 
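         * (Every such encoding was rejected with unallocated_encoding()
         * in the checks above, so the tcg_debug_assert below merely
         * documents that invariant for the 64-bit element loop.)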
11139 */ 11140 tcg_debug_assert(is_q); 11141 for (pass = 0; pass < 2; pass++) { 11142 TCGv_i64 tcg_op = tcg_temp_new_i64(); 11143 TCGv_i64 tcg_res = tcg_temp_new_i64(); 11144 11145 read_vec_element(s, tcg_op, rn, pass, MO_64); 11146 11147 handle_2misc_64(s, opcode, u, tcg_res, tcg_op, 11148 tcg_rmode, tcg_fpstatus); 11149 11150 write_vec_element(s, tcg_res, rd, pass, MO_64); 11151 } 11152 } else { 11153 int pass; 11154 11155 for (pass = 0; pass < (is_q ? 4 : 2); pass++) { 11156 TCGv_i32 tcg_op = tcg_temp_new_i32(); 11157 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11158 11159 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 11160 11161 if (size == 2) { 11162 /* Special cases for 32 bit elements */ 11163 switch (opcode) { 11164 case 0x4: /* CLS */ 11165 if (u) { 11166 tcg_gen_clzi_i32(tcg_res, tcg_op, 32); 11167 } else { 11168 tcg_gen_clrsb_i32(tcg_res, tcg_op); 11169 } 11170 break; 11171 case 0x7: /* SQABS, SQNEG */ 11172 if (u) { 11173 gen_helper_neon_qneg_s32(tcg_res, tcg_env, tcg_op); 11174 } else { 11175 gen_helper_neon_qabs_s32(tcg_res, tcg_env, tcg_op); 11176 } 11177 break; 11178 case 0x2f: /* FABS */ 11179 gen_vfp_abss(tcg_res, tcg_op); 11180 break; 11181 case 0x6f: /* FNEG */ 11182 gen_vfp_negs(tcg_res, tcg_op); 11183 break; 11184 case 0x7f: /* FSQRT */ 11185 gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); 11186 break; 11187 case 0x1a: /* FCVTNS */ 11188 case 0x1b: /* FCVTMS */ 11189 case 0x1c: /* FCVTAS */ 11190 case 0x3a: /* FCVTPS */ 11191 case 0x3b: /* FCVTZS */ 11192 gen_helper_vfp_tosls(tcg_res, tcg_op, 11193 tcg_constant_i32(0), tcg_fpstatus); 11194 break; 11195 case 0x5a: /* FCVTNU */ 11196 case 0x5b: /* FCVTMU */ 11197 case 0x5c: /* FCVTAU */ 11198 case 0x7a: /* FCVTPU */ 11199 case 0x7b: /* FCVTZU */ 11200 gen_helper_vfp_touls(tcg_res, tcg_op, 11201 tcg_constant_i32(0), tcg_fpstatus); 11202 break; 11203 case 0x18: /* FRINTN */ 11204 case 0x19: /* FRINTM */ 11205 case 0x38: /* FRINTP */ 11206 case 0x39: /* FRINTZ */ 11207 case 0x58: /* FRINTA */ 11208 case 0x79: /* FRINTI */ 11209 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus); 11210 break; 11211 case 0x59: /* FRINTX */ 11212 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus); 11213 break; 11214 case 0x7c: /* URSQRTE */ 11215 gen_helper_rsqrte_u32(tcg_res, tcg_op); 11216 break; 11217 case 0x1e: /* FRINT32Z */ 11218 case 0x5e: /* FRINT32X */ 11219 gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus); 11220 break; 11221 case 0x1f: /* FRINT64Z */ 11222 case 0x5f: /* FRINT64X */ 11223 gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus); 11224 break; 11225 default: 11226 g_assert_not_reached(); 11227 } 11228 } else { 11229 /* Use helpers for 8 and 16 bit elements */ 11230 switch (opcode) { 11231 case 0x5: /* CNT, RBIT */ 11232 /* For these two insns size is part of the opcode specifier 11233 * (handled earlier); they always operate on byte elements. 
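                     * u selects between them: RBIT (u == 1) reverses the bit
                     * order within each byte, while CNT (u == 0) is a per-byte
                     * population count.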
11234 */ 11235 if (u) { 11236 gen_helper_neon_rbit_u8(tcg_res, tcg_op); 11237 } else { 11238 gen_helper_neon_cnt_u8(tcg_res, tcg_op); 11239 } 11240 break; 11241 case 0x7: /* SQABS, SQNEG */ 11242 { 11243 NeonGenOneOpEnvFn *genfn; 11244 static NeonGenOneOpEnvFn * const fns[2][2] = { 11245 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 11246 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 11247 }; 11248 genfn = fns[size][u]; 11249 genfn(tcg_res, tcg_env, tcg_op); 11250 break; 11251 } 11252 case 0x4: /* CLS, CLZ */ 11253 if (u) { 11254 if (size == 0) { 11255 gen_helper_neon_clz_u8(tcg_res, tcg_op); 11256 } else { 11257 gen_helper_neon_clz_u16(tcg_res, tcg_op); 11258 } 11259 } else { 11260 if (size == 0) { 11261 gen_helper_neon_cls_s8(tcg_res, tcg_op); 11262 } else { 11263 gen_helper_neon_cls_s16(tcg_res, tcg_op); 11264 } 11265 } 11266 break; 11267 default: 11268 g_assert_not_reached(); 11269 } 11270 } 11271 11272 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 11273 } 11274 } 11275 clear_vec_high(s, is_q, rd); 11276 11277 if (tcg_rmode) { 11278 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 11279 } 11280 } 11281 11282 /* AdvSIMD [scalar] two register miscellaneous (FP16) 11283 * 11284 * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0 11285 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 11286 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd | 11287 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 11288 * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00 11289 * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800 11290 * 11291 * This actually covers two groups where scalar access is governed by 11292 * bit 28. A bunch of the instructions (float to integral) only exist 11293 * in the vector form and are un-allocated for the scalar decode. Also 11294 * in the scalar decode Q is always 1. 11295 */ disas_simd_two_reg_misc_fp16(DisasContext * s,uint32_t insn)11296 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) 11297 { 11298 int fpop, opcode, a, u; 11299 int rn, rd; 11300 bool is_q; 11301 bool is_scalar; 11302 bool only_in_vector = false; 11303 11304 int pass; 11305 TCGv_i32 tcg_rmode = NULL; 11306 TCGv_ptr tcg_fpstatus = NULL; 11307 bool need_fpst = true; 11308 int rmode = -1; 11309 11310 if (!dc_isar_feature(aa64_fp16, s)) { 11311 unallocated_encoding(s); 11312 return; 11313 } 11314 11315 rd = extract32(insn, 0, 5); 11316 rn = extract32(insn, 5, 5); 11317 11318 a = extract32(insn, 23, 1); 11319 u = extract32(insn, 29, 1); 11320 is_scalar = extract32(insn, 28, 1); 11321 is_q = extract32(insn, 30, 1); 11322 11323 opcode = extract32(insn, 12, 5); 11324 fpop = deposit32(opcode, 5, 1, a); 11325 fpop = deposit32(fpop, 6, 1, u); 11326 11327 switch (fpop) { 11328 case 0x1d: /* SCVTF */ 11329 case 0x5d: /* UCVTF */ 11330 { 11331 int elements; 11332 11333 if (is_scalar) { 11334 elements = 1; 11335 } else { 11336 elements = (is_q ? 
8 : 4); 11337 } 11338 11339 if (!fp_access_check(s)) { 11340 return; 11341 } 11342 handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16); 11343 return; 11344 } 11345 break; 11346 case 0x2c: /* FCMGT (zero) */ 11347 case 0x2d: /* FCMEQ (zero) */ 11348 case 0x2e: /* FCMLT (zero) */ 11349 case 0x6c: /* FCMGE (zero) */ 11350 case 0x6d: /* FCMLE (zero) */ 11351 handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd); 11352 return; 11353 case 0x3d: /* FRECPE */ 11354 case 0x3f: /* FRECPX */ 11355 break; 11356 case 0x18: /* FRINTN */ 11357 only_in_vector = true; 11358 rmode = FPROUNDING_TIEEVEN; 11359 break; 11360 case 0x19: /* FRINTM */ 11361 only_in_vector = true; 11362 rmode = FPROUNDING_NEGINF; 11363 break; 11364 case 0x38: /* FRINTP */ 11365 only_in_vector = true; 11366 rmode = FPROUNDING_POSINF; 11367 break; 11368 case 0x39: /* FRINTZ */ 11369 only_in_vector = true; 11370 rmode = FPROUNDING_ZERO; 11371 break; 11372 case 0x58: /* FRINTA */ 11373 only_in_vector = true; 11374 rmode = FPROUNDING_TIEAWAY; 11375 break; 11376 case 0x59: /* FRINTX */ 11377 case 0x79: /* FRINTI */ 11378 only_in_vector = true; 11379 /* current rounding mode */ 11380 break; 11381 case 0x1a: /* FCVTNS */ 11382 rmode = FPROUNDING_TIEEVEN; 11383 break; 11384 case 0x1b: /* FCVTMS */ 11385 rmode = FPROUNDING_NEGINF; 11386 break; 11387 case 0x1c: /* FCVTAS */ 11388 rmode = FPROUNDING_TIEAWAY; 11389 break; 11390 case 0x3a: /* FCVTPS */ 11391 rmode = FPROUNDING_POSINF; 11392 break; 11393 case 0x3b: /* FCVTZS */ 11394 rmode = FPROUNDING_ZERO; 11395 break; 11396 case 0x5a: /* FCVTNU */ 11397 rmode = FPROUNDING_TIEEVEN; 11398 break; 11399 case 0x5b: /* FCVTMU */ 11400 rmode = FPROUNDING_NEGINF; 11401 break; 11402 case 0x5c: /* FCVTAU */ 11403 rmode = FPROUNDING_TIEAWAY; 11404 break; 11405 case 0x7a: /* FCVTPU */ 11406 rmode = FPROUNDING_POSINF; 11407 break; 11408 case 0x7b: /* FCVTZU */ 11409 rmode = FPROUNDING_ZERO; 11410 break; 11411 case 0x2f: /* FABS */ 11412 case 0x6f: /* FNEG */ 11413 need_fpst = false; 11414 break; 11415 case 0x7d: /* FRSQRTE */ 11416 case 0x7f: /* FSQRT (vector) */ 11417 break; 11418 default: 11419 unallocated_encoding(s); 11420 return; 11421 } 11422 11423 11424 /* Check additional constraints for the scalar encoding */ 11425 if (is_scalar) { 11426 if (!is_q) { 11427 unallocated_encoding(s); 11428 return; 11429 } 11430 /* FRINTxx is only in the vector form */ 11431 if (only_in_vector) { 11432 unallocated_encoding(s); 11433 return; 11434 } 11435 } 11436 11437 if (!fp_access_check(s)) { 11438 return; 11439 } 11440 11441 if (rmode >= 0 || need_fpst) { 11442 tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16); 11443 } 11444 11445 if (rmode >= 0) { 11446 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 11447 } 11448 11449 if (is_scalar) { 11450 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 11451 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11452 11453 switch (fpop) { 11454 case 0x1a: /* FCVTNS */ 11455 case 0x1b: /* FCVTMS */ 11456 case 0x1c: /* FCVTAS */ 11457 case 0x3a: /* FCVTPS */ 11458 case 0x3b: /* FCVTZS */ 11459 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 11460 break; 11461 case 0x3d: /* FRECPE */ 11462 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 11463 break; 11464 case 0x3f: /* FRECPX */ 11465 gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus); 11466 break; 11467 case 0x5a: /* FCVTNU */ 11468 case 0x5b: /* FCVTMU */ 11469 case 0x5c: /* FCVTAU */ 11470 case 0x7a: /* FCVTPU */ 11471 case 0x7b: /* FCVTZU */ 11472 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 11473 
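            /* The helper converts using the rounding mode that gen_set_rmode()
             * installed in tcg_fpstatus above; the FCVT* variants differ only
             * in the rmode chosen during decode. */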
break; 11474 case 0x6f: /* FNEG */ 11475 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 11476 break; 11477 case 0x7d: /* FRSQRTE */ 11478 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 11479 break; 11480 default: 11481 g_assert_not_reached(); 11482 } 11483 11484 /* limit any sign extension going on */ 11485 tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff); 11486 write_fp_sreg(s, rd, tcg_res); 11487 } else { 11488 for (pass = 0; pass < (is_q ? 8 : 4); pass++) { 11489 TCGv_i32 tcg_op = tcg_temp_new_i32(); 11490 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11491 11492 read_vec_element_i32(s, tcg_op, rn, pass, MO_16); 11493 11494 switch (fpop) { 11495 case 0x1a: /* FCVTNS */ 11496 case 0x1b: /* FCVTMS */ 11497 case 0x1c: /* FCVTAS */ 11498 case 0x3a: /* FCVTPS */ 11499 case 0x3b: /* FCVTZS */ 11500 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 11501 break; 11502 case 0x3d: /* FRECPE */ 11503 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 11504 break; 11505 case 0x5a: /* FCVTNU */ 11506 case 0x5b: /* FCVTMU */ 11507 case 0x5c: /* FCVTAU */ 11508 case 0x7a: /* FCVTPU */ 11509 case 0x7b: /* FCVTZU */ 11510 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 11511 break; 11512 case 0x18: /* FRINTN */ 11513 case 0x19: /* FRINTM */ 11514 case 0x38: /* FRINTP */ 11515 case 0x39: /* FRINTZ */ 11516 case 0x58: /* FRINTA */ 11517 case 0x79: /* FRINTI */ 11518 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus); 11519 break; 11520 case 0x59: /* FRINTX */ 11521 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus); 11522 break; 11523 case 0x2f: /* FABS */ 11524 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 11525 break; 11526 case 0x6f: /* FNEG */ 11527 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 11528 break; 11529 case 0x7d: /* FRSQRTE */ 11530 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 11531 break; 11532 case 0x7f: /* FSQRT */ 11533 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus); 11534 break; 11535 default: 11536 g_assert_not_reached(); 11537 } 11538 11539 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 11540 } 11541 11542 clear_vec_high(s, is_q, rd); 11543 } 11544 11545 if (tcg_rmode) { 11546 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 11547 } 11548 } 11549 11550 /* C3.6 Data processing - SIMD, inc Crypto 11551 * 11552 * As the decode gets a little complex we are using a table based 11553 * approach for this part of the decode. 11554 */ 11555 static const AArch64DecodeTable data_proc_simd[] = { 11556 /* pattern , mask , fn */ 11557 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, 11558 { 0x0f000400, 0x9f800400, disas_simd_shift_imm }, 11559 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, 11560 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, 11561 { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 }, 11562 { 0x00000000, 0x00000000, NULL } 11563 }; 11564 disas_data_proc_simd(DisasContext * s,uint32_t insn)11565 static void disas_data_proc_simd(DisasContext *s, uint32_t insn) 11566 { 11567 /* Note that this is called with all non-FP cases from 11568 * table C3-6 so it must UNDEF for entries not specifically 11569 * allocated to instructions in that table. 
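     * lookup_disas_fn() tries the entries of data_proc_simd[] in order and
     * returns the first one whose (insn & mask) == pattern, or NULL once it
     * reaches the all-zero sentinel at the end of the table.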
     */
    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }
}

/* C3.6 Data processing - SIMD and floating point */
static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
{
    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
        disas_data_proc_fp(s, insn);
    } else {
        /* SIMD, including crypto */
        disas_data_proc_simd(s, insn);
    }
}

static bool trans_OK(DisasContext *s, arg_OK *a)
{
    return true;
}

static bool trans_FAIL(DisasContext *s, arg_OK *a)
{
    s->is_nonstreaming = true;
    return true;
}

/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, and is non-zero
 *
 * On a guarded page, there are a limited number of insns
 * that may be present at the branch target:
 *  - branch target identifiers,
 *  - paciasp, pacibsp,
 *  - BRK insn
 *  - HLT insn
 * Anything else causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    if ((insn & 0xfffff01fu) == 0xd503201fu) {
        /* HINT space */
        switch (extract32(insn, 5, 7)) {
        case 0b011001: /* PACIASP */
        case 0b011011: /* PACIBSP */
            /*
             * If SCTLR_ELx.BT, then PACI*SP are not compatible
             * with btype == 3.  Otherwise all btype are ok.
             */
            return !bt || btype != 3;
        case 0b100000: /* BTI */
            /* Not compatible with any btype. */
            return false;
        case 0b100010: /* BTI c */
            /* Not compatible with btype == 3 */
            return btype != 3;
        case 0b100100: /* BTI j */
            /* Not compatible with btype == 2 */
            return btype != 2;
        case 0b100110: /* BTI jc */
            /* Compatible with any btype. */
            return true;
        }
    } else {
        switch (insn & 0xffe0001fu) {
        case 0xd4200000u: /* BRK */
        case 0xd4400000u: /* HLT */
            /* Give priority to the breakpoint exception.
*/ 11647 return true; 11648 } 11649 } 11650 return false; 11651 } 11652 11653 /* C3.1 A64 instruction index by encoding */ disas_a64_legacy(DisasContext * s,uint32_t insn)11654 static void disas_a64_legacy(DisasContext *s, uint32_t insn) 11655 { 11656 switch (extract32(insn, 25, 4)) { 11657 case 0x5: 11658 case 0xd: /* Data processing - register */ 11659 disas_data_proc_reg(s, insn); 11660 break; 11661 case 0x7: 11662 case 0xf: /* Data processing - SIMD and floating point */ 11663 disas_data_proc_simd_fp(s, insn); 11664 break; 11665 default: 11666 unallocated_encoding(s); 11667 break; 11668 } 11669 } 11670 aarch64_tr_init_disas_context(DisasContextBase * dcbase,CPUState * cpu)11671 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, 11672 CPUState *cpu) 11673 { 11674 DisasContext *dc = container_of(dcbase, DisasContext, base); 11675 CPUARMState *env = cpu_env(cpu); 11676 ARMCPU *arm_cpu = env_archcpu(env); 11677 CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb); 11678 int bound, core_mmu_idx; 11679 11680 dc->isar = &arm_cpu->isar; 11681 dc->condjmp = 0; 11682 dc->pc_save = dc->base.pc_first; 11683 dc->aarch64 = true; 11684 dc->thumb = false; 11685 dc->sctlr_b = 0; 11686 dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE; 11687 dc->condexec_mask = 0; 11688 dc->condexec_cond = 0; 11689 core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX); 11690 dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx); 11691 dc->tbii = EX_TBFLAG_A64(tb_flags, TBII); 11692 dc->tbid = EX_TBFLAG_A64(tb_flags, TBID); 11693 dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA); 11694 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); 11695 #if !defined(CONFIG_USER_ONLY) 11696 dc->user = (dc->current_el == 0); 11697 #endif 11698 dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); 11699 dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); 11700 dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL); 11701 dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE); 11702 dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC); 11703 dc->trap_eret = EX_TBFLAG_A64(tb_flags, TRAP_ERET); 11704 dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL); 11705 dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL); 11706 dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16; 11707 dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16; 11708 dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE); 11709 dc->bt = EX_TBFLAG_A64(tb_flags, BT); 11710 dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE); 11711 dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV); 11712 dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA); 11713 dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0); 11714 dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE); 11715 dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE); 11716 dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM); 11717 dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA); 11718 dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING); 11719 dc->naa = EX_TBFLAG_A64(tb_flags, NAA); 11720 dc->nv = EX_TBFLAG_A64(tb_flags, NV); 11721 dc->nv1 = EX_TBFLAG_A64(tb_flags, NV1); 11722 dc->nv2 = EX_TBFLAG_A64(tb_flags, NV2); 11723 dc->nv2_mem_e20 = EX_TBFLAG_A64(tb_flags, NV2_MEM_E20); 11724 dc->nv2_mem_be = EX_TBFLAG_A64(tb_flags, NV2_MEM_BE); 11725 dc->vec_len = 0; 11726 dc->vec_stride = 0; 11727 dc->cp_regs = arm_cpu->cp_regs; 11728 dc->features = env->features; 11729 dc->dcz_blocksize = arm_cpu->dcz_blocksize; 11730 dc->gm_blocksize = arm_cpu->gm_blocksize; 11731 11732 #ifdef CONFIG_USER_ONLY 11733 /* In sve_probe_page, we assume TBI is 
enabled. */ 11734 tcg_debug_assert(dc->tbid & 1); 11735 #endif 11736 11737 dc->lse2 = dc_isar_feature(aa64_lse2, dc); 11738 11739 /* Single step state. The code-generation logic here is: 11740 * SS_ACTIVE == 0: 11741 * generate code with no special handling for single-stepping (except 11742 * that anything that can make us go to SS_ACTIVE == 1 must end the TB; 11743 * this happens anyway because those changes are all system register or 11744 * PSTATE writes). 11745 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending) 11746 * emit code for one insn 11747 * emit code to clear PSTATE.SS 11748 * emit code to generate software step exception for completed step 11749 * end TB (as usual for having generated an exception) 11750 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending) 11751 * emit code to generate a software step exception 11752 * end the TB 11753 */ 11754 dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE); 11755 dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS); 11756 dc->is_ldex = false; 11757 11758 /* Bound the number of insns to execute to those left on the page. */ 11759 bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4; 11760 11761 /* If architectural single step active, limit to 1. */ 11762 if (dc->ss_active) { 11763 bound = 1; 11764 } 11765 dc->base.max_insns = MIN(dc->base.max_insns, bound); 11766 } 11767 aarch64_tr_tb_start(DisasContextBase * db,CPUState * cpu)11768 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) 11769 { 11770 } 11771 aarch64_tr_insn_start(DisasContextBase * dcbase,CPUState * cpu)11772 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) 11773 { 11774 DisasContext *dc = container_of(dcbase, DisasContext, base); 11775 target_ulong pc_arg = dc->base.pc_next; 11776 11777 if (tb_cflags(dcbase->tb) & CF_PCREL) { 11778 pc_arg &= ~TARGET_PAGE_MASK; 11779 } 11780 tcg_gen_insn_start(pc_arg, 0, 0); 11781 dc->insn_start_updated = false; 11782 } 11783 aarch64_tr_translate_insn(DisasContextBase * dcbase,CPUState * cpu)11784 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) 11785 { 11786 DisasContext *s = container_of(dcbase, DisasContext, base); 11787 CPUARMState *env = cpu_env(cpu); 11788 uint64_t pc = s->base.pc_next; 11789 uint32_t insn; 11790 11791 /* Singlestep exceptions have the highest priority. */ 11792 if (s->ss_active && !s->pstate_ss) { 11793 /* Singlestep state is Active-pending. 11794 * If we're in this state at the start of a TB then either 11795 * a) we just took an exception to an EL which is being debugged 11796 * and this is the first insn in the exception handler 11797 * b) debug exceptions were masked and we just unmasked them 11798 * without changing EL (eg by clearing PSTATE.D) 11799 * In either case we're going to take a swstep exception in the 11800 * "did not step an insn" case, and so the syndrome ISV and EX 11801 * bits should be zero. 11802 */ 11803 assert(s->base.num_insns == 1); 11804 gen_swstep_exception(s, 0, 0); 11805 s->base.is_jmp = DISAS_NORETURN; 11806 s->base.pc_next = pc + 4; 11807 return; 11808 } 11809 11810 if (pc & 3) { 11811 /* 11812 * PC alignment fault. This has priority over the instruction abort 11813 * that we would receive from a translation fault via arm_ldl_code. 11814 * This should only be possible after an indirect branch, at the 11815 * start of the TB. 
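         * Direct branches can never produce a misaligned PC, because their
         * immediate offsets are multiples of 4; only BR/BLR/RET-style
         * indirect branches (or an exception return) can leave PC[1:0] != 0.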
         */
        assert(s->base.num_insns == 1);
        gen_helper_exception_pc_alignment(tcg_env, tcg_constant_tl(pc));
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
        return;
    }

    s->pc_curr = pc;
    insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next = pc + 4;

    s->fp_access_checked = 0;
    s->sve_access_checked = 0;

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /* First insn can have btype set to non-zero. */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above almost
             * everything else. This allows us to handle it now
             * instead of waiting until the insn is otherwise decoded.
             *
             * We can check all but the guarded page check here;
             * defer the latter to a helper.
             */
            if (s->btype != 0
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_helper_guarded_page_check(tcg_env);
            }
        } else {
            /* Not the first insn: btype must be 0. */
            tcg_debug_assert(s->btype == 0);
        }
    }

    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }

    if (!disas_a64(s, insn) &&
        !disas_sme(s, insn) &&
        !disas_sve(s, insn)) {
        disas_a64_legacy(s, insn);
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}

static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
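         * In the DISAS_EXIT and DISAS_JUMP cases below we do not advance the
         * PC by hand, because whatever set is_jmp has already written the
         * new PC value.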
11894 */ 11895 switch (dc->base.is_jmp) { 11896 default: 11897 gen_a64_update_pc(dc, 4); 11898 /* fall through */ 11899 case DISAS_EXIT: 11900 case DISAS_JUMP: 11901 gen_step_complete_exception(dc); 11902 break; 11903 case DISAS_NORETURN: 11904 break; 11905 } 11906 } else { 11907 switch (dc->base.is_jmp) { 11908 case DISAS_NEXT: 11909 case DISAS_TOO_MANY: 11910 gen_goto_tb(dc, 1, 4); 11911 break; 11912 default: 11913 case DISAS_UPDATE_EXIT: 11914 gen_a64_update_pc(dc, 4); 11915 /* fall through */ 11916 case DISAS_EXIT: 11917 tcg_gen_exit_tb(NULL, 0); 11918 break; 11919 case DISAS_UPDATE_NOCHAIN: 11920 gen_a64_update_pc(dc, 4); 11921 /* fall through */ 11922 case DISAS_JUMP: 11923 tcg_gen_lookup_and_goto_ptr(); 11924 break; 11925 case DISAS_NORETURN: 11926 case DISAS_SWI: 11927 break; 11928 case DISAS_WFE: 11929 gen_a64_update_pc(dc, 4); 11930 gen_helper_wfe(tcg_env); 11931 break; 11932 case DISAS_YIELD: 11933 gen_a64_update_pc(dc, 4); 11934 gen_helper_yield(tcg_env); 11935 break; 11936 case DISAS_WFI: 11937 /* 11938 * This is a special case because we don't want to just halt 11939 * the CPU if trying to debug across a WFI. 11940 */ 11941 gen_a64_update_pc(dc, 4); 11942 gen_helper_wfi(tcg_env, tcg_constant_i32(4)); 11943 /* 11944 * The helper doesn't necessarily throw an exception, but we 11945 * must go back to the main loop to check for interrupts anyway. 11946 */ 11947 tcg_gen_exit_tb(NULL, 0); 11948 break; 11949 } 11950 } 11951 } 11952 11953 const TranslatorOps aarch64_translator_ops = { 11954 .init_disas_context = aarch64_tr_init_disas_context, 11955 .tb_start = aarch64_tr_tb_start, 11956 .insn_start = aarch64_tr_insn_start, 11957 .translate_insn = aarch64_tr_translate_insn, 11958 .tb_stop = aarch64_tr_tb_stop, 11959 }; 11960
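
/*
 * aarch64_translator_ops is consumed outside this file: the target's
 * gen_intermediate_code() hands it to the generic translator_loop() in
 * accel/tcg, which invokes the hooks above in sequence for every guest
 * instruction while building a translation block.
 */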