/*
 * AArch64 translation
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"
#include "semihosting/semihost.h"
#include "exec/gen-icount.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "cpregs.h"
#include "translate-a64.h"
#include "qemu/atomic128.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/*
 * Include the generated decoders.
 */

#include "decode-sme-fa64.c.inc"
#include "decode-a64.c.inc"

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* initialize TCG globals. */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
 */
static int get_a64_user_mem_index(DisasContext *s)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}
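
/*
 * For example, an LDTR/STTR executed at EL1 performs its access with the
 * EL0 translation regime, which is why ARMMMUIdx_E10_1(_PAN) maps to
 * ARMMMUIdx_E10_0 above.  At EL0 the same instructions behave like plain
 * LDR/STR, s->unpriv is false, and no remapping happens.
 */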

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), cpu_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55. */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}
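
/*
 * Worked example of the tbi cases above: with TBI0 set and TBI1 clear
 * (tbi == 1), a pointer such as 0x5A00_1234_5678_9ABC (bit 55 == 0) has
 * its tag byte cleared, giving 0x0000_1234_5678_9ABC, because the AND
 * with the original value keeps the zero extension; a pointer with
 * bit 55 == 1 is passed through unmodified, since TBI1 is off.
 * tbi == 2 is the mirror image: the OR means only pointers with
 * bit 55 == 1 have their tag byte replaced by the extension.
 * tbi == 3 uses the plain sign extension for both halves.
 */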

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(cpu_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      int log2_size, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int log2_size)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int size)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}
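
/*
 * Sketch of a typical call from a load/store expander (names here are
 * illustrative, not a specific caller in this file):
 *
 *     clean_addr = gen_mte_check1(s, dirty_addr, is_store, tag_checked, size);
 *     tcg_gen_qemu_ld_i64(dest, clean_addr, get_mem_index(s), memop);
 *
 * i.e. the returned value is the address actually used for the access,
 * either verified against the allocation tag by the mte_check helper or
 * merely stripped of its tag byte by clean_data_tbi.
 */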

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(cpu_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments.  For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path.  A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}
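
/*
 * Sketch of how the branch expanders further down use this: a conditional
 * branch typically ends its translation with
 *
 *     gen_goto_tb(s, 0, not_taken_diff);    on the fall-through path
 *     gen_goto_tb(s, 1, taken_diff);        on the taken path
 *
 * so that both exits can be chained.  The slot number n only selects which
 * of the two chaining slots of the current TB is patched.
 */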

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
}
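
/*
 * Example of the SP/ZR distinction: in "ADD X0, SP, X1" (extended-register
 * form) the Rn field of 31 means SP and must be read via cpu_reg_sp() /
 * read_cpu_reg_sp(), whereas in the shifted-register form "ADD X0, XZR, X1"
 * the same field value denotes the zero register and goes through
 * cpu_reg() / read_cpu_reg().
 */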

/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper. */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper. */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + qc + operation using an out-of-line helper. */
static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
                            int rm, gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), qc_ptr,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
}
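
/*
 * Illustrative use of the expanders above (rd/rn/rm/is_q are the usual
 * decode fields): a vector integer ADD can be emitted as
 *
 *     gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
 *
 * while operations without an inline gvec implementation go through an
 * out-of-line helper, e.g. gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, fn).
 */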

/* Expand a 4-operand operation using an out-of-line helper. */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
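
/*
 * The signed-overflow computation above follows the usual identity
 * V = (result ^ t0) & ~(t0 ^ t1), taken in the sign bit: overflow can only
 * occur when both operands have the same sign and the result's sign
 * differs.  E.g. 0x7fffffff + 1 sets V (and N), while 0xffffffff + 1
 * sets C and Z but not V.
 */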

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
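
/*
 * Note the AArch64 carry convention for subtraction used above: C is set
 * when there is no borrow, i.e. when t0 >= t1 as unsigned values (hence
 * TCG_COND_GEU).  So "SUBS X0, X1, X2" with X1 == X2 yields Z=1 and C=1,
 * while "CMP X1, X2" with X1 < X2 (unsigned) yields C=0.
 */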

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    MemOp mop;

    tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));

    if (size < 4) {
        mop = finalize_memop(s, size);
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
        TCGv_i64 tmphi = tcg_temp_new_i64();

        tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));

        mop = s->be_data | MO_UQ;
        tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr,
                            get_mem_index(s), mop);
    }
}
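
/*
 * The iss_* arguments above feed syn_data_abort_with_iss(): they describe
 * the instruction (target register, sign extension, 32/64-bit destination,
 * acquire/release) so that a data abort taken on the access carries a
 * valid ISS in ESR_ELx, allowing e.g. a hypervisor to emulate the access
 * without fetching and decoding the instruction.  Callers pass
 * iss_valid == false when the instruction form cannot be described by the
 * ISS encoding.
 */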

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;
    MemOp mop;

    if (size < 4) {
        mop = finalize_memop(s, size);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        mop = s->be_data | MO_UQ;
        tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr,
                            get_mem_index(s), mop);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));

    if (tmphi) {
        tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}
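
/*
 * For example, read_vec_element(s, tmp, rn, 2, MO_16) fetches the third
 * 16-bit lane of Vn (i.e. Vn.H[2]), zero-extended into a 64-bit temporary,
 * while MO_16 | MO_SIGN would sign-extend it instead.
 */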

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority.  This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        s->fp_access_checked = true;
        return sme_access_check(s);
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}
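
/*
 * Example: for "ADD X0, X1, W2, UXTW #2" the extend field encodes
 * option == 2 (UXTW: unsigned, 32-bit source) and shift == 2, so the code
 * above zero-extends the low 32 bits of the register and then shifts the
 * result left by two before it is used in the addition.
 */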

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table based lookup decoder. It is intended
 * to be used when the relevant bits for decode are too awkwardly
 * placed and switch/if based logic would be confusing and deeply
 * nested. Since it's a linear search through the table, tables should
 * be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0).
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
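
/*
 * Illustrative use (a sketch only; the real tables and their patterns
 * appear with the SIMD decoders later in this file):
 *
 *     static const AArch64DecodeTable example_table[] = {
 *         { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
 *         { 0x00000000, 0x00000000, NULL }
 *     };
 *
 *     AArch64DecodeFn *fn = lookup_disas_fn(example_table, insn);
 *     if (fn) {
 *         fn(s, insn);
 *     } else {
 *         unallocated_encoding(s);
 *     }
 */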

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

/* Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    int64_t diff = sextract32(insn, 0, 26) * 4;

    if (insn & (1U << 31)) {
        /* BL Branch with link */
        gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    }

    /* B Branch / BL Branch with link */
    reset_btype(s);
    gen_goto_tb(s, 0, diff);
}

/* Compare and branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |        imm19        |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    int64_t diff;
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    diff = sextract32(insn, 5, 19) * 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, diff);
}

/* Test and branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    int64_t diff;
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    diff = sextract32(insn, 5, 14) * 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, diff);
}

/* Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |        imm19        | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    int64_t diff;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    diff = sextract32(insn, 5, 19) * 4;
    cond = extract32(insn, 0, 4);

    reset_btype(s);
    if (cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, diff);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, diff);
    }
}
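
/*
 * Decode example for the compare-and-branch form above: "CBNZ W3, #+8"
 * has sf == 0, op == 1, Rt == 3 and imm19 == 2, so diff == 8.  The W
 * register is read zero-extended (read_cpu_reg with sf == 0); the
 * fall-through path chains to pc_curr + 4 via slot 0 and the taken path
 * chains to pc_curr + 8 via slot 1.
 */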

/* HINT instruction group, including various allocated HINTs */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0b00000: /* NOP */
        break;
    case 0b00011: /* WFI */
        s->base.is_jmp = DISAS_WFI;
        break;
    case 0b00001: /* YIELD */
        /* When running in MTTCG we don't generate jumps to the yield and
         * WFE helpers as it won't affect the scheduling of other vCPUs.
         * If we wanted to more completely model WFE/SEV so we don't busy
         * spin unnecessarily we would need to do something more involved.
         */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_YIELD;
        }
        break;
    case 0b00010: /* WFE */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_WFE;
        }
        break;
    case 0b00100: /* SEV */
    case 0b00101: /* SEVL */
    case 0b00110: /* DGH */
        /* we treat all as NOP at least for now */
        break;
    case 0b00111: /* XPACLRI */
        if (s->pauth_active) {
            gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
        }
        break;
    case 0b01000: /* PACIA1716 */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01010: /* PACIB1716 */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01100: /* AUTIA1716 */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01110: /* AUTIB1716 */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b10000: /* ESB */
        /* Without RAS, we must implement this as NOP. */
        if (dc_isar_feature(aa64_ras, s)) {
            /*
             * QEMU does not have a source of physical SErrors,
             * so we are only concerned with virtual SErrors.
             * The pseudocode in the ARM for this case is
             *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
             *     AArch64.vESBOperation();
             * Most of the condition can be evaluated at translation time.
             * Test for EL2 present, and defer test for SEL2 to runtime.
             */
            if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
                gen_helper_vesb(cpu_env);
            }
        }
        break;
    case 0b11000: /* PACIAZ */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11001: /* PACIASP */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11010: /* PACIBZ */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11011: /* PACIBSP */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11100: /* AUTIAZ */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11101: /* AUTIASP */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11110: /* AUTIBZ */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11111: /* AUTIBSP */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    default:
        /* default specified as NOP equivalent */
        break;
    }
}
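
/*
 * The selector packs CRm and op2 as crm:op2, so e.g. PACIASP (HINT #25)
 * arrives here with crm == 3 and op2 == 1, giving selector 0b11001.  All
 * of the pointer-authentication hints sit in the architectural NOP space,
 * which is why they quietly do nothing when s->pauth_active is false.
 */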

static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}

/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGBar bar;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        switch (crm & 3) {
        case 1: /* MBReqTypes_Reads */
            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
            break;
        case 2: /* MBReqTypes_Writes */
            bar = TCG_BAR_SC | TCG_MO_ST_ST;
            break;
        default: /* MBReqTypes_All */
            bar = TCG_BAR_SC | TCG_MO_ALL;
            break;
        }
        tcg_gen_mb(bar);
        return;
    case 6: /* ISB */
        /* We need to break the TB after this insn to execute
         * self-modifying code correctly and also to take
         * any pending interrupts immediately.
         */
        reset_btype(s);
        gen_goto_tb(s, 0, 4);
        return;

    case 7: /* SB */
        if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
            goto do_unallocated;
        }
        /*
         * TODO: There is no speculation barrier opcode for TCG;
         * MB and end the TB instead.
         */
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
        gen_goto_tb(s, 0, 4);
        return;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }
}

static void gen_xaflag(void)
{
    TCGv_i32 z = tcg_temp_new_i32();

    tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);

    /*
     * (!C & !Z) << 31
     * (!(C | Z)) << 31
     * ~((C | Z) << 31)
     * ~-(C | Z)
     * (C | Z) - 1
     */
    tcg_gen_or_i32(cpu_NF, cpu_CF, z);
    tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);

    /* !(Z & C) */
    tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
    tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);

    /* (!C & Z) << 31 -> -(Z & ~C) */
    tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);

    /* C | Z */
    tcg_gen_or_i32(cpu_CF, cpu_CF, z);
}

static void gen_axflag(void)
{
    tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
    tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */

    /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
    tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);

    tcg_gen_movi_i32(cpu_NF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* MSR (immediate) - move immediate to processor state field */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    int op = op1 << 3 | op2;

    /* End the TB by default, chaining is ok. */
    s->base.is_jmp = DISAS_TOO_MANY;

    switch (op) {
    case 0x00: /* CFINV */
        if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
            goto do_unallocated;
        }
        tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x01: /* XAFlag */
        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
            goto do_unallocated;
        }
        gen_xaflag();
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x02: /* AXFlag */
        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
            goto do_unallocated;
        }
        gen_axflag();
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x03: /* UAO */
        if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_UAO);
        } else {
            clear_pstate_bits(PSTATE_UAO);
        }
        gen_rebuild_hflags(s);
        break;

    case 0x04: /* PAN */
        if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_PAN);
        } else {
            clear_pstate_bits(PSTATE_PAN);
        }
        gen_rebuild_hflags(s);
        break;

    case 0x05: /* SPSel */
        if (s->current_el == 0) {
            goto do_unallocated;
        }
        gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(crm & PSTATE_SP));
        break;

    case 0x19: /* SSBS */
        if (!dc_isar_feature(aa64_ssbs, s)) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_SSBS);
        } else {
            clear_pstate_bits(PSTATE_SSBS);
        }
        /* Don't need to rebuild hflags since SSBS is a nop */
        break;

    case 0x1a: /* DIT */
        if (!dc_isar_feature(aa64_dit, s)) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_DIT);
        } else {
            clear_pstate_bits(PSTATE_DIT);
        }
        /* There's no need to rebuild hflags because DIT is a nop */
        break;

    case 0x1e: /* DAIFSet */
        gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(crm));
        break;

    case 0x1f: /* DAIFClear */
        gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(crm));
        /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */
        s->base.is_jmp = DISAS_UPDATE_EXIT;
        break;

    case 0x1c: /* TCO */
        if (dc_isar_feature(aa64_mte, s)) {
            /* Full MTE is enabled -- set the TCO bit as directed. */
            if (crm & 1) {
                set_pstate_bits(PSTATE_TCO);
            } else {
                clear_pstate_bits(PSTATE_TCO);
            }
            gen_rebuild_hflags(s);
            /* Many factors, including TCO, go into MTE_ACTIVE. */
            s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
        } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
            /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */
            s->base.is_jmp = DISAS_NEXT;
        } else {
            goto do_unallocated;
        }
        break;

    case 0x1b: /* SVCR* */
        if (!dc_isar_feature(aa64_sme, s) || crm < 2 || crm > 7) {
            goto do_unallocated;
        }
        if (sme_access_check(s)) {
            int old = s->pstate_sm | (s->pstate_za << 1);
            int new = (crm & 1) * 3;
            int msk = (crm >> 1) & 3;

            if ((old ^ new) & msk) {
                /* At least one bit changes. */
                gen_helper_set_svcr(cpu_env, tcg_constant_i32(new),
                                    tcg_constant_i32(msk));
            } else {
                s->base.is_jmp = DISAS_NEXT;
            }
        }
        break;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }
}

static void gen_get_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* build bit 31, N */
    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
    /* build bit 30, Z */
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
    /* build bit 29, C */
    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
    /* build bit 28, V */
    tcg_gen_shri_i32(tmp, cpu_VF, 31);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
    /* generate result */
    tcg_gen_extu_i32_i64(tcg_rt, nzcv);
}

static void gen_set_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* take NZCV from R[t] */
    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);

    /* bit 31, N */
    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
    /* bit 30, Z */
    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
    /* bit 29, C */
    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
    /* bit 28, V */
    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
}
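
/*
 * The two functions above convert between the architectural NZCV layout
 * (N in bit 31, Z in bit 30, C in bit 29, V in bit 28) and QEMU's split
 * representation: cpu_NF and cpu_VF hold N and V in their bit 31, cpu_CF
 * holds C as 0 or 1, and the Z flag is set exactly when cpu_ZF == 0.
 * So e.g. "MRS X0, NZCV" after "CMP X0, X0" (equal operands) reads back
 * 0x60000000, i.e. Z and C set.
 */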

static void gen_sysreg_undef(DisasContext *s, bool isread,
                             uint8_t op0, uint8_t op1, uint8_t op2,
                             uint8_t crn, uint8_t crm, uint8_t rt)
{
    /*
     * Generate code to emit an UNDEF with correct syndrome
     * information for a failed system register access.
     * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
     * but if FEAT_IDST is implemented then read accesses to registers
     * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
     * syndrome.
     */
    uint32_t syndrome;

    if (isread && dc_isar_feature(aa64_ids, s) &&
        arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
    } else {
        syndrome = syn_uncategorized();
    }
    gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
}

/* MRS - move from system register
 * MSR (register) - move to system register
 * SYS
 * SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                      crn, crm, op0, op1, op2);
    const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
    TCGv_ptr tcg_ri = NULL;
    TCGv_i64 tcg_rt;

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
        return;
    }

    if (ri->accessfn || (ri->fgt && s->fgt_active)) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        uint32_t syndrome;

        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        gen_a64_update_pc(s, 0);
        tcg_ri = tcg_temp_new_ptr();
        gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
                                       tcg_constant_i32(key),
                                       tcg_constant_i32(syndrome),
                                       tcg_constant_i32(isread));
    } else if (ri->type & ARM_CP_RAISES_EXC) {
        /*
         * The readfn or writefn might raise an exception;
         * synchronize the CPU state in case it does.
         */
        gen_a64_update_pc(s, 0);
    }

    /* Handle special cases first */
    switch (ri->type & ARM_CP_SPECIAL_MASK) {
    case 0:
        break;
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
*/ 1931 if (s->mte_active[0]) { 1932 int desc = 0; 1933 1934 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 1935 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 1936 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 1937 1938 tcg_rt = tcg_temp_new_i64(); 1939 gen_helper_mte_check_zva(tcg_rt, cpu_env, 1940 tcg_constant_i32(desc), cpu_reg(s, rt)); 1941 } else { 1942 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 1943 } 1944 gen_helper_dc_zva(cpu_env, tcg_rt); 1945 return; 1946 case ARM_CP_DC_GVA: 1947 { 1948 TCGv_i64 clean_addr, tag; 1949 1950 /* 1951 * DC_GVA, like DC_ZVA, requires that we supply the original 1952 * pointer for an invalid page. Probe that address first. 1953 */ 1954 tcg_rt = cpu_reg(s, rt); 1955 clean_addr = clean_data_tbi(s, tcg_rt); 1956 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 1957 1958 if (s->ata) { 1959 /* Extract the tag from the register to match STZGM. */ 1960 tag = tcg_temp_new_i64(); 1961 tcg_gen_shri_i64(tag, tcg_rt, 56); 1962 gen_helper_stzgm_tags(cpu_env, clean_addr, tag); 1963 } 1964 } 1965 return; 1966 case ARM_CP_DC_GZVA: 1967 { 1968 TCGv_i64 clean_addr, tag; 1969 1970 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 1971 tcg_rt = cpu_reg(s, rt); 1972 clean_addr = clean_data_tbi(s, tcg_rt); 1973 gen_helper_dc_zva(cpu_env, clean_addr); 1974 1975 if (s->ata) { 1976 /* Extract the tag from the register to match STZGM. */ 1977 tag = tcg_temp_new_i64(); 1978 tcg_gen_shri_i64(tag, tcg_rt, 56); 1979 gen_helper_stzgm_tags(cpu_env, clean_addr, tag); 1980 } 1981 } 1982 return; 1983 default: 1984 g_assert_not_reached(); 1985 } 1986 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 1987 return; 1988 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 1989 return; 1990 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 1991 return; 1992 } 1993 1994 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { 1995 gen_io_start(); 1996 } 1997 1998 tcg_rt = cpu_reg(s, rt); 1999 2000 if (isread) { 2001 if (ri->type & ARM_CP_CONST) { 2002 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2003 } else if (ri->readfn) { 2004 if (!tcg_ri) { 2005 tcg_ri = gen_lookup_cp_reg(key); 2006 } 2007 gen_helper_get_cp_reg64(tcg_rt, cpu_env, tcg_ri); 2008 } else { 2009 tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset); 2010 } 2011 } else { 2012 if (ri->type & ARM_CP_CONST) { 2013 /* If not forbidden by access permissions, treat as WI */ 2014 return; 2015 } else if (ri->writefn) { 2016 if (!tcg_ri) { 2017 tcg_ri = gen_lookup_cp_reg(key); 2018 } 2019 gen_helper_set_cp_reg64(cpu_env, tcg_ri, tcg_rt); 2020 } else { 2021 tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset); 2022 } 2023 } 2024 2025 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { 2026 /* I/O operations must end the TB here (whether read or write) */ 2027 s->base.is_jmp = DISAS_UPDATE_EXIT; 2028 } 2029 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2030 /* 2031 * A write to any coprocessor register that ends a TB 2032 * must rebuild the hflags for the next TB. 2033 */ 2034 gen_rebuild_hflags(s); 2035 /* 2036 * We default to ending the TB on a coprocessor register write, 2037 * but allow this to be suppressed by the register definition 2038 * (usually only necessary to work around guest bugs). 
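 * Ending the TB here means the next TB is looked up with freshly rebuilt
 * hflags, so a write that changes e.g. the translation regime or the
 * FP/SVE enables takes effect before the following instruction.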
2039 */ 2040 s->base.is_jmp = DISAS_UPDATE_EXIT; 2041 } 2042 } 2043 2044 /* System 2045 * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0 2046 * +---------------------+---+-----+-----+-------+-------+-----+------+ 2047 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt | 2048 * +---------------------+---+-----+-----+-------+-------+-----+------+ 2049 */ 2050 static void disas_system(DisasContext *s, uint32_t insn) 2051 { 2052 unsigned int l, op0, op1, crn, crm, op2, rt; 2053 l = extract32(insn, 21, 1); 2054 op0 = extract32(insn, 19, 2); 2055 op1 = extract32(insn, 16, 3); 2056 crn = extract32(insn, 12, 4); 2057 crm = extract32(insn, 8, 4); 2058 op2 = extract32(insn, 5, 3); 2059 rt = extract32(insn, 0, 5); 2060 2061 if (op0 == 0) { 2062 if (l || rt != 31) { 2063 unallocated_encoding(s); 2064 return; 2065 } 2066 switch (crn) { 2067 case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */ 2068 handle_hint(s, insn, op1, op2, crm); 2069 break; 2070 case 3: /* CLREX, DSB, DMB, ISB */ 2071 handle_sync(s, insn, op1, op2, crm); 2072 break; 2073 case 4: /* MSR (immediate) */ 2074 handle_msr_i(s, insn, op1, op2, crm); 2075 break; 2076 default: 2077 unallocated_encoding(s); 2078 break; 2079 } 2080 return; 2081 } 2082 handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt); 2083 } 2084 2085 /* Exception generation 2086 * 2087 * 31 24 23 21 20 5 4 2 1 0 2088 * +-----------------+-----+------------------------+-----+----+ 2089 * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL | 2090 * +-----------------------+------------------------+----------+ 2091 */ 2092 static void disas_exc(DisasContext *s, uint32_t insn) 2093 { 2094 int opc = extract32(insn, 21, 3); 2095 int op2_ll = extract32(insn, 0, 5); 2096 int imm16 = extract32(insn, 5, 16); 2097 uint32_t syndrome; 2098 2099 switch (opc) { 2100 case 0: 2101 /* For SVC, HVC and SMC we advance the single-step state 2102 * machine before taking the exception. This is architecturally 2103 * mandated, to ensure that single-stepping a system call 2104 * instruction works properly. 2105 */ 2106 switch (op2_ll) { 2107 case 1: /* SVC */ 2108 syndrome = syn_aa64_svc(imm16); 2109 if (s->fgt_svc) { 2110 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2111 break; 2112 } 2113 gen_ss_advance(s); 2114 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2115 break; 2116 case 2: /* HVC */ 2117 if (s->current_el == 0) { 2118 unallocated_encoding(s); 2119 break; 2120 } 2121 /* The pre HVC helper handles cases when HVC gets trapped 2122 * as an undefined insn by runtime configuration. 2123 */ 2124 gen_a64_update_pc(s, 0); 2125 gen_helper_pre_hvc(cpu_env); 2126 gen_ss_advance(s); 2127 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(imm16), 2); 2128 break; 2129 case 3: /* SMC */ 2130 if (s->current_el == 0) { 2131 unallocated_encoding(s); 2132 break; 2133 } 2134 gen_a64_update_pc(s, 0); 2135 gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(imm16))); 2136 gen_ss_advance(s); 2137 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(imm16), 3); 2138 break; 2139 default: 2140 unallocated_encoding(s); 2141 break; 2142 } 2143 break; 2144 case 1: 2145 if (op2_ll != 0) { 2146 unallocated_encoding(s); 2147 break; 2148 } 2149 /* BRK */ 2150 gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16)); 2151 break; 2152 case 2: 2153 if (op2_ll != 0) { 2154 unallocated_encoding(s); 2155 break; 2156 } 2157 /* HLT. This has two purposes. 2158 * Architecturally, it is an external halting debug instruction. 
2159 * Since QEMU doesn't implement external debug, we treat this as 2160 * it is required for halting debug disabled: it will UNDEF. 2161 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2162 */ 2163 if (semihosting_enabled(s->current_el == 0) && imm16 == 0xf000) { 2164 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2165 } else { 2166 unallocated_encoding(s); 2167 } 2168 break; 2169 case 5: 2170 if (op2_ll < 1 || op2_ll > 3) { 2171 unallocated_encoding(s); 2172 break; 2173 } 2174 /* DCPS1, DCPS2, DCPS3 */ 2175 unallocated_encoding(s); 2176 break; 2177 default: 2178 unallocated_encoding(s); 2179 break; 2180 } 2181 } 2182 2183 /* Unconditional branch (register) 2184 * 31 25 24 21 20 16 15 10 9 5 4 0 2185 * +---------------+-------+-------+-------+------+-------+ 2186 * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 | 2187 * +---------------+-------+-------+-------+------+-------+ 2188 */ 2189 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) 2190 { 2191 unsigned int opc, op2, op3, rn, op4; 2192 unsigned btype_mod = 2; /* 0: BR, 1: BLR, 2: other */ 2193 TCGv_i64 dst; 2194 TCGv_i64 modifier; 2195 2196 opc = extract32(insn, 21, 4); 2197 op2 = extract32(insn, 16, 5); 2198 op3 = extract32(insn, 10, 6); 2199 rn = extract32(insn, 5, 5); 2200 op4 = extract32(insn, 0, 5); 2201 2202 if (op2 != 0x1f) { 2203 goto do_unallocated; 2204 } 2205 2206 switch (opc) { 2207 case 0: /* BR */ 2208 case 1: /* BLR */ 2209 case 2: /* RET */ 2210 btype_mod = opc; 2211 switch (op3) { 2212 case 0: 2213 /* BR, BLR, RET */ 2214 if (op4 != 0) { 2215 goto do_unallocated; 2216 } 2217 dst = cpu_reg(s, rn); 2218 break; 2219 2220 case 2: 2221 case 3: 2222 if (!dc_isar_feature(aa64_pauth, s)) { 2223 goto do_unallocated; 2224 } 2225 if (opc == 2) { 2226 /* RETAA, RETAB */ 2227 if (rn != 0x1f || op4 != 0x1f) { 2228 goto do_unallocated; 2229 } 2230 rn = 30; 2231 modifier = cpu_X[31]; 2232 } else { 2233 /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */ 2234 if (op4 != 0x1f) { 2235 goto do_unallocated; 2236 } 2237 modifier = tcg_constant_i64(0); 2238 } 2239 if (s->pauth_active) { 2240 dst = tcg_temp_new_i64(); 2241 if (op3 == 2) { 2242 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier); 2243 } else { 2244 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier); 2245 } 2246 } else { 2247 dst = cpu_reg(s, rn); 2248 } 2249 break; 2250 2251 default: 2252 goto do_unallocated; 2253 } 2254 /* BLR also needs to load return address */ 2255 if (opc == 1) { 2256 TCGv_i64 lr = cpu_reg(s, 30); 2257 if (dst == lr) { 2258 TCGv_i64 tmp = tcg_temp_new_i64(); 2259 tcg_gen_mov_i64(tmp, dst); 2260 dst = tmp; 2261 } 2262 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 2263 } 2264 gen_a64_set_pc(s, dst); 2265 break; 2266 2267 case 8: /* BRAA */ 2268 case 9: /* BLRAA */ 2269 if (!dc_isar_feature(aa64_pauth, s)) { 2270 goto do_unallocated; 2271 } 2272 if ((op3 & ~1) != 2) { 2273 goto do_unallocated; 2274 } 2275 btype_mod = opc & 1; 2276 if (s->pauth_active) { 2277 dst = tcg_temp_new_i64(); 2278 modifier = cpu_reg_sp(s, op4); 2279 if (op3 == 2) { 2280 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier); 2281 } else { 2282 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier); 2283 } 2284 } else { 2285 dst = cpu_reg(s, rn); 2286 } 2287 /* BLRAA also needs to load return address */ 2288 if (opc == 9) { 2289 TCGv_i64 lr = cpu_reg(s, 30); 2290 if (dst == lr) { 2291 TCGv_i64 tmp = tcg_temp_new_i64(); 2292 tcg_gen_mov_i64(tmp, dst); 2293 dst = tmp; 2294 } 2295 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 2296 } 2297 gen_a64_set_pc(s, dst); 
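        /*
         * When the target was taken directly from x30 (no PAC authentication
         * and Rn == 30), dst aliases the link register; the copy above
         * preserves the branch target before x30 is overwritten with the
         * return address.
         */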
2298 break; 2299 2300 case 4: /* ERET */ 2301 if (s->current_el == 0) { 2302 goto do_unallocated; 2303 } 2304 switch (op3) { 2305 case 0: /* ERET */ 2306 if (op4 != 0) { 2307 goto do_unallocated; 2308 } 2309 if (s->fgt_eret) { 2310 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(op3), 2); 2311 return; 2312 } 2313 dst = tcg_temp_new_i64(); 2314 tcg_gen_ld_i64(dst, cpu_env, 2315 offsetof(CPUARMState, elr_el[s->current_el])); 2316 break; 2317 2318 case 2: /* ERETAA */ 2319 case 3: /* ERETAB */ 2320 if (!dc_isar_feature(aa64_pauth, s)) { 2321 goto do_unallocated; 2322 } 2323 if (rn != 0x1f || op4 != 0x1f) { 2324 goto do_unallocated; 2325 } 2326 /* The FGT trap takes precedence over an auth trap. */ 2327 if (s->fgt_eret) { 2328 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(op3), 2); 2329 return; 2330 } 2331 dst = tcg_temp_new_i64(); 2332 tcg_gen_ld_i64(dst, cpu_env, 2333 offsetof(CPUARMState, elr_el[s->current_el])); 2334 if (s->pauth_active) { 2335 modifier = cpu_X[31]; 2336 if (op3 == 2) { 2337 gen_helper_autia(dst, cpu_env, dst, modifier); 2338 } else { 2339 gen_helper_autib(dst, cpu_env, dst, modifier); 2340 } 2341 } 2342 break; 2343 2344 default: 2345 goto do_unallocated; 2346 } 2347 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { 2348 gen_io_start(); 2349 } 2350 2351 gen_helper_exception_return(cpu_env, dst); 2352 /* Must exit loop to check un-masked IRQs */ 2353 s->base.is_jmp = DISAS_EXIT; 2354 return; 2355 2356 case 5: /* DRPS */ 2357 if (op3 != 0 || op4 != 0 || rn != 0x1f) { 2358 goto do_unallocated; 2359 } else { 2360 unallocated_encoding(s); 2361 } 2362 return; 2363 2364 default: 2365 do_unallocated: 2366 unallocated_encoding(s); 2367 return; 2368 } 2369 2370 switch (btype_mod) { 2371 case 0: /* BR */ 2372 if (dc_isar_feature(aa64_bti, s)) { 2373 /* BR to {x16,x17} or !guard -> 1, else 3. */ 2374 set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3); 2375 } 2376 break; 2377 2378 case 1: /* BLR */ 2379 if (dc_isar_feature(aa64_bti, s)) { 2380 /* BLR sets BTYPE to 2, regardless of source guarded page. */ 2381 set_btype(s, 2); 2382 } 2383 break; 2384 2385 default: /* RET or none of the above. */ 2386 /* BTYPE will be set to 0 by normal end-of-insn processing. */ 2387 break; 2388 } 2389 2390 s->base.is_jmp = DISAS_JUMP; 2391 } 2392 2393 /* Branches, exception generating and system instructions */ 2394 static void disas_b_exc_sys(DisasContext *s, uint32_t insn) 2395 { 2396 switch (extract32(insn, 25, 7)) { 2397 case 0x0a: case 0x0b: 2398 case 0x4a: case 0x4b: /* Unconditional branch (immediate) */ 2399 disas_uncond_b_imm(s, insn); 2400 break; 2401 case 0x1a: case 0x5a: /* Compare & branch (immediate) */ 2402 disas_comp_b_imm(s, insn); 2403 break; 2404 case 0x1b: case 0x5b: /* Test & branch (immediate) */ 2405 disas_test_b_imm(s, insn); 2406 break; 2407 case 0x2a: /* Conditional branch (immediate) */ 2408 disas_cond_b_imm(s, insn); 2409 break; 2410 case 0x6a: /* Exception generation / System */ 2411 if (insn & (1 << 24)) { 2412 if (extract32(insn, 22, 2) == 0) { 2413 disas_system(s, insn); 2414 } else { 2415 unallocated_encoding(s); 2416 } 2417 } else { 2418 disas_exc(s, insn); 2419 } 2420 break; 2421 case 0x6b: /* Unconditional branch (register) */ 2422 disas_uncond_b_reg(s, insn); 2423 break; 2424 default: 2425 unallocated_encoding(s); 2426 break; 2427 } 2428 } 2429 2430 /* 2431 * Load/Store exclusive instructions are implemented by remembering 2432 * the value/address loaded, and seeing if these are the same 2433 * when the store is performed. 
This is not actually the architecturally 2434 * mandated semantics, but it works for typical guest code sequences 2435 * and avoids having to monitor regular stores. 2436 * 2437 * The store exclusive uses the atomic cmpxchg primitives to avoid 2438 * races in multi-threaded linux-user and when MTTCG softmmu is 2439 * enabled. 2440 */ 2441 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, 2442 TCGv_i64 addr, int size, bool is_pair) 2443 { 2444 int idx = get_mem_index(s); 2445 MemOp memop = s->be_data; 2446 2447 g_assert(size <= 3); 2448 if (is_pair) { 2449 g_assert(size >= 2); 2450 if (size == 2) { 2451 /* The pair must be single-copy atomic for the doubleword. */ 2452 memop |= MO_64 | MO_ALIGN; 2453 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop); 2454 if (s->be_data == MO_LE) { 2455 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2456 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2457 } else { 2458 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2459 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2460 } 2461 } else { 2462 /* The pair must be single-copy atomic for *each* doubleword, not 2463 the entire quadword, however it must be quadword aligned. */ 2464 memop |= MO_64; 2465 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, 2466 memop | MO_ALIGN_16); 2467 2468 TCGv_i64 addr2 = tcg_temp_new_i64(); 2469 tcg_gen_addi_i64(addr2, addr, 8); 2470 tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop); 2471 2472 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2473 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2474 } 2475 } else { 2476 memop |= size | MO_ALIGN; 2477 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop); 2478 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2479 } 2480 tcg_gen_mov_i64(cpu_exclusive_addr, addr); 2481 } 2482 2483 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2484 TCGv_i64 addr, int size, int is_pair) 2485 { 2486 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2487 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2488 * [addr] = {Rt}; 2489 * if (is_pair) { 2490 * [addr + datasize] = {Rt2}; 2491 * } 2492 * {Rd} = 0; 2493 * } else { 2494 * {Rd} = 1; 2495 * } 2496 * env->exclusive_addr = -1; 2497 */ 2498 TCGLabel *fail_label = gen_new_label(); 2499 TCGLabel *done_label = gen_new_label(); 2500 TCGv_i64 tmp; 2501 2502 tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label); 2503 2504 tmp = tcg_temp_new_i64(); 2505 if (is_pair) { 2506 if (size == 2) { 2507 if (s->be_data == MO_LE) { 2508 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2509 } else { 2510 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2511 } 2512 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2513 cpu_exclusive_val, tmp, 2514 get_mem_index(s), 2515 MO_64 | MO_ALIGN | s->be_data); 2516 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2517 } else { 2518 TCGv_i128 t16 = tcg_temp_new_i128(); 2519 TCGv_i128 c16 = tcg_temp_new_i128(); 2520 TCGv_i64 a, b; 2521 2522 if (s->be_data == MO_LE) { 2523 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2524 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2525 cpu_exclusive_high); 2526 } else { 2527 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2528 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 2529 cpu_exclusive_val); 2530 } 2531 2532 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 2533 
get_mem_index(s), 2534 MO_128 | MO_ALIGN | s->be_data); 2535 2536 a = tcg_temp_new_i64(); 2537 b = tcg_temp_new_i64(); 2538 if (s->be_data == MO_LE) { 2539 tcg_gen_extr_i128_i64(a, b, t16); 2540 } else { 2541 tcg_gen_extr_i128_i64(b, a, t16); 2542 } 2543 2544 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 2545 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 2546 tcg_gen_or_i64(tmp, a, b); 2547 2548 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 2549 } 2550 } else { 2551 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 2552 cpu_reg(s, rt), get_mem_index(s), 2553 size | MO_ALIGN | s->be_data); 2554 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2555 } 2556 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 2557 tcg_gen_br(done_label); 2558 2559 gen_set_label(fail_label); 2560 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 2561 gen_set_label(done_label); 2562 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2563 } 2564 2565 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 2566 int rn, int size) 2567 { 2568 TCGv_i64 tcg_rs = cpu_reg(s, rs); 2569 TCGv_i64 tcg_rt = cpu_reg(s, rt); 2570 int memidx = get_mem_index(s); 2571 TCGv_i64 clean_addr; 2572 2573 if (rn == 31) { 2574 gen_check_sp_alignment(s); 2575 } 2576 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); 2577 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx, 2578 size | MO_ALIGN | s->be_data); 2579 } 2580 2581 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 2582 int rn, int size) 2583 { 2584 TCGv_i64 s1 = cpu_reg(s, rs); 2585 TCGv_i64 s2 = cpu_reg(s, rs + 1); 2586 TCGv_i64 t1 = cpu_reg(s, rt); 2587 TCGv_i64 t2 = cpu_reg(s, rt + 1); 2588 TCGv_i64 clean_addr; 2589 int memidx = get_mem_index(s); 2590 2591 if (rn == 31) { 2592 gen_check_sp_alignment(s); 2593 } 2594 2595 /* This is a single atomic access, despite the "pair". */ 2596 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1); 2597 2598 if (size == 2) { 2599 TCGv_i64 cmp = tcg_temp_new_i64(); 2600 TCGv_i64 val = tcg_temp_new_i64(); 2601 2602 if (s->be_data == MO_LE) { 2603 tcg_gen_concat32_i64(val, t1, t2); 2604 tcg_gen_concat32_i64(cmp, s1, s2); 2605 } else { 2606 tcg_gen_concat32_i64(val, t2, t1); 2607 tcg_gen_concat32_i64(cmp, s2, s1); 2608 } 2609 2610 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, 2611 MO_64 | MO_ALIGN | s->be_data); 2612 2613 if (s->be_data == MO_LE) { 2614 tcg_gen_extr32_i64(s1, s2, cmp); 2615 } else { 2616 tcg_gen_extr32_i64(s2, s1, cmp); 2617 } 2618 } else { 2619 TCGv_i128 cmp = tcg_temp_new_i128(); 2620 TCGv_i128 val = tcg_temp_new_i128(); 2621 2622 if (s->be_data == MO_LE) { 2623 tcg_gen_concat_i64_i128(val, t1, t2); 2624 tcg_gen_concat_i64_i128(cmp, s1, s2); 2625 } else { 2626 tcg_gen_concat_i64_i128(val, t2, t1); 2627 tcg_gen_concat_i64_i128(cmp, s2, s1); 2628 } 2629 2630 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, 2631 MO_128 | MO_ALIGN | s->be_data); 2632 2633 if (s->be_data == MO_LE) { 2634 tcg_gen_extr_i128_i64(s1, s2, cmp); 2635 } else { 2636 tcg_gen_extr_i128_i64(s2, s1, cmp); 2637 } 2638 } 2639 } 2640 2641 /* Update the Sixty-Four bit (SF) registersize. This logic is derived 2642 * from the ARMv8 specs for LDR (Shared decode for all encodings). 2643 */ 2644 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc) 2645 { 2646 int opc0 = extract32(opc, 0, 1); 2647 int regsize; 2648 2649 if (is_signed) { 2650 regsize = opc0 ? 32 : 64; 2651 } else { 2652 regsize = size == 3 ? 
64 : 32; 2653 } 2654 return regsize == 64; 2655 } 2656 2657 /* Load/store exclusive 2658 * 2659 * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0 2660 * +-----+-------------+----+---+----+------+----+-------+------+------+ 2661 * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt | 2662 * +-----+-------------+----+---+----+------+----+-------+------+------+ 2663 * 2664 * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit 2665 * L: 0 -> store, 1 -> load 2666 * o2: 0 -> exclusive, 1 -> not 2667 * o1: 0 -> single register, 1 -> register pair 2668 * o0: 1 -> load-acquire/store-release, 0 -> not 2669 */ 2670 static void disas_ldst_excl(DisasContext *s, uint32_t insn) 2671 { 2672 int rt = extract32(insn, 0, 5); 2673 int rn = extract32(insn, 5, 5); 2674 int rt2 = extract32(insn, 10, 5); 2675 int rs = extract32(insn, 16, 5); 2676 int is_lasr = extract32(insn, 15, 1); 2677 int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr; 2678 int size = extract32(insn, 30, 2); 2679 TCGv_i64 clean_addr; 2680 2681 switch (o2_L_o1_o0) { 2682 case 0x0: /* STXR */ 2683 case 0x1: /* STLXR */ 2684 if (rn == 31) { 2685 gen_check_sp_alignment(s); 2686 } 2687 if (is_lasr) { 2688 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2689 } 2690 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2691 true, rn != 31, size); 2692 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false); 2693 return; 2694 2695 case 0x4: /* LDXR */ 2696 case 0x5: /* LDAXR */ 2697 if (rn == 31) { 2698 gen_check_sp_alignment(s); 2699 } 2700 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2701 false, rn != 31, size); 2702 s->is_ldex = true; 2703 gen_load_exclusive(s, rt, rt2, clean_addr, size, false); 2704 if (is_lasr) { 2705 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2706 } 2707 return; 2708 2709 case 0x8: /* STLLR */ 2710 if (!dc_isar_feature(aa64_lor, s)) { 2711 break; 2712 } 2713 /* StoreLORelease is the same as Store-Release for QEMU. */ 2714 /* fall through */ 2715 case 0x9: /* STLR */ 2716 /* Generate ISS for non-exclusive accesses including LASR. */ 2717 if (rn == 31) { 2718 gen_check_sp_alignment(s); 2719 } 2720 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2721 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2722 true, rn != 31, size); 2723 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 2724 do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt, 2725 disas_ldst_compute_iss_sf(size, false, 0), is_lasr); 2726 return; 2727 2728 case 0xc: /* LDLAR */ 2729 if (!dc_isar_feature(aa64_lor, s)) { 2730 break; 2731 } 2732 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 2733 /* fall through */ 2734 case 0xd: /* LDAR */ 2735 /* Generate ISS for non-exclusive accesses including LASR. 
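 * (The ISS is the instruction-specific syndrome reported in ESR_ELx if the
 * access takes a Data Abort; it lets a hypervisor decode the access without
 * having to fetch and decode the guest instruction.)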
*/ 2736 if (rn == 31) { 2737 gen_check_sp_alignment(s); 2738 } 2739 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2740 false, rn != 31, size); 2741 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 2742 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true, 2743 rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); 2744 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2745 return; 2746 2747 case 0x2: case 0x3: /* CASP / STXP */ 2748 if (size & 2) { /* STXP / STLXP */ 2749 if (rn == 31) { 2750 gen_check_sp_alignment(s); 2751 } 2752 if (is_lasr) { 2753 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2754 } 2755 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2756 true, rn != 31, size); 2757 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true); 2758 return; 2759 } 2760 if (rt2 == 31 2761 && ((rt | rs) & 1) == 0 2762 && dc_isar_feature(aa64_atomics, s)) { 2763 /* CASP / CASPL */ 2764 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); 2765 return; 2766 } 2767 break; 2768 2769 case 0x6: case 0x7: /* CASPA / LDXP */ 2770 if (size & 2) { /* LDXP / LDAXP */ 2771 if (rn == 31) { 2772 gen_check_sp_alignment(s); 2773 } 2774 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2775 false, rn != 31, size); 2776 s->is_ldex = true; 2777 gen_load_exclusive(s, rt, rt2, clean_addr, size, true); 2778 if (is_lasr) { 2779 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2780 } 2781 return; 2782 } 2783 if (rt2 == 31 2784 && ((rt | rs) & 1) == 0 2785 && dc_isar_feature(aa64_atomics, s)) { 2786 /* CASPA / CASPAL */ 2787 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); 2788 return; 2789 } 2790 break; 2791 2792 case 0xa: /* CAS */ 2793 case 0xb: /* CASL */ 2794 case 0xe: /* CASA */ 2795 case 0xf: /* CASAL */ 2796 if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) { 2797 gen_compare_and_swap(s, rs, rt, rn, size); 2798 return; 2799 } 2800 break; 2801 } 2802 unallocated_encoding(s); 2803 } 2804 2805 /* 2806 * Load register (literal) 2807 * 2808 * 31 30 29 27 26 25 24 23 5 4 0 2809 * +-----+-------+---+-----+-------------------+-------+ 2810 * | opc | 0 1 1 | V | 0 0 | imm19 | Rt | 2811 * +-----+-------+---+-----+-------------------+-------+ 2812 * 2813 * V: 1 -> vector (simd/fp) 2814 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit, 2815 * 10-> 32 bit signed, 11 -> prefetch 2816 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated) 2817 */ 2818 static void disas_ld_lit(DisasContext *s, uint32_t insn) 2819 { 2820 int rt = extract32(insn, 0, 5); 2821 int64_t imm = sextract32(insn, 5, 19) << 2; 2822 bool is_vector = extract32(insn, 26, 1); 2823 int opc = extract32(insn, 30, 2); 2824 bool is_signed = false; 2825 int size = 2; 2826 TCGv_i64 tcg_rt, clean_addr; 2827 2828 if (is_vector) { 2829 if (opc == 3) { 2830 unallocated_encoding(s); 2831 return; 2832 } 2833 size = 2 + opc; 2834 if (!fp_access_check(s)) { 2835 return; 2836 } 2837 } else { 2838 if (opc == 3) { 2839 /* PRFM (literal) : prefetch */ 2840 return; 2841 } 2842 size = 2 + extract32(opc, 0, 1); 2843 is_signed = extract32(opc, 1, 1); 2844 } 2845 2846 tcg_rt = cpu_reg(s, rt); 2847 2848 clean_addr = tcg_temp_new_i64(); 2849 gen_pc_plus_diff(s, clean_addr, imm); 2850 if (is_vector) { 2851 do_fp_ld(s, rt, clean_addr, size); 2852 } else { 2853 /* Only unsigned 32bit loads target 32bit registers. 
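 * That is, only opc == 0 (32-bit LDR) writes a W register; LDRSW and the
 * 64-bit load both target an X register, hence iss_sf below is simply
 * opc != 0.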
*/ 2854 bool iss_sf = opc != 0; 2855 2856 do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 2857 false, true, rt, iss_sf, false); 2858 } 2859 } 2860 2861 /* 2862 * LDNP (Load Pair - non-temporal hint) 2863 * LDP (Load Pair - non vector) 2864 * LDPSW (Load Pair Signed Word - non vector) 2865 * STNP (Store Pair - non-temporal hint) 2866 * STP (Store Pair - non vector) 2867 * LDNP (Load Pair of SIMD&FP - non-temporal hint) 2868 * LDP (Load Pair of SIMD&FP) 2869 * STNP (Store Pair of SIMD&FP - non-temporal hint) 2870 * STP (Store Pair of SIMD&FP) 2871 * 2872 * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0 2873 * +-----+-------+---+---+-------+---+-----------------------------+ 2874 * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt | 2875 * +-----+-------+---+---+-------+---+-------+-------+------+------+ 2876 * 2877 * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit 2878 * LDPSW/STGP 01 2879 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit 2880 * V: 0 -> GPR, 1 -> Vector 2881 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index, 2882 * 10 -> signed offset, 11 -> pre-index 2883 * L: 0 -> Store 1 -> Load 2884 * 2885 * Rt, Rt2 = GPR or SIMD registers to be stored 2886 * Rn = general purpose register containing address 2887 * imm7 = signed offset (multiple of 4 or 8 depending on size) 2888 */ 2889 static void disas_ldst_pair(DisasContext *s, uint32_t insn) 2890 { 2891 int rt = extract32(insn, 0, 5); 2892 int rn = extract32(insn, 5, 5); 2893 int rt2 = extract32(insn, 10, 5); 2894 uint64_t offset = sextract64(insn, 15, 7); 2895 int index = extract32(insn, 23, 2); 2896 bool is_vector = extract32(insn, 26, 1); 2897 bool is_load = extract32(insn, 22, 1); 2898 int opc = extract32(insn, 30, 2); 2899 2900 bool is_signed = false; 2901 bool postindex = false; 2902 bool wback = false; 2903 bool set_tag = false; 2904 2905 TCGv_i64 clean_addr, dirty_addr; 2906 2907 int size; 2908 2909 if (opc == 3) { 2910 unallocated_encoding(s); 2911 return; 2912 } 2913 2914 if (is_vector) { 2915 size = 2 + opc; 2916 } else if (opc == 1 && !is_load) { 2917 /* STGP */ 2918 if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) { 2919 unallocated_encoding(s); 2920 return; 2921 } 2922 size = 3; 2923 set_tag = true; 2924 } else { 2925 size = 2 + extract32(opc, 1, 1); 2926 is_signed = extract32(opc, 0, 1); 2927 if (!is_load && is_signed) { 2928 unallocated_encoding(s); 2929 return; 2930 } 2931 } 2932 2933 switch (index) { 2934 case 1: /* post-index */ 2935 postindex = true; 2936 wback = true; 2937 break; 2938 case 0: 2939 /* signed offset with "non-temporal" hint. Since we don't emulate 2940 * caches we don't care about hints to the cache system about 2941 * data access patterns, and handle this identically to plain 2942 * signed offset. 2943 */ 2944 if (is_signed) { 2945 /* There is no non-temporal-hint version of LDPSW */ 2946 unallocated_encoding(s); 2947 return; 2948 } 2949 postindex = false; 2950 break; 2951 case 2: /* signed offset, rn not updated */ 2952 postindex = false; 2953 break; 2954 case 3: /* pre-index */ 2955 postindex = false; 2956 wback = true; 2957 break; 2958 } 2959 2960 if (is_vector && !fp_access_check(s)) { 2961 return; 2962 } 2963 2964 offset <<= (set_tag ? 
LOG2_TAG_GRANULE : size); 2965 2966 if (rn == 31) { 2967 gen_check_sp_alignment(s); 2968 } 2969 2970 dirty_addr = read_cpu_reg_sp(s, rn, 1); 2971 if (!postindex) { 2972 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 2973 } 2974 2975 if (set_tag) { 2976 if (!s->ata) { 2977 /* 2978 * TODO: We could rely on the stores below, at least for 2979 * system mode, if we arrange to add MO_ALIGN_16. 2980 */ 2981 gen_helper_stg_stub(cpu_env, dirty_addr); 2982 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 2983 gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr); 2984 } else { 2985 gen_helper_stg(cpu_env, dirty_addr, dirty_addr); 2986 } 2987 } 2988 2989 clean_addr = gen_mte_checkN(s, dirty_addr, !is_load, 2990 (wback || rn != 31) && !set_tag, 2 << size); 2991 2992 if (is_vector) { 2993 if (is_load) { 2994 do_fp_ld(s, rt, clean_addr, size); 2995 } else { 2996 do_fp_st(s, rt, clean_addr, size); 2997 } 2998 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); 2999 if (is_load) { 3000 do_fp_ld(s, rt2, clean_addr, size); 3001 } else { 3002 do_fp_st(s, rt2, clean_addr, size); 3003 } 3004 } else { 3005 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3006 TCGv_i64 tcg_rt2 = cpu_reg(s, rt2); 3007 3008 if (is_load) { 3009 TCGv_i64 tmp = tcg_temp_new_i64(); 3010 3011 /* Do not modify tcg_rt before recognizing any exception 3012 * from the second load. 3013 */ 3014 do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN, 3015 false, false, 0, false, false); 3016 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); 3017 do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN, 3018 false, false, 0, false, false); 3019 3020 tcg_gen_mov_i64(tcg_rt, tmp); 3021 } else { 3022 do_gpr_st(s, tcg_rt, clean_addr, size, 3023 false, 0, false, false); 3024 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); 3025 do_gpr_st(s, tcg_rt2, clean_addr, size, 3026 false, 0, false, false); 3027 } 3028 } 3029 3030 if (wback) { 3031 if (postindex) { 3032 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3033 } 3034 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr); 3035 } 3036 } 3037 3038 /* 3039 * Load/store (immediate post-indexed) 3040 * Load/store (immediate pre-indexed) 3041 * Load/store (unscaled immediate) 3042 * 3043 * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0 3044 * +----+-------+---+-----+-----+---+--------+-----+------+------+ 3045 * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt | 3046 * +----+-------+---+-----+-----+---+--------+-----+------+------+ 3047 * 3048 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. 
(no writeback) 3049 10 -> unprivileged 3050 * V = 0 -> non-vector 3051 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit 3052 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 3053 */ 3054 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, 3055 int opc, 3056 int size, 3057 int rt, 3058 bool is_vector) 3059 { 3060 int rn = extract32(insn, 5, 5); 3061 int imm9 = sextract32(insn, 12, 9); 3062 int idx = extract32(insn, 10, 2); 3063 bool is_signed = false; 3064 bool is_store = false; 3065 bool is_extended = false; 3066 bool is_unpriv = (idx == 2); 3067 bool iss_valid; 3068 bool post_index; 3069 bool writeback; 3070 int memidx; 3071 3072 TCGv_i64 clean_addr, dirty_addr; 3073 3074 if (is_vector) { 3075 size |= (opc & 2) << 1; 3076 if (size > 4 || is_unpriv) { 3077 unallocated_encoding(s); 3078 return; 3079 } 3080 is_store = ((opc & 1) == 0); 3081 if (!fp_access_check(s)) { 3082 return; 3083 } 3084 } else { 3085 if (size == 3 && opc == 2) { 3086 /* PRFM - prefetch */ 3087 if (idx != 0) { 3088 unallocated_encoding(s); 3089 return; 3090 } 3091 return; 3092 } 3093 if (opc == 3 && size > 1) { 3094 unallocated_encoding(s); 3095 return; 3096 } 3097 is_store = (opc == 0); 3098 is_signed = extract32(opc, 1, 1); 3099 is_extended = (size < 3) && extract32(opc, 0, 1); 3100 } 3101 3102 switch (idx) { 3103 case 0: 3104 case 2: 3105 post_index = false; 3106 writeback = false; 3107 break; 3108 case 1: 3109 post_index = true; 3110 writeback = true; 3111 break; 3112 case 3: 3113 post_index = false; 3114 writeback = true; 3115 break; 3116 default: 3117 g_assert_not_reached(); 3118 } 3119 3120 iss_valid = !is_vector && !writeback; 3121 3122 if (rn == 31) { 3123 gen_check_sp_alignment(s); 3124 } 3125 3126 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3127 if (!post_index) { 3128 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); 3129 } 3130 3131 memidx = is_unpriv ? 
get_a64_user_mem_index(s) : get_mem_index(s); 3132 clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store, 3133 writeback || rn != 31, 3134 size, is_unpriv, memidx); 3135 3136 if (is_vector) { 3137 if (is_store) { 3138 do_fp_st(s, rt, clean_addr, size); 3139 } else { 3140 do_fp_ld(s, rt, clean_addr, size); 3141 } 3142 } else { 3143 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3144 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); 3145 3146 if (is_store) { 3147 do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx, 3148 iss_valid, rt, iss_sf, false); 3149 } else { 3150 do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 3151 is_extended, memidx, 3152 iss_valid, rt, iss_sf, false); 3153 } 3154 } 3155 3156 if (writeback) { 3157 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn); 3158 if (post_index) { 3159 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); 3160 } 3161 tcg_gen_mov_i64(tcg_rn, dirty_addr); 3162 } 3163 } 3164 3165 /* 3166 * Load/store (register offset) 3167 * 3168 * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0 3169 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ 3170 * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt | 3171 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ 3172 * 3173 * For non-vector: 3174 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit 3175 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 3176 * For vector: 3177 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated 3178 * opc<0>: 0 -> store, 1 -> load 3179 * V: 1 -> vector/simd 3180 * opt: extend encoding (see DecodeRegExtend) 3181 * S: if S=1 then scale (essentially index by sizeof(size)) 3182 * Rt: register to transfer into/out of 3183 * Rn: address register or SP for base 3184 * Rm: offset register or ZR for offset 3185 */ 3186 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, 3187 int opc, 3188 int size, 3189 int rt, 3190 bool is_vector) 3191 { 3192 int rn = extract32(insn, 5, 5); 3193 int shift = extract32(insn, 12, 1); 3194 int rm = extract32(insn, 16, 5); 3195 int opt = extract32(insn, 13, 3); 3196 bool is_signed = false; 3197 bool is_store = false; 3198 bool is_extended = false; 3199 3200 TCGv_i64 tcg_rm, clean_addr, dirty_addr; 3201 3202 if (extract32(opt, 1, 1) == 0) { 3203 unallocated_encoding(s); 3204 return; 3205 } 3206 3207 if (is_vector) { 3208 size |= (opc & 2) << 1; 3209 if (size > 4) { 3210 unallocated_encoding(s); 3211 return; 3212 } 3213 is_store = !extract32(opc, 0, 1); 3214 if (!fp_access_check(s)) { 3215 return; 3216 } 3217 } else { 3218 if (size == 3 && opc == 2) { 3219 /* PRFM - prefetch */ 3220 return; 3221 } 3222 if (opc == 3 && size > 1) { 3223 unallocated_encoding(s); 3224 return; 3225 } 3226 is_store = (opc == 0); 3227 is_signed = extract32(opc, 1, 1); 3228 is_extended = (size < 3) && extract32(opc, 0, 1); 3229 } 3230 3231 if (rn == 31) { 3232 gen_check_sp_alignment(s); 3233 } 3234 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3235 3236 tcg_rm = read_cpu_reg(s, rm, 1); 3237 ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? 
size : 0); 3238 3239 tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm); 3240 clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size); 3241 3242 if (is_vector) { 3243 if (is_store) { 3244 do_fp_st(s, rt, clean_addr, size); 3245 } else { 3246 do_fp_ld(s, rt, clean_addr, size); 3247 } 3248 } else { 3249 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3250 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); 3251 if (is_store) { 3252 do_gpr_st(s, tcg_rt, clean_addr, size, 3253 true, rt, iss_sf, false); 3254 } else { 3255 do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 3256 is_extended, true, rt, iss_sf, false); 3257 } 3258 } 3259 } 3260 3261 /* 3262 * Load/store (unsigned immediate) 3263 * 3264 * 31 30 29 27 26 25 24 23 22 21 10 9 5 3265 * +----+-------+---+-----+-----+------------+-------+------+ 3266 * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt | 3267 * +----+-------+---+-----+-----+------------+-------+------+ 3268 * 3269 * For non-vector: 3270 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit 3271 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 3272 * For vector: 3273 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated 3274 * opc<0>: 0 -> store, 1 -> load 3275 * Rn: base address register (inc SP) 3276 * Rt: target register 3277 */ 3278 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, 3279 int opc, 3280 int size, 3281 int rt, 3282 bool is_vector) 3283 { 3284 int rn = extract32(insn, 5, 5); 3285 unsigned int imm12 = extract32(insn, 10, 12); 3286 unsigned int offset; 3287 3288 TCGv_i64 clean_addr, dirty_addr; 3289 3290 bool is_store; 3291 bool is_signed = false; 3292 bool is_extended = false; 3293 3294 if (is_vector) { 3295 size |= (opc & 2) << 1; 3296 if (size > 4) { 3297 unallocated_encoding(s); 3298 return; 3299 } 3300 is_store = !extract32(opc, 0, 1); 3301 if (!fp_access_check(s)) { 3302 return; 3303 } 3304 } else { 3305 if (size == 3 && opc == 2) { 3306 /* PRFM - prefetch */ 3307 return; 3308 } 3309 if (opc == 3 && size > 1) { 3310 unallocated_encoding(s); 3311 return; 3312 } 3313 is_store = (opc == 0); 3314 is_signed = extract32(opc, 1, 1); 3315 is_extended = (size < 3) && extract32(opc, 0, 1); 3316 } 3317 3318 if (rn == 31) { 3319 gen_check_sp_alignment(s); 3320 } 3321 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3322 offset = imm12 << size; 3323 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3324 clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size); 3325 3326 if (is_vector) { 3327 if (is_store) { 3328 do_fp_st(s, rt, clean_addr, size); 3329 } else { 3330 do_fp_ld(s, rt, clean_addr, size); 3331 } 3332 } else { 3333 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3334 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); 3335 if (is_store) { 3336 do_gpr_st(s, tcg_rt, clean_addr, size, 3337 true, rt, iss_sf, false); 3338 } else { 3339 do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 3340 is_extended, true, rt, iss_sf, false); 3341 } 3342 } 3343 } 3344 3345 /* Atomic memory operations 3346 * 3347 * 31 30 27 26 24 22 21 16 15 12 10 5 0 3348 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+ 3349 * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt | 3350 * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+ 3351 * 3352 * Rt: the result register 3353 * Rn: base address or SP 3354 * Rs: the source register for the operation 3355 * V: vector flag (always 0 as of v8.3) 3356 * A: acquire flag 3357 * R: release flag 3358 */ 3359 static 
void disas_ldst_atomic(DisasContext *s, uint32_t insn, 3360 int size, int rt, bool is_vector) 3361 { 3362 int rs = extract32(insn, 16, 5); 3363 int rn = extract32(insn, 5, 5); 3364 int o3_opc = extract32(insn, 12, 4); 3365 bool r = extract32(insn, 22, 1); 3366 bool a = extract32(insn, 23, 1); 3367 TCGv_i64 tcg_rs, tcg_rt, clean_addr; 3368 AtomicThreeOpFn *fn = NULL; 3369 MemOp mop = s->be_data | size | MO_ALIGN; 3370 3371 if (is_vector || !dc_isar_feature(aa64_atomics, s)) { 3372 unallocated_encoding(s); 3373 return; 3374 } 3375 switch (o3_opc) { 3376 case 000: /* LDADD */ 3377 fn = tcg_gen_atomic_fetch_add_i64; 3378 break; 3379 case 001: /* LDCLR */ 3380 fn = tcg_gen_atomic_fetch_and_i64; 3381 break; 3382 case 002: /* LDEOR */ 3383 fn = tcg_gen_atomic_fetch_xor_i64; 3384 break; 3385 case 003: /* LDSET */ 3386 fn = tcg_gen_atomic_fetch_or_i64; 3387 break; 3388 case 004: /* LDSMAX */ 3389 fn = tcg_gen_atomic_fetch_smax_i64; 3390 mop |= MO_SIGN; 3391 break; 3392 case 005: /* LDSMIN */ 3393 fn = tcg_gen_atomic_fetch_smin_i64; 3394 mop |= MO_SIGN; 3395 break; 3396 case 006: /* LDUMAX */ 3397 fn = tcg_gen_atomic_fetch_umax_i64; 3398 break; 3399 case 007: /* LDUMIN */ 3400 fn = tcg_gen_atomic_fetch_umin_i64; 3401 break; 3402 case 010: /* SWP */ 3403 fn = tcg_gen_atomic_xchg_i64; 3404 break; 3405 case 014: /* LDAPR, LDAPRH, LDAPRB */ 3406 if (!dc_isar_feature(aa64_rcpc_8_3, s) || 3407 rs != 31 || a != 1 || r != 0) { 3408 unallocated_encoding(s); 3409 return; 3410 } 3411 break; 3412 default: 3413 unallocated_encoding(s); 3414 return; 3415 } 3416 3417 if (rn == 31) { 3418 gen_check_sp_alignment(s); 3419 } 3420 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); 3421 3422 if (o3_opc == 014) { 3423 /* 3424 * LDAPR* are a special case because they are a simple load, not a 3425 * fetch-and-do-something op. 3426 * The architectural consistency requirements here are weaker than 3427 * full load-acquire (we only need "load-acquire processor consistent"), 3428 * but we choose to implement them as full LDAQ. 3429 */ 3430 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, 3431 true, rt, disas_ldst_compute_iss_sf(size, false, 0), true); 3432 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3433 return; 3434 } 3435 3436 tcg_rs = read_cpu_reg(s, rs, true); 3437 tcg_rt = cpu_reg(s, rt); 3438 3439 if (o3_opc == 1) { /* LDCLR */ 3440 tcg_gen_not_i64(tcg_rs, tcg_rs); 3441 } 3442 3443 /* The tcg atomic primitives are all full barriers. Therefore we 3444 * can ignore the Acquire and Release bits of this instruction. 3445 */ 3446 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3447 3448 if ((mop & MO_SIGN) && size != MO_64) { 3449 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3450 } 3451 } 3452 3453 /* 3454 * PAC memory operations 3455 * 3456 * 31 30 27 26 24 22 21 12 11 10 5 0 3457 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+ 3458 * | size | 1 1 1 | V | 0 0 | M S | 1 | imm9 | W | 1 | Rn | Rt | 3459 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+ 3460 * 3461 * Rt: the result register 3462 * Rn: base address or SP 3463 * V: vector flag (always 0 as of v8.3) 3464 * M: clear for key DA, set for key DB 3465 * W: pre-indexing flag 3466 * S: sign for imm9. 
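 * S:imm9 forms a 10-bit signed offset scaled by the access size (always
 * 8 bytes for these insns): for example S=1, imm9=0 encodes -512 and so
 * yields a byte offset of -4096.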
3467 */ 3468 static void disas_ldst_pac(DisasContext *s, uint32_t insn, 3469 int size, int rt, bool is_vector) 3470 { 3471 int rn = extract32(insn, 5, 5); 3472 bool is_wback = extract32(insn, 11, 1); 3473 bool use_key_a = !extract32(insn, 23, 1); 3474 int offset; 3475 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3476 3477 if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) { 3478 unallocated_encoding(s); 3479 return; 3480 } 3481 3482 if (rn == 31) { 3483 gen_check_sp_alignment(s); 3484 } 3485 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3486 3487 if (s->pauth_active) { 3488 if (use_key_a) { 3489 gen_helper_autda(dirty_addr, cpu_env, dirty_addr, 3490 tcg_constant_i64(0)); 3491 } else { 3492 gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, 3493 tcg_constant_i64(0)); 3494 } 3495 } 3496 3497 /* Form the 10-bit signed, scaled offset. */ 3498 offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9); 3499 offset = sextract32(offset << size, 0, 10 + size); 3500 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3501 3502 /* Note that "clean" and "dirty" here refer to TBI not PAC. */ 3503 clean_addr = gen_mte_check1(s, dirty_addr, false, 3504 is_wback || rn != 31, size); 3505 3506 tcg_rt = cpu_reg(s, rt); 3507 do_gpr_ld(s, tcg_rt, clean_addr, size, 3508 /* extend */ false, /* iss_valid */ !is_wback, 3509 /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false); 3510 3511 if (is_wback) { 3512 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr); 3513 } 3514 } 3515 3516 /* 3517 * LDAPR/STLR (unscaled immediate) 3518 * 3519 * 31 30 24 22 21 12 10 5 0 3520 * +------+-------------+-----+---+--------+-----+----+-----+ 3521 * | size | 0 1 1 0 0 1 | opc | 0 | imm9 | 0 0 | Rn | Rt | 3522 * +------+-------------+-----+---+--------+-----+----+-----+ 3523 * 3524 * Rt: source or destination register 3525 * Rn: base register 3526 * imm9: unscaled immediate offset 3527 * opc: 00: STLUR*, 01/10/11: various LDAPUR* 3528 * size: size of load/store 3529 */ 3530 static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) 3531 { 3532 int rt = extract32(insn, 0, 5); 3533 int rn = extract32(insn, 5, 5); 3534 int offset = sextract32(insn, 12, 9); 3535 int opc = extract32(insn, 22, 2); 3536 int size = extract32(insn, 30, 2); 3537 TCGv_i64 clean_addr, dirty_addr; 3538 bool is_store = false; 3539 bool extend = false; 3540 bool iss_sf; 3541 MemOp mop; 3542 3543 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3544 unallocated_encoding(s); 3545 return; 3546 } 3547 3548 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3549 mop = size | MO_ALIGN; 3550 3551 switch (opc) { 3552 case 0: /* STLURB */ 3553 is_store = true; 3554 break; 3555 case 1: /* LDAPUR* */ 3556 break; 3557 case 2: /* LDAPURS* 64-bit variant */ 3558 if (size == 3) { 3559 unallocated_encoding(s); 3560 return; 3561 } 3562 mop |= MO_SIGN; 3563 break; 3564 case 3: /* LDAPURS* 32-bit variant */ 3565 if (size > 1) { 3566 unallocated_encoding(s); 3567 return; 3568 } 3569 mop |= MO_SIGN; 3570 extend = true; /* zero-extend 32->64 after signed load */ 3571 break; 3572 default: 3573 g_assert_not_reached(); 3574 } 3575 3576 iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc); 3577 3578 if (rn == 31) { 3579 gen_check_sp_alignment(s); 3580 } 3581 3582 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3583 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3584 clean_addr = clean_data_tbi(s, dirty_addr); 3585 3586 if (is_store) { 3587 /* Store-Release semantics */ 3588 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3589 do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true); 
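        /*
         * The TCG_BAR_STRL barrier emitted just above is what provides the
         * release ordering; the store itself is an ordinary store.
         */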
3590 } else { 3591 /* 3592 * Load-AcquirePC semantics; we implement as the slightly more 3593 * restrictive Load-Acquire. 3594 */ 3595 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, 3596 extend, true, rt, iss_sf, true); 3597 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3598 } 3599 } 3600 3601 /* Load/store register (all forms) */ 3602 static void disas_ldst_reg(DisasContext *s, uint32_t insn) 3603 { 3604 int rt = extract32(insn, 0, 5); 3605 int opc = extract32(insn, 22, 2); 3606 bool is_vector = extract32(insn, 26, 1); 3607 int size = extract32(insn, 30, 2); 3608 3609 switch (extract32(insn, 24, 2)) { 3610 case 0: 3611 if (extract32(insn, 21, 1) == 0) { 3612 /* Load/store register (unscaled immediate) 3613 * Load/store immediate pre/post-indexed 3614 * Load/store register unprivileged 3615 */ 3616 disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector); 3617 return; 3618 } 3619 switch (extract32(insn, 10, 2)) { 3620 case 0: 3621 disas_ldst_atomic(s, insn, size, rt, is_vector); 3622 return; 3623 case 2: 3624 disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector); 3625 return; 3626 default: 3627 disas_ldst_pac(s, insn, size, rt, is_vector); 3628 return; 3629 } 3630 break; 3631 case 1: 3632 disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector); 3633 return; 3634 } 3635 unallocated_encoding(s); 3636 } 3637 3638 /* AdvSIMD load/store multiple structures 3639 * 3640 * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0 3641 * +---+---+---------------+---+-------------+--------+------+------+------+ 3642 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt | 3643 * +---+---+---------------+---+-------------+--------+------+------+------+ 3644 * 3645 * AdvSIMD load/store multiple structures (post-indexed) 3646 * 3647 * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0 3648 * +---+---+---------------+---+---+---------+--------+------+------+------+ 3649 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt | 3650 * +---+---+---------------+---+---+---------+--------+------+------+------+ 3651 * 3652 * Rt: first (or only) SIMD&FP register to be transferred 3653 * Rn: base address or SP 3654 * Rm (post-index only): post-index register (when !31) or size dependent #imm 3655 */ 3656 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) 3657 { 3658 int rt = extract32(insn, 0, 5); 3659 int rn = extract32(insn, 5, 5); 3660 int rm = extract32(insn, 16, 5); 3661 int size = extract32(insn, 10, 2); 3662 int opcode = extract32(insn, 12, 4); 3663 bool is_store = !extract32(insn, 22, 1); 3664 bool is_postidx = extract32(insn, 23, 1); 3665 bool is_q = extract32(insn, 30, 1); 3666 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3667 MemOp endian, align, mop; 3668 3669 int total; /* total bytes */ 3670 int elements; /* elements per vector */ 3671 int rpt; /* num iterations */ 3672 int selem; /* structure elements */ 3673 int r; 3674 3675 if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) { 3676 unallocated_encoding(s); 3677 return; 3678 } 3679 3680 if (!is_postidx && rm != 0) { 3681 unallocated_encoding(s); 3682 return; 3683 } 3684 3685 /* From the shared decode logic */ 3686 switch (opcode) { 3687 case 0x0: 3688 rpt = 1; 3689 selem = 4; 3690 break; 3691 case 0x2: 3692 rpt = 4; 3693 selem = 1; 3694 break; 3695 case 0x4: 3696 rpt = 1; 3697 selem = 3; 3698 break; 3699 case 0x6: 3700 rpt = 3; 3701 selem = 1; 3702 break; 3703 case 0x7: 3704 rpt = 1; 3705 selem = 1; 3706 break; 3707 case 0x8: 3708 rpt = 1; 3709 selem = 2; 3710 break; 3711 case 0xa: 3712 rpt = 2; 3713 selem = 1; 3714 break; 
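    /*
     * i.e. 0x0: LD4/ST4; 0x2: LD1/ST1 (4 registers); 0x4: LD3/ST3;
     * 0x6: LD1/ST1 (3 registers); 0x7: LD1/ST1 (1 register); 0x8: LD2/ST2;
     * 0xa: LD1/ST1 (2 registers); all other opcode values are reserved.
     */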
3715 default: 3716 unallocated_encoding(s); 3717 return; 3718 } 3719 3720 if (size == 3 && !is_q && selem != 1) { 3721 /* reserved */ 3722 unallocated_encoding(s); 3723 return; 3724 } 3725 3726 if (!fp_access_check(s)) { 3727 return; 3728 } 3729 3730 if (rn == 31) { 3731 gen_check_sp_alignment(s); 3732 } 3733 3734 /* For our purposes, bytes are always little-endian. */ 3735 endian = s->be_data; 3736 if (size == 0) { 3737 endian = MO_LE; 3738 } 3739 3740 total = rpt * selem * (is_q ? 16 : 8); 3741 tcg_rn = cpu_reg_sp(s, rn); 3742 3743 /* 3744 * Issue the MTE check vs the logical repeat count, before we 3745 * promote consecutive little-endian elements below. 3746 */ 3747 clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, 3748 total); 3749 3750 /* 3751 * Consecutive little-endian elements from a single register 3752 * can be promoted to a larger little-endian operation. 3753 */ 3754 align = MO_ALIGN; 3755 if (selem == 1 && endian == MO_LE) { 3756 align = pow2_align(size); 3757 size = 3; 3758 } 3759 if (!s->align_mem) { 3760 align = 0; 3761 } 3762 mop = endian | size | align; 3763 3764 elements = (is_q ? 16 : 8) >> size; 3765 tcg_ebytes = tcg_constant_i64(1 << size); 3766 for (r = 0; r < rpt; r++) { 3767 int e; 3768 for (e = 0; e < elements; e++) { 3769 int xs; 3770 for (xs = 0; xs < selem; xs++) { 3771 int tt = (rt + r + xs) % 32; 3772 if (is_store) { 3773 do_vec_st(s, tt, e, clean_addr, mop); 3774 } else { 3775 do_vec_ld(s, tt, e, clean_addr, mop); 3776 } 3777 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3778 } 3779 } 3780 } 3781 3782 if (!is_store) { 3783 /* For non-quad operations, setting a slice of the low 3784 * 64 bits of the register clears the high 64 bits (in 3785 * the ARM ARM pseudocode this is implicit in the fact 3786 * that 'rval' is a 64 bit wide variable). 3787 * For quad operations, we might still need to zero the 3788 * high bits of SVE. 
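 * (clear_vec_high() zeroes everything above the 64-bit or 128-bit result
 * as appropriate, including the rest of a longer SVE vector.)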
3789 */ 3790 for (r = 0; r < rpt * selem; r++) { 3791 int tt = (rt + r) % 32; 3792 clear_vec_high(s, is_q, tt); 3793 } 3794 } 3795 3796 if (is_postidx) { 3797 if (rm == 31) { 3798 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3799 } else { 3800 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); 3801 } 3802 } 3803 } 3804 3805 /* AdvSIMD load/store single structure 3806 * 3807 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 3808 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3809 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt | 3810 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3811 * 3812 * AdvSIMD load/store single structure (post-indexed) 3813 * 3814 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 3815 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3816 * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt | 3817 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3818 * 3819 * Rt: first (or only) SIMD&FP register to be transferred 3820 * Rn: base address or SP 3821 * Rm (post-index only): post-index register (when !31) or size dependent #imm 3822 * index = encoded in Q:S:size dependent on size 3823 * 3824 * lane_size = encoded in R, opc 3825 * transfer width = encoded in opc, S, size 3826 */ 3827 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) 3828 { 3829 int rt = extract32(insn, 0, 5); 3830 int rn = extract32(insn, 5, 5); 3831 int rm = extract32(insn, 16, 5); 3832 int size = extract32(insn, 10, 2); 3833 int S = extract32(insn, 12, 1); 3834 int opc = extract32(insn, 13, 3); 3835 int R = extract32(insn, 21, 1); 3836 int is_load = extract32(insn, 22, 1); 3837 int is_postidx = extract32(insn, 23, 1); 3838 int is_q = extract32(insn, 30, 1); 3839 3840 int scale = extract32(opc, 1, 2); 3841 int selem = (extract32(opc, 0, 1) << 1 | R) + 1; 3842 bool replicate = false; 3843 int index = is_q << 3 | S << 2 | size; 3844 int xs, total; 3845 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3846 MemOp mop; 3847 3848 if (extract32(insn, 31, 1)) { 3849 unallocated_encoding(s); 3850 return; 3851 } 3852 if (!is_postidx && rm != 0) { 3853 unallocated_encoding(s); 3854 return; 3855 } 3856 3857 switch (scale) { 3858 case 3: 3859 if (!is_load || S) { 3860 unallocated_encoding(s); 3861 return; 3862 } 3863 scale = size; 3864 replicate = true; 3865 break; 3866 case 0: 3867 break; 3868 case 1: 3869 if (extract32(size, 0, 1)) { 3870 unallocated_encoding(s); 3871 return; 3872 } 3873 index >>= 1; 3874 break; 3875 case 2: 3876 if (extract32(size, 1, 1)) { 3877 unallocated_encoding(s); 3878 return; 3879 } 3880 if (!extract32(size, 0, 1)) { 3881 index >>= 2; 3882 } else { 3883 if (S) { 3884 unallocated_encoding(s); 3885 return; 3886 } 3887 index >>= 3; 3888 scale = 3; 3889 } 3890 break; 3891 default: 3892 g_assert_not_reached(); 3893 } 3894 3895 if (!fp_access_check(s)) { 3896 return; 3897 } 3898 3899 if (rn == 31) { 3900 gen_check_sp_alignment(s); 3901 } 3902 3903 total = selem << scale; 3904 tcg_rn = cpu_reg_sp(s, rn); 3905 3906 clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, 3907 total); 3908 mop = finalize_memop(s, scale); 3909 3910 tcg_ebytes = tcg_constant_i64(1 << scale); 3911 for (xs = 0; xs < selem; xs++) { 3912 if (replicate) { 3913 /* Load and replicate to all elements */ 3914 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 3915 3916 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 3917 tcg_gen_gvec_dup_i64(scale, 
vec_full_reg_offset(s, rt), 3918 (is_q + 1) * 8, vec_full_reg_size(s), 3919 tcg_tmp); 3920 } else { 3921 /* Load/store one element per register */ 3922 if (is_load) { 3923 do_vec_ld(s, rt, index, clean_addr, mop); 3924 } else { 3925 do_vec_st(s, rt, index, clean_addr, mop); 3926 } 3927 } 3928 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3929 rt = (rt + 1) % 32; 3930 } 3931 3932 if (is_postidx) { 3933 if (rm == 31) { 3934 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3935 } else { 3936 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); 3937 } 3938 } 3939 } 3940 3941 /* 3942 * Load/Store memory tags 3943 * 3944 * 31 30 29 24 22 21 12 10 5 0 3945 * +-----+-------------+-----+---+------+-----+------+------+ 3946 * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 | Rn | Rt | 3947 * +-----+-------------+-----+---+------+-----+------+------+ 3948 */ 3949 static void disas_ldst_tag(DisasContext *s, uint32_t insn) 3950 { 3951 int rt = extract32(insn, 0, 5); 3952 int rn = extract32(insn, 5, 5); 3953 uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE; 3954 int op2 = extract32(insn, 10, 2); 3955 int op1 = extract32(insn, 22, 2); 3956 bool is_load = false, is_pair = false, is_zero = false, is_mult = false; 3957 int index = 0; 3958 TCGv_i64 addr, clean_addr, tcg_rt; 3959 3960 /* We checked insn bits [29:24,21] in the caller. */ 3961 if (extract32(insn, 30, 2) != 3) { 3962 goto do_unallocated; 3963 } 3964 3965 /* 3966 * @index is a tri-state variable which has 3 states: 3967 * < 0 : post-index, writeback 3968 * = 0 : signed offset 3969 * > 0 : pre-index, writeback 3970 */ 3971 switch (op1) { 3972 case 0: 3973 if (op2 != 0) { 3974 /* STG */ 3975 index = op2 - 2; 3976 } else { 3977 /* STZGM */ 3978 if (s->current_el == 0 || offset != 0) { 3979 goto do_unallocated; 3980 } 3981 is_mult = is_zero = true; 3982 } 3983 break; 3984 case 1: 3985 if (op2 != 0) { 3986 /* STZG */ 3987 is_zero = true; 3988 index = op2 - 2; 3989 } else { 3990 /* LDG */ 3991 is_load = true; 3992 } 3993 break; 3994 case 2: 3995 if (op2 != 0) { 3996 /* ST2G */ 3997 is_pair = true; 3998 index = op2 - 2; 3999 } else { 4000 /* STGM */ 4001 if (s->current_el == 0 || offset != 0) { 4002 goto do_unallocated; 4003 } 4004 is_mult = true; 4005 } 4006 break; 4007 case 3: 4008 if (op2 != 0) { 4009 /* STZ2G */ 4010 is_pair = is_zero = true; 4011 index = op2 - 2; 4012 } else { 4013 /* LDGM */ 4014 if (s->current_el == 0 || offset != 0) { 4015 goto do_unallocated; 4016 } 4017 is_mult = is_load = true; 4018 } 4019 break; 4020 4021 default: 4022 do_unallocated: 4023 unallocated_encoding(s); 4024 return; 4025 } 4026 4027 if (is_mult 4028 ? !dc_isar_feature(aa64_mte, s) 4029 : !dc_isar_feature(aa64_mte_insn_reg, s)) { 4030 goto do_unallocated; 4031 } 4032 4033 if (rn == 31) { 4034 gen_check_sp_alignment(s); 4035 } 4036 4037 addr = read_cpu_reg_sp(s, rn, true); 4038 if (index >= 0) { 4039 /* pre-index or signed offset */ 4040 tcg_gen_addi_i64(addr, addr, offset); 4041 } 4042 4043 if (is_mult) { 4044 tcg_rt = cpu_reg(s, rt); 4045 4046 if (is_zero) { 4047 int size = 4 << s->dcz_blocksize; 4048 4049 if (s->ata) { 4050 gen_helper_stzgm_tags(cpu_env, addr, tcg_rt); 4051 } 4052 /* 4053 * The non-tags portion of STZGM is mostly like DC_ZVA, 4054 * except the alignment happens before the access. 
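 * (Sketch with an assumed 64-byte DCZID block size: an address of
 * base + 0x28 is first rounded down to base + 0x00 by the AND with
 * -size below, and only then is the whole block zeroed by the
 * dc_zva helper.)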
4055 */ 4056 clean_addr = clean_data_tbi(s, addr); 4057 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4058 gen_helper_dc_zva(cpu_env, clean_addr); 4059 } else if (s->ata) { 4060 if (is_load) { 4061 gen_helper_ldgm(tcg_rt, cpu_env, addr); 4062 } else { 4063 gen_helper_stgm(cpu_env, addr, tcg_rt); 4064 } 4065 } else { 4066 MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; 4067 int size = 4 << GMID_EL1_BS; 4068 4069 clean_addr = clean_data_tbi(s, addr); 4070 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4071 gen_probe_access(s, clean_addr, acc, size); 4072 4073 if (is_load) { 4074 /* The result tags are zeros. */ 4075 tcg_gen_movi_i64(tcg_rt, 0); 4076 } 4077 } 4078 return; 4079 } 4080 4081 if (is_load) { 4082 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 4083 tcg_rt = cpu_reg(s, rt); 4084 if (s->ata) { 4085 gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt); 4086 } else { 4087 clean_addr = clean_data_tbi(s, addr); 4088 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 4089 gen_address_with_allocation_tag0(tcg_rt, addr); 4090 } 4091 } else { 4092 tcg_rt = cpu_reg_sp(s, rt); 4093 if (!s->ata) { 4094 /* 4095 * For STG and ST2G, we need to check alignment and probe memory. 4096 * TODO: For STZG and STZ2G, we could rely on the stores below, 4097 * at least for system mode; user-only won't enforce alignment. 4098 */ 4099 if (is_pair) { 4100 gen_helper_st2g_stub(cpu_env, addr); 4101 } else { 4102 gen_helper_stg_stub(cpu_env, addr); 4103 } 4104 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 4105 if (is_pair) { 4106 gen_helper_st2g_parallel(cpu_env, addr, tcg_rt); 4107 } else { 4108 gen_helper_stg_parallel(cpu_env, addr, tcg_rt); 4109 } 4110 } else { 4111 if (is_pair) { 4112 gen_helper_st2g(cpu_env, addr, tcg_rt); 4113 } else { 4114 gen_helper_stg(cpu_env, addr, tcg_rt); 4115 } 4116 } 4117 } 4118 4119 if (is_zero) { 4120 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 4121 TCGv_i64 tcg_zero = tcg_constant_i64(0); 4122 int mem_index = get_mem_index(s); 4123 int i, n = (1 + is_pair) << LOG2_TAG_GRANULE; 4124 4125 tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, 4126 MO_UQ | MO_ALIGN_16); 4127 for (i = 8; i < n; i += 8) { 4128 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4129 tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_UQ); 4130 } 4131 } 4132 4133 if (index != 0) { 4134 /* pre-index or post-index */ 4135 if (index < 0) { 4136 /* post-index */ 4137 tcg_gen_addi_i64(addr, addr, offset); 4138 } 4139 tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr); 4140 } 4141 } 4142 4143 /* Loads and stores */ 4144 static void disas_ldst(DisasContext *s, uint32_t insn) 4145 { 4146 switch (extract32(insn, 24, 6)) { 4147 case 0x08: /* Load/store exclusive */ 4148 disas_ldst_excl(s, insn); 4149 break; 4150 case 0x18: case 0x1c: /* Load register (literal) */ 4151 disas_ld_lit(s, insn); 4152 break; 4153 case 0x28: case 0x29: 4154 case 0x2c: case 0x2d: /* Load/store pair (all forms) */ 4155 disas_ldst_pair(s, insn); 4156 break; 4157 case 0x38: case 0x39: 4158 case 0x3c: case 0x3d: /* Load/store register (all forms) */ 4159 disas_ldst_reg(s, insn); 4160 break; 4161 case 0x0c: /* AdvSIMD load/store multiple structures */ 4162 disas_ldst_multiple_struct(s, insn); 4163 break; 4164 case 0x0d: /* AdvSIMD load/store single structure */ 4165 disas_ldst_single_struct(s, insn); 4166 break; 4167 case 0x19: 4168 if (extract32(insn, 21, 1) != 0) { 4169 disas_ldst_tag(s, insn); 4170 } else if (extract32(insn, 10, 2) == 0) { 4171 disas_ldst_ldapr_stlr(s, insn); 4172 } else { 4173 unallocated_encoding(s); 4174 } 4175 
break; 4176 default: 4177 unallocated_encoding(s); 4178 break; 4179 } 4180 } 4181 4182 /* PC-rel. addressing 4183 * 31 30 29 28 24 23 5 4 0 4184 * +----+-------+-----------+-------------------+------+ 4185 * | op | immlo | 1 0 0 0 0 | immhi | Rd | 4186 * +----+-------+-----------+-------------------+------+ 4187 */ 4188 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn) 4189 { 4190 unsigned int page, rd; 4191 int64_t offset; 4192 4193 page = extract32(insn, 31, 1); 4194 /* SignExtend(immhi:immlo) -> offset */ 4195 offset = sextract64(insn, 5, 19); 4196 offset = offset << 2 | extract32(insn, 29, 2); 4197 rd = extract32(insn, 0, 5); 4198 4199 if (page) { 4200 /* ADRP (page based) */ 4201 offset <<= 12; 4202 /* The page offset is ok for CF_PCREL. */ 4203 offset -= s->pc_curr & 0xfff; 4204 } 4205 4206 gen_pc_plus_diff(s, cpu_reg(s, rd), offset); 4207 } 4208 4209 /* 4210 * Add/subtract (immediate) 4211 * 4212 * 31 30 29 28 23 22 21 10 9 5 4 0 4213 * +--+--+--+-------------+--+-------------+-----+-----+ 4214 * |sf|op| S| 1 0 0 0 1 0 |sh| imm12 | Rn | Rd | 4215 * +--+--+--+-------------+--+-------------+-----+-----+ 4216 * 4217 * sf: 0 -> 32bit, 1 -> 64bit 4218 * op: 0 -> add , 1 -> sub 4219 * S: 1 -> set flags 4220 * sh: 1 -> LSL imm by 12 4221 */ 4222 static void disas_add_sub_imm(DisasContext *s, uint32_t insn) 4223 { 4224 int rd = extract32(insn, 0, 5); 4225 int rn = extract32(insn, 5, 5); 4226 uint64_t imm = extract32(insn, 10, 12); 4227 bool shift = extract32(insn, 22, 1); 4228 bool setflags = extract32(insn, 29, 1); 4229 bool sub_op = extract32(insn, 30, 1); 4230 bool is_64bit = extract32(insn, 31, 1); 4231 4232 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn); 4233 TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd); 4234 TCGv_i64 tcg_result; 4235 4236 if (shift) { 4237 imm <<= 12; 4238 } 4239 4240 tcg_result = tcg_temp_new_i64(); 4241 if (!setflags) { 4242 if (sub_op) { 4243 tcg_gen_subi_i64(tcg_result, tcg_rn, imm); 4244 } else { 4245 tcg_gen_addi_i64(tcg_result, tcg_rn, imm); 4246 } 4247 } else { 4248 TCGv_i64 tcg_imm = tcg_constant_i64(imm); 4249 if (sub_op) { 4250 gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm); 4251 } else { 4252 gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm); 4253 } 4254 } 4255 4256 if (is_64bit) { 4257 tcg_gen_mov_i64(tcg_rd, tcg_result); 4258 } else { 4259 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 4260 } 4261 } 4262 4263 /* 4264 * Add/subtract (immediate, with tags) 4265 * 4266 * 31 30 29 28 23 22 21 16 14 10 9 5 4 0 4267 * +--+--+--+-------------+--+---------+--+-------+-----+-----+ 4268 * |sf|op| S| 1 0 0 0 1 1 |o2| uimm6 |o3| uimm4 | Rn | Rd | 4269 * +--+--+--+-------------+--+---------+--+-------+-----+-----+ 4270 * 4271 * op: 0 -> add, 1 -> sub 4272 */ 4273 static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn) 4274 { 4275 int rd = extract32(insn, 0, 5); 4276 int rn = extract32(insn, 5, 5); 4277 int uimm4 = extract32(insn, 10, 4); 4278 int uimm6 = extract32(insn, 16, 6); 4279 bool sub_op = extract32(insn, 30, 1); 4280 TCGv_i64 tcg_rn, tcg_rd; 4281 int imm; 4282 4283 /* Test all of sf=1, S=0, o2=0, o3=0. 
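 * (Decoding the constant below: 0xa040c000 selects bit 31 (sf),
 * bit 29 (S), bit 22 (o2) and bits [15:14] (o3); the required value
 * 0x80000000 means sf must be 1 with the other three fields 0.)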
*/ 4284 if ((insn & 0xa040c000u) != 0x80000000u || 4285 !dc_isar_feature(aa64_mte_insn_reg, s)) { 4286 unallocated_encoding(s); 4287 return; 4288 } 4289 4290 imm = uimm6 << LOG2_TAG_GRANULE; 4291 if (sub_op) { 4292 imm = -imm; 4293 } 4294 4295 tcg_rn = cpu_reg_sp(s, rn); 4296 tcg_rd = cpu_reg_sp(s, rd); 4297 4298 if (s->ata) { 4299 gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn, 4300 tcg_constant_i32(imm), 4301 tcg_constant_i32(uimm4)); 4302 } else { 4303 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4304 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4305 } 4306 } 4307 4308 /* The input should be a value in the bottom e bits (with higher 4309 * bits zero); returns that value replicated into every element 4310 * of size e in a 64 bit integer. 4311 */ 4312 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4313 { 4314 assert(e != 0); 4315 while (e < 64) { 4316 mask |= mask << e; 4317 e *= 2; 4318 } 4319 return mask; 4320 } 4321 4322 /* Return a value with the bottom len bits set (where 0 < len <= 64) */ 4323 static inline uint64_t bitmask64(unsigned int length) 4324 { 4325 assert(length > 0 && length <= 64); 4326 return ~0ULL >> (64 - length); 4327 } 4328 4329 /* Simplified variant of pseudocode DecodeBitMasks() for the case where we 4330 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4331 * value (ie should cause a guest UNDEF exception), and true if they are 4332 * valid, in which case the decoded bit pattern is written to result. 4333 */ 4334 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 4335 unsigned int imms, unsigned int immr) 4336 { 4337 uint64_t mask; 4338 unsigned e, levels, s, r; 4339 int len; 4340 4341 assert(immn < 2 && imms < 64 && immr < 64); 4342 4343 /* The bit patterns we create here are 64 bit patterns which 4344 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 4345 * 64 bits each. Each element contains the same value: a run 4346 * of between 1 and e-1 non-zero bits, rotated within the 4347 * element by between 0 and e-1 bits. 4348 * 4349 * The element size and run length are encoded into immn (1 bit) 4350 * and imms (6 bits) as follows: 4351 * 64 bit elements: immn = 1, imms = <length of run - 1> 4352 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 4353 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 4354 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 4355 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 4356 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 4357 * Notice that immn = 0, imms = 11111x is the only combination 4358 * not covered by one of the above options; this is reserved. 4359 * Further, <length of run - 1> all-ones is a reserved pattern. 4360 * 4361 * In all cases the rotation is by immr % e (and immr is 6 bits). 4362 */ 4363 4364 /* First determine the element size */ 4365 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 4366 if (len < 1) { 4367 /* This is the immn == 0, imms == 0x11111x case */ 4368 return false; 4369 } 4370 e = 1 << len; 4371 4372 levels = e - 1; 4373 s = imms & levels; 4374 r = immr & levels; 4375 4376 if (s == levels) { 4377 /* <length of run - 1> mustn't be all-ones. */ 4378 return false; 4379 } 4380 4381 /* Create the value of one element: s+1 set bits rotated 4382 * by r within the element (which is e bits wide)... 
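 * (Worked example: immn=0, imms=0b110010, immr=0b000001 decodes to
 * e=8, s=2, r=1; bitmask64(3) is 0b111, rotating it right by one
 * within 8 bits gives 0x83, and replication below produces
 * 0x8383838383838383.)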
4383 */ 4384 mask = bitmask64(s + 1); 4385 if (r) { 4386 mask = (mask >> r) | (mask << (e - r)); 4387 mask &= bitmask64(e); 4388 } 4389 /* ...then replicate the element over the whole 64 bit value */ 4390 mask = bitfield_replicate(mask, e); 4391 *result = mask; 4392 return true; 4393 } 4394 4395 /* Logical (immediate) 4396 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0 4397 * +----+-----+-------------+---+------+------+------+------+ 4398 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms | Rn | Rd | 4399 * +----+-----+-------------+---+------+------+------+------+ 4400 */ 4401 static void disas_logic_imm(DisasContext *s, uint32_t insn) 4402 { 4403 unsigned int sf, opc, is_n, immr, imms, rn, rd; 4404 TCGv_i64 tcg_rd, tcg_rn; 4405 uint64_t wmask; 4406 bool is_and = false; 4407 4408 sf = extract32(insn, 31, 1); 4409 opc = extract32(insn, 29, 2); 4410 is_n = extract32(insn, 22, 1); 4411 immr = extract32(insn, 16, 6); 4412 imms = extract32(insn, 10, 6); 4413 rn = extract32(insn, 5, 5); 4414 rd = extract32(insn, 0, 5); 4415 4416 if (!sf && is_n) { 4417 unallocated_encoding(s); 4418 return; 4419 } 4420 4421 if (opc == 0x3) { /* ANDS */ 4422 tcg_rd = cpu_reg(s, rd); 4423 } else { 4424 tcg_rd = cpu_reg_sp(s, rd); 4425 } 4426 tcg_rn = cpu_reg(s, rn); 4427 4428 if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) { 4429 /* some immediate field values are reserved */ 4430 unallocated_encoding(s); 4431 return; 4432 } 4433 4434 if (!sf) { 4435 wmask &= 0xffffffff; 4436 } 4437 4438 switch (opc) { 4439 case 0x3: /* ANDS */ 4440 case 0x0: /* AND */ 4441 tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask); 4442 is_and = true; 4443 break; 4444 case 0x1: /* ORR */ 4445 tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask); 4446 break; 4447 case 0x2: /* EOR */ 4448 tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask); 4449 break; 4450 default: 4451 assert(FALSE); /* must handle all above */ 4452 break; 4453 } 4454 4455 if (!sf && !is_and) { 4456 /* zero extend final result; we know we can skip this for AND 4457 * since the immediate had the high 32 bits clear. 
4458 */ 4459 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4460 } 4461 4462 if (opc == 3) { /* ANDS */ 4463 gen_logic_CC(sf, tcg_rd); 4464 } 4465 } 4466 4467 /* 4468 * Move wide (immediate) 4469 * 4470 * 31 30 29 28 23 22 21 20 5 4 0 4471 * +--+-----+-------------+-----+----------------+------+ 4472 * |sf| opc | 1 0 0 1 0 1 | hw | imm16 | Rd | 4473 * +--+-----+-------------+-----+----------------+------+ 4474 * 4475 * sf: 0 -> 32 bit, 1 -> 64 bit 4476 * opc: 00 -> N, 10 -> Z, 11 -> K 4477 * hw: shift/16 (0,16, and sf only 32, 48) 4478 */ 4479 static void disas_movw_imm(DisasContext *s, uint32_t insn) 4480 { 4481 int rd = extract32(insn, 0, 5); 4482 uint64_t imm = extract32(insn, 5, 16); 4483 int sf = extract32(insn, 31, 1); 4484 int opc = extract32(insn, 29, 2); 4485 int pos = extract32(insn, 21, 2) << 4; 4486 TCGv_i64 tcg_rd = cpu_reg(s, rd); 4487 4488 if (!sf && (pos >= 32)) { 4489 unallocated_encoding(s); 4490 return; 4491 } 4492 4493 switch (opc) { 4494 case 0: /* MOVN */ 4495 case 2: /* MOVZ */ 4496 imm <<= pos; 4497 if (opc == 0) { 4498 imm = ~imm; 4499 } 4500 if (!sf) { 4501 imm &= 0xffffffffu; 4502 } 4503 tcg_gen_movi_i64(tcg_rd, imm); 4504 break; 4505 case 3: /* MOVK */ 4506 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_constant_i64(imm), pos, 16); 4507 if (!sf) { 4508 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4509 } 4510 break; 4511 default: 4512 unallocated_encoding(s); 4513 break; 4514 } 4515 } 4516 4517 /* Bitfield 4518 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0 4519 * +----+-----+-------------+---+------+------+------+------+ 4520 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms | Rn | Rd | 4521 * +----+-----+-------------+---+------+------+------+------+ 4522 */ 4523 static void disas_bitfield(DisasContext *s, uint32_t insn) 4524 { 4525 unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len; 4526 TCGv_i64 tcg_rd, tcg_tmp; 4527 4528 sf = extract32(insn, 31, 1); 4529 opc = extract32(insn, 29, 2); 4530 n = extract32(insn, 22, 1); 4531 ri = extract32(insn, 16, 6); 4532 si = extract32(insn, 10, 6); 4533 rn = extract32(insn, 5, 5); 4534 rd = extract32(insn, 0, 5); 4535 bitsize = sf ? 64 : 32; 4536 4537 if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) { 4538 unallocated_encoding(s); 4539 return; 4540 } 4541 4542 tcg_rd = cpu_reg(s, rd); 4543 4544 /* Suppress the zero-extend for !sf. Since RI and SI are constrained 4545 to be smaller than bitsize, we'll never reference data outside the 4546 low 32-bits anyway. */ 4547 tcg_tmp = read_cpu_reg(s, rn, 1); 4548 4549 /* Recognize simple(r) extractions. */ 4550 if (si >= ri) { 4551 /* Wd<s-r:0> = Wn<s:r> */ 4552 len = (si - ri) + 1; 4553 if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */ 4554 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len); 4555 goto done; 4556 } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */ 4557 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); 4558 return; 4559 } 4560 /* opc == 1, BFXIL fall through to deposit */ 4561 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 4562 pos = 0; 4563 } else { 4564 /* Handle the ri > si case with a deposit 4565 * Wd<32+s-r,32-r> = Wn<s:0> 4566 */ 4567 len = si + 1; 4568 pos = (bitsize - ri) & (bitsize - 1); 4569 } 4570 4571 if (opc == 0 && len < ri) { 4572 /* SBFM: sign extend the destination field from len to fill 4573 the balance of the word. Let the deposit below insert all 4574 of those sign bits. 
*/ 4575 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); 4576 len = ri; 4577 } 4578 4579 if (opc == 1) { /* BFM, BFXIL */ 4580 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4581 } else { 4582 /* SBFM or UBFM: We start with zero, and we haven't modified 4583 any bits outside bitsize, therefore the zero-extension 4584 below is unneeded. */ 4585 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4586 return; 4587 } 4588 4589 done: 4590 if (!sf) { /* zero extend final result */ 4591 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4592 } 4593 } 4594 4595 /* Extract 4596 * 31 30 29 28 23 22 21 20 16 15 10 9 5 4 0 4597 * +----+------+-------------+---+----+------+--------+------+------+ 4598 * | sf | op21 | 1 0 0 1 1 1 | N | o0 | Rm | imms | Rn | Rd | 4599 * +----+------+-------------+---+----+------+--------+------+------+ 4600 */ 4601 static void disas_extract(DisasContext *s, uint32_t insn) 4602 { 4603 unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0; 4604 4605 sf = extract32(insn, 31, 1); 4606 n = extract32(insn, 22, 1); 4607 rm = extract32(insn, 16, 5); 4608 imm = extract32(insn, 10, 6); 4609 rn = extract32(insn, 5, 5); 4610 rd = extract32(insn, 0, 5); 4611 op21 = extract32(insn, 29, 2); 4612 op0 = extract32(insn, 21, 1); 4613 bitsize = sf ? 64 : 32; 4614 4615 if (sf != n || op21 || op0 || imm >= bitsize) { 4616 unallocated_encoding(s); 4617 } else { 4618 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4619 4620 tcg_rd = cpu_reg(s, rd); 4621 4622 if (unlikely(imm == 0)) { 4623 /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4624 * so an extract from bit 0 is a special case. 4625 */ 4626 if (sf) { 4627 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm)); 4628 } else { 4629 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm)); 4630 } 4631 } else { 4632 tcg_rm = cpu_reg(s, rm); 4633 tcg_rn = cpu_reg(s, rn); 4634 4635 if (sf) { 4636 /* Specialization to ROR happens in EXTRACT2. */ 4637 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm); 4638 } else { 4639 TCGv_i32 t0 = tcg_temp_new_i32(); 4640 4641 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4642 if (rm == rn) { 4643 tcg_gen_rotri_i32(t0, t0, imm); 4644 } else { 4645 TCGv_i32 t1 = tcg_temp_new_i32(); 4646 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4647 tcg_gen_extract2_i32(t0, t0, t1, imm); 4648 } 4649 tcg_gen_extu_i32_i64(tcg_rd, t0); 4650 } 4651 } 4652 } 4653 } 4654 4655 /* Data processing - immediate */ 4656 static void disas_data_proc_imm(DisasContext *s, uint32_t insn) 4657 { 4658 switch (extract32(insn, 23, 6)) { 4659 case 0x20: case 0x21: /* PC-rel. addressing */ 4660 disas_pc_rel_adr(s, insn); 4661 break; 4662 case 0x22: /* Add/subtract (immediate) */ 4663 disas_add_sub_imm(s, insn); 4664 break; 4665 case 0x23: /* Add/subtract (immediate, with tags) */ 4666 disas_add_sub_imm_with_tags(s, insn); 4667 break; 4668 case 0x24: /* Logical (immediate) */ 4669 disas_logic_imm(s, insn); 4670 break; 4671 case 0x25: /* Move wide (immediate) */ 4672 disas_movw_imm(s, insn); 4673 break; 4674 case 0x26: /* Bitfield */ 4675 disas_bitfield(s, insn); 4676 break; 4677 case 0x27: /* Extract */ 4678 disas_extract(s, insn); 4679 break; 4680 default: 4681 unallocated_encoding(s); 4682 break; 4683 } 4684 } 4685 4686 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 4687 * Note that it is the caller's responsibility to ensure that the 4688 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 4689 * mandated semantics for out of range shifts. 
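 * (In practice handle_shift_reg() masks the register amount with
 * sf ? 63 : 31, matching the ARM-mandated modulo behaviour of
 * LSLV/LSRV/ASRV/RORV, while shift_reg_imm() asserts that immediate
 * shifts are already in range.)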
4690 */ 4691 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 4692 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 4693 { 4694 switch (shift_type) { 4695 case A64_SHIFT_TYPE_LSL: 4696 tcg_gen_shl_i64(dst, src, shift_amount); 4697 break; 4698 case A64_SHIFT_TYPE_LSR: 4699 tcg_gen_shr_i64(dst, src, shift_amount); 4700 break; 4701 case A64_SHIFT_TYPE_ASR: 4702 if (!sf) { 4703 tcg_gen_ext32s_i64(dst, src); 4704 } 4705 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 4706 break; 4707 case A64_SHIFT_TYPE_ROR: 4708 if (sf) { 4709 tcg_gen_rotr_i64(dst, src, shift_amount); 4710 } else { 4711 TCGv_i32 t0, t1; 4712 t0 = tcg_temp_new_i32(); 4713 t1 = tcg_temp_new_i32(); 4714 tcg_gen_extrl_i64_i32(t0, src); 4715 tcg_gen_extrl_i64_i32(t1, shift_amount); 4716 tcg_gen_rotr_i32(t0, t0, t1); 4717 tcg_gen_extu_i32_i64(dst, t0); 4718 } 4719 break; 4720 default: 4721 assert(FALSE); /* all shift types should be handled */ 4722 break; 4723 } 4724 4725 if (!sf) { /* zero extend final result */ 4726 tcg_gen_ext32u_i64(dst, dst); 4727 } 4728 } 4729 4730 /* Shift a TCGv src by immediate, put result in dst. 4731 * The shift amount must be in range (this should always be true as the 4732 * relevant instructions will UNDEF on bad shift immediates). 4733 */ 4734 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 4735 enum a64_shift_type shift_type, unsigned int shift_i) 4736 { 4737 assert(shift_i < (sf ? 64 : 32)); 4738 4739 if (shift_i == 0) { 4740 tcg_gen_mov_i64(dst, src); 4741 } else { 4742 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 4743 } 4744 } 4745 4746 /* Logical (shifted register) 4747 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 4748 * +----+-----+-----------+-------+---+------+--------+------+------+ 4749 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd | 4750 * +----+-----+-----------+-------+---+------+--------+------+------+ 4751 */ 4752 static void disas_logic_reg(DisasContext *s, uint32_t insn) 4753 { 4754 TCGv_i64 tcg_rd, tcg_rn, tcg_rm; 4755 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd; 4756 4757 sf = extract32(insn, 31, 1); 4758 opc = extract32(insn, 29, 2); 4759 shift_type = extract32(insn, 22, 2); 4760 invert = extract32(insn, 21, 1); 4761 rm = extract32(insn, 16, 5); 4762 shift_amount = extract32(insn, 10, 6); 4763 rn = extract32(insn, 5, 5); 4764 rd = extract32(insn, 0, 5); 4765 4766 if (!sf && (shift_amount & (1 << 5))) { 4767 unallocated_encoding(s); 4768 return; 4769 } 4770 4771 tcg_rd = cpu_reg(s, rd); 4772 4773 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) { 4774 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for 4775 * register-register MOV and MVN, so it is worth special casing. 
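 * (For example, "MOV X0, X1" assembles as ORR X0, XZR, X1 and
 * "MVN W0, W1" as ORN W0, WZR, W1, so register-to-register MOV and
 * MVN land on this path.)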
4776 */ 4777 tcg_rm = cpu_reg(s, rm); 4778 if (invert) { 4779 tcg_gen_not_i64(tcg_rd, tcg_rm); 4780 if (!sf) { 4781 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4782 } 4783 } else { 4784 if (sf) { 4785 tcg_gen_mov_i64(tcg_rd, tcg_rm); 4786 } else { 4787 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 4788 } 4789 } 4790 return; 4791 } 4792 4793 tcg_rm = read_cpu_reg(s, rm, sf); 4794 4795 if (shift_amount) { 4796 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount); 4797 } 4798 4799 tcg_rn = cpu_reg(s, rn); 4800 4801 switch (opc | (invert << 2)) { 4802 case 0: /* AND */ 4803 case 3: /* ANDS */ 4804 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm); 4805 break; 4806 case 1: /* ORR */ 4807 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm); 4808 break; 4809 case 2: /* EOR */ 4810 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm); 4811 break; 4812 case 4: /* BIC */ 4813 case 7: /* BICS */ 4814 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm); 4815 break; 4816 case 5: /* ORN */ 4817 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm); 4818 break; 4819 case 6: /* EON */ 4820 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm); 4821 break; 4822 default: 4823 assert(FALSE); 4824 break; 4825 } 4826 4827 if (!sf) { 4828 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4829 } 4830 4831 if (opc == 3) { 4832 gen_logic_CC(sf, tcg_rd); 4833 } 4834 } 4835 4836 /* 4837 * Add/subtract (extended register) 4838 * 4839 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0| 4840 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 4841 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd | 4842 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 4843 * 4844 * sf: 0 -> 32bit, 1 -> 64bit 4845 * op: 0 -> add , 1 -> sub 4846 * S: 1 -> set flags 4847 * opt: 00 4848 * option: extension type (see DecodeRegExtend) 4849 * imm3: optional shift to Rm 4850 * 4851 * Rd = Rn + LSL(extend(Rm), amount) 4852 */ 4853 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn) 4854 { 4855 int rd = extract32(insn, 0, 5); 4856 int rn = extract32(insn, 5, 5); 4857 int imm3 = extract32(insn, 10, 3); 4858 int option = extract32(insn, 13, 3); 4859 int rm = extract32(insn, 16, 5); 4860 int opt = extract32(insn, 22, 2); 4861 bool setflags = extract32(insn, 29, 1); 4862 bool sub_op = extract32(insn, 30, 1); 4863 bool sf = extract32(insn, 31, 1); 4864 4865 TCGv_i64 tcg_rm, tcg_rn; /* temps */ 4866 TCGv_i64 tcg_rd; 4867 TCGv_i64 tcg_result; 4868 4869 if (imm3 > 4 || opt != 0) { 4870 unallocated_encoding(s); 4871 return; 4872 } 4873 4874 /* non-flag setting ops may use SP */ 4875 if (!setflags) { 4876 tcg_rd = cpu_reg_sp(s, rd); 4877 } else { 4878 tcg_rd = cpu_reg(s, rd); 4879 } 4880 tcg_rn = read_cpu_reg_sp(s, rn, sf); 4881 4882 tcg_rm = read_cpu_reg(s, rm, sf); 4883 ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3); 4884 4885 tcg_result = tcg_temp_new_i64(); 4886 4887 if (!setflags) { 4888 if (sub_op) { 4889 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 4890 } else { 4891 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 4892 } 4893 } else { 4894 if (sub_op) { 4895 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 4896 } else { 4897 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 4898 } 4899 } 4900 4901 if (sf) { 4902 tcg_gen_mov_i64(tcg_rd, tcg_result); 4903 } else { 4904 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 4905 } 4906 } 4907 4908 /* 4909 * Add/subtract (shifted register) 4910 * 4911 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 4912 * +--+--+--+-----------+-----+--+-------+---------+------+------+ 4913 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd | 4914 * 
+--+--+--+-----------+-----+--+-------+---------+------+------+ 4915 * 4916 * sf: 0 -> 32bit, 1 -> 64bit 4917 * op: 0 -> add , 1 -> sub 4918 * S: 1 -> set flags 4919 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED 4920 * imm6: Shift amount to apply to Rm before the add/sub 4921 */ 4922 static void disas_add_sub_reg(DisasContext *s, uint32_t insn) 4923 { 4924 int rd = extract32(insn, 0, 5); 4925 int rn = extract32(insn, 5, 5); 4926 int imm6 = extract32(insn, 10, 6); 4927 int rm = extract32(insn, 16, 5); 4928 int shift_type = extract32(insn, 22, 2); 4929 bool setflags = extract32(insn, 29, 1); 4930 bool sub_op = extract32(insn, 30, 1); 4931 bool sf = extract32(insn, 31, 1); 4932 4933 TCGv_i64 tcg_rd = cpu_reg(s, rd); 4934 TCGv_i64 tcg_rn, tcg_rm; 4935 TCGv_i64 tcg_result; 4936 4937 if ((shift_type == 3) || (!sf && (imm6 > 31))) { 4938 unallocated_encoding(s); 4939 return; 4940 } 4941 4942 tcg_rn = read_cpu_reg(s, rn, sf); 4943 tcg_rm = read_cpu_reg(s, rm, sf); 4944 4945 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6); 4946 4947 tcg_result = tcg_temp_new_i64(); 4948 4949 if (!setflags) { 4950 if (sub_op) { 4951 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 4952 } else { 4953 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 4954 } 4955 } else { 4956 if (sub_op) { 4957 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 4958 } else { 4959 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 4960 } 4961 } 4962 4963 if (sf) { 4964 tcg_gen_mov_i64(tcg_rd, tcg_result); 4965 } else { 4966 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 4967 } 4968 } 4969 4970 /* Data-processing (3 source) 4971 * 4972 * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0 4973 * +--+------+-----------+------+------+----+------+------+------+ 4974 * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd | 4975 * +--+------+-----------+------+------+----+------+------+------+ 4976 */ 4977 static void disas_data_proc_3src(DisasContext *s, uint32_t insn) 4978 { 4979 int rd = extract32(insn, 0, 5); 4980 int rn = extract32(insn, 5, 5); 4981 int ra = extract32(insn, 10, 5); 4982 int rm = extract32(insn, 16, 5); 4983 int op_id = (extract32(insn, 29, 3) << 4) | 4984 (extract32(insn, 21, 3) << 1) | 4985 extract32(insn, 15, 1); 4986 bool sf = extract32(insn, 31, 1); 4987 bool is_sub = extract32(op_id, 0, 1); 4988 bool is_high = extract32(op_id, 2, 1); 4989 bool is_signed = false; 4990 TCGv_i64 tcg_op1; 4991 TCGv_i64 tcg_op2; 4992 TCGv_i64 tcg_tmp; 4993 4994 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */ 4995 switch (op_id) { 4996 case 0x42: /* SMADDL */ 4997 case 0x43: /* SMSUBL */ 4998 case 0x44: /* SMULH */ 4999 is_signed = true; 5000 break; 5001 case 0x0: /* MADD (32bit) */ 5002 case 0x1: /* MSUB (32bit) */ 5003 case 0x40: /* MADD (64bit) */ 5004 case 0x41: /* MSUB (64bit) */ 5005 case 0x4a: /* UMADDL */ 5006 case 0x4b: /* UMSUBL */ 5007 case 0x4c: /* UMULH */ 5008 break; 5009 default: 5010 unallocated_encoding(s); 5011 return; 5012 } 5013 5014 if (is_high) { 5015 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */ 5016 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5017 TCGv_i64 tcg_rn = cpu_reg(s, rn); 5018 TCGv_i64 tcg_rm = cpu_reg(s, rm); 5019 5020 if (is_signed) { 5021 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 5022 } else { 5023 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 5024 } 5025 return; 5026 } 5027 5028 tcg_op1 = tcg_temp_new_i64(); 5029 tcg_op2 = tcg_temp_new_i64(); 5030 tcg_tmp = tcg_temp_new_i64(); 5031 5032 if (op_id < 0x42) { 5033 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn)); 5034 
tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm)); 5035 } else { 5036 if (is_signed) { 5037 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn)); 5038 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm)); 5039 } else { 5040 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn)); 5041 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm)); 5042 } 5043 } 5044 5045 if (ra == 31 && !is_sub) { 5046 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 5047 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2); 5048 } else { 5049 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 5050 if (is_sub) { 5051 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 5052 } else { 5053 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 5054 } 5055 } 5056 5057 if (!sf) { 5058 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd)); 5059 } 5060 } 5061 5062 /* Add/subtract (with carry) 5063 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0 5064 * +--+--+--+------------------------+------+-------------+------+-----+ 5065 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd | 5066 * +--+--+--+------------------------+------+-------------+------+-----+ 5067 */ 5068 5069 static void disas_adc_sbc(DisasContext *s, uint32_t insn) 5070 { 5071 unsigned int sf, op, setflags, rm, rn, rd; 5072 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 5073 5074 sf = extract32(insn, 31, 1); 5075 op = extract32(insn, 30, 1); 5076 setflags = extract32(insn, 29, 1); 5077 rm = extract32(insn, 16, 5); 5078 rn = extract32(insn, 5, 5); 5079 rd = extract32(insn, 0, 5); 5080 5081 tcg_rd = cpu_reg(s, rd); 5082 tcg_rn = cpu_reg(s, rn); 5083 5084 if (op) { 5085 tcg_y = tcg_temp_new_i64(); 5086 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm)); 5087 } else { 5088 tcg_y = cpu_reg(s, rm); 5089 } 5090 5091 if (setflags) { 5092 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y); 5093 } else { 5094 gen_adc(sf, tcg_rd, tcg_rn, tcg_y); 5095 } 5096 } 5097 5098 /* 5099 * Rotate right into flags 5100 * 31 30 29 21 15 10 5 4 0 5101 * +--+--+--+-----------------+--------+-----------+------+--+------+ 5102 * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask | 5103 * +--+--+--+-----------------+--------+-----------+------+--+------+ 5104 */ 5105 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn) 5106 { 5107 int mask = extract32(insn, 0, 4); 5108 int o2 = extract32(insn, 4, 1); 5109 int rn = extract32(insn, 5, 5); 5110 int imm6 = extract32(insn, 15, 6); 5111 int sf_op_s = extract32(insn, 29, 3); 5112 TCGv_i64 tcg_rn; 5113 TCGv_i32 nzcv; 5114 5115 if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) { 5116 unallocated_encoding(s); 5117 return; 5118 } 5119 5120 tcg_rn = read_cpu_reg(s, rn, 1); 5121 tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6); 5122 5123 nzcv = tcg_temp_new_i32(); 5124 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 5125 5126 if (mask & 8) { /* N */ 5127 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 5128 } 5129 if (mask & 4) { /* Z */ 5130 tcg_gen_not_i32(cpu_ZF, nzcv); 5131 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 5132 } 5133 if (mask & 2) { /* C */ 5134 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 5135 } 5136 if (mask & 1) { /* V */ 5137 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 5138 } 5139 } 5140 5141 /* 5142 * Evaluate into flags 5143 * 31 30 29 21 15 14 10 5 4 0 5144 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 5145 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 | Rn |o3| mask | 5146 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 5147 */ 5148 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn) 5149 { 5150 int 
o3_mask = extract32(insn, 0, 5); 5151 int rn = extract32(insn, 5, 5); 5152 int o2 = extract32(insn, 15, 6); 5153 int sz = extract32(insn, 14, 1); 5154 int sf_op_s = extract32(insn, 29, 3); 5155 TCGv_i32 tmp; 5156 int shift; 5157 5158 if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd || 5159 !dc_isar_feature(aa64_condm_4, s)) { 5160 unallocated_encoding(s); 5161 return; 5162 } 5163 shift = sz ? 16 : 24; /* SETF16 or SETF8 */ 5164 5165 tmp = tcg_temp_new_i32(); 5166 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 5167 tcg_gen_shli_i32(cpu_NF, tmp, shift); 5168 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 5169 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 5170 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 5171 } 5172 5173 /* Conditional compare (immediate / register) 5174 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 5175 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 5176 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv | 5177 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 5178 * [1] y [0] [0] 5179 */ 5180 static void disas_cc(DisasContext *s, uint32_t insn) 5181 { 5182 unsigned int sf, op, y, cond, rn, nzcv, is_imm; 5183 TCGv_i32 tcg_t0, tcg_t1, tcg_t2; 5184 TCGv_i64 tcg_tmp, tcg_y, tcg_rn; 5185 DisasCompare c; 5186 5187 if (!extract32(insn, 29, 1)) { 5188 unallocated_encoding(s); 5189 return; 5190 } 5191 if (insn & (1 << 10 | 1 << 4)) { 5192 unallocated_encoding(s); 5193 return; 5194 } 5195 sf = extract32(insn, 31, 1); 5196 op = extract32(insn, 30, 1); 5197 is_imm = extract32(insn, 11, 1); 5198 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */ 5199 cond = extract32(insn, 12, 4); 5200 rn = extract32(insn, 5, 5); 5201 nzcv = extract32(insn, 0, 4); 5202 5203 /* Set T0 = !COND. */ 5204 tcg_t0 = tcg_temp_new_i32(); 5205 arm_test_cc(&c, cond); 5206 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 5207 5208 /* Load the arguments for the new comparison. */ 5209 if (is_imm) { 5210 tcg_y = tcg_temp_new_i64(); 5211 tcg_gen_movi_i64(tcg_y, y); 5212 } else { 5213 tcg_y = cpu_reg(s, y); 5214 } 5215 tcg_rn = cpu_reg(s, rn); 5216 5217 /* Set the flags for the new comparison. */ 5218 tcg_tmp = tcg_temp_new_i64(); 5219 if (op) { 5220 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y); 5221 } else { 5222 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y); 5223 } 5224 5225 /* If COND was false, force the flags to #nzcv. Compute two masks 5226 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 5227 * For tcg hosts that support ANDC, we can make do with just T1. 5228 * In either case, allow the tcg optimizer to delete any unused mask. 
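 * (Example for the N bit: with nzcv<3> set, NF |= T1 leaves NF
 * unchanged when COND held (T1 == 0) and forces it to all-ones,
 * i.e. negative, when COND failed (T1 == -1); the andc/and forms
 * below do the symmetric clearing when nzcv<3> is clear.)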
5229 */ 5230 tcg_t1 = tcg_temp_new_i32(); 5231 tcg_t2 = tcg_temp_new_i32(); 5232 tcg_gen_neg_i32(tcg_t1, tcg_t0); 5233 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 5234 5235 if (nzcv & 8) { /* N */ 5236 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 5237 } else { 5238 if (TCG_TARGET_HAS_andc_i32) { 5239 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 5240 } else { 5241 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 5242 } 5243 } 5244 if (nzcv & 4) { /* Z */ 5245 if (TCG_TARGET_HAS_andc_i32) { 5246 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 5247 } else { 5248 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 5249 } 5250 } else { 5251 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 5252 } 5253 if (nzcv & 2) { /* C */ 5254 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 5255 } else { 5256 if (TCG_TARGET_HAS_andc_i32) { 5257 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 5258 } else { 5259 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 5260 } 5261 } 5262 if (nzcv & 1) { /* V */ 5263 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 5264 } else { 5265 if (TCG_TARGET_HAS_andc_i32) { 5266 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 5267 } else { 5268 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 5269 } 5270 } 5271 } 5272 5273 /* Conditional select 5274 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0 5275 * +----+----+---+-----------------+------+------+-----+------+------+ 5276 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd | 5277 * +----+----+---+-----------------+------+------+-----+------+------+ 5278 */ 5279 static void disas_cond_select(DisasContext *s, uint32_t insn) 5280 { 5281 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd; 5282 TCGv_i64 tcg_rd, zero; 5283 DisasCompare64 c; 5284 5285 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) { 5286 /* S == 1 or op2<1> == 1 */ 5287 unallocated_encoding(s); 5288 return; 5289 } 5290 sf = extract32(insn, 31, 1); 5291 else_inv = extract32(insn, 30, 1); 5292 rm = extract32(insn, 16, 5); 5293 cond = extract32(insn, 12, 4); 5294 else_inc = extract32(insn, 10, 1); 5295 rn = extract32(insn, 5, 5); 5296 rd = extract32(insn, 0, 5); 5297 5298 tcg_rd = cpu_reg(s, rd); 5299 5300 a64_test_cc(&c, cond); 5301 zero = tcg_constant_i64(0); 5302 5303 if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) { 5304 /* CSET & CSETM. 
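 * (CSET Rd, cond is the alias of CSINC Rd, ZR, ZR, invert(cond), and
 * CSETM the corresponding CSINV form; with both operands being ZR
 * the generic movcond reduces to a setcond, negated for CSETM.)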
*/ 5305 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero); 5306 if (else_inv) { 5307 tcg_gen_neg_i64(tcg_rd, tcg_rd); 5308 } 5309 } else { 5310 TCGv_i64 t_true = cpu_reg(s, rn); 5311 TCGv_i64 t_false = read_cpu_reg(s, rm, 1); 5312 if (else_inv && else_inc) { 5313 tcg_gen_neg_i64(t_false, t_false); 5314 } else if (else_inv) { 5315 tcg_gen_not_i64(t_false, t_false); 5316 } else if (else_inc) { 5317 tcg_gen_addi_i64(t_false, t_false, 1); 5318 } 5319 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 5320 } 5321 5322 if (!sf) { 5323 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5324 } 5325 } 5326 5327 static void handle_clz(DisasContext *s, unsigned int sf, 5328 unsigned int rn, unsigned int rd) 5329 { 5330 TCGv_i64 tcg_rd, tcg_rn; 5331 tcg_rd = cpu_reg(s, rd); 5332 tcg_rn = cpu_reg(s, rn); 5333 5334 if (sf) { 5335 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 5336 } else { 5337 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5338 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5339 tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32); 5340 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5341 } 5342 } 5343 5344 static void handle_cls(DisasContext *s, unsigned int sf, 5345 unsigned int rn, unsigned int rd) 5346 { 5347 TCGv_i64 tcg_rd, tcg_rn; 5348 tcg_rd = cpu_reg(s, rd); 5349 tcg_rn = cpu_reg(s, rn); 5350 5351 if (sf) { 5352 tcg_gen_clrsb_i64(tcg_rd, tcg_rn); 5353 } else { 5354 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5355 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5356 tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32); 5357 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5358 } 5359 } 5360 5361 static void handle_rbit(DisasContext *s, unsigned int sf, 5362 unsigned int rn, unsigned int rd) 5363 { 5364 TCGv_i64 tcg_rd, tcg_rn; 5365 tcg_rd = cpu_reg(s, rd); 5366 tcg_rn = cpu_reg(s, rn); 5367 5368 if (sf) { 5369 gen_helper_rbit64(tcg_rd, tcg_rn); 5370 } else { 5371 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5372 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5373 gen_helper_rbit(tcg_tmp32, tcg_tmp32); 5374 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5375 } 5376 } 5377 5378 /* REV with sf==1, opcode==3 ("REV64") */ 5379 static void handle_rev64(DisasContext *s, unsigned int sf, 5380 unsigned int rn, unsigned int rd) 5381 { 5382 if (!sf) { 5383 unallocated_encoding(s); 5384 return; 5385 } 5386 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn)); 5387 } 5388 5389 /* REV with sf==0, opcode==2 5390 * REV32 (sf==1, opcode==2) 5391 */ 5392 static void handle_rev32(DisasContext *s, unsigned int sf, 5393 unsigned int rn, unsigned int rd) 5394 { 5395 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5396 TCGv_i64 tcg_rn = cpu_reg(s, rn); 5397 5398 if (sf) { 5399 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 5400 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 5401 } else { 5402 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 5403 } 5404 } 5405 5406 /* REV16 (opcode==1) */ 5407 static void handle_rev16(DisasContext *s, unsigned int sf, 5408 unsigned int rn, unsigned int rd) 5409 { 5410 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5411 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 5412 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 5413 TCGv_i64 mask = tcg_constant_i64(sf ? 
0x00ff00ff00ff00ffull : 0x00ff00ff); 5414 5415 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 5416 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 5417 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 5418 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 5419 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 5420 } 5421 5422 /* Data-processing (1 source) 5423 * 31 30 29 28 21 20 16 15 10 9 5 4 0 5424 * +----+---+---+-----------------+---------+--------+------+------+ 5425 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd | 5426 * +----+---+---+-----------------+---------+--------+------+------+ 5427 */ 5428 static void disas_data_proc_1src(DisasContext *s, uint32_t insn) 5429 { 5430 unsigned int sf, opcode, opcode2, rn, rd; 5431 TCGv_i64 tcg_rd; 5432 5433 if (extract32(insn, 29, 1)) { 5434 unallocated_encoding(s); 5435 return; 5436 } 5437 5438 sf = extract32(insn, 31, 1); 5439 opcode = extract32(insn, 10, 6); 5440 opcode2 = extract32(insn, 16, 5); 5441 rn = extract32(insn, 5, 5); 5442 rd = extract32(insn, 0, 5); 5443 5444 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7)) 5445 5446 switch (MAP(sf, opcode2, opcode)) { 5447 case MAP(0, 0x00, 0x00): /* RBIT */ 5448 case MAP(1, 0x00, 0x00): 5449 handle_rbit(s, sf, rn, rd); 5450 break; 5451 case MAP(0, 0x00, 0x01): /* REV16 */ 5452 case MAP(1, 0x00, 0x01): 5453 handle_rev16(s, sf, rn, rd); 5454 break; 5455 case MAP(0, 0x00, 0x02): /* REV/REV32 */ 5456 case MAP(1, 0x00, 0x02): 5457 handle_rev32(s, sf, rn, rd); 5458 break; 5459 case MAP(1, 0x00, 0x03): /* REV64 */ 5460 handle_rev64(s, sf, rn, rd); 5461 break; 5462 case MAP(0, 0x00, 0x04): /* CLZ */ 5463 case MAP(1, 0x00, 0x04): 5464 handle_clz(s, sf, rn, rd); 5465 break; 5466 case MAP(0, 0x00, 0x05): /* CLS */ 5467 case MAP(1, 0x00, 0x05): 5468 handle_cls(s, sf, rn, rd); 5469 break; 5470 case MAP(1, 0x01, 0x00): /* PACIA */ 5471 if (s->pauth_active) { 5472 tcg_rd = cpu_reg(s, rd); 5473 gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5474 } else if (!dc_isar_feature(aa64_pauth, s)) { 5475 goto do_unallocated; 5476 } 5477 break; 5478 case MAP(1, 0x01, 0x01): /* PACIB */ 5479 if (s->pauth_active) { 5480 tcg_rd = cpu_reg(s, rd); 5481 gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5482 } else if (!dc_isar_feature(aa64_pauth, s)) { 5483 goto do_unallocated; 5484 } 5485 break; 5486 case MAP(1, 0x01, 0x02): /* PACDA */ 5487 if (s->pauth_active) { 5488 tcg_rd = cpu_reg(s, rd); 5489 gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5490 } else if (!dc_isar_feature(aa64_pauth, s)) { 5491 goto do_unallocated; 5492 } 5493 break; 5494 case MAP(1, 0x01, 0x03): /* PACDB */ 5495 if (s->pauth_active) { 5496 tcg_rd = cpu_reg(s, rd); 5497 gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5498 } else if (!dc_isar_feature(aa64_pauth, s)) { 5499 goto do_unallocated; 5500 } 5501 break; 5502 case MAP(1, 0x01, 0x04): /* AUTIA */ 5503 if (s->pauth_active) { 5504 tcg_rd = cpu_reg(s, rd); 5505 gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5506 } else if (!dc_isar_feature(aa64_pauth, s)) { 5507 goto do_unallocated; 5508 } 5509 break; 5510 case MAP(1, 0x01, 0x05): /* AUTIB */ 5511 if (s->pauth_active) { 5512 tcg_rd = cpu_reg(s, rd); 5513 gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5514 } else if (!dc_isar_feature(aa64_pauth, s)) { 5515 goto do_unallocated; 5516 } 5517 break; 5518 case MAP(1, 0x01, 0x06): /* AUTDA */ 5519 if (s->pauth_active) { 5520 tcg_rd = cpu_reg(s, rd); 5521 gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5522 } else if 
(!dc_isar_feature(aa64_pauth, s)) { 5523 goto do_unallocated; 5524 } 5525 break; 5526 case MAP(1, 0x01, 0x07): /* AUTDB */ 5527 if (s->pauth_active) { 5528 tcg_rd = cpu_reg(s, rd); 5529 gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5530 } else if (!dc_isar_feature(aa64_pauth, s)) { 5531 goto do_unallocated; 5532 } 5533 break; 5534 case MAP(1, 0x01, 0x08): /* PACIZA */ 5535 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5536 goto do_unallocated; 5537 } else if (s->pauth_active) { 5538 tcg_rd = cpu_reg(s, rd); 5539 gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5540 } 5541 break; 5542 case MAP(1, 0x01, 0x09): /* PACIZB */ 5543 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5544 goto do_unallocated; 5545 } else if (s->pauth_active) { 5546 tcg_rd = cpu_reg(s, rd); 5547 gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5548 } 5549 break; 5550 case MAP(1, 0x01, 0x0a): /* PACDZA */ 5551 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5552 goto do_unallocated; 5553 } else if (s->pauth_active) { 5554 tcg_rd = cpu_reg(s, rd); 5555 gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5556 } 5557 break; 5558 case MAP(1, 0x01, 0x0b): /* PACDZB */ 5559 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5560 goto do_unallocated; 5561 } else if (s->pauth_active) { 5562 tcg_rd = cpu_reg(s, rd); 5563 gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5564 } 5565 break; 5566 case MAP(1, 0x01, 0x0c): /* AUTIZA */ 5567 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5568 goto do_unallocated; 5569 } else if (s->pauth_active) { 5570 tcg_rd = cpu_reg(s, rd); 5571 gen_helper_autia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5572 } 5573 break; 5574 case MAP(1, 0x01, 0x0d): /* AUTIZB */ 5575 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5576 goto do_unallocated; 5577 } else if (s->pauth_active) { 5578 tcg_rd = cpu_reg(s, rd); 5579 gen_helper_autib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5580 } 5581 break; 5582 case MAP(1, 0x01, 0x0e): /* AUTDZA */ 5583 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5584 goto do_unallocated; 5585 } else if (s->pauth_active) { 5586 tcg_rd = cpu_reg(s, rd); 5587 gen_helper_autda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5588 } 5589 break; 5590 case MAP(1, 0x01, 0x0f): /* AUTDZB */ 5591 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5592 goto do_unallocated; 5593 } else if (s->pauth_active) { 5594 tcg_rd = cpu_reg(s, rd); 5595 gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5596 } 5597 break; 5598 case MAP(1, 0x01, 0x10): /* XPACI */ 5599 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5600 goto do_unallocated; 5601 } else if (s->pauth_active) { 5602 tcg_rd = cpu_reg(s, rd); 5603 gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd); 5604 } 5605 break; 5606 case MAP(1, 0x01, 0x11): /* XPACD */ 5607 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5608 goto do_unallocated; 5609 } else if (s->pauth_active) { 5610 tcg_rd = cpu_reg(s, rd); 5611 gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd); 5612 } 5613 break; 5614 default: 5615 do_unallocated: 5616 unallocated_encoding(s); 5617 break; 5618 } 5619 5620 #undef MAP 5621 } 5622 5623 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf, 5624 unsigned int rm, unsigned int rn, unsigned int rd) 5625 { 5626 TCGv_i64 tcg_n, tcg_m, tcg_rd; 5627 tcg_rd = cpu_reg(s, rd); 5628 5629 if (!sf && is_signed) { 5630 tcg_n = tcg_temp_new_i64(); 5631 tcg_m = tcg_temp_new_i64(); 5632 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn)); 
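        /* (Both W inputs are sign-extended into fresh temps so that the
         * 64-bit sdiv helper sees, e.g., 0xfffffffa as -6 rather than a
         * large positive value; the unsigned and 64-bit cases can use
         * read_cpu_reg() directly.) */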
5633 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm)); 5634 } else { 5635 tcg_n = read_cpu_reg(s, rn, sf); 5636 tcg_m = read_cpu_reg(s, rm, sf); 5637 } 5638 5639 if (is_signed) { 5640 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 5641 } else { 5642 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 5643 } 5644 5645 if (!sf) { /* zero extend final result */ 5646 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5647 } 5648 } 5649 5650 /* LSLV, LSRV, ASRV, RORV */ 5651 static void handle_shift_reg(DisasContext *s, 5652 enum a64_shift_type shift_type, unsigned int sf, 5653 unsigned int rm, unsigned int rn, unsigned int rd) 5654 { 5655 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 5656 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5657 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 5658 5659 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31); 5660 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift); 5661 } 5662 5663 /* CRC32[BHWX], CRC32C[BHWX] */ 5664 static void handle_crc32(DisasContext *s, 5665 unsigned int sf, unsigned int sz, bool crc32c, 5666 unsigned int rm, unsigned int rn, unsigned int rd) 5667 { 5668 TCGv_i64 tcg_acc, tcg_val; 5669 TCGv_i32 tcg_bytes; 5670 5671 if (!dc_isar_feature(aa64_crc32, s) 5672 || (sf == 1 && sz != 3) 5673 || (sf == 0 && sz == 3)) { 5674 unallocated_encoding(s); 5675 return; 5676 } 5677 5678 if (sz == 3) { 5679 tcg_val = cpu_reg(s, rm); 5680 } else { 5681 uint64_t mask; 5682 switch (sz) { 5683 case 0: 5684 mask = 0xFF; 5685 break; 5686 case 1: 5687 mask = 0xFFFF; 5688 break; 5689 case 2: 5690 mask = 0xFFFFFFFF; 5691 break; 5692 default: 5693 g_assert_not_reached(); 5694 } 5695 tcg_val = tcg_temp_new_i64(); 5696 tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask); 5697 } 5698 5699 tcg_acc = cpu_reg(s, rn); 5700 tcg_bytes = tcg_constant_i32(1 << sz); 5701 5702 if (crc32c) { 5703 gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 5704 } else { 5705 gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 5706 } 5707 } 5708 5709 /* Data-processing (2 source) 5710 * 31 30 29 28 21 20 16 15 10 9 5 4 0 5711 * +----+---+---+-----------------+------+--------+------+------+ 5712 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd | 5713 * +----+---+---+-----------------+------+--------+------+------+ 5714 */ 5715 static void disas_data_proc_2src(DisasContext *s, uint32_t insn) 5716 { 5717 unsigned int sf, rm, opcode, rn, rd, setflag; 5718 sf = extract32(insn, 31, 1); 5719 setflag = extract32(insn, 29, 1); 5720 rm = extract32(insn, 16, 5); 5721 opcode = extract32(insn, 10, 6); 5722 rn = extract32(insn, 5, 5); 5723 rd = extract32(insn, 0, 5); 5724 5725 if (setflag && opcode != 0) { 5726 unallocated_encoding(s); 5727 return; 5728 } 5729 5730 switch (opcode) { 5731 case 0: /* SUBP(S) */ 5732 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5733 goto do_unallocated; 5734 } else { 5735 TCGv_i64 tcg_n, tcg_m, tcg_d; 5736 5737 tcg_n = read_cpu_reg_sp(s, rn, true); 5738 tcg_m = read_cpu_reg_sp(s, rm, true); 5739 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 5740 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 5741 tcg_d = cpu_reg(s, rd); 5742 5743 if (setflag) { 5744 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 5745 } else { 5746 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 5747 } 5748 } 5749 break; 5750 case 2: /* UDIV */ 5751 handle_div(s, false, sf, rm, rn, rd); 5752 break; 5753 case 3: /* SDIV */ 5754 handle_div(s, true, sf, rm, rn, rd); 5755 break; 5756 case 4: /* IRG */ 5757 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5758 goto do_unallocated; 5759 } 5760 if (s->ata) { 5761 
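            /* (The helper inserts a randomly chosen allocation tag,
             * honouring the GCR_EL1 exclusion mask; when allocation tags
             * are not enabled, the else path below simply forces a zero
             * tag into the address.) */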
gen_helper_irg(cpu_reg_sp(s, rd), cpu_env, 5762 cpu_reg_sp(s, rn), cpu_reg(s, rm)); 5763 } else { 5764 gen_address_with_allocation_tag0(cpu_reg_sp(s, rd), 5765 cpu_reg_sp(s, rn)); 5766 } 5767 break; 5768 case 5: /* GMI */ 5769 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5770 goto do_unallocated; 5771 } else { 5772 TCGv_i64 t = tcg_temp_new_i64(); 5773 5774 tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4); 5775 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 5776 tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t); 5777 } 5778 break; 5779 case 8: /* LSLV */ 5780 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); 5781 break; 5782 case 9: /* LSRV */ 5783 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd); 5784 break; 5785 case 10: /* ASRV */ 5786 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd); 5787 break; 5788 case 11: /* RORV */ 5789 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd); 5790 break; 5791 case 12: /* PACGA */ 5792 if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) { 5793 goto do_unallocated; 5794 } 5795 gen_helper_pacga(cpu_reg(s, rd), cpu_env, 5796 cpu_reg(s, rn), cpu_reg_sp(s, rm)); 5797 break; 5798 case 16: 5799 case 17: 5800 case 18: 5801 case 19: 5802 case 20: 5803 case 21: 5804 case 22: 5805 case 23: /* CRC32 */ 5806 { 5807 int sz = extract32(opcode, 0, 2); 5808 bool crc32c = extract32(opcode, 2, 1); 5809 handle_crc32(s, sf, sz, crc32c, rm, rn, rd); 5810 break; 5811 } 5812 default: 5813 do_unallocated: 5814 unallocated_encoding(s); 5815 break; 5816 } 5817 } 5818 5819 /* 5820 * Data processing - register 5821 * 31 30 29 28 25 21 20 16 10 0 5822 * +--+---+--+---+-------+-----+-------+-------+---------+ 5823 * | |op0| |op1| 1 0 1 | op2 | | op3 | | 5824 * +--+---+--+---+-------+-----+-------+-------+---------+ 5825 */ 5826 static void disas_data_proc_reg(DisasContext *s, uint32_t insn) 5827 { 5828 int op0 = extract32(insn, 30, 1); 5829 int op1 = extract32(insn, 28, 1); 5830 int op2 = extract32(insn, 21, 4); 5831 int op3 = extract32(insn, 10, 6); 5832 5833 if (!op1) { 5834 if (op2 & 8) { 5835 if (op2 & 1) { 5836 /* Add/sub (extended register) */ 5837 disas_add_sub_ext_reg(s, insn); 5838 } else { 5839 /* Add/sub (shifted register) */ 5840 disas_add_sub_reg(s, insn); 5841 } 5842 } else { 5843 /* Logical (shifted register) */ 5844 disas_logic_reg(s, insn); 5845 } 5846 return; 5847 } 5848 5849 switch (op2) { 5850 case 0x0: 5851 switch (op3) { 5852 case 0x00: /* Add/subtract (with carry) */ 5853 disas_adc_sbc(s, insn); 5854 break; 5855 5856 case 0x01: /* Rotate right into flags */ 5857 case 0x21: 5858 disas_rotate_right_into_flags(s, insn); 5859 break; 5860 5861 case 0x02: /* Evaluate into flags */ 5862 case 0x12: 5863 case 0x22: 5864 case 0x32: 5865 disas_evaluate_into_flags(s, insn); 5866 break; 5867 5868 default: 5869 goto do_unallocated; 5870 } 5871 break; 5872 5873 case 0x2: /* Conditional compare */ 5874 disas_cc(s, insn); /* both imm and reg forms */ 5875 break; 5876 5877 case 0x4: /* Conditional select */ 5878 disas_cond_select(s, insn); 5879 break; 5880 5881 case 0x6: /* Data-processing */ 5882 if (op0) { /* (1 source) */ 5883 disas_data_proc_1src(s, insn); 5884 } else { /* (2 source) */ 5885 disas_data_proc_2src(s, insn); 5886 } 5887 break; 5888 case 0x8 ... 
0xf: /* (3 source) */ 5889 disas_data_proc_3src(s, insn); 5890 break; 5891 5892 default: 5893 do_unallocated: 5894 unallocated_encoding(s); 5895 break; 5896 } 5897 } 5898 5899 static void handle_fp_compare(DisasContext *s, int size, 5900 unsigned int rn, unsigned int rm, 5901 bool cmp_with_zero, bool signal_all_nans) 5902 { 5903 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 5904 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 5905 5906 if (size == MO_64) { 5907 TCGv_i64 tcg_vn, tcg_vm; 5908 5909 tcg_vn = read_fp_dreg(s, rn); 5910 if (cmp_with_zero) { 5911 tcg_vm = tcg_constant_i64(0); 5912 } else { 5913 tcg_vm = read_fp_dreg(s, rm); 5914 } 5915 if (signal_all_nans) { 5916 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5917 } else { 5918 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5919 } 5920 } else { 5921 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 5922 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 5923 5924 read_vec_element_i32(s, tcg_vn, rn, 0, size); 5925 if (cmp_with_zero) { 5926 tcg_gen_movi_i32(tcg_vm, 0); 5927 } else { 5928 read_vec_element_i32(s, tcg_vm, rm, 0, size); 5929 } 5930 5931 switch (size) { 5932 case MO_32: 5933 if (signal_all_nans) { 5934 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5935 } else { 5936 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5937 } 5938 break; 5939 case MO_16: 5940 if (signal_all_nans) { 5941 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5942 } else { 5943 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 5944 } 5945 break; 5946 default: 5947 g_assert_not_reached(); 5948 } 5949 } 5950 5951 gen_set_nzcv(tcg_flags); 5952 } 5953 5954 /* Floating point compare 5955 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0 5956 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 5957 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 | 5958 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 5959 */ 5960 static void disas_fp_compare(DisasContext *s, uint32_t insn) 5961 { 5962 unsigned int mos, type, rm, op, rn, opc, op2r; 5963 int size; 5964 5965 mos = extract32(insn, 29, 3); 5966 type = extract32(insn, 22, 2); 5967 rm = extract32(insn, 16, 5); 5968 op = extract32(insn, 14, 2); 5969 rn = extract32(insn, 5, 5); 5970 opc = extract32(insn, 3, 2); 5971 op2r = extract32(insn, 0, 3); 5972 5973 if (mos || op || op2r) { 5974 unallocated_encoding(s); 5975 return; 5976 } 5977 5978 switch (type) { 5979 case 0: 5980 size = MO_32; 5981 break; 5982 case 1: 5983 size = MO_64; 5984 break; 5985 case 3: 5986 size = MO_16; 5987 if (dc_isar_feature(aa64_fp16, s)) { 5988 break; 5989 } 5990 /* fallthru */ 5991 default: 5992 unallocated_encoding(s); 5993 return; 5994 } 5995 5996 if (!fp_access_check(s)) { 5997 return; 5998 } 5999 6000 handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2); 6001 } 6002 6003 /* Floating point conditional compare 6004 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 6005 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 6006 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv | 6007 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 6008 */ 6009 static void disas_fp_ccomp(DisasContext *s, uint32_t insn) 6010 { 6011 unsigned int mos, type, rm, cond, rn, op, nzcv; 6012 TCGLabel *label_continue = NULL; 6013 int size; 6014 6015 mos = extract32(insn, 29, 3); 6016 type = extract32(insn, 22, 2); 6017 rm = extract32(insn, 16, 5); 6018 cond 
= extract32(insn, 12, 4); 6019 rn = extract32(insn, 5, 5); 6020 op = extract32(insn, 4, 1); 6021 nzcv = extract32(insn, 0, 4); 6022 6023 if (mos) { 6024 unallocated_encoding(s); 6025 return; 6026 } 6027 6028 switch (type) { 6029 case 0: 6030 size = MO_32; 6031 break; 6032 case 1: 6033 size = MO_64; 6034 break; 6035 case 3: 6036 size = MO_16; 6037 if (dc_isar_feature(aa64_fp16, s)) { 6038 break; 6039 } 6040 /* fallthru */ 6041 default: 6042 unallocated_encoding(s); 6043 return; 6044 } 6045 6046 if (!fp_access_check(s)) { 6047 return; 6048 } 6049 6050 if (cond < 0x0e) { /* not always */ 6051 TCGLabel *label_match = gen_new_label(); 6052 label_continue = gen_new_label(); 6053 arm_gen_test_cc(cond, label_match); 6054 /* nomatch: */ 6055 gen_set_nzcv(tcg_constant_i64(nzcv << 28)); 6056 tcg_gen_br(label_continue); 6057 gen_set_label(label_match); 6058 } 6059 6060 handle_fp_compare(s, size, rn, rm, false, op); 6061 6062 if (cond < 0x0e) { 6063 gen_set_label(label_continue); 6064 } 6065 } 6066 6067 /* Floating point conditional select 6068 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 6069 * +---+---+---+-----------+------+---+------+------+-----+------+------+ 6070 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd | 6071 * +---+---+---+-----------+------+---+------+------+-----+------+------+ 6072 */ 6073 static void disas_fp_csel(DisasContext *s, uint32_t insn) 6074 { 6075 unsigned int mos, type, rm, cond, rn, rd; 6076 TCGv_i64 t_true, t_false; 6077 DisasCompare64 c; 6078 MemOp sz; 6079 6080 mos = extract32(insn, 29, 3); 6081 type = extract32(insn, 22, 2); 6082 rm = extract32(insn, 16, 5); 6083 cond = extract32(insn, 12, 4); 6084 rn = extract32(insn, 5, 5); 6085 rd = extract32(insn, 0, 5); 6086 6087 if (mos) { 6088 unallocated_encoding(s); 6089 return; 6090 } 6091 6092 switch (type) { 6093 case 0: 6094 sz = MO_32; 6095 break; 6096 case 1: 6097 sz = MO_64; 6098 break; 6099 case 3: 6100 sz = MO_16; 6101 if (dc_isar_feature(aa64_fp16, s)) { 6102 break; 6103 } 6104 /* fallthru */ 6105 default: 6106 unallocated_encoding(s); 6107 return; 6108 } 6109 6110 if (!fp_access_check(s)) { 6111 return; 6112 } 6113 6114 /* Zero extend sreg & hreg inputs to 64 bits now. */ 6115 t_true = tcg_temp_new_i64(); 6116 t_false = tcg_temp_new_i64(); 6117 read_vec_element(s, t_true, rn, 0, sz); 6118 read_vec_element(s, t_false, rm, 0, sz); 6119 6120 a64_test_cc(&c, cond); 6121 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 6122 t_true, t_false); 6123 6124 /* Note that sregs & hregs write back zeros to the high bits, 6125 and we've already done the zero-extension. 
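Writing the selected value back with write_fp_dreg therefore also
zeroes the rest of the destination vector register, as the
architecture requires for scalar FP writes.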
*/ 6126 write_fp_dreg(s, rd, t_true); 6127 } 6128 6129 /* Floating-point data-processing (1 source) - half precision */ 6130 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) 6131 { 6132 TCGv_ptr fpst = NULL; 6133 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 6134 TCGv_i32 tcg_res = tcg_temp_new_i32(); 6135 6136 switch (opcode) { 6137 case 0x0: /* FMOV */ 6138 tcg_gen_mov_i32(tcg_res, tcg_op); 6139 break; 6140 case 0x1: /* FABS */ 6141 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 6142 break; 6143 case 0x2: /* FNEG */ 6144 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 6145 break; 6146 case 0x3: /* FSQRT */ 6147 fpst = fpstatus_ptr(FPST_FPCR_F16); 6148 gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); 6149 break; 6150 case 0x8: /* FRINTN */ 6151 case 0x9: /* FRINTP */ 6152 case 0xa: /* FRINTM */ 6153 case 0xb: /* FRINTZ */ 6154 case 0xc: /* FRINTA */ 6155 { 6156 TCGv_i32 tcg_rmode; 6157 6158 fpst = fpstatus_ptr(FPST_FPCR_F16); 6159 tcg_rmode = gen_set_rmode(opcode & 7, fpst); 6160 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 6161 gen_restore_rmode(tcg_rmode, fpst); 6162 break; 6163 } 6164 case 0xe: /* FRINTX */ 6165 fpst = fpstatus_ptr(FPST_FPCR_F16); 6166 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst); 6167 break; 6168 case 0xf: /* FRINTI */ 6169 fpst = fpstatus_ptr(FPST_FPCR_F16); 6170 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 6171 break; 6172 default: 6173 g_assert_not_reached(); 6174 } 6175 6176 write_fp_sreg(s, rd, tcg_res); 6177 } 6178 6179 /* Floating-point data-processing (1 source) - single precision */ 6180 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) 6181 { 6182 void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr); 6183 TCGv_i32 tcg_op, tcg_res; 6184 TCGv_ptr fpst; 6185 int rmode = -1; 6186 6187 tcg_op = read_fp_sreg(s, rn); 6188 tcg_res = tcg_temp_new_i32(); 6189 6190 switch (opcode) { 6191 case 0x0: /* FMOV */ 6192 tcg_gen_mov_i32(tcg_res, tcg_op); 6193 goto done; 6194 case 0x1: /* FABS */ 6195 gen_helper_vfp_abss(tcg_res, tcg_op); 6196 goto done; 6197 case 0x2: /* FNEG */ 6198 gen_helper_vfp_negs(tcg_res, tcg_op); 6199 goto done; 6200 case 0x3: /* FSQRT */ 6201 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); 6202 goto done; 6203 case 0x6: /* BFCVT */ 6204 gen_fpst = gen_helper_bfcvt; 6205 break; 6206 case 0x8: /* FRINTN */ 6207 case 0x9: /* FRINTP */ 6208 case 0xa: /* FRINTM */ 6209 case 0xb: /* FRINTZ */ 6210 case 0xc: /* FRINTA */ 6211 rmode = opcode & 7; 6212 gen_fpst = gen_helper_rints; 6213 break; 6214 case 0xe: /* FRINTX */ 6215 gen_fpst = gen_helper_rints_exact; 6216 break; 6217 case 0xf: /* FRINTI */ 6218 gen_fpst = gen_helper_rints; 6219 break; 6220 case 0x10: /* FRINT32Z */ 6221 rmode = FPROUNDING_ZERO; 6222 gen_fpst = gen_helper_frint32_s; 6223 break; 6224 case 0x11: /* FRINT32X */ 6225 gen_fpst = gen_helper_frint32_s; 6226 break; 6227 case 0x12: /* FRINT64Z */ 6228 rmode = FPROUNDING_ZERO; 6229 gen_fpst = gen_helper_frint64_s; 6230 break; 6231 case 0x13: /* FRINT64X */ 6232 gen_fpst = gen_helper_frint64_s; 6233 break; 6234 default: 6235 g_assert_not_reached(); 6236 } 6237 6238 fpst = fpstatus_ptr(FPST_FPCR); 6239 if (rmode >= 0) { 6240 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); 6241 gen_fpst(tcg_res, tcg_op, fpst); 6242 gen_restore_rmode(tcg_rmode, fpst); 6243 } else { 6244 gen_fpst(tcg_res, tcg_op, fpst); 6245 } 6246 6247 done: 6248 write_fp_sreg(s, rd, tcg_res); 6249 } 6250 6251 /* Floating-point data-processing (1 source) - double precision */ 6252 static void handle_fp_1src_double(DisasContext *s, int 
opcode, int rd, int rn) 6253 { 6254 void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr); 6255 TCGv_i64 tcg_op, tcg_res; 6256 TCGv_ptr fpst; 6257 int rmode = -1; 6258 6259 switch (opcode) { 6260 case 0x0: /* FMOV */ 6261 gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0); 6262 return; 6263 } 6264 6265 tcg_op = read_fp_dreg(s, rn); 6266 tcg_res = tcg_temp_new_i64(); 6267 6268 switch (opcode) { 6269 case 0x1: /* FABS */ 6270 gen_helper_vfp_absd(tcg_res, tcg_op); 6271 goto done; 6272 case 0x2: /* FNEG */ 6273 gen_helper_vfp_negd(tcg_res, tcg_op); 6274 goto done; 6275 case 0x3: /* FSQRT */ 6276 gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env); 6277 goto done; 6278 case 0x8: /* FRINTN */ 6279 case 0x9: /* FRINTP */ 6280 case 0xa: /* FRINTM */ 6281 case 0xb: /* FRINTZ */ 6282 case 0xc: /* FRINTA */ 6283 rmode = opcode & 7; 6284 gen_fpst = gen_helper_rintd; 6285 break; 6286 case 0xe: /* FRINTX */ 6287 gen_fpst = gen_helper_rintd_exact; 6288 break; 6289 case 0xf: /* FRINTI */ 6290 gen_fpst = gen_helper_rintd; 6291 break; 6292 case 0x10: /* FRINT32Z */ 6293 rmode = FPROUNDING_ZERO; 6294 gen_fpst = gen_helper_frint32_d; 6295 break; 6296 case 0x11: /* FRINT32X */ 6297 gen_fpst = gen_helper_frint32_d; 6298 break; 6299 case 0x12: /* FRINT64Z */ 6300 rmode = FPROUNDING_ZERO; 6301 gen_fpst = gen_helper_frint64_d; 6302 break; 6303 case 0x13: /* FRINT64X */ 6304 gen_fpst = gen_helper_frint64_d; 6305 break; 6306 default: 6307 g_assert_not_reached(); 6308 } 6309 6310 fpst = fpstatus_ptr(FPST_FPCR); 6311 if (rmode >= 0) { 6312 TCGv_i32 tcg_rmode = gen_set_rmode(rmode, fpst); 6313 gen_fpst(tcg_res, tcg_op, fpst); 6314 gen_restore_rmode(tcg_rmode, fpst); 6315 } else { 6316 gen_fpst(tcg_res, tcg_op, fpst); 6317 } 6318 6319 done: 6320 write_fp_dreg(s, rd, tcg_res); 6321 } 6322 6323 static void handle_fp_fcvt(DisasContext *s, int opcode, 6324 int rd, int rn, int dtype, int ntype) 6325 { 6326 switch (ntype) { 6327 case 0x0: 6328 { 6329 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 6330 if (dtype == 1) { 6331 /* Single to double */ 6332 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 6333 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env); 6334 write_fp_dreg(s, rd, tcg_rd); 6335 } else { 6336 /* Single to half */ 6337 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6338 TCGv_i32 ahp = get_ahp_flag(); 6339 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6340 6341 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp); 6342 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 6343 write_fp_sreg(s, rd, tcg_rd); 6344 } 6345 break; 6346 } 6347 case 0x1: 6348 { 6349 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 6350 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6351 if (dtype == 0) { 6352 /* Double to single */ 6353 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env); 6354 } else { 6355 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6356 TCGv_i32 ahp = get_ahp_flag(); 6357 /* Double to half */ 6358 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 6359 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 6360 } 6361 write_fp_sreg(s, rd, tcg_rd); 6362 break; 6363 } 6364 case 0x3: 6365 { 6366 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 6367 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR); 6368 TCGv_i32 tcg_ahp = get_ahp_flag(); 6369 tcg_gen_ext16u_i32(tcg_rn, tcg_rn); 6370 if (dtype == 0) { 6371 /* Half to single */ 6372 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6373 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 6374 write_fp_sreg(s, rd, tcg_rd); 6375 } else { 6376 /* Half to double */ 6377 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 6378 
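/*
 * Half-to-double, like half-to-single, cannot round: every
 * half-precision value is exactly representable in the wider
 * format. The fp_status and AHP flag are still needed for the
 * alternative half-precision format and NaN/flag handling.
 */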
gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 6379 write_fp_dreg(s, rd, tcg_rd); 6380 } 6381 break; 6382 } 6383 default: 6384 g_assert_not_reached(); 6385 } 6386 } 6387 6388 /* Floating point data-processing (1 source) 6389 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0 6390 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 6391 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd | 6392 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 6393 */ 6394 static void disas_fp_1src(DisasContext *s, uint32_t insn) 6395 { 6396 int mos = extract32(insn, 29, 3); 6397 int type = extract32(insn, 22, 2); 6398 int opcode = extract32(insn, 15, 6); 6399 int rn = extract32(insn, 5, 5); 6400 int rd = extract32(insn, 0, 5); 6401 6402 if (mos) { 6403 goto do_unallocated; 6404 } 6405 6406 switch (opcode) { 6407 case 0x4: case 0x5: case 0x7: 6408 { 6409 /* FCVT between half, single and double precision */ 6410 int dtype = extract32(opcode, 0, 2); 6411 if (type == 2 || dtype == type) { 6412 goto do_unallocated; 6413 } 6414 if (!fp_access_check(s)) { 6415 return; 6416 } 6417 6418 handle_fp_fcvt(s, opcode, rd, rn, dtype, type); 6419 break; 6420 } 6421 6422 case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */ 6423 if (type > 1 || !dc_isar_feature(aa64_frint, s)) { 6424 goto do_unallocated; 6425 } 6426 /* fall through */ 6427 case 0x0 ... 0x3: 6428 case 0x8 ... 0xc: 6429 case 0xe ... 0xf: 6430 /* 32-to-32 and 64-to-64 ops */ 6431 switch (type) { 6432 case 0: 6433 if (!fp_access_check(s)) { 6434 return; 6435 } 6436 handle_fp_1src_single(s, opcode, rd, rn); 6437 break; 6438 case 1: 6439 if (!fp_access_check(s)) { 6440 return; 6441 } 6442 handle_fp_1src_double(s, opcode, rd, rn); 6443 break; 6444 case 3: 6445 if (!dc_isar_feature(aa64_fp16, s)) { 6446 goto do_unallocated; 6447 } 6448 6449 if (!fp_access_check(s)) { 6450 return; 6451 } 6452 handle_fp_1src_half(s, opcode, rd, rn); 6453 break; 6454 default: 6455 goto do_unallocated; 6456 } 6457 break; 6458 6459 case 0x6: 6460 switch (type) { 6461 case 1: /* BFCVT */ 6462 if (!dc_isar_feature(aa64_bf16, s)) { 6463 goto do_unallocated; 6464 } 6465 if (!fp_access_check(s)) { 6466 return; 6467 } 6468 handle_fp_1src_single(s, opcode, rd, rn); 6469 break; 6470 default: 6471 goto do_unallocated; 6472 } 6473 break; 6474 6475 default: 6476 do_unallocated: 6477 unallocated_encoding(s); 6478 break; 6479 } 6480 } 6481 6482 /* Floating-point data-processing (2 source) - single precision */ 6483 static void handle_fp_2src_single(DisasContext *s, int opcode, 6484 int rd, int rn, int rm) 6485 { 6486 TCGv_i32 tcg_op1; 6487 TCGv_i32 tcg_op2; 6488 TCGv_i32 tcg_res; 6489 TCGv_ptr fpst; 6490 6491 tcg_res = tcg_temp_new_i32(); 6492 fpst = fpstatus_ptr(FPST_FPCR); 6493 tcg_op1 = read_fp_sreg(s, rn); 6494 tcg_op2 = read_fp_sreg(s, rm); 6495 6496 switch (opcode) { 6497 case 0x0: /* FMUL */ 6498 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 6499 break; 6500 case 0x1: /* FDIV */ 6501 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); 6502 break; 6503 case 0x2: /* FADD */ 6504 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); 6505 break; 6506 case 0x3: /* FSUB */ 6507 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 6508 break; 6509 case 0x4: /* FMAX */ 6510 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); 6511 break; 6512 case 0x5: /* FMIN */ 6513 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); 6514 break; 6515 case 0x6: /* FMAXNM */ 6516 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); 
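/*
 * FMAXNM (and FMINNM below) use the IEEE 754-2008 maxNum/minNum
 * semantics: if exactly one operand is a quiet NaN, the numeric
 * operand is returned rather than the NaN.
 */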
6517 break; 6518 case 0x7: /* FMINNM */ 6519 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); 6520 break; 6521 case 0x8: /* FNMUL */ 6522 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 6523 gen_helper_vfp_negs(tcg_res, tcg_res); 6524 break; 6525 } 6526 6527 write_fp_sreg(s, rd, tcg_res); 6528 } 6529 6530 /* Floating-point data-processing (2 source) - double precision */ 6531 static void handle_fp_2src_double(DisasContext *s, int opcode, 6532 int rd, int rn, int rm) 6533 { 6534 TCGv_i64 tcg_op1; 6535 TCGv_i64 tcg_op2; 6536 TCGv_i64 tcg_res; 6537 TCGv_ptr fpst; 6538 6539 tcg_res = tcg_temp_new_i64(); 6540 fpst = fpstatus_ptr(FPST_FPCR); 6541 tcg_op1 = read_fp_dreg(s, rn); 6542 tcg_op2 = read_fp_dreg(s, rm); 6543 6544 switch (opcode) { 6545 case 0x0: /* FMUL */ 6546 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 6547 break; 6548 case 0x1: /* FDIV */ 6549 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); 6550 break; 6551 case 0x2: /* FADD */ 6552 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); 6553 break; 6554 case 0x3: /* FSUB */ 6555 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 6556 break; 6557 case 0x4: /* FMAX */ 6558 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); 6559 break; 6560 case 0x5: /* FMIN */ 6561 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); 6562 break; 6563 case 0x6: /* FMAXNM */ 6564 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); 6565 break; 6566 case 0x7: /* FMINNM */ 6567 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); 6568 break; 6569 case 0x8: /* FNMUL */ 6570 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 6571 gen_helper_vfp_negd(tcg_res, tcg_res); 6572 break; 6573 } 6574 6575 write_fp_dreg(s, rd, tcg_res); 6576 } 6577 6578 /* Floating-point data-processing (2 source) - half precision */ 6579 static void handle_fp_2src_half(DisasContext *s, int opcode, 6580 int rd, int rn, int rm) 6581 { 6582 TCGv_i32 tcg_op1; 6583 TCGv_i32 tcg_op2; 6584 TCGv_i32 tcg_res; 6585 TCGv_ptr fpst; 6586 6587 tcg_res = tcg_temp_new_i32(); 6588 fpst = fpstatus_ptr(FPST_FPCR_F16); 6589 tcg_op1 = read_fp_hreg(s, rn); 6590 tcg_op2 = read_fp_hreg(s, rm); 6591 6592 switch (opcode) { 6593 case 0x0: /* FMUL */ 6594 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 6595 break; 6596 case 0x1: /* FDIV */ 6597 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); 6598 break; 6599 case 0x2: /* FADD */ 6600 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 6601 break; 6602 case 0x3: /* FSUB */ 6603 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 6604 break; 6605 case 0x4: /* FMAX */ 6606 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 6607 break; 6608 case 0x5: /* FMIN */ 6609 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 6610 break; 6611 case 0x6: /* FMAXNM */ 6612 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 6613 break; 6614 case 0x7: /* FMINNM */ 6615 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 6616 break; 6617 case 0x8: /* FNMUL */ 6618 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 6619 tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000); 6620 break; 6621 default: 6622 g_assert_not_reached(); 6623 } 6624 6625 write_fp_sreg(s, rd, tcg_res); 6626 } 6627 6628 /* Floating point data-processing (2 source) 6629 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 6630 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 6631 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd | 6632 * 
+---+---+---+-----------+------+---+------+--------+-----+------+------+ 6633 */ 6634 static void disas_fp_2src(DisasContext *s, uint32_t insn) 6635 { 6636 int mos = extract32(insn, 29, 3); 6637 int type = extract32(insn, 22, 2); 6638 int rd = extract32(insn, 0, 5); 6639 int rn = extract32(insn, 5, 5); 6640 int rm = extract32(insn, 16, 5); 6641 int opcode = extract32(insn, 12, 4); 6642 6643 if (opcode > 8 || mos) { 6644 unallocated_encoding(s); 6645 return; 6646 } 6647 6648 switch (type) { 6649 case 0: 6650 if (!fp_access_check(s)) { 6651 return; 6652 } 6653 handle_fp_2src_single(s, opcode, rd, rn, rm); 6654 break; 6655 case 1: 6656 if (!fp_access_check(s)) { 6657 return; 6658 } 6659 handle_fp_2src_double(s, opcode, rd, rn, rm); 6660 break; 6661 case 3: 6662 if (!dc_isar_feature(aa64_fp16, s)) { 6663 unallocated_encoding(s); 6664 return; 6665 } 6666 if (!fp_access_check(s)) { 6667 return; 6668 } 6669 handle_fp_2src_half(s, opcode, rd, rn, rm); 6670 break; 6671 default: 6672 unallocated_encoding(s); 6673 } 6674 } 6675 6676 /* Floating-point data-processing (3 source) - single precision */ 6677 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, 6678 int rd, int rn, int rm, int ra) 6679 { 6680 TCGv_i32 tcg_op1, tcg_op2, tcg_op3; 6681 TCGv_i32 tcg_res = tcg_temp_new_i32(); 6682 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6683 6684 tcg_op1 = read_fp_sreg(s, rn); 6685 tcg_op2 = read_fp_sreg(s, rm); 6686 tcg_op3 = read_fp_sreg(s, ra); 6687 6688 /* These are fused multiply-add, and must be done as one 6689 * floating point operation with no rounding between the 6690 * multiplication and addition steps. 6691 * NB that doing the negations here as separate steps is 6692 * correct : an input NaN should come out with its sign bit 6693 * flipped if it is a negated-input. 6694 */ 6695 if (o1 == true) { 6696 gen_helper_vfp_negs(tcg_op3, tcg_op3); 6697 } 6698 6699 if (o0 != o1) { 6700 gen_helper_vfp_negs(tcg_op1, tcg_op1); 6701 } 6702 6703 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6704 6705 write_fp_sreg(s, rd, tcg_res); 6706 } 6707 6708 /* Floating-point data-processing (3 source) - double precision */ 6709 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, 6710 int rd, int rn, int rm, int ra) 6711 { 6712 TCGv_i64 tcg_op1, tcg_op2, tcg_op3; 6713 TCGv_i64 tcg_res = tcg_temp_new_i64(); 6714 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6715 6716 tcg_op1 = read_fp_dreg(s, rn); 6717 tcg_op2 = read_fp_dreg(s, rm); 6718 tcg_op3 = read_fp_dreg(s, ra); 6719 6720 /* These are fused multiply-add, and must be done as one 6721 * floating point operation with no rounding between the 6722 * multiplication and addition steps. 6723 * NB that doing the negations here as separate steps is 6724 * correct : an input NaN should come out with its sign bit 6725 * flipped if it is a negated-input. 
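* The four encodings therefore map as: o1=0,o0=0 FMADD (Ra + Rn*Rm),
* o1=0,o0=1 FMSUB (Ra - Rn*Rm), o1=1,o0=0 FNMADD (-Ra - Rn*Rm),
* o1=1,o0=1 FNMSUB (-Ra + Rn*Rm).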
6726 */ 6727 if (o1 == true) { 6728 gen_helper_vfp_negd(tcg_op3, tcg_op3); 6729 } 6730 6731 if (o0 != o1) { 6732 gen_helper_vfp_negd(tcg_op1, tcg_op1); 6733 } 6734 6735 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6736 6737 write_fp_dreg(s, rd, tcg_res); 6738 } 6739 6740 /* Floating-point data-processing (3 source) - half precision */ 6741 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, 6742 int rd, int rn, int rm, int ra) 6743 { 6744 TCGv_i32 tcg_op1, tcg_op2, tcg_op3; 6745 TCGv_i32 tcg_res = tcg_temp_new_i32(); 6746 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16); 6747 6748 tcg_op1 = read_fp_hreg(s, rn); 6749 tcg_op2 = read_fp_hreg(s, rm); 6750 tcg_op3 = read_fp_hreg(s, ra); 6751 6752 /* These are fused multiply-add, and must be done as one 6753 * floating point operation with no rounding between the 6754 * multiplication and addition steps. 6755 * NB that doing the negations here as separate steps is 6756 * correct : an input NaN should come out with its sign bit 6757 * flipped if it is a negated-input. 6758 */ 6759 if (o1 == true) { 6760 tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000); 6761 } 6762 6763 if (o0 != o1) { 6764 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); 6765 } 6766 6767 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6768 6769 write_fp_sreg(s, rd, tcg_res); 6770 } 6771 6772 /* Floating point data-processing (3 source) 6773 * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0 6774 * +---+---+---+-----------+------+----+------+----+------+------+------+ 6775 * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd | 6776 * +---+---+---+-----------+------+----+------+----+------+------+------+ 6777 */ 6778 static void disas_fp_3src(DisasContext *s, uint32_t insn) 6779 { 6780 int mos = extract32(insn, 29, 3); 6781 int type = extract32(insn, 22, 2); 6782 int rd = extract32(insn, 0, 5); 6783 int rn = extract32(insn, 5, 5); 6784 int ra = extract32(insn, 10, 5); 6785 int rm = extract32(insn, 16, 5); 6786 bool o0 = extract32(insn, 15, 1); 6787 bool o1 = extract32(insn, 21, 1); 6788 6789 if (mos) { 6790 unallocated_encoding(s); 6791 return; 6792 } 6793 6794 switch (type) { 6795 case 0: 6796 if (!fp_access_check(s)) { 6797 return; 6798 } 6799 handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra); 6800 break; 6801 case 1: 6802 if (!fp_access_check(s)) { 6803 return; 6804 } 6805 handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra); 6806 break; 6807 case 3: 6808 if (!dc_isar_feature(aa64_fp16, s)) { 6809 unallocated_encoding(s); 6810 return; 6811 } 6812 if (!fp_access_check(s)) { 6813 return; 6814 } 6815 handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra); 6816 break; 6817 default: 6818 unallocated_encoding(s); 6819 } 6820 } 6821 6822 /* Floating point immediate 6823 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0 6824 * +---+---+---+-----------+------+---+------------+-------+------+------+ 6825 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd | 6826 * +---+---+---+-----------+------+---+------------+-------+------+------+ 6827 */ 6828 static void disas_fp_imm(DisasContext *s, uint32_t insn) 6829 { 6830 int rd = extract32(insn, 0, 5); 6831 int imm5 = extract32(insn, 5, 5); 6832 int imm8 = extract32(insn, 13, 8); 6833 int type = extract32(insn, 22, 2); 6834 int mos = extract32(insn, 29, 3); 6835 uint64_t imm; 6836 MemOp sz; 6837 6838 if (mos || imm5) { 6839 unallocated_encoding(s); 6840 return; 6841 } 6842 6843 switch (type) { 6844 case 0: 6845 sz = MO_32; 6846 break; 6847 case 1: 6848 sz = MO_64; 6849 break; 6850 case 3: 6851 sz = 
MO_16; 6852 if (dc_isar_feature(aa64_fp16, s)) { 6853 break; 6854 } 6855 /* fallthru */ 6856 default: 6857 unallocated_encoding(s); 6858 return; 6859 } 6860 6861 if (!fp_access_check(s)) { 6862 return; 6863 } 6864 6865 imm = vfp_expand_imm(sz, imm8); 6866 write_fp_dreg(s, rd, tcg_constant_i64(imm)); 6867 } 6868 6869 /* Handle floating point <=> fixed point conversions. Note that we can 6870 * also deal with fp <=> integer conversions as a special case (scale == 64) 6871 * OPTME: consider handling that special case specially or at least skipping 6872 * the call to scalbn in the helpers for zero shifts. 6873 */ 6874 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, 6875 bool itof, int rmode, int scale, int sf, int type) 6876 { 6877 bool is_signed = !(opcode & 1); 6878 TCGv_ptr tcg_fpstatus; 6879 TCGv_i32 tcg_shift, tcg_single; 6880 TCGv_i64 tcg_double; 6881 6882 tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR); 6883 6884 tcg_shift = tcg_constant_i32(64 - scale); 6885 6886 if (itof) { 6887 TCGv_i64 tcg_int = cpu_reg(s, rn); 6888 if (!sf) { 6889 TCGv_i64 tcg_extend = tcg_temp_new_i64(); 6890 6891 if (is_signed) { 6892 tcg_gen_ext32s_i64(tcg_extend, tcg_int); 6893 } else { 6894 tcg_gen_ext32u_i64(tcg_extend, tcg_int); 6895 } 6896 6897 tcg_int = tcg_extend; 6898 } 6899 6900 switch (type) { 6901 case 1: /* float64 */ 6902 tcg_double = tcg_temp_new_i64(); 6903 if (is_signed) { 6904 gen_helper_vfp_sqtod(tcg_double, tcg_int, 6905 tcg_shift, tcg_fpstatus); 6906 } else { 6907 gen_helper_vfp_uqtod(tcg_double, tcg_int, 6908 tcg_shift, tcg_fpstatus); 6909 } 6910 write_fp_dreg(s, rd, tcg_double); 6911 break; 6912 6913 case 0: /* float32 */ 6914 tcg_single = tcg_temp_new_i32(); 6915 if (is_signed) { 6916 gen_helper_vfp_sqtos(tcg_single, tcg_int, 6917 tcg_shift, tcg_fpstatus); 6918 } else { 6919 gen_helper_vfp_uqtos(tcg_single, tcg_int, 6920 tcg_shift, tcg_fpstatus); 6921 } 6922 write_fp_sreg(s, rd, tcg_single); 6923 break; 6924 6925 case 3: /* float16 */ 6926 tcg_single = tcg_temp_new_i32(); 6927 if (is_signed) { 6928 gen_helper_vfp_sqtoh(tcg_single, tcg_int, 6929 tcg_shift, tcg_fpstatus); 6930 } else { 6931 gen_helper_vfp_uqtoh(tcg_single, tcg_int, 6932 tcg_shift, tcg_fpstatus); 6933 } 6934 write_fp_sreg(s, rd, tcg_single); 6935 break; 6936 6937 default: 6938 g_assert_not_reached(); 6939 } 6940 } else { 6941 TCGv_i64 tcg_int = cpu_reg(s, rd); 6942 TCGv_i32 tcg_rmode; 6943 6944 if (extract32(opcode, 2, 1)) { 6945 /* There are too many rounding modes to all fit into rmode, 6946 * so FCVTA[US] is a special case. 
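* The 2-bit rmode field only encodes tieeven/posinf/neginf/zero
* (FPROUNDING 0..3), so ties-to-away must be selected explicitly.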
6947 */ 6948 rmode = FPROUNDING_TIEAWAY; 6949 } 6950 6951 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 6952 6953 switch (type) { 6954 case 1: /* float64 */ 6955 tcg_double = read_fp_dreg(s, rn); 6956 if (is_signed) { 6957 if (!sf) { 6958 gen_helper_vfp_tosld(tcg_int, tcg_double, 6959 tcg_shift, tcg_fpstatus); 6960 } else { 6961 gen_helper_vfp_tosqd(tcg_int, tcg_double, 6962 tcg_shift, tcg_fpstatus); 6963 } 6964 } else { 6965 if (!sf) { 6966 gen_helper_vfp_tould(tcg_int, tcg_double, 6967 tcg_shift, tcg_fpstatus); 6968 } else { 6969 gen_helper_vfp_touqd(tcg_int, tcg_double, 6970 tcg_shift, tcg_fpstatus); 6971 } 6972 } 6973 if (!sf) { 6974 tcg_gen_ext32u_i64(tcg_int, tcg_int); 6975 } 6976 break; 6977 6978 case 0: /* float32 */ 6979 tcg_single = read_fp_sreg(s, rn); 6980 if (sf) { 6981 if (is_signed) { 6982 gen_helper_vfp_tosqs(tcg_int, tcg_single, 6983 tcg_shift, tcg_fpstatus); 6984 } else { 6985 gen_helper_vfp_touqs(tcg_int, tcg_single, 6986 tcg_shift, tcg_fpstatus); 6987 } 6988 } else { 6989 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 6990 if (is_signed) { 6991 gen_helper_vfp_tosls(tcg_dest, tcg_single, 6992 tcg_shift, tcg_fpstatus); 6993 } else { 6994 gen_helper_vfp_touls(tcg_dest, tcg_single, 6995 tcg_shift, tcg_fpstatus); 6996 } 6997 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 6998 } 6999 break; 7000 7001 case 3: /* float16 */ 7002 tcg_single = read_fp_sreg(s, rn); 7003 if (sf) { 7004 if (is_signed) { 7005 gen_helper_vfp_tosqh(tcg_int, tcg_single, 7006 tcg_shift, tcg_fpstatus); 7007 } else { 7008 gen_helper_vfp_touqh(tcg_int, tcg_single, 7009 tcg_shift, tcg_fpstatus); 7010 } 7011 } else { 7012 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 7013 if (is_signed) { 7014 gen_helper_vfp_toslh(tcg_dest, tcg_single, 7015 tcg_shift, tcg_fpstatus); 7016 } else { 7017 gen_helper_vfp_toulh(tcg_dest, tcg_single, 7018 tcg_shift, tcg_fpstatus); 7019 } 7020 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 7021 } 7022 break; 7023 7024 default: 7025 g_assert_not_reached(); 7026 } 7027 7028 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 7029 } 7030 } 7031 7032 /* Floating point <-> fixed point conversions 7033 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 7034 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 7035 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd | 7036 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 7037 */ 7038 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn) 7039 { 7040 int rd = extract32(insn, 0, 5); 7041 int rn = extract32(insn, 5, 5); 7042 int scale = extract32(insn, 10, 6); 7043 int opcode = extract32(insn, 16, 3); 7044 int rmode = extract32(insn, 19, 2); 7045 int type = extract32(insn, 22, 2); 7046 bool sbit = extract32(insn, 29, 1); 7047 bool sf = extract32(insn, 31, 1); 7048 bool itof; 7049 7050 if (sbit || (!sf && scale < 32)) { 7051 unallocated_encoding(s); 7052 return; 7053 } 7054 7055 switch (type) { 7056 case 0: /* float32 */ 7057 case 1: /* float64 */ 7058 break; 7059 case 3: /* float16 */ 7060 if (dc_isar_feature(aa64_fp16, s)) { 7061 break; 7062 } 7063 /* fallthru */ 7064 default: 7065 unallocated_encoding(s); 7066 return; 7067 } 7068 7069 switch ((rmode << 3) | opcode) { 7070 case 0x2: /* SCVTF */ 7071 case 0x3: /* UCVTF */ 7072 itof = true; 7073 break; 7074 case 0x18: /* FCVTZS */ 7075 case 0x19: /* FCVTZU */ 7076 itof = false; 7077 break; 7078 default: 7079 unallocated_encoding(s); 7080 return; 7081 } 7082 7083 if (!fp_access_check(s)) { 7084 return; 7085 } 7086 7087 handle_fpfpcvt(s, rd, 
rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type); 7088 } 7089 7090 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) 7091 { 7092 /* FMOV: gpr to or from float, double, or top half of quad fp reg, 7093 * without conversion. 7094 */ 7095 7096 if (itof) { 7097 TCGv_i64 tcg_rn = cpu_reg(s, rn); 7098 TCGv_i64 tmp; 7099 7100 switch (type) { 7101 case 0: 7102 /* 32 bit */ 7103 tmp = tcg_temp_new_i64(); 7104 tcg_gen_ext32u_i64(tmp, tcg_rn); 7105 write_fp_dreg(s, rd, tmp); 7106 break; 7107 case 1: 7108 /* 64 bit */ 7109 write_fp_dreg(s, rd, tcg_rn); 7110 break; 7111 case 2: 7112 /* 64 bit to top half. */ 7113 tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd)); 7114 clear_vec_high(s, true, rd); 7115 break; 7116 case 3: 7117 /* 16 bit */ 7118 tmp = tcg_temp_new_i64(); 7119 tcg_gen_ext16u_i64(tmp, tcg_rn); 7120 write_fp_dreg(s, rd, tmp); 7121 break; 7122 default: 7123 g_assert_not_reached(); 7124 } 7125 } else { 7126 TCGv_i64 tcg_rd = cpu_reg(s, rd); 7127 7128 switch (type) { 7129 case 0: 7130 /* 32 bit */ 7131 tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32)); 7132 break; 7133 case 1: 7134 /* 64 bit */ 7135 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64)); 7136 break; 7137 case 2: 7138 /* 64 bits from top half */ 7139 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn)); 7140 break; 7141 case 3: 7142 /* 16 bit */ 7143 tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16)); 7144 break; 7145 default: 7146 g_assert_not_reached(); 7147 } 7148 } 7149 } 7150 7151 static void handle_fjcvtzs(DisasContext *s, int rd, int rn) 7152 { 7153 TCGv_i64 t = read_fp_dreg(s, rn); 7154 TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR); 7155 7156 gen_helper_fjcvtzs(t, t, fpstatus); 7157 7158 tcg_gen_ext32u_i64(cpu_reg(s, rd), t); 7159 tcg_gen_extrh_i64_i32(cpu_ZF, t); 7160 tcg_gen_movi_i32(cpu_CF, 0); 7161 tcg_gen_movi_i32(cpu_NF, 0); 7162 tcg_gen_movi_i32(cpu_VF, 0); 7163 } 7164 7165 /* Floating point <-> integer conversions 7166 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 7167 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 7168 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd | 7169 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 7170 */ 7171 static void disas_fp_int_conv(DisasContext *s, uint32_t insn) 7172 { 7173 int rd = extract32(insn, 0, 5); 7174 int rn = extract32(insn, 5, 5); 7175 int opcode = extract32(insn, 16, 3); 7176 int rmode = extract32(insn, 19, 2); 7177 int type = extract32(insn, 22, 2); 7178 bool sbit = extract32(insn, 29, 1); 7179 bool sf = extract32(insn, 31, 1); 7180 bool itof = false; 7181 7182 if (sbit) { 7183 goto do_unallocated; 7184 } 7185 7186 switch (opcode) { 7187 case 2: /* SCVTF */ 7188 case 3: /* UCVTF */ 7189 itof = true; 7190 /* fallthru */ 7191 case 4: /* FCVTAS */ 7192 case 5: /* FCVTAU */ 7193 if (rmode != 0) { 7194 goto do_unallocated; 7195 } 7196 /* fallthru */ 7197 case 0: /* FCVT[NPMZ]S */ 7198 case 1: /* FCVT[NPMZ]U */ 7199 switch (type) { 7200 case 0: /* float32 */ 7201 case 1: /* float64 */ 7202 break; 7203 case 3: /* float16 */ 7204 if (!dc_isar_feature(aa64_fp16, s)) { 7205 goto do_unallocated; 7206 } 7207 break; 7208 default: 7209 goto do_unallocated; 7210 } 7211 if (!fp_access_check(s)) { 7212 return; 7213 } 7214 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type); 7215 break; 7216 7217 default: 7218 switch (sf << 7 | type << 5 | rmode << 3 | opcode) { 7219 case 0b01100110: /* FMOV half <-> 32-bit int 
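(the case values pack sf:type:rmode:opcode, as assembled in the
switch expression above)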
*/ 7220 case 0b01100111: 7221 case 0b11100110: /* FMOV half <-> 64-bit int */ 7222 case 0b11100111: 7223 if (!dc_isar_feature(aa64_fp16, s)) { 7224 goto do_unallocated; 7225 } 7226 /* fallthru */ 7227 case 0b00000110: /* FMOV 32-bit */ 7228 case 0b00000111: 7229 case 0b10100110: /* FMOV 64-bit */ 7230 case 0b10100111: 7231 case 0b11001110: /* FMOV top half of 128-bit */ 7232 case 0b11001111: 7233 if (!fp_access_check(s)) { 7234 return; 7235 } 7236 itof = opcode & 1; 7237 handle_fmov(s, rd, rn, type, itof); 7238 break; 7239 7240 case 0b00111110: /* FJCVTZS */ 7241 if (!dc_isar_feature(aa64_jscvt, s)) { 7242 goto do_unallocated; 7243 } else if (fp_access_check(s)) { 7244 handle_fjcvtzs(s, rd, rn); 7245 } 7246 break; 7247 7248 default: 7249 do_unallocated: 7250 unallocated_encoding(s); 7251 return; 7252 } 7253 break; 7254 } 7255 } 7256 7257 /* FP-specific subcases of table C3-6 (SIMD and FP data processing) 7258 * 31 30 29 28 25 24 0 7259 * +---+---+---+---------+-----------------------------+ 7260 * | | 0 | | 1 1 1 1 | | 7261 * +---+---+---+---------+-----------------------------+ 7262 */ 7263 static void disas_data_proc_fp(DisasContext *s, uint32_t insn) 7264 { 7265 if (extract32(insn, 24, 1)) { 7266 /* Floating point data-processing (3 source) */ 7267 disas_fp_3src(s, insn); 7268 } else if (extract32(insn, 21, 1) == 0) { 7269 /* Floating point to fixed point conversions */ 7270 disas_fp_fixed_conv(s, insn); 7271 } else { 7272 switch (extract32(insn, 10, 2)) { 7273 case 1: 7274 /* Floating point conditional compare */ 7275 disas_fp_ccomp(s, insn); 7276 break; 7277 case 2: 7278 /* Floating point data-processing (2 source) */ 7279 disas_fp_2src(s, insn); 7280 break; 7281 case 3: 7282 /* Floating point conditional select */ 7283 disas_fp_csel(s, insn); 7284 break; 7285 case 0: 7286 switch (ctz32(extract32(insn, 12, 4))) { 7287 case 0: /* [15:12] == xxx1 */ 7288 /* Floating point immediate */ 7289 disas_fp_imm(s, insn); 7290 break; 7291 case 1: /* [15:12] == xx10 */ 7292 /* Floating point compare */ 7293 disas_fp_compare(s, insn); 7294 break; 7295 case 2: /* [15:12] == x100 */ 7296 /* Floating point data-processing (1 source) */ 7297 disas_fp_1src(s, insn); 7298 break; 7299 case 3: /* [15:12] == 1000 */ 7300 unallocated_encoding(s); 7301 break; 7302 default: /* [15:12] == 0000 */ 7303 /* Floating point <-> integer conversions */ 7304 disas_fp_int_conv(s, insn); 7305 break; 7306 } 7307 break; 7308 } 7309 } 7310 } 7311 7312 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right, 7313 int pos) 7314 { 7315 /* Extract 64 bits from the middle of two concatenated 64 bit 7316 * vector register slices left:right. The extracted bits start 7317 * at 'pos' bits into the right (least significant) side. 7318 * We return the result in tcg_right, and guarantee not to 7319 * trash tcg_left. 
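* For example, with pos == 8 the result is left[7:0]:right[63:8], i.e.
* the low byte of the left slice lands in bits [63:56] of tcg_right.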
7320 */ 7321 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 7322 assert(pos > 0 && pos < 64); 7323 7324 tcg_gen_shri_i64(tcg_right, tcg_right, pos); 7325 tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos); 7326 tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp); 7327 } 7328 7329 /* EXT 7330 * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0 7331 * +---+---+-------------+-----+---+------+---+------+---+------+------+ 7332 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd | 7333 * +---+---+-------------+-----+---+------+---+------+---+------+------+ 7334 */ 7335 static void disas_simd_ext(DisasContext *s, uint32_t insn) 7336 { 7337 int is_q = extract32(insn, 30, 1); 7338 int op2 = extract32(insn, 22, 2); 7339 int imm4 = extract32(insn, 11, 4); 7340 int rm = extract32(insn, 16, 5); 7341 int rn = extract32(insn, 5, 5); 7342 int rd = extract32(insn, 0, 5); 7343 int pos = imm4 << 3; 7344 TCGv_i64 tcg_resl, tcg_resh; 7345 7346 if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) { 7347 unallocated_encoding(s); 7348 return; 7349 } 7350 7351 if (!fp_access_check(s)) { 7352 return; 7353 } 7354 7355 tcg_resh = tcg_temp_new_i64(); 7356 tcg_resl = tcg_temp_new_i64(); 7357 7358 /* Vd gets bits starting at pos bits into Vm:Vn. This is 7359 * either extracting 128 bits from a 128:128 concatenation, or 7360 * extracting 64 bits from a 64:64 concatenation. 7361 */ 7362 if (!is_q) { 7363 read_vec_element(s, tcg_resl, rn, 0, MO_64); 7364 if (pos != 0) { 7365 read_vec_element(s, tcg_resh, rm, 0, MO_64); 7366 do_ext64(s, tcg_resh, tcg_resl, pos); 7367 } 7368 } else { 7369 TCGv_i64 tcg_hh; 7370 typedef struct { 7371 int reg; 7372 int elt; 7373 } EltPosns; 7374 EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} }; 7375 EltPosns *elt = eltposns; 7376 7377 if (pos >= 64) { 7378 elt++; 7379 pos -= 64; 7380 } 7381 7382 read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64); 7383 elt++; 7384 read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64); 7385 elt++; 7386 if (pos != 0) { 7387 do_ext64(s, tcg_resh, tcg_resl, pos); 7388 tcg_hh = tcg_temp_new_i64(); 7389 read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64); 7390 do_ext64(s, tcg_hh, tcg_resh, pos); 7391 } 7392 } 7393 7394 write_vec_element(s, tcg_resl, rd, 0, MO_64); 7395 if (is_q) { 7396 write_vec_element(s, tcg_resh, rd, 1, MO_64); 7397 } 7398 clear_vec_high(s, is_q, rd); 7399 } 7400 7401 /* TBL/TBX 7402 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0 7403 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ 7404 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd | 7405 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ 7406 */ 7407 static void disas_simd_tb(DisasContext *s, uint32_t insn) 7408 { 7409 int op2 = extract32(insn, 22, 2); 7410 int is_q = extract32(insn, 30, 1); 7411 int rm = extract32(insn, 16, 5); 7412 int rn = extract32(insn, 5, 5); 7413 int rd = extract32(insn, 0, 5); 7414 int is_tbx = extract32(insn, 12, 1); 7415 int len = (extract32(insn, 13, 2) + 1) * 16; 7416 7417 if (op2 != 0) { 7418 unallocated_encoding(s); 7419 return; 7420 } 7421 7422 if (!fp_access_check(s)) { 7423 return; 7424 } 7425 7426 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 7427 vec_full_reg_offset(s, rm), cpu_env, 7428 is_q ? 
16 : 8, vec_full_reg_size(s), 7429 (len << 6) | (is_tbx << 5) | rn, 7430 gen_helper_simd_tblx); 7431 } 7432 7433 /* ZIP/UZP/TRN 7434 * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 7435 * +---+---+-------------+------+---+------+---+------------------+------+ 7436 * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd | 7437 * +---+---+-------------+------+---+------+---+------------------+------+ 7438 */ 7439 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) 7440 { 7441 int rd = extract32(insn, 0, 5); 7442 int rn = extract32(insn, 5, 5); 7443 int rm = extract32(insn, 16, 5); 7444 int size = extract32(insn, 22, 2); 7445 /* opc field bits [1:0] indicate ZIP/UZP/TRN; 7446 * bit 2 indicates 1 vs 2 variant of the insn. 7447 */ 7448 int opcode = extract32(insn, 12, 2); 7449 bool part = extract32(insn, 14, 1); 7450 bool is_q = extract32(insn, 30, 1); 7451 int esize = 8 << size; 7452 int i; 7453 int datasize = is_q ? 128 : 64; 7454 int elements = datasize / esize; 7455 TCGv_i64 tcg_res[2], tcg_ele; 7456 7457 if (opcode == 0 || (size == 3 && !is_q)) { 7458 unallocated_encoding(s); 7459 return; 7460 } 7461 7462 if (!fp_access_check(s)) { 7463 return; 7464 } 7465 7466 tcg_res[0] = tcg_temp_new_i64(); 7467 tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL; 7468 tcg_ele = tcg_temp_new_i64(); 7469 7470 for (i = 0; i < elements; i++) { 7471 int o, w; 7472 7473 switch (opcode) { 7474 case 1: /* UZP1/2 */ 7475 { 7476 int midpoint = elements / 2; 7477 if (i < midpoint) { 7478 read_vec_element(s, tcg_ele, rn, 2 * i + part, size); 7479 } else { 7480 read_vec_element(s, tcg_ele, rm, 7481 2 * (i - midpoint) + part, size); 7482 } 7483 break; 7484 } 7485 case 2: /* TRN1/2 */ 7486 if (i & 1) { 7487 read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size); 7488 } else { 7489 read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size); 7490 } 7491 break; 7492 case 3: /* ZIP1/2 */ 7493 { 7494 int base = part * elements / 2; 7495 if (i & 1) { 7496 read_vec_element(s, tcg_ele, rm, base + (i >> 1), size); 7497 } else { 7498 read_vec_element(s, tcg_ele, rn, base + (i >> 1), size); 7499 } 7500 break; 7501 } 7502 default: 7503 g_assert_not_reached(); 7504 } 7505 7506 w = (i * esize) / 64; 7507 o = (i * esize) % 64; 7508 if (o == 0) { 7509 tcg_gen_mov_i64(tcg_res[w], tcg_ele); 7510 } else { 7511 tcg_gen_shli_i64(tcg_ele, tcg_ele, o); 7512 tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele); 7513 } 7514 } 7515 7516 for (i = 0; i <= is_q; ++i) { 7517 write_vec_element(s, tcg_res[i], rd, i, MO_64); 7518 } 7519 clear_vec_high(s, is_q, rd); 7520 } 7521 7522 /* 7523 * do_reduction_op helper 7524 * 7525 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 7526 * important for correct NaN propagation that we do these 7527 * operations in exactly the order specified by the pseudocode. 7528 * 7529 * This is a recursive function, TCG temps should be freed by the 7530 * calling function once it is done with the values. 7531 */ 7532 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn, 7533 int esize, int size, int vmap, TCGv_ptr fpst) 7534 { 7535 if (esize == size) { 7536 int element; 7537 MemOp msize = esize == 16 ? 
MO_16 : MO_32; 7538 TCGv_i32 tcg_elem; 7539 7540 /* We should have one register left here */ 7541 assert(ctpop8(vmap) == 1); 7542 element = ctz32(vmap); 7543 assert(element < 8); 7544 7545 tcg_elem = tcg_temp_new_i32(); 7546 read_vec_element_i32(s, tcg_elem, rn, element, msize); 7547 return tcg_elem; 7548 } else { 7549 int bits = size / 2; 7550 int shift = ctpop8(vmap) / 2; 7551 int vmap_lo = (vmap >> shift) & vmap; 7552 int vmap_hi = (vmap & ~vmap_lo); 7553 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 7554 7555 tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst); 7556 tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst); 7557 tcg_res = tcg_temp_new_i32(); 7558 7559 switch (fpopcode) { 7560 case 0x0c: /* fmaxnmv half-precision */ 7561 gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst); 7562 break; 7563 case 0x0f: /* fmaxv half-precision */ 7564 gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst); 7565 break; 7566 case 0x1c: /* fminnmv half-precision */ 7567 gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst); 7568 break; 7569 case 0x1f: /* fminv half-precision */ 7570 gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst); 7571 break; 7572 case 0x2c: /* fmaxnmv */ 7573 gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst); 7574 break; 7575 case 0x2f: /* fmaxv */ 7576 gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst); 7577 break; 7578 case 0x3c: /* fminnmv */ 7579 gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst); 7580 break; 7581 case 0x3f: /* fminv */ 7582 gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst); 7583 break; 7584 default: 7585 g_assert_not_reached(); 7586 } 7587 return tcg_res; 7588 } 7589 } 7590 7591 /* AdvSIMD across lanes 7592 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 7593 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 7594 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | 7595 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 7596 */ 7597 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) 7598 { 7599 int rd = extract32(insn, 0, 5); 7600 int rn = extract32(insn, 5, 5); 7601 int size = extract32(insn, 22, 2); 7602 int opcode = extract32(insn, 12, 5); 7603 bool is_q = extract32(insn, 30, 1); 7604 bool is_u = extract32(insn, 29, 1); 7605 bool is_fp = false; 7606 bool is_min = false; 7607 int esize; 7608 int elements; 7609 int i; 7610 TCGv_i64 tcg_res, tcg_elt; 7611 7612 switch (opcode) { 7613 case 0x1b: /* ADDV */ 7614 if (is_u) { 7615 unallocated_encoding(s); 7616 return; 7617 } 7618 /* fall through */ 7619 case 0x3: /* SADDLV, UADDLV */ 7620 case 0xa: /* SMAXV, UMAXV */ 7621 case 0x1a: /* SMINV, UMINV */ 7622 if (size == 3 || (size == 2 && !is_q)) { 7623 unallocated_encoding(s); 7624 return; 7625 } 7626 break; 7627 case 0xc: /* FMAXNMV, FMINNMV */ 7628 case 0xf: /* FMAXV, FMINV */ 7629 /* Bit 1 of size field encodes min vs max and the actual size 7630 * depends on the encoding of the U bit. If not set (and FP16 7631 * enabled) then we do half-precision float instead of single 7632 * precision. 
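* For the single-precision case (U set), Q must also be set and
* size<0> must be clear, since a 32-bit reduction needs the full
* 128-bit vector; other combinations are unallocated, as checked below.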
7633 */ 7634 is_min = extract32(size, 1, 1); 7635 is_fp = true; 7636 if (!is_u && dc_isar_feature(aa64_fp16, s)) { 7637 size = 1; 7638 } else if (!is_u || !is_q || extract32(size, 0, 1)) { 7639 unallocated_encoding(s); 7640 return; 7641 } else { 7642 size = 2; 7643 } 7644 break; 7645 default: 7646 unallocated_encoding(s); 7647 return; 7648 } 7649 7650 if (!fp_access_check(s)) { 7651 return; 7652 } 7653 7654 esize = 8 << size; 7655 elements = (is_q ? 128 : 64) / esize; 7656 7657 tcg_res = tcg_temp_new_i64(); 7658 tcg_elt = tcg_temp_new_i64(); 7659 7660 /* These instructions operate across all lanes of a vector 7661 * to produce a single result. We can guarantee that a 64 7662 * bit intermediate is sufficient: 7663 * + for [US]ADDLV the maximum element size is 32 bits, and 7664 * the result type is 64 bits 7665 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the 7666 * same as the element size, which is 32 bits at most 7667 * For the integer operations we can choose to work at 64 7668 * or 32 bits and truncate at the end; for simplicity 7669 * we use 64 bits always. The floating point 7670 * ops do require 32 bit intermediates, though. 7671 */ 7672 if (!is_fp) { 7673 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN)); 7674 7675 for (i = 1; i < elements; i++) { 7676 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN)); 7677 7678 switch (opcode) { 7679 case 0x03: /* SADDLV / UADDLV */ 7680 case 0x1b: /* ADDV */ 7681 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt); 7682 break; 7683 case 0x0a: /* SMAXV / UMAXV */ 7684 if (is_u) { 7685 tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt); 7686 } else { 7687 tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt); 7688 } 7689 break; 7690 case 0x1a: /* SMINV / UMINV */ 7691 if (is_u) { 7692 tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt); 7693 } else { 7694 tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt); 7695 } 7696 break; 7697 default: 7698 g_assert_not_reached(); 7699 } 7700 7701 } 7702 } else { 7703 /* Floating point vector reduction ops which work across 32 7704 * bit (single) or 16 bit (half-precision) intermediates. 7705 * Note that correct NaN propagation requires that we do these 7706 * operations in exactly the order specified by the pseudocode. 7707 */ 7708 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 7709 int fpopcode = opcode | is_min << 4 | is_u << 5; 7710 int vmap = (1 << elements) - 1; 7711 TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize, 7712 (is_q ? 
128 : 64), vmap, fpst); 7713 tcg_gen_extu_i32_i64(tcg_res, tcg_res32); 7714 } 7715 7716 /* Now truncate the result to the width required for the final output */ 7717 if (opcode == 0x03) { 7718 /* SADDLV, UADDLV: result is 2*esize */ 7719 size++; 7720 } 7721 7722 switch (size) { 7723 case 0: 7724 tcg_gen_ext8u_i64(tcg_res, tcg_res); 7725 break; 7726 case 1: 7727 tcg_gen_ext16u_i64(tcg_res, tcg_res); 7728 break; 7729 case 2: 7730 tcg_gen_ext32u_i64(tcg_res, tcg_res); 7731 break; 7732 case 3: 7733 break; 7734 default: 7735 g_assert_not_reached(); 7736 } 7737 7738 write_fp_dreg(s, rd, tcg_res); 7739 } 7740 7741 /* DUP (Element, Vector) 7742 * 7743 * 31 30 29 21 20 16 15 10 9 5 4 0 7744 * +---+---+-------------------+--------+-------------+------+------+ 7745 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | 7746 * +---+---+-------------------+--------+-------------+------+------+ 7747 * 7748 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7749 */ 7750 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, 7751 int imm5) 7752 { 7753 int size = ctz32(imm5); 7754 int index; 7755 7756 if (size > 3 || (size == 3 && !is_q)) { 7757 unallocated_encoding(s); 7758 return; 7759 } 7760 7761 if (!fp_access_check(s)) { 7762 return; 7763 } 7764 7765 index = imm5 >> (size + 1); 7766 tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd), 7767 vec_reg_offset(s, rn, index, size), 7768 is_q ? 16 : 8, vec_full_reg_size(s)); 7769 } 7770 7771 /* DUP (element, scalar) 7772 * 31 21 20 16 15 10 9 5 4 0 7773 * +-----------------------+--------+-------------+------+------+ 7774 * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | 7775 * +-----------------------+--------+-------------+------+------+ 7776 */ 7777 static void handle_simd_dupes(DisasContext *s, int rd, int rn, 7778 int imm5) 7779 { 7780 int size = ctz32(imm5); 7781 int index; 7782 TCGv_i64 tmp; 7783 7784 if (size > 3) { 7785 unallocated_encoding(s); 7786 return; 7787 } 7788 7789 if (!fp_access_check(s)) { 7790 return; 7791 } 7792 7793 index = imm5 >> (size + 1); 7794 7795 /* This instruction just extracts the specified element and 7796 * zero-extends it into the bottom of the destination register. 7797 */ 7798 tmp = tcg_temp_new_i64(); 7799 read_vec_element(s, tmp, rn, index, size); 7800 write_fp_dreg(s, rd, tmp); 7801 } 7802 7803 /* DUP (General) 7804 * 7805 * 31 30 29 21 20 16 15 10 9 5 4 0 7806 * +---+---+-------------------+--------+-------------+------+------+ 7807 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd | 7808 * +---+---+-------------------+--------+-------------+------+------+ 7809 * 7810 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7811 */ 7812 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn, 7813 int imm5) 7814 { 7815 int size = ctz32(imm5); 7816 uint32_t dofs, oprsz, maxsz; 7817 7818 if (size > 3 || ((size == 3) && !is_q)) { 7819 unallocated_encoding(s); 7820 return; 7821 } 7822 7823 if (!fp_access_check(s)) { 7824 return; 7825 } 7826 7827 dofs = vec_full_reg_offset(s, rd); 7828 oprsz = is_q ? 
16 : 8; 7829 maxsz = vec_full_reg_size(s); 7830 7831 tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn)); 7832 } 7833 7834 /* INS (Element) 7835 * 7836 * 31 21 20 16 15 14 11 10 9 5 4 0 7837 * +-----------------------+--------+------------+---+------+------+ 7838 * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 7839 * +-----------------------+--------+------------+---+------+------+ 7840 * 7841 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7842 * index: encoded in imm5<4:size+1> 7843 */ 7844 static void handle_simd_inse(DisasContext *s, int rd, int rn, 7845 int imm4, int imm5) 7846 { 7847 int size = ctz32(imm5); 7848 int src_index, dst_index; 7849 TCGv_i64 tmp; 7850 7851 if (size > 3) { 7852 unallocated_encoding(s); 7853 return; 7854 } 7855 7856 if (!fp_access_check(s)) { 7857 return; 7858 } 7859 7860 dst_index = extract32(imm5, 1+size, 5); 7861 src_index = extract32(imm4, size, 4); 7862 7863 tmp = tcg_temp_new_i64(); 7864 7865 read_vec_element(s, tmp, rn, src_index, size); 7866 write_vec_element(s, tmp, rd, dst_index, size); 7867 7868 /* INS is considered a 128-bit write for SVE. */ 7869 clear_vec_high(s, true, rd); 7870 } 7871 7872 7873 /* INS (General) 7874 * 7875 * 31 21 20 16 15 10 9 5 4 0 7876 * +-----------------------+--------+-------------+------+------+ 7877 * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd | 7878 * +-----------------------+--------+-------------+------+------+ 7879 * 7880 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7881 * index: encoded in imm5<4:size+1> 7882 */ 7883 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5) 7884 { 7885 int size = ctz32(imm5); 7886 int idx; 7887 7888 if (size > 3) { 7889 unallocated_encoding(s); 7890 return; 7891 } 7892 7893 if (!fp_access_check(s)) { 7894 return; 7895 } 7896 7897 idx = extract32(imm5, 1 + size, 4 - size); 7898 write_vec_element(s, cpu_reg(s, rn), rd, idx, size); 7899 7900 /* INS is considered a 128-bit write for SVE. */ 7901 clear_vec_high(s, true, rd); 7902 } 7903 7904 /* 7905 * UMOV (General) 7906 * SMOV (General) 7907 * 7908 * 31 30 29 21 20 16 15 12 10 9 5 4 0 7909 * +---+---+-------------------+--------+-------------+------+------+ 7910 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd | 7911 * +---+---+-------------------+--------+-------------+------+------+ 7912 * 7913 * U: unsigned when set 7914 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7915 */ 7916 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, 7917 int rn, int rd, int imm5) 7918 { 7919 int size = ctz32(imm5); 7920 int element; 7921 TCGv_i64 tcg_rd; 7922 7923 /* Check for UnallocatedEncodings */ 7924 if (is_signed) { 7925 if (size > 2 || (size == 2 && !is_q)) { 7926 unallocated_encoding(s); 7927 return; 7928 } 7929 } else { 7930 if (size > 3 7931 || (size < 3 && is_q) 7932 || (size == 3 && !is_q)) { 7933 unallocated_encoding(s); 7934 return; 7935 } 7936 } 7937 7938 if (!fp_access_check(s)) { 7939 return; 7940 } 7941 7942 element = extract32(imm5, 1+size, 4); 7943 7944 tcg_rd = cpu_reg(s, rd); 7945 read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? 
MO_SIGN : 0)); 7946 if (is_signed && !is_q) { 7947 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 7948 } 7949 } 7950 7951 /* AdvSIMD copy 7952 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 7953 * +---+---+----+-----------------+------+---+------+---+------+------+ 7954 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 7955 * +---+---+----+-----------------+------+---+------+---+------+------+ 7956 */ 7957 static void disas_simd_copy(DisasContext *s, uint32_t insn) 7958 { 7959 int rd = extract32(insn, 0, 5); 7960 int rn = extract32(insn, 5, 5); 7961 int imm4 = extract32(insn, 11, 4); 7962 int op = extract32(insn, 29, 1); 7963 int is_q = extract32(insn, 30, 1); 7964 int imm5 = extract32(insn, 16, 5); 7965 7966 if (op) { 7967 if (is_q) { 7968 /* INS (element) */ 7969 handle_simd_inse(s, rd, rn, imm4, imm5); 7970 } else { 7971 unallocated_encoding(s); 7972 } 7973 } else { 7974 switch (imm4) { 7975 case 0: 7976 /* DUP (element - vector) */ 7977 handle_simd_dupe(s, is_q, rd, rn, imm5); 7978 break; 7979 case 1: 7980 /* DUP (general) */ 7981 handle_simd_dupg(s, is_q, rd, rn, imm5); 7982 break; 7983 case 3: 7984 if (is_q) { 7985 /* INS (general) */ 7986 handle_simd_insg(s, rd, rn, imm5); 7987 } else { 7988 unallocated_encoding(s); 7989 } 7990 break; 7991 case 5: 7992 case 7: 7993 /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */ 7994 handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5); 7995 break; 7996 default: 7997 unallocated_encoding(s); 7998 break; 7999 } 8000 } 8001 } 8002 8003 /* AdvSIMD modified immediate 8004 * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0 8005 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ 8006 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd | 8007 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ 8008 * 8009 * There are a number of operations that can be carried out here: 8010 * MOVI - move (shifted) imm into register 8011 * MVNI - move inverted (shifted) imm into register 8012 * ORR - bitwise OR of (shifted) imm with register 8013 * BIC - bitwise clear of (shifted) imm with register 8014 * With ARMv8.2 we also have: 8015 * FMOV half-precision 8016 */ 8017 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) 8018 { 8019 int rd = extract32(insn, 0, 5); 8020 int cmode = extract32(insn, 12, 4); 8021 int o2 = extract32(insn, 11, 1); 8022 uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5); 8023 bool is_neg = extract32(insn, 29, 1); 8024 bool is_q = extract32(insn, 30, 1); 8025 uint64_t imm = 0; 8026 8027 if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) { 8028 /* Check for FMOV (vector, immediate) - half-precision */ 8029 if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) { 8030 unallocated_encoding(s); 8031 return; 8032 } 8033 } 8034 8035 if (!fp_access_check(s)) { 8036 return; 8037 } 8038 8039 if (cmode == 15 && o2 && !is_neg) { 8040 /* FMOV (vector, immediate) - half-precision */ 8041 imm = vfp_expand_imm(MO_16, abcdefgh); 8042 /* now duplicate across the lanes */ 8043 imm = dup_const(MO_16, imm); 8044 } else { 8045 imm = asimd_imm_const(abcdefgh, cmode, is_neg); 8046 } 8047 8048 if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) { 8049 /* MOVI or MVNI, with MVNI negation handled above. */ 8050 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8, 8051 vec_full_reg_size(s), imm); 8052 } else { 8053 /* ORR or BIC, with BIC negation to AND handled above. 
*/ 8054 if (is_neg) { 8055 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64); 8056 } else { 8057 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64); 8058 } 8059 } 8060 } 8061 8062 /* AdvSIMD scalar copy 8063 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 8064 * +-----+----+-----------------+------+---+------+---+------+------+ 8065 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 8066 * +-----+----+-----------------+------+---+------+---+------+------+ 8067 */ 8068 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn) 8069 { 8070 int rd = extract32(insn, 0, 5); 8071 int rn = extract32(insn, 5, 5); 8072 int imm4 = extract32(insn, 11, 4); 8073 int imm5 = extract32(insn, 16, 5); 8074 int op = extract32(insn, 29, 1); 8075 8076 if (op != 0 || imm4 != 0) { 8077 unallocated_encoding(s); 8078 return; 8079 } 8080 8081 /* DUP (element, scalar) */ 8082 handle_simd_dupes(s, rd, rn, imm5); 8083 } 8084 8085 /* AdvSIMD scalar pairwise 8086 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 8087 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 8088 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | 8089 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 8090 */ 8091 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) 8092 { 8093 int u = extract32(insn, 29, 1); 8094 int size = extract32(insn, 22, 2); 8095 int opcode = extract32(insn, 12, 5); 8096 int rn = extract32(insn, 5, 5); 8097 int rd = extract32(insn, 0, 5); 8098 TCGv_ptr fpst; 8099 8100 /* For some ops (the FP ones), size[1] is part of the encoding. 8101 * For ADDP strictly it is not but size[1] is always 1 for valid 8102 * encodings. 8103 */ 8104 opcode |= (extract32(size, 1, 1) << 5); 8105 8106 switch (opcode) { 8107 case 0x3b: /* ADDP */ 8108 if (u || size != 3) { 8109 unallocated_encoding(s); 8110 return; 8111 } 8112 if (!fp_access_check(s)) { 8113 return; 8114 } 8115 8116 fpst = NULL; 8117 break; 8118 case 0xc: /* FMAXNMP */ 8119 case 0xd: /* FADDP */ 8120 case 0xf: /* FMAXP */ 8121 case 0x2c: /* FMINNMP */ 8122 case 0x2f: /* FMINP */ 8123 /* FP op, size[0] is 32 or 64 bit*/ 8124 if (!u) { 8125 if (!dc_isar_feature(aa64_fp16, s)) { 8126 unallocated_encoding(s); 8127 return; 8128 } else { 8129 size = MO_16; 8130 } 8131 } else { 8132 size = extract32(size, 0, 1) ? MO_64 : MO_32; 8133 } 8134 8135 if (!fp_access_check(s)) { 8136 return; 8137 } 8138 8139 fpst = fpstatus_ptr(size == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR);
8140         break;
8141     default:
8142         unallocated_encoding(s);
8143         return;
8144     }
8145 
8146     if (size == MO_64) {
8147         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8148         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8149         TCGv_i64 tcg_res = tcg_temp_new_i64();
8150 
8151         read_vec_element(s, tcg_op1, rn, 0, MO_64);
8152         read_vec_element(s, tcg_op2, rn, 1, MO_64);
8153 
8154         switch (opcode) {
8155         case 0x3b: /* ADDP */
8156             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
8157             break;
8158         case 0xc: /* FMAXNMP */
8159             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8160             break;
8161         case 0xd: /* FADDP */
8162             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8163             break;
8164         case 0xf: /* FMAXP */
8165             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8166             break;
8167         case 0x2c: /* FMINNMP */
8168             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8169             break;
8170         case 0x2f: /* FMINP */
8171             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8172             break;
8173         default:
8174             g_assert_not_reached();
8175         }
8176 
8177         write_fp_dreg(s, rd, tcg_res);
8178     } else {
8179         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8180         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8181         TCGv_i32 tcg_res = tcg_temp_new_i32();
8182 
8183         read_vec_element_i32(s, tcg_op1, rn, 0, size);
8184         read_vec_element_i32(s, tcg_op2, rn, 1, size);
8185 
8186         if (size == MO_16) {
8187             switch (opcode) {
8188             case 0xc: /* FMAXNMP */
8189                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8190                 break;
8191             case 0xd: /* FADDP */
8192                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
8193                 break;
8194             case 0xf: /* FMAXP */
8195                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
8196                 break;
8197             case 0x2c: /* FMINNMP */
8198                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
8199                 break;
8200             case 0x2f: /* FMINP */
8201                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
8202                 break;
8203             default:
8204                 g_assert_not_reached();
8205             }
8206         } else {
8207             switch (opcode) {
8208             case 0xc: /* FMAXNMP */
8209                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8210                 break;
8211             case 0xd: /* FADDP */
8212                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8213                 break;
8214             case 0xf: /* FMAXP */
8215                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8216                 break;
8217             case 0x2c: /* FMINNMP */
8218                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8219                 break;
8220             case 0x2f: /* FMINP */
8221                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8222                 break;
8223             default:
8224                 g_assert_not_reached();
8225             }
8226         }
8227 
8228         write_fp_sreg(s, rd, tcg_res);
8229     }
8230 }
8231 
8232 /*
8233  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
8234  *
8235  * This handles the common shifting code and is used by both
8236  * the vector and scalar code.
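 *
 * When rounding is requested the callers in this file pass tcg_rnd as
 * 1 << (shift - 1); it is added to the source before the right shift.
 * For 64-bit elements that addition can carry out of 64 bits, so a wider
 * intermediate is built with tcg_gen_add2_i64 and the high half is folded
 * back in after the shift.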
8237 */ 8238 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src, 8239 TCGv_i64 tcg_rnd, bool accumulate, 8240 bool is_u, int size, int shift) 8241 { 8242 bool extended_result = false; 8243 bool round = tcg_rnd != NULL; 8244 int ext_lshift = 0; 8245 TCGv_i64 tcg_src_hi; 8246 8247 if (round && size == 3) { 8248 extended_result = true; 8249 ext_lshift = 64 - shift; 8250 tcg_src_hi = tcg_temp_new_i64(); 8251 } else if (shift == 64) { 8252 if (!accumulate && is_u) { 8253 /* result is zero */ 8254 tcg_gen_movi_i64(tcg_res, 0); 8255 return; 8256 } 8257 } 8258 8259 /* Deal with the rounding step */ 8260 if (round) { 8261 if (extended_result) { 8262 TCGv_i64 tcg_zero = tcg_constant_i64(0); 8263 if (!is_u) { 8264 /* take care of sign extending tcg_res */ 8265 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63); 8266 tcg_gen_add2_i64(tcg_src, tcg_src_hi, 8267 tcg_src, tcg_src_hi, 8268 tcg_rnd, tcg_zero); 8269 } else { 8270 tcg_gen_add2_i64(tcg_src, tcg_src_hi, 8271 tcg_src, tcg_zero, 8272 tcg_rnd, tcg_zero); 8273 } 8274 } else { 8275 tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd); 8276 } 8277 } 8278 8279 /* Now do the shift right */ 8280 if (round && extended_result) { 8281 /* extended case, >64 bit precision required */ 8282 if (ext_lshift == 0) { 8283 /* special case, only high bits matter */ 8284 tcg_gen_mov_i64(tcg_src, tcg_src_hi); 8285 } else { 8286 tcg_gen_shri_i64(tcg_src, tcg_src, shift); 8287 tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift); 8288 tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi); 8289 } 8290 } else { 8291 if (is_u) { 8292 if (shift == 64) { 8293 /* essentially shifting in 64 zeros */ 8294 tcg_gen_movi_i64(tcg_src, 0); 8295 } else { 8296 tcg_gen_shri_i64(tcg_src, tcg_src, shift); 8297 } 8298 } else { 8299 if (shift == 64) { 8300 /* effectively extending the sign-bit */ 8301 tcg_gen_sari_i64(tcg_src, tcg_src, 63); 8302 } else { 8303 tcg_gen_sari_i64(tcg_src, tcg_src, shift); 8304 } 8305 } 8306 } 8307 8308 if (accumulate) { 8309 tcg_gen_add_i64(tcg_res, tcg_res, tcg_src); 8310 } else { 8311 tcg_gen_mov_i64(tcg_res, tcg_src); 8312 } 8313 } 8314 8315 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ 8316 static void handle_scalar_simd_shri(DisasContext *s, 8317 bool is_u, int immh, int immb, 8318 int opcode, int rn, int rd) 8319 { 8320 const int size = 3; 8321 int immhb = immh << 3 | immb; 8322 int shift = 2 * (8 << size) - immhb; 8323 bool accumulate = false; 8324 bool round = false; 8325 bool insert = false; 8326 TCGv_i64 tcg_rn; 8327 TCGv_i64 tcg_rd; 8328 TCGv_i64 tcg_round; 8329 8330 if (!extract32(immh, 3, 1)) { 8331 unallocated_encoding(s); 8332 return; 8333 } 8334 8335 if (!fp_access_check(s)) { 8336 return; 8337 } 8338 8339 switch (opcode) { 8340 case 0x02: /* SSRA / USRA (accumulate) */ 8341 accumulate = true; 8342 break; 8343 case 0x04: /* SRSHR / URSHR (rounding) */ 8344 round = true; 8345 break; 8346 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 8347 accumulate = round = true; 8348 break; 8349 case 0x08: /* SRI */ 8350 insert = true; 8351 break; 8352 } 8353 8354 if (round) { 8355 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 8356 } else { 8357 tcg_round = NULL; 8358 } 8359 8360 tcg_rn = read_fp_dreg(s, rn); 8361 tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); 8362 8363 if (insert) { 8364 /* shift count same as element size is valid but does nothing; 8365 * special case to avoid potential shift by 64. 
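 * Otherwise SRI shifts Rn right by 'shift' and deposits the surviving
 * esize - shift bits into the low end of Rd, leaving the top 'shift'
 * bits of Rd unchanged.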
8366 */ 8367 int esize = 8 << size; 8368 if (shift != esize) { 8369 tcg_gen_shri_i64(tcg_rn, tcg_rn, shift); 8370 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift); 8371 } 8372 } else { 8373 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 8374 accumulate, is_u, size, shift); 8375 } 8376 8377 write_fp_dreg(s, rd, tcg_rd); 8378 } 8379 8380 /* SHL/SLI - Scalar shift left */ 8381 static void handle_scalar_simd_shli(DisasContext *s, bool insert, 8382 int immh, int immb, int opcode, 8383 int rn, int rd) 8384 { 8385 int size = 32 - clz32(immh) - 1; 8386 int immhb = immh << 3 | immb; 8387 int shift = immhb - (8 << size); 8388 TCGv_i64 tcg_rn; 8389 TCGv_i64 tcg_rd; 8390 8391 if (!extract32(immh, 3, 1)) { 8392 unallocated_encoding(s); 8393 return; 8394 } 8395 8396 if (!fp_access_check(s)) { 8397 return; 8398 } 8399 8400 tcg_rn = read_fp_dreg(s, rn); 8401 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); 8402 8403 if (insert) { 8404 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift); 8405 } else { 8406 tcg_gen_shli_i64(tcg_rd, tcg_rn, shift); 8407 } 8408 8409 write_fp_dreg(s, rd, tcg_rd); 8410 } 8411 8412 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with 8413 * (signed/unsigned) narrowing */ 8414 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, 8415 bool is_u_shift, bool is_u_narrow, 8416 int immh, int immb, int opcode, 8417 int rn, int rd) 8418 { 8419 int immhb = immh << 3 | immb; 8420 int size = 32 - clz32(immh) - 1; 8421 int esize = 8 << size; 8422 int shift = (2 * esize) - immhb; 8423 int elements = is_scalar ? 1 : (64 / esize); 8424 bool round = extract32(opcode, 0, 1); 8425 MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN); 8426 TCGv_i64 tcg_rn, tcg_rd, tcg_round; 8427 TCGv_i32 tcg_rd_narrowed; 8428 TCGv_i64 tcg_final; 8429 8430 static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = { 8431 { gen_helper_neon_narrow_sat_s8, 8432 gen_helper_neon_unarrow_sat8 }, 8433 { gen_helper_neon_narrow_sat_s16, 8434 gen_helper_neon_unarrow_sat16 }, 8435 { gen_helper_neon_narrow_sat_s32, 8436 gen_helper_neon_unarrow_sat32 }, 8437 { NULL, NULL }, 8438 }; 8439 static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = { 8440 gen_helper_neon_narrow_sat_u8, 8441 gen_helper_neon_narrow_sat_u16, 8442 gen_helper_neon_narrow_sat_u32, 8443 NULL 8444 }; 8445 NeonGenNarrowEnvFn *narrowfn; 8446 8447 int i; 8448 8449 assert(size < 4); 8450 8451 if (extract32(immh, 3, 1)) { 8452 unallocated_encoding(s); 8453 return; 8454 } 8455 8456 if (!fp_access_check(s)) { 8457 return; 8458 } 8459 8460 if (is_u_shift) { 8461 narrowfn = unsigned_narrow_fns[size]; 8462 } else { 8463 narrowfn = signed_narrow_fns[size][is_u_narrow ? 
1 : 0]; 8464 } 8465 8466 tcg_rn = tcg_temp_new_i64(); 8467 tcg_rd = tcg_temp_new_i64(); 8468 tcg_rd_narrowed = tcg_temp_new_i32(); 8469 tcg_final = tcg_temp_new_i64(); 8470 8471 if (round) { 8472 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 8473 } else { 8474 tcg_round = NULL; 8475 } 8476 8477 for (i = 0; i < elements; i++) { 8478 read_vec_element(s, tcg_rn, rn, i, ldop); 8479 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 8480 false, is_u_shift, size+1, shift); 8481 narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd); 8482 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed); 8483 if (i == 0) { 8484 tcg_gen_mov_i64(tcg_final, tcg_rd); 8485 } else { 8486 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); 8487 } 8488 } 8489 8490 if (!is_q) { 8491 write_vec_element(s, tcg_final, rd, 0, MO_64); 8492 } else { 8493 write_vec_element(s, tcg_final, rd, 1, MO_64); 8494 } 8495 clear_vec_high(s, is_q, rd); 8496 } 8497 8498 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */ 8499 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, 8500 bool src_unsigned, bool dst_unsigned, 8501 int immh, int immb, int rn, int rd) 8502 { 8503 int immhb = immh << 3 | immb; 8504 int size = 32 - clz32(immh) - 1; 8505 int shift = immhb - (8 << size); 8506 int pass; 8507 8508 assert(immh != 0); 8509 assert(!(scalar && is_q)); 8510 8511 if (!scalar) { 8512 if (!is_q && extract32(immh, 3, 1)) { 8513 unallocated_encoding(s); 8514 return; 8515 } 8516 8517 /* Since we use the variable-shift helpers we must 8518 * replicate the shift count into each element of 8519 * the tcg_shift value. 8520 */ 8521 switch (size) { 8522 case 0: 8523 shift |= shift << 8; 8524 /* fall through */ 8525 case 1: 8526 shift |= shift << 16; 8527 break; 8528 case 2: 8529 case 3: 8530 break; 8531 default: 8532 g_assert_not_reached(); 8533 } 8534 } 8535 8536 if (!fp_access_check(s)) { 8537 return; 8538 } 8539 8540 if (size == 3) { 8541 TCGv_i64 tcg_shift = tcg_constant_i64(shift); 8542 static NeonGenTwo64OpEnvFn * const fns[2][2] = { 8543 { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 }, 8544 { NULL, gen_helper_neon_qshl_u64 }, 8545 }; 8546 NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned]; 8547 int maxpass = is_q ? 2 : 1; 8548 8549 for (pass = 0; pass < maxpass; pass++) { 8550 TCGv_i64 tcg_op = tcg_temp_new_i64(); 8551 8552 read_vec_element(s, tcg_op, rn, pass, MO_64); 8553 genfn(tcg_op, cpu_env, tcg_op, tcg_shift); 8554 write_vec_element(s, tcg_op, rd, pass, MO_64); 8555 } 8556 clear_vec_high(s, is_q, rd); 8557 } else { 8558 TCGv_i32 tcg_shift = tcg_constant_i32(shift); 8559 static NeonGenTwoOpEnvFn * const fns[2][2][3] = { 8560 { 8561 { gen_helper_neon_qshl_s8, 8562 gen_helper_neon_qshl_s16, 8563 gen_helper_neon_qshl_s32 }, 8564 { gen_helper_neon_qshlu_s8, 8565 gen_helper_neon_qshlu_s16, 8566 gen_helper_neon_qshlu_s32 } 8567 }, { 8568 { NULL, NULL, NULL }, 8569 { gen_helper_neon_qshl_u8, 8570 gen_helper_neon_qshl_u16, 8571 gen_helper_neon_qshl_u32 } 8572 } 8573 }; 8574 NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size]; 8575 MemOp memop = scalar ? size : MO_32; 8576 int maxpass = scalar ? 1 : is_q ? 
4 : 2; 8577 8578 for (pass = 0; pass < maxpass; pass++) { 8579 TCGv_i32 tcg_op = tcg_temp_new_i32(); 8580 8581 read_vec_element_i32(s, tcg_op, rn, pass, memop); 8582 genfn(tcg_op, cpu_env, tcg_op, tcg_shift); 8583 if (scalar) { 8584 switch (size) { 8585 case 0: 8586 tcg_gen_ext8u_i32(tcg_op, tcg_op); 8587 break; 8588 case 1: 8589 tcg_gen_ext16u_i32(tcg_op, tcg_op); 8590 break; 8591 case 2: 8592 break; 8593 default: 8594 g_assert_not_reached(); 8595 } 8596 write_fp_sreg(s, rd, tcg_op); 8597 } else { 8598 write_vec_element_i32(s, tcg_op, rd, pass, MO_32); 8599 } 8600 } 8601 8602 if (!scalar) { 8603 clear_vec_high(s, is_q, rd); 8604 } 8605 } 8606 } 8607 8608 /* Common vector code for handling integer to FP conversion */ 8609 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, 8610 int elements, int is_signed, 8611 int fracbits, int size) 8612 { 8613 TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 8614 TCGv_i32 tcg_shift = NULL; 8615 8616 MemOp mop = size | (is_signed ? MO_SIGN : 0); 8617 int pass; 8618 8619 if (fracbits || size == MO_64) { 8620 tcg_shift = tcg_constant_i32(fracbits); 8621 } 8622 8623 if (size == MO_64) { 8624 TCGv_i64 tcg_int64 = tcg_temp_new_i64(); 8625 TCGv_i64 tcg_double = tcg_temp_new_i64(); 8626 8627 for (pass = 0; pass < elements; pass++) { 8628 read_vec_element(s, tcg_int64, rn, pass, mop); 8629 8630 if (is_signed) { 8631 gen_helper_vfp_sqtod(tcg_double, tcg_int64, 8632 tcg_shift, tcg_fpst); 8633 } else { 8634 gen_helper_vfp_uqtod(tcg_double, tcg_int64, 8635 tcg_shift, tcg_fpst); 8636 } 8637 if (elements == 1) { 8638 write_fp_dreg(s, rd, tcg_double); 8639 } else { 8640 write_vec_element(s, tcg_double, rd, pass, MO_64); 8641 } 8642 } 8643 } else { 8644 TCGv_i32 tcg_int32 = tcg_temp_new_i32(); 8645 TCGv_i32 tcg_float = tcg_temp_new_i32(); 8646 8647 for (pass = 0; pass < elements; pass++) { 8648 read_vec_element_i32(s, tcg_int32, rn, pass, mop); 8649 8650 switch (size) { 8651 case MO_32: 8652 if (fracbits) { 8653 if (is_signed) { 8654 gen_helper_vfp_sltos(tcg_float, tcg_int32, 8655 tcg_shift, tcg_fpst); 8656 } else { 8657 gen_helper_vfp_ultos(tcg_float, tcg_int32, 8658 tcg_shift, tcg_fpst); 8659 } 8660 } else { 8661 if (is_signed) { 8662 gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst); 8663 } else { 8664 gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst); 8665 } 8666 } 8667 break; 8668 case MO_16: 8669 if (fracbits) { 8670 if (is_signed) { 8671 gen_helper_vfp_sltoh(tcg_float, tcg_int32, 8672 tcg_shift, tcg_fpst); 8673 } else { 8674 gen_helper_vfp_ultoh(tcg_float, tcg_int32, 8675 tcg_shift, tcg_fpst); 8676 } 8677 } else { 8678 if (is_signed) { 8679 gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst); 8680 } else { 8681 gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst); 8682 } 8683 } 8684 break; 8685 default: 8686 g_assert_not_reached(); 8687 } 8688 8689 if (elements == 1) { 8690 write_fp_sreg(s, rd, tcg_float); 8691 } else { 8692 write_vec_element_i32(s, tcg_float, rd, pass, size); 8693 } 8694 } 8695 } 8696 8697 clear_vec_high(s, elements << size == 16, rd); 8698 } 8699 8700 /* UCVTF/SCVTF - Integer to FP conversion */ 8701 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, 8702 bool is_q, bool is_u, 8703 int immh, int immb, int opcode, 8704 int rn, int rd) 8705 { 8706 int size, elements, fracbits; 8707 int immhb = immh << 3 | immb; 8708 8709 if (immh & 8) { 8710 size = MO_64; 8711 if (!is_scalar && !is_q) { 8712 unallocated_encoding(s); 8713 return; 8714 } 8715 } else if (immh & 4) { 8716 size 
= MO_32;
8717     } else if (immh & 2) {
8718         size = MO_16;
8719         if (!dc_isar_feature(aa64_fp16, s)) {
8720             unallocated_encoding(s);
8721             return;
8722         }
8723     } else {
8724         /* immh == 0 would be a failure of the decode logic */
8725         g_assert(immh == 1);
8726         unallocated_encoding(s);
8727         return;
8728     }
8729 
8730     if (is_scalar) {
8731         elements = 1;
8732     } else {
8733         elements = (8 << is_q) >> size;
8734     }
8735     fracbits = (16 << size) - immhb;
8736 
8737     if (!fp_access_check(s)) {
8738         return;
8739     }
8740 
8741     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
8742 }
8743 
8744 /* FCVTZS, FCVTZU - FP to fixed-point conversion */
8745 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
8746                                          bool is_q, bool is_u,
8747                                          int immh, int immb, int rn, int rd)
8748 {
8749     int immhb = immh << 3 | immb;
8750     int pass, size, fracbits;
8751     TCGv_ptr tcg_fpstatus;
8752     TCGv_i32 tcg_rmode, tcg_shift;
8753 
8754     if (immh & 0x8) {
8755         size = MO_64;
8756         if (!is_scalar && !is_q) {
8757             unallocated_encoding(s);
8758             return;
8759         }
8760     } else if (immh & 0x4) {
8761         size = MO_32;
8762     } else if (immh & 0x2) {
8763         size = MO_16;
8764         if (!dc_isar_feature(aa64_fp16, s)) {
8765             unallocated_encoding(s);
8766             return;
8767         }
8768     } else {
8769         /* Should have split out AdvSIMD modified immediate earlier. */
8770         assert(immh == 1);
8771         unallocated_encoding(s);
8772         return;
8773     }
8774 
8775     if (!fp_access_check(s)) {
8776         return;
8777     }
8778 
8779     assert(!(is_scalar && is_q));
8780 
8781     tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
8782     tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, tcg_fpstatus);
8783     fracbits = (16 << size) - immhb;
8784     tcg_shift = tcg_constant_i32(fracbits);
8785 
8786     if (size == MO_64) {
8787         int maxpass = is_scalar ? 1 : 2;
8788 
8789         for (pass = 0; pass < maxpass; pass++) {
8790             TCGv_i64 tcg_op = tcg_temp_new_i64();
8791 
8792             read_vec_element(s, tcg_op, rn, pass, MO_64);
8793             if (is_u) {
8794                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8795             } else {
8796                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8797             }
8798             write_vec_element(s, tcg_op, rd, pass, MO_64);
8799         }
8800         clear_vec_high(s, is_q, rd);
8801     } else {
8802         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
8803         int maxpass = is_scalar ?
1 : ((8 << is_q) >> size);
8804 
8805         switch (size) {
8806         case MO_16:
8807             if (is_u) {
8808                 fn = gen_helper_vfp_touhh;
8809             } else {
8810                 fn = gen_helper_vfp_toshh;
8811             }
8812             break;
8813         case MO_32:
8814             if (is_u) {
8815                 fn = gen_helper_vfp_touls;
8816             } else {
8817                 fn = gen_helper_vfp_tosls;
8818             }
8819             break;
8820         default:
8821             g_assert_not_reached();
8822         }
8823 
8824         for (pass = 0; pass < maxpass; pass++) {
8825             TCGv_i32 tcg_op = tcg_temp_new_i32();
8826 
8827             read_vec_element_i32(s, tcg_op, rn, pass, size);
8828             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
8829             if (is_scalar) {
8830                 write_fp_sreg(s, rd, tcg_op);
8831             } else {
8832                 write_vec_element_i32(s, tcg_op, rd, pass, size);
8833             }
8834         }
8835         if (!is_scalar) {
8836             clear_vec_high(s, is_q, rd);
8837         }
8838     }
8839 
8840     gen_restore_rmode(tcg_rmode, tcg_fpstatus);
8841 }
8842 
8843 /* AdvSIMD scalar shift by immediate
8844  *  31 30 29 28         23 22  19 18  16 15    11  10 9    5 4    0
8845  * +-----+---+-------------+------+------+--------+---+------+------+
8846  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8847  * +-----+---+-------------+------+------+--------+---+------+------+
8848  *
8849  * This is the scalar version so it works on a fixed size register
8850  */
8851 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
8852 {
8853     int rd = extract32(insn, 0, 5);
8854     int rn = extract32(insn, 5, 5);
8855     int opcode = extract32(insn, 11, 5);
8856     int immb = extract32(insn, 16, 3);
8857     int immh = extract32(insn, 19, 4);
8858     bool is_u = extract32(insn, 29, 1);
8859 
8860     if (immh == 0) {
8861         unallocated_encoding(s);
8862         return;
8863     }
8864 
8865     switch (opcode) {
8866     case 0x08: /* SRI */
8867         if (!is_u) {
8868             unallocated_encoding(s);
8869             return;
8870         }
8871         /* fall through */
8872     case 0x00: /* SSHR / USHR */
8873     case 0x02: /* SSRA / USRA */
8874     case 0x04: /* SRSHR / URSHR */
8875     case 0x06: /* SRSRA / URSRA */
8876         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
8877         break;
8878     case 0x0a: /* SHL / SLI */
8879         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
8880         break;
8881     case 0x1c: /* SCVTF, UCVTF */
8882         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
8883                                      opcode, rn, rd);
8884         break;
8885     case 0x10: /* SQSHRUN, SQSHRUN2 */
8886     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
8887         if (!is_u) {
8888             unallocated_encoding(s);
8889             return;
8890         }
8891         handle_vec_simd_sqshrn(s, true, false, false, true,
8892                                immh, immb, opcode, rn, rd);
8893         break;
8894     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
8895     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
8896         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
8897                                immh, immb, opcode, rn, rd);
8898         break;
8899     case 0xc: /* SQSHLU */
8900         if (!is_u) {
8901             unallocated_encoding(s);
8902             return;
8903         }
8904         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
8905         break;
8906     case 0xe: /* SQSHL, UQSHL */
8907         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
8908         break;
8909     case 0x1f: /* FCVTZS, FCVTZU */
8910         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
8911         break;
8912     default:
8913         unallocated_encoding(s);
8914         break;
8915     }
8916 }
8917 
8918 /* AdvSIMD scalar three different
8919  *  31 30 29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8920  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8921  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8922  * +-----+---+-----------+------+---+------+--------+-----+------+------+
8923  */
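/*
 * Only the signed saturating doubling multiply family (SQDMLAL, SQDMLSL,
 * SQDMULL) is valid here: U must be 0 and the source element size must be
 * 16 or 32 bits, producing a double-width scalar result.
 */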
static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) 8925 { 8926 bool is_u = extract32(insn, 29, 1); 8927 int size = extract32(insn, 22, 2); 8928 int opcode = extract32(insn, 12, 4); 8929 int rm = extract32(insn, 16, 5); 8930 int rn = extract32(insn, 5, 5); 8931 int rd = extract32(insn, 0, 5); 8932 8933 if (is_u) { 8934 unallocated_encoding(s); 8935 return; 8936 } 8937 8938 switch (opcode) { 8939 case 0x9: /* SQDMLAL, SQDMLAL2 */ 8940 case 0xb: /* SQDMLSL, SQDMLSL2 */ 8941 case 0xd: /* SQDMULL, SQDMULL2 */ 8942 if (size == 0 || size == 3) { 8943 unallocated_encoding(s); 8944 return; 8945 } 8946 break; 8947 default: 8948 unallocated_encoding(s); 8949 return; 8950 } 8951 8952 if (!fp_access_check(s)) { 8953 return; 8954 } 8955 8956 if (size == 2) { 8957 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 8958 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 8959 TCGv_i64 tcg_res = tcg_temp_new_i64(); 8960 8961 read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN); 8962 read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN); 8963 8964 tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2); 8965 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res); 8966 8967 switch (opcode) { 8968 case 0xd: /* SQDMULL, SQDMULL2 */ 8969 break; 8970 case 0xb: /* SQDMLSL, SQDMLSL2 */ 8971 tcg_gen_neg_i64(tcg_res, tcg_res); 8972 /* fall through */ 8973 case 0x9: /* SQDMLAL, SQDMLAL2 */ 8974 read_vec_element(s, tcg_op1, rd, 0, MO_64); 8975 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, 8976 tcg_res, tcg_op1); 8977 break; 8978 default: 8979 g_assert_not_reached(); 8980 } 8981 8982 write_fp_dreg(s, rd, tcg_res); 8983 } else { 8984 TCGv_i32 tcg_op1 = read_fp_hreg(s, rn); 8985 TCGv_i32 tcg_op2 = read_fp_hreg(s, rm); 8986 TCGv_i64 tcg_res = tcg_temp_new_i64(); 8987 8988 gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2); 8989 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res); 8990 8991 switch (opcode) { 8992 case 0xd: /* SQDMULL, SQDMULL2 */ 8993 break; 8994 case 0xb: /* SQDMLSL, SQDMLSL2 */ 8995 gen_helper_neon_negl_u32(tcg_res, tcg_res); 8996 /* fall through */ 8997 case 0x9: /* SQDMLAL, SQDMLAL2 */ 8998 { 8999 TCGv_i64 tcg_op3 = tcg_temp_new_i64(); 9000 read_vec_element(s, tcg_op3, rd, 0, MO_32); 9001 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, 9002 tcg_res, tcg_op3); 9003 break; 9004 } 9005 default: 9006 g_assert_not_reached(); 9007 } 9008 9009 tcg_gen_ext32u_i64(tcg_res, tcg_res); 9010 write_fp_dreg(s, rd, tcg_res); 9011 } 9012 } 9013 9014 static void handle_3same_64(DisasContext *s, int opcode, bool u, 9015 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) 9016 { 9017 /* Handle 64x64->64 opcodes which are shared between the scalar 9018 * and vector 3-same groups. We cover every opcode where size == 3 9019 * is valid in either the three-reg-same (integer, not pairwise) 9020 * or scalar-three-reg-same groups. 9021 */ 9022 TCGCond cond; 9023 9024 switch (opcode) { 9025 case 0x1: /* SQADD */ 9026 if (u) { 9027 gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9028 } else { 9029 gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9030 } 9031 break; 9032 case 0x5: /* SQSUB */ 9033 if (u) { 9034 gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9035 } else { 9036 gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9037 } 9038 break; 9039 case 0x6: /* CMGT, CMHI */ 9040 /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0. 9041 * We implement this using setcond (test) and then negating. 9042 */ 9043 cond = u ? 
TCG_COND_GTU : TCG_COND_GT; 9044 do_cmop: 9045 tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm); 9046 tcg_gen_neg_i64(tcg_rd, tcg_rd); 9047 break; 9048 case 0x7: /* CMGE, CMHS */ 9049 cond = u ? TCG_COND_GEU : TCG_COND_GE; 9050 goto do_cmop; 9051 case 0x11: /* CMTST, CMEQ */ 9052 if (u) { 9053 cond = TCG_COND_EQ; 9054 goto do_cmop; 9055 } 9056 gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm); 9057 break; 9058 case 0x8: /* SSHL, USHL */ 9059 if (u) { 9060 gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm); 9061 } else { 9062 gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm); 9063 } 9064 break; 9065 case 0x9: /* SQSHL, UQSHL */ 9066 if (u) { 9067 gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9068 } else { 9069 gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9070 } 9071 break; 9072 case 0xa: /* SRSHL, URSHL */ 9073 if (u) { 9074 gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm); 9075 } else { 9076 gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm); 9077 } 9078 break; 9079 case 0xb: /* SQRSHL, UQRSHL */ 9080 if (u) { 9081 gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9082 } else { 9083 gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9084 } 9085 break; 9086 case 0x10: /* ADD, SUB */ 9087 if (u) { 9088 tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm); 9089 } else { 9090 tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm); 9091 } 9092 break; 9093 default: 9094 g_assert_not_reached(); 9095 } 9096 } 9097 9098 /* Handle the 3-same-operands float operations; shared by the scalar 9099 * and vector encodings. The caller must filter out any encodings 9100 * not allocated for the encoding it is dealing with. 9101 */ 9102 static void handle_3same_float(DisasContext *s, int size, int elements, 9103 int fpopcode, int rd, int rn, int rm) 9104 { 9105 int pass; 9106 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9107 9108 for (pass = 0; pass < elements; pass++) { 9109 if (size) { 9110 /* Double */ 9111 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 9112 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 9113 TCGv_i64 tcg_res = tcg_temp_new_i64(); 9114 9115 read_vec_element(s, tcg_op1, rn, pass, MO_64); 9116 read_vec_element(s, tcg_op2, rm, pass, MO_64); 9117 9118 switch (fpopcode) { 9119 case 0x39: /* FMLS */ 9120 /* As usual for ARM, separate negation for fused multiply-add */ 9121 gen_helper_vfp_negd(tcg_op1, tcg_op1); 9122 /* fall through */ 9123 case 0x19: /* FMLA */ 9124 read_vec_element(s, tcg_res, rd, pass, MO_64); 9125 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, 9126 tcg_res, fpst); 9127 break; 9128 case 0x18: /* FMAXNM */ 9129 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); 9130 break; 9131 case 0x1a: /* FADD */ 9132 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); 9133 break; 9134 case 0x1b: /* FMULX */ 9135 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst); 9136 break; 9137 case 0x1c: /* FCMEQ */ 9138 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9139 break; 9140 case 0x1e: /* FMAX */ 9141 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); 9142 break; 9143 case 0x1f: /* FRECPS */ 9144 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9145 break; 9146 case 0x38: /* FMINNM */ 9147 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); 9148 break; 9149 case 0x3a: /* FSUB */ 9150 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 9151 break; 9152 case 0x3e: /* FMIN */ 9153 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); 9154 break; 9155 case 0x3f: /* FRSQRTS */ 9156 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9157 break; 9158 case 0x5b: /* FMUL */ 9159 
gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 9160 break; 9161 case 0x5c: /* FCMGE */ 9162 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9163 break; 9164 case 0x5d: /* FACGE */ 9165 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9166 break; 9167 case 0x5f: /* FDIV */ 9168 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); 9169 break; 9170 case 0x7a: /* FABD */ 9171 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 9172 gen_helper_vfp_absd(tcg_res, tcg_res); 9173 break; 9174 case 0x7c: /* FCMGT */ 9175 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9176 break; 9177 case 0x7d: /* FACGT */ 9178 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9179 break; 9180 default: 9181 g_assert_not_reached(); 9182 } 9183 9184 write_vec_element(s, tcg_res, rd, pass, MO_64); 9185 } else { 9186 /* Single */ 9187 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 9188 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 9189 TCGv_i32 tcg_res = tcg_temp_new_i32(); 9190 9191 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); 9192 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); 9193 9194 switch (fpopcode) { 9195 case 0x39: /* FMLS */ 9196 /* As usual for ARM, separate negation for fused multiply-add */ 9197 gen_helper_vfp_negs(tcg_op1, tcg_op1); 9198 /* fall through */ 9199 case 0x19: /* FMLA */ 9200 read_vec_element_i32(s, tcg_res, rd, pass, MO_32); 9201 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, 9202 tcg_res, fpst); 9203 break; 9204 case 0x1a: /* FADD */ 9205 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); 9206 break; 9207 case 0x1b: /* FMULX */ 9208 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst); 9209 break; 9210 case 0x1c: /* FCMEQ */ 9211 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9212 break; 9213 case 0x1e: /* FMAX */ 9214 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); 9215 break; 9216 case 0x1f: /* FRECPS */ 9217 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9218 break; 9219 case 0x18: /* FMAXNM */ 9220 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); 9221 break; 9222 case 0x38: /* FMINNM */ 9223 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); 9224 break; 9225 case 0x3a: /* FSUB */ 9226 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 9227 break; 9228 case 0x3e: /* FMIN */ 9229 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); 9230 break; 9231 case 0x3f: /* FRSQRTS */ 9232 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9233 break; 9234 case 0x5b: /* FMUL */ 9235 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 9236 break; 9237 case 0x5c: /* FCMGE */ 9238 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9239 break; 9240 case 0x5d: /* FACGE */ 9241 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9242 break; 9243 case 0x5f: /* FDIV */ 9244 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); 9245 break; 9246 case 0x7a: /* FABD */ 9247 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 9248 gen_helper_vfp_abss(tcg_res, tcg_res); 9249 break; 9250 case 0x7c: /* FCMGT */ 9251 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9252 break; 9253 case 0x7d: /* FACGT */ 9254 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9255 break; 9256 default: 9257 g_assert_not_reached(); 9258 } 9259 9260 if (elements == 1) { 9261 /* scalar single so clear high part */ 9262 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 9263 9264 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res); 9265 write_vec_element(s, tcg_tmp, rd, pass, MO_64); 9266 } else { 9267 write_vec_element_i32(s, 
tcg_res, rd, pass, MO_32); 9268 } 9269 } 9270 } 9271 9272 clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd); 9273 } 9274 9275 /* AdvSIMD scalar three same 9276 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 9277 * +-----+---+-----------+------+---+------+--------+---+------+------+ 9278 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | 9279 * +-----+---+-----------+------+---+------+--------+---+------+------+ 9280 */ 9281 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) 9282 { 9283 int rd = extract32(insn, 0, 5); 9284 int rn = extract32(insn, 5, 5); 9285 int opcode = extract32(insn, 11, 5); 9286 int rm = extract32(insn, 16, 5); 9287 int size = extract32(insn, 22, 2); 9288 bool u = extract32(insn, 29, 1); 9289 TCGv_i64 tcg_rd; 9290 9291 if (opcode >= 0x18) { 9292 /* Floating point: U, size[1] and opcode indicate operation */ 9293 int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6); 9294 switch (fpopcode) { 9295 case 0x1b: /* FMULX */ 9296 case 0x1f: /* FRECPS */ 9297 case 0x3f: /* FRSQRTS */ 9298 case 0x5d: /* FACGE */ 9299 case 0x7d: /* FACGT */ 9300 case 0x1c: /* FCMEQ */ 9301 case 0x5c: /* FCMGE */ 9302 case 0x7c: /* FCMGT */ 9303 case 0x7a: /* FABD */ 9304 break; 9305 default: 9306 unallocated_encoding(s); 9307 return; 9308 } 9309 9310 if (!fp_access_check(s)) { 9311 return; 9312 } 9313 9314 handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm); 9315 return; 9316 } 9317 9318 switch (opcode) { 9319 case 0x1: /* SQADD, UQADD */ 9320 case 0x5: /* SQSUB, UQSUB */ 9321 case 0x9: /* SQSHL, UQSHL */ 9322 case 0xb: /* SQRSHL, UQRSHL */ 9323 break; 9324 case 0x8: /* SSHL, USHL */ 9325 case 0xa: /* SRSHL, URSHL */ 9326 case 0x6: /* CMGT, CMHI */ 9327 case 0x7: /* CMGE, CMHS */ 9328 case 0x11: /* CMTST, CMEQ */ 9329 case 0x10: /* ADD, SUB (vector) */ 9330 if (size != 3) { 9331 unallocated_encoding(s); 9332 return; 9333 } 9334 break; 9335 case 0x16: /* SQDMULH, SQRDMULH (vector) */ 9336 if (size != 1 && size != 2) { 9337 unallocated_encoding(s); 9338 return; 9339 } 9340 break; 9341 default: 9342 unallocated_encoding(s); 9343 return; 9344 } 9345 9346 if (!fp_access_check(s)) { 9347 return; 9348 } 9349 9350 tcg_rd = tcg_temp_new_i64(); 9351 9352 if (size == 3) { 9353 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 9354 TCGv_i64 tcg_rm = read_fp_dreg(s, rm); 9355 9356 handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm); 9357 } else { 9358 /* Do a single operation on the lowest element in the vector. 9359 * We use the standard Neon helpers and rely on 0 OP 0 == 0 with 9360 * no side effects for all these operations. 9361 * OPTME: special-purpose helpers would avoid doing some 9362 * unnecessary work in the helper for the 8 and 16 bit cases. 
9363 */ 9364 NeonGenTwoOpEnvFn *genenvfn; 9365 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 9366 TCGv_i32 tcg_rm = tcg_temp_new_i32(); 9367 TCGv_i32 tcg_rd32 = tcg_temp_new_i32(); 9368 9369 read_vec_element_i32(s, tcg_rn, rn, 0, size); 9370 read_vec_element_i32(s, tcg_rm, rm, 0, size); 9371 9372 switch (opcode) { 9373 case 0x1: /* SQADD, UQADD */ 9374 { 9375 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9376 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, 9377 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, 9378 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, 9379 }; 9380 genenvfn = fns[size][u]; 9381 break; 9382 } 9383 case 0x5: /* SQSUB, UQSUB */ 9384 { 9385 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9386 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, 9387 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, 9388 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 }, 9389 }; 9390 genenvfn = fns[size][u]; 9391 break; 9392 } 9393 case 0x9: /* SQSHL, UQSHL */ 9394 { 9395 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9396 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, 9397 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, 9398 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, 9399 }; 9400 genenvfn = fns[size][u]; 9401 break; 9402 } 9403 case 0xb: /* SQRSHL, UQRSHL */ 9404 { 9405 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9406 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, 9407 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, 9408 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, 9409 }; 9410 genenvfn = fns[size][u]; 9411 break; 9412 } 9413 case 0x16: /* SQDMULH, SQRDMULH */ 9414 { 9415 static NeonGenTwoOpEnvFn * const fns[2][2] = { 9416 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, 9417 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, 9418 }; 9419 assert(size == 1 || size == 2); 9420 genenvfn = fns[size - 1][u]; 9421 break; 9422 } 9423 default: 9424 g_assert_not_reached(); 9425 } 9426 9427 genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm); 9428 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32); 9429 } 9430 9431 write_fp_dreg(s, rd, tcg_rd); 9432 } 9433 9434 /* AdvSIMD scalar three same FP16 9435 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 9436 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ 9437 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | 9438 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ 9439 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400 9440 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400 9441 */ 9442 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, 9443 uint32_t insn) 9444 { 9445 int rd = extract32(insn, 0, 5); 9446 int rn = extract32(insn, 5, 5); 9447 int opcode = extract32(insn, 11, 3); 9448 int rm = extract32(insn, 16, 5); 9449 bool u = extract32(insn, 29, 1); 9450 bool a = extract32(insn, 23, 1); 9451 int fpopcode = opcode | (a << 3) | (u << 4); 9452 TCGv_ptr fpst; 9453 TCGv_i32 tcg_op1; 9454 TCGv_i32 tcg_op2; 9455 TCGv_i32 tcg_res; 9456 9457 switch (fpopcode) { 9458 case 0x03: /* FMULX */ 9459 case 0x04: /* FCMEQ (reg) */ 9460 case 0x07: /* FRECPS */ 9461 case 0x0f: /* FRSQRTS */ 9462 case 0x14: /* FCMGE (reg) */ 9463 case 0x15: /* FACGE */ 9464 case 0x1a: /* FABD */ 9465 case 0x1c: /* FCMGT (reg) */ 9466 case 0x1d: /* FACGT */ 9467 break; 9468 default: 9469 unallocated_encoding(s); 9470 return; 9471 } 9472 9473 if (!dc_isar_feature(aa64_fp16, s)) { 9474 
unallocated_encoding(s); 9475 } 9476 9477 if (!fp_access_check(s)) { 9478 return; 9479 } 9480 9481 fpst = fpstatus_ptr(FPST_FPCR_F16); 9482 9483 tcg_op1 = read_fp_hreg(s, rn); 9484 tcg_op2 = read_fp_hreg(s, rm); 9485 tcg_res = tcg_temp_new_i32(); 9486 9487 switch (fpopcode) { 9488 case 0x03: /* FMULX */ 9489 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); 9490 break; 9491 case 0x04: /* FCMEQ (reg) */ 9492 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9493 break; 9494 case 0x07: /* FRECPS */ 9495 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9496 break; 9497 case 0x0f: /* FRSQRTS */ 9498 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9499 break; 9500 case 0x14: /* FCMGE (reg) */ 9501 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9502 break; 9503 case 0x15: /* FACGE */ 9504 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9505 break; 9506 case 0x1a: /* FABD */ 9507 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 9508 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff); 9509 break; 9510 case 0x1c: /* FCMGT (reg) */ 9511 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9512 break; 9513 case 0x1d: /* FACGT */ 9514 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 9515 break; 9516 default: 9517 g_assert_not_reached(); 9518 } 9519 9520 write_fp_sreg(s, rd, tcg_res); 9521 } 9522 9523 /* AdvSIMD scalar three same extra 9524 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 9525 * +-----+---+-----------+------+---+------+---+--------+---+----+----+ 9526 * | 0 1 | U | 1 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd | 9527 * +-----+---+-----------+------+---+------+---+--------+---+----+----+ 9528 */ 9529 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s, 9530 uint32_t insn) 9531 { 9532 int rd = extract32(insn, 0, 5); 9533 int rn = extract32(insn, 5, 5); 9534 int opcode = extract32(insn, 11, 4); 9535 int rm = extract32(insn, 16, 5); 9536 int size = extract32(insn, 22, 2); 9537 bool u = extract32(insn, 29, 1); 9538 TCGv_i32 ele1, ele2, ele3; 9539 TCGv_i64 res; 9540 bool feature; 9541 9542 switch (u * 16 + opcode) { 9543 case 0x10: /* SQRDMLAH (vector) */ 9544 case 0x11: /* SQRDMLSH (vector) */ 9545 if (size != 1 && size != 2) { 9546 unallocated_encoding(s); 9547 return; 9548 } 9549 feature = dc_isar_feature(aa64_rdm, s); 9550 break; 9551 default: 9552 unallocated_encoding(s); 9553 return; 9554 } 9555 if (!feature) { 9556 unallocated_encoding(s); 9557 return; 9558 } 9559 if (!fp_access_check(s)) { 9560 return; 9561 } 9562 9563 /* Do a single operation on the lowest element in the vector. 9564 * We use the standard Neon helpers and rely on 0 OP 0 == 0 9565 * with no side effects for all these operations. 9566 * OPTME: special-purpose helpers would avoid doing some 9567 * unnecessary work in the helper for the 16 bit cases. 
9568      */
9569     ele1 = tcg_temp_new_i32();
9570     ele2 = tcg_temp_new_i32();
9571     ele3 = tcg_temp_new_i32();
9572 
9573     read_vec_element_i32(s, ele1, rn, 0, size);
9574     read_vec_element_i32(s, ele2, rm, 0, size);
9575     read_vec_element_i32(s, ele3, rd, 0, size);
9576 
9577     switch (opcode) {
9578     case 0x0: /* SQRDMLAH */
9579         if (size == 1) {
9580             gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
9581         } else {
9582             gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
9583         }
9584         break;
9585     case 0x1: /* SQRDMLSH */
9586         if (size == 1) {
9587             gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
9588         } else {
9589             gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
9590         }
9591         break;
9592     default:
9593         g_assert_not_reached();
9594     }
9595 
9596     res = tcg_temp_new_i64();
9597     tcg_gen_extu_i32_i64(res, ele3);
9598     write_fp_dreg(s, rd, res);
9599 }
9600 
9601 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
9602                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
9603                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
9604 {
9605     /* Handle 64->64 opcodes which are shared between the scalar and
9606      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
9607      * is valid in either group and also the double-precision fp ops.
9608      * The caller need only provide tcg_rmode and tcg_fpstatus if the op
9609      * requires them.
9610      */
9611     TCGCond cond;
9612 
9613     switch (opcode) {
9614     case 0x4: /* CLS, CLZ */
9615         if (u) {
9616             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
9617         } else {
9618             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
9619         }
9620         break;
9621     case 0x5: /* NOT */
9622         /* This opcode is shared with CNT and RBIT but we have earlier
9623          * enforced that size == 3 if and only if this is the NOT insn.
9624          */
9625         tcg_gen_not_i64(tcg_rd, tcg_rn);
9626         break;
9627     case 0x7: /* SQABS, SQNEG */
9628         if (u) {
9629             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
9630         } else {
9631             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
9632         }
9633         break;
9634     case 0xa: /* CMLT */
9635         /* 64 bit integer comparison against zero, result is
9636          * test ? (2^64 - 1) : 0. We implement this using setcond (test)
9637          * and then negating.
9638          */
9639         cond = TCG_COND_LT;
9640     do_cmop:
9641         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
9642         tcg_gen_neg_i64(tcg_rd, tcg_rd);
9643         break;
9644     case 0x8: /* CMGT, CMGE */
9645         cond = u ? TCG_COND_GE : TCG_COND_GT;
9646         goto do_cmop;
9647     case 0x9: /* CMEQ, CMLE */
9648         cond = u ?
TCG_COND_LE : TCG_COND_EQ; 9649 goto do_cmop; 9650 case 0xb: /* ABS, NEG */ 9651 if (u) { 9652 tcg_gen_neg_i64(tcg_rd, tcg_rn); 9653 } else { 9654 tcg_gen_abs_i64(tcg_rd, tcg_rn); 9655 } 9656 break; 9657 case 0x2f: /* FABS */ 9658 gen_helper_vfp_absd(tcg_rd, tcg_rn); 9659 break; 9660 case 0x6f: /* FNEG */ 9661 gen_helper_vfp_negd(tcg_rd, tcg_rn); 9662 break; 9663 case 0x7f: /* FSQRT */ 9664 gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env); 9665 break; 9666 case 0x1a: /* FCVTNS */ 9667 case 0x1b: /* FCVTMS */ 9668 case 0x1c: /* FCVTAS */ 9669 case 0x3a: /* FCVTPS */ 9670 case 0x3b: /* FCVTZS */ 9671 gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 9672 break; 9673 case 0x5a: /* FCVTNU */ 9674 case 0x5b: /* FCVTMU */ 9675 case 0x5c: /* FCVTAU */ 9676 case 0x7a: /* FCVTPU */ 9677 case 0x7b: /* FCVTZU */ 9678 gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 9679 break; 9680 case 0x18: /* FRINTN */ 9681 case 0x19: /* FRINTM */ 9682 case 0x38: /* FRINTP */ 9683 case 0x39: /* FRINTZ */ 9684 case 0x58: /* FRINTA */ 9685 case 0x79: /* FRINTI */ 9686 gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus); 9687 break; 9688 case 0x59: /* FRINTX */ 9689 gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus); 9690 break; 9691 case 0x1e: /* FRINT32Z */ 9692 case 0x5e: /* FRINT32X */ 9693 gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus); 9694 break; 9695 case 0x1f: /* FRINT64Z */ 9696 case 0x5f: /* FRINT64X */ 9697 gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus); 9698 break; 9699 default: 9700 g_assert_not_reached(); 9701 } 9702 } 9703 9704 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, 9705 bool is_scalar, bool is_u, bool is_q, 9706 int size, int rn, int rd) 9707 { 9708 bool is_double = (size == MO_64); 9709 TCGv_ptr fpst; 9710 9711 if (!fp_access_check(s)) { 9712 return; 9713 } 9714 9715 fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 9716 9717 if (is_double) { 9718 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9719 TCGv_i64 tcg_zero = tcg_constant_i64(0); 9720 TCGv_i64 tcg_res = tcg_temp_new_i64(); 9721 NeonGenTwoDoubleOpFn *genfn; 9722 bool swap = false; 9723 int pass; 9724 9725 switch (opcode) { 9726 case 0x2e: /* FCMLT (zero) */ 9727 swap = true; 9728 /* fallthrough */ 9729 case 0x2c: /* FCMGT (zero) */ 9730 genfn = gen_helper_neon_cgt_f64; 9731 break; 9732 case 0x2d: /* FCMEQ (zero) */ 9733 genfn = gen_helper_neon_ceq_f64; 9734 break; 9735 case 0x6d: /* FCMLE (zero) */ 9736 swap = true; 9737 /* fall through */ 9738 case 0x6c: /* FCMGE (zero) */ 9739 genfn = gen_helper_neon_cge_f64; 9740 break; 9741 default: 9742 g_assert_not_reached(); 9743 } 9744 9745 for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { 9746 read_vec_element(s, tcg_op, rn, pass, MO_64); 9747 if (swap) { 9748 genfn(tcg_res, tcg_zero, tcg_op, fpst); 9749 } else { 9750 genfn(tcg_res, tcg_op, tcg_zero, fpst); 9751 } 9752 write_vec_element(s, tcg_res, rd, pass, MO_64); 9753 } 9754 9755 clear_vec_high(s, !is_scalar, rd); 9756 } else { 9757 TCGv_i32 tcg_op = tcg_temp_new_i32(); 9758 TCGv_i32 tcg_zero = tcg_constant_i32(0); 9759 TCGv_i32 tcg_res = tcg_temp_new_i32(); 9760 NeonGenTwoSingleOpFn *genfn; 9761 bool swap = false; 9762 int pass, maxpasses; 9763 9764 if (size == MO_16) { 9765 switch (opcode) { 9766 case 0x2e: /* FCMLT (zero) */ 9767 swap = true; 9768 /* fall through */ 9769 case 0x2c: /* FCMGT (zero) */ 9770 genfn = gen_helper_advsimd_cgt_f16; 9771 break; 9772 case 0x2d: /* FCMEQ (zero) */ 9773 genfn = gen_helper_advsimd_ceq_f16; 9774 break; 9775 case 0x6d: /* FCMLE (zero) */ 9776 swap = true; 9777 /* fall through */ 9778 case 0x6c: /* FCMGE (zero) */ 9779 genfn = gen_helper_advsimd_cge_f16; 9780 break; 9781 default: 9782 g_assert_not_reached(); 9783 } 9784 } else { 9785 switch (opcode) { 9786 case 0x2e: /* FCMLT (zero) */ 9787 swap = true; 9788 /* fall through */ 9789 case 0x2c: /* FCMGT (zero) */ 9790 genfn = gen_helper_neon_cgt_f32; 9791 break; 9792 case 0x2d: /* FCMEQ (zero) */ 9793 genfn = gen_helper_neon_ceq_f32; 9794 break; 9795 case 0x6d: /* FCMLE (zero) */ 9796 swap = true; 9797 /* fall through */ 9798 case 0x6c: /* FCMGE (zero) */ 9799 genfn = gen_helper_neon_cge_f32; 9800 break; 9801 default: 9802 g_assert_not_reached(); 9803 } 9804 } 9805 9806 if (is_scalar) { 9807 maxpasses = 1; 9808 } else { 9809 int vector_size = 8 << is_q; 9810 maxpasses = vector_size >> size; 9811 } 9812 9813 for (pass = 0; pass < maxpasses; pass++) { 9814 read_vec_element_i32(s, tcg_op, rn, pass, size); 9815 if (swap) { 9816 genfn(tcg_res, tcg_zero, tcg_op, fpst); 9817 } else { 9818 genfn(tcg_res, tcg_op, tcg_zero, fpst); 9819 } 9820 if (is_scalar) { 9821 write_fp_sreg(s, rd, tcg_res); 9822 } else { 9823 write_vec_element_i32(s, tcg_res, rd, pass, size); 9824 } 9825 } 9826 9827 if (!is_scalar) { 9828 clear_vec_high(s, is_q, rd); 9829 } 9830 } 9831 } 9832 9833 static void handle_2misc_reciprocal(DisasContext *s, int opcode, 9834 bool is_scalar, bool is_u, bool is_q, 9835 int size, int rn, int rd) 9836 { 9837 bool is_double = (size == 3); 9838 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9839 9840 if (is_double) { 9841 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9842 TCGv_i64 tcg_res = tcg_temp_new_i64(); 9843 int pass; 9844 9845 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 9846 read_vec_element(s, tcg_op, rn, pass, MO_64); 9847 switch (opcode) { 9848 case 0x3d: /* FRECPE */ 9849 gen_helper_recpe_f64(tcg_res, tcg_op, fpst); 9850 break; 9851 case 0x3f: /* FRECPX */ 9852 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst); 9853 break; 9854 case 0x7d: /* FRSQRTE */ 9855 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst); 9856 break; 9857 default: 9858 g_assert_not_reached(); 9859 } 9860 write_vec_element(s, tcg_res, rd, pass, MO_64); 9861 } 9862 clear_vec_high(s, !is_scalar, rd); 9863 } else { 9864 TCGv_i32 tcg_op = tcg_temp_new_i32(); 9865 TCGv_i32 tcg_res = tcg_temp_new_i32(); 9866 int pass, maxpasses; 9867 9868 if (is_scalar) { 9869 maxpasses = 1; 9870 } else { 9871 maxpasses = is_q ? 
4 : 2; 9872 } 9873 9874 for (pass = 0; pass < maxpasses; pass++) { 9875 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 9876 9877 switch (opcode) { 9878 case 0x3c: /* URECPE */ 9879 gen_helper_recpe_u32(tcg_res, tcg_op); 9880 break; 9881 case 0x3d: /* FRECPE */ 9882 gen_helper_recpe_f32(tcg_res, tcg_op, fpst); 9883 break; 9884 case 0x3f: /* FRECPX */ 9885 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst); 9886 break; 9887 case 0x7d: /* FRSQRTE */ 9888 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst); 9889 break; 9890 default: 9891 g_assert_not_reached(); 9892 } 9893 9894 if (is_scalar) { 9895 write_fp_sreg(s, rd, tcg_res); 9896 } else { 9897 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 9898 } 9899 } 9900 if (!is_scalar) { 9901 clear_vec_high(s, is_q, rd); 9902 } 9903 } 9904 } 9905 9906 static void handle_2misc_narrow(DisasContext *s, bool scalar, 9907 int opcode, bool u, bool is_q, 9908 int size, int rn, int rd) 9909 { 9910 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element 9911 * in the source becomes a size element in the destination). 9912 */ 9913 int pass; 9914 TCGv_i32 tcg_res[2]; 9915 int destelt = is_q ? 2 : 0; 9916 int passes = scalar ? 1 : 2; 9917 9918 if (scalar) { 9919 tcg_res[1] = tcg_constant_i32(0); 9920 } 9921 9922 for (pass = 0; pass < passes; pass++) { 9923 TCGv_i64 tcg_op = tcg_temp_new_i64(); 9924 NeonGenNarrowFn *genfn = NULL; 9925 NeonGenNarrowEnvFn *genenvfn = NULL; 9926 9927 if (scalar) { 9928 read_vec_element(s, tcg_op, rn, pass, size + 1); 9929 } else { 9930 read_vec_element(s, tcg_op, rn, pass, MO_64); 9931 } 9932 tcg_res[pass] = tcg_temp_new_i32(); 9933 9934 switch (opcode) { 9935 case 0x12: /* XTN, SQXTUN */ 9936 { 9937 static NeonGenNarrowFn * const xtnfns[3] = { 9938 gen_helper_neon_narrow_u8, 9939 gen_helper_neon_narrow_u16, 9940 tcg_gen_extrl_i64_i32, 9941 }; 9942 static NeonGenNarrowEnvFn * const sqxtunfns[3] = { 9943 gen_helper_neon_unarrow_sat8, 9944 gen_helper_neon_unarrow_sat16, 9945 gen_helper_neon_unarrow_sat32, 9946 }; 9947 if (u) { 9948 genenvfn = sqxtunfns[size]; 9949 } else { 9950 genfn = xtnfns[size]; 9951 } 9952 break; 9953 } 9954 case 0x14: /* SQXTN, UQXTN */ 9955 { 9956 static NeonGenNarrowEnvFn * const fns[3][2] = { 9957 { gen_helper_neon_narrow_sat_s8, 9958 gen_helper_neon_narrow_sat_u8 }, 9959 { gen_helper_neon_narrow_sat_s16, 9960 gen_helper_neon_narrow_sat_u16 }, 9961 { gen_helper_neon_narrow_sat_s32, 9962 gen_helper_neon_narrow_sat_u32 }, 9963 }; 9964 genenvfn = fns[size][u]; 9965 break; 9966 } 9967 case 0x16: /* FCVTN, FCVTN2 */ 9968 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ 9969 if (size == 2) { 9970 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env); 9971 } else { 9972 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 9973 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 9974 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9975 TCGv_i32 ahp = get_ahp_flag(); 9976 9977 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op); 9978 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 9979 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 9980 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16); 9981 } 9982 break; 9983 case 0x36: /* BFCVTN, BFCVTN2 */ 9984 { 9985 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9986 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst); 9987 } 9988 break; 9989 case 0x56: /* FCVTXN, FCVTXN2 */ 9990 /* 64 bit to 32 bit float conversion 9991 * with von Neumann rounding (round to odd) 9992 */ 9993 assert(size == 2); 9994 gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env); 9995 break; 9996 
default: 9997 g_assert_not_reached(); 9998 } 9999 10000 if (genfn) { 10001 genfn(tcg_res[pass], tcg_op); 10002 } else if (genenvfn) { 10003 genenvfn(tcg_res[pass], cpu_env, tcg_op); 10004 } 10005 } 10006 10007 for (pass = 0; pass < 2; pass++) { 10008 write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32); 10009 } 10010 clear_vec_high(s, is_q, rd); 10011 } 10012 10013 /* Remaining saturating accumulating ops */ 10014 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, 10015 bool is_q, int size, int rn, int rd) 10016 { 10017 bool is_double = (size == 3); 10018 10019 if (is_double) { 10020 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 10021 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 10022 int pass; 10023 10024 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 10025 read_vec_element(s, tcg_rn, rn, pass, MO_64); 10026 read_vec_element(s, tcg_rd, rd, pass, MO_64); 10027 10028 if (is_u) { /* USQADD */ 10029 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10030 } else { /* SUQADD */ 10031 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10032 } 10033 write_vec_element(s, tcg_rd, rd, pass, MO_64); 10034 } 10035 clear_vec_high(s, !is_scalar, rd); 10036 } else { 10037 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 10038 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 10039 int pass, maxpasses; 10040 10041 if (is_scalar) { 10042 maxpasses = 1; 10043 } else { 10044 maxpasses = is_q ? 4 : 2; 10045 } 10046 10047 for (pass = 0; pass < maxpasses; pass++) { 10048 if (is_scalar) { 10049 read_vec_element_i32(s, tcg_rn, rn, pass, size); 10050 read_vec_element_i32(s, tcg_rd, rd, pass, size); 10051 } else { 10052 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32); 10053 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32); 10054 } 10055 10056 if (is_u) { /* USQADD */ 10057 switch (size) { 10058 case 0: 10059 gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10060 break; 10061 case 1: 10062 gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10063 break; 10064 case 2: 10065 gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10066 break; 10067 default: 10068 g_assert_not_reached(); 10069 } 10070 } else { /* SUQADD */ 10071 switch (size) { 10072 case 0: 10073 gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10074 break; 10075 case 1: 10076 gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10077 break; 10078 case 2: 10079 gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10080 break; 10081 default: 10082 g_assert_not_reached(); 10083 } 10084 } 10085 10086 if (is_scalar) { 10087 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64); 10088 } 10089 write_vec_element_i32(s, tcg_rd, rd, pass, MO_32); 10090 } 10091 clear_vec_high(s, is_q, rd); 10092 } 10093 } 10094 10095 /* AdvSIMD scalar two reg misc 10096 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 10097 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 10098 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 10099 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 10100 */ 10101 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) 10102 { 10103 int rd = extract32(insn, 0, 5); 10104 int rn = extract32(insn, 5, 5); 10105 int opcode = extract32(insn, 12, 5); 10106 int size = extract32(insn, 22, 2); 10107 bool u = extract32(insn, 29, 1); 10108 bool is_fcvt = false; 10109 int rmode; 10110 TCGv_i32 tcg_rmode; 10111 TCGv_ptr tcg_fpstatus; 10112 10113 switch (opcode) { 10114 case 0x3: /* USQADD / SUQADD*/ 
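        /*
         * USQADD adds a signed element into the unsigned accumulator in Rd
         * with unsigned saturation; SUQADD adds an unsigned element into
         * the signed accumulator with signed saturation.
         */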
10115 if (!fp_access_check(s)) { 10116 return; 10117 } 10118 handle_2misc_satacc(s, true, u, false, size, rn, rd); 10119 return; 10120 case 0x7: /* SQABS / SQNEG */ 10121 break; 10122 case 0xa: /* CMLT */ 10123 if (u) { 10124 unallocated_encoding(s); 10125 return; 10126 } 10127 /* fall through */ 10128 case 0x8: /* CMGT, CMGE */ 10129 case 0x9: /* CMEQ, CMLE */ 10130 case 0xb: /* ABS, NEG */ 10131 if (size != 3) { 10132 unallocated_encoding(s); 10133 return; 10134 } 10135 break; 10136 case 0x12: /* SQXTUN */ 10137 if (!u) { 10138 unallocated_encoding(s); 10139 return; 10140 } 10141 /* fall through */ 10142 case 0x14: /* SQXTN, UQXTN */ 10143 if (size == 3) { 10144 unallocated_encoding(s); 10145 return; 10146 } 10147 if (!fp_access_check(s)) { 10148 return; 10149 } 10150 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd); 10151 return; 10152 case 0xc ... 0xf: 10153 case 0x16 ... 0x1d: 10154 case 0x1f: 10155 /* Floating point: U, size[1] and opcode indicate operation; 10156 * size[0] indicates single or double precision. 10157 */ 10158 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 10159 size = extract32(size, 0, 1) ? 3 : 2; 10160 switch (opcode) { 10161 case 0x2c: /* FCMGT (zero) */ 10162 case 0x2d: /* FCMEQ (zero) */ 10163 case 0x2e: /* FCMLT (zero) */ 10164 case 0x6c: /* FCMGE (zero) */ 10165 case 0x6d: /* FCMLE (zero) */ 10166 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd); 10167 return; 10168 case 0x1d: /* SCVTF */ 10169 case 0x5d: /* UCVTF */ 10170 { 10171 bool is_signed = (opcode == 0x1d); 10172 if (!fp_access_check(s)) { 10173 return; 10174 } 10175 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size); 10176 return; 10177 } 10178 case 0x3d: /* FRECPE */ 10179 case 0x3f: /* FRECPX */ 10180 case 0x7d: /* FRSQRTE */ 10181 if (!fp_access_check(s)) { 10182 return; 10183 } 10184 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd); 10185 return; 10186 case 0x1a: /* FCVTNS */ 10187 case 0x1b: /* FCVTMS */ 10188 case 0x3a: /* FCVTPS */ 10189 case 0x3b: /* FCVTZS */ 10190 case 0x5a: /* FCVTNU */ 10191 case 0x5b: /* FCVTMU */ 10192 case 0x7a: /* FCVTPU */ 10193 case 0x7b: /* FCVTZU */ 10194 is_fcvt = true; 10195 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 10196 break; 10197 case 0x1c: /* FCVTAS */ 10198 case 0x5c: /* FCVTAU */ 10199 /* TIEAWAY doesn't fit in the usual rounding mode encoding */ 10200 is_fcvt = true; 10201 rmode = FPROUNDING_TIEAWAY; 10202 break; 10203 case 0x56: /* FCVTXN, FCVTXN2 */ 10204 if (size == 2) { 10205 unallocated_encoding(s); 10206 return; 10207 } 10208 if (!fp_access_check(s)) { 10209 return; 10210 } 10211 handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd); 10212 return; 10213 default: 10214 unallocated_encoding(s); 10215 return; 10216 } 10217 break; 10218 default: 10219 unallocated_encoding(s); 10220 return; 10221 } 10222 10223 if (!fp_access_check(s)) { 10224 return; 10225 } 10226 10227 if (is_fcvt) { 10228 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 10229 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 10230 } else { 10231 tcg_fpstatus = NULL; 10232 tcg_rmode = NULL; 10233 } 10234 10235 if (size == 3) { 10236 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 10237 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 10238 10239 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus); 10240 write_fp_dreg(s, rd, tcg_rd); 10241 } else { 10242 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 10243 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 10244 10245 read_vec_element_i32(s, tcg_rn, rn, 0, size); 10246 10247 switch 
(opcode) { 10248 case 0x7: /* SQABS, SQNEG */ 10249 { 10250 NeonGenOneOpEnvFn *genfn; 10251 static NeonGenOneOpEnvFn * const fns[3][2] = { 10252 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 10253 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 10254 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, 10255 }; 10256 genfn = fns[size][u]; 10257 genfn(tcg_rd, cpu_env, tcg_rn); 10258 break; 10259 } 10260 case 0x1a: /* FCVTNS */ 10261 case 0x1b: /* FCVTMS */ 10262 case 0x1c: /* FCVTAS */ 10263 case 0x3a: /* FCVTPS */ 10264 case 0x3b: /* FCVTZS */ 10265 gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0), 10266 tcg_fpstatus); 10267 break; 10268 case 0x5a: /* FCVTNU */ 10269 case 0x5b: /* FCVTMU */ 10270 case 0x5c: /* FCVTAU */ 10271 case 0x7a: /* FCVTPU */ 10272 case 0x7b: /* FCVTZU */ 10273 gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0), 10274 tcg_fpstatus); 10275 break; 10276 default: 10277 g_assert_not_reached(); 10278 } 10279 10280 write_fp_sreg(s, rd, tcg_rd); 10281 } 10282 10283 if (is_fcvt) { 10284 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 10285 } 10286 } 10287 10288 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */ 10289 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, 10290 int immh, int immb, int opcode, int rn, int rd) 10291 { 10292 int size = 32 - clz32(immh) - 1; 10293 int immhb = immh << 3 | immb; 10294 int shift = 2 * (8 << size) - immhb; 10295 GVecGen2iFn *gvec_fn; 10296 10297 if (extract32(immh, 3, 1) && !is_q) { 10298 unallocated_encoding(s); 10299 return; 10300 } 10301 tcg_debug_assert(size <= 3); 10302 10303 if (!fp_access_check(s)) { 10304 return; 10305 } 10306 10307 switch (opcode) { 10308 case 0x02: /* SSRA / USRA (accumulate) */ 10309 gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra; 10310 break; 10311 10312 case 0x08: /* SRI */ 10313 gvec_fn = gen_gvec_sri; 10314 break; 10315 10316 case 0x00: /* SSHR / USHR */ 10317 if (is_u) { 10318 if (shift == 8 << size) { 10319 /* Shift count the same size as element size produces zero. */ 10320 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd), 10321 is_q ? 16 : 8, vec_full_reg_size(s), 0); 10322 return; 10323 } 10324 gvec_fn = tcg_gen_gvec_shri; 10325 } else { 10326 /* Shift count the same size as element size produces all sign. */ 10327 if (shift == 8 << size) { 10328 shift -= 1; 10329 } 10330 gvec_fn = tcg_gen_gvec_sari; 10331 } 10332 break; 10333 10334 case 0x04: /* SRSHR / URSHR (rounding) */ 10335 gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr; 10336 break; 10337 10338 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 10339 gvec_fn = is_u ? 
gen_gvec_ursra : gen_gvec_srsra; 10340 break; 10341 10342 default: 10343 g_assert_not_reached(); 10344 } 10345 10346 gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size); 10347 } 10348 10349 /* SHL/SLI - Vector shift left */ 10350 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, 10351 int immh, int immb, int opcode, int rn, int rd) 10352 { 10353 int size = 32 - clz32(immh) - 1; 10354 int immhb = immh << 3 | immb; 10355 int shift = immhb - (8 << size); 10356 10357 /* Range of size is limited by decode: immh is a non-zero 4 bit field */ 10358 assert(size >= 0 && size <= 3); 10359 10360 if (extract32(immh, 3, 1) && !is_q) { 10361 unallocated_encoding(s); 10362 return; 10363 } 10364 10365 if (!fp_access_check(s)) { 10366 return; 10367 } 10368 10369 if (insert) { 10370 gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size); 10371 } else { 10372 gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size); 10373 } 10374 } 10375 10376 /* USHLL/SHLL - Vector shift left with widening */ 10377 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, 10378 int immh, int immb, int opcode, int rn, int rd) 10379 { 10380 int size = 32 - clz32(immh) - 1; 10381 int immhb = immh << 3 | immb; 10382 int shift = immhb - (8 << size); 10383 int dsize = 64; 10384 int esize = 8 << size; 10385 int elements = dsize/esize; 10386 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 10387 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 10388 int i; 10389 10390 if (size >= 3) { 10391 unallocated_encoding(s); 10392 return; 10393 } 10394 10395 if (!fp_access_check(s)) { 10396 return; 10397 } 10398 10399 /* For the LL variants the store is larger than the load, 10400 * so if rd == rn we would overwrite parts of our input. 10401 * So load everything right now and use shifts in the main loop. 10402 */ 10403 read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64); 10404 10405 for (i = 0; i < elements; i++) { 10406 tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize); 10407 ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0); 10408 tcg_gen_shli_i64(tcg_rd, tcg_rd, shift); 10409 write_vec_element(s, tcg_rd, rd, i, size + 1); 10410 } 10411 } 10412 10413 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */ 10414 static void handle_vec_simd_shrn(DisasContext *s, bool is_q, 10415 int immh, int immb, int opcode, int rn, int rd) 10416 { 10417 int immhb = immh << 3 | immb; 10418 int size = 32 - clz32(immh) - 1; 10419 int dsize = 64; 10420 int esize = 8 << size; 10421 int elements = dsize/esize; 10422 int shift = (2 * esize) - immhb; 10423 bool round = extract32(opcode, 0, 1); 10424 TCGv_i64 tcg_rn, tcg_rd, tcg_final; 10425 TCGv_i64 tcg_round; 10426 int i; 10427 10428 if (extract32(immh, 3, 1)) { 10429 unallocated_encoding(s); 10430 return; 10431 } 10432 10433 if (!fp_access_check(s)) { 10434 return; 10435 } 10436 10437 tcg_rn = tcg_temp_new_i64(); 10438 tcg_rd = tcg_temp_new_i64(); 10439 tcg_final = tcg_temp_new_i64(); 10440 read_vec_element(s, tcg_final, rd, is_q ? 
1 : 0, MO_64); 10441 10442 if (round) { 10443 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 10444 } else { 10445 tcg_round = NULL; 10446 } 10447 10448 for (i = 0; i < elements; i++) { 10449 read_vec_element(s, tcg_rn, rn, i, size+1); 10450 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 10451 false, true, size+1, shift); 10452 10453 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); 10454 } 10455 10456 if (!is_q) { 10457 write_vec_element(s, tcg_final, rd, 0, MO_64); 10458 } else { 10459 write_vec_element(s, tcg_final, rd, 1, MO_64); 10460 } 10461 10462 clear_vec_high(s, is_q, rd); 10463 } 10464 10465 10466 /* AdvSIMD shift by immediate 10467 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 10468 * +---+---+---+-------------+------+------+--------+---+------+------+ 10469 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | 10470 * +---+---+---+-------------+------+------+--------+---+------+------+ 10471 */ 10472 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) 10473 { 10474 int rd = extract32(insn, 0, 5); 10475 int rn = extract32(insn, 5, 5); 10476 int opcode = extract32(insn, 11, 5); 10477 int immb = extract32(insn, 16, 3); 10478 int immh = extract32(insn, 19, 4); 10479 bool is_u = extract32(insn, 29, 1); 10480 bool is_q = extract32(insn, 30, 1); 10481 10482 /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */ 10483 assert(immh != 0); 10484 10485 switch (opcode) { 10486 case 0x08: /* SRI */ 10487 if (!is_u) { 10488 unallocated_encoding(s); 10489 return; 10490 } 10491 /* fall through */ 10492 case 0x00: /* SSHR / USHR */ 10493 case 0x02: /* SSRA / USRA (accumulate) */ 10494 case 0x04: /* SRSHR / URSHR (rounding) */ 10495 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 10496 handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd); 10497 break; 10498 case 0x0a: /* SHL / SLI */ 10499 handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd); 10500 break; 10501 case 0x10: /* SHRN */ 10502 case 0x11: /* RSHRN / SQRSHRUN */ 10503 if (is_u) { 10504 handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb, 10505 opcode, rn, rd); 10506 } else { 10507 handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd); 10508 } 10509 break; 10510 case 0x12: /* SQSHRN / UQSHRN */ 10511 case 0x13: /* SQRSHRN / UQRSHRN */ 10512 handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, 10513 opcode, rn, rd); 10514 break; 10515 case 0x14: /* SSHLL / USHLL */ 10516 handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); 10517 break; 10518 case 0x1c: /* SCVTF / UCVTF */ 10519 handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, 10520 opcode, rn, rd); 10521 break; 10522 case 0xc: /* SQSHLU */ 10523 if (!is_u) { 10524 unallocated_encoding(s); 10525 return; 10526 } 10527 handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd); 10528 break; 10529 case 0xe: /* SQSHL, UQSHL */ 10530 handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd); 10531 break; 10532 case 0x1f: /* FCVTZS/ FCVTZU */ 10533 handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd); 10534 return; 10535 default: 10536 unallocated_encoding(s); 10537 return; 10538 } 10539 } 10540 10541 /* Generate code to do a "long" addition or subtraction, ie one done in 10542 * TCGv_i64 on vector lanes twice the width specified by size. 
10543 */ 10544 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res, 10545 TCGv_i64 tcg_op1, TCGv_i64 tcg_op2) 10546 { 10547 static NeonGenTwo64OpFn * const fns[3][2] = { 10548 { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 }, 10549 { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 }, 10550 { tcg_gen_add_i64, tcg_gen_sub_i64 }, 10551 }; 10552 NeonGenTwo64OpFn *genfn; 10553 assert(size < 3); 10554 10555 genfn = fns[size][is_sub]; 10556 genfn(tcg_res, tcg_op1, tcg_op2); 10557 } 10558 10559 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, 10560 int opcode, int rd, int rn, int rm) 10561 { 10562 /* 3-reg-different widening insns: 64 x 64 -> 128 */ 10563 TCGv_i64 tcg_res[2]; 10564 int pass, accop; 10565 10566 tcg_res[0] = tcg_temp_new_i64(); 10567 tcg_res[1] = tcg_temp_new_i64(); 10568 10569 /* Does this op do an adding accumulate, a subtracting accumulate, 10570 * or no accumulate at all? 10571 */ 10572 switch (opcode) { 10573 case 5: 10574 case 8: 10575 case 9: 10576 accop = 1; 10577 break; 10578 case 10: 10579 case 11: 10580 accop = -1; 10581 break; 10582 default: 10583 accop = 0; 10584 break; 10585 } 10586 10587 if (accop != 0) { 10588 read_vec_element(s, tcg_res[0], rd, 0, MO_64); 10589 read_vec_element(s, tcg_res[1], rd, 1, MO_64); 10590 } 10591 10592 /* size == 2 means two 32x32->64 operations; this is worth special 10593 * casing because we can generally handle it inline. 10594 */ 10595 if (size == 2) { 10596 for (pass = 0; pass < 2; pass++) { 10597 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10598 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 10599 TCGv_i64 tcg_passres; 10600 MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN); 10601 10602 int elt = pass + is_q * 2; 10603 10604 read_vec_element(s, tcg_op1, rn, elt, memop); 10605 read_vec_element(s, tcg_op2, rm, elt, memop); 10606 10607 if (accop == 0) { 10608 tcg_passres = tcg_res[pass]; 10609 } else { 10610 tcg_passres = tcg_temp_new_i64(); 10611 } 10612 10613 switch (opcode) { 10614 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 10615 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2); 10616 break; 10617 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 10618 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2); 10619 break; 10620 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 10621 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 10622 { 10623 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64(); 10624 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64(); 10625 10626 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2); 10627 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1); 10628 tcg_gen_movcond_i64(is_u ? 
TCG_COND_GEU : TCG_COND_GE, 10629 tcg_passres, 10630 tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2); 10631 break; 10632 } 10633 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 10634 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 10635 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ 10636 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); 10637 break; 10638 case 9: /* SQDMLAL, SQDMLAL2 */ 10639 case 11: /* SQDMLSL, SQDMLSL2 */ 10640 case 13: /* SQDMULL, SQDMULL2 */ 10641 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); 10642 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env, 10643 tcg_passres, tcg_passres); 10644 break; 10645 default: 10646 g_assert_not_reached(); 10647 } 10648 10649 if (opcode == 9 || opcode == 11) { 10650 /* saturating accumulate ops */ 10651 if (accop < 0) { 10652 tcg_gen_neg_i64(tcg_passres, tcg_passres); 10653 } 10654 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env, 10655 tcg_res[pass], tcg_passres); 10656 } else if (accop > 0) { 10657 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 10658 } else if (accop < 0) { 10659 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 10660 } 10661 } 10662 } else { 10663 /* size 0 or 1, generally helper functions */ 10664 for (pass = 0; pass < 2; pass++) { 10665 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 10666 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 10667 TCGv_i64 tcg_passres; 10668 int elt = pass + is_q * 2; 10669 10670 read_vec_element_i32(s, tcg_op1, rn, elt, MO_32); 10671 read_vec_element_i32(s, tcg_op2, rm, elt, MO_32); 10672 10673 if (accop == 0) { 10674 tcg_passres = tcg_res[pass]; 10675 } else { 10676 tcg_passres = tcg_temp_new_i64(); 10677 } 10678 10679 switch (opcode) { 10680 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 10681 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 10682 { 10683 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64(); 10684 static NeonGenWidenFn * const widenfns[2][2] = { 10685 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, 10686 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, 10687 }; 10688 NeonGenWidenFn *widenfn = widenfns[size][is_u]; 10689 10690 widenfn(tcg_op2_64, tcg_op2); 10691 widenfn(tcg_passres, tcg_op1); 10692 gen_neon_addl(size, (opcode == 2), tcg_passres, 10693 tcg_passres, tcg_op2_64); 10694 break; 10695 } 10696 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 10697 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 10698 if (size == 0) { 10699 if (is_u) { 10700 gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2); 10701 } else { 10702 gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2); 10703 } 10704 } else { 10705 if (is_u) { 10706 gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2); 10707 } else { 10708 gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2); 10709 } 10710 } 10711 break; 10712 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 10713 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 10714 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ 10715 if (size == 0) { 10716 if (is_u) { 10717 gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2); 10718 } else { 10719 gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2); 10720 } 10721 } else { 10722 if (is_u) { 10723 gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2); 10724 } else { 10725 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); 10726 } 10727 } 10728 break; 10729 case 9: /* SQDMLAL, SQDMLAL2 */ 10730 case 11: /* SQDMLSL, SQDMLSL2 */ 10731 case 13: /* SQDMULL, SQDMULL2 */ 10732 assert(size == 1); 10733 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); 10734 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, 10735 
tcg_passres, tcg_passres); 10736 break; 10737 default: 10738 g_assert_not_reached(); 10739 } 10740 10741 if (accop != 0) { 10742 if (opcode == 9 || opcode == 11) { 10743 /* saturating accumulate ops */ 10744 if (accop < 0) { 10745 gen_helper_neon_negl_u32(tcg_passres, tcg_passres); 10746 } 10747 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env, 10748 tcg_res[pass], 10749 tcg_passres); 10750 } else { 10751 gen_neon_addl(size, (accop < 0), tcg_res[pass], 10752 tcg_res[pass], tcg_passres); 10753 } 10754 } 10755 } 10756 } 10757 10758 write_vec_element(s, tcg_res[0], rd, 0, MO_64); 10759 write_vec_element(s, tcg_res[1], rd, 1, MO_64); 10760 } 10761 10762 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, 10763 int opcode, int rd, int rn, int rm) 10764 { 10765 TCGv_i64 tcg_res[2]; 10766 int part = is_q ? 2 : 0; 10767 int pass; 10768 10769 for (pass = 0; pass < 2; pass++) { 10770 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10771 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 10772 TCGv_i64 tcg_op2_wide = tcg_temp_new_i64(); 10773 static NeonGenWidenFn * const widenfns[3][2] = { 10774 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, 10775 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, 10776 { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 }, 10777 }; 10778 NeonGenWidenFn *widenfn = widenfns[size][is_u]; 10779 10780 read_vec_element(s, tcg_op1, rn, pass, MO_64); 10781 read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32); 10782 widenfn(tcg_op2_wide, tcg_op2); 10783 tcg_res[pass] = tcg_temp_new_i64(); 10784 gen_neon_addl(size, (opcode == 3), 10785 tcg_res[pass], tcg_op1, tcg_op2_wide); 10786 } 10787 10788 for (pass = 0; pass < 2; pass++) { 10789 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 10790 } 10791 } 10792 10793 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in) 10794 { 10795 tcg_gen_addi_i64(in, in, 1U << 31); 10796 tcg_gen_extrh_i64_i32(res, in); 10797 } 10798 10799 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, 10800 int opcode, int rd, int rn, int rm) 10801 { 10802 TCGv_i32 tcg_res[2]; 10803 int part = is_q ? 
2 : 0; 10804 int pass; 10805 10806 for (pass = 0; pass < 2; pass++) { 10807 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10808 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 10809 TCGv_i64 tcg_wideres = tcg_temp_new_i64(); 10810 static NeonGenNarrowFn * const narrowfns[3][2] = { 10811 { gen_helper_neon_narrow_high_u8, 10812 gen_helper_neon_narrow_round_high_u8 }, 10813 { gen_helper_neon_narrow_high_u16, 10814 gen_helper_neon_narrow_round_high_u16 }, 10815 { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 }, 10816 }; 10817 NeonGenNarrowFn *gennarrow = narrowfns[size][is_u]; 10818 10819 read_vec_element(s, tcg_op1, rn, pass, MO_64); 10820 read_vec_element(s, tcg_op2, rm, pass, MO_64); 10821 10822 gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2); 10823 10824 tcg_res[pass] = tcg_temp_new_i32(); 10825 gennarrow(tcg_res[pass], tcg_wideres); 10826 } 10827 10828 for (pass = 0; pass < 2; pass++) { 10829 write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32); 10830 } 10831 clear_vec_high(s, is_q, rd); 10832 } 10833 10834 /* AdvSIMD three different 10835 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 10836 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 10837 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | 10838 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 10839 */ 10840 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) 10841 { 10842 /* Instructions in this group fall into three basic classes 10843 * (in each case with the operation working on each element in 10844 * the input vectors): 10845 * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra 10846 * 128 bit input) 10847 * (2) wide 64 x 128 -> 128 10848 * (3) narrowing 128 x 128 -> 64 10849 * Here we do initial decode, catch unallocated cases and 10850 * dispatch to separate functions for each class. 10851 */ 10852 int is_q = extract32(insn, 30, 1); 10853 int is_u = extract32(insn, 29, 1); 10854 int size = extract32(insn, 22, 2); 10855 int opcode = extract32(insn, 12, 4); 10856 int rm = extract32(insn, 16, 5); 10857 int rn = extract32(insn, 5, 5); 10858 int rd = extract32(insn, 0, 5); 10859 10860 switch (opcode) { 10861 case 1: /* SADDW, SADDW2, UADDW, UADDW2 */ 10862 case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */ 10863 /* 64 x 128 -> 128 */ 10864 if (size == 3) { 10865 unallocated_encoding(s); 10866 return; 10867 } 10868 if (!fp_access_check(s)) { 10869 return; 10870 } 10871 handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm); 10872 break; 10873 case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */ 10874 case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */ 10875 /* 128 x 128 -> 64 */ 10876 if (size == 3) { 10877 unallocated_encoding(s); 10878 return; 10879 } 10880 if (!fp_access_check(s)) { 10881 return; 10882 } 10883 handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm); 10884 break; 10885 case 14: /* PMULL, PMULL2 */ 10886 if (is_u) { 10887 unallocated_encoding(s); 10888 return; 10889 } 10890 switch (size) { 10891 case 0: /* PMULL.P8 */ 10892 if (!fp_access_check(s)) { 10893 return; 10894 } 10895 /* The Q field specifies lo/hi half input for this insn. */ 10896 gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, 10897 gen_helper_neon_pmull_h); 10898 break; 10899 10900 case 3: /* PMULL.P64 */ 10901 if (!dc_isar_feature(aa64_pmull, s)) { 10902 unallocated_encoding(s); 10903 return; 10904 } 10905 if (!fp_access_check(s)) { 10906 return; 10907 } 10908 /* The Q field specifies lo/hi half input for this insn. 
*/ 10909 gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, 10910 gen_helper_gvec_pmull_q); 10911 break; 10912 10913 default: 10914 unallocated_encoding(s); 10915 break; 10916 } 10917 return; 10918 case 9: /* SQDMLAL, SQDMLAL2 */ 10919 case 11: /* SQDMLSL, SQDMLSL2 */ 10920 case 13: /* SQDMULL, SQDMULL2 */ 10921 if (is_u || size == 0) { 10922 unallocated_encoding(s); 10923 return; 10924 } 10925 /* fall through */ 10926 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 10927 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 10928 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 10929 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 10930 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 10931 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 10932 case 12: /* SMULL, SMULL2, UMULL, UMULL2 */ 10933 /* 64 x 64 -> 128 */ 10934 if (size == 3) { 10935 unallocated_encoding(s); 10936 return; 10937 } 10938 if (!fp_access_check(s)) { 10939 return; 10940 } 10941 10942 handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm); 10943 break; 10944 default: 10945 /* opcode 15 not allocated */ 10946 unallocated_encoding(s); 10947 break; 10948 } 10949 } 10950 10951 /* Logic op (opcode == 3) subgroup of C3.6.16. */ 10952 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) 10953 { 10954 int rd = extract32(insn, 0, 5); 10955 int rn = extract32(insn, 5, 5); 10956 int rm = extract32(insn, 16, 5); 10957 int size = extract32(insn, 22, 2); 10958 bool is_u = extract32(insn, 29, 1); 10959 bool is_q = extract32(insn, 30, 1); 10960 10961 if (!fp_access_check(s)) { 10962 return; 10963 } 10964 10965 switch (size + 4 * is_u) { 10966 case 0: /* AND */ 10967 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0); 10968 return; 10969 case 1: /* BIC */ 10970 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0); 10971 return; 10972 case 2: /* ORR */ 10973 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0); 10974 return; 10975 case 3: /* ORN */ 10976 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0); 10977 return; 10978 case 4: /* EOR */ 10979 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0); 10980 return; 10981 10982 case 5: /* BSL bitwise select */ 10983 gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0); 10984 return; 10985 case 6: /* BIT, bitwise insert if true */ 10986 gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0); 10987 return; 10988 case 7: /* BIF, bitwise insert if false */ 10989 gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0); 10990 return; 10991 10992 default: 10993 g_assert_not_reached(); 10994 } 10995 } 10996 10997 /* Pairwise op subgroup of C3.6.16. 10998 * 10999 * This is called directly or via the handle_3same_float for float pairwise 11000 * operations where the opcode and size are calculated differently. 11001 */ 11002 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, 11003 int size, int rn, int rm, int rd) 11004 { 11005 TCGv_ptr fpst; 11006 int pass; 11007 11008 /* Floating point operations need fpst */ 11009 if (opcode >= 0x58) { 11010 fpst = fpstatus_ptr(FPST_FPCR); 11011 } else { 11012 fpst = NULL; 11013 } 11014 11015 if (!fp_access_check(s)) { 11016 return; 11017 } 11018 11019 /* These operations work on the concatenated rm:rn, with each pair of 11020 * adjacent elements being operated on to produce an element in the result. 
11021 */ 11022 if (size == 3) { 11023 TCGv_i64 tcg_res[2]; 11024 11025 for (pass = 0; pass < 2; pass++) { 11026 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11027 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 11028 int passreg = (pass == 0) ? rn : rm; 11029 11030 read_vec_element(s, tcg_op1, passreg, 0, MO_64); 11031 read_vec_element(s, tcg_op2, passreg, 1, MO_64); 11032 tcg_res[pass] = tcg_temp_new_i64(); 11033 11034 switch (opcode) { 11035 case 0x17: /* ADDP */ 11036 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); 11037 break; 11038 case 0x58: /* FMAXNMP */ 11039 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11040 break; 11041 case 0x5a: /* FADDP */ 11042 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11043 break; 11044 case 0x5e: /* FMAXP */ 11045 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11046 break; 11047 case 0x78: /* FMINNMP */ 11048 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11049 break; 11050 case 0x7e: /* FMINP */ 11051 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11052 break; 11053 default: 11054 g_assert_not_reached(); 11055 } 11056 } 11057 11058 for (pass = 0; pass < 2; pass++) { 11059 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11060 } 11061 } else { 11062 int maxpass = is_q ? 4 : 2; 11063 TCGv_i32 tcg_res[4]; 11064 11065 for (pass = 0; pass < maxpass; pass++) { 11066 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11067 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11068 NeonGenTwoOpFn *genfn = NULL; 11069 int passreg = pass < (maxpass / 2) ? rn : rm; 11070 int passelt = (is_q && (pass & 1)) ? 2 : 0; 11071 11072 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32); 11073 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32); 11074 tcg_res[pass] = tcg_temp_new_i32(); 11075 11076 switch (opcode) { 11077 case 0x17: /* ADDP */ 11078 { 11079 static NeonGenTwoOpFn * const fns[3] = { 11080 gen_helper_neon_padd_u8, 11081 gen_helper_neon_padd_u16, 11082 tcg_gen_add_i32, 11083 }; 11084 genfn = fns[size]; 11085 break; 11086 } 11087 case 0x14: /* SMAXP, UMAXP */ 11088 { 11089 static NeonGenTwoOpFn * const fns[3][2] = { 11090 { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 }, 11091 { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 }, 11092 { tcg_gen_smax_i32, tcg_gen_umax_i32 }, 11093 }; 11094 genfn = fns[size][u]; 11095 break; 11096 } 11097 case 0x15: /* SMINP, UMINP */ 11098 { 11099 static NeonGenTwoOpFn * const fns[3][2] = { 11100 { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 }, 11101 { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 }, 11102 { tcg_gen_smin_i32, tcg_gen_umin_i32 }, 11103 }; 11104 genfn = fns[size][u]; 11105 break; 11106 } 11107 /* The FP operations are all on single floats (32 bit) */ 11108 case 0x58: /* FMAXNMP */ 11109 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11110 break; 11111 case 0x5a: /* FADDP */ 11112 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11113 break; 11114 case 0x5e: /* FMAXP */ 11115 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11116 break; 11117 case 0x78: /* FMINNMP */ 11118 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11119 break; 11120 case 0x7e: /* FMINP */ 11121 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11122 break; 11123 default: 11124 g_assert_not_reached(); 11125 } 11126 11127 /* FP ops called directly, otherwise call now */ 11128 if (genfn) { 11129 genfn(tcg_res[pass], tcg_op1, tcg_op2); 11130 } 11131 } 11132 11133 for (pass = 0; pass < maxpass; pass++) { 11134 
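            /*
             * The results were buffered in tcg_res[] above because Rd may
             * overlap Rn or Rm; only now is it safe to write them back.
             */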
write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); 11135 } 11136 clear_vec_high(s, is_q, rd); 11137 } 11138 } 11139 11140 /* Floating point op subgroup of C3.6.16. */ 11141 static void disas_simd_3same_float(DisasContext *s, uint32_t insn) 11142 { 11143 /* For floating point ops, the U, size[1] and opcode bits 11144 * together indicate the operation. size[0] indicates single 11145 * or double. 11146 */ 11147 int fpopcode = extract32(insn, 11, 5) 11148 | (extract32(insn, 23, 1) << 5) 11149 | (extract32(insn, 29, 1) << 6); 11150 int is_q = extract32(insn, 30, 1); 11151 int size = extract32(insn, 22, 1); 11152 int rm = extract32(insn, 16, 5); 11153 int rn = extract32(insn, 5, 5); 11154 int rd = extract32(insn, 0, 5); 11155 11156 int datasize = is_q ? 128 : 64; 11157 int esize = 32 << size; 11158 int elements = datasize / esize; 11159 11160 if (size == 1 && !is_q) { 11161 unallocated_encoding(s); 11162 return; 11163 } 11164 11165 switch (fpopcode) { 11166 case 0x58: /* FMAXNMP */ 11167 case 0x5a: /* FADDP */ 11168 case 0x5e: /* FMAXP */ 11169 case 0x78: /* FMINNMP */ 11170 case 0x7e: /* FMINP */ 11171 if (size && !is_q) { 11172 unallocated_encoding(s); 11173 return; 11174 } 11175 handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32, 11176 rn, rm, rd); 11177 return; 11178 case 0x1b: /* FMULX */ 11179 case 0x1f: /* FRECPS */ 11180 case 0x3f: /* FRSQRTS */ 11181 case 0x5d: /* FACGE */ 11182 case 0x7d: /* FACGT */ 11183 case 0x19: /* FMLA */ 11184 case 0x39: /* FMLS */ 11185 case 0x18: /* FMAXNM */ 11186 case 0x1a: /* FADD */ 11187 case 0x1c: /* FCMEQ */ 11188 case 0x1e: /* FMAX */ 11189 case 0x38: /* FMINNM */ 11190 case 0x3a: /* FSUB */ 11191 case 0x3e: /* FMIN */ 11192 case 0x5b: /* FMUL */ 11193 case 0x5c: /* FCMGE */ 11194 case 0x5f: /* FDIV */ 11195 case 0x7a: /* FABD */ 11196 case 0x7c: /* FCMGT */ 11197 if (!fp_access_check(s)) { 11198 return; 11199 } 11200 handle_3same_float(s, size, elements, fpopcode, rd, rn, rm); 11201 return; 11202 11203 case 0x1d: /* FMLAL */ 11204 case 0x3d: /* FMLSL */ 11205 case 0x59: /* FMLAL2 */ 11206 case 0x79: /* FMLSL2 */ 11207 if (size & 1 || !dc_isar_feature(aa64_fhm, s)) { 11208 unallocated_encoding(s); 11209 return; 11210 } 11211 if (fp_access_check(s)) { 11212 int is_s = extract32(insn, 23, 1); 11213 int is_2 = extract32(insn, 29, 1); 11214 int data = (is_2 << 1) | is_s; 11215 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 11216 vec_full_reg_offset(s, rn), 11217 vec_full_reg_offset(s, rm), cpu_env, 11218 is_q ? 16 : 8, vec_full_reg_size(s), 11219 data, gen_helper_gvec_fmlal_a64); 11220 } 11221 return; 11222 11223 default: 11224 unallocated_encoding(s); 11225 return; 11226 } 11227 } 11228 11229 /* Integer op subgroup of C3.6.16. 
*/ 11230 static void disas_simd_3same_int(DisasContext *s, uint32_t insn) 11231 { 11232 int is_q = extract32(insn, 30, 1); 11233 int u = extract32(insn, 29, 1); 11234 int size = extract32(insn, 22, 2); 11235 int opcode = extract32(insn, 11, 5); 11236 int rm = extract32(insn, 16, 5); 11237 int rn = extract32(insn, 5, 5); 11238 int rd = extract32(insn, 0, 5); 11239 int pass; 11240 TCGCond cond; 11241 11242 switch (opcode) { 11243 case 0x13: /* MUL, PMUL */ 11244 if (u && size != 0) { 11245 unallocated_encoding(s); 11246 return; 11247 } 11248 /* fall through */ 11249 case 0x0: /* SHADD, UHADD */ 11250 case 0x2: /* SRHADD, URHADD */ 11251 case 0x4: /* SHSUB, UHSUB */ 11252 case 0xc: /* SMAX, UMAX */ 11253 case 0xd: /* SMIN, UMIN */ 11254 case 0xe: /* SABD, UABD */ 11255 case 0xf: /* SABA, UABA */ 11256 case 0x12: /* MLA, MLS */ 11257 if (size == 3) { 11258 unallocated_encoding(s); 11259 return; 11260 } 11261 break; 11262 case 0x16: /* SQDMULH, SQRDMULH */ 11263 if (size == 0 || size == 3) { 11264 unallocated_encoding(s); 11265 return; 11266 } 11267 break; 11268 default: 11269 if (size == 3 && !is_q) { 11270 unallocated_encoding(s); 11271 return; 11272 } 11273 break; 11274 } 11275 11276 if (!fp_access_check(s)) { 11277 return; 11278 } 11279 11280 switch (opcode) { 11281 case 0x01: /* SQADD, UQADD */ 11282 if (u) { 11283 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size); 11284 } else { 11285 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size); 11286 } 11287 return; 11288 case 0x05: /* SQSUB, UQSUB */ 11289 if (u) { 11290 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size); 11291 } else { 11292 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size); 11293 } 11294 return; 11295 case 0x08: /* SSHL, USHL */ 11296 if (u) { 11297 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size); 11298 } else { 11299 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size); 11300 } 11301 return; 11302 case 0x0c: /* SMAX, UMAX */ 11303 if (u) { 11304 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size); 11305 } else { 11306 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size); 11307 } 11308 return; 11309 case 0x0d: /* SMIN, UMIN */ 11310 if (u) { 11311 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size); 11312 } else { 11313 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size); 11314 } 11315 return; 11316 case 0xe: /* SABD, UABD */ 11317 if (u) { 11318 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size); 11319 } else { 11320 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size); 11321 } 11322 return; 11323 case 0xf: /* SABA, UABA */ 11324 if (u) { 11325 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size); 11326 } else { 11327 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size); 11328 } 11329 return; 11330 case 0x10: /* ADD, SUB */ 11331 if (u) { 11332 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size); 11333 } else { 11334 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size); 11335 } 11336 return; 11337 case 0x13: /* MUL, PMUL */ 11338 if (!u) { /* MUL */ 11339 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size); 11340 } else { /* PMUL */ 11341 gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b); 11342 } 11343 return; 11344 case 0x12: /* MLA, MLS */ 11345 if (u) { 11346 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size); 11347 } else { 11348 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size); 11349 } 11350 return; 11351 case 0x16: /* SQDMULH, SQRDMULH */ 11352 { 11353 static gen_helper_gvec_3_ptr * const fns[2][2] = { 
11354 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h }, 11355 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s }, 11356 }; 11357 gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]); 11358 } 11359 return; 11360 case 0x11: 11361 if (!u) { /* CMTST */ 11362 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size); 11363 return; 11364 } 11365 /* else CMEQ */ 11366 cond = TCG_COND_EQ; 11367 goto do_gvec_cmp; 11368 case 0x06: /* CMGT, CMHI */ 11369 cond = u ? TCG_COND_GTU : TCG_COND_GT; 11370 goto do_gvec_cmp; 11371 case 0x07: /* CMGE, CMHS */ 11372 cond = u ? TCG_COND_GEU : TCG_COND_GE; 11373 do_gvec_cmp: 11374 tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd), 11375 vec_full_reg_offset(s, rn), 11376 vec_full_reg_offset(s, rm), 11377 is_q ? 16 : 8, vec_full_reg_size(s)); 11378 return; 11379 } 11380 11381 if (size == 3) { 11382 assert(is_q); 11383 for (pass = 0; pass < 2; pass++) { 11384 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11385 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 11386 TCGv_i64 tcg_res = tcg_temp_new_i64(); 11387 11388 read_vec_element(s, tcg_op1, rn, pass, MO_64); 11389 read_vec_element(s, tcg_op2, rm, pass, MO_64); 11390 11391 handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2); 11392 11393 write_vec_element(s, tcg_res, rd, pass, MO_64); 11394 } 11395 } else { 11396 for (pass = 0; pass < (is_q ? 4 : 2); pass++) { 11397 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11398 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11399 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11400 NeonGenTwoOpFn *genfn = NULL; 11401 NeonGenTwoOpEnvFn *genenvfn = NULL; 11402 11403 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); 11404 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); 11405 11406 switch (opcode) { 11407 case 0x0: /* SHADD, UHADD */ 11408 { 11409 static NeonGenTwoOpFn * const fns[3][2] = { 11410 { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 }, 11411 { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 }, 11412 { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 }, 11413 }; 11414 genfn = fns[size][u]; 11415 break; 11416 } 11417 case 0x2: /* SRHADD, URHADD */ 11418 { 11419 static NeonGenTwoOpFn * const fns[3][2] = { 11420 { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 }, 11421 { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 }, 11422 { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 }, 11423 }; 11424 genfn = fns[size][u]; 11425 break; 11426 } 11427 case 0x4: /* SHSUB, UHSUB */ 11428 { 11429 static NeonGenTwoOpFn * const fns[3][2] = { 11430 { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 }, 11431 { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 }, 11432 { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 }, 11433 }; 11434 genfn = fns[size][u]; 11435 break; 11436 } 11437 case 0x9: /* SQSHL, UQSHL */ 11438 { 11439 static NeonGenTwoOpEnvFn * const fns[3][2] = { 11440 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, 11441 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, 11442 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, 11443 }; 11444 genenvfn = fns[size][u]; 11445 break; 11446 } 11447 case 0xa: /* SRSHL, URSHL */ 11448 { 11449 static NeonGenTwoOpFn * const fns[3][2] = { 11450 { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 }, 11451 { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 }, 11452 { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 }, 11453 }; 11454 genfn = fns[size][u]; 11455 break; 11456 } 11457 case 0xb: /* SQRSHL, UQRSHL */ 11458 { 11459 static NeonGenTwoOpEnvFn * const fns[3][2] = { 11460 { gen_helper_neon_qrshl_s8, 
gen_helper_neon_qrshl_u8 }, 11461 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, 11462 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, 11463 }; 11464 genenvfn = fns[size][u]; 11465 break; 11466 } 11467 default: 11468 g_assert_not_reached(); 11469 } 11470 11471 if (genenvfn) { 11472 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2); 11473 } else { 11474 genfn(tcg_res, tcg_op1, tcg_op2); 11475 } 11476 11477 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 11478 } 11479 } 11480 clear_vec_high(s, is_q, rd); 11481 } 11482 11483 /* AdvSIMD three same 11484 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 11485 * +---+---+---+-----------+------+---+------+--------+---+------+------+ 11486 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | 11487 * +---+---+---+-----------+------+---+------+--------+---+------+------+ 11488 */ 11489 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn) 11490 { 11491 int opcode = extract32(insn, 11, 5); 11492 11493 switch (opcode) { 11494 case 0x3: /* logic ops */ 11495 disas_simd_3same_logic(s, insn); 11496 break; 11497 case 0x17: /* ADDP */ 11498 case 0x14: /* SMAXP, UMAXP */ 11499 case 0x15: /* SMINP, UMINP */ 11500 { 11501 /* Pairwise operations */ 11502 int is_q = extract32(insn, 30, 1); 11503 int u = extract32(insn, 29, 1); 11504 int size = extract32(insn, 22, 2); 11505 int rm = extract32(insn, 16, 5); 11506 int rn = extract32(insn, 5, 5); 11507 int rd = extract32(insn, 0, 5); 11508 if (opcode == 0x17) { 11509 if (u || (size == 3 && !is_q)) { 11510 unallocated_encoding(s); 11511 return; 11512 } 11513 } else { 11514 if (size == 3) { 11515 unallocated_encoding(s); 11516 return; 11517 } 11518 } 11519 handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd); 11520 break; 11521 } 11522 case 0x18 ... 0x31: 11523 /* floating point ops, sz[1] and U are part of opcode */ 11524 disas_simd_3same_float(s, insn); 11525 break; 11526 default: 11527 disas_simd_3same_int(s, insn); 11528 break; 11529 } 11530 } 11531 11532 /* 11533 * Advanced SIMD three same (ARMv8.2 FP16 variants) 11534 * 11535 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 11536 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ 11537 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | 11538 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ 11539 * 11540 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE 11541 * (register), FACGE, FABD, FCMGT (register) and FACGT. 11542 * 11543 */ 11544 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) 11545 { 11546 int opcode = extract32(insn, 11, 3); 11547 int u = extract32(insn, 29, 1); 11548 int a = extract32(insn, 23, 1); 11549 int is_q = extract32(insn, 30, 1); 11550 int rm = extract32(insn, 16, 5); 11551 int rn = extract32(insn, 5, 5); 11552 int rd = extract32(insn, 0, 5); 11553 /* 11554 * For these floating point ops, the U, a and opcode bits 11555 * together indicate the operation. 11556 */ 11557 int fpopcode = opcode | (a << 3) | (u << 4); 11558 int datasize = is_q ? 
128 : 64; 11559 int elements = datasize / 16; 11560 bool pairwise; 11561 TCGv_ptr fpst; 11562 int pass; 11563 11564 switch (fpopcode) { 11565 case 0x0: /* FMAXNM */ 11566 case 0x1: /* FMLA */ 11567 case 0x2: /* FADD */ 11568 case 0x3: /* FMULX */ 11569 case 0x4: /* FCMEQ */ 11570 case 0x6: /* FMAX */ 11571 case 0x7: /* FRECPS */ 11572 case 0x8: /* FMINNM */ 11573 case 0x9: /* FMLS */ 11574 case 0xa: /* FSUB */ 11575 case 0xe: /* FMIN */ 11576 case 0xf: /* FRSQRTS */ 11577 case 0x13: /* FMUL */ 11578 case 0x14: /* FCMGE */ 11579 case 0x15: /* FACGE */ 11580 case 0x17: /* FDIV */ 11581 case 0x1a: /* FABD */ 11582 case 0x1c: /* FCMGT */ 11583 case 0x1d: /* FACGT */ 11584 pairwise = false; 11585 break; 11586 case 0x10: /* FMAXNMP */ 11587 case 0x12: /* FADDP */ 11588 case 0x16: /* FMAXP */ 11589 case 0x18: /* FMINNMP */ 11590 case 0x1e: /* FMINP */ 11591 pairwise = true; 11592 break; 11593 default: 11594 unallocated_encoding(s); 11595 return; 11596 } 11597 11598 if (!dc_isar_feature(aa64_fp16, s)) { 11599 unallocated_encoding(s); 11600 return; 11601 } 11602 11603 if (!fp_access_check(s)) { 11604 return; 11605 } 11606 11607 fpst = fpstatus_ptr(FPST_FPCR_F16); 11608 11609 if (pairwise) { 11610 int maxpass = is_q ? 8 : 4; 11611 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11612 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11613 TCGv_i32 tcg_res[8]; 11614 11615 for (pass = 0; pass < maxpass; pass++) { 11616 int passreg = pass < (maxpass / 2) ? rn : rm; 11617 int passelt = (pass << 1) & (maxpass - 1); 11618 11619 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16); 11620 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16); 11621 tcg_res[pass] = tcg_temp_new_i32(); 11622 11623 switch (fpopcode) { 11624 case 0x10: /* FMAXNMP */ 11625 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2, 11626 fpst); 11627 break; 11628 case 0x12: /* FADDP */ 11629 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11630 break; 11631 case 0x16: /* FMAXP */ 11632 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11633 break; 11634 case 0x18: /* FMINNMP */ 11635 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2, 11636 fpst); 11637 break; 11638 case 0x1e: /* FMINP */ 11639 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11640 break; 11641 default: 11642 g_assert_not_reached(); 11643 } 11644 } 11645 11646 for (pass = 0; pass < maxpass; pass++) { 11647 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16); 11648 } 11649 } else { 11650 for (pass = 0; pass < elements; pass++) { 11651 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11652 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11653 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11654 11655 read_vec_element_i32(s, tcg_op1, rn, pass, MO_16); 11656 read_vec_element_i32(s, tcg_op2, rm, pass, MO_16); 11657 11658 switch (fpopcode) { 11659 case 0x0: /* FMAXNM */ 11660 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 11661 break; 11662 case 0x1: /* FMLA */ 11663 read_vec_element_i32(s, tcg_res, rd, pass, MO_16); 11664 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, 11665 fpst); 11666 break; 11667 case 0x2: /* FADD */ 11668 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 11669 break; 11670 case 0x3: /* FMULX */ 11671 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); 11672 break; 11673 case 0x4: /* FCMEQ */ 11674 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11675 break; 11676 case 0x6: /* FMAX */ 11677 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 11678 break; 11679 
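            /*
             * FRECPS and FRSQRTS (case 0xf below) are the reciprocal and
             * reciprocal-square-root Newton-Raphson step operations:
             * roughly 2.0 - (a * b) and (3.0 - a * b) / 2.0, with the
             * architected special cases handled in the helpers.
             */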
case 0x7: /* FRECPS */ 11680 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11681 break; 11682 case 0x8: /* FMINNM */ 11683 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 11684 break; 11685 case 0x9: /* FMLS */ 11686 /* As usual for ARM, separate negation for fused multiply-add */ 11687 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); 11688 read_vec_element_i32(s, tcg_res, rd, pass, MO_16); 11689 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, 11690 fpst); 11691 break; 11692 case 0xa: /* FSUB */ 11693 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 11694 break; 11695 case 0xe: /* FMIN */ 11696 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 11697 break; 11698 case 0xf: /* FRSQRTS */ 11699 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11700 break; 11701 case 0x13: /* FMUL */ 11702 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 11703 break; 11704 case 0x14: /* FCMGE */ 11705 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11706 break; 11707 case 0x15: /* FACGE */ 11708 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11709 break; 11710 case 0x17: /* FDIV */ 11711 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); 11712 break; 11713 case 0x1a: /* FABD */ 11714 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 11715 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff); 11716 break; 11717 case 0x1c: /* FCMGT */ 11718 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11719 break; 11720 case 0x1d: /* FACGT */ 11721 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 11722 break; 11723 default: 11724 g_assert_not_reached(); 11725 } 11726 11727 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 11728 } 11729 } 11730 11731 clear_vec_high(s, is_q, rd); 11732 } 11733 11734 /* AdvSIMD three same extra 11735 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 11736 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ 11737 * | 0 | Q | U | 0 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd | 11738 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ 11739 */ 11740 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) 11741 { 11742 int rd = extract32(insn, 0, 5); 11743 int rn = extract32(insn, 5, 5); 11744 int opcode = extract32(insn, 11, 4); 11745 int rm = extract32(insn, 16, 5); 11746 int size = extract32(insn, 22, 2); 11747 bool u = extract32(insn, 29, 1); 11748 bool is_q = extract32(insn, 30, 1); 11749 bool feature; 11750 int rot; 11751 11752 switch (u * 16 + opcode) { 11753 case 0x10: /* SQRDMLAH (vector) */ 11754 case 0x11: /* SQRDMLSH (vector) */ 11755 if (size != 1 && size != 2) { 11756 unallocated_encoding(s); 11757 return; 11758 } 11759 feature = dc_isar_feature(aa64_rdm, s); 11760 break; 11761 case 0x02: /* SDOT (vector) */ 11762 case 0x12: /* UDOT (vector) */ 11763 if (size != MO_32) { 11764 unallocated_encoding(s); 11765 return; 11766 } 11767 feature = dc_isar_feature(aa64_dp, s); 11768 break; 11769 case 0x03: /* USDOT */ 11770 if (size != MO_32) { 11771 unallocated_encoding(s); 11772 return; 11773 } 11774 feature = dc_isar_feature(aa64_i8mm, s); 11775 break; 11776 case 0x04: /* SMMLA */ 11777 case 0x14: /* UMMLA */ 11778 case 0x05: /* USMMLA */ 11779 if (!is_q || size != MO_32) { 11780 unallocated_encoding(s); 11781 return; 11782 } 11783 feature = dc_isar_feature(aa64_i8mm, s); 11784 break; 11785 case 0x18: /* FCMLA, #0 */ 11786 case 0x19: /* FCMLA, #90 */ 11787 case 0x1a: /* FCMLA, #180 */ 11788 case 
0x1b: /* FCMLA, #270 */ 11789 case 0x1c: /* FCADD, #90 */ 11790 case 0x1e: /* FCADD, #270 */ 11791 if (size == 0 11792 || (size == 1 && !dc_isar_feature(aa64_fp16, s)) 11793 || (size == 3 && !is_q)) { 11794 unallocated_encoding(s); 11795 return; 11796 } 11797 feature = dc_isar_feature(aa64_fcma, s); 11798 break; 11799 case 0x1d: /* BFMMLA */ 11800 if (size != MO_16 || !is_q) { 11801 unallocated_encoding(s); 11802 return; 11803 } 11804 feature = dc_isar_feature(aa64_bf16, s); 11805 break; 11806 case 0x1f: 11807 switch (size) { 11808 case 1: /* BFDOT */ 11809 case 3: /* BFMLAL{B,T} */ 11810 feature = dc_isar_feature(aa64_bf16, s); 11811 break; 11812 default: 11813 unallocated_encoding(s); 11814 return; 11815 } 11816 break; 11817 default: 11818 unallocated_encoding(s); 11819 return; 11820 } 11821 if (!feature) { 11822 unallocated_encoding(s); 11823 return; 11824 } 11825 if (!fp_access_check(s)) { 11826 return; 11827 } 11828 11829 switch (opcode) { 11830 case 0x0: /* SQRDMLAH (vector) */ 11831 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size); 11832 return; 11833 11834 case 0x1: /* SQRDMLSH (vector) */ 11835 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlsh_qc, size); 11836 return; 11837 11838 case 0x2: /* SDOT / UDOT */ 11839 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, 11840 u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b); 11841 return; 11842 11843 case 0x3: /* USDOT */ 11844 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b); 11845 return; 11846 11847 case 0x04: /* SMMLA, UMMLA */ 11848 gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, 11849 u ? gen_helper_gvec_ummla_b 11850 : gen_helper_gvec_smmla_b); 11851 return; 11852 case 0x05: /* USMMLA */ 11853 gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b); 11854 return; 11855 11856 case 0x8: /* FCMLA, #0 */ 11857 case 0x9: /* FCMLA, #90 */ 11858 case 0xa: /* FCMLA, #180 */ 11859 case 0xb: /* FCMLA, #270 */ 11860 rot = extract32(opcode, 0, 2); 11861 switch (size) { 11862 case 1: 11863 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot, 11864 gen_helper_gvec_fcmlah); 11865 break; 11866 case 2: 11867 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot, 11868 gen_helper_gvec_fcmlas); 11869 break; 11870 case 3: 11871 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot, 11872 gen_helper_gvec_fcmlad); 11873 break; 11874 default: 11875 g_assert_not_reached(); 11876 } 11877 return; 11878 11879 case 0xc: /* FCADD, #90 */ 11880 case 0xe: /* FCADD, #270 */ 11881 rot = extract32(opcode, 1, 1); 11882 switch (size) { 11883 case 1: 11884 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, 11885 gen_helper_gvec_fcaddh); 11886 break; 11887 case 2: 11888 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, 11889 gen_helper_gvec_fcadds); 11890 break; 11891 case 3: 11892 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, 11893 gen_helper_gvec_fcaddd); 11894 break; 11895 default: 11896 g_assert_not_reached(); 11897 } 11898 return; 11899 11900 case 0xd: /* BFMMLA */ 11901 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla); 11902 return; 11903 case 0xf: 11904 switch (size) { 11905 case 1: /* BFDOT */ 11906 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot); 11907 break; 11908 case 3: /* BFMLAL{B,T} */ 11909 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q, 11910 gen_helper_gvec_bfmlal); 11911 break; 11912 default: 11913 g_assert_not_reached(); 11914 } 11915 return; 11916 11917 default: 11918 g_assert_not_reached(); 11919 } 11920 } 11921 11922 static void 
handle_2misc_widening(DisasContext *s, int opcode, bool is_q, 11923 int size, int rn, int rd) 11924 { 11925 /* Handle 2-reg-misc ops which are widening (so each size element 11926 * in the source becomes a 2*size element in the destination. 11927 * The only instruction like this is FCVTL. 11928 */ 11929 int pass; 11930 11931 if (size == 3) { 11932 /* 32 -> 64 bit fp conversion */ 11933 TCGv_i64 tcg_res[2]; 11934 int srcelt = is_q ? 2 : 0; 11935 11936 for (pass = 0; pass < 2; pass++) { 11937 TCGv_i32 tcg_op = tcg_temp_new_i32(); 11938 tcg_res[pass] = tcg_temp_new_i64(); 11939 11940 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); 11941 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env); 11942 } 11943 for (pass = 0; pass < 2; pass++) { 11944 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11945 } 11946 } else { 11947 /* 16 -> 32 bit fp conversion */ 11948 int srcelt = is_q ? 4 : 0; 11949 TCGv_i32 tcg_res[4]; 11950 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 11951 TCGv_i32 ahp = get_ahp_flag(); 11952 11953 for (pass = 0; pass < 4; pass++) { 11954 tcg_res[pass] = tcg_temp_new_i32(); 11955 11956 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); 11957 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], 11958 fpst, ahp); 11959 } 11960 for (pass = 0; pass < 4; pass++) { 11961 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); 11962 } 11963 } 11964 } 11965 11966 static void handle_rev(DisasContext *s, int opcode, bool u, 11967 bool is_q, int size, int rn, int rd) 11968 { 11969 int op = (opcode << 1) | u; 11970 int opsz = op + size; 11971 int grp_size = 3 - opsz; 11972 int dsize = is_q ? 128 : 64; 11973 int i; 11974 11975 if (opsz >= 3) { 11976 unallocated_encoding(s); 11977 return; 11978 } 11979 11980 if (!fp_access_check(s)) { 11981 return; 11982 } 11983 11984 if (size == 0) { 11985 /* Special case bytes, use bswap op on each group of elements */ 11986 int groups = dsize / (8 << grp_size); 11987 11988 for (i = 0; i < groups; i++) { 11989 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 11990 11991 read_vec_element(s, tcg_tmp, rn, i, grp_size); 11992 switch (grp_size) { 11993 case MO_16: 11994 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 11995 break; 11996 case MO_32: 11997 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 11998 break; 11999 case MO_64: 12000 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp); 12001 break; 12002 default: 12003 g_assert_not_reached(); 12004 } 12005 write_vec_element(s, tcg_tmp, rd, i, grp_size); 12006 } 12007 clear_vec_high(s, is_q, rd); 12008 } else { 12009 int revmask = (1 << grp_size) - 1; 12010 int esize = 8 << size; 12011 int elements = dsize / esize; 12012 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 12013 TCGv_i64 tcg_rd[2]; 12014 12015 for (i = 0; i < 2; i++) { 12016 tcg_rd[i] = tcg_temp_new_i64(); 12017 tcg_gen_movi_i64(tcg_rd[i], 0); 12018 } 12019 12020 for (i = 0; i < elements; i++) { 12021 int e_rev = (i & 0xf) ^ revmask; 12022 int w = (e_rev * esize) / 64; 12023 int o = (e_rev * esize) % 64; 12024 12025 read_vec_element(s, tcg_rn, rn, i, size); 12026 tcg_gen_deposit_i64(tcg_rd[w], tcg_rd[w], tcg_rn, o, esize); 12027 } 12028 12029 for (i = 0; i < 2; i++) { 12030 write_vec_element(s, tcg_rd[i], rd, i, MO_64); 12031 } 12032 clear_vec_high(s, true, rd); 12033 } 12034 } 12035 12036 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, 12037 bool is_q, int size, int rn, int rd) 12038 { 12039 /* Implement the pairwise operations from 2-misc: 12040 * SADDLP, UADDLP, SADALP, UADALP. 
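 * (the latter two accumulate into the destination, selected below via opcode == 0x6.)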
12041 * These all add pairs of elements in the input to produce a 12042 * double-width result element in the output (possibly accumulating). 12043 */ 12044 bool accum = (opcode == 0x6); 12045 int maxpass = is_q ? 2 : 1; 12046 int pass; 12047 TCGv_i64 tcg_res[2]; 12048 12049 if (size == 2) { 12050 /* 32 + 32 -> 64 op */ 12051 MemOp memop = size + (u ? 0 : MO_SIGN); 12052 12053 for (pass = 0; pass < maxpass; pass++) { 12054 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 12055 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 12056 12057 tcg_res[pass] = tcg_temp_new_i64(); 12058 12059 read_vec_element(s, tcg_op1, rn, pass * 2, memop); 12060 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop); 12061 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); 12062 if (accum) { 12063 read_vec_element(s, tcg_op1, rd, pass, MO_64); 12064 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1); 12065 } 12066 } 12067 } else { 12068 for (pass = 0; pass < maxpass; pass++) { 12069 TCGv_i64 tcg_op = tcg_temp_new_i64(); 12070 NeonGenOne64OpFn *genfn; 12071 static NeonGenOne64OpFn * const fns[2][2] = { 12072 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, 12073 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, 12074 }; 12075 12076 genfn = fns[size][u]; 12077 12078 tcg_res[pass] = tcg_temp_new_i64(); 12079 12080 read_vec_element(s, tcg_op, rn, pass, MO_64); 12081 genfn(tcg_res[pass], tcg_op); 12082 12083 if (accum) { 12084 read_vec_element(s, tcg_op, rd, pass, MO_64); 12085 if (size == 0) { 12086 gen_helper_neon_addl_u16(tcg_res[pass], 12087 tcg_res[pass], tcg_op); 12088 } else { 12089 gen_helper_neon_addl_u32(tcg_res[pass], 12090 tcg_res[pass], tcg_op); 12091 } 12092 } 12093 } 12094 } 12095 if (!is_q) { 12096 tcg_res[1] = tcg_constant_i64(0); 12097 } 12098 for (pass = 0; pass < 2; pass++) { 12099 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 12100 } 12101 } 12102 12103 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) 12104 { 12105 /* Implement SHLL and SHLL2 */ 12106 int pass; 12107 int part = is_q ? 
2 : 0; 12108 TCGv_i64 tcg_res[2]; 12109 12110 for (pass = 0; pass < 2; pass++) { 12111 static NeonGenWidenFn * const widenfns[3] = { 12112 gen_helper_neon_widen_u8, 12113 gen_helper_neon_widen_u16, 12114 tcg_gen_extu_i32_i64, 12115 }; 12116 NeonGenWidenFn *widenfn = widenfns[size]; 12117 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12118 12119 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32); 12120 tcg_res[pass] = tcg_temp_new_i64(); 12121 widenfn(tcg_res[pass], tcg_op); 12122 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size); 12123 } 12124 12125 for (pass = 0; pass < 2; pass++) { 12126 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 12127 } 12128 } 12129 12130 /* AdvSIMD two reg misc 12131 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 12132 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 12133 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 12134 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 12135 */ 12136 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) 12137 { 12138 int size = extract32(insn, 22, 2); 12139 int opcode = extract32(insn, 12, 5); 12140 bool u = extract32(insn, 29, 1); 12141 bool is_q = extract32(insn, 30, 1); 12142 int rn = extract32(insn, 5, 5); 12143 int rd = extract32(insn, 0, 5); 12144 bool need_fpstatus = false; 12145 int rmode = -1; 12146 TCGv_i32 tcg_rmode; 12147 TCGv_ptr tcg_fpstatus; 12148 12149 switch (opcode) { 12150 case 0x0: /* REV64, REV32 */ 12151 case 0x1: /* REV16 */ 12152 handle_rev(s, opcode, u, is_q, size, rn, rd); 12153 return; 12154 case 0x5: /* CNT, NOT, RBIT */ 12155 if (u && size == 0) { 12156 /* NOT */ 12157 break; 12158 } else if (u && size == 1) { 12159 /* RBIT */ 12160 break; 12161 } else if (!u && size == 0) { 12162 /* CNT */ 12163 break; 12164 } 12165 unallocated_encoding(s); 12166 return; 12167 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */ 12168 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */ 12169 if (size == 3) { 12170 unallocated_encoding(s); 12171 return; 12172 } 12173 if (!fp_access_check(s)) { 12174 return; 12175 } 12176 12177 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd); 12178 return; 12179 case 0x4: /* CLS, CLZ */ 12180 if (size == 3) { 12181 unallocated_encoding(s); 12182 return; 12183 } 12184 break; 12185 case 0x2: /* SADDLP, UADDLP */ 12186 case 0x6: /* SADALP, UADALP */ 12187 if (size == 3) { 12188 unallocated_encoding(s); 12189 return; 12190 } 12191 if (!fp_access_check(s)) { 12192 return; 12193 } 12194 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd); 12195 return; 12196 case 0x13: /* SHLL, SHLL2 */ 12197 if (u == 0 || size == 3) { 12198 unallocated_encoding(s); 12199 return; 12200 } 12201 if (!fp_access_check(s)) { 12202 return; 12203 } 12204 handle_shll(s, is_q, size, rn, rd); 12205 return; 12206 case 0xa: /* CMLT */ 12207 if (u == 1) { 12208 unallocated_encoding(s); 12209 return; 12210 } 12211 /* fall through */ 12212 case 0x8: /* CMGT, CMGE */ 12213 case 0x9: /* CMEQ, CMLE */ 12214 case 0xb: /* ABS, NEG */ 12215 if (size == 3 && !is_q) { 12216 unallocated_encoding(s); 12217 return; 12218 } 12219 break; 12220 case 0x3: /* SUQADD, USQADD */ 12221 if (size == 3 && !is_q) { 12222 unallocated_encoding(s); 12223 return; 12224 } 12225 if (!fp_access_check(s)) { 12226 return; 12227 } 12228 handle_2misc_satacc(s, false, u, is_q, size, rn, rd); 12229 return; 12230 case 0x7: /* SQABS, SQNEG */ 12231 if (size == 3 && !is_q) { 12232 unallocated_encoding(s); 12233 return; 12234 } 12235 break; 12236 case 
0xc ... 0xf: 12237 case 0x16 ... 0x1f: 12238 { 12239 /* Floating point: U, size[1] and opcode indicate operation; 12240 * size[0] indicates single or double precision. 12241 */ 12242 int is_double = extract32(size, 0, 1); 12243 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 12244 size = is_double ? 3 : 2; 12245 switch (opcode) { 12246 case 0x2f: /* FABS */ 12247 case 0x6f: /* FNEG */ 12248 if (size == 3 && !is_q) { 12249 unallocated_encoding(s); 12250 return; 12251 } 12252 break; 12253 case 0x1d: /* SCVTF */ 12254 case 0x5d: /* UCVTF */ 12255 { 12256 bool is_signed = (opcode == 0x1d) ? true : false; 12257 int elements = is_double ? 2 : is_q ? 4 : 2; 12258 if (is_double && !is_q) { 12259 unallocated_encoding(s); 12260 return; 12261 } 12262 if (!fp_access_check(s)) { 12263 return; 12264 } 12265 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size); 12266 return; 12267 } 12268 case 0x2c: /* FCMGT (zero) */ 12269 case 0x2d: /* FCMEQ (zero) */ 12270 case 0x2e: /* FCMLT (zero) */ 12271 case 0x6c: /* FCMGE (zero) */ 12272 case 0x6d: /* FCMLE (zero) */ 12273 if (size == 3 && !is_q) { 12274 unallocated_encoding(s); 12275 return; 12276 } 12277 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd); 12278 return; 12279 case 0x7f: /* FSQRT */ 12280 if (size == 3 && !is_q) { 12281 unallocated_encoding(s); 12282 return; 12283 } 12284 break; 12285 case 0x1a: /* FCVTNS */ 12286 case 0x1b: /* FCVTMS */ 12287 case 0x3a: /* FCVTPS */ 12288 case 0x3b: /* FCVTZS */ 12289 case 0x5a: /* FCVTNU */ 12290 case 0x5b: /* FCVTMU */ 12291 case 0x7a: /* FCVTPU */ 12292 case 0x7b: /* FCVTZU */ 12293 need_fpstatus = true; 12294 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 12295 if (size == 3 && !is_q) { 12296 unallocated_encoding(s); 12297 return; 12298 } 12299 break; 12300 case 0x5c: /* FCVTAU */ 12301 case 0x1c: /* FCVTAS */ 12302 need_fpstatus = true; 12303 rmode = FPROUNDING_TIEAWAY; 12304 if (size == 3 && !is_q) { 12305 unallocated_encoding(s); 12306 return; 12307 } 12308 break; 12309 case 0x3c: /* URECPE */ 12310 if (size == 3) { 12311 unallocated_encoding(s); 12312 return; 12313 } 12314 /* fall through */ 12315 case 0x3d: /* FRECPE */ 12316 case 0x7d: /* FRSQRTE */ 12317 if (size == 3 && !is_q) { 12318 unallocated_encoding(s); 12319 return; 12320 } 12321 if (!fp_access_check(s)) { 12322 return; 12323 } 12324 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd); 12325 return; 12326 case 0x56: /* FCVTXN, FCVTXN2 */ 12327 if (size == 2) { 12328 unallocated_encoding(s); 12329 return; 12330 } 12331 /* fall through */ 12332 case 0x16: /* FCVTN, FCVTN2 */ 12333 /* handle_2misc_narrow does a 2*size -> size operation, but these 12334 * instructions encode the source size rather than dest size. 
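 * That is why the call below passes size - 1 as the destination element size.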
12335 */ 12336 if (!fp_access_check(s)) { 12337 return; 12338 } 12339 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 12340 return; 12341 case 0x36: /* BFCVTN, BFCVTN2 */ 12342 if (!dc_isar_feature(aa64_bf16, s) || size != 2) { 12343 unallocated_encoding(s); 12344 return; 12345 } 12346 if (!fp_access_check(s)) { 12347 return; 12348 } 12349 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 12350 return; 12351 case 0x17: /* FCVTL, FCVTL2 */ 12352 if (!fp_access_check(s)) { 12353 return; 12354 } 12355 handle_2misc_widening(s, opcode, is_q, size, rn, rd); 12356 return; 12357 case 0x18: /* FRINTN */ 12358 case 0x19: /* FRINTM */ 12359 case 0x38: /* FRINTP */ 12360 case 0x39: /* FRINTZ */ 12361 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 12362 /* fall through */ 12363 case 0x59: /* FRINTX */ 12364 case 0x79: /* FRINTI */ 12365 need_fpstatus = true; 12366 if (size == 3 && !is_q) { 12367 unallocated_encoding(s); 12368 return; 12369 } 12370 break; 12371 case 0x58: /* FRINTA */ 12372 rmode = FPROUNDING_TIEAWAY; 12373 need_fpstatus = true; 12374 if (size == 3 && !is_q) { 12375 unallocated_encoding(s); 12376 return; 12377 } 12378 break; 12379 case 0x7c: /* URSQRTE */ 12380 if (size == 3) { 12381 unallocated_encoding(s); 12382 return; 12383 } 12384 break; 12385 case 0x1e: /* FRINT32Z */ 12386 case 0x1f: /* FRINT64Z */ 12387 rmode = FPROUNDING_ZERO; 12388 /* fall through */ 12389 case 0x5e: /* FRINT32X */ 12390 case 0x5f: /* FRINT64X */ 12391 need_fpstatus = true; 12392 if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) { 12393 unallocated_encoding(s); 12394 return; 12395 } 12396 break; 12397 default: 12398 unallocated_encoding(s); 12399 return; 12400 } 12401 break; 12402 } 12403 default: 12404 unallocated_encoding(s); 12405 return; 12406 } 12407 12408 if (!fp_access_check(s)) { 12409 return; 12410 } 12411 12412 if (need_fpstatus || rmode >= 0) { 12413 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 12414 } else { 12415 tcg_fpstatus = NULL; 12416 } 12417 if (rmode >= 0) { 12418 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 12419 } else { 12420 tcg_rmode = NULL; 12421 } 12422 12423 switch (opcode) { 12424 case 0x5: 12425 if (u && size == 0) { /* NOT */ 12426 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0); 12427 return; 12428 } 12429 break; 12430 case 0x8: /* CMGT, CMGE */ 12431 if (u) { 12432 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size); 12433 } else { 12434 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size); 12435 } 12436 return; 12437 case 0x9: /* CMEQ, CMLE */ 12438 if (u) { 12439 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size); 12440 } else { 12441 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size); 12442 } 12443 return; 12444 case 0xa: /* CMLT */ 12445 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size); 12446 return; 12447 case 0xb: 12448 if (u) { /* ABS, NEG */ 12449 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); 12450 } else { 12451 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size); 12452 } 12453 return; 12454 } 12455 12456 if (size == 3) { 12457 /* All 64-bit element operations can be shared with scalar 2misc */ 12458 int pass; 12459 12460 /* Coverity claims (size == 3 && !is_q) has been eliminated 12461 * from all paths leading to here. 
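 * The tcg_debug_assert() below makes that invariant explicit.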
12462 */ 12463 tcg_debug_assert(is_q); 12464 for (pass = 0; pass < 2; pass++) { 12465 TCGv_i64 tcg_op = tcg_temp_new_i64(); 12466 TCGv_i64 tcg_res = tcg_temp_new_i64(); 12467 12468 read_vec_element(s, tcg_op, rn, pass, MO_64); 12469 12470 handle_2misc_64(s, opcode, u, tcg_res, tcg_op, 12471 tcg_rmode, tcg_fpstatus); 12472 12473 write_vec_element(s, tcg_res, rd, pass, MO_64); 12474 } 12475 } else { 12476 int pass; 12477 12478 for (pass = 0; pass < (is_q ? 4 : 2); pass++) { 12479 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12480 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12481 12482 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 12483 12484 if (size == 2) { 12485 /* Special cases for 32 bit elements */ 12486 switch (opcode) { 12487 case 0x4: /* CLS */ 12488 if (u) { 12489 tcg_gen_clzi_i32(tcg_res, tcg_op, 32); 12490 } else { 12491 tcg_gen_clrsb_i32(tcg_res, tcg_op); 12492 } 12493 break; 12494 case 0x7: /* SQABS, SQNEG */ 12495 if (u) { 12496 gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op); 12497 } else { 12498 gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op); 12499 } 12500 break; 12501 case 0x2f: /* FABS */ 12502 gen_helper_vfp_abss(tcg_res, tcg_op); 12503 break; 12504 case 0x6f: /* FNEG */ 12505 gen_helper_vfp_negs(tcg_res, tcg_op); 12506 break; 12507 case 0x7f: /* FSQRT */ 12508 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); 12509 break; 12510 case 0x1a: /* FCVTNS */ 12511 case 0x1b: /* FCVTMS */ 12512 case 0x1c: /* FCVTAS */ 12513 case 0x3a: /* FCVTPS */ 12514 case 0x3b: /* FCVTZS */ 12515 gen_helper_vfp_tosls(tcg_res, tcg_op, 12516 tcg_constant_i32(0), tcg_fpstatus); 12517 break; 12518 case 0x5a: /* FCVTNU */ 12519 case 0x5b: /* FCVTMU */ 12520 case 0x5c: /* FCVTAU */ 12521 case 0x7a: /* FCVTPU */ 12522 case 0x7b: /* FCVTZU */ 12523 gen_helper_vfp_touls(tcg_res, tcg_op, 12524 tcg_constant_i32(0), tcg_fpstatus); 12525 break; 12526 case 0x18: /* FRINTN */ 12527 case 0x19: /* FRINTM */ 12528 case 0x38: /* FRINTP */ 12529 case 0x39: /* FRINTZ */ 12530 case 0x58: /* FRINTA */ 12531 case 0x79: /* FRINTI */ 12532 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus); 12533 break; 12534 case 0x59: /* FRINTX */ 12535 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus); 12536 break; 12537 case 0x7c: /* URSQRTE */ 12538 gen_helper_rsqrte_u32(tcg_res, tcg_op); 12539 break; 12540 case 0x1e: /* FRINT32Z */ 12541 case 0x5e: /* FRINT32X */ 12542 gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus); 12543 break; 12544 case 0x1f: /* FRINT64Z */ 12545 case 0x5f: /* FRINT64X */ 12546 gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus); 12547 break; 12548 default: 12549 g_assert_not_reached(); 12550 } 12551 } else { 12552 /* Use helpers for 8 and 16 bit elements */ 12553 switch (opcode) { 12554 case 0x5: /* CNT, RBIT */ 12555 /* For these two insns size is part of the opcode specifier 12556 * (handled earlier); they always operate on byte elements. 
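 * (CNT counts the set bits in each byte; RBIT reverses the bits within each byte.)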
12557 */ 12558 if (u) { 12559 gen_helper_neon_rbit_u8(tcg_res, tcg_op); 12560 } else { 12561 gen_helper_neon_cnt_u8(tcg_res, tcg_op); 12562 } 12563 break; 12564 case 0x7: /* SQABS, SQNEG */ 12565 { 12566 NeonGenOneOpEnvFn *genfn; 12567 static NeonGenOneOpEnvFn * const fns[2][2] = { 12568 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 12569 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 12570 }; 12571 genfn = fns[size][u]; 12572 genfn(tcg_res, cpu_env, tcg_op); 12573 break; 12574 } 12575 case 0x4: /* CLS, CLZ */ 12576 if (u) { 12577 if (size == 0) { 12578 gen_helper_neon_clz_u8(tcg_res, tcg_op); 12579 } else { 12580 gen_helper_neon_clz_u16(tcg_res, tcg_op); 12581 } 12582 } else { 12583 if (size == 0) { 12584 gen_helper_neon_cls_s8(tcg_res, tcg_op); 12585 } else { 12586 gen_helper_neon_cls_s16(tcg_res, tcg_op); 12587 } 12588 } 12589 break; 12590 default: 12591 g_assert_not_reached(); 12592 } 12593 } 12594 12595 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 12596 } 12597 } 12598 clear_vec_high(s, is_q, rd); 12599 12600 if (tcg_rmode) { 12601 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 12602 } 12603 } 12604 12605 /* AdvSIMD [scalar] two register miscellaneous (FP16) 12606 * 12607 * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0 12608 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 12609 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd | 12610 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 12611 * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00 12612 * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800 12613 * 12614 * This actually covers two groups where scalar access is governed by 12615 * bit 28. A bunch of the instructions (float to integral) only exist 12616 * in the vector form and are un-allocated for the scalar decode. Also 12617 * in the scalar decode Q is always 1. 12618 */ 12619 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) 12620 { 12621 int fpop, opcode, a, u; 12622 int rn, rd; 12623 bool is_q; 12624 bool is_scalar; 12625 bool only_in_vector = false; 12626 12627 int pass; 12628 TCGv_i32 tcg_rmode = NULL; 12629 TCGv_ptr tcg_fpstatus = NULL; 12630 bool need_fpst = true; 12631 int rmode = -1; 12632 12633 if (!dc_isar_feature(aa64_fp16, s)) { 12634 unallocated_encoding(s); 12635 return; 12636 } 12637 12638 rd = extract32(insn, 0, 5); 12639 rn = extract32(insn, 5, 5); 12640 12641 a = extract32(insn, 23, 1); 12642 u = extract32(insn, 29, 1); 12643 is_scalar = extract32(insn, 28, 1); 12644 is_q = extract32(insn, 30, 1); 12645 12646 opcode = extract32(insn, 12, 5); 12647 fpop = deposit32(opcode, 5, 1, a); 12648 fpop = deposit32(fpop, 6, 1, u); 12649 12650 switch (fpop) { 12651 case 0x1d: /* SCVTF */ 12652 case 0x5d: /* UCVTF */ 12653 { 12654 int elements; 12655 12656 if (is_scalar) { 12657 elements = 1; 12658 } else { 12659 elements = (is_q ? 
8 : 4); 12660 } 12661 12662 if (!fp_access_check(s)) { 12663 return; 12664 } 12665 handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16); 12666 return; 12667 } 12668 break; 12669 case 0x2c: /* FCMGT (zero) */ 12670 case 0x2d: /* FCMEQ (zero) */ 12671 case 0x2e: /* FCMLT (zero) */ 12672 case 0x6c: /* FCMGE (zero) */ 12673 case 0x6d: /* FCMLE (zero) */ 12674 handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd); 12675 return; 12676 case 0x3d: /* FRECPE */ 12677 case 0x3f: /* FRECPX */ 12678 break; 12679 case 0x18: /* FRINTN */ 12680 only_in_vector = true; 12681 rmode = FPROUNDING_TIEEVEN; 12682 break; 12683 case 0x19: /* FRINTM */ 12684 only_in_vector = true; 12685 rmode = FPROUNDING_NEGINF; 12686 break; 12687 case 0x38: /* FRINTP */ 12688 only_in_vector = true; 12689 rmode = FPROUNDING_POSINF; 12690 break; 12691 case 0x39: /* FRINTZ */ 12692 only_in_vector = true; 12693 rmode = FPROUNDING_ZERO; 12694 break; 12695 case 0x58: /* FRINTA */ 12696 only_in_vector = true; 12697 rmode = FPROUNDING_TIEAWAY; 12698 break; 12699 case 0x59: /* FRINTX */ 12700 case 0x79: /* FRINTI */ 12701 only_in_vector = true; 12702 /* current rounding mode */ 12703 break; 12704 case 0x1a: /* FCVTNS */ 12705 rmode = FPROUNDING_TIEEVEN; 12706 break; 12707 case 0x1b: /* FCVTMS */ 12708 rmode = FPROUNDING_NEGINF; 12709 break; 12710 case 0x1c: /* FCVTAS */ 12711 rmode = FPROUNDING_TIEAWAY; 12712 break; 12713 case 0x3a: /* FCVTPS */ 12714 rmode = FPROUNDING_POSINF; 12715 break; 12716 case 0x3b: /* FCVTZS */ 12717 rmode = FPROUNDING_ZERO; 12718 break; 12719 case 0x5a: /* FCVTNU */ 12720 rmode = FPROUNDING_TIEEVEN; 12721 break; 12722 case 0x5b: /* FCVTMU */ 12723 rmode = FPROUNDING_NEGINF; 12724 break; 12725 case 0x5c: /* FCVTAU */ 12726 rmode = FPROUNDING_TIEAWAY; 12727 break; 12728 case 0x7a: /* FCVTPU */ 12729 rmode = FPROUNDING_POSINF; 12730 break; 12731 case 0x7b: /* FCVTZU */ 12732 rmode = FPROUNDING_ZERO; 12733 break; 12734 case 0x2f: /* FABS */ 12735 case 0x6f: /* FNEG */ 12736 need_fpst = false; 12737 break; 12738 case 0x7d: /* FRSQRTE */ 12739 case 0x7f: /* FSQRT (vector) */ 12740 break; 12741 default: 12742 unallocated_encoding(s); 12743 return; 12744 } 12745 12746 12747 /* Check additional constraints for the scalar encoding */ 12748 if (is_scalar) { 12749 if (!is_q) { 12750 unallocated_encoding(s); 12751 return; 12752 } 12753 /* FRINTxx is only in the vector form */ 12754 if (only_in_vector) { 12755 unallocated_encoding(s); 12756 return; 12757 } 12758 } 12759 12760 if (!fp_access_check(s)) { 12761 return; 12762 } 12763 12764 if (rmode >= 0 || need_fpst) { 12765 tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16); 12766 } 12767 12768 if (rmode >= 0) { 12769 tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus); 12770 } 12771 12772 if (is_scalar) { 12773 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 12774 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12775 12776 switch (fpop) { 12777 case 0x1a: /* FCVTNS */ 12778 case 0x1b: /* FCVTMS */ 12779 case 0x1c: /* FCVTAS */ 12780 case 0x3a: /* FCVTPS */ 12781 case 0x3b: /* FCVTZS */ 12782 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 12783 break; 12784 case 0x3d: /* FRECPE */ 12785 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 12786 break; 12787 case 0x3f: /* FRECPX */ 12788 gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus); 12789 break; 12790 case 0x5a: /* FCVTNU */ 12791 case 0x5b: /* FCVTMU */ 12792 case 0x5c: /* FCVTAU */ 12793 case 0x7a: /* FCVTPU */ 12794 case 0x7b: /* FCVTZU */ 12795 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 12796 
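/* These FCVT* cases use the rounding mode installed into tcg_fpstatus above via gen_set_rmode(). */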
break; 12797 case 0x6f: /* FNEG */ 12798 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 12799 break; 12800 case 0x7d: /* FRSQRTE */ 12801 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 12802 break; 12803 default: 12804 g_assert_not_reached(); 12805 } 12806 12807 /* limit any sign extension going on */ 12808 tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff); 12809 write_fp_sreg(s, rd, tcg_res); 12810 } else { 12811 for (pass = 0; pass < (is_q ? 8 : 4); pass++) { 12812 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12813 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12814 12815 read_vec_element_i32(s, tcg_op, rn, pass, MO_16); 12816 12817 switch (fpop) { 12818 case 0x1a: /* FCVTNS */ 12819 case 0x1b: /* FCVTMS */ 12820 case 0x1c: /* FCVTAS */ 12821 case 0x3a: /* FCVTPS */ 12822 case 0x3b: /* FCVTZS */ 12823 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 12824 break; 12825 case 0x3d: /* FRECPE */ 12826 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 12827 break; 12828 case 0x5a: /* FCVTNU */ 12829 case 0x5b: /* FCVTMU */ 12830 case 0x5c: /* FCVTAU */ 12831 case 0x7a: /* FCVTPU */ 12832 case 0x7b: /* FCVTZU */ 12833 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 12834 break; 12835 case 0x18: /* FRINTN */ 12836 case 0x19: /* FRINTM */ 12837 case 0x38: /* FRINTP */ 12838 case 0x39: /* FRINTZ */ 12839 case 0x58: /* FRINTA */ 12840 case 0x79: /* FRINTI */ 12841 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus); 12842 break; 12843 case 0x59: /* FRINTX */ 12844 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus); 12845 break; 12846 case 0x2f: /* FABS */ 12847 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 12848 break; 12849 case 0x6f: /* FNEG */ 12850 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 12851 break; 12852 case 0x7d: /* FRSQRTE */ 12853 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 12854 break; 12855 case 0x7f: /* FSQRT */ 12856 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus); 12857 break; 12858 default: 12859 g_assert_not_reached(); 12860 } 12861 12862 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 12863 } 12864 12865 clear_vec_high(s, is_q, rd); 12866 } 12867 12868 if (tcg_rmode) { 12869 gen_restore_rmode(tcg_rmode, tcg_fpstatus); 12870 } 12871 } 12872 12873 /* AdvSIMD scalar x indexed element 12874 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 12875 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ 12876 * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | 12877 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ 12878 * AdvSIMD vector x indexed element 12879 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 12880 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ 12881 * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | 12882 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ 12883 */ 12884 static void disas_simd_indexed(DisasContext *s, uint32_t insn) 12885 { 12886 /* This encoding has two kinds of instruction: 12887 * normal, where we perform elt x idxelt => elt for each 12888 * element in the vector 12889 * long, where we perform elt x idxelt and generate a result of 12890 * double the width of the input element 12891 * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs). 
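 * The INSN2 forms operate on the upper half of the input registers.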
12892 */ 12893 bool is_scalar = extract32(insn, 28, 1); 12894 bool is_q = extract32(insn, 30, 1); 12895 bool u = extract32(insn, 29, 1); 12896 int size = extract32(insn, 22, 2); 12897 int l = extract32(insn, 21, 1); 12898 int m = extract32(insn, 20, 1); 12899 /* Note that the Rm field here is only 4 bits, not 5 as it usually is */ 12900 int rm = extract32(insn, 16, 4); 12901 int opcode = extract32(insn, 12, 4); 12902 int h = extract32(insn, 11, 1); 12903 int rn = extract32(insn, 5, 5); 12904 int rd = extract32(insn, 0, 5); 12905 bool is_long = false; 12906 int is_fp = 0; 12907 bool is_fp16 = false; 12908 int index; 12909 TCGv_ptr fpst; 12910 12911 switch (16 * u + opcode) { 12912 case 0x08: /* MUL */ 12913 case 0x10: /* MLA */ 12914 case 0x14: /* MLS */ 12915 if (is_scalar) { 12916 unallocated_encoding(s); 12917 return; 12918 } 12919 break; 12920 case 0x02: /* SMLAL, SMLAL2 */ 12921 case 0x12: /* UMLAL, UMLAL2 */ 12922 case 0x06: /* SMLSL, SMLSL2 */ 12923 case 0x16: /* UMLSL, UMLSL2 */ 12924 case 0x0a: /* SMULL, SMULL2 */ 12925 case 0x1a: /* UMULL, UMULL2 */ 12926 if (is_scalar) { 12927 unallocated_encoding(s); 12928 return; 12929 } 12930 is_long = true; 12931 break; 12932 case 0x03: /* SQDMLAL, SQDMLAL2 */ 12933 case 0x07: /* SQDMLSL, SQDMLSL2 */ 12934 case 0x0b: /* SQDMULL, SQDMULL2 */ 12935 is_long = true; 12936 break; 12937 case 0x0c: /* SQDMULH */ 12938 case 0x0d: /* SQRDMULH */ 12939 break; 12940 case 0x01: /* FMLA */ 12941 case 0x05: /* FMLS */ 12942 case 0x09: /* FMUL */ 12943 case 0x19: /* FMULX */ 12944 is_fp = 1; 12945 break; 12946 case 0x1d: /* SQRDMLAH */ 12947 case 0x1f: /* SQRDMLSH */ 12948 if (!dc_isar_feature(aa64_rdm, s)) { 12949 unallocated_encoding(s); 12950 return; 12951 } 12952 break; 12953 case 0x0e: /* SDOT */ 12954 case 0x1e: /* UDOT */ 12955 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) { 12956 unallocated_encoding(s); 12957 return; 12958 } 12959 break; 12960 case 0x0f: 12961 switch (size) { 12962 case 0: /* SUDOT */ 12963 case 2: /* USDOT */ 12964 if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) { 12965 unallocated_encoding(s); 12966 return; 12967 } 12968 size = MO_32; 12969 break; 12970 case 1: /* BFDOT */ 12971 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { 12972 unallocated_encoding(s); 12973 return; 12974 } 12975 size = MO_32; 12976 break; 12977 case 3: /* BFMLAL{B,T} */ 12978 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { 12979 unallocated_encoding(s); 12980 return; 12981 } 12982 /* can't set is_fp without other incorrect size checks */ 12983 size = MO_16; 12984 break; 12985 default: 12986 unallocated_encoding(s); 12987 return; 12988 } 12989 break; 12990 case 0x11: /* FCMLA #0 */ 12991 case 0x13: /* FCMLA #90 */ 12992 case 0x15: /* FCMLA #180 */ 12993 case 0x17: /* FCMLA #270 */ 12994 if (is_scalar || !dc_isar_feature(aa64_fcma, s)) { 12995 unallocated_encoding(s); 12996 return; 12997 } 12998 is_fp = 2; 12999 break; 13000 case 0x00: /* FMLAL */ 13001 case 0x04: /* FMLSL */ 13002 case 0x18: /* FMLAL2 */ 13003 case 0x1c: /* FMLSL2 */ 13004 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) { 13005 unallocated_encoding(s); 13006 return; 13007 } 13008 size = MO_16; 13009 /* is_fp, but we pass cpu_env not fp_status. 
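 * (the FMLAL helpers obtain the float_status they need from env themselves).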
*/ 13010 break; 13011 default: 13012 unallocated_encoding(s); 13013 return; 13014 } 13015 13016 switch (is_fp) { 13017 case 1: /* normal fp */ 13018 /* convert insn encoded size to MemOp size */ 13019 switch (size) { 13020 case 0: /* half-precision */ 13021 size = MO_16; 13022 is_fp16 = true; 13023 break; 13024 case MO_32: /* single precision */ 13025 case MO_64: /* double precision */ 13026 break; 13027 default: 13028 unallocated_encoding(s); 13029 return; 13030 } 13031 break; 13032 13033 case 2: /* complex fp */ 13034 /* Each indexable element is a complex pair. */ 13035 size += 1; 13036 switch (size) { 13037 case MO_32: 13038 if (h && !is_q) { 13039 unallocated_encoding(s); 13040 return; 13041 } 13042 is_fp16 = true; 13043 break; 13044 case MO_64: 13045 break; 13046 default: 13047 unallocated_encoding(s); 13048 return; 13049 } 13050 break; 13051 13052 default: /* integer */ 13053 switch (size) { 13054 case MO_8: 13055 case MO_64: 13056 unallocated_encoding(s); 13057 return; 13058 } 13059 break; 13060 } 13061 if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) { 13062 unallocated_encoding(s); 13063 return; 13064 } 13065 13066 /* Given MemOp size, adjust register and indexing. */ 13067 switch (size) { 13068 case MO_16: 13069 index = h << 2 | l << 1 | m; 13070 break; 13071 case MO_32: 13072 index = h << 1 | l; 13073 rm |= m << 4; 13074 break; 13075 case MO_64: 13076 if (l || !is_q) { 13077 unallocated_encoding(s); 13078 return; 13079 } 13080 index = h; 13081 rm |= m << 4; 13082 break; 13083 default: 13084 g_assert_not_reached(); 13085 } 13086 13087 if (!fp_access_check(s)) { 13088 return; 13089 } 13090 13091 if (is_fp) { 13092 fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 13093 } else { 13094 fpst = NULL; 13095 } 13096 13097 switch (16 * u + opcode) { 13098 case 0x0e: /* SDOT */ 13099 case 0x1e: /* UDOT */ 13100 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13101 u ? gen_helper_gvec_udot_idx_b 13102 : gen_helper_gvec_sdot_idx_b); 13103 return; 13104 case 0x0f: 13105 switch (extract32(insn, 22, 2)) { 13106 case 0: /* SUDOT */ 13107 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13108 gen_helper_gvec_sudot_idx_b); 13109 return; 13110 case 1: /* BFDOT */ 13111 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13112 gen_helper_gvec_bfdot_idx); 13113 return; 13114 case 2: /* USDOT */ 13115 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13116 gen_helper_gvec_usdot_idx_b); 13117 return; 13118 case 3: /* BFMLAL{B,T} */ 13119 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q, 13120 gen_helper_gvec_bfmlal_idx); 13121 return; 13122 } 13123 g_assert_not_reached(); 13124 case 0x11: /* FCMLA #0 */ 13125 case 0x13: /* FCMLA #90 */ 13126 case 0x15: /* FCMLA #180 */ 13127 case 0x17: /* FCMLA #270 */ 13128 { 13129 int rot = extract32(insn, 13, 2); 13130 int data = (index << 2) | rot; 13131 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 13132 vec_full_reg_offset(s, rn), 13133 vec_full_reg_offset(s, rm), 13134 vec_full_reg_offset(s, rd), fpst, 13135 is_q ? 16 : 8, vec_full_reg_size(s), data, 13136 size == MO_64 13137 ? 
gen_helper_gvec_fcmlas_idx 13138 : gen_helper_gvec_fcmlah_idx); 13139 } 13140 return; 13141 13142 case 0x00: /* FMLAL */ 13143 case 0x04: /* FMLSL */ 13144 case 0x18: /* FMLAL2 */ 13145 case 0x1c: /* FMLSL2 */ 13146 { 13147 int is_s = extract32(opcode, 2, 1); 13148 int is_2 = u; 13149 int data = (index << 2) | (is_2 << 1) | is_s; 13150 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 13151 vec_full_reg_offset(s, rn), 13152 vec_full_reg_offset(s, rm), cpu_env, 13153 is_q ? 16 : 8, vec_full_reg_size(s), 13154 data, gen_helper_gvec_fmlal_idx_a64); 13155 } 13156 return; 13157 13158 case 0x08: /* MUL */ 13159 if (!is_long && !is_scalar) { 13160 static gen_helper_gvec_3 * const fns[3] = { 13161 gen_helper_gvec_mul_idx_h, 13162 gen_helper_gvec_mul_idx_s, 13163 gen_helper_gvec_mul_idx_d, 13164 }; 13165 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 13166 vec_full_reg_offset(s, rn), 13167 vec_full_reg_offset(s, rm), 13168 is_q ? 16 : 8, vec_full_reg_size(s), 13169 index, fns[size - 1]); 13170 return; 13171 } 13172 break; 13173 13174 case 0x10: /* MLA */ 13175 if (!is_long && !is_scalar) { 13176 static gen_helper_gvec_4 * const fns[3] = { 13177 gen_helper_gvec_mla_idx_h, 13178 gen_helper_gvec_mla_idx_s, 13179 gen_helper_gvec_mla_idx_d, 13180 }; 13181 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 13182 vec_full_reg_offset(s, rn), 13183 vec_full_reg_offset(s, rm), 13184 vec_full_reg_offset(s, rd), 13185 is_q ? 16 : 8, vec_full_reg_size(s), 13186 index, fns[size - 1]); 13187 return; 13188 } 13189 break; 13190 13191 case 0x14: /* MLS */ 13192 if (!is_long && !is_scalar) { 13193 static gen_helper_gvec_4 * const fns[3] = { 13194 gen_helper_gvec_mls_idx_h, 13195 gen_helper_gvec_mls_idx_s, 13196 gen_helper_gvec_mls_idx_d, 13197 }; 13198 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 13199 vec_full_reg_offset(s, rn), 13200 vec_full_reg_offset(s, rm), 13201 vec_full_reg_offset(s, rd), 13202 is_q ? 16 : 8, vec_full_reg_size(s), 13203 index, fns[size - 1]); 13204 return; 13205 } 13206 break; 13207 } 13208 13209 if (size == 3) { 13210 TCGv_i64 tcg_idx = tcg_temp_new_i64(); 13211 int pass; 13212 13213 assert(is_fp && is_q && !is_long); 13214 13215 read_vec_element(s, tcg_idx, rm, index, MO_64); 13216 13217 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 13218 TCGv_i64 tcg_op = tcg_temp_new_i64(); 13219 TCGv_i64 tcg_res = tcg_temp_new_i64(); 13220 13221 read_vec_element(s, tcg_op, rn, pass, MO_64); 13222 13223 switch (16 * u + opcode) { 13224 case 0x05: /* FMLS */ 13225 /* As usual for ARM, separate negation for fused multiply-add */ 13226 gen_helper_vfp_negd(tcg_op, tcg_op); 13227 /* fall through */ 13228 case 0x01: /* FMLA */ 13229 read_vec_element(s, tcg_res, rd, pass, MO_64); 13230 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst); 13231 break; 13232 case 0x09: /* FMUL */ 13233 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst); 13234 break; 13235 case 0x19: /* FMULX */ 13236 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst); 13237 break; 13238 default: 13239 g_assert_not_reached(); 13240 } 13241 13242 write_vec_element(s, tcg_res, rd, pass, MO_64); 13243 } 13244 13245 clear_vec_high(s, !is_scalar, rd); 13246 } else if (!is_long) { 13247 /* 32 bit floating point, or 16 or 32 bit integer. 13248 * For the 16 bit scalar case we use the usual Neon helpers and 13249 * rely on the fact that 0 op 0 == 0 with no side effects. 13250 */ 13251 TCGv_i32 tcg_idx = tcg_temp_new_i32(); 13252 int pass, maxpasses; 13253 13254 if (is_scalar) { 13255 maxpasses = 1; 13256 } else { 13257 maxpasses = is_q ? 
4 : 2; 13258 } 13259 13260 read_vec_element_i32(s, tcg_idx, rm, index, size); 13261 13262 if (size == 1 && !is_scalar) { 13263 /* The simplest way to handle the 16x16 indexed ops is to duplicate 13264 * the index into both halves of the 32 bit tcg_idx and then use 13265 * the usual Neon helpers. 13266 */ 13267 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); 13268 } 13269 13270 for (pass = 0; pass < maxpasses; pass++) { 13271 TCGv_i32 tcg_op = tcg_temp_new_i32(); 13272 TCGv_i32 tcg_res = tcg_temp_new_i32(); 13273 13274 read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32); 13275 13276 switch (16 * u + opcode) { 13277 case 0x08: /* MUL */ 13278 case 0x10: /* MLA */ 13279 case 0x14: /* MLS */ 13280 { 13281 static NeonGenTwoOpFn * const fns[2][2] = { 13282 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, 13283 { tcg_gen_add_i32, tcg_gen_sub_i32 }, 13284 }; 13285 NeonGenTwoOpFn *genfn; 13286 bool is_sub = opcode == 0x4; 13287 13288 if (size == 1) { 13289 gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx); 13290 } else { 13291 tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx); 13292 } 13293 if (opcode == 0x8) { 13294 break; 13295 } 13296 read_vec_element_i32(s, tcg_op, rd, pass, MO_32); 13297 genfn = fns[size - 1][is_sub]; 13298 genfn(tcg_res, tcg_op, tcg_res); 13299 break; 13300 } 13301 case 0x05: /* FMLS */ 13302 case 0x01: /* FMLA */ 13303 read_vec_element_i32(s, tcg_res, rd, pass, 13304 is_scalar ? size : MO_32); 13305 switch (size) { 13306 case 1: 13307 if (opcode == 0x5) { 13308 /* As usual for ARM, separate negation for fused 13309 * multiply-add */ 13310 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000); 13311 } 13312 if (is_scalar) { 13313 gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx, 13314 tcg_res, fpst); 13315 } else { 13316 gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx, 13317 tcg_res, fpst); 13318 } 13319 break; 13320 case 2: 13321 if (opcode == 0x5) { 13322 /* As usual for ARM, separate negation for 13323 * fused multiply-add */ 13324 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000); 13325 } 13326 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, 13327 tcg_res, fpst); 13328 break; 13329 default: 13330 g_assert_not_reached(); 13331 } 13332 break; 13333 case 0x09: /* FMUL */ 13334 switch (size) { 13335 case 1: 13336 if (is_scalar) { 13337 gen_helper_advsimd_mulh(tcg_res, tcg_op, 13338 tcg_idx, fpst); 13339 } else { 13340 gen_helper_advsimd_mul2h(tcg_res, tcg_op, 13341 tcg_idx, fpst); 13342 } 13343 break; 13344 case 2: 13345 gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst); 13346 break; 13347 default: 13348 g_assert_not_reached(); 13349 } 13350 break; 13351 case 0x19: /* FMULX */ 13352 switch (size) { 13353 case 1: 13354 if (is_scalar) { 13355 gen_helper_advsimd_mulxh(tcg_res, tcg_op, 13356 tcg_idx, fpst); 13357 } else { 13358 gen_helper_advsimd_mulx2h(tcg_res, tcg_op, 13359 tcg_idx, fpst); 13360 } 13361 break; 13362 case 2: 13363 gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst); 13364 break; 13365 default: 13366 g_assert_not_reached(); 13367 } 13368 break; 13369 case 0x0c: /* SQDMULH */ 13370 if (size == 1) { 13371 gen_helper_neon_qdmulh_s16(tcg_res, cpu_env, 13372 tcg_op, tcg_idx); 13373 } else { 13374 gen_helper_neon_qdmulh_s32(tcg_res, cpu_env, 13375 tcg_op, tcg_idx); 13376 } 13377 break; 13378 case 0x0d: /* SQRDMULH */ 13379 if (size == 1) { 13380 gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env, 13381 tcg_op, tcg_idx); 13382 } else { 13383 gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env, 13384 tcg_op, tcg_idx); 13385 } 13386 break; 13387 case 0x1d: /* 
SQRDMLAH */ 13388 read_vec_element_i32(s, tcg_res, rd, pass, 13389 is_scalar ? size : MO_32); 13390 if (size == 1) { 13391 gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env, 13392 tcg_op, tcg_idx, tcg_res); 13393 } else { 13394 gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env, 13395 tcg_op, tcg_idx, tcg_res); 13396 } 13397 break; 13398 case 0x1f: /* SQRDMLSH */ 13399 read_vec_element_i32(s, tcg_res, rd, pass, 13400 is_scalar ? size : MO_32); 13401 if (size == 1) { 13402 gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env, 13403 tcg_op, tcg_idx, tcg_res); 13404 } else { 13405 gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env, 13406 tcg_op, tcg_idx, tcg_res); 13407 } 13408 break; 13409 default: 13410 g_assert_not_reached(); 13411 } 13412 13413 if (is_scalar) { 13414 write_fp_sreg(s, rd, tcg_res); 13415 } else { 13416 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 13417 } 13418 } 13419 13420 clear_vec_high(s, is_q, rd); 13421 } else { 13422 /* long ops: 16x16->32 or 32x32->64 */ 13423 TCGv_i64 tcg_res[2]; 13424 int pass; 13425 bool satop = extract32(opcode, 0, 1); 13426 MemOp memop = MO_32; 13427 13428 if (satop || !u) { 13429 memop |= MO_SIGN; 13430 } 13431 13432 if (size == 2) { 13433 TCGv_i64 tcg_idx = tcg_temp_new_i64(); 13434 13435 read_vec_element(s, tcg_idx, rm, index, memop); 13436 13437 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 13438 TCGv_i64 tcg_op = tcg_temp_new_i64(); 13439 TCGv_i64 tcg_passres; 13440 int passelt; 13441 13442 if (is_scalar) { 13443 passelt = 0; 13444 } else { 13445 passelt = pass + (is_q * 2); 13446 } 13447 13448 read_vec_element(s, tcg_op, rn, passelt, memop); 13449 13450 tcg_res[pass] = tcg_temp_new_i64(); 13451 13452 if (opcode == 0xa || opcode == 0xb) { 13453 /* Non-accumulating ops */ 13454 tcg_passres = tcg_res[pass]; 13455 } else { 13456 tcg_passres = tcg_temp_new_i64(); 13457 } 13458 13459 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx); 13460 13461 if (satop) { 13462 /* saturating, doubling */ 13463 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env, 13464 tcg_passres, tcg_passres); 13465 } 13466 13467 if (opcode == 0xa || opcode == 0xb) { 13468 continue; 13469 } 13470 13471 /* Accumulating op: handle accumulate step */ 13472 read_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13473 13474 switch (opcode) { 13475 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 13476 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 13477 break; 13478 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 13479 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 13480 break; 13481 case 0x7: /* SQDMLSL, SQDMLSL2 */ 13482 tcg_gen_neg_i64(tcg_passres, tcg_passres); 13483 /* fall through */ 13484 case 0x3: /* SQDMLAL, SQDMLAL2 */ 13485 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env, 13486 tcg_res[pass], 13487 tcg_passres); 13488 break; 13489 default: 13490 g_assert_not_reached(); 13491 } 13492 } 13493 13494 clear_vec_high(s, !is_scalar, rd); 13495 } else { 13496 TCGv_i32 tcg_idx = tcg_temp_new_i32(); 13497 13498 assert(size == 1); 13499 read_vec_element_i32(s, tcg_idx, rm, index, size); 13500 13501 if (!is_scalar) { 13502 /* The simplest way to handle the 16x16 indexed ops is to 13503 * duplicate the index into both halves of the 32 bit tcg_idx 13504 * and then use the usual Neon helpers. 13505 */ 13506 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); 13507 } 13508 13509 for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { 13510 TCGv_i32 tcg_op = tcg_temp_new_i32(); 13511 TCGv_i64 tcg_passres; 13512 13513 if (is_scalar) { 13514 read_vec_element_i32(s, tcg_op, rn, pass, size); 13515 } else { 13516 read_vec_element_i32(s, tcg_op, rn, 13517 pass + (is_q * 2), MO_32); 13518 } 13519 13520 tcg_res[pass] = tcg_temp_new_i64(); 13521 13522 if (opcode == 0xa || opcode == 0xb) { 13523 /* Non-accumulating ops */ 13524 tcg_passres = tcg_res[pass]; 13525 } else { 13526 tcg_passres = tcg_temp_new_i64(); 13527 } 13528 13529 if (memop & MO_SIGN) { 13530 gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx); 13531 } else { 13532 gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx); 13533 } 13534 if (satop) { 13535 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, 13536 tcg_passres, tcg_passres); 13537 } 13538 13539 if (opcode == 0xa || opcode == 0xb) { 13540 continue; 13541 } 13542 13543 /* Accumulating op: handle accumulate step */ 13544 read_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13545 13546 switch (opcode) { 13547 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 13548 gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass], 13549 tcg_passres); 13550 break; 13551 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 13552 gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass], 13553 tcg_passres); 13554 break; 13555 case 0x7: /* SQDMLSL, SQDMLSL2 */ 13556 gen_helper_neon_negl_u32(tcg_passres, tcg_passres); 13557 /* fall through */ 13558 case 0x3: /* SQDMLAL, SQDMLAL2 */ 13559 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env, 13560 tcg_res[pass], 13561 tcg_passres); 13562 break; 13563 default: 13564 g_assert_not_reached(); 13565 } 13566 } 13567 13568 if (is_scalar) { 13569 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]); 13570 } 13571 } 13572 13573 if (is_scalar) { 13574 tcg_res[1] = tcg_constant_i64(0); 13575 } 13576 13577 for (pass = 0; pass < 2; pass++) { 13578 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13579 } 13580 } 13581 } 13582 13583 /* Crypto AES 13584 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 13585 * +-----------------+------+-----------+--------+-----+------+------+ 13586 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | 13587 * +-----------------+------+-----------+--------+-----+------+------+ 13588 */ 13589 static void disas_crypto_aes(DisasContext *s, uint32_t insn) 13590 { 13591 int size = extract32(insn, 22, 2); 13592 int opcode = extract32(insn, 12, 5); 13593 int rn = extract32(insn, 5, 5); 13594 int rd = extract32(insn, 0, 5); 13595 int decrypt; 13596 gen_helper_gvec_2 *genfn2 = NULL; 13597 gen_helper_gvec_3 *genfn3 = NULL; 13598 13599 if (!dc_isar_feature(aa64_aes, s) || size != 0) { 13600 unallocated_encoding(s); 13601 return; 13602 } 13603 13604 switch (opcode) { 13605 case 0x4: /* AESE */ 13606 decrypt = 0; 13607 genfn3 = gen_helper_crypto_aese; 13608 break; 13609 case 0x6: /* AESMC */ 13610 decrypt = 0; 13611 genfn2 = gen_helper_crypto_aesmc; 13612 break; 13613 case 0x5: /* AESD */ 13614 decrypt = 1; 13615 genfn3 = gen_helper_crypto_aese; 13616 break; 13617 case 0x7: /* AESIMC */ 13618 decrypt = 1; 13619 genfn2 = gen_helper_crypto_aesmc; 13620 break; 13621 default: 13622 unallocated_encoding(s); 13623 return; 13624 } 13625 13626 if (!fp_access_check(s)) { 13627 return; 13628 } 13629 if (genfn2) { 13630 gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2); 13631 } else { 13632 gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3); 13633 } 13634 } 13635 13636 /* Crypto three-reg SHA 13637 * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 13638 * 
+-----------------+------+---+------+---+--------+-----+------+------+ 13639 * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd | 13640 * +-----------------+------+---+------+---+--------+-----+------+------+ 13641 */ 13642 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) 13643 { 13644 int size = extract32(insn, 22, 2); 13645 int opcode = extract32(insn, 12, 3); 13646 int rm = extract32(insn, 16, 5); 13647 int rn = extract32(insn, 5, 5); 13648 int rd = extract32(insn, 0, 5); 13649 gen_helper_gvec_3 *genfn; 13650 bool feature; 13651 13652 if (size != 0) { 13653 unallocated_encoding(s); 13654 return; 13655 } 13656 13657 switch (opcode) { 13658 case 0: /* SHA1C */ 13659 genfn = gen_helper_crypto_sha1c; 13660 feature = dc_isar_feature(aa64_sha1, s); 13661 break; 13662 case 1: /* SHA1P */ 13663 genfn = gen_helper_crypto_sha1p; 13664 feature = dc_isar_feature(aa64_sha1, s); 13665 break; 13666 case 2: /* SHA1M */ 13667 genfn = gen_helper_crypto_sha1m; 13668 feature = dc_isar_feature(aa64_sha1, s); 13669 break; 13670 case 3: /* SHA1SU0 */ 13671 genfn = gen_helper_crypto_sha1su0; 13672 feature = dc_isar_feature(aa64_sha1, s); 13673 break; 13674 case 4: /* SHA256H */ 13675 genfn = gen_helper_crypto_sha256h; 13676 feature = dc_isar_feature(aa64_sha256, s); 13677 break; 13678 case 5: /* SHA256H2 */ 13679 genfn = gen_helper_crypto_sha256h2; 13680 feature = dc_isar_feature(aa64_sha256, s); 13681 break; 13682 case 6: /* SHA256SU1 */ 13683 genfn = gen_helper_crypto_sha256su1; 13684 feature = dc_isar_feature(aa64_sha256, s); 13685 break; 13686 default: 13687 unallocated_encoding(s); 13688 return; 13689 } 13690 13691 if (!feature) { 13692 unallocated_encoding(s); 13693 return; 13694 } 13695 13696 if (!fp_access_check(s)) { 13697 return; 13698 } 13699 gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn); 13700 } 13701 13702 /* Crypto two-reg SHA 13703 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 13704 * +-----------------+------+-----------+--------+-----+------+------+ 13705 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | 13706 * +-----------------+------+-----------+--------+-----+------+------+ 13707 */ 13708 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) 13709 { 13710 int size = extract32(insn, 22, 2); 13711 int opcode = extract32(insn, 12, 5); 13712 int rn = extract32(insn, 5, 5); 13713 int rd = extract32(insn, 0, 5); 13714 gen_helper_gvec_2 *genfn; 13715 bool feature; 13716 13717 if (size != 0) { 13718 unallocated_encoding(s); 13719 return; 13720 } 13721 13722 switch (opcode) { 13723 case 0: /* SHA1H */ 13724 feature = dc_isar_feature(aa64_sha1, s); 13725 genfn = gen_helper_crypto_sha1h; 13726 break; 13727 case 1: /* SHA1SU1 */ 13728 feature = dc_isar_feature(aa64_sha1, s); 13729 genfn = gen_helper_crypto_sha1su1; 13730 break; 13731 case 2: /* SHA256SU0 */ 13732 feature = dc_isar_feature(aa64_sha256, s); 13733 genfn = gen_helper_crypto_sha256su0; 13734 break; 13735 default: 13736 unallocated_encoding(s); 13737 return; 13738 } 13739 13740 if (!feature) { 13741 unallocated_encoding(s); 13742 return; 13743 } 13744 13745 if (!fp_access_check(s)) { 13746 return; 13747 } 13748 gen_gvec_op2_ool(s, true, rd, rn, 0, genfn); 13749 } 13750 13751 static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) 13752 { 13753 tcg_gen_rotli_i64(d, m, 1); 13754 tcg_gen_xor_i64(d, d, n); 13755 } 13756 13757 static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m) 13758 { 13759 tcg_gen_rotli_vec(vece, d, m, 1); 13760 tcg_gen_xor_vec(vece, 
d, d, n); 13761 } 13762 13763 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, 13764 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) 13765 { 13766 static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; 13767 static const GVecGen3 op = { 13768 .fni8 = gen_rax1_i64, 13769 .fniv = gen_rax1_vec, 13770 .opt_opc = vecop_list, 13771 .fno = gen_helper_crypto_rax1, 13772 .vece = MO_64, 13773 }; 13774 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op); 13775 } 13776 13777 /* Crypto three-reg SHA512 13778 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 13779 * +-----------------------+------+---+---+-----+--------+------+------+ 13780 * | 1 1 0 0 1 1 1 0 0 1 1 | Rm | 1 | O | 0 0 | opcode | Rn | Rd | 13781 * +-----------------------+------+---+---+-----+--------+------+------+ 13782 */ 13783 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) 13784 { 13785 int opcode = extract32(insn, 10, 2); 13786 int o = extract32(insn, 14, 1); 13787 int rm = extract32(insn, 16, 5); 13788 int rn = extract32(insn, 5, 5); 13789 int rd = extract32(insn, 0, 5); 13790 bool feature; 13791 gen_helper_gvec_3 *oolfn = NULL; 13792 GVecGen3Fn *gvecfn = NULL; 13793 13794 if (o == 0) { 13795 switch (opcode) { 13796 case 0: /* SHA512H */ 13797 feature = dc_isar_feature(aa64_sha512, s); 13798 oolfn = gen_helper_crypto_sha512h; 13799 break; 13800 case 1: /* SHA512H2 */ 13801 feature = dc_isar_feature(aa64_sha512, s); 13802 oolfn = gen_helper_crypto_sha512h2; 13803 break; 13804 case 2: /* SHA512SU1 */ 13805 feature = dc_isar_feature(aa64_sha512, s); 13806 oolfn = gen_helper_crypto_sha512su1; 13807 break; 13808 case 3: /* RAX1 */ 13809 feature = dc_isar_feature(aa64_sha3, s); 13810 gvecfn = gen_gvec_rax1; 13811 break; 13812 default: 13813 g_assert_not_reached(); 13814 } 13815 } else { 13816 switch (opcode) { 13817 case 0: /* SM3PARTW1 */ 13818 feature = dc_isar_feature(aa64_sm3, s); 13819 oolfn = gen_helper_crypto_sm3partw1; 13820 break; 13821 case 1: /* SM3PARTW2 */ 13822 feature = dc_isar_feature(aa64_sm3, s); 13823 oolfn = gen_helper_crypto_sm3partw2; 13824 break; 13825 case 2: /* SM4EKEY */ 13826 feature = dc_isar_feature(aa64_sm4, s); 13827 oolfn = gen_helper_crypto_sm4ekey; 13828 break; 13829 default: 13830 unallocated_encoding(s); 13831 return; 13832 } 13833 } 13834 13835 if (!feature) { 13836 unallocated_encoding(s); 13837 return; 13838 } 13839 13840 if (!fp_access_check(s)) { 13841 return; 13842 } 13843 13844 if (oolfn) { 13845 gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); 13846 } else { 13847 gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64); 13848 } 13849 } 13850 13851 /* Crypto two-reg SHA512 13852 * 31 12 11 10 9 5 4 0 13853 * +-----------------------------------------+--------+------+------+ 13854 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode | Rn | Rd | 13855 * +-----------------------------------------+--------+------+------+ 13856 */ 13857 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) 13858 { 13859 int opcode = extract32(insn, 10, 2); 13860 int rn = extract32(insn, 5, 5); 13861 int rd = extract32(insn, 0, 5); 13862 bool feature; 13863 13864 switch (opcode) { 13865 case 0: /* SHA512SU0 */ 13866 feature = dc_isar_feature(aa64_sha512, s); 13867 break; 13868 case 1: /* SM4E */ 13869 feature = dc_isar_feature(aa64_sm4, s); 13870 break; 13871 default: 13872 unallocated_encoding(s); 13873 return; 13874 } 13875 13876 if (!feature) { 13877 unallocated_encoding(s); 13878 return; 13879 } 13880 13881 if (!fp_access_check(s)) { 13882 return; 
13883 } 13884 13885 switch (opcode) { 13886 case 0: /* SHA512SU0 */ 13887 gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0); 13888 break; 13889 case 1: /* SM4E */ 13890 gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e); 13891 break; 13892 default: 13893 g_assert_not_reached(); 13894 } 13895 } 13896 13897 /* Crypto four-register 13898 * 31 23 22 21 20 16 15 14 10 9 5 4 0 13899 * +-------------------+-----+------+---+------+------+------+ 13900 * | 1 1 0 0 1 1 1 0 0 | Op0 | Rm | 0 | Ra | Rn | Rd | 13901 * +-------------------+-----+------+---+------+------+------+ 13902 */ 13903 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn) 13904 { 13905 int op0 = extract32(insn, 21, 2); 13906 int rm = extract32(insn, 16, 5); 13907 int ra = extract32(insn, 10, 5); 13908 int rn = extract32(insn, 5, 5); 13909 int rd = extract32(insn, 0, 5); 13910 bool feature; 13911 13912 switch (op0) { 13913 case 0: /* EOR3 */ 13914 case 1: /* BCAX */ 13915 feature = dc_isar_feature(aa64_sha3, s); 13916 break; 13917 case 2: /* SM3SS1 */ 13918 feature = dc_isar_feature(aa64_sm3, s); 13919 break; 13920 default: 13921 unallocated_encoding(s); 13922 return; 13923 } 13924 13925 if (!feature) { 13926 unallocated_encoding(s); 13927 return; 13928 } 13929 13930 if (!fp_access_check(s)) { 13931 return; 13932 } 13933 13934 if (op0 < 2) { 13935 TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2]; 13936 int pass; 13937 13938 tcg_op1 = tcg_temp_new_i64(); 13939 tcg_op2 = tcg_temp_new_i64(); 13940 tcg_op3 = tcg_temp_new_i64(); 13941 tcg_res[0] = tcg_temp_new_i64(); 13942 tcg_res[1] = tcg_temp_new_i64(); 13943 13944 for (pass = 0; pass < 2; pass++) { 13945 read_vec_element(s, tcg_op1, rn, pass, MO_64); 13946 read_vec_element(s, tcg_op2, rm, pass, MO_64); 13947 read_vec_element(s, tcg_op3, ra, pass, MO_64); 13948 13949 if (op0 == 0) { 13950 /* EOR3 */ 13951 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3); 13952 } else { 13953 /* BCAX */ 13954 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3); 13955 } 13956 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); 13957 } 13958 write_vec_element(s, tcg_res[0], rd, 0, MO_64); 13959 write_vec_element(s, tcg_res[1], rd, 1, MO_64); 13960 } else { 13961 TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero; 13962 13963 tcg_op1 = tcg_temp_new_i32(); 13964 tcg_op2 = tcg_temp_new_i32(); 13965 tcg_op3 = tcg_temp_new_i32(); 13966 tcg_res = tcg_temp_new_i32(); 13967 tcg_zero = tcg_constant_i32(0); 13968 13969 read_vec_element_i32(s, tcg_op1, rn, 3, MO_32); 13970 read_vec_element_i32(s, tcg_op2, rm, 3, MO_32); 13971 read_vec_element_i32(s, tcg_op3, ra, 3, MO_32); 13972 13973 tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); 13974 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); 13975 tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); 13976 tcg_gen_rotri_i32(tcg_res, tcg_res, 25); 13977 13978 write_vec_element_i32(s, tcg_zero, rd, 0, MO_32); 13979 write_vec_element_i32(s, tcg_zero, rd, 1, MO_32); 13980 write_vec_element_i32(s, tcg_zero, rd, 2, MO_32); 13981 write_vec_element_i32(s, tcg_res, rd, 3, MO_32); 13982 } 13983 } 13984 13985 /* Crypto XAR 13986 * 31 21 20 16 15 10 9 5 4 0 13987 * +-----------------------+------+--------+------+------+ 13988 * | 1 1 0 0 1 1 1 0 1 0 0 | Rm | imm6 | Rn | Rd | 13989 * +-----------------------+------+--------+------+------+ 13990 */ 13991 static void disas_crypto_xar(DisasContext *s, uint32_t insn) 13992 { 13993 int rm = extract32(insn, 16, 5); 13994 int imm6 = extract32(insn, 10, 6); 13995 int rn = extract32(insn, 5, 5); 13996 int rd = 
extract32(insn, 0, 5); 13997 13998 if (!dc_isar_feature(aa64_sha3, s)) { 13999 unallocated_encoding(s); 14000 return; 14001 } 14002 14003 if (!fp_access_check(s)) { 14004 return; 14005 } 14006 14007 gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd), 14008 vec_full_reg_offset(s, rn), 14009 vec_full_reg_offset(s, rm), imm6, 16, 14010 vec_full_reg_size(s)); 14011 } 14012 14013 /* Crypto three-reg imm2 14014 * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 14015 * +-----------------------+------+-----+------+--------+------+------+ 14016 * | 1 1 0 0 1 1 1 0 0 1 0 | Rm | 1 0 | imm2 | opcode | Rn | Rd | 14017 * +-----------------------+------+-----+------+--------+------+------+ 14018 */ 14019 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) 14020 { 14021 static gen_helper_gvec_3 * const fns[4] = { 14022 gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b, 14023 gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b, 14024 }; 14025 int opcode = extract32(insn, 10, 2); 14026 int imm2 = extract32(insn, 12, 2); 14027 int rm = extract32(insn, 16, 5); 14028 int rn = extract32(insn, 5, 5); 14029 int rd = extract32(insn, 0, 5); 14030 14031 if (!dc_isar_feature(aa64_sm3, s)) { 14032 unallocated_encoding(s); 14033 return; 14034 } 14035 14036 if (!fp_access_check(s)) { 14037 return; 14038 } 14039 14040 gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]); 14041 } 14042 14043 /* C3.6 Data processing - SIMD, inc Crypto 14044 * 14045 * As the decode gets a little complex we are using a table based 14046 * approach for this part of the decode. 14047 */ 14048 static const AArch64DecodeTable data_proc_simd[] = { 14049 /* pattern , mask , fn */ 14050 { 0x0e200400, 0x9f200400, disas_simd_three_reg_same }, 14051 { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra }, 14052 { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff }, 14053 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, 14054 { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes }, 14055 { 0x0e000400, 0x9fe08400, disas_simd_copy }, 14056 { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */ 14057 /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */ 14058 { 0x0f000400, 0x9ff80400, disas_simd_mod_imm }, 14059 { 0x0f000400, 0x9f800400, disas_simd_shift_imm }, 14060 { 0x0e000000, 0xbf208c00, disas_simd_tb }, 14061 { 0x0e000800, 0xbf208c00, disas_simd_zip_trn }, 14062 { 0x2e000000, 0xbf208400, disas_simd_ext }, 14063 { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same }, 14064 { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra }, 14065 { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff }, 14066 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, 14067 { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise }, 14068 { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy }, 14069 { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */ 14070 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, 14071 { 0x4e280800, 0xff3e0c00, disas_crypto_aes }, 14072 { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha }, 14073 { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha }, 14074 { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 }, 14075 { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 }, 14076 { 0xce000000, 0xff808000, disas_crypto_four_reg }, 14077 { 0xce800000, 0xffe00000, disas_crypto_xar }, 14078 { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 }, 14079 { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 }, 14080 { 0x0e780800, 0x8f7e0c00, 
disas_simd_two_reg_misc_fp16 }, 14081 { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 }, 14082 { 0x00000000, 0x00000000, NULL } 14083 }; 14084 14085 static void disas_data_proc_simd(DisasContext *s, uint32_t insn) 14086 { 14087 /* Note that this is called with all non-FP cases from 14088 * table C3-6 so it must UNDEF for entries not specifically 14089 * allocated to instructions in that table. 14090 */ 14091 AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn); 14092 if (fn) { 14093 fn(s, insn); 14094 } else { 14095 unallocated_encoding(s); 14096 } 14097 } 14098 14099 /* C3.6 Data processing - SIMD and floating point */ 14100 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn) 14101 { 14102 if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) { 14103 disas_data_proc_fp(s, insn); 14104 } else { 14105 /* SIMD, including crypto */ 14106 disas_data_proc_simd(s, insn); 14107 } 14108 } 14109 14110 static bool trans_OK(DisasContext *s, arg_OK *a) 14111 { 14112 return true; 14113 } 14114 14115 static bool trans_FAIL(DisasContext *s, arg_OK *a) 14116 { 14117 s->is_nonstreaming = true; 14118 return true; 14119 } 14120 14121 /** 14122 * is_guarded_page: 14123 * @env: The cpu environment 14124 * @s: The DisasContext 14125 * 14126 * Return true if the page is guarded. 14127 */ 14128 static bool is_guarded_page(CPUARMState *env, DisasContext *s) 14129 { 14130 uint64_t addr = s->base.pc_first; 14131 #ifdef CONFIG_USER_ONLY 14132 return page_get_flags(addr) & PAGE_BTI; 14133 #else 14134 CPUTLBEntryFull *full; 14135 void *host; 14136 int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx); 14137 int flags; 14138 14139 /* 14140 * We test this immediately after reading an insn, which means 14141 * that the TLB entry must be present and valid, and thus this 14142 * access will never raise an exception. 14143 */ 14144 flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx, 14145 false, &host, &full, 0); 14146 assert(!(flags & TLB_INVALID_MASK)); 14147 14148 return full->guarded; 14149 #endif 14150 } 14151 14152 /** 14153 * btype_destination_ok: 14154 * @insn: The instruction at the branch destination 14155 * @bt: SCTLR_ELx.BT 14156 * @btype: PSTATE.BTYPE, and is non-zero 14157 * 14158 * On a guarded page, there are a limited number of insns 14159 * that may be present at the branch target: 14160 * - branch target identifiers, 14161 * - paciasp, pacibsp, 14162 * - BRK insn 14163 * - HLT insn 14164 * Anything else causes a Branch Target Exception. 14165 * 14166 * Return true if the branch is compatible, false to raise BTITRAP. 14167 */ 14168 static bool btype_destination_ok(uint32_t insn, bool bt, int btype) 14169 { 14170 if ((insn & 0xfffff01fu) == 0xd503201fu) { 14171 /* HINT space */ 14172 switch (extract32(insn, 5, 7)) { 14173 case 0b011001: /* PACIASP */ 14174 case 0b011011: /* PACIBSP */ 14175 /* 14176 * If SCTLR_ELx.BT, then PACI*SP are not compatible 14177 * with btype == 3. Otherwise all btype are ok. 14178 */ 14179 return !bt || btype != 3; 14180 case 0b100000: /* BTI */ 14181 /* Not compatible with any btype. */ 14182 return false; 14183 case 0b100010: /* BTI c */ 14184 /* Not compatible with btype == 3 */ 14185 return btype != 3; 14186 case 0b100100: /* BTI j */ 14187 /* Not compatible with btype == 2 */ 14188 return btype != 2; 14189 case 0b100110: /* BTI jc */ 14190 /* Compatible with any btype. 
*/ 14191 return true; 14192 } 14193 } else { 14194 switch (insn & 0xffe0001fu) { 14195 case 0xd4200000u: /* BRK */ 14196 case 0xd4400000u: /* HLT */ 14197 /* Give priority to the breakpoint exception. */ 14198 return true; 14199 } 14200 } 14201 return false; 14202 } 14203 14204 /* C3.1 A64 instruction index by encoding */ 14205 static void disas_a64_legacy(DisasContext *s, uint32_t insn) 14206 { 14207 switch (extract32(insn, 25, 4)) { 14208 case 0x0: 14209 if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) { 14210 unallocated_encoding(s); 14211 } 14212 break; 14213 case 0x1: case 0x3: /* UNALLOCATED */ 14214 unallocated_encoding(s); 14215 break; 14216 case 0x2: 14217 if (!disas_sve(s, insn)) { 14218 unallocated_encoding(s); 14219 } 14220 break; 14221 case 0x8: case 0x9: /* Data processing - immediate */ 14222 disas_data_proc_imm(s, insn); 14223 break; 14224 case 0xa: case 0xb: /* Branch, exception generation and system insns */ 14225 disas_b_exc_sys(s, insn); 14226 break; 14227 case 0x4: 14228 case 0x6: 14229 case 0xc: 14230 case 0xe: /* Loads and stores */ 14231 disas_ldst(s, insn); 14232 break; 14233 case 0x5: 14234 case 0xd: /* Data processing - register */ 14235 disas_data_proc_reg(s, insn); 14236 break; 14237 case 0x7: 14238 case 0xf: /* Data processing - SIMD and floating point */ 14239 disas_data_proc_simd_fp(s, insn); 14240 break; 14241 default: 14242 assert(FALSE); /* all 15 cases should be handled above */ 14243 break; 14244 } 14245 } 14246 14247 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, 14248 CPUState *cpu) 14249 { 14250 DisasContext *dc = container_of(dcbase, DisasContext, base); 14251 CPUARMState *env = cpu->env_ptr; 14252 ARMCPU *arm_cpu = env_archcpu(env); 14253 CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb); 14254 int bound, core_mmu_idx; 14255 14256 dc->isar = &arm_cpu->isar; 14257 dc->condjmp = 0; 14258 dc->pc_save = dc->base.pc_first; 14259 dc->aarch64 = true; 14260 dc->thumb = false; 14261 dc->sctlr_b = 0; 14262 dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? 
MO_BE : MO_LE; 14263 dc->condexec_mask = 0; 14264 dc->condexec_cond = 0; 14265 core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX); 14266 dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx); 14267 dc->tbii = EX_TBFLAG_A64(tb_flags, TBII); 14268 dc->tbid = EX_TBFLAG_A64(tb_flags, TBID); 14269 dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA); 14270 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); 14271 #if !defined(CONFIG_USER_ONLY) 14272 dc->user = (dc->current_el == 0); 14273 #endif 14274 dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); 14275 dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); 14276 dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL); 14277 dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE); 14278 dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC); 14279 dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET); 14280 dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL); 14281 dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL); 14282 dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16; 14283 dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16; 14284 dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE); 14285 dc->bt = EX_TBFLAG_A64(tb_flags, BT); 14286 dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE); 14287 dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV); 14288 dc->ata = EX_TBFLAG_A64(tb_flags, ATA); 14289 dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE); 14290 dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE); 14291 dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM); 14292 dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA); 14293 dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING); 14294 dc->vec_len = 0; 14295 dc->vec_stride = 0; 14296 dc->cp_regs = arm_cpu->cp_regs; 14297 dc->features = env->features; 14298 dc->dcz_blocksize = arm_cpu->dcz_blocksize; 14299 14300 #ifdef CONFIG_USER_ONLY 14301 /* In sve_probe_page, we assume TBI is enabled. */ 14302 tcg_debug_assert(dc->tbid & 1); 14303 #endif 14304 14305 /* Single step state. The code-generation logic here is: 14306 * SS_ACTIVE == 0: 14307 * generate code with no special handling for single-stepping (except 14308 * that anything that can make us go to SS_ACTIVE == 1 must end the TB; 14309 * this happens anyway because those changes are all system register or 14310 * PSTATE writes). 14311 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending) 14312 * emit code for one insn 14313 * emit code to clear PSTATE.SS 14314 * emit code to generate software step exception for completed step 14315 * end TB (as usual for having generated an exception) 14316 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending) 14317 * emit code to generate a software step exception 14318 * end the TB 14319 */ 14320 dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE); 14321 dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS); 14322 dc->is_ldex = false; 14323 14324 /* Bound the number of insns to execute to those left on the page. */ 14325 bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4; 14326 14327 /* If architectural single step active, limit to 1. 
*/ 14328 if (dc->ss_active) { 14329 bound = 1; 14330 } 14331 dc->base.max_insns = MIN(dc->base.max_insns, bound); 14332 } 14333 14334 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu) 14335 { 14336 } 14337 14338 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) 14339 { 14340 DisasContext *dc = container_of(dcbase, DisasContext, base); 14341 target_ulong pc_arg = dc->base.pc_next; 14342 14343 if (tb_cflags(dcbase->tb) & CF_PCREL) { 14344 pc_arg &= ~TARGET_PAGE_MASK; 14345 } 14346 tcg_gen_insn_start(pc_arg, 0, 0); 14347 dc->insn_start = tcg_last_op(); 14348 } 14349 14350 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) 14351 { 14352 DisasContext *s = container_of(dcbase, DisasContext, base); 14353 CPUARMState *env = cpu->env_ptr; 14354 uint64_t pc = s->base.pc_next; 14355 uint32_t insn; 14356 14357 /* Singlestep exceptions have the highest priority. */ 14358 if (s->ss_active && !s->pstate_ss) { 14359 /* Singlestep state is Active-pending. 14360 * If we're in this state at the start of a TB then either 14361 * a) we just took an exception to an EL which is being debugged 14362 * and this is the first insn in the exception handler 14363 * b) debug exceptions were masked and we just unmasked them 14364 * without changing EL (eg by clearing PSTATE.D) 14365 * In either case we're going to take a swstep exception in the 14366 * "did not step an insn" case, and so the syndrome ISV and EX 14367 * bits should be zero. 14368 */ 14369 assert(s->base.num_insns == 1); 14370 gen_swstep_exception(s, 0, 0); 14371 s->base.is_jmp = DISAS_NORETURN; 14372 s->base.pc_next = pc + 4; 14373 return; 14374 } 14375 14376 if (pc & 3) { 14377 /* 14378 * PC alignment fault. This has priority over the instruction abort 14379 * that we would receive from a translation fault via arm_ldl_code. 14380 * This should only be possible after an indirect branch, at the 14381 * start of the TB. 14382 */ 14383 assert(s->base.num_insns == 1); 14384 gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc)); 14385 s->base.is_jmp = DISAS_NORETURN; 14386 s->base.pc_next = QEMU_ALIGN_UP(pc, 4); 14387 return; 14388 } 14389 14390 s->pc_curr = pc; 14391 insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b); 14392 s->insn = insn; 14393 s->base.pc_next = pc + 4; 14394 14395 s->fp_access_checked = false; 14396 s->sve_access_checked = false; 14397 14398 if (s->pstate_il) { 14399 /* 14400 * Illegal execution state. This has priority over BTI 14401 * exceptions, but comes after instruction abort exceptions. 14402 */ 14403 gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate()); 14404 return; 14405 } 14406 14407 if (dc_isar_feature(aa64_bti, s)) { 14408 if (s->base.num_insns == 1) { 14409 /* 14410 * At the first insn of the TB, compute s->guarded_page. 14411 * We delayed computing this until successfully reading 14412 * the first insn of the TB, above. This (mostly) ensures 14413 * that the softmmu tlb entry has been populated, and the 14414 * page table GP bit is available. 14415 * 14416 * Note that we need to compute this even if btype == 0, 14417 * because this value is used for BR instructions later 14418 * where ENV is not available. 14419 */ 14420 s->guarded_page = is_guarded_page(env, s); 14421 14422 /* First insn can have btype set to non-zero. */ 14423 tcg_debug_assert(s->btype >= 0); 14424 14425 /* 14426 * Note that the Branch Target Exception has fairly high 14427 * priority -- below debugging exceptions but above most 14428 * everything else. 
This allows us to handle this now 14429 * instead of waiting until the insn is otherwise decoded. 14430 */ 14431 if (s->btype != 0 14432 && s->guarded_page 14433 && !btype_destination_ok(insn, s->bt, s->btype)) { 14434 gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype)); 14435 return; 14436 } 14437 } else { 14438 /* Not the first insn: btype must be 0. */ 14439 tcg_debug_assert(s->btype == 0); 14440 } 14441 } 14442 14443 s->is_nonstreaming = false; 14444 if (s->sme_trap_nonstreaming) { 14445 disas_sme_fa64(s, insn); 14446 } 14447 14448 14449 if (!disas_a64(s, insn)) { 14450 disas_a64_legacy(s, insn); 14451 } 14452 14453 /* 14454 * After execution of most insns, btype is reset to 0. 14455 * Note that we set btype == -1 when the insn sets btype. 14456 */ 14457 if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) { 14458 reset_btype(s); 14459 } 14460 } 14461 14462 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) 14463 { 14464 DisasContext *dc = container_of(dcbase, DisasContext, base); 14465 14466 if (unlikely(dc->ss_active)) { 14467 /* Note that this means single stepping WFI doesn't halt the CPU. 14468 * For conditional branch insns this is harmless unreachable code as 14469 * gen_goto_tb() has already handled emitting the debug exception 14470 * (and thus a tb-jump is not possible when singlestepping). 14471 */ 14472 switch (dc->base.is_jmp) { 14473 default: 14474 gen_a64_update_pc(dc, 4); 14475 /* fall through */ 14476 case DISAS_EXIT: 14477 case DISAS_JUMP: 14478 gen_step_complete_exception(dc); 14479 break; 14480 case DISAS_NORETURN: 14481 break; 14482 } 14483 } else { 14484 switch (dc->base.is_jmp) { 14485 case DISAS_NEXT: 14486 case DISAS_TOO_MANY: 14487 gen_goto_tb(dc, 1, 4); 14488 break; 14489 default: 14490 case DISAS_UPDATE_EXIT: 14491 gen_a64_update_pc(dc, 4); 14492 /* fall through */ 14493 case DISAS_EXIT: 14494 tcg_gen_exit_tb(NULL, 0); 14495 break; 14496 case DISAS_UPDATE_NOCHAIN: 14497 gen_a64_update_pc(dc, 4); 14498 /* fall through */ 14499 case DISAS_JUMP: 14500 tcg_gen_lookup_and_goto_ptr(); 14501 break; 14502 case DISAS_NORETURN: 14503 case DISAS_SWI: 14504 break; 14505 case DISAS_WFE: 14506 gen_a64_update_pc(dc, 4); 14507 gen_helper_wfe(cpu_env); 14508 break; 14509 case DISAS_YIELD: 14510 gen_a64_update_pc(dc, 4); 14511 gen_helper_yield(cpu_env); 14512 break; 14513 case DISAS_WFI: 14514 /* 14515 * This is a special case because we don't want to just halt 14516 * the CPU if trying to debug across a WFI. 14517 */ 14518 gen_a64_update_pc(dc, 4); 14519 gen_helper_wfi(cpu_env, tcg_constant_i32(4)); 14520 /* 14521 * The helper doesn't necessarily throw an exception, but we 14522 * must go back to the main loop to check for interrupts anyway. 14523 */ 14524 tcg_gen_exit_tb(NULL, 0); 14525 break; 14526 } 14527 } 14528 } 14529 14530 static void aarch64_tr_disas_log(const DisasContextBase *dcbase, 14531 CPUState *cpu, FILE *logfile) 14532 { 14533 DisasContext *dc = container_of(dcbase, DisasContext, base); 14534 14535 fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first)); 14536 target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size); 14537 } 14538 14539 const TranslatorOps aarch64_translator_ops = { 14540 .init_disas_context = aarch64_tr_init_disas_context, 14541 .tb_start = aarch64_tr_tb_start, 14542 .insn_start = aarch64_tr_insn_start, 14543 .translate_insn = aarch64_tr_translate_insn, 14544 .tb_stop = aarch64_tr_tb_stop, 14545 .disas_log = aarch64_tr_disas_log, 14546 }; 14547
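
/*
 * For context, a rough sketch of how the ops table above is consumed.
 * aarch64_translator_ops is not invoked from this file: the per-target
 * entry point in translate.c picks it for AArch64 state and hands it to
 * the generic translator loop in accel/tcg, which then calls
 * init_disas_context once per TB, insn_start/translate_insn once per
 * instruction, and finally tb_stop.  Keeping the A64 decoder behind a
 * TranslatorOps table is what lets the generic loop stay ignorant of
 * ARM specifics.  The code below is an illustrative sketch only (kept
 * out of the build with #if 0); the names and signatures follow
 * translate.c as of this QEMU version and may differ in other releases,
 * and the Thumb/A32 ops selection is omitted for brevity.
 */
#if 0   /* illustrative sketch, not compiled */
void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
                           target_ulong pc, void *host_pc)
{
    DisasContext dc = { };
    const TranslatorOps *ops = &arm_translator_ops;
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(tb);

    if (EX_TBFLAG_ANY(tb_flags, AARCH64_STATE)) {
        /* A64 code: use the hooks defined at the end of this file. */
        ops = &aarch64_translator_ops;
    }

    /*
     * The shared loop drives the hooks: init_disas_context, then
     * insn_start + translate_insn per insn, then tb_stop and disas_log.
     */
    translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
}
#endif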