/*
 * AArch64 translation
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"
#include "semihosting/semihost.h"
#include "exec/gen-icount.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "cpregs.h"
#include "translate-a64.h"
#include "qemu/atomic128.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
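/*
 * Illustrative sketch only (hypothetical entries, not a table from this
 * file): a decode table is a pattern/mask/handler triple, terminated by
 * an all-zero mask, e.g.
 *
 *   static const AArch64DecodeTable example_table[] = {
 *       { 0x0e200400, 0x9f200400, disas_example_group },
 *       { 0x00000000, 0x00000000, NULL }
 *   };
 *
 * lookup_disas_fn() further below returns the first disas_fn whose
 * pattern/mask matches the instruction word.
 */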
/* initialize TCG globals. */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
 */
static int get_a64_user_mem_index(DisasContext *s)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), cpu_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55. */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}
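/*
 * Worked example (illustrative): with tbi == 1 (TBI0 only), the sextract
 * above produces the tag extension for any address; ANDing with the
 * original value keeps that extension only when bit 55 == 0 (the range
 * controlled by TBI0), while a bit-55 == 1 address falls back to its
 * original top byte, since TBI1 is clear.  The tbi == 2 case is the
 * mirror image, using OR.
 */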
static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing. Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register. But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = new_tmp_a64(s);
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(cpu_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access. This probes a single
 * address, the exact one specified. The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      int log2_size, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1);

        ret = new_tmp_a64(s);
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int log2_size)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int size)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1);

        ret = new_tmp_a64(s);
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}
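/*
 * Usage sketch (illustrative, not a call site in this file): a tag-checked
 * 8-byte store would be emitted as
 *
 *   clean_addr = gen_mte_check1(s, addr, true, true, MO_64);
 *
 * yielding either the helper-checked address or, with MTE inactive,
 * just the TBI-cleaned address from clean_data_tbi().
 */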
typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly. The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);

    arm_free_cc(&c32);
}

static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(cpu_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}
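/*
 * Note on gen_goto_tb below (descriptive): n selects which of the TB's
 * two direct-exit slots (0 or 1) is used, so a TB can chain directly to
 * at most two distinct successors; every other exit goes through
 * lookup_and_goto_ptr.
 */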
395 */ 396 if (tb_cflags(s->base.tb) & CF_PCREL) { 397 gen_a64_update_pc(s, diff); 398 tcg_gen_goto_tb(n); 399 } else { 400 tcg_gen_goto_tb(n); 401 gen_a64_update_pc(s, diff); 402 } 403 tcg_gen_exit_tb(s->base.tb, n); 404 s->base.is_jmp = DISAS_NORETURN; 405 } else { 406 gen_a64_update_pc(s, diff); 407 if (s->ss_active) { 408 gen_step_complete_exception(s); 409 } else { 410 tcg_gen_lookup_and_goto_ptr(); 411 s->base.is_jmp = DISAS_NORETURN; 412 } 413 } 414 } 415 416 static void init_tmp_a64_array(DisasContext *s) 417 { 418 #ifdef CONFIG_DEBUG_TCG 419 memset(s->tmp_a64, 0, sizeof(s->tmp_a64)); 420 #endif 421 s->tmp_a64_count = 0; 422 } 423 424 static void free_tmp_a64(DisasContext *s) 425 { 426 int i; 427 for (i = 0; i < s->tmp_a64_count; i++) { 428 tcg_temp_free_i64(s->tmp_a64[i]); 429 } 430 init_tmp_a64_array(s); 431 } 432 433 TCGv_i64 new_tmp_a64(DisasContext *s) 434 { 435 assert(s->tmp_a64_count < TMP_A64_MAX); 436 return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64(); 437 } 438 439 TCGv_i64 new_tmp_a64_local(DisasContext *s) 440 { 441 assert(s->tmp_a64_count < TMP_A64_MAX); 442 return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_local_new_i64(); 443 } 444 445 TCGv_i64 new_tmp_a64_zero(DisasContext *s) 446 { 447 TCGv_i64 t = new_tmp_a64(s); 448 tcg_gen_movi_i64(t, 0); 449 return t; 450 } 451 452 /* 453 * Register access functions 454 * 455 * These functions are used for directly accessing a register in where 456 * changes to the final register value are likely to be made. If you 457 * need to use a register for temporary calculation (e.g. index type 458 * operations) use the read_* form. 459 * 460 * B1.2.1 Register mappings 461 * 462 * In instruction register encoding 31 can refer to ZR (zero register) or 463 * the SP (stack pointer) depending on context. In QEMU's case we map SP 464 * to cpu_X[31] and ZR accesses to a temporary which can be discarded. 465 * This is the point of the _sp forms. 466 */ 467 TCGv_i64 cpu_reg(DisasContext *s, int reg) 468 { 469 if (reg == 31) { 470 return new_tmp_a64_zero(s); 471 } else { 472 return cpu_X[reg]; 473 } 474 } 475 476 /* register access for when 31 == SP */ 477 TCGv_i64 cpu_reg_sp(DisasContext *s, int reg) 478 { 479 return cpu_X[reg]; 480 } 481 482 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64 483 * representing the register contents. This TCGv is an auto-freed 484 * temporary so it need not be explicitly freed, and may be modified. 485 */ 486 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf) 487 { 488 TCGv_i64 v = new_tmp_a64(s); 489 if (reg != 31) { 490 if (sf) { 491 tcg_gen_mov_i64(v, cpu_X[reg]); 492 } else { 493 tcg_gen_ext32u_i64(v, cpu_X[reg]); 494 } 495 } else { 496 tcg_gen_movi_i64(v, 0); 497 } 498 return v; 499 } 500 501 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf) 502 { 503 TCGv_i64 v = new_tmp_a64(s); 504 if (sf) { 505 tcg_gen_mov_i64(v, cpu_X[reg]); 506 } else { 507 tcg_gen_ext32u_i64(v, cpu_X[reg]); 508 } 509 return v; 510 } 511 512 /* Return the offset into CPUARMState of a slice (from 513 * the least significant end) of FP register Qn (ie 514 * Dn, Sn, Hn or Bn). 
/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}
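/*
 * Note on the gvec expanders below (descriptive): each passes
 * oprsz = (is_q ? 16 : 8) for the bytes actually operated on and
 * maxsz = vec_full_reg_size(s) for the full register; TCG's gvec layer
 * zeroes the bytes between oprsz and maxsz, which implements the
 * architectural clearing of the untouched tail (including any SVE
 * state beyond bit 128).
 */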
/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper. */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper. */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    tcg_temp_free_ptr(fpst);
}

/* Expand a 3-operand + qc + operation using an out-of-line helper. */
static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
                            int rm, gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), qc_ptr,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
    tcg_temp_free_ptr(qc_ptr);
}

/* Expand a 4-operand operation using an out-of-line helper. */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    tcg_temp_free_ptr(fpst);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
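/*
 * Reminder of QEMU's flag representation (descriptive): cpu_NF holds N
 * in bit 31, cpu_ZF is zero iff Z is set, cpu_CF holds C as 0 or 1, and
 * cpu_VF holds V in bit 31.  Hence gen_set_NZ64 above ORs the two halves
 * of the result for ZF and takes the high half as NF, and the *_CC
 * helpers below extract C and V accordingly.
 */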
/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);
    tcg_temp_free_i64(flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}
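/*
 * Note on gen_adc_CC below (descriptive): the carry-in is folded in with
 * two add2 steps (t0 + CF, then + t1), accumulating the carry-out of each
 * step so that cpu_CF ends up holding the carry-out of the full
 * t0 + t1 + CF sum.
 */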
/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}
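/*
 * Note (descriptive, see finalize_memop() in translate.h): it folds the
 * target endianness (s->be_data) into the MemOp and, when SCTLR alignment
 * checking is in force (s->align_mem), adds MO_ALIGN, so the generators
 * above only need to specify size and signedness.
 */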
/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    MemOp mop;

    tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));

    if (size < 4) {
        mop = finalize_memop(s, size);
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
        TCGv_i64 tmphi = tcg_temp_new_i64();

        tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));

        mop = s->be_data | MO_UQ;
        tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr,
                            get_mem_index(s), mop);

        tcg_temp_free_i64(tcg_hiaddr);
        tcg_temp_free_i64(tmphi);
    }

    tcg_temp_free_i64(tmplo);
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;
    MemOp mop;

    if (size < 4) {
        mop = finalize_memop(s, size);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        mop = s->be_data | MO_UQ;
        tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr,
                            get_mem_index(s), mop);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
    tcg_temp_free_i64(tmplo);

    if (tmphi) {
        tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
        tcg_temp_free_i64(tmphi);
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}
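/*
 * Note on do_fp_st/do_fp_ld above (descriptive): a 128-bit (size == 4)
 * access is split into two 8-byte operations, with the halves swapped
 * for big-endian so memory sees the correct byte order; only the first
 * access carries the MO_ALIGN_16 check, which is enough to validate
 * alignment of the whole 16-byte quantity.
 */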
/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}
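/*
 * Example (illustrative): read_vec_element(s, t, rn, 1, MO_64) fetches
 * the upper 64-bit half of Qn, and read_vec_element_i32(s, t, rn, 3,
 * MO_16) fetches element 3 of a 16-bit-element vector, zero-extended;
 * OR in MO_SIGN for the sign-extending forms.
 */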
/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);

    tcg_temp_free_i64(tcg_tmp);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);

    tcg_temp_free_i64(tcg_tmp);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Check that SVE access is enabled. If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority. This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        s->fp_access_checked = true;
        return sme_access_check(s);
    }
    return fp_access_check_only(s);
}
/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}
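/*
 * Example (illustrative): for a register-offset load such as
 * "LDR W0, [X1, W2, UXTW #2]" the option field is 0b010 (UXTW), so
 * ext_and_shift_reg() zero-extends W2 to 64 bits and shifts left by 2
 * to produce the scaled offset.
 */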
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is
 * intended to be used when the relevant bits for decode are too
 * awkwardly placed and switch/if based logic would be confusing and
 * deeply nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

/* Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    int64_t diff = sextract32(insn, 0, 26) * 4;

    if (insn & (1U << 31)) {
        /* BL Branch with link */
        gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    }

    /* B Branch / BL Branch with link */
    reset_btype(s);
    gen_goto_tb(s, 0, diff);
}

/* Compare and branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |        imm19        |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    int64_t diff;
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    diff = sextract32(insn, 5, 19) * 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, diff);
}
/* Test and branch (immediate)
 *   31  30         25  24  23  19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    int64_t diff;
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    diff = sextract32(insn, 5, 14) * 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    tcg_temp_free_i64(tcg_cmp);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, diff);
}

/* Conditional branch (immediate)
 *   31            25 24  23                  5  4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |        imm19        | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    int64_t diff;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    diff = sextract32(insn, 5, 19) * 4;
    cond = extract32(insn, 0, 4);

    reset_btype(s);
    if (cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, diff);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, diff);
    }
}

/* HINT instruction group, including various allocated HINTs */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0b00000: /* NOP */
        break;
    case 0b00011: /* WFI */
        s->base.is_jmp = DISAS_WFI;
        break;
    case 0b00001: /* YIELD */
        /* When running in MTTCG we don't generate jumps to the yield and
         * WFE helpers as it won't affect the scheduling of other vCPUs.
         * If we wanted to more completely model WFE/SEV so we don't busy
         * spin unnecessarily we would need to do something more involved.
         */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_YIELD;
        }
        break;
    case 0b00010: /* WFE */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_WFE;
        }
        break;
    case 0b00100: /* SEV */
    case 0b00101: /* SEVL */
    case 0b00110: /* DGH */
        /* we treat all as NOP at least for now */
        break;
    case 0b00111: /* XPACLRI */
        if (s->pauth_active) {
            gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
        }
        break;
    case 0b01000: /* PACIA1716 */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01010: /* PACIB1716 */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01100: /* AUTIA1716 */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01110: /* AUTIB1716 */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b10000: /* ESB */
        /* Without RAS, we must implement this as NOP. */
        if (dc_isar_feature(aa64_ras, s)) {
            /*
             * QEMU does not have a source of physical SErrors,
             * so we are only concerned with virtual SErrors.
             * The pseudocode in the ARM for this case is
             *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
             *     AArch64.vESBOperation();
             * Most of the condition can be evaluated at translation time.
             * Test for EL2 present, and defer test for SEL2 to runtime.
             */
            if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
                gen_helper_vesb(cpu_env);
            }
        }
        break;
    case 0b11000: /* PACIAZ */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11001: /* PACIASP */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11010: /* PACIBZ */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11011: /* PACIBSP */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11100: /* AUTIAZ */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11101: /* AUTIASP */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11110: /* AUTIBZ */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11111: /* AUTIBSP */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    default:
        /* default specified as NOP equivalent */
        break;
    }
}
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
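/*
 * Note on handle_sync below (descriptive): the CRm<1:0> "types" field of
 * DSB/DMB maps onto TCG barrier flags -- reads (load-load/load-store),
 * writes (store-store), or all -- while ISB is modelled by simply
 * ending the TB.
 */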
/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGBar bar;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        switch (crm & 3) {
        case 1: /* MBReqTypes_Reads */
            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
            break;
        case 2: /* MBReqTypes_Writes */
            bar = TCG_BAR_SC | TCG_MO_ST_ST;
            break;
        default: /* MBReqTypes_All */
            bar = TCG_BAR_SC | TCG_MO_ALL;
            break;
        }
        tcg_gen_mb(bar);
        return;
    case 6: /* ISB */
        /* We need to break the TB after this insn to execute
         * self-modifying code correctly and also to take
         * any pending interrupts immediately.
         */
        reset_btype(s);
        gen_goto_tb(s, 0, 4);
        return;

    case 7: /* SB */
        if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
            goto do_unallocated;
        }
        /*
         * TODO: There is no speculation barrier opcode for TCG;
         * MB and end the TB instead.
         */
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
        gen_goto_tb(s, 0, 4);
        return;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }
}

static void gen_xaflag(void)
{
    TCGv_i32 z = tcg_temp_new_i32();

    tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);

    /*
     * (!C & !Z) << 31
     * (!(C | Z)) << 31
     * ~((C | Z) << 31)
     * ~-(C | Z)
     * (C | Z) - 1
     */
    tcg_gen_or_i32(cpu_NF, cpu_CF, z);
    tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);

    /* !(Z & C) */
    tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
    tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);

    /* (!C & Z) << 31 -> -(Z & ~C) */
    tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);

    /* C | Z */
    tcg_gen_or_i32(cpu_CF, cpu_CF, z);

    tcg_temp_free_i32(z);
}

static void gen_axflag(void)
{
    tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
    tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */

    /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
    tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);

    tcg_gen_movi_i32(cpu_NF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* MSR (immediate) - move immediate to processor state field */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    int op = op1 << 3 | op2;

    /* End the TB by default, chaining is ok. */
    s->base.is_jmp = DISAS_TOO_MANY;

    switch (op) {
    case 0x00: /* CFINV */
        if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
            goto do_unallocated;
        }
        tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x01: /* XAFlag */
        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
            goto do_unallocated;
        }
        gen_xaflag();
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x02: /* AXFlag */
        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
            goto do_unallocated;
        }
        gen_axflag();
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x03: /* UAO */
        if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_UAO);
        } else {
            clear_pstate_bits(PSTATE_UAO);
        }
        gen_rebuild_hflags(s);
        break;

    case 0x04: /* PAN */
        if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_PAN);
        } else {
            clear_pstate_bits(PSTATE_PAN);
        }
        gen_rebuild_hflags(s);
        break;

    case 0x05: /* SPSel */
        if (s->current_el == 0) {
            goto do_unallocated;
        }
        gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(crm & PSTATE_SP));
        break;

    case 0x19: /* SSBS */
        if (!dc_isar_feature(aa64_ssbs, s)) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_SSBS);
        } else {
            clear_pstate_bits(PSTATE_SSBS);
        }
        /* Don't need to rebuild hflags since SSBS is a nop */
        break;

    case 0x1a: /* DIT */
        if (!dc_isar_feature(aa64_dit, s)) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_DIT);
        } else {
            clear_pstate_bits(PSTATE_DIT);
        }
        /* There's no need to rebuild hflags because DIT is a nop */
        break;
    case 0x1e: /* DAIFSet */
        gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(crm));
        break;

    case 0x1f: /* DAIFClear */
        gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(crm));
        /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */
        s->base.is_jmp = DISAS_UPDATE_EXIT;
        break;

    case 0x1c: /* TCO */
        if (dc_isar_feature(aa64_mte, s)) {
            /* Full MTE is enabled -- set the TCO bit as directed. */
            if (crm & 1) {
                set_pstate_bits(PSTATE_TCO);
            } else {
                clear_pstate_bits(PSTATE_TCO);
            }
            gen_rebuild_hflags(s);
            /* Many factors, including TCO, go into MTE_ACTIVE. */
            s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
        } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
            /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */
            s->base.is_jmp = DISAS_NEXT;
        } else {
            goto do_unallocated;
        }
        break;

    case 0x1b: /* SVCR* */
        if (!dc_isar_feature(aa64_sme, s) || crm < 2 || crm > 7) {
            goto do_unallocated;
        }
        if (sme_access_check(s)) {
            int old = s->pstate_sm | (s->pstate_za << 1);
            int new = (crm & 1) * 3;
            int msk = (crm >> 1) & 3;

            if ((old ^ new) & msk) {
                /* At least one bit changes. */
                gen_helper_set_svcr(cpu_env, tcg_constant_i32(new),
                                    tcg_constant_i32(msk));
            } else {
                s->base.is_jmp = DISAS_NEXT;
            }
        }
        break;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }
}

static void gen_get_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* build bit 31, N */
    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
    /* build bit 30, Z */
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
    /* build bit 29, C */
    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
    /* build bit 28, V */
    tcg_gen_shri_i32(tmp, cpu_VF, 31);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
    /* generate result */
    tcg_gen_extu_i32_i64(tcg_rt, nzcv);

    tcg_temp_free_i32(nzcv);
    tcg_temp_free_i32(tmp);
}

static void gen_set_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* take NZCV from R[t] */
    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);

    /* bit 31, N */
    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
    /* bit 30, Z */
    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
    /* bit 29, C */
    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
    /* bit 28, V */
    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
    tcg_temp_free_i32(nzcv);
}
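/*
 * Example (illustrative): "MRS X0, NZCV" goes through gen_get_nzcv()
 * and yields the flags in bits [31:28] of X0 with all other bits zero;
 * "MSR NZCV, X0" inverts the mapping via gen_set_nzcv(), e.g. setting
 * cpu_ZF to zero exactly when bit 30 of the source is set.
 */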
1919 */ 1920 uint32_t syndrome; 1921 1922 if (isread && dc_isar_feature(aa64_ids, s) && 1923 arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) { 1924 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 1925 } else { 1926 syndrome = syn_uncategorized(); 1927 } 1928 gen_exception_insn(s, 0, EXCP_UDEF, syndrome); 1929 } 1930 1931 /* MRS - move from system register 1932 * MSR (register) - move to system register 1933 * SYS 1934 * SYSL 1935 * These are all essentially the same insn in 'read' and 'write' 1936 * versions, with varying op0 fields. 1937 */ 1938 static void handle_sys(DisasContext *s, uint32_t insn, bool isread, 1939 unsigned int op0, unsigned int op1, unsigned int op2, 1940 unsigned int crn, unsigned int crm, unsigned int rt) 1941 { 1942 uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, 1943 crn, crm, op0, op1, op2); 1944 const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key); 1945 TCGv_ptr tcg_ri = NULL; 1946 TCGv_i64 tcg_rt; 1947 1948 if (!ri) { 1949 /* Unknown register; this might be a guest error or a QEMU 1950 * unimplemented feature. 1951 */ 1952 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 " 1953 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n", 1954 isread ? "read" : "write", op0, op1, crn, crm, op2); 1955 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 1956 return; 1957 } 1958 1959 /* Check access permissions */ 1960 if (!cp_access_ok(s->current_el, ri, isread)) { 1961 gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt); 1962 return; 1963 } 1964 1965 if (ri->accessfn || (ri->fgt && s->fgt_active)) { 1966 /* Emit code to perform further access permissions checks at 1967 * runtime; this may result in an exception. 1968 */ 1969 uint32_t syndrome; 1970 1971 syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread); 1972 gen_a64_update_pc(s, 0); 1973 tcg_ri = tcg_temp_new_ptr(); 1974 gen_helper_access_check_cp_reg(tcg_ri, cpu_env, 1975 tcg_constant_i32(key), 1976 tcg_constant_i32(syndrome), 1977 tcg_constant_i32(isread)); 1978 } else if (ri->type & ARM_CP_RAISES_EXC) { 1979 /* 1980 * The readfn or writefn might raise an exception; 1981 * synchronize the CPU state in case it does. 1982 */ 1983 gen_a64_update_pc(s, 0); 1984 } 1985 1986 /* Handle special cases first */ 1987 switch (ri->type & ARM_CP_SPECIAL_MASK) { 1988 case 0: 1989 break; 1990 case ARM_CP_NOP: 1991 goto exit; 1992 case ARM_CP_NZCV: 1993 tcg_rt = cpu_reg(s, rt); 1994 if (isread) { 1995 gen_get_nzcv(tcg_rt); 1996 } else { 1997 gen_set_nzcv(tcg_rt); 1998 } 1999 goto exit; 2000 case ARM_CP_CURRENTEL: 2001 /* Reads as current EL value from pstate, which is 2002 * guaranteed to be constant by the tb flags. 2003 */ 2004 tcg_rt = cpu_reg(s, rt); 2005 tcg_gen_movi_i64(tcg_rt, s->current_el << 2); 2006 goto exit; 2007 case ARM_CP_DC_ZVA: 2008 /* Writes clear the aligned block of memory which rt points into. 
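 * The size of the block is 4 << DCZID_EL0.BS bytes (commonly 64).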
*/
        if (s->mte_active[0]) {
            int desc = 0;

            desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
            desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
            desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);

            tcg_rt = new_tmp_a64(s);
            gen_helper_mte_check_zva(tcg_rt, cpu_env,
                                     tcg_constant_i32(desc), cpu_reg(s, rt));
        } else {
            tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
        }
        gen_helper_dc_zva(cpu_env, tcg_rt);
        goto exit;
    case ARM_CP_DC_GVA:
        {
            TCGv_i64 clean_addr, tag;

            /*
             * DC_GVA, like DC_ZVA, requires that we supply the original
             * pointer for an invalid page. Probe that address first.
             */
            tcg_rt = cpu_reg(s, rt);
            clean_addr = clean_data_tbi(s, tcg_rt);
            gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);

            if (s->ata) {
                /* Extract the tag from the register to match STZGM. */
                tag = tcg_temp_new_i64();
                tcg_gen_shri_i64(tag, tcg_rt, 56);
                gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
                tcg_temp_free_i64(tag);
            }
        }
        goto exit;
    case ARM_CP_DC_GZVA:
        {
            TCGv_i64 clean_addr, tag;

            /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
            tcg_rt = cpu_reg(s, rt);
            clean_addr = clean_data_tbi(s, tcg_rt);
            gen_helper_dc_zva(cpu_env, clean_addr);

            if (s->ata) {
                /* Extract the tag from the register to match STZGM. */
                tag = tcg_temp_new_i64();
                tcg_gen_shri_i64(tag, tcg_rt, 56);
                gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
                tcg_temp_free_i64(tag);
            }
        }
        goto exit;
    default:
        g_assert_not_reached();
    }
    if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) {
        goto exit;
    } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
        goto exit;
    } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) {
        goto exit;
    }

    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            if (!tcg_ri) {
                tcg_ri = gen_lookup_cp_reg(key);
            }
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tcg_ri);
        } else {
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            goto exit;
        } else if (ri->writefn) {
            if (!tcg_ri) {
                tcg_ri = gen_lookup_cp_reg(key);
            }
            gen_helper_set_cp_reg64(cpu_env, tcg_ri, tcg_rt);
        } else {
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        s->base.is_jmp = DISAS_UPDATE_EXIT;
    }
    if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /*
         * A write to any coprocessor register that ends a TB
         * must rebuild the hflags for the next TB.
         */
        gen_rebuild_hflags(s);
        /*
         * We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
2119 */ 2120 s->base.is_jmp = DISAS_UPDATE_EXIT; 2121 } 2122 2123 exit: 2124 if (tcg_ri) { 2125 tcg_temp_free_ptr(tcg_ri); 2126 } 2127 } 2128 2129 /* System 2130 * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0 2131 * +---------------------+---+-----+-----+-------+-------+-----+------+ 2132 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt | 2133 * +---------------------+---+-----+-----+-------+-------+-----+------+ 2134 */ 2135 static void disas_system(DisasContext *s, uint32_t insn) 2136 { 2137 unsigned int l, op0, op1, crn, crm, op2, rt; 2138 l = extract32(insn, 21, 1); 2139 op0 = extract32(insn, 19, 2); 2140 op1 = extract32(insn, 16, 3); 2141 crn = extract32(insn, 12, 4); 2142 crm = extract32(insn, 8, 4); 2143 op2 = extract32(insn, 5, 3); 2144 rt = extract32(insn, 0, 5); 2145 2146 if (op0 == 0) { 2147 if (l || rt != 31) { 2148 unallocated_encoding(s); 2149 return; 2150 } 2151 switch (crn) { 2152 case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */ 2153 handle_hint(s, insn, op1, op2, crm); 2154 break; 2155 case 3: /* CLREX, DSB, DMB, ISB */ 2156 handle_sync(s, insn, op1, op2, crm); 2157 break; 2158 case 4: /* MSR (immediate) */ 2159 handle_msr_i(s, insn, op1, op2, crm); 2160 break; 2161 default: 2162 unallocated_encoding(s); 2163 break; 2164 } 2165 return; 2166 } 2167 handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt); 2168 } 2169 2170 /* Exception generation 2171 * 2172 * 31 24 23 21 20 5 4 2 1 0 2173 * +-----------------+-----+------------------------+-----+----+ 2174 * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL | 2175 * +-----------------------+------------------------+----------+ 2176 */ 2177 static void disas_exc(DisasContext *s, uint32_t insn) 2178 { 2179 int opc = extract32(insn, 21, 3); 2180 int op2_ll = extract32(insn, 0, 5); 2181 int imm16 = extract32(insn, 5, 16); 2182 uint32_t syndrome; 2183 2184 switch (opc) { 2185 case 0: 2186 /* For SVC, HVC and SMC we advance the single-step state 2187 * machine before taking the exception. This is architecturally 2188 * mandated, to ensure that single-stepping a system call 2189 * instruction works properly. 2190 */ 2191 switch (op2_ll) { 2192 case 1: /* SVC */ 2193 syndrome = syn_aa64_svc(imm16); 2194 if (s->fgt_svc) { 2195 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2196 break; 2197 } 2198 gen_ss_advance(s); 2199 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2200 break; 2201 case 2: /* HVC */ 2202 if (s->current_el == 0) { 2203 unallocated_encoding(s); 2204 break; 2205 } 2206 /* The pre HVC helper handles cases when HVC gets trapped 2207 * as an undefined insn by runtime configuration. 2208 */ 2209 gen_a64_update_pc(s, 0); 2210 gen_helper_pre_hvc(cpu_env); 2211 gen_ss_advance(s); 2212 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(imm16), 2); 2213 break; 2214 case 3: /* SMC */ 2215 if (s->current_el == 0) { 2216 unallocated_encoding(s); 2217 break; 2218 } 2219 gen_a64_update_pc(s, 0); 2220 gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(imm16))); 2221 gen_ss_advance(s); 2222 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(imm16), 3); 2223 break; 2224 default: 2225 unallocated_encoding(s); 2226 break; 2227 } 2228 break; 2229 case 1: 2230 if (op2_ll != 0) { 2231 unallocated_encoding(s); 2232 break; 2233 } 2234 /* BRK */ 2235 gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16)); 2236 break; 2237 case 2: 2238 if (op2_ll != 0) { 2239 unallocated_encoding(s); 2240 break; 2241 } 2242 /* HLT. This has two purposes. 2243 * Architecturally, it is an external halting debug instruction. 
 * Since QEMU doesn't implement external debug, we treat this as
 * the architecture requires when halting debug is disabled: it UNDEFs.
 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
 */
        if (semihosting_enabled(s->current_el == 0) && imm16 == 0xf000) {
            gen_exception_internal_insn(s, EXCP_SEMIHOST);
        } else {
            unallocated_encoding(s);
        }
        break;
    case 5:
        if (op2_ll < 1 || op2_ll > 3) {
            unallocated_encoding(s);
            break;
        }
        /*
         * DCPS1, DCPS2, DCPS3: these are only valid in Debug state,
         * which QEMU does not implement, so they UNDEF.
         */
        unallocated_encoding(s);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* Unconditional branch (register)
 *  31           25 24   21 20   16 15   10 9    5 4     0
 * +---------------+-------+-------+-------+------+-------+
 * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
 * +---------------+-------+-------+-------+------+-------+
 */
static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
{
    unsigned int opc, op2, op3, rn, op4;
    unsigned btype_mod = 2;   /* 0: BR, 1: BLR, 2: other */
    TCGv_i64 dst;
    TCGv_i64 modifier;

    opc = extract32(insn, 21, 4);
    op2 = extract32(insn, 16, 5);
    op3 = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    op4 = extract32(insn, 0, 5);

    if (op2 != 0x1f) {
        goto do_unallocated;
    }

    switch (opc) {
    case 0: /* BR */
    case 1: /* BLR */
    case 2: /* RET */
        btype_mod = opc;
        switch (op3) {
        case 0:
            /* BR, BLR, RET */
            if (op4 != 0) {
                goto do_unallocated;
            }
            dst = cpu_reg(s, rn);
            break;

        case 2:
        case 3:
            if (!dc_isar_feature(aa64_pauth, s)) {
                goto do_unallocated;
            }
            if (opc == 2) {
                /* RETAA, RETAB */
                if (rn != 0x1f || op4 != 0x1f) {
                    goto do_unallocated;
                }
                rn = 30;
                modifier = cpu_X[31];
            } else {
                /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */
                if (op4 != 0x1f) {
                    goto do_unallocated;
                }
                modifier = new_tmp_a64_zero(s);
            }
            if (s->pauth_active) {
                dst = new_tmp_a64(s);
                if (op3 == 2) {
                    gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
                } else {
                    gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
                }
            } else {
                dst = cpu_reg(s, rn);
            }
            break;

        default:
            goto do_unallocated;
        }
        /* BLR also needs to load return address */
        if (opc == 1) {
            TCGv_i64 lr = cpu_reg(s, 30);
            if (dst == lr) {
                TCGv_i64 tmp = new_tmp_a64(s);
                tcg_gen_mov_i64(tmp, dst);
                dst = tmp;
            }
            gen_pc_plus_diff(s, lr, curr_insn_len(s));
        }
        gen_a64_set_pc(s, dst);
        break;

    case 8: /* BRAA */
    case 9: /* BLRAA */
        if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        if ((op3 & ~1) != 2) {
            goto do_unallocated;
        }
        btype_mod = opc & 1;
        if (s->pauth_active) {
            dst = new_tmp_a64(s);
            modifier = cpu_reg_sp(s, op4);
            if (op3 == 2) {
                gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier);
            } else {
                gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier);
            }
        } else {
            dst = cpu_reg(s, rn);
        }
        /* BLRAA also needs to load return address */
        if (opc == 9) {
            TCGv_i64 lr = cpu_reg(s, 30);
            if (dst == lr) {
                TCGv_i64 tmp = new_tmp_a64(s);
                tcg_gen_mov_i64(tmp, dst);
                dst = tmp;
            }
            gen_pc_plus_diff(s, lr, curr_insn_len(s));
        }
        gen_a64_set_pc(s, dst);
        break;
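
        /*
         * Illustrative flow for BLRAA x1, x2 (the case just above),
         * assuming pointer authentication is active:
         *   dst = AuthIA(X1, modifier = X2)  -- verify and strip the PAC
         *   X30 = PC + 4                     -- return address
         *   PC  = dst
         * On an authentication failure AuthIA yields a corrupted,
         * non-canonical address, so the branch target faults when
         * fetched.  With pauth inactive the authentication step is a
         * NOP and this is a plain indirect branch-and-link.
         */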
2384 2385 case 4: /* ERET */ 2386 if (s->current_el == 0) { 2387 goto do_unallocated; 2388 } 2389 switch (op3) { 2390 case 0: /* ERET */ 2391 if (op4 != 0) { 2392 goto do_unallocated; 2393 } 2394 if (s->fgt_eret) { 2395 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(op3), 2); 2396 return; 2397 } 2398 dst = tcg_temp_new_i64(); 2399 tcg_gen_ld_i64(dst, cpu_env, 2400 offsetof(CPUARMState, elr_el[s->current_el])); 2401 break; 2402 2403 case 2: /* ERETAA */ 2404 case 3: /* ERETAB */ 2405 if (!dc_isar_feature(aa64_pauth, s)) { 2406 goto do_unallocated; 2407 } 2408 if (rn != 0x1f || op4 != 0x1f) { 2409 goto do_unallocated; 2410 } 2411 /* The FGT trap takes precedence over an auth trap. */ 2412 if (s->fgt_eret) { 2413 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(op3), 2); 2414 return; 2415 } 2416 dst = tcg_temp_new_i64(); 2417 tcg_gen_ld_i64(dst, cpu_env, 2418 offsetof(CPUARMState, elr_el[s->current_el])); 2419 if (s->pauth_active) { 2420 modifier = cpu_X[31]; 2421 if (op3 == 2) { 2422 gen_helper_autia(dst, cpu_env, dst, modifier); 2423 } else { 2424 gen_helper_autib(dst, cpu_env, dst, modifier); 2425 } 2426 } 2427 break; 2428 2429 default: 2430 goto do_unallocated; 2431 } 2432 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { 2433 gen_io_start(); 2434 } 2435 2436 gen_helper_exception_return(cpu_env, dst); 2437 tcg_temp_free_i64(dst); 2438 /* Must exit loop to check un-masked IRQs */ 2439 s->base.is_jmp = DISAS_EXIT; 2440 return; 2441 2442 case 5: /* DRPS */ 2443 if (op3 != 0 || op4 != 0 || rn != 0x1f) { 2444 goto do_unallocated; 2445 } else { 2446 unallocated_encoding(s); 2447 } 2448 return; 2449 2450 default: 2451 do_unallocated: 2452 unallocated_encoding(s); 2453 return; 2454 } 2455 2456 switch (btype_mod) { 2457 case 0: /* BR */ 2458 if (dc_isar_feature(aa64_bti, s)) { 2459 /* BR to {x16,x17} or !guard -> 1, else 3. */ 2460 set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3); 2461 } 2462 break; 2463 2464 case 1: /* BLR */ 2465 if (dc_isar_feature(aa64_bti, s)) { 2466 /* BLR sets BTYPE to 2, regardless of source guarded page. */ 2467 set_btype(s, 2); 2468 } 2469 break; 2470 2471 default: /* RET or none of the above. */ 2472 /* BTYPE will be set to 0 by normal end-of-insn processing. */ 2473 break; 2474 } 2475 2476 s->base.is_jmp = DISAS_JUMP; 2477 } 2478 2479 /* Branches, exception generating and system instructions */ 2480 static void disas_b_exc_sys(DisasContext *s, uint32_t insn) 2481 { 2482 switch (extract32(insn, 25, 7)) { 2483 case 0x0a: case 0x0b: 2484 case 0x4a: case 0x4b: /* Unconditional branch (immediate) */ 2485 disas_uncond_b_imm(s, insn); 2486 break; 2487 case 0x1a: case 0x5a: /* Compare & branch (immediate) */ 2488 disas_comp_b_imm(s, insn); 2489 break; 2490 case 0x1b: case 0x5b: /* Test & branch (immediate) */ 2491 disas_test_b_imm(s, insn); 2492 break; 2493 case 0x2a: /* Conditional branch (immediate) */ 2494 disas_cond_b_imm(s, insn); 2495 break; 2496 case 0x6a: /* Exception generation / System */ 2497 if (insn & (1 << 24)) { 2498 if (extract32(insn, 22, 2) == 0) { 2499 disas_system(s, insn); 2500 } else { 2501 unallocated_encoding(s); 2502 } 2503 } else { 2504 disas_exc(s, insn); 2505 } 2506 break; 2507 case 0x6b: /* Unconditional branch (register) */ 2508 disas_uncond_b_reg(s, insn); 2509 break; 2510 default: 2511 unallocated_encoding(s); 2512 break; 2513 } 2514 } 2515 2516 /* 2517 * Load/Store exclusive instructions are implemented by remembering 2518 * the value/address loaded, and seeing if these are the same 2519 * when the store is performed. 
This is not actually the architecturally 2520 * mandated semantics, but it works for typical guest code sequences 2521 * and avoids having to monitor regular stores. 2522 * 2523 * The store exclusive uses the atomic cmpxchg primitives to avoid 2524 * races in multi-threaded linux-user and when MTTCG softmmu is 2525 * enabled. 2526 */ 2527 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, 2528 TCGv_i64 addr, int size, bool is_pair) 2529 { 2530 int idx = get_mem_index(s); 2531 MemOp memop = s->be_data; 2532 2533 g_assert(size <= 3); 2534 if (is_pair) { 2535 g_assert(size >= 2); 2536 if (size == 2) { 2537 /* The pair must be single-copy atomic for the doubleword. */ 2538 memop |= MO_64 | MO_ALIGN; 2539 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop); 2540 if (s->be_data == MO_LE) { 2541 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2542 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2543 } else { 2544 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2545 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2546 } 2547 } else { 2548 /* The pair must be single-copy atomic for *each* doubleword, not 2549 the entire quadword, however it must be quadword aligned. */ 2550 memop |= MO_64; 2551 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, 2552 memop | MO_ALIGN_16); 2553 2554 TCGv_i64 addr2 = tcg_temp_new_i64(); 2555 tcg_gen_addi_i64(addr2, addr, 8); 2556 tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop); 2557 tcg_temp_free_i64(addr2); 2558 2559 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2560 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2561 } 2562 } else { 2563 memop |= size | MO_ALIGN; 2564 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop); 2565 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2566 } 2567 tcg_gen_mov_i64(cpu_exclusive_addr, addr); 2568 } 2569 2570 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2571 TCGv_i64 addr, int size, int is_pair) 2572 { 2573 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2574 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2575 * [addr] = {Rt}; 2576 * if (is_pair) { 2577 * [addr + datasize] = {Rt2}; 2578 * } 2579 * {Rd} = 0; 2580 * } else { 2581 * {Rd} = 1; 2582 * } 2583 * env->exclusive_addr = -1; 2584 */ 2585 TCGLabel *fail_label = gen_new_label(); 2586 TCGLabel *done_label = gen_new_label(); 2587 TCGv_i64 tmp; 2588 2589 tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label); 2590 2591 tmp = tcg_temp_new_i64(); 2592 if (is_pair) { 2593 if (size == 2) { 2594 if (s->be_data == MO_LE) { 2595 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2596 } else { 2597 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2598 } 2599 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2600 cpu_exclusive_val, tmp, 2601 get_mem_index(s), 2602 MO_64 | MO_ALIGN | s->be_data); 2603 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2604 } else { 2605 TCGv_i128 t16 = tcg_temp_new_i128(); 2606 TCGv_i128 c16 = tcg_temp_new_i128(); 2607 TCGv_i64 a, b; 2608 2609 if (s->be_data == MO_LE) { 2610 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2611 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2612 cpu_exclusive_high); 2613 } else { 2614 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2615 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 2616 cpu_exclusive_val); 2617 } 2618 2619 tcg_gen_atomic_cmpxchg_i128(t16, 
                                        cpu_exclusive_addr, c16, t16,
                                        get_mem_index(s),
                                        MO_128 | MO_ALIGN | s->be_data);
            tcg_temp_free_i128(c16);

            a = tcg_temp_new_i64();
            b = tcg_temp_new_i64();
            if (s->be_data == MO_LE) {
                tcg_gen_extr_i128_i64(a, b, t16);
            } else {
                tcg_gen_extr_i128_i64(b, a, t16);
            }

            tcg_gen_xor_i64(a, a, cpu_exclusive_val);
            tcg_gen_xor_i64(b, b, cpu_exclusive_high);
            tcg_gen_or_i64(tmp, a, b);
            tcg_temp_free_i64(a);
            tcg_temp_free_i64(b);
            tcg_temp_free_i128(t16);

            tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0);
        }
    } else {
        tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
                                   cpu_reg(s, rt), get_mem_index(s),
                                   size | MO_ALIGN | s->be_data);
        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
    }
    tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
    tcg_temp_free_i64(tmp);
    tcg_gen_br(done_label);

    gen_set_label(fail_label);
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}

static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
                                 int rn, int size)
{
    TCGv_i64 tcg_rs = cpu_reg(s, rs);
    TCGv_i64 tcg_rt = cpu_reg(s, rt);
    int memidx = get_mem_index(s);
    TCGv_i64 clean_addr;

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size);
    tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
                               size | MO_ALIGN | s->be_data);
}

static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
                                      int rn, int size)
{
    TCGv_i64 s1 = cpu_reg(s, rs);
    TCGv_i64 s2 = cpu_reg(s, rs + 1);
    TCGv_i64 t1 = cpu_reg(s, rt);
    TCGv_i64 t2 = cpu_reg(s, rt + 1);
    TCGv_i64 clean_addr;
    int memidx = get_mem_index(s);

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    /* This is a single atomic access, despite the "pair". */
    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1);

    if (size == 2) {
        TCGv_i64 cmp = tcg_temp_new_i64();
        TCGv_i64 val = tcg_temp_new_i64();

        if (s->be_data == MO_LE) {
            tcg_gen_concat32_i64(val, t1, t2);
            tcg_gen_concat32_i64(cmp, s1, s2);
        } else {
            tcg_gen_concat32_i64(val, t2, t1);
            tcg_gen_concat32_i64(cmp, s2, s1);
        }

        tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
                                   MO_64 | MO_ALIGN | s->be_data);
        tcg_temp_free_i64(val);

        if (s->be_data == MO_LE) {
            tcg_gen_extr32_i64(s1, s2, cmp);
        } else {
            tcg_gen_extr32_i64(s2, s1, cmp);
        }
        tcg_temp_free_i64(cmp);
    } else {
        TCGv_i128 cmp = tcg_temp_new_i128();
        TCGv_i128 val = tcg_temp_new_i128();

        if (s->be_data == MO_LE) {
            tcg_gen_concat_i64_i128(val, t1, t2);
            tcg_gen_concat_i64_i128(cmp, s1, s2);
        } else {
            tcg_gen_concat_i64_i128(val, t2, t1);
            tcg_gen_concat_i64_i128(cmp, s2, s1);
        }

        tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx,
                                    MO_128 | MO_ALIGN | s->be_data);
        tcg_temp_free_i128(val);

        if (s->be_data == MO_LE) {
            tcg_gen_extr_i128_i64(s1, s2, cmp);
        } else {
            tcg_gen_extr_i128_i64(s2, s1, cmp);
        }
        tcg_temp_free_i128(cmp);
    }
}

/* Update the Sixty-Four bit (SF) register size.
This logic is derived 2738 * from the ARMv8 specs for LDR (Shared decode for all encodings). 2739 */ 2740 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc) 2741 { 2742 int opc0 = extract32(opc, 0, 1); 2743 int regsize; 2744 2745 if (is_signed) { 2746 regsize = opc0 ? 32 : 64; 2747 } else { 2748 regsize = size == 3 ? 64 : 32; 2749 } 2750 return regsize == 64; 2751 } 2752 2753 /* Load/store exclusive 2754 * 2755 * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0 2756 * +-----+-------------+----+---+----+------+----+-------+------+------+ 2757 * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt | 2758 * +-----+-------------+----+---+----+------+----+-------+------+------+ 2759 * 2760 * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit 2761 * L: 0 -> store, 1 -> load 2762 * o2: 0 -> exclusive, 1 -> not 2763 * o1: 0 -> single register, 1 -> register pair 2764 * o0: 1 -> load-acquire/store-release, 0 -> not 2765 */ 2766 static void disas_ldst_excl(DisasContext *s, uint32_t insn) 2767 { 2768 int rt = extract32(insn, 0, 5); 2769 int rn = extract32(insn, 5, 5); 2770 int rt2 = extract32(insn, 10, 5); 2771 int rs = extract32(insn, 16, 5); 2772 int is_lasr = extract32(insn, 15, 1); 2773 int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr; 2774 int size = extract32(insn, 30, 2); 2775 TCGv_i64 clean_addr; 2776 2777 switch (o2_L_o1_o0) { 2778 case 0x0: /* STXR */ 2779 case 0x1: /* STLXR */ 2780 if (rn == 31) { 2781 gen_check_sp_alignment(s); 2782 } 2783 if (is_lasr) { 2784 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2785 } 2786 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2787 true, rn != 31, size); 2788 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false); 2789 return; 2790 2791 case 0x4: /* LDXR */ 2792 case 0x5: /* LDAXR */ 2793 if (rn == 31) { 2794 gen_check_sp_alignment(s); 2795 } 2796 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2797 false, rn != 31, size); 2798 s->is_ldex = true; 2799 gen_load_exclusive(s, rt, rt2, clean_addr, size, false); 2800 if (is_lasr) { 2801 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2802 } 2803 return; 2804 2805 case 0x8: /* STLLR */ 2806 if (!dc_isar_feature(aa64_lor, s)) { 2807 break; 2808 } 2809 /* StoreLORelease is the same as Store-Release for QEMU. */ 2810 /* fall through */ 2811 case 0x9: /* STLR */ 2812 /* Generate ISS for non-exclusive accesses including LASR. */ 2813 if (rn == 31) { 2814 gen_check_sp_alignment(s); 2815 } 2816 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2817 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2818 true, rn != 31, size); 2819 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 2820 do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt, 2821 disas_ldst_compute_iss_sf(size, false, 0), is_lasr); 2822 return; 2823 2824 case 0xc: /* LDLAR */ 2825 if (!dc_isar_feature(aa64_lor, s)) { 2826 break; 2827 } 2828 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 2829 /* fall through */ 2830 case 0xd: /* LDAR */ 2831 /* Generate ISS for non-exclusive accesses including LASR. 
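 * The ISS lets a hypervisor that takes a data abort on this access
 * retrieve the register number and access size without having to
 * fetch and decode the instruction.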
*/ 2832 if (rn == 31) { 2833 gen_check_sp_alignment(s); 2834 } 2835 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2836 false, rn != 31, size); 2837 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 2838 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true, 2839 rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); 2840 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2841 return; 2842 2843 case 0x2: case 0x3: /* CASP / STXP */ 2844 if (size & 2) { /* STXP / STLXP */ 2845 if (rn == 31) { 2846 gen_check_sp_alignment(s); 2847 } 2848 if (is_lasr) { 2849 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2850 } 2851 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2852 true, rn != 31, size); 2853 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true); 2854 return; 2855 } 2856 if (rt2 == 31 2857 && ((rt | rs) & 1) == 0 2858 && dc_isar_feature(aa64_atomics, s)) { 2859 /* CASP / CASPL */ 2860 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); 2861 return; 2862 } 2863 break; 2864 2865 case 0x6: case 0x7: /* CASPA / LDXP */ 2866 if (size & 2) { /* LDXP / LDAXP */ 2867 if (rn == 31) { 2868 gen_check_sp_alignment(s); 2869 } 2870 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2871 false, rn != 31, size); 2872 s->is_ldex = true; 2873 gen_load_exclusive(s, rt, rt2, clean_addr, size, true); 2874 if (is_lasr) { 2875 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2876 } 2877 return; 2878 } 2879 if (rt2 == 31 2880 && ((rt | rs) & 1) == 0 2881 && dc_isar_feature(aa64_atomics, s)) { 2882 /* CASPA / CASPAL */ 2883 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); 2884 return; 2885 } 2886 break; 2887 2888 case 0xa: /* CAS */ 2889 case 0xb: /* CASL */ 2890 case 0xe: /* CASA */ 2891 case 0xf: /* CASAL */ 2892 if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) { 2893 gen_compare_and_swap(s, rs, rt, rn, size); 2894 return; 2895 } 2896 break; 2897 } 2898 unallocated_encoding(s); 2899 } 2900 2901 /* 2902 * Load register (literal) 2903 * 2904 * 31 30 29 27 26 25 24 23 5 4 0 2905 * +-----+-------+---+-----+-------------------+-------+ 2906 * | opc | 0 1 1 | V | 0 0 | imm19 | Rt | 2907 * +-----+-------+---+-----+-------------------+-------+ 2908 * 2909 * V: 1 -> vector (simd/fp) 2910 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit, 2911 * 10-> 32 bit signed, 11 -> prefetch 2912 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated) 2913 */ 2914 static void disas_ld_lit(DisasContext *s, uint32_t insn) 2915 { 2916 int rt = extract32(insn, 0, 5); 2917 int64_t imm = sextract32(insn, 5, 19) << 2; 2918 bool is_vector = extract32(insn, 26, 1); 2919 int opc = extract32(insn, 30, 2); 2920 bool is_signed = false; 2921 int size = 2; 2922 TCGv_i64 tcg_rt, clean_addr; 2923 2924 if (is_vector) { 2925 if (opc == 3) { 2926 unallocated_encoding(s); 2927 return; 2928 } 2929 size = 2 + opc; 2930 if (!fp_access_check(s)) { 2931 return; 2932 } 2933 } else { 2934 if (opc == 3) { 2935 /* PRFM (literal) : prefetch */ 2936 return; 2937 } 2938 size = 2 + extract32(opc, 0, 1); 2939 is_signed = extract32(opc, 1, 1); 2940 } 2941 2942 tcg_rt = cpu_reg(s, rt); 2943 2944 clean_addr = new_tmp_a64(s); 2945 gen_pc_plus_diff(s, clean_addr, imm); 2946 if (is_vector) { 2947 do_fp_ld(s, rt, clean_addr, size); 2948 } else { 2949 /* Only unsigned 32bit loads target 32bit registers. 
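 * opc 0 is 32-bit LDR, 1 is 64-bit LDR, 2 is LDRSW (which writes a
 * 64-bit Xt), and 3 (PRFM) was handled above, so iss_sf is just
 * opc != 0.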
*/ 2950 bool iss_sf = opc != 0; 2951 2952 do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 2953 false, true, rt, iss_sf, false); 2954 } 2955 } 2956 2957 /* 2958 * LDNP (Load Pair - non-temporal hint) 2959 * LDP (Load Pair - non vector) 2960 * LDPSW (Load Pair Signed Word - non vector) 2961 * STNP (Store Pair - non-temporal hint) 2962 * STP (Store Pair - non vector) 2963 * LDNP (Load Pair of SIMD&FP - non-temporal hint) 2964 * LDP (Load Pair of SIMD&FP) 2965 * STNP (Store Pair of SIMD&FP - non-temporal hint) 2966 * STP (Store Pair of SIMD&FP) 2967 * 2968 * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0 2969 * +-----+-------+---+---+-------+---+-----------------------------+ 2970 * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt | 2971 * +-----+-------+---+---+-------+---+-------+-------+------+------+ 2972 * 2973 * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit 2974 * LDPSW/STGP 01 2975 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit 2976 * V: 0 -> GPR, 1 -> Vector 2977 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index, 2978 * 10 -> signed offset, 11 -> pre-index 2979 * L: 0 -> Store 1 -> Load 2980 * 2981 * Rt, Rt2 = GPR or SIMD registers to be stored 2982 * Rn = general purpose register containing address 2983 * imm7 = signed offset (multiple of 4 or 8 depending on size) 2984 */ 2985 static void disas_ldst_pair(DisasContext *s, uint32_t insn) 2986 { 2987 int rt = extract32(insn, 0, 5); 2988 int rn = extract32(insn, 5, 5); 2989 int rt2 = extract32(insn, 10, 5); 2990 uint64_t offset = sextract64(insn, 15, 7); 2991 int index = extract32(insn, 23, 2); 2992 bool is_vector = extract32(insn, 26, 1); 2993 bool is_load = extract32(insn, 22, 1); 2994 int opc = extract32(insn, 30, 2); 2995 2996 bool is_signed = false; 2997 bool postindex = false; 2998 bool wback = false; 2999 bool set_tag = false; 3000 3001 TCGv_i64 clean_addr, dirty_addr; 3002 3003 int size; 3004 3005 if (opc == 3) { 3006 unallocated_encoding(s); 3007 return; 3008 } 3009 3010 if (is_vector) { 3011 size = 2 + opc; 3012 } else if (opc == 1 && !is_load) { 3013 /* STGP */ 3014 if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) { 3015 unallocated_encoding(s); 3016 return; 3017 } 3018 size = 3; 3019 set_tag = true; 3020 } else { 3021 size = 2 + extract32(opc, 1, 1); 3022 is_signed = extract32(opc, 0, 1); 3023 if (!is_load && is_signed) { 3024 unallocated_encoding(s); 3025 return; 3026 } 3027 } 3028 3029 switch (index) { 3030 case 1: /* post-index */ 3031 postindex = true; 3032 wback = true; 3033 break; 3034 case 0: 3035 /* signed offset with "non-temporal" hint. Since we don't emulate 3036 * caches we don't care about hints to the cache system about 3037 * data access patterns, and handle this identically to plain 3038 * signed offset. 3039 */ 3040 if (is_signed) { 3041 /* There is no non-temporal-hint version of LDPSW */ 3042 unallocated_encoding(s); 3043 return; 3044 } 3045 postindex = false; 3046 break; 3047 case 2: /* signed offset, rn not updated */ 3048 postindex = false; 3049 break; 3050 case 3: /* pre-index */ 3051 postindex = false; 3052 wback = true; 3053 break; 3054 } 3055 3056 if (is_vector && !fp_access_check(s)) { 3057 return; 3058 } 3059 3060 offset <<= (set_tag ? 
LOG2_TAG_GRANULE : size); 3061 3062 if (rn == 31) { 3063 gen_check_sp_alignment(s); 3064 } 3065 3066 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3067 if (!postindex) { 3068 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3069 } 3070 3071 if (set_tag) { 3072 if (!s->ata) { 3073 /* 3074 * TODO: We could rely on the stores below, at least for 3075 * system mode, if we arrange to add MO_ALIGN_16. 3076 */ 3077 gen_helper_stg_stub(cpu_env, dirty_addr); 3078 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3079 gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr); 3080 } else { 3081 gen_helper_stg(cpu_env, dirty_addr, dirty_addr); 3082 } 3083 } 3084 3085 clean_addr = gen_mte_checkN(s, dirty_addr, !is_load, 3086 (wback || rn != 31) && !set_tag, 2 << size); 3087 3088 if (is_vector) { 3089 if (is_load) { 3090 do_fp_ld(s, rt, clean_addr, size); 3091 } else { 3092 do_fp_st(s, rt, clean_addr, size); 3093 } 3094 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); 3095 if (is_load) { 3096 do_fp_ld(s, rt2, clean_addr, size); 3097 } else { 3098 do_fp_st(s, rt2, clean_addr, size); 3099 } 3100 } else { 3101 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3102 TCGv_i64 tcg_rt2 = cpu_reg(s, rt2); 3103 3104 if (is_load) { 3105 TCGv_i64 tmp = tcg_temp_new_i64(); 3106 3107 /* Do not modify tcg_rt before recognizing any exception 3108 * from the second load. 3109 */ 3110 do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN, 3111 false, false, 0, false, false); 3112 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); 3113 do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN, 3114 false, false, 0, false, false); 3115 3116 tcg_gen_mov_i64(tcg_rt, tmp); 3117 tcg_temp_free_i64(tmp); 3118 } else { 3119 do_gpr_st(s, tcg_rt, clean_addr, size, 3120 false, 0, false, false); 3121 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); 3122 do_gpr_st(s, tcg_rt2, clean_addr, size, 3123 false, 0, false, false); 3124 } 3125 } 3126 3127 if (wback) { 3128 if (postindex) { 3129 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3130 } 3131 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr); 3132 } 3133 } 3134 3135 /* 3136 * Load/store (immediate post-indexed) 3137 * Load/store (immediate pre-indexed) 3138 * Load/store (unscaled immediate) 3139 * 3140 * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0 3141 * +----+-------+---+-----+-----+---+--------+-----+------+------+ 3142 * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt | 3143 * +----+-------+---+-----+-----+---+--------+-----+------+------+ 3144 * 3145 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. 
(no writeback) 3146 10 -> unprivileged 3147 * V = 0 -> non-vector 3148 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit 3149 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 3150 */ 3151 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, 3152 int opc, 3153 int size, 3154 int rt, 3155 bool is_vector) 3156 { 3157 int rn = extract32(insn, 5, 5); 3158 int imm9 = sextract32(insn, 12, 9); 3159 int idx = extract32(insn, 10, 2); 3160 bool is_signed = false; 3161 bool is_store = false; 3162 bool is_extended = false; 3163 bool is_unpriv = (idx == 2); 3164 bool iss_valid; 3165 bool post_index; 3166 bool writeback; 3167 int memidx; 3168 3169 TCGv_i64 clean_addr, dirty_addr; 3170 3171 if (is_vector) { 3172 size |= (opc & 2) << 1; 3173 if (size > 4 || is_unpriv) { 3174 unallocated_encoding(s); 3175 return; 3176 } 3177 is_store = ((opc & 1) == 0); 3178 if (!fp_access_check(s)) { 3179 return; 3180 } 3181 } else { 3182 if (size == 3 && opc == 2) { 3183 /* PRFM - prefetch */ 3184 if (idx != 0) { 3185 unallocated_encoding(s); 3186 return; 3187 } 3188 return; 3189 } 3190 if (opc == 3 && size > 1) { 3191 unallocated_encoding(s); 3192 return; 3193 } 3194 is_store = (opc == 0); 3195 is_signed = extract32(opc, 1, 1); 3196 is_extended = (size < 3) && extract32(opc, 0, 1); 3197 } 3198 3199 switch (idx) { 3200 case 0: 3201 case 2: 3202 post_index = false; 3203 writeback = false; 3204 break; 3205 case 1: 3206 post_index = true; 3207 writeback = true; 3208 break; 3209 case 3: 3210 post_index = false; 3211 writeback = true; 3212 break; 3213 default: 3214 g_assert_not_reached(); 3215 } 3216 3217 iss_valid = !is_vector && !writeback; 3218 3219 if (rn == 31) { 3220 gen_check_sp_alignment(s); 3221 } 3222 3223 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3224 if (!post_index) { 3225 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); 3226 } 3227 3228 memidx = is_unpriv ? 
get_a64_user_mem_index(s) : get_mem_index(s); 3229 clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store, 3230 writeback || rn != 31, 3231 size, is_unpriv, memidx); 3232 3233 if (is_vector) { 3234 if (is_store) { 3235 do_fp_st(s, rt, clean_addr, size); 3236 } else { 3237 do_fp_ld(s, rt, clean_addr, size); 3238 } 3239 } else { 3240 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3241 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); 3242 3243 if (is_store) { 3244 do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx, 3245 iss_valid, rt, iss_sf, false); 3246 } else { 3247 do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 3248 is_extended, memidx, 3249 iss_valid, rt, iss_sf, false); 3250 } 3251 } 3252 3253 if (writeback) { 3254 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn); 3255 if (post_index) { 3256 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); 3257 } 3258 tcg_gen_mov_i64(tcg_rn, dirty_addr); 3259 } 3260 } 3261 3262 /* 3263 * Load/store (register offset) 3264 * 3265 * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0 3266 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ 3267 * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt | 3268 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ 3269 * 3270 * For non-vector: 3271 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit 3272 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 3273 * For vector: 3274 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated 3275 * opc<0>: 0 -> store, 1 -> load 3276 * V: 1 -> vector/simd 3277 * opt: extend encoding (see DecodeRegExtend) 3278 * S: if S=1 then scale (essentially index by sizeof(size)) 3279 * Rt: register to transfer into/out of 3280 * Rn: address register or SP for base 3281 * Rm: offset register or ZR for offset 3282 */ 3283 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, 3284 int opc, 3285 int size, 3286 int rt, 3287 bool is_vector) 3288 { 3289 int rn = extract32(insn, 5, 5); 3290 int shift = extract32(insn, 12, 1); 3291 int rm = extract32(insn, 16, 5); 3292 int opt = extract32(insn, 13, 3); 3293 bool is_signed = false; 3294 bool is_store = false; 3295 bool is_extended = false; 3296 3297 TCGv_i64 tcg_rm, clean_addr, dirty_addr; 3298 3299 if (extract32(opt, 1, 1) == 0) { 3300 unallocated_encoding(s); 3301 return; 3302 } 3303 3304 if (is_vector) { 3305 size |= (opc & 2) << 1; 3306 if (size > 4) { 3307 unallocated_encoding(s); 3308 return; 3309 } 3310 is_store = !extract32(opc, 0, 1); 3311 if (!fp_access_check(s)) { 3312 return; 3313 } 3314 } else { 3315 if (size == 3 && opc == 2) { 3316 /* PRFM - prefetch */ 3317 return; 3318 } 3319 if (opc == 3 && size > 1) { 3320 unallocated_encoding(s); 3321 return; 3322 } 3323 is_store = (opc == 0); 3324 is_signed = extract32(opc, 1, 1); 3325 is_extended = (size < 3) && extract32(opc, 0, 1); 3326 } 3327 3328 if (rn == 31) { 3329 gen_check_sp_alignment(s); 3330 } 3331 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3332 3333 tcg_rm = read_cpu_reg(s, rm, 1); 3334 ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? 
size : 0); 3335 3336 tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm); 3337 clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size); 3338 3339 if (is_vector) { 3340 if (is_store) { 3341 do_fp_st(s, rt, clean_addr, size); 3342 } else { 3343 do_fp_ld(s, rt, clean_addr, size); 3344 } 3345 } else { 3346 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3347 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); 3348 if (is_store) { 3349 do_gpr_st(s, tcg_rt, clean_addr, size, 3350 true, rt, iss_sf, false); 3351 } else { 3352 do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 3353 is_extended, true, rt, iss_sf, false); 3354 } 3355 } 3356 } 3357 3358 /* 3359 * Load/store (unsigned immediate) 3360 * 3361 * 31 30 29 27 26 25 24 23 22 21 10 9 5 3362 * +----+-------+---+-----+-----+------------+-------+------+ 3363 * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt | 3364 * +----+-------+---+-----+-----+------------+-------+------+ 3365 * 3366 * For non-vector: 3367 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit 3368 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 3369 * For vector: 3370 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated 3371 * opc<0>: 0 -> store, 1 -> load 3372 * Rn: base address register (inc SP) 3373 * Rt: target register 3374 */ 3375 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, 3376 int opc, 3377 int size, 3378 int rt, 3379 bool is_vector) 3380 { 3381 int rn = extract32(insn, 5, 5); 3382 unsigned int imm12 = extract32(insn, 10, 12); 3383 unsigned int offset; 3384 3385 TCGv_i64 clean_addr, dirty_addr; 3386 3387 bool is_store; 3388 bool is_signed = false; 3389 bool is_extended = false; 3390 3391 if (is_vector) { 3392 size |= (opc & 2) << 1; 3393 if (size > 4) { 3394 unallocated_encoding(s); 3395 return; 3396 } 3397 is_store = !extract32(opc, 0, 1); 3398 if (!fp_access_check(s)) { 3399 return; 3400 } 3401 } else { 3402 if (size == 3 && opc == 2) { 3403 /* PRFM - prefetch */ 3404 return; 3405 } 3406 if (opc == 3 && size > 1) { 3407 unallocated_encoding(s); 3408 return; 3409 } 3410 is_store = (opc == 0); 3411 is_signed = extract32(opc, 1, 1); 3412 is_extended = (size < 3) && extract32(opc, 0, 1); 3413 } 3414 3415 if (rn == 31) { 3416 gen_check_sp_alignment(s); 3417 } 3418 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3419 offset = imm12 << size; 3420 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3421 clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size); 3422 3423 if (is_vector) { 3424 if (is_store) { 3425 do_fp_st(s, rt, clean_addr, size); 3426 } else { 3427 do_fp_ld(s, rt, clean_addr, size); 3428 } 3429 } else { 3430 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3431 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); 3432 if (is_store) { 3433 do_gpr_st(s, tcg_rt, clean_addr, size, 3434 true, rt, iss_sf, false); 3435 } else { 3436 do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 3437 is_extended, true, rt, iss_sf, false); 3438 } 3439 } 3440 } 3441 3442 /* Atomic memory operations 3443 * 3444 * 31 30 27 26 24 22 21 16 15 12 10 5 0 3445 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+ 3446 * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt | 3447 * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+ 3448 * 3449 * Rt: the result register 3450 * Rn: base address or SP 3451 * Rs: the source register for the operation 3452 * V: vector flag (always 0 as of v8.3) 3453 * A: acquire flag 3454 * R: release flag 3455 */ 3456 static 
void disas_ldst_atomic(DisasContext *s, uint32_t insn,
                       int size, int rt, bool is_vector)
{
    int rs = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int o3_opc = extract32(insn, 12, 4);
    bool r = extract32(insn, 22, 1);
    bool a = extract32(insn, 23, 1);
    TCGv_i64 tcg_rs, tcg_rt, clean_addr;
    AtomicThreeOpFn *fn = NULL;
    MemOp mop = s->be_data | size | MO_ALIGN;

    if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
        unallocated_encoding(s);
        return;
    }
    switch (o3_opc) {
    case 000: /* LDADD */
        fn = tcg_gen_atomic_fetch_add_i64;
        break;
    case 001: /* LDCLR */
        fn = tcg_gen_atomic_fetch_and_i64;
        break;
    case 002: /* LDEOR */
        fn = tcg_gen_atomic_fetch_xor_i64;
        break;
    case 003: /* LDSET */
        fn = tcg_gen_atomic_fetch_or_i64;
        break;
    case 004: /* LDSMAX */
        fn = tcg_gen_atomic_fetch_smax_i64;
        mop |= MO_SIGN;
        break;
    case 005: /* LDSMIN */
        fn = tcg_gen_atomic_fetch_smin_i64;
        mop |= MO_SIGN;
        break;
    case 006: /* LDUMAX */
        fn = tcg_gen_atomic_fetch_umax_i64;
        break;
    case 007: /* LDUMIN */
        fn = tcg_gen_atomic_fetch_umin_i64;
        break;
    case 010: /* SWP */
        fn = tcg_gen_atomic_xchg_i64;
        break;
    case 014: /* LDAPR, LDAPRH, LDAPRB */
        if (!dc_isar_feature(aa64_rcpc_8_3, s) ||
            rs != 31 || a != 1 || r != 0) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size);

    if (o3_opc == 014) {
        /*
         * LDAPR* are a special case because they are a simple load, not a
         * fetch-and-do-something op.
         * The architectural consistency requirements here are weaker than
         * full load-acquire (we only need "load-acquire processor consistent"),
         * but we choose to implement them as full LDAQ.
         */
        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false,
                  true, rt, disas_ldst_compute_iss_sf(size, false, 0), true);
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
        return;
    }

    tcg_rs = read_cpu_reg(s, rs, true);
    tcg_rt = cpu_reg(s, rt);

    if (o3_opc == 1) { /* LDCLR */
        tcg_gen_not_i64(tcg_rs, tcg_rs);
    }

    /* The tcg atomic primitives are all full barriers. Therefore we
     * can ignore the Acquire and Release bits of this instruction.
     */
    fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop);

    if ((mop & MO_SIGN) && size != MO_64) {
        /*
         * The atomic op loaded the old value sign-extended to 64 bits,
         * but Rt must be zero-extended from the operation size; a
         * 32-bit zero-extend alone would leave stale sign bits set for
         * the byte and halfword ops.
         */
        tcg_gen_extract_i64(tcg_rt, tcg_rt, 0, 8 << size);
    }
}

/*
 * PAC memory operations
 *
 *  31  30      27  26    24    22  21       12  11  10    5     0
 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
 * | size | 1 1 1 | V | 0 0 | M S | 1 |  imm9  | W | 1 | Rn |  Rt |
 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+
 *
 * Rt: the result register
 * Rn: base address or SP
 * V: vector flag (always 0 as of v8.3)
 * M: clear for key DA, set for key DB
 * W: pre-indexing flag
 * S: sign for imm9.
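 * S:imm9 form a 10-bit signed offset, scaled by the 8-byte access
 * size (see the sextract32() below).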
3564 */ 3565 static void disas_ldst_pac(DisasContext *s, uint32_t insn, 3566 int size, int rt, bool is_vector) 3567 { 3568 int rn = extract32(insn, 5, 5); 3569 bool is_wback = extract32(insn, 11, 1); 3570 bool use_key_a = !extract32(insn, 23, 1); 3571 int offset; 3572 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3573 3574 if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) { 3575 unallocated_encoding(s); 3576 return; 3577 } 3578 3579 if (rn == 31) { 3580 gen_check_sp_alignment(s); 3581 } 3582 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3583 3584 if (s->pauth_active) { 3585 if (use_key_a) { 3586 gen_helper_autda(dirty_addr, cpu_env, dirty_addr, 3587 new_tmp_a64_zero(s)); 3588 } else { 3589 gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, 3590 new_tmp_a64_zero(s)); 3591 } 3592 } 3593 3594 /* Form the 10-bit signed, scaled offset. */ 3595 offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9); 3596 offset = sextract32(offset << size, 0, 10 + size); 3597 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3598 3599 /* Note that "clean" and "dirty" here refer to TBI not PAC. */ 3600 clean_addr = gen_mte_check1(s, dirty_addr, false, 3601 is_wback || rn != 31, size); 3602 3603 tcg_rt = cpu_reg(s, rt); 3604 do_gpr_ld(s, tcg_rt, clean_addr, size, 3605 /* extend */ false, /* iss_valid */ !is_wback, 3606 /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false); 3607 3608 if (is_wback) { 3609 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr); 3610 } 3611 } 3612 3613 /* 3614 * LDAPR/STLR (unscaled immediate) 3615 * 3616 * 31 30 24 22 21 12 10 5 0 3617 * +------+-------------+-----+---+--------+-----+----+-----+ 3618 * | size | 0 1 1 0 0 1 | opc | 0 | imm9 | 0 0 | Rn | Rt | 3619 * +------+-------------+-----+---+--------+-----+----+-----+ 3620 * 3621 * Rt: source or destination register 3622 * Rn: base register 3623 * imm9: unscaled immediate offset 3624 * opc: 00: STLUR*, 01/10/11: various LDAPUR* 3625 * size: size of load/store 3626 */ 3627 static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) 3628 { 3629 int rt = extract32(insn, 0, 5); 3630 int rn = extract32(insn, 5, 5); 3631 int offset = sextract32(insn, 12, 9); 3632 int opc = extract32(insn, 22, 2); 3633 int size = extract32(insn, 30, 2); 3634 TCGv_i64 clean_addr, dirty_addr; 3635 bool is_store = false; 3636 bool extend = false; 3637 bool iss_sf; 3638 MemOp mop; 3639 3640 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3641 unallocated_encoding(s); 3642 return; 3643 } 3644 3645 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3646 mop = size | MO_ALIGN; 3647 3648 switch (opc) { 3649 case 0: /* STLURB */ 3650 is_store = true; 3651 break; 3652 case 1: /* LDAPUR* */ 3653 break; 3654 case 2: /* LDAPURS* 64-bit variant */ 3655 if (size == 3) { 3656 unallocated_encoding(s); 3657 return; 3658 } 3659 mop |= MO_SIGN; 3660 break; 3661 case 3: /* LDAPURS* 32-bit variant */ 3662 if (size > 1) { 3663 unallocated_encoding(s); 3664 return; 3665 } 3666 mop |= MO_SIGN; 3667 extend = true; /* zero-extend 32->64 after signed load */ 3668 break; 3669 default: 3670 g_assert_not_reached(); 3671 } 3672 3673 iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc); 3674 3675 if (rn == 31) { 3676 gen_check_sp_alignment(s); 3677 } 3678 3679 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3680 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3681 clean_addr = clean_data_tbi(s, dirty_addr); 3682 3683 if (is_store) { 3684 /* Store-Release semantics */ 3685 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3686 do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true); 
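
        /*
         * The barrier above orders all earlier loads and stores before
         * the store itself, which together give Store-Release semantics.
         */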
3687 } else { 3688 /* 3689 * Load-AcquirePC semantics; we implement as the slightly more 3690 * restrictive Load-Acquire. 3691 */ 3692 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, 3693 extend, true, rt, iss_sf, true); 3694 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3695 } 3696 } 3697 3698 /* Load/store register (all forms) */ 3699 static void disas_ldst_reg(DisasContext *s, uint32_t insn) 3700 { 3701 int rt = extract32(insn, 0, 5); 3702 int opc = extract32(insn, 22, 2); 3703 bool is_vector = extract32(insn, 26, 1); 3704 int size = extract32(insn, 30, 2); 3705 3706 switch (extract32(insn, 24, 2)) { 3707 case 0: 3708 if (extract32(insn, 21, 1) == 0) { 3709 /* Load/store register (unscaled immediate) 3710 * Load/store immediate pre/post-indexed 3711 * Load/store register unprivileged 3712 */ 3713 disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector); 3714 return; 3715 } 3716 switch (extract32(insn, 10, 2)) { 3717 case 0: 3718 disas_ldst_atomic(s, insn, size, rt, is_vector); 3719 return; 3720 case 2: 3721 disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector); 3722 return; 3723 default: 3724 disas_ldst_pac(s, insn, size, rt, is_vector); 3725 return; 3726 } 3727 break; 3728 case 1: 3729 disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector); 3730 return; 3731 } 3732 unallocated_encoding(s); 3733 } 3734 3735 /* AdvSIMD load/store multiple structures 3736 * 3737 * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0 3738 * +---+---+---------------+---+-------------+--------+------+------+------+ 3739 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt | 3740 * +---+---+---------------+---+-------------+--------+------+------+------+ 3741 * 3742 * AdvSIMD load/store multiple structures (post-indexed) 3743 * 3744 * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0 3745 * +---+---+---------------+---+---+---------+--------+------+------+------+ 3746 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt | 3747 * +---+---+---------------+---+---+---------+--------+------+------+------+ 3748 * 3749 * Rt: first (or only) SIMD&FP register to be transferred 3750 * Rn: base address or SP 3751 * Rm (post-index only): post-index register (when !31) or size dependent #imm 3752 */ 3753 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) 3754 { 3755 int rt = extract32(insn, 0, 5); 3756 int rn = extract32(insn, 5, 5); 3757 int rm = extract32(insn, 16, 5); 3758 int size = extract32(insn, 10, 2); 3759 int opcode = extract32(insn, 12, 4); 3760 bool is_store = !extract32(insn, 22, 1); 3761 bool is_postidx = extract32(insn, 23, 1); 3762 bool is_q = extract32(insn, 30, 1); 3763 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3764 MemOp endian, align, mop; 3765 3766 int total; /* total bytes */ 3767 int elements; /* elements per vector */ 3768 int rpt; /* num iterations */ 3769 int selem; /* structure elements */ 3770 int r; 3771 3772 if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) { 3773 unallocated_encoding(s); 3774 return; 3775 } 3776 3777 if (!is_postidx && rm != 0) { 3778 unallocated_encoding(s); 3779 return; 3780 } 3781 3782 /* From the shared decode logic */ 3783 switch (opcode) { 3784 case 0x0: 3785 rpt = 1; 3786 selem = 4; 3787 break; 3788 case 0x2: 3789 rpt = 4; 3790 selem = 1; 3791 break; 3792 case 0x4: 3793 rpt = 1; 3794 selem = 3; 3795 break; 3796 case 0x6: 3797 rpt = 3; 3798 selem = 1; 3799 break; 3800 case 0x7: 3801 rpt = 1; 3802 selem = 1; 3803 break; 3804 case 0x8: 3805 rpt = 1; 3806 selem = 2; 3807 break; 3808 case 0xa: 3809 rpt = 2; 3810 selem = 1; 3811 break; 
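    /*
     * i.e. 0x0 is LD/ST4, 0x2 is LD/ST1 (four registers), 0x4 is
     * LD/ST3, 0x6 is LD/ST1 (three registers), 0x7 is LD/ST1 (one
     * register), 0x8 is LD/ST2 and 0xa is LD/ST1 (two registers):
     * rpt * selem registers are transferred in total.
     */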
3812 default: 3813 unallocated_encoding(s); 3814 return; 3815 } 3816 3817 if (size == 3 && !is_q && selem != 1) { 3818 /* reserved */ 3819 unallocated_encoding(s); 3820 return; 3821 } 3822 3823 if (!fp_access_check(s)) { 3824 return; 3825 } 3826 3827 if (rn == 31) { 3828 gen_check_sp_alignment(s); 3829 } 3830 3831 /* For our purposes, bytes are always little-endian. */ 3832 endian = s->be_data; 3833 if (size == 0) { 3834 endian = MO_LE; 3835 } 3836 3837 total = rpt * selem * (is_q ? 16 : 8); 3838 tcg_rn = cpu_reg_sp(s, rn); 3839 3840 /* 3841 * Issue the MTE check vs the logical repeat count, before we 3842 * promote consecutive little-endian elements below. 3843 */ 3844 clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, 3845 total); 3846 3847 /* 3848 * Consecutive little-endian elements from a single register 3849 * can be promoted to a larger little-endian operation. 3850 */ 3851 align = MO_ALIGN; 3852 if (selem == 1 && endian == MO_LE) { 3853 align = pow2_align(size); 3854 size = 3; 3855 } 3856 if (!s->align_mem) { 3857 align = 0; 3858 } 3859 mop = endian | size | align; 3860 3861 elements = (is_q ? 16 : 8) >> size; 3862 tcg_ebytes = tcg_constant_i64(1 << size); 3863 for (r = 0; r < rpt; r++) { 3864 int e; 3865 for (e = 0; e < elements; e++) { 3866 int xs; 3867 for (xs = 0; xs < selem; xs++) { 3868 int tt = (rt + r + xs) % 32; 3869 if (is_store) { 3870 do_vec_st(s, tt, e, clean_addr, mop); 3871 } else { 3872 do_vec_ld(s, tt, e, clean_addr, mop); 3873 } 3874 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3875 } 3876 } 3877 } 3878 3879 if (!is_store) { 3880 /* For non-quad operations, setting a slice of the low 3881 * 64 bits of the register clears the high 64 bits (in 3882 * the ARM ARM pseudocode this is implicit in the fact 3883 * that 'rval' is a 64 bit wide variable). 3884 * For quad operations, we might still need to zero the 3885 * high bits of SVE. 
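 * (That is, the part of an SVE vector beyond bit 128;
 * clear_vec_high() handles both cases.)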
3886 */ 3887 for (r = 0; r < rpt * selem; r++) { 3888 int tt = (rt + r) % 32; 3889 clear_vec_high(s, is_q, tt); 3890 } 3891 } 3892 3893 if (is_postidx) { 3894 if (rm == 31) { 3895 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3896 } else { 3897 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); 3898 } 3899 } 3900 } 3901 3902 /* AdvSIMD load/store single structure 3903 * 3904 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 3905 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3906 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt | 3907 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3908 * 3909 * AdvSIMD load/store single structure (post-indexed) 3910 * 3911 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 3912 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3913 * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt | 3914 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3915 * 3916 * Rt: first (or only) SIMD&FP register to be transferred 3917 * Rn: base address or SP 3918 * Rm (post-index only): post-index register (when !31) or size dependent #imm 3919 * index = encoded in Q:S:size dependent on size 3920 * 3921 * lane_size = encoded in R, opc 3922 * transfer width = encoded in opc, S, size 3923 */ 3924 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) 3925 { 3926 int rt = extract32(insn, 0, 5); 3927 int rn = extract32(insn, 5, 5); 3928 int rm = extract32(insn, 16, 5); 3929 int size = extract32(insn, 10, 2); 3930 int S = extract32(insn, 12, 1); 3931 int opc = extract32(insn, 13, 3); 3932 int R = extract32(insn, 21, 1); 3933 int is_load = extract32(insn, 22, 1); 3934 int is_postidx = extract32(insn, 23, 1); 3935 int is_q = extract32(insn, 30, 1); 3936 3937 int scale = extract32(opc, 1, 2); 3938 int selem = (extract32(opc, 0, 1) << 1 | R) + 1; 3939 bool replicate = false; 3940 int index = is_q << 3 | S << 2 | size; 3941 int xs, total; 3942 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3943 MemOp mop; 3944 3945 if (extract32(insn, 31, 1)) { 3946 unallocated_encoding(s); 3947 return; 3948 } 3949 if (!is_postidx && rm != 0) { 3950 unallocated_encoding(s); 3951 return; 3952 } 3953 3954 switch (scale) { 3955 case 3: 3956 if (!is_load || S) { 3957 unallocated_encoding(s); 3958 return; 3959 } 3960 scale = size; 3961 replicate = true; 3962 break; 3963 case 0: 3964 break; 3965 case 1: 3966 if (extract32(size, 0, 1)) { 3967 unallocated_encoding(s); 3968 return; 3969 } 3970 index >>= 1; 3971 break; 3972 case 2: 3973 if (extract32(size, 1, 1)) { 3974 unallocated_encoding(s); 3975 return; 3976 } 3977 if (!extract32(size, 0, 1)) { 3978 index >>= 2; 3979 } else { 3980 if (S) { 3981 unallocated_encoding(s); 3982 return; 3983 } 3984 index >>= 3; 3985 scale = 3; 3986 } 3987 break; 3988 default: 3989 g_assert_not_reached(); 3990 } 3991 3992 if (!fp_access_check(s)) { 3993 return; 3994 } 3995 3996 if (rn == 31) { 3997 gen_check_sp_alignment(s); 3998 } 3999 4000 total = selem << scale; 4001 tcg_rn = cpu_reg_sp(s, rn); 4002 4003 clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, 4004 total); 4005 mop = finalize_memop(s, scale); 4006 4007 tcg_ebytes = tcg_constant_i64(1 << scale); 4008 for (xs = 0; xs < selem; xs++) { 4009 if (replicate) { 4010 /* Load and replicate to all elements */ 4011 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 4012 4013 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 4014 tcg_gen_gvec_dup_i64(scale, 
vec_full_reg_offset(s, rt), 4015 (is_q + 1) * 8, vec_full_reg_size(s), 4016 tcg_tmp); 4017 tcg_temp_free_i64(tcg_tmp); 4018 } else { 4019 /* Load/store one element per register */ 4020 if (is_load) { 4021 do_vec_ld(s, rt, index, clean_addr, mop); 4022 } else { 4023 do_vec_st(s, rt, index, clean_addr, mop); 4024 } 4025 } 4026 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 4027 rt = (rt + 1) % 32; 4028 } 4029 4030 if (is_postidx) { 4031 if (rm == 31) { 4032 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 4033 } else { 4034 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); 4035 } 4036 } 4037 } 4038 4039 /* 4040 * Load/Store memory tags 4041 * 4042 * 31 30 29 24 22 21 12 10 5 0 4043 * +-----+-------------+-----+---+------+-----+------+------+ 4044 * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 | Rn | Rt | 4045 * +-----+-------------+-----+---+------+-----+------+------+ 4046 */ 4047 static void disas_ldst_tag(DisasContext *s, uint32_t insn) 4048 { 4049 int rt = extract32(insn, 0, 5); 4050 int rn = extract32(insn, 5, 5); 4051 uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE; 4052 int op2 = extract32(insn, 10, 2); 4053 int op1 = extract32(insn, 22, 2); 4054 bool is_load = false, is_pair = false, is_zero = false, is_mult = false; 4055 int index = 0; 4056 TCGv_i64 addr, clean_addr, tcg_rt; 4057 4058 /* We checked insn bits [29:24,21] in the caller. */ 4059 if (extract32(insn, 30, 2) != 3) { 4060 goto do_unallocated; 4061 } 4062 4063 /* 4064 * @index is a tri-state variable which has 3 states: 4065 * < 0 : post-index, writeback 4066 * = 0 : signed offset 4067 * > 0 : pre-index, writeback 4068 */ 4069 switch (op1) { 4070 case 0: 4071 if (op2 != 0) { 4072 /* STG */ 4073 index = op2 - 2; 4074 } else { 4075 /* STZGM */ 4076 if (s->current_el == 0 || offset != 0) { 4077 goto do_unallocated; 4078 } 4079 is_mult = is_zero = true; 4080 } 4081 break; 4082 case 1: 4083 if (op2 != 0) { 4084 /* STZG */ 4085 is_zero = true; 4086 index = op2 - 2; 4087 } else { 4088 /* LDG */ 4089 is_load = true; 4090 } 4091 break; 4092 case 2: 4093 if (op2 != 0) { 4094 /* ST2G */ 4095 is_pair = true; 4096 index = op2 - 2; 4097 } else { 4098 /* STGM */ 4099 if (s->current_el == 0 || offset != 0) { 4100 goto do_unallocated; 4101 } 4102 is_mult = true; 4103 } 4104 break; 4105 case 3: 4106 if (op2 != 0) { 4107 /* STZ2G */ 4108 is_pair = is_zero = true; 4109 index = op2 - 2; 4110 } else { 4111 /* LDGM */ 4112 if (s->current_el == 0 || offset != 0) { 4113 goto do_unallocated; 4114 } 4115 is_mult = is_load = true; 4116 } 4117 break; 4118 4119 default: 4120 do_unallocated: 4121 unallocated_encoding(s); 4122 return; 4123 } 4124 4125 if (is_mult 4126 ? !dc_isar_feature(aa64_mte, s) 4127 : !dc_isar_feature(aa64_mte_insn_reg, s)) { 4128 goto do_unallocated; 4129 } 4130 4131 if (rn == 31) { 4132 gen_check_sp_alignment(s); 4133 } 4134 4135 addr = read_cpu_reg_sp(s, rn, true); 4136 if (index >= 0) { 4137 /* pre-index or signed offset */ 4138 tcg_gen_addi_i64(addr, addr, offset); 4139 } 4140 4141 if (is_mult) { 4142 tcg_rt = cpu_reg(s, rt); 4143 4144 if (is_zero) { 4145 int size = 4 << s->dcz_blocksize; 4146 4147 if (s->ata) { 4148 gen_helper_stzgm_tags(cpu_env, addr, tcg_rt); 4149 } 4150 /* 4151 * The non-tags portion of STZGM is mostly like DC_ZVA, 4152 * except the alignment happens before the access. 
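         * That is, the address is first rounded down to the DC ZVA
         * block size (4 << dcz_blocksize bytes) and then the whole
         * block is zeroed.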
4153 */ 4154 clean_addr = clean_data_tbi(s, addr); 4155 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4156 gen_helper_dc_zva(cpu_env, clean_addr); 4157 } else if (s->ata) { 4158 if (is_load) { 4159 gen_helper_ldgm(tcg_rt, cpu_env, addr); 4160 } else { 4161 gen_helper_stgm(cpu_env, addr, tcg_rt); 4162 } 4163 } else { 4164 MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; 4165 int size = 4 << GMID_EL1_BS; 4166 4167 clean_addr = clean_data_tbi(s, addr); 4168 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4169 gen_probe_access(s, clean_addr, acc, size); 4170 4171 if (is_load) { 4172 /* The result tags are zeros. */ 4173 tcg_gen_movi_i64(tcg_rt, 0); 4174 } 4175 } 4176 return; 4177 } 4178 4179 if (is_load) { 4180 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 4181 tcg_rt = cpu_reg(s, rt); 4182 if (s->ata) { 4183 gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt); 4184 } else { 4185 clean_addr = clean_data_tbi(s, addr); 4186 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 4187 gen_address_with_allocation_tag0(tcg_rt, addr); 4188 } 4189 } else { 4190 tcg_rt = cpu_reg_sp(s, rt); 4191 if (!s->ata) { 4192 /* 4193 * For STG and ST2G, we need to check alignment and probe memory. 4194 * TODO: For STZG and STZ2G, we could rely on the stores below, 4195 * at least for system mode; user-only won't enforce alignment. 4196 */ 4197 if (is_pair) { 4198 gen_helper_st2g_stub(cpu_env, addr); 4199 } else { 4200 gen_helper_stg_stub(cpu_env, addr); 4201 } 4202 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 4203 if (is_pair) { 4204 gen_helper_st2g_parallel(cpu_env, addr, tcg_rt); 4205 } else { 4206 gen_helper_stg_parallel(cpu_env, addr, tcg_rt); 4207 } 4208 } else { 4209 if (is_pair) { 4210 gen_helper_st2g(cpu_env, addr, tcg_rt); 4211 } else { 4212 gen_helper_stg(cpu_env, addr, tcg_rt); 4213 } 4214 } 4215 } 4216 4217 if (is_zero) { 4218 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 4219 TCGv_i64 tcg_zero = tcg_constant_i64(0); 4220 int mem_index = get_mem_index(s); 4221 int i, n = (1 + is_pair) << LOG2_TAG_GRANULE; 4222 4223 tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, 4224 MO_UQ | MO_ALIGN_16); 4225 for (i = 8; i < n; i += 8) { 4226 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4227 tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_UQ); 4228 } 4229 } 4230 4231 if (index != 0) { 4232 /* pre-index or post-index */ 4233 if (index < 0) { 4234 /* post-index */ 4235 tcg_gen_addi_i64(addr, addr, offset); 4236 } 4237 tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr); 4238 } 4239 } 4240 4241 /* Loads and stores */ 4242 static void disas_ldst(DisasContext *s, uint32_t insn) 4243 { 4244 switch (extract32(insn, 24, 6)) { 4245 case 0x08: /* Load/store exclusive */ 4246 disas_ldst_excl(s, insn); 4247 break; 4248 case 0x18: case 0x1c: /* Load register (literal) */ 4249 disas_ld_lit(s, insn); 4250 break; 4251 case 0x28: case 0x29: 4252 case 0x2c: case 0x2d: /* Load/store pair (all forms) */ 4253 disas_ldst_pair(s, insn); 4254 break; 4255 case 0x38: case 0x39: 4256 case 0x3c: case 0x3d: /* Load/store register (all forms) */ 4257 disas_ldst_reg(s, insn); 4258 break; 4259 case 0x0c: /* AdvSIMD load/store multiple structures */ 4260 disas_ldst_multiple_struct(s, insn); 4261 break; 4262 case 0x0d: /* AdvSIMD load/store single structure */ 4263 disas_ldst_single_struct(s, insn); 4264 break; 4265 case 0x19: 4266 if (extract32(insn, 21, 1) != 0) { 4267 disas_ldst_tag(s, insn); 4268 } else if (extract32(insn, 10, 2) == 0) { 4269 disas_ldst_ldapr_stlr(s, insn); 4270 } else { 4271 unallocated_encoding(s); 4272 } 4273 
break; 4274 default: 4275 unallocated_encoding(s); 4276 break; 4277 } 4278 } 4279 4280 /* PC-rel. addressing 4281 * 31 30 29 28 24 23 5 4 0 4282 * +----+-------+-----------+-------------------+------+ 4283 * | op | immlo | 1 0 0 0 0 | immhi | Rd | 4284 * +----+-------+-----------+-------------------+------+ 4285 */ 4286 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn) 4287 { 4288 unsigned int page, rd; 4289 int64_t offset; 4290 4291 page = extract32(insn, 31, 1); 4292 /* SignExtend(immhi:immlo) -> offset */ 4293 offset = sextract64(insn, 5, 19); 4294 offset = offset << 2 | extract32(insn, 29, 2); 4295 rd = extract32(insn, 0, 5); 4296 4297 if (page) { 4298 /* ADRP (page based) */ 4299 offset <<= 12; 4300 /* The page offset is ok for CF_PCREL. */ 4301 offset -= s->pc_curr & 0xfff; 4302 } 4303 4304 gen_pc_plus_diff(s, cpu_reg(s, rd), offset); 4305 } 4306 4307 /* 4308 * Add/subtract (immediate) 4309 * 4310 * 31 30 29 28 23 22 21 10 9 5 4 0 4311 * +--+--+--+-------------+--+-------------+-----+-----+ 4312 * |sf|op| S| 1 0 0 0 1 0 |sh| imm12 | Rn | Rd | 4313 * +--+--+--+-------------+--+-------------+-----+-----+ 4314 * 4315 * sf: 0 -> 32bit, 1 -> 64bit 4316 * op: 0 -> add , 1 -> sub 4317 * S: 1 -> set flags 4318 * sh: 1 -> LSL imm by 12 4319 */ 4320 static void disas_add_sub_imm(DisasContext *s, uint32_t insn) 4321 { 4322 int rd = extract32(insn, 0, 5); 4323 int rn = extract32(insn, 5, 5); 4324 uint64_t imm = extract32(insn, 10, 12); 4325 bool shift = extract32(insn, 22, 1); 4326 bool setflags = extract32(insn, 29, 1); 4327 bool sub_op = extract32(insn, 30, 1); 4328 bool is_64bit = extract32(insn, 31, 1); 4329 4330 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn); 4331 TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd); 4332 TCGv_i64 tcg_result; 4333 4334 if (shift) { 4335 imm <<= 12; 4336 } 4337 4338 tcg_result = tcg_temp_new_i64(); 4339 if (!setflags) { 4340 if (sub_op) { 4341 tcg_gen_subi_i64(tcg_result, tcg_rn, imm); 4342 } else { 4343 tcg_gen_addi_i64(tcg_result, tcg_rn, imm); 4344 } 4345 } else { 4346 TCGv_i64 tcg_imm = tcg_constant_i64(imm); 4347 if (sub_op) { 4348 gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm); 4349 } else { 4350 gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm); 4351 } 4352 } 4353 4354 if (is_64bit) { 4355 tcg_gen_mov_i64(tcg_rd, tcg_result); 4356 } else { 4357 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 4358 } 4359 4360 tcg_temp_free_i64(tcg_result); 4361 } 4362 4363 /* 4364 * Add/subtract (immediate, with tags) 4365 * 4366 * 31 30 29 28 23 22 21 16 14 10 9 5 4 0 4367 * +--+--+--+-------------+--+---------+--+-------+-----+-----+ 4368 * |sf|op| S| 1 0 0 0 1 1 |o2| uimm6 |o3| uimm4 | Rn | Rd | 4369 * +--+--+--+-------------+--+---------+--+-------+-----+-----+ 4370 * 4371 * op: 0 -> add, 1 -> sub 4372 */ 4373 static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn) 4374 { 4375 int rd = extract32(insn, 0, 5); 4376 int rn = extract32(insn, 5, 5); 4377 int uimm4 = extract32(insn, 10, 4); 4378 int uimm6 = extract32(insn, 16, 6); 4379 bool sub_op = extract32(insn, 30, 1); 4380 TCGv_i64 tcg_rn, tcg_rd; 4381 int imm; 4382 4383 /* Test all of sf=1, S=0, o2=0, o3=0. 
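     * (The 0xa040c000 mask covers bits 31 and 29 (sf, S), bit 22 (o2)
     * and bits 15:14 (o3); of those, only bit 31 (sf) may be set.)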
*/ 4384 if ((insn & 0xa040c000u) != 0x80000000u || 4385 !dc_isar_feature(aa64_mte_insn_reg, s)) { 4386 unallocated_encoding(s); 4387 return; 4388 } 4389 4390 imm = uimm6 << LOG2_TAG_GRANULE; 4391 if (sub_op) { 4392 imm = -imm; 4393 } 4394 4395 tcg_rn = cpu_reg_sp(s, rn); 4396 tcg_rd = cpu_reg_sp(s, rd); 4397 4398 if (s->ata) { 4399 gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn, 4400 tcg_constant_i32(imm), 4401 tcg_constant_i32(uimm4)); 4402 } else { 4403 tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); 4404 gen_address_with_allocation_tag0(tcg_rd, tcg_rd); 4405 } 4406 } 4407 4408 /* The input should be a value in the bottom e bits (with higher 4409 * bits zero); returns that value replicated into every element 4410 * of size e in a 64 bit integer. 4411 */ 4412 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e) 4413 { 4414 assert(e != 0); 4415 while (e < 64) { 4416 mask |= mask << e; 4417 e *= 2; 4418 } 4419 return mask; 4420 } 4421 4422 /* Return a value with the bottom len bits set (where 0 < len <= 64) */ 4423 static inline uint64_t bitmask64(unsigned int length) 4424 { 4425 assert(length > 0 && length <= 64); 4426 return ~0ULL >> (64 - length); 4427 } 4428 4429 /* Simplified variant of pseudocode DecodeBitMasks() for the case where we 4430 * only require the wmask. Returns false if the imms/immr/immn are a reserved 4431 * value (ie should cause a guest UNDEF exception), and true if they are 4432 * valid, in which case the decoded bit pattern is written to result. 4433 */ 4434 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, 4435 unsigned int imms, unsigned int immr) 4436 { 4437 uint64_t mask; 4438 unsigned e, levels, s, r; 4439 int len; 4440 4441 assert(immn < 2 && imms < 64 && immr < 64); 4442 4443 /* The bit patterns we create here are 64 bit patterns which 4444 * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or 4445 * 64 bits each. Each element contains the same value: a run 4446 * of between 1 and e-1 non-zero bits, rotated within the 4447 * element by between 0 and e-1 bits. 4448 * 4449 * The element size and run length are encoded into immn (1 bit) 4450 * and imms (6 bits) as follows: 4451 * 64 bit elements: immn = 1, imms = <length of run - 1> 4452 * 32 bit elements: immn = 0, imms = 0 : <length of run - 1> 4453 * 16 bit elements: immn = 0, imms = 10 : <length of run - 1> 4454 * 8 bit elements: immn = 0, imms = 110 : <length of run - 1> 4455 * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1> 4456 * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1> 4457 * Notice that immn = 0, imms = 11111x is the only combination 4458 * not covered by one of the above options; this is reserved. 4459 * Further, <length of run - 1> all-ones is a reserved pattern. 4460 * 4461 * In all cases the rotation is by immr % e (and immr is 6 bits). 4462 */ 4463 4464 /* First determine the element size */ 4465 len = 31 - clz32((immn << 6) | (~imms & 0x3f)); 4466 if (len < 1) { 4467 /* This is the immn == 0, imms == 0x11111x case */ 4468 return false; 4469 } 4470 e = 1 << len; 4471 4472 levels = e - 1; 4473 s = imms & levels; 4474 r = immr & levels; 4475 4476 if (s == levels) { 4477 /* <length of run - 1> mustn't be all-ones. */ 4478 return false; 4479 } 4480 4481 /* Create the value of one element: s+1 set bits rotated 4482 * by r within the element (which is e bits wide)... 
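       For example immn=0, imms=0b001111, immr=0 decodes to e=32, s=15,
       r=0: the element is 0x0000ffff, which is replicated below to
       0x0000ffff0000ffff.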
4483 */ 4484 mask = bitmask64(s + 1); 4485 if (r) { 4486 mask = (mask >> r) | (mask << (e - r)); 4487 mask &= bitmask64(e); 4488 } 4489 /* ...then replicate the element over the whole 64 bit value */ 4490 mask = bitfield_replicate(mask, e); 4491 *result = mask; 4492 return true; 4493 } 4494 4495 /* Logical (immediate) 4496 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0 4497 * +----+-----+-------------+---+------+------+------+------+ 4498 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms | Rn | Rd | 4499 * +----+-----+-------------+---+------+------+------+------+ 4500 */ 4501 static void disas_logic_imm(DisasContext *s, uint32_t insn) 4502 { 4503 unsigned int sf, opc, is_n, immr, imms, rn, rd; 4504 TCGv_i64 tcg_rd, tcg_rn; 4505 uint64_t wmask; 4506 bool is_and = false; 4507 4508 sf = extract32(insn, 31, 1); 4509 opc = extract32(insn, 29, 2); 4510 is_n = extract32(insn, 22, 1); 4511 immr = extract32(insn, 16, 6); 4512 imms = extract32(insn, 10, 6); 4513 rn = extract32(insn, 5, 5); 4514 rd = extract32(insn, 0, 5); 4515 4516 if (!sf && is_n) { 4517 unallocated_encoding(s); 4518 return; 4519 } 4520 4521 if (opc == 0x3) { /* ANDS */ 4522 tcg_rd = cpu_reg(s, rd); 4523 } else { 4524 tcg_rd = cpu_reg_sp(s, rd); 4525 } 4526 tcg_rn = cpu_reg(s, rn); 4527 4528 if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) { 4529 /* some immediate field values are reserved */ 4530 unallocated_encoding(s); 4531 return; 4532 } 4533 4534 if (!sf) { 4535 wmask &= 0xffffffff; 4536 } 4537 4538 switch (opc) { 4539 case 0x3: /* ANDS */ 4540 case 0x0: /* AND */ 4541 tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask); 4542 is_and = true; 4543 break; 4544 case 0x1: /* ORR */ 4545 tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask); 4546 break; 4547 case 0x2: /* EOR */ 4548 tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask); 4549 break; 4550 default: 4551 assert(FALSE); /* must handle all above */ 4552 break; 4553 } 4554 4555 if (!sf && !is_and) { 4556 /* zero extend final result; we know we can skip this for AND 4557 * since the immediate had the high 32 bits clear. 
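         * (wmask was truncated to 32 bits above for !sf, so the AND
         * result is already zero-extended.)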
4558 */ 4559 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4560 } 4561 4562 if (opc == 3) { /* ANDS */ 4563 gen_logic_CC(sf, tcg_rd); 4564 } 4565 } 4566 4567 /* 4568 * Move wide (immediate) 4569 * 4570 * 31 30 29 28 23 22 21 20 5 4 0 4571 * +--+-----+-------------+-----+----------------+------+ 4572 * |sf| opc | 1 0 0 1 0 1 | hw | imm16 | Rd | 4573 * +--+-----+-------------+-----+----------------+------+ 4574 * 4575 * sf: 0 -> 32 bit, 1 -> 64 bit 4576 * opc: 00 -> N, 10 -> Z, 11 -> K 4577 * hw: shift/16 (0,16, and sf only 32, 48) 4578 */ 4579 static void disas_movw_imm(DisasContext *s, uint32_t insn) 4580 { 4581 int rd = extract32(insn, 0, 5); 4582 uint64_t imm = extract32(insn, 5, 16); 4583 int sf = extract32(insn, 31, 1); 4584 int opc = extract32(insn, 29, 2); 4585 int pos = extract32(insn, 21, 2) << 4; 4586 TCGv_i64 tcg_rd = cpu_reg(s, rd); 4587 4588 if (!sf && (pos >= 32)) { 4589 unallocated_encoding(s); 4590 return; 4591 } 4592 4593 switch (opc) { 4594 case 0: /* MOVN */ 4595 case 2: /* MOVZ */ 4596 imm <<= pos; 4597 if (opc == 0) { 4598 imm = ~imm; 4599 } 4600 if (!sf) { 4601 imm &= 0xffffffffu; 4602 } 4603 tcg_gen_movi_i64(tcg_rd, imm); 4604 break; 4605 case 3: /* MOVK */ 4606 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_constant_i64(imm), pos, 16); 4607 if (!sf) { 4608 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4609 } 4610 break; 4611 default: 4612 unallocated_encoding(s); 4613 break; 4614 } 4615 } 4616 4617 /* Bitfield 4618 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0 4619 * +----+-----+-------------+---+------+------+------+------+ 4620 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms | Rn | Rd | 4621 * +----+-----+-------------+---+------+------+------+------+ 4622 */ 4623 static void disas_bitfield(DisasContext *s, uint32_t insn) 4624 { 4625 unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len; 4626 TCGv_i64 tcg_rd, tcg_tmp; 4627 4628 sf = extract32(insn, 31, 1); 4629 opc = extract32(insn, 29, 2); 4630 n = extract32(insn, 22, 1); 4631 ri = extract32(insn, 16, 6); 4632 si = extract32(insn, 10, 6); 4633 rn = extract32(insn, 5, 5); 4634 rd = extract32(insn, 0, 5); 4635 bitsize = sf ? 64 : 32; 4636 4637 if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) { 4638 unallocated_encoding(s); 4639 return; 4640 } 4641 4642 tcg_rd = cpu_reg(s, rd); 4643 4644 /* Suppress the zero-extend for !sf. Since RI and SI are constrained 4645 to be smaller than bitsize, we'll never reference data outside the 4646 low 32-bits anyway. */ 4647 tcg_tmp = read_cpu_reg(s, rn, 1); 4648 4649 /* Recognize simple(r) extractions. */ 4650 if (si >= ri) { 4651 /* Wd<s-r:0> = Wn<s:r> */ 4652 len = (si - ri) + 1; 4653 if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */ 4654 tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len); 4655 goto done; 4656 } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */ 4657 tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len); 4658 return; 4659 } 4660 /* opc == 1, BFXIL fall through to deposit */ 4661 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri); 4662 pos = 0; 4663 } else { 4664 /* Handle the ri > si case with a deposit 4665 * Wd<32+s-r,32-r> = Wn<s:0> 4666 */ 4667 len = si + 1; 4668 pos = (bitsize - ri) & (bitsize - 1); 4669 } 4670 4671 if (opc == 0 && len < ri) { 4672 /* SBFM: sign extend the destination field from len to fill 4673 the balance of the word. Let the deposit below insert all 4674 of those sign bits. 
*/ 4675 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); 4676 len = ri; 4677 } 4678 4679 if (opc == 1) { /* BFM, BFXIL */ 4680 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4681 } else { 4682 /* SBFM or UBFM: We start with zero, and we haven't modified 4683 any bits outside bitsize, therefore the zero-extension 4684 below is unneeded. */ 4685 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4686 return; 4687 } 4688 4689 done: 4690 if (!sf) { /* zero extend final result */ 4691 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4692 } 4693 } 4694 4695 /* Extract 4696 * 31 30 29 28 23 22 21 20 16 15 10 9 5 4 0 4697 * +----+------+-------------+---+----+------+--------+------+------+ 4698 * | sf | op21 | 1 0 0 1 1 1 | N | o0 | Rm | imms | Rn | Rd | 4699 * +----+------+-------------+---+----+------+--------+------+------+ 4700 */ 4701 static void disas_extract(DisasContext *s, uint32_t insn) 4702 { 4703 unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0; 4704 4705 sf = extract32(insn, 31, 1); 4706 n = extract32(insn, 22, 1); 4707 rm = extract32(insn, 16, 5); 4708 imm = extract32(insn, 10, 6); 4709 rn = extract32(insn, 5, 5); 4710 rd = extract32(insn, 0, 5); 4711 op21 = extract32(insn, 29, 2); 4712 op0 = extract32(insn, 21, 1); 4713 bitsize = sf ? 64 : 32; 4714 4715 if (sf != n || op21 || op0 || imm >= bitsize) { 4716 unallocated_encoding(s); 4717 } else { 4718 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4719 4720 tcg_rd = cpu_reg(s, rd); 4721 4722 if (unlikely(imm == 0)) { 4723 /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4724 * so an extract from bit 0 is a special case. 4725 */ 4726 if (sf) { 4727 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm)); 4728 } else { 4729 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm)); 4730 } 4731 } else { 4732 tcg_rm = cpu_reg(s, rm); 4733 tcg_rn = cpu_reg(s, rn); 4734 4735 if (sf) { 4736 /* Specialization to ROR happens in EXTRACT2. */ 4737 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm); 4738 } else { 4739 TCGv_i32 t0 = tcg_temp_new_i32(); 4740 4741 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4742 if (rm == rn) { 4743 tcg_gen_rotri_i32(t0, t0, imm); 4744 } else { 4745 TCGv_i32 t1 = tcg_temp_new_i32(); 4746 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4747 tcg_gen_extract2_i32(t0, t0, t1, imm); 4748 tcg_temp_free_i32(t1); 4749 } 4750 tcg_gen_extu_i32_i64(tcg_rd, t0); 4751 tcg_temp_free_i32(t0); 4752 } 4753 } 4754 } 4755 } 4756 4757 /* Data processing - immediate */ 4758 static void disas_data_proc_imm(DisasContext *s, uint32_t insn) 4759 { 4760 switch (extract32(insn, 23, 6)) { 4761 case 0x20: case 0x21: /* PC-rel. addressing */ 4762 disas_pc_rel_adr(s, insn); 4763 break; 4764 case 0x22: /* Add/subtract (immediate) */ 4765 disas_add_sub_imm(s, insn); 4766 break; 4767 case 0x23: /* Add/subtract (immediate, with tags) */ 4768 disas_add_sub_imm_with_tags(s, insn); 4769 break; 4770 case 0x24: /* Logical (immediate) */ 4771 disas_logic_imm(s, insn); 4772 break; 4773 case 0x25: /* Move wide (immediate) */ 4774 disas_movw_imm(s, insn); 4775 break; 4776 case 0x26: /* Bitfield */ 4777 disas_bitfield(s, insn); 4778 break; 4779 case 0x27: /* Extract */ 4780 disas_extract(s, insn); 4781 break; 4782 default: 4783 unallocated_encoding(s); 4784 break; 4785 } 4786 } 4787 4788 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 4789 * Note that it is the caller's responsibility to ensure that the 4790 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 4791 * mandated semantics for out of range shifts. 
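 * (For example, handle_shift_reg() below masks a variable shift amount
 * with 31 or 63 first, which is exactly the LSLV/LSRV/ASRV/RORV
 * behaviour.)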
4792 */ 4793 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, 4794 enum a64_shift_type shift_type, TCGv_i64 shift_amount) 4795 { 4796 switch (shift_type) { 4797 case A64_SHIFT_TYPE_LSL: 4798 tcg_gen_shl_i64(dst, src, shift_amount); 4799 break; 4800 case A64_SHIFT_TYPE_LSR: 4801 tcg_gen_shr_i64(dst, src, shift_amount); 4802 break; 4803 case A64_SHIFT_TYPE_ASR: 4804 if (!sf) { 4805 tcg_gen_ext32s_i64(dst, src); 4806 } 4807 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount); 4808 break; 4809 case A64_SHIFT_TYPE_ROR: 4810 if (sf) { 4811 tcg_gen_rotr_i64(dst, src, shift_amount); 4812 } else { 4813 TCGv_i32 t0, t1; 4814 t0 = tcg_temp_new_i32(); 4815 t1 = tcg_temp_new_i32(); 4816 tcg_gen_extrl_i64_i32(t0, src); 4817 tcg_gen_extrl_i64_i32(t1, shift_amount); 4818 tcg_gen_rotr_i32(t0, t0, t1); 4819 tcg_gen_extu_i32_i64(dst, t0); 4820 tcg_temp_free_i32(t0); 4821 tcg_temp_free_i32(t1); 4822 } 4823 break; 4824 default: 4825 assert(FALSE); /* all shift types should be handled */ 4826 break; 4827 } 4828 4829 if (!sf) { /* zero extend final result */ 4830 tcg_gen_ext32u_i64(dst, dst); 4831 } 4832 } 4833 4834 /* Shift a TCGv src by immediate, put result in dst. 4835 * The shift amount must be in range (this should always be true as the 4836 * relevant instructions will UNDEF on bad shift immediates). 4837 */ 4838 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf, 4839 enum a64_shift_type shift_type, unsigned int shift_i) 4840 { 4841 assert(shift_i < (sf ? 64 : 32)); 4842 4843 if (shift_i == 0) { 4844 tcg_gen_mov_i64(dst, src); 4845 } else { 4846 shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i)); 4847 } 4848 } 4849 4850 /* Logical (shifted register) 4851 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 4852 * +----+-----+-----------+-------+---+------+--------+------+------+ 4853 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd | 4854 * +----+-----+-----------+-------+---+------+--------+------+------+ 4855 */ 4856 static void disas_logic_reg(DisasContext *s, uint32_t insn) 4857 { 4858 TCGv_i64 tcg_rd, tcg_rn, tcg_rm; 4859 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd; 4860 4861 sf = extract32(insn, 31, 1); 4862 opc = extract32(insn, 29, 2); 4863 shift_type = extract32(insn, 22, 2); 4864 invert = extract32(insn, 21, 1); 4865 rm = extract32(insn, 16, 5); 4866 shift_amount = extract32(insn, 10, 6); 4867 rn = extract32(insn, 5, 5); 4868 rd = extract32(insn, 0, 5); 4869 4870 if (!sf && (shift_amount & (1 << 5))) { 4871 unallocated_encoding(s); 4872 return; 4873 } 4874 4875 tcg_rd = cpu_reg(s, rd); 4876 4877 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) { 4878 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for 4879 * register-register MOV and MVN, so it is worth special casing. 
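     * (e.g. MOV Xd, Xm assembles as ORR Xd, XZR, Xm, and MVN Xd, Xm
     * as ORN Xd, XZR, Xm.)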
4880 */ 4881 tcg_rm = cpu_reg(s, rm); 4882 if (invert) { 4883 tcg_gen_not_i64(tcg_rd, tcg_rm); 4884 if (!sf) { 4885 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4886 } 4887 } else { 4888 if (sf) { 4889 tcg_gen_mov_i64(tcg_rd, tcg_rm); 4890 } else { 4891 tcg_gen_ext32u_i64(tcg_rd, tcg_rm); 4892 } 4893 } 4894 return; 4895 } 4896 4897 tcg_rm = read_cpu_reg(s, rm, sf); 4898 4899 if (shift_amount) { 4900 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount); 4901 } 4902 4903 tcg_rn = cpu_reg(s, rn); 4904 4905 switch (opc | (invert << 2)) { 4906 case 0: /* AND */ 4907 case 3: /* ANDS */ 4908 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm); 4909 break; 4910 case 1: /* ORR */ 4911 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm); 4912 break; 4913 case 2: /* EOR */ 4914 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm); 4915 break; 4916 case 4: /* BIC */ 4917 case 7: /* BICS */ 4918 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm); 4919 break; 4920 case 5: /* ORN */ 4921 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm); 4922 break; 4923 case 6: /* EON */ 4924 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm); 4925 break; 4926 default: 4927 assert(FALSE); 4928 break; 4929 } 4930 4931 if (!sf) { 4932 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4933 } 4934 4935 if (opc == 3) { 4936 gen_logic_CC(sf, tcg_rd); 4937 } 4938 } 4939 4940 /* 4941 * Add/subtract (extended register) 4942 * 4943 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0| 4944 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 4945 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd | 4946 * +--+--+--+-----------+-----+--+-------+------+------+----+----+ 4947 * 4948 * sf: 0 -> 32bit, 1 -> 64bit 4949 * op: 0 -> add , 1 -> sub 4950 * S: 1 -> set flags 4951 * opt: 00 4952 * option: extension type (see DecodeRegExtend) 4953 * imm3: optional shift to Rm 4954 * 4955 * Rd = Rn + LSL(extend(Rm), amount) 4956 */ 4957 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn) 4958 { 4959 int rd = extract32(insn, 0, 5); 4960 int rn = extract32(insn, 5, 5); 4961 int imm3 = extract32(insn, 10, 3); 4962 int option = extract32(insn, 13, 3); 4963 int rm = extract32(insn, 16, 5); 4964 int opt = extract32(insn, 22, 2); 4965 bool setflags = extract32(insn, 29, 1); 4966 bool sub_op = extract32(insn, 30, 1); 4967 bool sf = extract32(insn, 31, 1); 4968 4969 TCGv_i64 tcg_rm, tcg_rn; /* temps */ 4970 TCGv_i64 tcg_rd; 4971 TCGv_i64 tcg_result; 4972 4973 if (imm3 > 4 || opt != 0) { 4974 unallocated_encoding(s); 4975 return; 4976 } 4977 4978 /* non-flag setting ops may use SP */ 4979 if (!setflags) { 4980 tcg_rd = cpu_reg_sp(s, rd); 4981 } else { 4982 tcg_rd = cpu_reg(s, rd); 4983 } 4984 tcg_rn = read_cpu_reg_sp(s, rn, sf); 4985 4986 tcg_rm = read_cpu_reg(s, rm, sf); 4987 ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3); 4988 4989 tcg_result = tcg_temp_new_i64(); 4990 4991 if (!setflags) { 4992 if (sub_op) { 4993 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 4994 } else { 4995 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 4996 } 4997 } else { 4998 if (sub_op) { 4999 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 5000 } else { 5001 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 5002 } 5003 } 5004 5005 if (sf) { 5006 tcg_gen_mov_i64(tcg_rd, tcg_result); 5007 } else { 5008 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 5009 } 5010 5011 tcg_temp_free_i64(tcg_result); 5012 } 5013 5014 /* 5015 * Add/subtract (shifted register) 5016 * 5017 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0 5018 * +--+--+--+-----------+-----+--+-------+---------+------+------+ 5019 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd | 5020 
* +--+--+--+-----------+-----+--+-------+---------+------+------+ 5021 * 5022 * sf: 0 -> 32bit, 1 -> 64bit 5023 * op: 0 -> add , 1 -> sub 5024 * S: 1 -> set flags 5025 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED 5026 * imm6: Shift amount to apply to Rm before the add/sub 5027 */ 5028 static void disas_add_sub_reg(DisasContext *s, uint32_t insn) 5029 { 5030 int rd = extract32(insn, 0, 5); 5031 int rn = extract32(insn, 5, 5); 5032 int imm6 = extract32(insn, 10, 6); 5033 int rm = extract32(insn, 16, 5); 5034 int shift_type = extract32(insn, 22, 2); 5035 bool setflags = extract32(insn, 29, 1); 5036 bool sub_op = extract32(insn, 30, 1); 5037 bool sf = extract32(insn, 31, 1); 5038 5039 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5040 TCGv_i64 tcg_rn, tcg_rm; 5041 TCGv_i64 tcg_result; 5042 5043 if ((shift_type == 3) || (!sf && (imm6 > 31))) { 5044 unallocated_encoding(s); 5045 return; 5046 } 5047 5048 tcg_rn = read_cpu_reg(s, rn, sf); 5049 tcg_rm = read_cpu_reg(s, rm, sf); 5050 5051 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6); 5052 5053 tcg_result = tcg_temp_new_i64(); 5054 5055 if (!setflags) { 5056 if (sub_op) { 5057 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 5058 } else { 5059 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 5060 } 5061 } else { 5062 if (sub_op) { 5063 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 5064 } else { 5065 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 5066 } 5067 } 5068 5069 if (sf) { 5070 tcg_gen_mov_i64(tcg_rd, tcg_result); 5071 } else { 5072 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 5073 } 5074 5075 tcg_temp_free_i64(tcg_result); 5076 } 5077 5078 /* Data-processing (3 source) 5079 * 5080 * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0 5081 * +--+------+-----------+------+------+----+------+------+------+ 5082 * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd | 5083 * +--+------+-----------+------+------+----+------+------+------+ 5084 */ 5085 static void disas_data_proc_3src(DisasContext *s, uint32_t insn) 5086 { 5087 int rd = extract32(insn, 0, 5); 5088 int rn = extract32(insn, 5, 5); 5089 int ra = extract32(insn, 10, 5); 5090 int rm = extract32(insn, 16, 5); 5091 int op_id = (extract32(insn, 29, 3) << 4) | 5092 (extract32(insn, 21, 3) << 1) | 5093 extract32(insn, 15, 1); 5094 bool sf = extract32(insn, 31, 1); 5095 bool is_sub = extract32(op_id, 0, 1); 5096 bool is_high = extract32(op_id, 2, 1); 5097 bool is_signed = false; 5098 TCGv_i64 tcg_op1; 5099 TCGv_i64 tcg_op2; 5100 TCGv_i64 tcg_tmp; 5101 5102 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */ 5103 switch (op_id) { 5104 case 0x42: /* SMADDL */ 5105 case 0x43: /* SMSUBL */ 5106 case 0x44: /* SMULH */ 5107 is_signed = true; 5108 break; 5109 case 0x0: /* MADD (32bit) */ 5110 case 0x1: /* MSUB (32bit) */ 5111 case 0x40: /* MADD (64bit) */ 5112 case 0x41: /* MSUB (64bit) */ 5113 case 0x4a: /* UMADDL */ 5114 case 0x4b: /* UMSUBL */ 5115 case 0x4c: /* UMULH */ 5116 break; 5117 default: 5118 unallocated_encoding(s); 5119 return; 5120 } 5121 5122 if (is_high) { 5123 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */ 5124 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5125 TCGv_i64 tcg_rn = cpu_reg(s, rn); 5126 TCGv_i64 tcg_rm = cpu_reg(s, rm); 5127 5128 if (is_signed) { 5129 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 5130 } else { 5131 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 5132 } 5133 5134 tcg_temp_free_i64(low_bits); 5135 return; 5136 } 5137 5138 tcg_op1 = tcg_temp_new_i64(); 5139 tcg_op2 = tcg_temp_new_i64(); 5140 tcg_tmp = tcg_temp_new_i64(); 5141 5142 
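    /*
     * op_id values below 0x42 are MADD/MSUB in both widths (0x0, 0x1,
     * 0x40, 0x41), which use the source registers unmodified; the
     * remaining widening multiplies (SMADDL/SMSUBL/UMADDL/UMSUBL)
     * extend the low 32 bits of Rn and Rm first.
     */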
if (op_id < 0x42) { 5143 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn)); 5144 tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm)); 5145 } else { 5146 if (is_signed) { 5147 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn)); 5148 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm)); 5149 } else { 5150 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn)); 5151 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm)); 5152 } 5153 } 5154 5155 if (ra == 31 && !is_sub) { 5156 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 5157 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2); 5158 } else { 5159 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 5160 if (is_sub) { 5161 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 5162 } else { 5163 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 5164 } 5165 } 5166 5167 if (!sf) { 5168 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd)); 5169 } 5170 5171 tcg_temp_free_i64(tcg_op1); 5172 tcg_temp_free_i64(tcg_op2); 5173 tcg_temp_free_i64(tcg_tmp); 5174 } 5175 5176 /* Add/subtract (with carry) 5177 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0 5178 * +--+--+--+------------------------+------+-------------+------+-----+ 5179 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd | 5180 * +--+--+--+------------------------+------+-------------+------+-----+ 5181 */ 5182 5183 static void disas_adc_sbc(DisasContext *s, uint32_t insn) 5184 { 5185 unsigned int sf, op, setflags, rm, rn, rd; 5186 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 5187 5188 sf = extract32(insn, 31, 1); 5189 op = extract32(insn, 30, 1); 5190 setflags = extract32(insn, 29, 1); 5191 rm = extract32(insn, 16, 5); 5192 rn = extract32(insn, 5, 5); 5193 rd = extract32(insn, 0, 5); 5194 5195 tcg_rd = cpu_reg(s, rd); 5196 tcg_rn = cpu_reg(s, rn); 5197 5198 if (op) { 5199 tcg_y = new_tmp_a64(s); 5200 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm)); 5201 } else { 5202 tcg_y = cpu_reg(s, rm); 5203 } 5204 5205 if (setflags) { 5206 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y); 5207 } else { 5208 gen_adc(sf, tcg_rd, tcg_rn, tcg_y); 5209 } 5210 } 5211 5212 /* 5213 * Rotate right into flags 5214 * 31 30 29 21 15 10 5 4 0 5215 * +--+--+--+-----------------+--------+-----------+------+--+------+ 5216 * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask | 5217 * +--+--+--+-----------------+--------+-----------+------+--+------+ 5218 */ 5219 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn) 5220 { 5221 int mask = extract32(insn, 0, 4); 5222 int o2 = extract32(insn, 4, 1); 5223 int rn = extract32(insn, 5, 5); 5224 int imm6 = extract32(insn, 15, 6); 5225 int sf_op_s = extract32(insn, 29, 3); 5226 TCGv_i64 tcg_rn; 5227 TCGv_i32 nzcv; 5228 5229 if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) { 5230 unallocated_encoding(s); 5231 return; 5232 } 5233 5234 tcg_rn = read_cpu_reg(s, rn, 1); 5235 tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6); 5236 5237 nzcv = tcg_temp_new_i32(); 5238 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 5239 5240 if (mask & 8) { /* N */ 5241 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 5242 } 5243 if (mask & 4) { /* Z */ 5244 tcg_gen_not_i32(cpu_ZF, nzcv); 5245 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 5246 } 5247 if (mask & 2) { /* C */ 5248 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 5249 } 5250 if (mask & 1) { /* V */ 5251 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 5252 } 5253 5254 tcg_temp_free_i32(nzcv); 5255 } 5256 5257 /* 5258 * Evaluate into flags 5259 * 31 30 29 21 15 14 10 5 4 0 5260 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 5261 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 
0 1 0 | Rn |o3| mask | 5262 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 5263 */ 5264 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn) 5265 { 5266 int o3_mask = extract32(insn, 0, 5); 5267 int rn = extract32(insn, 5, 5); 5268 int o2 = extract32(insn, 15, 6); 5269 int sz = extract32(insn, 14, 1); 5270 int sf_op_s = extract32(insn, 29, 3); 5271 TCGv_i32 tmp; 5272 int shift; 5273 5274 if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd || 5275 !dc_isar_feature(aa64_condm_4, s)) { 5276 unallocated_encoding(s); 5277 return; 5278 } 5279 shift = sz ? 16 : 24; /* SETF16 or SETF8 */ 5280 5281 tmp = tcg_temp_new_i32(); 5282 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 5283 tcg_gen_shli_i32(cpu_NF, tmp, shift); 5284 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 5285 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 5286 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 5287 tcg_temp_free_i32(tmp); 5288 } 5289 5290 /* Conditional compare (immediate / register) 5291 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 5292 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 5293 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv | 5294 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 5295 * [1] y [0] [0] 5296 */ 5297 static void disas_cc(DisasContext *s, uint32_t insn) 5298 { 5299 unsigned int sf, op, y, cond, rn, nzcv, is_imm; 5300 TCGv_i32 tcg_t0, tcg_t1, tcg_t2; 5301 TCGv_i64 tcg_tmp, tcg_y, tcg_rn; 5302 DisasCompare c; 5303 5304 if (!extract32(insn, 29, 1)) { 5305 unallocated_encoding(s); 5306 return; 5307 } 5308 if (insn & (1 << 10 | 1 << 4)) { 5309 unallocated_encoding(s); 5310 return; 5311 } 5312 sf = extract32(insn, 31, 1); 5313 op = extract32(insn, 30, 1); 5314 is_imm = extract32(insn, 11, 1); 5315 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */ 5316 cond = extract32(insn, 12, 4); 5317 rn = extract32(insn, 5, 5); 5318 nzcv = extract32(insn, 0, 4); 5319 5320 /* Set T0 = !COND. */ 5321 tcg_t0 = tcg_temp_new_i32(); 5322 arm_test_cc(&c, cond); 5323 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 5324 arm_free_cc(&c); 5325 5326 /* Load the arguments for the new comparison. */ 5327 if (is_imm) { 5328 tcg_y = new_tmp_a64(s); 5329 tcg_gen_movi_i64(tcg_y, y); 5330 } else { 5331 tcg_y = cpu_reg(s, y); 5332 } 5333 tcg_rn = cpu_reg(s, rn); 5334 5335 /* Set the flags for the new comparison. */ 5336 tcg_tmp = tcg_temp_new_i64(); 5337 if (op) { 5338 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y); 5339 } else { 5340 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y); 5341 } 5342 tcg_temp_free_i64(tcg_tmp); 5343 5344 /* If COND was false, force the flags to #nzcv. Compute two masks 5345 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 5346 * For tcg hosts that support ANDC, we can make do with just T1. 5347 * In either case, allow the tcg optimizer to delete any unused mask. 
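     * For example, forcing N to 1 computes NF | T1, while forcing it
     * to 0 computes NF & ~T1 (andc) or, equivalently, NF & T2.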
5348 */ 5349 tcg_t1 = tcg_temp_new_i32(); 5350 tcg_t2 = tcg_temp_new_i32(); 5351 tcg_gen_neg_i32(tcg_t1, tcg_t0); 5352 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 5353 5354 if (nzcv & 8) { /* N */ 5355 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 5356 } else { 5357 if (TCG_TARGET_HAS_andc_i32) { 5358 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 5359 } else { 5360 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 5361 } 5362 } 5363 if (nzcv & 4) { /* Z */ 5364 if (TCG_TARGET_HAS_andc_i32) { 5365 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 5366 } else { 5367 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 5368 } 5369 } else { 5370 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 5371 } 5372 if (nzcv & 2) { /* C */ 5373 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 5374 } else { 5375 if (TCG_TARGET_HAS_andc_i32) { 5376 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 5377 } else { 5378 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 5379 } 5380 } 5381 if (nzcv & 1) { /* V */ 5382 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 5383 } else { 5384 if (TCG_TARGET_HAS_andc_i32) { 5385 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 5386 } else { 5387 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 5388 } 5389 } 5390 tcg_temp_free_i32(tcg_t0); 5391 tcg_temp_free_i32(tcg_t1); 5392 tcg_temp_free_i32(tcg_t2); 5393 } 5394 5395 /* Conditional select 5396 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0 5397 * +----+----+---+-----------------+------+------+-----+------+------+ 5398 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd | 5399 * +----+----+---+-----------------+------+------+-----+------+------+ 5400 */ 5401 static void disas_cond_select(DisasContext *s, uint32_t insn) 5402 { 5403 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd; 5404 TCGv_i64 tcg_rd, zero; 5405 DisasCompare64 c; 5406 5407 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) { 5408 /* S == 1 or op2<1> == 1 */ 5409 unallocated_encoding(s); 5410 return; 5411 } 5412 sf = extract32(insn, 31, 1); 5413 else_inv = extract32(insn, 30, 1); 5414 rm = extract32(insn, 16, 5); 5415 cond = extract32(insn, 12, 4); 5416 else_inc = extract32(insn, 10, 1); 5417 rn = extract32(insn, 5, 5); 5418 rd = extract32(insn, 0, 5); 5419 5420 tcg_rd = cpu_reg(s, rd); 5421 5422 a64_test_cc(&c, cond); 5423 zero = tcg_constant_i64(0); 5424 5425 if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) { 5426 /* CSET & CSETM. 
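         * CSET Rd, cond is CSINC Rd, ZR, ZR, invert(cond), and CSETM
         * is CSINV Rd, ZR, ZR, invert(cond); the inverted setcond
         * below undoes that inversion, and the neg turns 1 into -1
         * for CSETM.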
*/ 5427 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero); 5428 if (else_inv) { 5429 tcg_gen_neg_i64(tcg_rd, tcg_rd); 5430 } 5431 } else { 5432 TCGv_i64 t_true = cpu_reg(s, rn); 5433 TCGv_i64 t_false = read_cpu_reg(s, rm, 1); 5434 if (else_inv && else_inc) { 5435 tcg_gen_neg_i64(t_false, t_false); 5436 } else if (else_inv) { 5437 tcg_gen_not_i64(t_false, t_false); 5438 } else if (else_inc) { 5439 tcg_gen_addi_i64(t_false, t_false, 1); 5440 } 5441 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 5442 } 5443 5444 a64_free_cc(&c); 5445 5446 if (!sf) { 5447 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5448 } 5449 } 5450 5451 static void handle_clz(DisasContext *s, unsigned int sf, 5452 unsigned int rn, unsigned int rd) 5453 { 5454 TCGv_i64 tcg_rd, tcg_rn; 5455 tcg_rd = cpu_reg(s, rd); 5456 tcg_rn = cpu_reg(s, rn); 5457 5458 if (sf) { 5459 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 5460 } else { 5461 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5462 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5463 tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32); 5464 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5465 tcg_temp_free_i32(tcg_tmp32); 5466 } 5467 } 5468 5469 static void handle_cls(DisasContext *s, unsigned int sf, 5470 unsigned int rn, unsigned int rd) 5471 { 5472 TCGv_i64 tcg_rd, tcg_rn; 5473 tcg_rd = cpu_reg(s, rd); 5474 tcg_rn = cpu_reg(s, rn); 5475 5476 if (sf) { 5477 tcg_gen_clrsb_i64(tcg_rd, tcg_rn); 5478 } else { 5479 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5480 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5481 tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32); 5482 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5483 tcg_temp_free_i32(tcg_tmp32); 5484 } 5485 } 5486 5487 static void handle_rbit(DisasContext *s, unsigned int sf, 5488 unsigned int rn, unsigned int rd) 5489 { 5490 TCGv_i64 tcg_rd, tcg_rn; 5491 tcg_rd = cpu_reg(s, rd); 5492 tcg_rn = cpu_reg(s, rn); 5493 5494 if (sf) { 5495 gen_helper_rbit64(tcg_rd, tcg_rn); 5496 } else { 5497 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5498 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5499 gen_helper_rbit(tcg_tmp32, tcg_tmp32); 5500 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5501 tcg_temp_free_i32(tcg_tmp32); 5502 } 5503 } 5504 5505 /* REV with sf==1, opcode==3 ("REV64") */ 5506 static void handle_rev64(DisasContext *s, unsigned int sf, 5507 unsigned int rn, unsigned int rd) 5508 { 5509 if (!sf) { 5510 unallocated_encoding(s); 5511 return; 5512 } 5513 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn)); 5514 } 5515 5516 /* REV with sf==0, opcode==2 5517 * REV32 (sf==1, opcode==2) 5518 */ 5519 static void handle_rev32(DisasContext *s, unsigned int sf, 5520 unsigned int rn, unsigned int rd) 5521 { 5522 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5523 TCGv_i64 tcg_rn = cpu_reg(s, rn); 5524 5525 if (sf) { 5526 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 5527 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 5528 } else { 5529 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 5530 } 5531 } 5532 5533 /* REV16 (opcode==1) */ 5534 static void handle_rev16(DisasContext *s, unsigned int sf, 5535 unsigned int rn, unsigned int rd) 5536 { 5537 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5538 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 5539 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 5540 TCGv_i64 mask = tcg_constant_i64(sf ? 
0x00ff00ff00ff00ffull : 0x00ff00ff); 5541 5542 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 5543 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 5544 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 5545 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 5546 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 5547 5548 tcg_temp_free_i64(tcg_tmp); 5549 } 5550 5551 /* Data-processing (1 source) 5552 * 31 30 29 28 21 20 16 15 10 9 5 4 0 5553 * +----+---+---+-----------------+---------+--------+------+------+ 5554 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd | 5555 * +----+---+---+-----------------+---------+--------+------+------+ 5556 */ 5557 static void disas_data_proc_1src(DisasContext *s, uint32_t insn) 5558 { 5559 unsigned int sf, opcode, opcode2, rn, rd; 5560 TCGv_i64 tcg_rd; 5561 5562 if (extract32(insn, 29, 1)) { 5563 unallocated_encoding(s); 5564 return; 5565 } 5566 5567 sf = extract32(insn, 31, 1); 5568 opcode = extract32(insn, 10, 6); 5569 opcode2 = extract32(insn, 16, 5); 5570 rn = extract32(insn, 5, 5); 5571 rd = extract32(insn, 0, 5); 5572 5573 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7)) 5574 5575 switch (MAP(sf, opcode2, opcode)) { 5576 case MAP(0, 0x00, 0x00): /* RBIT */ 5577 case MAP(1, 0x00, 0x00): 5578 handle_rbit(s, sf, rn, rd); 5579 break; 5580 case MAP(0, 0x00, 0x01): /* REV16 */ 5581 case MAP(1, 0x00, 0x01): 5582 handle_rev16(s, sf, rn, rd); 5583 break; 5584 case MAP(0, 0x00, 0x02): /* REV/REV32 */ 5585 case MAP(1, 0x00, 0x02): 5586 handle_rev32(s, sf, rn, rd); 5587 break; 5588 case MAP(1, 0x00, 0x03): /* REV64 */ 5589 handle_rev64(s, sf, rn, rd); 5590 break; 5591 case MAP(0, 0x00, 0x04): /* CLZ */ 5592 case MAP(1, 0x00, 0x04): 5593 handle_clz(s, sf, rn, rd); 5594 break; 5595 case MAP(0, 0x00, 0x05): /* CLS */ 5596 case MAP(1, 0x00, 0x05): 5597 handle_cls(s, sf, rn, rd); 5598 break; 5599 case MAP(1, 0x01, 0x00): /* PACIA */ 5600 if (s->pauth_active) { 5601 tcg_rd = cpu_reg(s, rd); 5602 gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5603 } else if (!dc_isar_feature(aa64_pauth, s)) { 5604 goto do_unallocated; 5605 } 5606 break; 5607 case MAP(1, 0x01, 0x01): /* PACIB */ 5608 if (s->pauth_active) { 5609 tcg_rd = cpu_reg(s, rd); 5610 gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5611 } else if (!dc_isar_feature(aa64_pauth, s)) { 5612 goto do_unallocated; 5613 } 5614 break; 5615 case MAP(1, 0x01, 0x02): /* PACDA */ 5616 if (s->pauth_active) { 5617 tcg_rd = cpu_reg(s, rd); 5618 gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5619 } else if (!dc_isar_feature(aa64_pauth, s)) { 5620 goto do_unallocated; 5621 } 5622 break; 5623 case MAP(1, 0x01, 0x03): /* PACDB */ 5624 if (s->pauth_active) { 5625 tcg_rd = cpu_reg(s, rd); 5626 gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5627 } else if (!dc_isar_feature(aa64_pauth, s)) { 5628 goto do_unallocated; 5629 } 5630 break; 5631 case MAP(1, 0x01, 0x04): /* AUTIA */ 5632 if (s->pauth_active) { 5633 tcg_rd = cpu_reg(s, rd); 5634 gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5635 } else if (!dc_isar_feature(aa64_pauth, s)) { 5636 goto do_unallocated; 5637 } 5638 break; 5639 case MAP(1, 0x01, 0x05): /* AUTIB */ 5640 if (s->pauth_active) { 5641 tcg_rd = cpu_reg(s, rd); 5642 gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5643 } else if (!dc_isar_feature(aa64_pauth, s)) { 5644 goto do_unallocated; 5645 } 5646 break; 5647 case MAP(1, 0x01, 0x06): /* AUTDA */ 5648 if (s->pauth_active) { 5649 tcg_rd = cpu_reg(s, rd); 5650 gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, 
rn)); 5651 } else if (!dc_isar_feature(aa64_pauth, s)) { 5652 goto do_unallocated; 5653 } 5654 break; 5655 case MAP(1, 0x01, 0x07): /* AUTDB */ 5656 if (s->pauth_active) { 5657 tcg_rd = cpu_reg(s, rd); 5658 gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5659 } else if (!dc_isar_feature(aa64_pauth, s)) { 5660 goto do_unallocated; 5661 } 5662 break; 5663 case MAP(1, 0x01, 0x08): /* PACIZA */ 5664 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5665 goto do_unallocated; 5666 } else if (s->pauth_active) { 5667 tcg_rd = cpu_reg(s, rd); 5668 gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); 5669 } 5670 break; 5671 case MAP(1, 0x01, 0x09): /* PACIZB */ 5672 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5673 goto do_unallocated; 5674 } else if (s->pauth_active) { 5675 tcg_rd = cpu_reg(s, rd); 5676 gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); 5677 } 5678 break; 5679 case MAP(1, 0x01, 0x0a): /* PACDZA */ 5680 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5681 goto do_unallocated; 5682 } else if (s->pauth_active) { 5683 tcg_rd = cpu_reg(s, rd); 5684 gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); 5685 } 5686 break; 5687 case MAP(1, 0x01, 0x0b): /* PACDZB */ 5688 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5689 goto do_unallocated; 5690 } else if (s->pauth_active) { 5691 tcg_rd = cpu_reg(s, rd); 5692 gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); 5693 } 5694 break; 5695 case MAP(1, 0x01, 0x0c): /* AUTIZA */ 5696 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5697 goto do_unallocated; 5698 } else if (s->pauth_active) { 5699 tcg_rd = cpu_reg(s, rd); 5700 gen_helper_autia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); 5701 } 5702 break; 5703 case MAP(1, 0x01, 0x0d): /* AUTIZB */ 5704 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5705 goto do_unallocated; 5706 } else if (s->pauth_active) { 5707 tcg_rd = cpu_reg(s, rd); 5708 gen_helper_autib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); 5709 } 5710 break; 5711 case MAP(1, 0x01, 0x0e): /* AUTDZA */ 5712 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5713 goto do_unallocated; 5714 } else if (s->pauth_active) { 5715 tcg_rd = cpu_reg(s, rd); 5716 gen_helper_autda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); 5717 } 5718 break; 5719 case MAP(1, 0x01, 0x0f): /* AUTDZB */ 5720 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5721 goto do_unallocated; 5722 } else if (s->pauth_active) { 5723 tcg_rd = cpu_reg(s, rd); 5724 gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s)); 5725 } 5726 break; 5727 case MAP(1, 0x01, 0x10): /* XPACI */ 5728 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5729 goto do_unallocated; 5730 } else if (s->pauth_active) { 5731 tcg_rd = cpu_reg(s, rd); 5732 gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd); 5733 } 5734 break; 5735 case MAP(1, 0x01, 0x11): /* XPACD */ 5736 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5737 goto do_unallocated; 5738 } else if (s->pauth_active) { 5739 tcg_rd = cpu_reg(s, rd); 5740 gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd); 5741 } 5742 break; 5743 default: 5744 do_unallocated: 5745 unallocated_encoding(s); 5746 break; 5747 } 5748 5749 #undef MAP 5750 } 5751 5752 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf, 5753 unsigned int rm, unsigned int rn, unsigned int rd) 5754 { 5755 TCGv_i64 tcg_n, tcg_m, tcg_rd; 5756 tcg_rd = cpu_reg(s, rd); 5757 5758 if (!sf && is_signed) { 5759 tcg_n = new_tmp_a64(s); 5760 tcg_m = new_tmp_a64(s); 5761 tcg_gen_ext32s_i64(tcg_n, 
cpu_reg(s, rn)); 5762 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm)); 5763 } else { 5764 tcg_n = read_cpu_reg(s, rn, sf); 5765 tcg_m = read_cpu_reg(s, rm, sf); 5766 } 5767 5768 if (is_signed) { 5769 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 5770 } else { 5771 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 5772 } 5773 5774 if (!sf) { /* zero extend final result */ 5775 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5776 } 5777 } 5778 5779 /* LSLV, LSRV, ASRV, RORV */ 5780 static void handle_shift_reg(DisasContext *s, 5781 enum a64_shift_type shift_type, unsigned int sf, 5782 unsigned int rm, unsigned int rn, unsigned int rd) 5783 { 5784 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 5785 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5786 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 5787 5788 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31); 5789 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift); 5790 tcg_temp_free_i64(tcg_shift); 5791 } 5792 5793 /* CRC32[BHWX], CRC32C[BHWX] */ 5794 static void handle_crc32(DisasContext *s, 5795 unsigned int sf, unsigned int sz, bool crc32c, 5796 unsigned int rm, unsigned int rn, unsigned int rd) 5797 { 5798 TCGv_i64 tcg_acc, tcg_val; 5799 TCGv_i32 tcg_bytes; 5800 5801 if (!dc_isar_feature(aa64_crc32, s) 5802 || (sf == 1 && sz != 3) 5803 || (sf == 0 && sz == 3)) { 5804 unallocated_encoding(s); 5805 return; 5806 } 5807 5808 if (sz == 3) { 5809 tcg_val = cpu_reg(s, rm); 5810 } else { 5811 uint64_t mask; 5812 switch (sz) { 5813 case 0: 5814 mask = 0xFF; 5815 break; 5816 case 1: 5817 mask = 0xFFFF; 5818 break; 5819 case 2: 5820 mask = 0xFFFFFFFF; 5821 break; 5822 default: 5823 g_assert_not_reached(); 5824 } 5825 tcg_val = new_tmp_a64(s); 5826 tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask); 5827 } 5828 5829 tcg_acc = cpu_reg(s, rn); 5830 tcg_bytes = tcg_constant_i32(1 << sz); 5831 5832 if (crc32c) { 5833 gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 5834 } else { 5835 gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 5836 } 5837 } 5838 5839 /* Data-processing (2 source) 5840 * 31 30 29 28 21 20 16 15 10 9 5 4 0 5841 * +----+---+---+-----------------+------+--------+------+------+ 5842 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd | 5843 * +----+---+---+-----------------+------+--------+------+------+ 5844 */ 5845 static void disas_data_proc_2src(DisasContext *s, uint32_t insn) 5846 { 5847 unsigned int sf, rm, opcode, rn, rd, setflag; 5848 sf = extract32(insn, 31, 1); 5849 setflag = extract32(insn, 29, 1); 5850 rm = extract32(insn, 16, 5); 5851 opcode = extract32(insn, 10, 6); 5852 rn = extract32(insn, 5, 5); 5853 rd = extract32(insn, 0, 5); 5854 5855 if (setflag && opcode != 0) { 5856 unallocated_encoding(s); 5857 return; 5858 } 5859 5860 switch (opcode) { 5861 case 0: /* SUBP(S) */ 5862 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5863 goto do_unallocated; 5864 } else { 5865 TCGv_i64 tcg_n, tcg_m, tcg_d; 5866 5867 tcg_n = read_cpu_reg_sp(s, rn, true); 5868 tcg_m = read_cpu_reg_sp(s, rm, true); 5869 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 5870 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 5871 tcg_d = cpu_reg(s, rd); 5872 5873 if (setflag) { 5874 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 5875 } else { 5876 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 5877 } 5878 } 5879 break; 5880 case 2: /* UDIV */ 5881 handle_div(s, false, sf, rm, rn, rd); 5882 break; 5883 case 3: /* SDIV */ 5884 handle_div(s, true, sf, rm, rn, rd); 5885 break; 5886 case 4: /* IRG */ 5887 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5888 goto do_unallocated; 
5889 } 5890 if (s->ata) { 5891 gen_helper_irg(cpu_reg_sp(s, rd), cpu_env, 5892 cpu_reg_sp(s, rn), cpu_reg(s, rm)); 5893 } else { 5894 gen_address_with_allocation_tag0(cpu_reg_sp(s, rd), 5895 cpu_reg_sp(s, rn)); 5896 } 5897 break; 5898 case 5: /* GMI */ 5899 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5900 goto do_unallocated; 5901 } else { 5902 TCGv_i64 t = tcg_temp_new_i64(); 5903 5904 tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4); 5905 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 5906 tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t); 5907 5908 tcg_temp_free_i64(t); 5909 } 5910 break; 5911 case 8: /* LSLV */ 5912 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); 5913 break; 5914 case 9: /* LSRV */ 5915 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd); 5916 break; 5917 case 10: /* ASRV */ 5918 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd); 5919 break; 5920 case 11: /* RORV */ 5921 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd); 5922 break; 5923 case 12: /* PACGA */ 5924 if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) { 5925 goto do_unallocated; 5926 } 5927 gen_helper_pacga(cpu_reg(s, rd), cpu_env, 5928 cpu_reg(s, rn), cpu_reg_sp(s, rm)); 5929 break; 5930 case 16: 5931 case 17: 5932 case 18: 5933 case 19: 5934 case 20: 5935 case 21: 5936 case 22: 5937 case 23: /* CRC32 */ 5938 { 5939 int sz = extract32(opcode, 0, 2); 5940 bool crc32c = extract32(opcode, 2, 1); 5941 handle_crc32(s, sf, sz, crc32c, rm, rn, rd); 5942 break; 5943 } 5944 default: 5945 do_unallocated: 5946 unallocated_encoding(s); 5947 break; 5948 } 5949 } 5950 5951 /* 5952 * Data processing - register 5953 * 31 30 29 28 25 21 20 16 10 0 5954 * +--+---+--+---+-------+-----+-------+-------+---------+ 5955 * | |op0| |op1| 1 0 1 | op2 | | op3 | | 5956 * +--+---+--+---+-------+-----+-------+-------+---------+ 5957 */ 5958 static void disas_data_proc_reg(DisasContext *s, uint32_t insn) 5959 { 5960 int op0 = extract32(insn, 30, 1); 5961 int op1 = extract32(insn, 28, 1); 5962 int op2 = extract32(insn, 21, 4); 5963 int op3 = extract32(insn, 10, 6); 5964 5965 if (!op1) { 5966 if (op2 & 8) { 5967 if (op2 & 1) { 5968 /* Add/sub (extended register) */ 5969 disas_add_sub_ext_reg(s, insn); 5970 } else { 5971 /* Add/sub (shifted register) */ 5972 disas_add_sub_reg(s, insn); 5973 } 5974 } else { 5975 /* Logical (shifted register) */ 5976 disas_logic_reg(s, insn); 5977 } 5978 return; 5979 } 5980 5981 switch (op2) { 5982 case 0x0: 5983 switch (op3) { 5984 case 0x00: /* Add/subtract (with carry) */ 5985 disas_adc_sbc(s, insn); 5986 break; 5987 5988 case 0x01: /* Rotate right into flags */ 5989 case 0x21: 5990 disas_rotate_right_into_flags(s, insn); 5991 break; 5992 5993 case 0x02: /* Evaluate into flags */ 5994 case 0x12: 5995 case 0x22: 5996 case 0x32: 5997 disas_evaluate_into_flags(s, insn); 5998 break; 5999 6000 default: 6001 goto do_unallocated; 6002 } 6003 break; 6004 6005 case 0x2: /* Conditional compare */ 6006 disas_cc(s, insn); /* both imm and reg forms */ 6007 break; 6008 6009 case 0x4: /* Conditional select */ 6010 disas_cond_select(s, insn); 6011 break; 6012 6013 case 0x6: /* Data-processing */ 6014 if (op0) { /* (1 source) */ 6015 disas_data_proc_1src(s, insn); 6016 } else { /* (2 source) */ 6017 disas_data_proc_2src(s, insn); 6018 } 6019 break; 6020 case 0x8 ... 
0xf: /* (3 source) */ 6021 disas_data_proc_3src(s, insn); 6022 break; 6023 6024 default: 6025 do_unallocated: 6026 unallocated_encoding(s); 6027 break; 6028 } 6029 } 6030 6031 static void handle_fp_compare(DisasContext *s, int size, 6032 unsigned int rn, unsigned int rm, 6033 bool cmp_with_zero, bool signal_all_nans) 6034 { 6035 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 6036 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 6037 6038 if (size == MO_64) { 6039 TCGv_i64 tcg_vn, tcg_vm; 6040 6041 tcg_vn = read_fp_dreg(s, rn); 6042 if (cmp_with_zero) { 6043 tcg_vm = tcg_constant_i64(0); 6044 } else { 6045 tcg_vm = read_fp_dreg(s, rm); 6046 } 6047 if (signal_all_nans) { 6048 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6049 } else { 6050 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6051 } 6052 tcg_temp_free_i64(tcg_vn); 6053 tcg_temp_free_i64(tcg_vm); 6054 } else { 6055 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 6056 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 6057 6058 read_vec_element_i32(s, tcg_vn, rn, 0, size); 6059 if (cmp_with_zero) { 6060 tcg_gen_movi_i32(tcg_vm, 0); 6061 } else { 6062 read_vec_element_i32(s, tcg_vm, rm, 0, size); 6063 } 6064 6065 switch (size) { 6066 case MO_32: 6067 if (signal_all_nans) { 6068 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6069 } else { 6070 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6071 } 6072 break; 6073 case MO_16: 6074 if (signal_all_nans) { 6075 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6076 } else { 6077 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6078 } 6079 break; 6080 default: 6081 g_assert_not_reached(); 6082 } 6083 6084 tcg_temp_free_i32(tcg_vn); 6085 tcg_temp_free_i32(tcg_vm); 6086 } 6087 6088 tcg_temp_free_ptr(fpst); 6089 6090 gen_set_nzcv(tcg_flags); 6091 6092 tcg_temp_free_i64(tcg_flags); 6093 } 6094 6095 /* Floating point compare 6096 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0 6097 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 6098 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 | 6099 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 6100 */ 6101 static void disas_fp_compare(DisasContext *s, uint32_t insn) 6102 { 6103 unsigned int mos, type, rm, op, rn, opc, op2r; 6104 int size; 6105 6106 mos = extract32(insn, 29, 3); 6107 type = extract32(insn, 22, 2); 6108 rm = extract32(insn, 16, 5); 6109 op = extract32(insn, 14, 2); 6110 rn = extract32(insn, 5, 5); 6111 opc = extract32(insn, 3, 2); 6112 op2r = extract32(insn, 0, 3); 6113 6114 if (mos || op || op2r) { 6115 unallocated_encoding(s); 6116 return; 6117 } 6118 6119 switch (type) { 6120 case 0: 6121 size = MO_32; 6122 break; 6123 case 1: 6124 size = MO_64; 6125 break; 6126 case 3: 6127 size = MO_16; 6128 if (dc_isar_feature(aa64_fp16, s)) { 6129 break; 6130 } 6131 /* fallthru */ 6132 default: 6133 unallocated_encoding(s); 6134 return; 6135 } 6136 6137 if (!fp_access_check(s)) { 6138 return; 6139 } 6140 6141 handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2); 6142 } 6143 6144 /* Floating point conditional compare 6145 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 6146 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 6147 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv | 6148 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 6149 */ 6150 static void disas_fp_ccomp(DisasContext *s, uint32_t insn) 6151 { 6152 unsigned int 
mos, type, rm, cond, rn, op, nzcv; 6153 TCGLabel *label_continue = NULL; 6154 int size; 6155 6156 mos = extract32(insn, 29, 3); 6157 type = extract32(insn, 22, 2); 6158 rm = extract32(insn, 16, 5); 6159 cond = extract32(insn, 12, 4); 6160 rn = extract32(insn, 5, 5); 6161 op = extract32(insn, 4, 1); 6162 nzcv = extract32(insn, 0, 4); 6163 6164 if (mos) { 6165 unallocated_encoding(s); 6166 return; 6167 } 6168 6169 switch (type) { 6170 case 0: 6171 size = MO_32; 6172 break; 6173 case 1: 6174 size = MO_64; 6175 break; 6176 case 3: 6177 size = MO_16; 6178 if (dc_isar_feature(aa64_fp16, s)) { 6179 break; 6180 } 6181 /* fallthru */ 6182 default: 6183 unallocated_encoding(s); 6184 return; 6185 } 6186 6187 if (!fp_access_check(s)) { 6188 return; 6189 } 6190 6191 if (cond < 0x0e) { /* not always */ 6192 TCGLabel *label_match = gen_new_label(); 6193 label_continue = gen_new_label(); 6194 arm_gen_test_cc(cond, label_match); 6195 /* nomatch: */ 6196 gen_set_nzcv(tcg_constant_i64(nzcv << 28)); 6197 tcg_gen_br(label_continue); 6198 gen_set_label(label_match); 6199 } 6200 6201 handle_fp_compare(s, size, rn, rm, false, op); 6202 6203 if (cond < 0x0e) { 6204 gen_set_label(label_continue); 6205 } 6206 } 6207 6208 /* Floating point conditional select 6209 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 6210 * +---+---+---+-----------+------+---+------+------+-----+------+------+ 6211 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd | 6212 * +---+---+---+-----------+------+---+------+------+-----+------+------+ 6213 */ 6214 static void disas_fp_csel(DisasContext *s, uint32_t insn) 6215 { 6216 unsigned int mos, type, rm, cond, rn, rd; 6217 TCGv_i64 t_true, t_false; 6218 DisasCompare64 c; 6219 MemOp sz; 6220 6221 mos = extract32(insn, 29, 3); 6222 type = extract32(insn, 22, 2); 6223 rm = extract32(insn, 16, 5); 6224 cond = extract32(insn, 12, 4); 6225 rn = extract32(insn, 5, 5); 6226 rd = extract32(insn, 0, 5); 6227 6228 if (mos) { 6229 unallocated_encoding(s); 6230 return; 6231 } 6232 6233 switch (type) { 6234 case 0: 6235 sz = MO_32; 6236 break; 6237 case 1: 6238 sz = MO_64; 6239 break; 6240 case 3: 6241 sz = MO_16; 6242 if (dc_isar_feature(aa64_fp16, s)) { 6243 break; 6244 } 6245 /* fallthru */ 6246 default: 6247 unallocated_encoding(s); 6248 return; 6249 } 6250 6251 if (!fp_access_check(s)) { 6252 return; 6253 } 6254 6255 /* Zero extend sreg & hreg inputs to 64 bits now. */ 6256 t_true = tcg_temp_new_i64(); 6257 t_false = tcg_temp_new_i64(); 6258 read_vec_element(s, t_true, rn, 0, sz); 6259 read_vec_element(s, t_false, rm, 0, sz); 6260 6261 a64_test_cc(&c, cond); 6262 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 6263 t_true, t_false); 6264 tcg_temp_free_i64(t_false); 6265 a64_free_cc(&c); 6266 6267 /* Note that sregs & hregs write back zeros to the high bits, 6268 and we've already done the zero-extension. 
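 * Writing the result via write_fp_dreg below therefore stores the
 * 64-bit value and zeroes everything above it in the vector register.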
*/ 6269 write_fp_dreg(s, rd, t_true); 6270 tcg_temp_free_i64(t_true); 6271 } 6272 6273 /* Floating-point data-processing (1 source) - half precision */ 6274 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) 6275 { 6276 TCGv_ptr fpst = NULL; 6277 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 6278 TCGv_i32 tcg_res = tcg_temp_new_i32(); 6279 6280 switch (opcode) { 6281 case 0x0: /* FMOV */ 6282 tcg_gen_mov_i32(tcg_res, tcg_op); 6283 break; 6284 case 0x1: /* FABS */ 6285 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 6286 break; 6287 case 0x2: /* FNEG */ 6288 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 6289 break; 6290 case 0x3: /* FSQRT */ 6291 fpst = fpstatus_ptr(FPST_FPCR_F16); 6292 gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); 6293 break; 6294 case 0x8: /* FRINTN */ 6295 case 0x9: /* FRINTP */ 6296 case 0xa: /* FRINTM */ 6297 case 0xb: /* FRINTZ */ 6298 case 0xc: /* FRINTA */ 6299 { 6300 TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7)); 6301 fpst = fpstatus_ptr(FPST_FPCR_F16); 6302 6303 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 6304 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 6305 6306 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 6307 tcg_temp_free_i32(tcg_rmode); 6308 break; 6309 } 6310 case 0xe: /* FRINTX */ 6311 fpst = fpstatus_ptr(FPST_FPCR_F16); 6312 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst); 6313 break; 6314 case 0xf: /* FRINTI */ 6315 fpst = fpstatus_ptr(FPST_FPCR_F16); 6316 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 6317 break; 6318 default: 6319 g_assert_not_reached(); 6320 } 6321 6322 write_fp_sreg(s, rd, tcg_res); 6323 6324 if (fpst) { 6325 tcg_temp_free_ptr(fpst); 6326 } 6327 tcg_temp_free_i32(tcg_op); 6328 tcg_temp_free_i32(tcg_res); 6329 } 6330 6331 /* Floating-point data-processing (1 source) - single precision */ 6332 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) 6333 { 6334 void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr); 6335 TCGv_i32 tcg_op, tcg_res; 6336 TCGv_ptr fpst; 6337 int rmode = -1; 6338 6339 tcg_op = read_fp_sreg(s, rn); 6340 tcg_res = tcg_temp_new_i32(); 6341 6342 switch (opcode) { 6343 case 0x0: /* FMOV */ 6344 tcg_gen_mov_i32(tcg_res, tcg_op); 6345 goto done; 6346 case 0x1: /* FABS */ 6347 gen_helper_vfp_abss(tcg_res, tcg_op); 6348 goto done; 6349 case 0x2: /* FNEG */ 6350 gen_helper_vfp_negs(tcg_res, tcg_op); 6351 goto done; 6352 case 0x3: /* FSQRT */ 6353 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); 6354 goto done; 6355 case 0x6: /* BFCVT */ 6356 gen_fpst = gen_helper_bfcvt; 6357 break; 6358 case 0x8: /* FRINTN */ 6359 case 0x9: /* FRINTP */ 6360 case 0xa: /* FRINTM */ 6361 case 0xb: /* FRINTZ */ 6362 case 0xc: /* FRINTA */ 6363 rmode = arm_rmode_to_sf(opcode & 7); 6364 gen_fpst = gen_helper_rints; 6365 break; 6366 case 0xe: /* FRINTX */ 6367 gen_fpst = gen_helper_rints_exact; 6368 break; 6369 case 0xf: /* FRINTI */ 6370 gen_fpst = gen_helper_rints; 6371 break; 6372 case 0x10: /* FRINT32Z */ 6373 rmode = float_round_to_zero; 6374 gen_fpst = gen_helper_frint32_s; 6375 break; 6376 case 0x11: /* FRINT32X */ 6377 gen_fpst = gen_helper_frint32_s; 6378 break; 6379 case 0x12: /* FRINT64Z */ 6380 rmode = float_round_to_zero; 6381 gen_fpst = gen_helper_frint64_s; 6382 break; 6383 case 0x13: /* FRINT64X */ 6384 gen_fpst = gen_helper_frint64_s; 6385 break; 6386 default: 6387 g_assert_not_reached(); 6388 } 6389 6390 fpst = fpstatus_ptr(FPST_FPCR); 6391 if (rmode >= 0) { 6392 TCGv_i32 tcg_rmode = tcg_const_i32(rmode); 6393 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 
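/* gen_helper_set_rmode writes the previous rounding mode back into
 * tcg_rmode, so the second call after the operation restores the
 * mode that was in effect before this instruction. */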
6394 gen_fpst(tcg_res, tcg_op, fpst); 6395 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 6396 tcg_temp_free_i32(tcg_rmode); 6397 } else { 6398 gen_fpst(tcg_res, tcg_op, fpst); 6399 } 6400 tcg_temp_free_ptr(fpst); 6401 6402 done: 6403 write_fp_sreg(s, rd, tcg_res); 6404 tcg_temp_free_i32(tcg_op); 6405 tcg_temp_free_i32(tcg_res); 6406 } 6407 6408 /* Floating-point data-processing (1 source) - double precision */ 6409 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) 6410 { 6411 void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr); 6412 TCGv_i64 tcg_op, tcg_res; 6413 TCGv_ptr fpst; 6414 int rmode = -1; 6415 6416 switch (opcode) { 6417 case 0x0: /* FMOV */ 6418 gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0); 6419 return; 6420 } 6421 6422 tcg_op = read_fp_dreg(s, rn); 6423 tcg_res = tcg_temp_new_i64(); 6424 6425 switch (opcode) { 6426 case 0x1: /* FABS */ 6427 gen_helper_vfp_absd(tcg_res, tcg_op); 6428 goto done; 6429 case 0x2: /* FNEG */ 6430 gen_helper_vfp_negd(tcg_res, tcg_op); 6431 goto done; 6432 case 0x3: /* FSQRT */ 6433 gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env); 6434 goto done; 6435 case 0x8: /* FRINTN */ 6436 case 0x9: /* FRINTP */ 6437 case 0xa: /* FRINTM */ 6438 case 0xb: /* FRINTZ */ 6439 case 0xc: /* FRINTA */ 6440 rmode = arm_rmode_to_sf(opcode & 7); 6441 gen_fpst = gen_helper_rintd; 6442 break; 6443 case 0xe: /* FRINTX */ 6444 gen_fpst = gen_helper_rintd_exact; 6445 break; 6446 case 0xf: /* FRINTI */ 6447 gen_fpst = gen_helper_rintd; 6448 break; 6449 case 0x10: /* FRINT32Z */ 6450 rmode = float_round_to_zero; 6451 gen_fpst = gen_helper_frint32_d; 6452 break; 6453 case 0x11: /* FRINT32X */ 6454 gen_fpst = gen_helper_frint32_d; 6455 break; 6456 case 0x12: /* FRINT64Z */ 6457 rmode = float_round_to_zero; 6458 gen_fpst = gen_helper_frint64_d; 6459 break; 6460 case 0x13: /* FRINT64X */ 6461 gen_fpst = gen_helper_frint64_d; 6462 break; 6463 default: 6464 g_assert_not_reached(); 6465 } 6466 6467 fpst = fpstatus_ptr(FPST_FPCR); 6468 if (rmode >= 0) { 6469 TCGv_i32 tcg_rmode = tcg_const_i32(rmode); 6470 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 6471 gen_fpst(tcg_res, tcg_op, fpst); 6472 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 6473 tcg_temp_free_i32(tcg_rmode); 6474 } else { 6475 gen_fpst(tcg_res, tcg_op, fpst); 6476 } 6477 tcg_temp_free_ptr(fpst); 6478 6479 done: 6480 write_fp_dreg(s, rd, tcg_res); 6481 tcg_temp_free_i64(tcg_op); 6482 tcg_temp_free_i64(tcg_res); 6483 } 6484 6485 static void handle_fp_fcvt(DisasContext *s, int opcode, 6486 int rd, int rn, int dtype, int ntype) 6487 { 6488 switch (ntype) { 6489 case 0x0: 6490 { 6491 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 6492 if (dtype == 1) { 6493 /* Single to double */ 6494 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 6495 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env); 6496 write_fp_dreg(s, rd, tcg_rd); 6497 tcg_temp_free_i64(tcg_rd); 6498 } else { 6499 /* Single to half */ 6500 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6501 TCGv_i32 ahp = get_ahp_flag(); 6502 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6503 6504 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp); 6505 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 6506 write_fp_sreg(s, rd, tcg_rd); 6507 tcg_temp_free_i32(tcg_rd); 6508 tcg_temp_free_i32(ahp); 6509 tcg_temp_free_ptr(fpst); 6510 } 6511 tcg_temp_free_i32(tcg_rn); 6512 break; 6513 } 6514 case 0x1: 6515 { 6516 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 6517 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6518 if (dtype == 0) { 6519 /* Double to single */ 6520 
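/* Unlike the half-precision path below, the single<->double
 * helpers take cpu_env and use the standard FP status stored there. */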
gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env); 6521 } else { 6522 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6523 TCGv_i32 ahp = get_ahp_flag(); 6524 /* Double to half */ 6525 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 6526 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 6527 tcg_temp_free_ptr(fpst); 6528 tcg_temp_free_i32(ahp); 6529 } 6530 write_fp_sreg(s, rd, tcg_rd); 6531 tcg_temp_free_i32(tcg_rd); 6532 tcg_temp_free_i64(tcg_rn); 6533 break; 6534 } 6535 case 0x3: 6536 { 6537 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 6538 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR); 6539 TCGv_i32 tcg_ahp = get_ahp_flag(); 6540 tcg_gen_ext16u_i32(tcg_rn, tcg_rn); 6541 if (dtype == 0) { 6542 /* Half to single */ 6543 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6544 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 6545 write_fp_sreg(s, rd, tcg_rd); 6546 tcg_temp_free_i32(tcg_rd); 6547 } else { 6548 /* Half to double */ 6549 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 6550 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 6551 write_fp_dreg(s, rd, tcg_rd); 6552 tcg_temp_free_i64(tcg_rd); 6553 } 6554 tcg_temp_free_i32(tcg_rn); 6555 tcg_temp_free_ptr(tcg_fpst); 6556 tcg_temp_free_i32(tcg_ahp); 6557 break; 6558 } 6559 default: 6560 g_assert_not_reached(); 6561 } 6562 } 6563 6564 /* Floating point data-processing (1 source) 6565 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0 6566 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 6567 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd | 6568 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 6569 */ 6570 static void disas_fp_1src(DisasContext *s, uint32_t insn) 6571 { 6572 int mos = extract32(insn, 29, 3); 6573 int type = extract32(insn, 22, 2); 6574 int opcode = extract32(insn, 15, 6); 6575 int rn = extract32(insn, 5, 5); 6576 int rd = extract32(insn, 0, 5); 6577 6578 if (mos) { 6579 goto do_unallocated; 6580 } 6581 6582 switch (opcode) { 6583 case 0x4: case 0x5: case 0x7: 6584 { 6585 /* FCVT between half, single and double precision */ 6586 int dtype = extract32(opcode, 0, 2); 6587 if (type == 2 || dtype == type) { 6588 goto do_unallocated; 6589 } 6590 if (!fp_access_check(s)) { 6591 return; 6592 } 6593 6594 handle_fp_fcvt(s, opcode, rd, rn, dtype, type); 6595 break; 6596 } 6597 6598 case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */ 6599 if (type > 1 || !dc_isar_feature(aa64_frint, s)) { 6600 goto do_unallocated; 6601 } 6602 /* fall through */ 6603 case 0x0 ... 0x3: 6604 case 0x8 ... 0xc: 6605 case 0xe ... 
0xf: 6606 /* 32-to-32 and 64-to-64 ops */ 6607 switch (type) { 6608 case 0: 6609 if (!fp_access_check(s)) { 6610 return; 6611 } 6612 handle_fp_1src_single(s, opcode, rd, rn); 6613 break; 6614 case 1: 6615 if (!fp_access_check(s)) { 6616 return; 6617 } 6618 handle_fp_1src_double(s, opcode, rd, rn); 6619 break; 6620 case 3: 6621 if (!dc_isar_feature(aa64_fp16, s)) { 6622 goto do_unallocated; 6623 } 6624 6625 if (!fp_access_check(s)) { 6626 return; 6627 } 6628 handle_fp_1src_half(s, opcode, rd, rn); 6629 break; 6630 default: 6631 goto do_unallocated; 6632 } 6633 break; 6634 6635 case 0x6: 6636 switch (type) { 6637 case 1: /* BFCVT */ 6638 if (!dc_isar_feature(aa64_bf16, s)) { 6639 goto do_unallocated; 6640 } 6641 if (!fp_access_check(s)) { 6642 return; 6643 } 6644 handle_fp_1src_single(s, opcode, rd, rn); 6645 break; 6646 default: 6647 goto do_unallocated; 6648 } 6649 break; 6650 6651 default: 6652 do_unallocated: 6653 unallocated_encoding(s); 6654 break; 6655 } 6656 } 6657 6658 /* Floating-point data-processing (2 source) - single precision */ 6659 static void handle_fp_2src_single(DisasContext *s, int opcode, 6660 int rd, int rn, int rm) 6661 { 6662 TCGv_i32 tcg_op1; 6663 TCGv_i32 tcg_op2; 6664 TCGv_i32 tcg_res; 6665 TCGv_ptr fpst; 6666 6667 tcg_res = tcg_temp_new_i32(); 6668 fpst = fpstatus_ptr(FPST_FPCR); 6669 tcg_op1 = read_fp_sreg(s, rn); 6670 tcg_op2 = read_fp_sreg(s, rm); 6671 6672 switch (opcode) { 6673 case 0x0: /* FMUL */ 6674 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 6675 break; 6676 case 0x1: /* FDIV */ 6677 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); 6678 break; 6679 case 0x2: /* FADD */ 6680 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); 6681 break; 6682 case 0x3: /* FSUB */ 6683 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 6684 break; 6685 case 0x4: /* FMAX */ 6686 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); 6687 break; 6688 case 0x5: /* FMIN */ 6689 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); 6690 break; 6691 case 0x6: /* FMAXNM */ 6692 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); 6693 break; 6694 case 0x7: /* FMINNM */ 6695 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); 6696 break; 6697 case 0x8: /* FNMUL */ 6698 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 6699 gen_helper_vfp_negs(tcg_res, tcg_res); 6700 break; 6701 } 6702 6703 write_fp_sreg(s, rd, tcg_res); 6704 6705 tcg_temp_free_ptr(fpst); 6706 tcg_temp_free_i32(tcg_op1); 6707 tcg_temp_free_i32(tcg_op2); 6708 tcg_temp_free_i32(tcg_res); 6709 } 6710 6711 /* Floating-point data-processing (2 source) - double precision */ 6712 static void handle_fp_2src_double(DisasContext *s, int opcode, 6713 int rd, int rn, int rm) 6714 { 6715 TCGv_i64 tcg_op1; 6716 TCGv_i64 tcg_op2; 6717 TCGv_i64 tcg_res; 6718 TCGv_ptr fpst; 6719 6720 tcg_res = tcg_temp_new_i64(); 6721 fpst = fpstatus_ptr(FPST_FPCR); 6722 tcg_op1 = read_fp_dreg(s, rn); 6723 tcg_op2 = read_fp_dreg(s, rm); 6724 6725 switch (opcode) { 6726 case 0x0: /* FMUL */ 6727 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 6728 break; 6729 case 0x1: /* FDIV */ 6730 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); 6731 break; 6732 case 0x2: /* FADD */ 6733 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); 6734 break; 6735 case 0x3: /* FSUB */ 6736 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 6737 break; 6738 case 0x4: /* FMAX */ 6739 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); 6740 break; 6741 case 0x5: /* FMIN */ 6742 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, 
fpst); 6743 break; 6744 case 0x6: /* FMAXNM */ 6745 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); 6746 break; 6747 case 0x7: /* FMINNM */ 6748 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); 6749 break; 6750 case 0x8: /* FNMUL */ 6751 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 6752 gen_helper_vfp_negd(tcg_res, tcg_res); 6753 break; 6754 } 6755 6756 write_fp_dreg(s, rd, tcg_res); 6757 6758 tcg_temp_free_ptr(fpst); 6759 tcg_temp_free_i64(tcg_op1); 6760 tcg_temp_free_i64(tcg_op2); 6761 tcg_temp_free_i64(tcg_res); 6762 } 6763 6764 /* Floating-point data-processing (2 source) - half precision */ 6765 static void handle_fp_2src_half(DisasContext *s, int opcode, 6766 int rd, int rn, int rm) 6767 { 6768 TCGv_i32 tcg_op1; 6769 TCGv_i32 tcg_op2; 6770 TCGv_i32 tcg_res; 6771 TCGv_ptr fpst; 6772 6773 tcg_res = tcg_temp_new_i32(); 6774 fpst = fpstatus_ptr(FPST_FPCR_F16); 6775 tcg_op1 = read_fp_hreg(s, rn); 6776 tcg_op2 = read_fp_hreg(s, rm); 6777 6778 switch (opcode) { 6779 case 0x0: /* FMUL */ 6780 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 6781 break; 6782 case 0x1: /* FDIV */ 6783 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); 6784 break; 6785 case 0x2: /* FADD */ 6786 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 6787 break; 6788 case 0x3: /* FSUB */ 6789 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 6790 break; 6791 case 0x4: /* FMAX */ 6792 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 6793 break; 6794 case 0x5: /* FMIN */ 6795 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 6796 break; 6797 case 0x6: /* FMAXNM */ 6798 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 6799 break; 6800 case 0x7: /* FMINNM */ 6801 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 6802 break; 6803 case 0x8: /* FNMUL */ 6804 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 6805 tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000); 6806 break; 6807 default: 6808 g_assert_not_reached(); 6809 } 6810 6811 write_fp_sreg(s, rd, tcg_res); 6812 6813 tcg_temp_free_ptr(fpst); 6814 tcg_temp_free_i32(tcg_op1); 6815 tcg_temp_free_i32(tcg_op2); 6816 tcg_temp_free_i32(tcg_res); 6817 } 6818 6819 /* Floating point data-processing (2 source) 6820 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 6821 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 6822 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd | 6823 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 6824 */ 6825 static void disas_fp_2src(DisasContext *s, uint32_t insn) 6826 { 6827 int mos = extract32(insn, 29, 3); 6828 int type = extract32(insn, 22, 2); 6829 int rd = extract32(insn, 0, 5); 6830 int rn = extract32(insn, 5, 5); 6831 int rm = extract32(insn, 16, 5); 6832 int opcode = extract32(insn, 12, 4); 6833 6834 if (opcode > 8 || mos) { 6835 unallocated_encoding(s); 6836 return; 6837 } 6838 6839 switch (type) { 6840 case 0: 6841 if (!fp_access_check(s)) { 6842 return; 6843 } 6844 handle_fp_2src_single(s, opcode, rd, rn, rm); 6845 break; 6846 case 1: 6847 if (!fp_access_check(s)) { 6848 return; 6849 } 6850 handle_fp_2src_double(s, opcode, rd, rn, rm); 6851 break; 6852 case 3: 6853 if (!dc_isar_feature(aa64_fp16, s)) { 6854 unallocated_encoding(s); 6855 return; 6856 } 6857 if (!fp_access_check(s)) { 6858 return; 6859 } 6860 handle_fp_2src_half(s, opcode, rd, rn, rm); 6861 break; 6862 default: 6863 unallocated_encoding(s); 6864 } 6865 } 6866 6867 /* Floating-point data-processing (3 
source) - single precision */ 6868 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, 6869 int rd, int rn, int rm, int ra) 6870 { 6871 TCGv_i32 tcg_op1, tcg_op2, tcg_op3; 6872 TCGv_i32 tcg_res = tcg_temp_new_i32(); 6873 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6874 6875 tcg_op1 = read_fp_sreg(s, rn); 6876 tcg_op2 = read_fp_sreg(s, rm); 6877 tcg_op3 = read_fp_sreg(s, ra); 6878 6879 /* These are fused multiply-add, and must be done as one 6880 * floating point operation with no rounding between the 6881 * multiplication and addition steps. 6882 * NB that doing the negations here as separate steps is 6883 * correct : an input NaN should come out with its sign bit 6884 * flipped if it is a negated-input. 6885 */ 6886 if (o1 == true) { 6887 gen_helper_vfp_negs(tcg_op3, tcg_op3); 6888 } 6889 6890 if (o0 != o1) { 6891 gen_helper_vfp_negs(tcg_op1, tcg_op1); 6892 } 6893 6894 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6895 6896 write_fp_sreg(s, rd, tcg_res); 6897 6898 tcg_temp_free_ptr(fpst); 6899 tcg_temp_free_i32(tcg_op1); 6900 tcg_temp_free_i32(tcg_op2); 6901 tcg_temp_free_i32(tcg_op3); 6902 tcg_temp_free_i32(tcg_res); 6903 } 6904 6905 /* Floating-point data-processing (3 source) - double precision */ 6906 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, 6907 int rd, int rn, int rm, int ra) 6908 { 6909 TCGv_i64 tcg_op1, tcg_op2, tcg_op3; 6910 TCGv_i64 tcg_res = tcg_temp_new_i64(); 6911 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6912 6913 tcg_op1 = read_fp_dreg(s, rn); 6914 tcg_op2 = read_fp_dreg(s, rm); 6915 tcg_op3 = read_fp_dreg(s, ra); 6916 6917 /* These are fused multiply-add, and must be done as one 6918 * floating point operation with no rounding between the 6919 * multiplication and addition steps. 6920 * NB that doing the negations here as separate steps is 6921 * correct : an input NaN should come out with its sign bit 6922 * flipped if it is a negated-input. 6923 */ 6924 if (o1 == true) { 6925 gen_helper_vfp_negd(tcg_op3, tcg_op3); 6926 } 6927 6928 if (o0 != o1) { 6929 gen_helper_vfp_negd(tcg_op1, tcg_op1); 6930 } 6931 6932 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6933 6934 write_fp_dreg(s, rd, tcg_res); 6935 6936 tcg_temp_free_ptr(fpst); 6937 tcg_temp_free_i64(tcg_op1); 6938 tcg_temp_free_i64(tcg_op2); 6939 tcg_temp_free_i64(tcg_op3); 6940 tcg_temp_free_i64(tcg_res); 6941 } 6942 6943 /* Floating-point data-processing (3 source) - half precision */ 6944 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, 6945 int rd, int rn, int rm, int ra) 6946 { 6947 TCGv_i32 tcg_op1, tcg_op2, tcg_op3; 6948 TCGv_i32 tcg_res = tcg_temp_new_i32(); 6949 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16); 6950 6951 tcg_op1 = read_fp_hreg(s, rn); 6952 tcg_op2 = read_fp_hreg(s, rm); 6953 tcg_op3 = read_fp_hreg(s, ra); 6954 6955 /* These are fused multiply-add, and must be done as one 6956 * floating point operation with no rounding between the 6957 * multiplication and addition steps. 6958 * NB that doing the negations here as separate steps is 6959 * correct : an input NaN should come out with its sign bit 6960 * flipped if it is a negated-input. 
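 * For reference, (o1, o0) selects the insn: (0,0) FMADD, (0,1) FMSUB,
 * (1,0) FNMADD, (1,1) FNMSUB.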
6961 */ 6962 if (o1 == true) { 6963 tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000); 6964 } 6965 6966 if (o0 != o1) { 6967 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); 6968 } 6969 6970 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6971 6972 write_fp_sreg(s, rd, tcg_res); 6973 6974 tcg_temp_free_ptr(fpst); 6975 tcg_temp_free_i32(tcg_op1); 6976 tcg_temp_free_i32(tcg_op2); 6977 tcg_temp_free_i32(tcg_op3); 6978 tcg_temp_free_i32(tcg_res); 6979 } 6980 6981 /* Floating point data-processing (3 source) 6982 * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0 6983 * +---+---+---+-----------+------+----+------+----+------+------+------+ 6984 * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd | 6985 * +---+---+---+-----------+------+----+------+----+------+------+------+ 6986 */ 6987 static void disas_fp_3src(DisasContext *s, uint32_t insn) 6988 { 6989 int mos = extract32(insn, 29, 3); 6990 int type = extract32(insn, 22, 2); 6991 int rd = extract32(insn, 0, 5); 6992 int rn = extract32(insn, 5, 5); 6993 int ra = extract32(insn, 10, 5); 6994 int rm = extract32(insn, 16, 5); 6995 bool o0 = extract32(insn, 15, 1); 6996 bool o1 = extract32(insn, 21, 1); 6997 6998 if (mos) { 6999 unallocated_encoding(s); 7000 return; 7001 } 7002 7003 switch (type) { 7004 case 0: 7005 if (!fp_access_check(s)) { 7006 return; 7007 } 7008 handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra); 7009 break; 7010 case 1: 7011 if (!fp_access_check(s)) { 7012 return; 7013 } 7014 handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra); 7015 break; 7016 case 3: 7017 if (!dc_isar_feature(aa64_fp16, s)) { 7018 unallocated_encoding(s); 7019 return; 7020 } 7021 if (!fp_access_check(s)) { 7022 return; 7023 } 7024 handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra); 7025 break; 7026 default: 7027 unallocated_encoding(s); 7028 } 7029 } 7030 7031 /* Floating point immediate 7032 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0 7033 * +---+---+---+-----------+------+---+------------+-------+------+------+ 7034 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd | 7035 * +---+---+---+-----------+------+---+------------+-------+------+------+ 7036 */ 7037 static void disas_fp_imm(DisasContext *s, uint32_t insn) 7038 { 7039 int rd = extract32(insn, 0, 5); 7040 int imm5 = extract32(insn, 5, 5); 7041 int imm8 = extract32(insn, 13, 8); 7042 int type = extract32(insn, 22, 2); 7043 int mos = extract32(insn, 29, 3); 7044 uint64_t imm; 7045 MemOp sz; 7046 7047 if (mos || imm5) { 7048 unallocated_encoding(s); 7049 return; 7050 } 7051 7052 switch (type) { 7053 case 0: 7054 sz = MO_32; 7055 break; 7056 case 1: 7057 sz = MO_64; 7058 break; 7059 case 3: 7060 sz = MO_16; 7061 if (dc_isar_feature(aa64_fp16, s)) { 7062 break; 7063 } 7064 /* fallthru */ 7065 default: 7066 unallocated_encoding(s); 7067 return; 7068 } 7069 7070 if (!fp_access_check(s)) { 7071 return; 7072 } 7073 7074 imm = vfp_expand_imm(sz, imm8); 7075 write_fp_dreg(s, rd, tcg_constant_i64(imm)); 7076 } 7077 7078 /* Handle floating point <=> fixed point conversions. Note that we can 7079 * also deal with fp <=> integer conversions as a special case (scale == 64) 7080 * OPTME: consider handling that special case specially or at least skipping 7081 * the call to scalbn in the helpers for zero shifts. 
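 * For example, SCVTF <Dd>, <Xn>, #fbits is encoded with scale = 64 - fbits,
 * so the "64 - scale" shift computed below is exactly fbits, and the pure
 * integer converts arrive here with scale == 64, i.e. a zero shift.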
7082 */ 7083 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, 7084 bool itof, int rmode, int scale, int sf, int type) 7085 { 7086 bool is_signed = !(opcode & 1); 7087 TCGv_ptr tcg_fpstatus; 7088 TCGv_i32 tcg_shift, tcg_single; 7089 TCGv_i64 tcg_double; 7090 7091 tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR); 7092 7093 tcg_shift = tcg_constant_i32(64 - scale); 7094 7095 if (itof) { 7096 TCGv_i64 tcg_int = cpu_reg(s, rn); 7097 if (!sf) { 7098 TCGv_i64 tcg_extend = new_tmp_a64(s); 7099 7100 if (is_signed) { 7101 tcg_gen_ext32s_i64(tcg_extend, tcg_int); 7102 } else { 7103 tcg_gen_ext32u_i64(tcg_extend, tcg_int); 7104 } 7105 7106 tcg_int = tcg_extend; 7107 } 7108 7109 switch (type) { 7110 case 1: /* float64 */ 7111 tcg_double = tcg_temp_new_i64(); 7112 if (is_signed) { 7113 gen_helper_vfp_sqtod(tcg_double, tcg_int, 7114 tcg_shift, tcg_fpstatus); 7115 } else { 7116 gen_helper_vfp_uqtod(tcg_double, tcg_int, 7117 tcg_shift, tcg_fpstatus); 7118 } 7119 write_fp_dreg(s, rd, tcg_double); 7120 tcg_temp_free_i64(tcg_double); 7121 break; 7122 7123 case 0: /* float32 */ 7124 tcg_single = tcg_temp_new_i32(); 7125 if (is_signed) { 7126 gen_helper_vfp_sqtos(tcg_single, tcg_int, 7127 tcg_shift, tcg_fpstatus); 7128 } else { 7129 gen_helper_vfp_uqtos(tcg_single, tcg_int, 7130 tcg_shift, tcg_fpstatus); 7131 } 7132 write_fp_sreg(s, rd, tcg_single); 7133 tcg_temp_free_i32(tcg_single); 7134 break; 7135 7136 case 3: /* float16 */ 7137 tcg_single = tcg_temp_new_i32(); 7138 if (is_signed) { 7139 gen_helper_vfp_sqtoh(tcg_single, tcg_int, 7140 tcg_shift, tcg_fpstatus); 7141 } else { 7142 gen_helper_vfp_uqtoh(tcg_single, tcg_int, 7143 tcg_shift, tcg_fpstatus); 7144 } 7145 write_fp_sreg(s, rd, tcg_single); 7146 tcg_temp_free_i32(tcg_single); 7147 break; 7148 7149 default: 7150 g_assert_not_reached(); 7151 } 7152 } else { 7153 TCGv_i64 tcg_int = cpu_reg(s, rd); 7154 TCGv_i32 tcg_rmode; 7155 7156 if (extract32(opcode, 2, 1)) { 7157 /* There are too many rounding modes to all fit into rmode, 7158 * so FCVTA[US] is a special case. 
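 * (The 2-bit rmode field covers only FPROUNDING_TIEEVEN, POSINF,
 * NEGINF and ZERO; FPROUNDING_TIEAWAY lies outside that range.)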
7159 */ 7160 rmode = FPROUNDING_TIEAWAY; 7161 } 7162 7163 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); 7164 7165 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 7166 7167 switch (type) { 7168 case 1: /* float64 */ 7169 tcg_double = read_fp_dreg(s, rn); 7170 if (is_signed) { 7171 if (!sf) { 7172 gen_helper_vfp_tosld(tcg_int, tcg_double, 7173 tcg_shift, tcg_fpstatus); 7174 } else { 7175 gen_helper_vfp_tosqd(tcg_int, tcg_double, 7176 tcg_shift, tcg_fpstatus); 7177 } 7178 } else { 7179 if (!sf) { 7180 gen_helper_vfp_tould(tcg_int, tcg_double, 7181 tcg_shift, tcg_fpstatus); 7182 } else { 7183 gen_helper_vfp_touqd(tcg_int, tcg_double, 7184 tcg_shift, tcg_fpstatus); 7185 } 7186 } 7187 if (!sf) { 7188 tcg_gen_ext32u_i64(tcg_int, tcg_int); 7189 } 7190 tcg_temp_free_i64(tcg_double); 7191 break; 7192 7193 case 0: /* float32 */ 7194 tcg_single = read_fp_sreg(s, rn); 7195 if (sf) { 7196 if (is_signed) { 7197 gen_helper_vfp_tosqs(tcg_int, tcg_single, 7198 tcg_shift, tcg_fpstatus); 7199 } else { 7200 gen_helper_vfp_touqs(tcg_int, tcg_single, 7201 tcg_shift, tcg_fpstatus); 7202 } 7203 } else { 7204 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 7205 if (is_signed) { 7206 gen_helper_vfp_tosls(tcg_dest, tcg_single, 7207 tcg_shift, tcg_fpstatus); 7208 } else { 7209 gen_helper_vfp_touls(tcg_dest, tcg_single, 7210 tcg_shift, tcg_fpstatus); 7211 } 7212 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 7213 tcg_temp_free_i32(tcg_dest); 7214 } 7215 tcg_temp_free_i32(tcg_single); 7216 break; 7217 7218 case 3: /* float16 */ 7219 tcg_single = read_fp_sreg(s, rn); 7220 if (sf) { 7221 if (is_signed) { 7222 gen_helper_vfp_tosqh(tcg_int, tcg_single, 7223 tcg_shift, tcg_fpstatus); 7224 } else { 7225 gen_helper_vfp_touqh(tcg_int, tcg_single, 7226 tcg_shift, tcg_fpstatus); 7227 } 7228 } else { 7229 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 7230 if (is_signed) { 7231 gen_helper_vfp_toslh(tcg_dest, tcg_single, 7232 tcg_shift, tcg_fpstatus); 7233 } else { 7234 gen_helper_vfp_toulh(tcg_dest, tcg_single, 7235 tcg_shift, tcg_fpstatus); 7236 } 7237 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 7238 tcg_temp_free_i32(tcg_dest); 7239 } 7240 tcg_temp_free_i32(tcg_single); 7241 break; 7242 7243 default: 7244 g_assert_not_reached(); 7245 } 7246 7247 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 7248 tcg_temp_free_i32(tcg_rmode); 7249 } 7250 7251 tcg_temp_free_ptr(tcg_fpstatus); 7252 } 7253 7254 /* Floating point <-> fixed point conversions 7255 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 7256 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 7257 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd | 7258 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 7259 */ 7260 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn) 7261 { 7262 int rd = extract32(insn, 0, 5); 7263 int rn = extract32(insn, 5, 5); 7264 int scale = extract32(insn, 10, 6); 7265 int opcode = extract32(insn, 16, 3); 7266 int rmode = extract32(insn, 19, 2); 7267 int type = extract32(insn, 22, 2); 7268 bool sbit = extract32(insn, 29, 1); 7269 bool sf = extract32(insn, 31, 1); 7270 bool itof; 7271 7272 if (sbit || (!sf && scale < 32)) { 7273 unallocated_encoding(s); 7274 return; 7275 } 7276 7277 switch (type) { 7278 case 0: /* float32 */ 7279 case 1: /* float64 */ 7280 break; 7281 case 3: /* float16 */ 7282 if (dc_isar_feature(aa64_fp16, s)) { 7283 break; 7284 } 7285 /* fallthru */ 7286 default: 7287 unallocated_encoding(s); 7288 return; 7289 } 7290 7291 switch ((rmode << 
3) | opcode) { 7292 case 0x2: /* SCVTF */ 7293 case 0x3: /* UCVTF */ 7294 itof = true; 7295 break; 7296 case 0x18: /* FCVTZS */ 7297 case 0x19: /* FCVTZU */ 7298 itof = false; 7299 break; 7300 default: 7301 unallocated_encoding(s); 7302 return; 7303 } 7304 7305 if (!fp_access_check(s)) { 7306 return; 7307 } 7308 7309 handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type); 7310 } 7311 7312 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) 7313 { 7314 /* FMOV: gpr to or from float, double, or top half of quad fp reg, 7315 * without conversion. 7316 */ 7317 7318 if (itof) { 7319 TCGv_i64 tcg_rn = cpu_reg(s, rn); 7320 TCGv_i64 tmp; 7321 7322 switch (type) { 7323 case 0: 7324 /* 32 bit */ 7325 tmp = tcg_temp_new_i64(); 7326 tcg_gen_ext32u_i64(tmp, tcg_rn); 7327 write_fp_dreg(s, rd, tmp); 7328 tcg_temp_free_i64(tmp); 7329 break; 7330 case 1: 7331 /* 64 bit */ 7332 write_fp_dreg(s, rd, tcg_rn); 7333 break; 7334 case 2: 7335 /* 64 bit to top half. */ 7336 tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd)); 7337 clear_vec_high(s, true, rd); 7338 break; 7339 case 3: 7340 /* 16 bit */ 7341 tmp = tcg_temp_new_i64(); 7342 tcg_gen_ext16u_i64(tmp, tcg_rn); 7343 write_fp_dreg(s, rd, tmp); 7344 tcg_temp_free_i64(tmp); 7345 break; 7346 default: 7347 g_assert_not_reached(); 7348 } 7349 } else { 7350 TCGv_i64 tcg_rd = cpu_reg(s, rd); 7351 7352 switch (type) { 7353 case 0: 7354 /* 32 bit */ 7355 tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32)); 7356 break; 7357 case 1: 7358 /* 64 bit */ 7359 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64)); 7360 break; 7361 case 2: 7362 /* 64 bits from top half */ 7363 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn)); 7364 break; 7365 case 3: 7366 /* 16 bit */ 7367 tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16)); 7368 break; 7369 default: 7370 g_assert_not_reached(); 7371 } 7372 } 7373 } 7374 7375 static void handle_fjcvtzs(DisasContext *s, int rd, int rn) 7376 { 7377 TCGv_i64 t = read_fp_dreg(s, rn); 7378 TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR); 7379 7380 gen_helper_fjcvtzs(t, t, fpstatus); 7381 7382 tcg_temp_free_ptr(fpstatus); 7383 7384 tcg_gen_ext32u_i64(cpu_reg(s, rd), t); 7385 tcg_gen_extrh_i64_i32(cpu_ZF, t); 7386 tcg_gen_movi_i32(cpu_CF, 0); 7387 tcg_gen_movi_i32(cpu_NF, 0); 7388 tcg_gen_movi_i32(cpu_VF, 0); 7389 7390 tcg_temp_free_i64(t); 7391 } 7392 7393 /* Floating point <-> integer conversions 7394 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 7395 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 7396 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd | 7397 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 7398 */ 7399 static void disas_fp_int_conv(DisasContext *s, uint32_t insn) 7400 { 7401 int rd = extract32(insn, 0, 5); 7402 int rn = extract32(insn, 5, 5); 7403 int opcode = extract32(insn, 16, 3); 7404 int rmode = extract32(insn, 19, 2); 7405 int type = extract32(insn, 22, 2); 7406 bool sbit = extract32(insn, 29, 1); 7407 bool sf = extract32(insn, 31, 1); 7408 bool itof = false; 7409 7410 if (sbit) { 7411 goto do_unallocated; 7412 } 7413 7414 switch (opcode) { 7415 case 2: /* SCVTF */ 7416 case 3: /* UCVTF */ 7417 itof = true; 7418 /* fallthru */ 7419 case 4: /* FCVTAS */ 7420 case 5: /* FCVTAU */ 7421 if (rmode != 0) { 7422 goto do_unallocated; 7423 } 7424 /* fallthru */ 7425 case 0: /* FCVT[NPMZ]S */ 7426 case 1: /* FCVT[NPMZ]U */ 7427 switch (type) { 7428 case 0: /* 
float32 */ 7429 case 1: /* float64 */ 7430 break; 7431 case 3: /* float16 */ 7432 if (!dc_isar_feature(aa64_fp16, s)) { 7433 goto do_unallocated; 7434 } 7435 break; 7436 default: 7437 goto do_unallocated; 7438 } 7439 if (!fp_access_check(s)) { 7440 return; 7441 } 7442 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type); 7443 break; 7444 7445 default: 7446 switch (sf << 7 | type << 5 | rmode << 3 | opcode) { 7447 case 0b01100110: /* FMOV half <-> 32-bit int */ 7448 case 0b01100111: 7449 case 0b11100110: /* FMOV half <-> 64-bit int */ 7450 case 0b11100111: 7451 if (!dc_isar_feature(aa64_fp16, s)) { 7452 goto do_unallocated; 7453 } 7454 /* fallthru */ 7455 case 0b00000110: /* FMOV 32-bit */ 7456 case 0b00000111: 7457 case 0b10100110: /* FMOV 64-bit */ 7458 case 0b10100111: 7459 case 0b11001110: /* FMOV top half of 128-bit */ 7460 case 0b11001111: 7461 if (!fp_access_check(s)) { 7462 return; 7463 } 7464 itof = opcode & 1; 7465 handle_fmov(s, rd, rn, type, itof); 7466 break; 7467 7468 case 0b00111110: /* FJCVTZS */ 7469 if (!dc_isar_feature(aa64_jscvt, s)) { 7470 goto do_unallocated; 7471 } else if (fp_access_check(s)) { 7472 handle_fjcvtzs(s, rd, rn); 7473 } 7474 break; 7475 7476 default: 7477 do_unallocated: 7478 unallocated_encoding(s); 7479 return; 7480 } 7481 break; 7482 } 7483 } 7484 7485 /* FP-specific subcases of table C3-6 (SIMD and FP data processing) 7486 * 31 30 29 28 25 24 0 7487 * +---+---+---+---------+-----------------------------+ 7488 * | | 0 | | 1 1 1 1 | | 7489 * +---+---+---+---------+-----------------------------+ 7490 */ 7491 static void disas_data_proc_fp(DisasContext *s, uint32_t insn) 7492 { 7493 if (extract32(insn, 24, 1)) { 7494 /* Floating point data-processing (3 source) */ 7495 disas_fp_3src(s, insn); 7496 } else if (extract32(insn, 21, 1) == 0) { 7497 /* Floating point to fixed point conversions */ 7498 disas_fp_fixed_conv(s, insn); 7499 } else { 7500 switch (extract32(insn, 10, 2)) { 7501 case 1: 7502 /* Floating point conditional compare */ 7503 disas_fp_ccomp(s, insn); 7504 break; 7505 case 2: 7506 /* Floating point data-processing (2 source) */ 7507 disas_fp_2src(s, insn); 7508 break; 7509 case 3: 7510 /* Floating point conditional select */ 7511 disas_fp_csel(s, insn); 7512 break; 7513 case 0: 7514 switch (ctz32(extract32(insn, 12, 4))) { 7515 case 0: /* [15:12] == xxx1 */ 7516 /* Floating point immediate */ 7517 disas_fp_imm(s, insn); 7518 break; 7519 case 1: /* [15:12] == xx10 */ 7520 /* Floating point compare */ 7521 disas_fp_compare(s, insn); 7522 break; 7523 case 2: /* [15:12] == x100 */ 7524 /* Floating point data-processing (1 source) */ 7525 disas_fp_1src(s, insn); 7526 break; 7527 case 3: /* [15:12] == 1000 */ 7528 unallocated_encoding(s); 7529 break; 7530 default: /* [15:12] == 0000 */ 7531 /* Floating point <-> integer conversions */ 7532 disas_fp_int_conv(s, insn); 7533 break; 7534 } 7535 break; 7536 } 7537 } 7538 } 7539 7540 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right, 7541 int pos) 7542 { 7543 /* Extract 64 bits from the middle of two concatenated 64 bit 7544 * vector register slices left:right. The extracted bits start 7545 * at 'pos' bits into the right (least significant) side. 7546 * We return the result in tcg_right, and guarantee not to 7547 * trash tcg_left. 
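 * For example, with pos == 24 the result is left[23:0]:right[63:24],
 * i.e. right supplies bits [39:0] and left supplies bits [63:40].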
7548 */ 7549 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 7550 assert(pos > 0 && pos < 64); 7551 7552 tcg_gen_shri_i64(tcg_right, tcg_right, pos); 7553 tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos); 7554 tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp); 7555 7556 tcg_temp_free_i64(tcg_tmp); 7557 } 7558 7559 /* EXT 7560 * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0 7561 * +---+---+-------------+-----+---+------+---+------+---+------+------+ 7562 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd | 7563 * +---+---+-------------+-----+---+------+---+------+---+------+------+ 7564 */ 7565 static void disas_simd_ext(DisasContext *s, uint32_t insn) 7566 { 7567 int is_q = extract32(insn, 30, 1); 7568 int op2 = extract32(insn, 22, 2); 7569 int imm4 = extract32(insn, 11, 4); 7570 int rm = extract32(insn, 16, 5); 7571 int rn = extract32(insn, 5, 5); 7572 int rd = extract32(insn, 0, 5); 7573 int pos = imm4 << 3; 7574 TCGv_i64 tcg_resl, tcg_resh; 7575 7576 if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) { 7577 unallocated_encoding(s); 7578 return; 7579 } 7580 7581 if (!fp_access_check(s)) { 7582 return; 7583 } 7584 7585 tcg_resh = tcg_temp_new_i64(); 7586 tcg_resl = tcg_temp_new_i64(); 7587 7588 /* Vd gets bits starting at pos bits into Vm:Vn. This is 7589 * either extracting 128 bits from a 128:128 concatenation, or 7590 * extracting 64 bits from a 64:64 concatenation. 7591 */ 7592 if (!is_q) { 7593 read_vec_element(s, tcg_resl, rn, 0, MO_64); 7594 if (pos != 0) { 7595 read_vec_element(s, tcg_resh, rm, 0, MO_64); 7596 do_ext64(s, tcg_resh, tcg_resl, pos); 7597 } 7598 } else { 7599 TCGv_i64 tcg_hh; 7600 typedef struct { 7601 int reg; 7602 int elt; 7603 } EltPosns; 7604 EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} }; 7605 EltPosns *elt = eltposns; 7606 7607 if (pos >= 64) { 7608 elt++; 7609 pos -= 64; 7610 } 7611 7612 read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64); 7613 elt++; 7614 read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64); 7615 elt++; 7616 if (pos != 0) { 7617 do_ext64(s, tcg_resh, tcg_resl, pos); 7618 tcg_hh = tcg_temp_new_i64(); 7619 read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64); 7620 do_ext64(s, tcg_hh, tcg_resh, pos); 7621 tcg_temp_free_i64(tcg_hh); 7622 } 7623 } 7624 7625 write_vec_element(s, tcg_resl, rd, 0, MO_64); 7626 tcg_temp_free_i64(tcg_resl); 7627 if (is_q) { 7628 write_vec_element(s, tcg_resh, rd, 1, MO_64); 7629 } 7630 tcg_temp_free_i64(tcg_resh); 7631 clear_vec_high(s, is_q, rd); 7632 } 7633 7634 /* TBL/TBX 7635 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0 7636 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ 7637 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd | 7638 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ 7639 */ 7640 static void disas_simd_tb(DisasContext *s, uint32_t insn) 7641 { 7642 int op2 = extract32(insn, 22, 2); 7643 int is_q = extract32(insn, 30, 1); 7644 int rm = extract32(insn, 16, 5); 7645 int rn = extract32(insn, 5, 5); 7646 int rd = extract32(insn, 0, 5); 7647 int is_tbx = extract32(insn, 12, 1); 7648 int len = (extract32(insn, 13, 2) + 1) * 16; 7649 7650 if (op2 != 0) { 7651 unallocated_encoding(s); 7652 return; 7653 } 7654 7655 if (!fp_access_check(s)) { 7656 return; 7657 } 7658 7659 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 7660 vec_full_reg_offset(s, rm), cpu_env, 7661 is_q ? 
16 : 8, vec_full_reg_size(s), 7662 (len << 6) | (is_tbx << 5) | rn, 7663 gen_helper_simd_tblx); 7664 } 7665 7666 /* ZIP/UZP/TRN 7667 * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 7668 * +---+---+-------------+------+---+------+---+------------------+------+ 7669 * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd | 7670 * +---+---+-------------+------+---+------+---+------------------+------+ 7671 */ 7672 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) 7673 { 7674 int rd = extract32(insn, 0, 5); 7675 int rn = extract32(insn, 5, 5); 7676 int rm = extract32(insn, 16, 5); 7677 int size = extract32(insn, 22, 2); 7678 /* opc field bits [1:0] indicate ZIP/UZP/TRN; 7679 * bit 2 indicates 1 vs 2 variant of the insn. 7680 */ 7681 int opcode = extract32(insn, 12, 2); 7682 bool part = extract32(insn, 14, 1); 7683 bool is_q = extract32(insn, 30, 1); 7684 int esize = 8 << size; 7685 int i, ofs; 7686 int datasize = is_q ? 128 : 64; 7687 int elements = datasize / esize; 7688 TCGv_i64 tcg_res, tcg_resl, tcg_resh; 7689 7690 if (opcode == 0 || (size == 3 && !is_q)) { 7691 unallocated_encoding(s); 7692 return; 7693 } 7694 7695 if (!fp_access_check(s)) { 7696 return; 7697 } 7698 7699 tcg_resl = tcg_const_i64(0); 7700 tcg_resh = is_q ? tcg_const_i64(0) : NULL; 7701 tcg_res = tcg_temp_new_i64(); 7702 7703 for (i = 0; i < elements; i++) { 7704 switch (opcode) { 7705 case 1: /* UZP1/2 */ 7706 { 7707 int midpoint = elements / 2; 7708 if (i < midpoint) { 7709 read_vec_element(s, tcg_res, rn, 2 * i + part, size); 7710 } else { 7711 read_vec_element(s, tcg_res, rm, 7712 2 * (i - midpoint) + part, size); 7713 } 7714 break; 7715 } 7716 case 2: /* TRN1/2 */ 7717 if (i & 1) { 7718 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size); 7719 } else { 7720 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size); 7721 } 7722 break; 7723 case 3: /* ZIP1/2 */ 7724 { 7725 int base = part * elements / 2; 7726 if (i & 1) { 7727 read_vec_element(s, tcg_res, rm, base + (i >> 1), size); 7728 } else { 7729 read_vec_element(s, tcg_res, rn, base + (i >> 1), size); 7730 } 7731 break; 7732 } 7733 default: 7734 g_assert_not_reached(); 7735 } 7736 7737 ofs = i * esize; 7738 if (ofs < 64) { 7739 tcg_gen_shli_i64(tcg_res, tcg_res, ofs); 7740 tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res); 7741 } else { 7742 tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64); 7743 tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res); 7744 } 7745 } 7746 7747 tcg_temp_free_i64(tcg_res); 7748 7749 write_vec_element(s, tcg_resl, rd, 0, MO_64); 7750 tcg_temp_free_i64(tcg_resl); 7751 7752 if (is_q) { 7753 write_vec_element(s, tcg_resh, rd, 1, MO_64); 7754 tcg_temp_free_i64(tcg_resh); 7755 } 7756 clear_vec_high(s, is_q, rd); 7757 } 7758 7759 /* 7760 * do_reduction_op helper 7761 * 7762 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 7763 * important for correct NaN propagation that we do these 7764 * operations in exactly the order specified by the pseudocode. 7765 * 7766 * This is a recursive function, TCG temps should be freed by the 7767 * calling function once it is done with the values. 7768 */ 7769 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn, 7770 int esize, int size, int vmap, TCGv_ptr fpst) 7771 { 7772 if (esize == size) { 7773 int element; 7774 MemOp msize = esize == 16 ? 
MO_16 : MO_32; 7775 TCGv_i32 tcg_elem; 7776 7777 /* We should have one register left here */ 7778 assert(ctpop8(vmap) == 1); 7779 element = ctz32(vmap); 7780 assert(element < 8); 7781 7782 tcg_elem = tcg_temp_new_i32(); 7783 read_vec_element_i32(s, tcg_elem, rn, element, msize); 7784 return tcg_elem; 7785 } else { 7786 int bits = size / 2; 7787 int shift = ctpop8(vmap) / 2; 7788 int vmap_lo = (vmap >> shift) & vmap; 7789 int vmap_hi = (vmap & ~vmap_lo); 7790 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 7791 7792 tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst); 7793 tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst); 7794 tcg_res = tcg_temp_new_i32(); 7795 7796 switch (fpopcode) { 7797 case 0x0c: /* fmaxnmv half-precision */ 7798 gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst); 7799 break; 7800 case 0x0f: /* fmaxv half-precision */ 7801 gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst); 7802 break; 7803 case 0x1c: /* fminnmv half-precision */ 7804 gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst); 7805 break; 7806 case 0x1f: /* fminv half-precision */ 7807 gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst); 7808 break; 7809 case 0x2c: /* fmaxnmv */ 7810 gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst); 7811 break; 7812 case 0x2f: /* fmaxv */ 7813 gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst); 7814 break; 7815 case 0x3c: /* fminnmv */ 7816 gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst); 7817 break; 7818 case 0x3f: /* fminv */ 7819 gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst); 7820 break; 7821 default: 7822 g_assert_not_reached(); 7823 } 7824 7825 tcg_temp_free_i32(tcg_hi); 7826 tcg_temp_free_i32(tcg_lo); 7827 return tcg_res; 7828 } 7829 } 7830 7831 /* AdvSIMD across lanes 7832 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 7833 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 7834 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | 7835 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 7836 */ 7837 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) 7838 { 7839 int rd = extract32(insn, 0, 5); 7840 int rn = extract32(insn, 5, 5); 7841 int size = extract32(insn, 22, 2); 7842 int opcode = extract32(insn, 12, 5); 7843 bool is_q = extract32(insn, 30, 1); 7844 bool is_u = extract32(insn, 29, 1); 7845 bool is_fp = false; 7846 bool is_min = false; 7847 int esize; 7848 int elements; 7849 int i; 7850 TCGv_i64 tcg_res, tcg_elt; 7851 7852 switch (opcode) { 7853 case 0x1b: /* ADDV */ 7854 if (is_u) { 7855 unallocated_encoding(s); 7856 return; 7857 } 7858 /* fall through */ 7859 case 0x3: /* SADDLV, UADDLV */ 7860 case 0xa: /* SMAXV, UMAXV */ 7861 case 0x1a: /* SMINV, UMINV */ 7862 if (size == 3 || (size == 2 && !is_q)) { 7863 unallocated_encoding(s); 7864 return; 7865 } 7866 break; 7867 case 0xc: /* FMAXNMV, FMINNMV */ 7868 case 0xf: /* FMAXV, FMINV */ 7869 /* Bit 1 of size field encodes min vs max and the actual size 7870 * depends on the encoding of the U bit. If not set (and FP16 7871 * enabled) then we do half-precision float instead of single 7872 * precision. 
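 * Concretely: U == 0 selects the half-precision form (valid only when
 * FP16 is implemented), while U == 1 (which also requires Q == 1 and
 * size<0> == 0) selects single precision.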
7873 */ 7874 is_min = extract32(size, 1, 1); 7875 is_fp = true; 7876 if (!is_u && dc_isar_feature(aa64_fp16, s)) { 7877 size = 1; 7878 } else if (!is_u || !is_q || extract32(size, 0, 1)) { 7879 unallocated_encoding(s); 7880 return; 7881 } else { 7882 size = 2; 7883 } 7884 break; 7885 default: 7886 unallocated_encoding(s); 7887 return; 7888 } 7889 7890 if (!fp_access_check(s)) { 7891 return; 7892 } 7893 7894 esize = 8 << size; 7895 elements = (is_q ? 128 : 64) / esize; 7896 7897 tcg_res = tcg_temp_new_i64(); 7898 tcg_elt = tcg_temp_new_i64(); 7899 7900 /* These instructions operate across all lanes of a vector 7901 * to produce a single result. We can guarantee that a 64 7902 * bit intermediate is sufficient: 7903 * + for [US]ADDLV the maximum element size is 32 bits, and 7904 * the result type is 64 bits 7905 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the 7906 * same as the element size, which is 32 bits at most 7907 * For the integer operations we can choose to work at 64 7908 * or 32 bits and truncate at the end; for simplicity 7909 * we use 64 bits always. The floating point 7910 * ops do require 32 bit intermediates, though. 7911 */ 7912 if (!is_fp) { 7913 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN)); 7914 7915 for (i = 1; i < elements; i++) { 7916 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN)); 7917 7918 switch (opcode) { 7919 case 0x03: /* SADDLV / UADDLV */ 7920 case 0x1b: /* ADDV */ 7921 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt); 7922 break; 7923 case 0x0a: /* SMAXV / UMAXV */ 7924 if (is_u) { 7925 tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt); 7926 } else { 7927 tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt); 7928 } 7929 break; 7930 case 0x1a: /* SMINV / UMINV */ 7931 if (is_u) { 7932 tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt); 7933 } else { 7934 tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt); 7935 } 7936 break; 7937 default: 7938 g_assert_not_reached(); 7939 } 7940 7941 } 7942 } else { 7943 /* Floating point vector reduction ops which work across 32 7944 * bit (single) or 16 bit (half-precision) intermediates. 7945 * Note that correct NaN propagation requires that we do these 7946 * operations in exactly the order specified by the pseudocode. 7947 */ 7948 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 7949 int fpopcode = opcode | is_min << 4 | is_u << 5; 7950 int vmap = (1 << elements) - 1; 7951 TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize, 7952 (is_q ? 
128 : 64), vmap, fpst); 7953 tcg_gen_extu_i32_i64(tcg_res, tcg_res32); 7954 tcg_temp_free_i32(tcg_res32); 7955 tcg_temp_free_ptr(fpst); 7956 } 7957 7958 tcg_temp_free_i64(tcg_elt); 7959 7960 /* Now truncate the result to the width required for the final output */ 7961 if (opcode == 0x03) { 7962 /* SADDLV, UADDLV: result is 2*esize */ 7963 size++; 7964 } 7965 7966 switch (size) { 7967 case 0: 7968 tcg_gen_ext8u_i64(tcg_res, tcg_res); 7969 break; 7970 case 1: 7971 tcg_gen_ext16u_i64(tcg_res, tcg_res); 7972 break; 7973 case 2: 7974 tcg_gen_ext32u_i64(tcg_res, tcg_res); 7975 break; 7976 case 3: 7977 break; 7978 default: 7979 g_assert_not_reached(); 7980 } 7981 7982 write_fp_dreg(s, rd, tcg_res); 7983 tcg_temp_free_i64(tcg_res); 7984 } 7985 7986 /* DUP (Element, Vector) 7987 * 7988 * 31 30 29 21 20 16 15 10 9 5 4 0 7989 * +---+---+-------------------+--------+-------------+------+------+ 7990 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | 7991 * +---+---+-------------------+--------+-------------+------+------+ 7992 * 7993 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7994 */ 7995 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, 7996 int imm5) 7997 { 7998 int size = ctz32(imm5); 7999 int index; 8000 8001 if (size > 3 || (size == 3 && !is_q)) { 8002 unallocated_encoding(s); 8003 return; 8004 } 8005 8006 if (!fp_access_check(s)) { 8007 return; 8008 } 8009 8010 index = imm5 >> (size + 1); 8011 tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd), 8012 vec_reg_offset(s, rn, index, size), 8013 is_q ? 16 : 8, vec_full_reg_size(s)); 8014 } 8015 8016 /* DUP (element, scalar) 8017 * 31 21 20 16 15 10 9 5 4 0 8018 * +-----------------------+--------+-------------+------+------+ 8019 * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | 8020 * +-----------------------+--------+-------------+------+------+ 8021 */ 8022 static void handle_simd_dupes(DisasContext *s, int rd, int rn, 8023 int imm5) 8024 { 8025 int size = ctz32(imm5); 8026 int index; 8027 TCGv_i64 tmp; 8028 8029 if (size > 3) { 8030 unallocated_encoding(s); 8031 return; 8032 } 8033 8034 if (!fp_access_check(s)) { 8035 return; 8036 } 8037 8038 index = imm5 >> (size + 1); 8039 8040 /* This instruction just extracts the specified element and 8041 * zero-extends it into the bottom of the destination register. 8042 */ 8043 tmp = tcg_temp_new_i64(); 8044 read_vec_element(s, tmp, rn, index, size); 8045 write_fp_dreg(s, rd, tmp); 8046 tcg_temp_free_i64(tmp); 8047 } 8048 8049 /* DUP (General) 8050 * 8051 * 31 30 29 21 20 16 15 10 9 5 4 0 8052 * +---+---+-------------------+--------+-------------+------+------+ 8053 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd | 8054 * +---+---+-------------------+--------+-------------+------+------+ 8055 * 8056 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 8057 */ 8058 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn, 8059 int imm5) 8060 { 8061 int size = ctz32(imm5); 8062 uint32_t dofs, oprsz, maxsz; 8063 8064 if (size > 3 || ((size == 3) && !is_q)) { 8065 unallocated_encoding(s); 8066 return; 8067 } 8068 8069 if (!fp_access_check(s)) { 8070 return; 8071 } 8072 8073 dofs = vec_full_reg_offset(s, rd); 8074 oprsz = is_q ? 
16 : 8; 8075 maxsz = vec_full_reg_size(s); 8076 8077 tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn)); 8078 } 8079 8080 /* INS (Element) 8081 * 8082 * 31 21 20 16 15 14 11 10 9 5 4 0 8083 * +-----------------------+--------+------------+---+------+------+ 8084 * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 8085 * +-----------------------+--------+------------+---+------+------+ 8086 * 8087 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 8088 * index: encoded in imm5<4:size+1> 8089 */ 8090 static void handle_simd_inse(DisasContext *s, int rd, int rn, 8091 int imm4, int imm5) 8092 { 8093 int size = ctz32(imm5); 8094 int src_index, dst_index; 8095 TCGv_i64 tmp; 8096 8097 if (size > 3) { 8098 unallocated_encoding(s); 8099 return; 8100 } 8101 8102 if (!fp_access_check(s)) { 8103 return; 8104 } 8105 8106 dst_index = extract32(imm5, 1+size, 5); 8107 src_index = extract32(imm4, size, 4); 8108 8109 tmp = tcg_temp_new_i64(); 8110 8111 read_vec_element(s, tmp, rn, src_index, size); 8112 write_vec_element(s, tmp, rd, dst_index, size); 8113 8114 tcg_temp_free_i64(tmp); 8115 8116 /* INS is considered a 128-bit write for SVE. */ 8117 clear_vec_high(s, true, rd); 8118 } 8119 8120 8121 /* INS (General) 8122 * 8123 * 31 21 20 16 15 10 9 5 4 0 8124 * +-----------------------+--------+-------------+------+------+ 8125 * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd | 8126 * +-----------------------+--------+-------------+------+------+ 8127 * 8128 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 8129 * index: encoded in imm5<4:size+1> 8130 */ 8131 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5) 8132 { 8133 int size = ctz32(imm5); 8134 int idx; 8135 8136 if (size > 3) { 8137 unallocated_encoding(s); 8138 return; 8139 } 8140 8141 if (!fp_access_check(s)) { 8142 return; 8143 } 8144 8145 idx = extract32(imm5, 1 + size, 4 - size); 8146 write_vec_element(s, cpu_reg(s, rn), rd, idx, size); 8147 8148 /* INS is considered a 128-bit write for SVE. */ 8149 clear_vec_high(s, true, rd); 8150 } 8151 8152 /* 8153 * UMOV (General) 8154 * SMOV (General) 8155 * 8156 * 31 30 29 21 20 16 15 12 10 9 5 4 0 8157 * +---+---+-------------------+--------+-------------+------+------+ 8158 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd | 8159 * +---+---+-------------------+--------+-------------+------+------+ 8160 * 8161 * U: unsigned when set 8162 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 8163 */ 8164 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, 8165 int rn, int rd, int imm5) 8166 { 8167 int size = ctz32(imm5); 8168 int element; 8169 TCGv_i64 tcg_rd; 8170 8171 /* Check for UnallocatedEncodings */ 8172 if (is_signed) { 8173 if (size > 2 || (size == 2 && !is_q)) { 8174 unallocated_encoding(s); 8175 return; 8176 } 8177 } else { 8178 if (size > 3 8179 || (size < 3 && is_q) 8180 || (size == 3 && !is_q)) { 8181 unallocated_encoding(s); 8182 return; 8183 } 8184 } 8185 8186 if (!fp_access_check(s)) { 8187 return; 8188 } 8189 8190 element = extract32(imm5, 1+size, 4); 8191 8192 tcg_rd = cpu_reg(s, rd); 8193 read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? 
MO_SIGN : 0)); 8194 if (is_signed && !is_q) { 8195 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8196 } 8197 } 8198 8199 /* AdvSIMD copy 8200 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 8201 * +---+---+----+-----------------+------+---+------+---+------+------+ 8202 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 8203 * +---+---+----+-----------------+------+---+------+---+------+------+ 8204 */ 8205 static void disas_simd_copy(DisasContext *s, uint32_t insn) 8206 { 8207 int rd = extract32(insn, 0, 5); 8208 int rn = extract32(insn, 5, 5); 8209 int imm4 = extract32(insn, 11, 4); 8210 int op = extract32(insn, 29, 1); 8211 int is_q = extract32(insn, 30, 1); 8212 int imm5 = extract32(insn, 16, 5); 8213 8214 if (op) { 8215 if (is_q) { 8216 /* INS (element) */ 8217 handle_simd_inse(s, rd, rn, imm4, imm5); 8218 } else { 8219 unallocated_encoding(s); 8220 } 8221 } else { 8222 switch (imm4) { 8223 case 0: 8224 /* DUP (element - vector) */ 8225 handle_simd_dupe(s, is_q, rd, rn, imm5); 8226 break; 8227 case 1: 8228 /* DUP (general) */ 8229 handle_simd_dupg(s, is_q, rd, rn, imm5); 8230 break; 8231 case 3: 8232 if (is_q) { 8233 /* INS (general) */ 8234 handle_simd_insg(s, rd, rn, imm5); 8235 } else { 8236 unallocated_encoding(s); 8237 } 8238 break; 8239 case 5: 8240 case 7: 8241 /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */ 8242 handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5); 8243 break; 8244 default: 8245 unallocated_encoding(s); 8246 break; 8247 } 8248 } 8249 } 8250 8251 /* AdvSIMD modified immediate 8252 * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0 8253 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ 8254 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd | 8255 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ 8256 * 8257 * There are a number of operations that can be carried out here: 8258 * MOVI - move (shifted) imm into register 8259 * MVNI - move inverted (shifted) imm into register 8260 * ORR - bitwise OR of (shifted) imm with register 8261 * BIC - bitwise clear of (shifted) imm with register 8262 * With ARMv8.2 we also have: 8263 * FMOV half-precision 8264 */ 8265 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) 8266 { 8267 int rd = extract32(insn, 0, 5); 8268 int cmode = extract32(insn, 12, 4); 8269 int o2 = extract32(insn, 11, 1); 8270 uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5); 8271 bool is_neg = extract32(insn, 29, 1); 8272 bool is_q = extract32(insn, 30, 1); 8273 uint64_t imm = 0; 8274 8275 if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) { 8276 /* Check for FMOV (vector, immediate) - half-precision */ 8277 if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) { 8278 unallocated_encoding(s); 8279 return; 8280 } 8281 } 8282 8283 if (!fp_access_check(s)) { 8284 return; 8285 } 8286 8287 if (cmode == 15 && o2 && !is_neg) { 8288 /* FMOV (vector, immediate) - half-precision */ 8289 imm = vfp_expand_imm(MO_16, abcdefgh); 8290 /* now duplicate across the lanes */ 8291 imm = dup_const(MO_16, imm); 8292 } else { 8293 imm = asimd_imm_const(abcdefgh, cmode, is_neg); 8294 } 8295 8296 if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) { 8297 /* MOVI or MVNI, with MVNI negation handled above. */ 8298 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8, 8299 vec_full_reg_size(s), imm); 8300 } else { 8301 /* ORR or BIC, with BIC negation to AND handled above. 
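         * For example, cmode = 0b0011 with is_neg clear ORRs
         * imm8 << 8 into each 32-bit lane: asimd_imm_const() has
         * already expanded abcdefgh = 0xff to 0x0000ff000000ff00.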
*/ 8302 if (is_neg) { 8303 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64); 8304 } else { 8305 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64); 8306 } 8307 } 8308 } 8309 8310 /* AdvSIMD scalar copy 8311 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 8312 * +-----+----+-----------------+------+---+------+---+------+------+ 8313 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 8314 * +-----+----+-----------------+------+---+------+---+------+------+ 8315 */ 8316 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn) 8317 { 8318 int rd = extract32(insn, 0, 5); 8319 int rn = extract32(insn, 5, 5); 8320 int imm4 = extract32(insn, 11, 4); 8321 int imm5 = extract32(insn, 16, 5); 8322 int op = extract32(insn, 29, 1); 8323 8324 if (op != 0 || imm4 != 0) { 8325 unallocated_encoding(s); 8326 return; 8327 } 8328 8329 /* DUP (element, scalar) */ 8330 handle_simd_dupes(s, rd, rn, imm5); 8331 } 8332 8333 /* AdvSIMD scalar pairwise 8334 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 8335 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 8336 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | 8337 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 8338 */ 8339 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) 8340 { 8341 int u = extract32(insn, 29, 1); 8342 int size = extract32(insn, 22, 2); 8343 int opcode = extract32(insn, 12, 5); 8344 int rn = extract32(insn, 5, 5); 8345 int rd = extract32(insn, 0, 5); 8346 TCGv_ptr fpst; 8347 8348 /* For some ops (the FP ones), size[1] is part of the encoding. 8349 * For ADDP strictly it is not but size[1] is always 1 for valid 8350 * encodings. 8351 */ 8352 opcode |= (extract32(size, 1, 1) << 5); 8353 8354 switch (opcode) { 8355 case 0x3b: /* ADDP */ 8356 if (u || size != 3) { 8357 unallocated_encoding(s); 8358 return; 8359 } 8360 if (!fp_access_check(s)) { 8361 return; 8362 } 8363 8364 fpst = NULL; 8365 break; 8366 case 0xc: /* FMAXNMP */ 8367 case 0xd: /* FADDP */ 8368 case 0xf: /* FMAXP */ 8369 case 0x2c: /* FMINNMP */ 8370 case 0x2f: /* FMINP */ 8371 /* FP op, size[0] is 32 or 64 bit*/ 8372 if (!u) { 8373 if (!dc_isar_feature(aa64_fp16, s)) { 8374 unallocated_encoding(s); 8375 return; 8376 } else { 8377 size = MO_16; 8378 } 8379 } else { 8380 size = extract32(size, 0, 1) ? MO_64 : MO_32; 8381 } 8382 8383 if (!fp_access_check(s)) { 8384 return; 8385 } 8386 8387 fpst = fpstatus_ptr(size == MO_16 ? 
                             FPST_FPCR_F16 : FPST_FPCR);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == MO_64) {
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_64);
        read_vec_element(s, tcg_op2, rn, 1, MO_64);

        switch (opcode) {
        case 0x3b: /* ADDP */
            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
            break;
        case 0xc: /* FMAXNMP */
            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xd: /* FADDP */
            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xf: /* FMAXP */
            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2c: /* FMINNMP */
            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2f: /* FMINP */
            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    } else {
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i32 tcg_res = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_op1, rn, 0, size);
        read_vec_element_i32(s, tcg_op2, rn, 1, size);

        if (size == MO_16) {
            switch (opcode) {
            case 0xc: /* FMAXNMP */
                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xd: /* FADDP */
                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xf: /* FMAXP */
                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2c: /* FMINNMP */
                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2f: /* FMINP */
                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        } else {
            switch (opcode) {
            case 0xc: /* FMAXNMP */
                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xd: /* FADDP */
                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xf: /* FMAXP */
                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2c: /* FMINNMP */
                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2f: /* FMINP */
                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        }

        write_fp_sreg(s, rd, tcg_res);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i32(tcg_res);
    }

    if (fpst) {
        tcg_temp_free_ptr(fpst);
    }
}

/*
 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
 *
 * This code handles the common shifting code and is used by both
 * the vector and scalar code.
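 *
 * Rounding adds 1 << (shift - 1) to the input before shifting, so
 * for example URSHR #2 of 0xf yields (0xf + 2) >> 2 = 4. For 64 bit
 * elements that addition can carry out of 64 bits, which is why the
 * rounding path below widens the intermediate to 128 bits.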
8497 */ 8498 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src, 8499 TCGv_i64 tcg_rnd, bool accumulate, 8500 bool is_u, int size, int shift) 8501 { 8502 bool extended_result = false; 8503 bool round = tcg_rnd != NULL; 8504 int ext_lshift = 0; 8505 TCGv_i64 tcg_src_hi; 8506 8507 if (round && size == 3) { 8508 extended_result = true; 8509 ext_lshift = 64 - shift; 8510 tcg_src_hi = tcg_temp_new_i64(); 8511 } else if (shift == 64) { 8512 if (!accumulate && is_u) { 8513 /* result is zero */ 8514 tcg_gen_movi_i64(tcg_res, 0); 8515 return; 8516 } 8517 } 8518 8519 /* Deal with the rounding step */ 8520 if (round) { 8521 if (extended_result) { 8522 TCGv_i64 tcg_zero = tcg_constant_i64(0); 8523 if (!is_u) { 8524 /* take care of sign extending tcg_res */ 8525 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63); 8526 tcg_gen_add2_i64(tcg_src, tcg_src_hi, 8527 tcg_src, tcg_src_hi, 8528 tcg_rnd, tcg_zero); 8529 } else { 8530 tcg_gen_add2_i64(tcg_src, tcg_src_hi, 8531 tcg_src, tcg_zero, 8532 tcg_rnd, tcg_zero); 8533 } 8534 } else { 8535 tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd); 8536 } 8537 } 8538 8539 /* Now do the shift right */ 8540 if (round && extended_result) { 8541 /* extended case, >64 bit precision required */ 8542 if (ext_lshift == 0) { 8543 /* special case, only high bits matter */ 8544 tcg_gen_mov_i64(tcg_src, tcg_src_hi); 8545 } else { 8546 tcg_gen_shri_i64(tcg_src, tcg_src, shift); 8547 tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift); 8548 tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi); 8549 } 8550 } else { 8551 if (is_u) { 8552 if (shift == 64) { 8553 /* essentially shifting in 64 zeros */ 8554 tcg_gen_movi_i64(tcg_src, 0); 8555 } else { 8556 tcg_gen_shri_i64(tcg_src, tcg_src, shift); 8557 } 8558 } else { 8559 if (shift == 64) { 8560 /* effectively extending the sign-bit */ 8561 tcg_gen_sari_i64(tcg_src, tcg_src, 63); 8562 } else { 8563 tcg_gen_sari_i64(tcg_src, tcg_src, shift); 8564 } 8565 } 8566 } 8567 8568 if (accumulate) { 8569 tcg_gen_add_i64(tcg_res, tcg_res, tcg_src); 8570 } else { 8571 tcg_gen_mov_i64(tcg_res, tcg_src); 8572 } 8573 8574 if (extended_result) { 8575 tcg_temp_free_i64(tcg_src_hi); 8576 } 8577 } 8578 8579 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ 8580 static void handle_scalar_simd_shri(DisasContext *s, 8581 bool is_u, int immh, int immb, 8582 int opcode, int rn, int rd) 8583 { 8584 const int size = 3; 8585 int immhb = immh << 3 | immb; 8586 int shift = 2 * (8 << size) - immhb; 8587 bool accumulate = false; 8588 bool round = false; 8589 bool insert = false; 8590 TCGv_i64 tcg_rn; 8591 TCGv_i64 tcg_rd; 8592 TCGv_i64 tcg_round; 8593 8594 if (!extract32(immh, 3, 1)) { 8595 unallocated_encoding(s); 8596 return; 8597 } 8598 8599 if (!fp_access_check(s)) { 8600 return; 8601 } 8602 8603 switch (opcode) { 8604 case 0x02: /* SSRA / USRA (accumulate) */ 8605 accumulate = true; 8606 break; 8607 case 0x04: /* SRSHR / URSHR (rounding) */ 8608 round = true; 8609 break; 8610 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 8611 accumulate = round = true; 8612 break; 8613 case 0x08: /* SRI */ 8614 insert = true; 8615 break; 8616 } 8617 8618 if (round) { 8619 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 8620 } else { 8621 tcg_round = NULL; 8622 } 8623 8624 tcg_rn = read_fp_dreg(s, rn); 8625 tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); 8626 8627 if (insert) { 8628 /* shift count same as element size is valid but does nothing; 8629 * special case to avoid potential shift by 64. 
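     * (For SRI a shift count equal to esize inserts no bits from the
     * source, leaving the destination unchanged; without this check
     * the deposit below would be asked for a zero-width field.)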
8630 */ 8631 int esize = 8 << size; 8632 if (shift != esize) { 8633 tcg_gen_shri_i64(tcg_rn, tcg_rn, shift); 8634 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift); 8635 } 8636 } else { 8637 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 8638 accumulate, is_u, size, shift); 8639 } 8640 8641 write_fp_dreg(s, rd, tcg_rd); 8642 8643 tcg_temp_free_i64(tcg_rn); 8644 tcg_temp_free_i64(tcg_rd); 8645 } 8646 8647 /* SHL/SLI - Scalar shift left */ 8648 static void handle_scalar_simd_shli(DisasContext *s, bool insert, 8649 int immh, int immb, int opcode, 8650 int rn, int rd) 8651 { 8652 int size = 32 - clz32(immh) - 1; 8653 int immhb = immh << 3 | immb; 8654 int shift = immhb - (8 << size); 8655 TCGv_i64 tcg_rn; 8656 TCGv_i64 tcg_rd; 8657 8658 if (!extract32(immh, 3, 1)) { 8659 unallocated_encoding(s); 8660 return; 8661 } 8662 8663 if (!fp_access_check(s)) { 8664 return; 8665 } 8666 8667 tcg_rn = read_fp_dreg(s, rn); 8668 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); 8669 8670 if (insert) { 8671 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift); 8672 } else { 8673 tcg_gen_shli_i64(tcg_rd, tcg_rn, shift); 8674 } 8675 8676 write_fp_dreg(s, rd, tcg_rd); 8677 8678 tcg_temp_free_i64(tcg_rn); 8679 tcg_temp_free_i64(tcg_rd); 8680 } 8681 8682 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with 8683 * (signed/unsigned) narrowing */ 8684 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, 8685 bool is_u_shift, bool is_u_narrow, 8686 int immh, int immb, int opcode, 8687 int rn, int rd) 8688 { 8689 int immhb = immh << 3 | immb; 8690 int size = 32 - clz32(immh) - 1; 8691 int esize = 8 << size; 8692 int shift = (2 * esize) - immhb; 8693 int elements = is_scalar ? 1 : (64 / esize); 8694 bool round = extract32(opcode, 0, 1); 8695 MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN); 8696 TCGv_i64 tcg_rn, tcg_rd, tcg_round; 8697 TCGv_i32 tcg_rd_narrowed; 8698 TCGv_i64 tcg_final; 8699 8700 static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = { 8701 { gen_helper_neon_narrow_sat_s8, 8702 gen_helper_neon_unarrow_sat8 }, 8703 { gen_helper_neon_narrow_sat_s16, 8704 gen_helper_neon_unarrow_sat16 }, 8705 { gen_helper_neon_narrow_sat_s32, 8706 gen_helper_neon_unarrow_sat32 }, 8707 { NULL, NULL }, 8708 }; 8709 static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = { 8710 gen_helper_neon_narrow_sat_u8, 8711 gen_helper_neon_narrow_sat_u16, 8712 gen_helper_neon_narrow_sat_u32, 8713 NULL 8714 }; 8715 NeonGenNarrowEnvFn *narrowfn; 8716 8717 int i; 8718 8719 assert(size < 4); 8720 8721 if (extract32(immh, 3, 1)) { 8722 unallocated_encoding(s); 8723 return; 8724 } 8725 8726 if (!fp_access_check(s)) { 8727 return; 8728 } 8729 8730 if (is_u_shift) { 8731 narrowfn = unsigned_narrow_fns[size]; 8732 } else { 8733 narrowfn = signed_narrow_fns[size][is_u_narrow ? 
1 : 0]; 8734 } 8735 8736 tcg_rn = tcg_temp_new_i64(); 8737 tcg_rd = tcg_temp_new_i64(); 8738 tcg_rd_narrowed = tcg_temp_new_i32(); 8739 tcg_final = tcg_const_i64(0); 8740 8741 if (round) { 8742 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 8743 } else { 8744 tcg_round = NULL; 8745 } 8746 8747 for (i = 0; i < elements; i++) { 8748 read_vec_element(s, tcg_rn, rn, i, ldop); 8749 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 8750 false, is_u_shift, size+1, shift); 8751 narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd); 8752 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed); 8753 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); 8754 } 8755 8756 if (!is_q) { 8757 write_vec_element(s, tcg_final, rd, 0, MO_64); 8758 } else { 8759 write_vec_element(s, tcg_final, rd, 1, MO_64); 8760 } 8761 8762 tcg_temp_free_i64(tcg_rn); 8763 tcg_temp_free_i64(tcg_rd); 8764 tcg_temp_free_i32(tcg_rd_narrowed); 8765 tcg_temp_free_i64(tcg_final); 8766 8767 clear_vec_high(s, is_q, rd); 8768 } 8769 8770 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */ 8771 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, 8772 bool src_unsigned, bool dst_unsigned, 8773 int immh, int immb, int rn, int rd) 8774 { 8775 int immhb = immh << 3 | immb; 8776 int size = 32 - clz32(immh) - 1; 8777 int shift = immhb - (8 << size); 8778 int pass; 8779 8780 assert(immh != 0); 8781 assert(!(scalar && is_q)); 8782 8783 if (!scalar) { 8784 if (!is_q && extract32(immh, 3, 1)) { 8785 unallocated_encoding(s); 8786 return; 8787 } 8788 8789 /* Since we use the variable-shift helpers we must 8790 * replicate the shift count into each element of 8791 * the tcg_shift value. 8792 */ 8793 switch (size) { 8794 case 0: 8795 shift |= shift << 8; 8796 /* fall through */ 8797 case 1: 8798 shift |= shift << 16; 8799 break; 8800 case 2: 8801 case 3: 8802 break; 8803 default: 8804 g_assert_not_reached(); 8805 } 8806 } 8807 8808 if (!fp_access_check(s)) { 8809 return; 8810 } 8811 8812 if (size == 3) { 8813 TCGv_i64 tcg_shift = tcg_constant_i64(shift); 8814 static NeonGenTwo64OpEnvFn * const fns[2][2] = { 8815 { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 }, 8816 { NULL, gen_helper_neon_qshl_u64 }, 8817 }; 8818 NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned]; 8819 int maxpass = is_q ? 2 : 1; 8820 8821 for (pass = 0; pass < maxpass; pass++) { 8822 TCGv_i64 tcg_op = tcg_temp_new_i64(); 8823 8824 read_vec_element(s, tcg_op, rn, pass, MO_64); 8825 genfn(tcg_op, cpu_env, tcg_op, tcg_shift); 8826 write_vec_element(s, tcg_op, rd, pass, MO_64); 8827 8828 tcg_temp_free_i64(tcg_op); 8829 } 8830 clear_vec_high(s, is_q, rd); 8831 } else { 8832 TCGv_i32 tcg_shift = tcg_constant_i32(shift); 8833 static NeonGenTwoOpEnvFn * const fns[2][2][3] = { 8834 { 8835 { gen_helper_neon_qshl_s8, 8836 gen_helper_neon_qshl_s16, 8837 gen_helper_neon_qshl_s32 }, 8838 { gen_helper_neon_qshlu_s8, 8839 gen_helper_neon_qshlu_s16, 8840 gen_helper_neon_qshlu_s32 } 8841 }, { 8842 { NULL, NULL, NULL }, 8843 { gen_helper_neon_qshl_u8, 8844 gen_helper_neon_qshl_u16, 8845 gen_helper_neon_qshl_u32 } 8846 } 8847 }; 8848 NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size]; 8849 MemOp memop = scalar ? size : MO_32; 8850 int maxpass = scalar ? 1 : is_q ? 
4 : 2; 8851 8852 for (pass = 0; pass < maxpass; pass++) { 8853 TCGv_i32 tcg_op = tcg_temp_new_i32(); 8854 8855 read_vec_element_i32(s, tcg_op, rn, pass, memop); 8856 genfn(tcg_op, cpu_env, tcg_op, tcg_shift); 8857 if (scalar) { 8858 switch (size) { 8859 case 0: 8860 tcg_gen_ext8u_i32(tcg_op, tcg_op); 8861 break; 8862 case 1: 8863 tcg_gen_ext16u_i32(tcg_op, tcg_op); 8864 break; 8865 case 2: 8866 break; 8867 default: 8868 g_assert_not_reached(); 8869 } 8870 write_fp_sreg(s, rd, tcg_op); 8871 } else { 8872 write_vec_element_i32(s, tcg_op, rd, pass, MO_32); 8873 } 8874 8875 tcg_temp_free_i32(tcg_op); 8876 } 8877 8878 if (!scalar) { 8879 clear_vec_high(s, is_q, rd); 8880 } 8881 } 8882 } 8883 8884 /* Common vector code for handling integer to FP conversion */ 8885 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, 8886 int elements, int is_signed, 8887 int fracbits, int size) 8888 { 8889 TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 8890 TCGv_i32 tcg_shift = NULL; 8891 8892 MemOp mop = size | (is_signed ? MO_SIGN : 0); 8893 int pass; 8894 8895 if (fracbits || size == MO_64) { 8896 tcg_shift = tcg_constant_i32(fracbits); 8897 } 8898 8899 if (size == MO_64) { 8900 TCGv_i64 tcg_int64 = tcg_temp_new_i64(); 8901 TCGv_i64 tcg_double = tcg_temp_new_i64(); 8902 8903 for (pass = 0; pass < elements; pass++) { 8904 read_vec_element(s, tcg_int64, rn, pass, mop); 8905 8906 if (is_signed) { 8907 gen_helper_vfp_sqtod(tcg_double, tcg_int64, 8908 tcg_shift, tcg_fpst); 8909 } else { 8910 gen_helper_vfp_uqtod(tcg_double, tcg_int64, 8911 tcg_shift, tcg_fpst); 8912 } 8913 if (elements == 1) { 8914 write_fp_dreg(s, rd, tcg_double); 8915 } else { 8916 write_vec_element(s, tcg_double, rd, pass, MO_64); 8917 } 8918 } 8919 8920 tcg_temp_free_i64(tcg_int64); 8921 tcg_temp_free_i64(tcg_double); 8922 8923 } else { 8924 TCGv_i32 tcg_int32 = tcg_temp_new_i32(); 8925 TCGv_i32 tcg_float = tcg_temp_new_i32(); 8926 8927 for (pass = 0; pass < elements; pass++) { 8928 read_vec_element_i32(s, tcg_int32, rn, pass, mop); 8929 8930 switch (size) { 8931 case MO_32: 8932 if (fracbits) { 8933 if (is_signed) { 8934 gen_helper_vfp_sltos(tcg_float, tcg_int32, 8935 tcg_shift, tcg_fpst); 8936 } else { 8937 gen_helper_vfp_ultos(tcg_float, tcg_int32, 8938 tcg_shift, tcg_fpst); 8939 } 8940 } else { 8941 if (is_signed) { 8942 gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst); 8943 } else { 8944 gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst); 8945 } 8946 } 8947 break; 8948 case MO_16: 8949 if (fracbits) { 8950 if (is_signed) { 8951 gen_helper_vfp_sltoh(tcg_float, tcg_int32, 8952 tcg_shift, tcg_fpst); 8953 } else { 8954 gen_helper_vfp_ultoh(tcg_float, tcg_int32, 8955 tcg_shift, tcg_fpst); 8956 } 8957 } else { 8958 if (is_signed) { 8959 gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst); 8960 } else { 8961 gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst); 8962 } 8963 } 8964 break; 8965 default: 8966 g_assert_not_reached(); 8967 } 8968 8969 if (elements == 1) { 8970 write_fp_sreg(s, rd, tcg_float); 8971 } else { 8972 write_vec_element_i32(s, tcg_float, rd, pass, size); 8973 } 8974 } 8975 8976 tcg_temp_free_i32(tcg_int32); 8977 tcg_temp_free_i32(tcg_float); 8978 } 8979 8980 tcg_temp_free_ptr(tcg_fpst); 8981 8982 clear_vec_high(s, elements << size == 16, rd); 8983 } 8984 8985 /* UCVTF/SCVTF - Integer to FP conversion */ 8986 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, 8987 bool is_q, bool is_u, 8988 int immh, int immb, int opcode, 8989 int rn, int rd) 8990 { 
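    /*
     * immh encodes the element size via the position of its topmost
     * set bit, and immh:immb together give the number of fractional
     * bits as (2 * esize) - immh:immb. For example immh = 0b0111,
     * immb = 0b000 means 32-bit elements with fracbits = 64 - 56 = 8.
     */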
    int size, elements, fracbits;
    int immhb = immh << 3 | immb;

    if (immh & 8) {
        size = MO_64;
        if (!is_scalar && !is_q) {
            unallocated_encoding(s);
            return;
        }
    } else if (immh & 4) {
        size = MO_32;
    } else if (immh & 2) {
        size = MO_16;
        if (!dc_isar_feature(aa64_fp16, s)) {
            unallocated_encoding(s);
            return;
        }
    } else {
        /* immh == 0 would be a failure of the decode logic */
        g_assert(immh == 1);
        unallocated_encoding(s);
        return;
    }

    if (is_scalar) {
        elements = 1;
    } else {
        elements = (8 << is_q) >> size;
    }
    fracbits = (16 << size) - immhb;

    if (!fp_access_check(s)) {
        return;
    }

    handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
}

/* FCVTZS, FCVTZU - FP to fixed-point conversion */
static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
                                         bool is_q, bool is_u,
                                         int immh, int immb, int rn, int rd)
{
    int immhb = immh << 3 | immb;
    int pass, size, fracbits;
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_rmode, tcg_shift;

    if (immh & 0x8) {
        size = MO_64;
        if (!is_scalar && !is_q) {
            unallocated_encoding(s);
            return;
        }
    } else if (immh & 0x4) {
        size = MO_32;
    } else if (immh & 0x2) {
        size = MO_16;
        if (!dc_isar_feature(aa64_fp16, s)) {
            unallocated_encoding(s);
            return;
        }
    } else {
        /* Should have split out AdvSIMD modified immediate earlier. */
        assert(immh == 1);
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    assert(!(is_scalar && is_q));

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
    tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
    fracbits = (16 << size) - immhb;
    tcg_shift = tcg_constant_i32(fracbits);

    if (size == MO_64) {
        int maxpass = is_scalar ? 1 : 2;

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            if (is_u) {
                gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            }
            write_vec_element(s, tcg_op, rd, pass, MO_64);
            tcg_temp_free_i64(tcg_op);
        }
        clear_vec_high(s, is_q, rd);
    } else {
        void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
        int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);

        switch (size) {
        case MO_16:
            if (is_u) {
                fn = gen_helper_vfp_touhh;
            } else {
                fn = gen_helper_vfp_toshh;
            }
            break;
        case MO_32:
            if (is_u) {
                fn = gen_helper_vfp_touls;
            } else {
                fn = gen_helper_vfp_tosls;
            }
            break;
        default:
            g_assert_not_reached();
        }

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op, rn, pass, size);
            fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            if (is_scalar) {
                write_fp_sreg(s, rd, tcg_op);
            } else {
                write_vec_element_i32(s, tcg_op, rd, pass, size);
            }
            tcg_temp_free_i32(tcg_op);
        }
        if (!is_scalar) {
            clear_vec_high(s, is_q, rd);
        }
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
    tcg_temp_free_ptr(tcg_fpstatus);
    tcg_temp_free_i32(tcg_rmode);
}

/* AdvSIMD scalar shift by immediate
 *  31 30 29 28         23 22  19 18  16 15    11  10 9    5 4    0
 * +-----+---+-------------+------+------+--------+---+------+------+
 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
 * +-----+---+-------------+------+------+--------+---+------+------+
 *
 * This is the scalar version so it works on fixed-size registers
 */
static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int immb = extract32(insn, 16, 3);
    int immh = extract32(insn, 19, 4);
    bool is_u = extract32(insn, 29, 1);

    if (immh == 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x08: /* SRI */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x00: /* SSHR / USHR */
    case 0x02: /* SSRA / USRA */
    case 0x04: /* SRSHR / URSHR */
    case 0x06: /* SRSRA / URSRA */
        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x0a: /* SHL / SLI */
        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x1c: /* SCVTF, UCVTF */
        handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
                                     opcode, rn, rd);
        break;
    case 0x10: /* SQSHRUN, SQSHRUN2 */
    case 0x11: /* SQRSHRUN, SQRSHRUN2 */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        handle_vec_simd_sqshrn(s, true, false, false, true,
                               immh, immb, opcode, rn, rd);
        break;
    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
    case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
        handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
                               immh, immb, opcode, rn, rd);
        break;
    case 0xc: /* SQSHLU */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
        break;
    case 0xe: /* SQSHL, UQSHL */
        handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
        break;
    case 0x1f: /* FCVTZS, FCVTZU */
        handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* AdvSIMD scalar three different
 *  31 30 29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  |
opcode | 0 0 | Rn | Rd | 9212 * +-----+---+-----------+------+---+------+--------+-----+------+------+ 9213 */ 9214 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) 9215 { 9216 bool is_u = extract32(insn, 29, 1); 9217 int size = extract32(insn, 22, 2); 9218 int opcode = extract32(insn, 12, 4); 9219 int rm = extract32(insn, 16, 5); 9220 int rn = extract32(insn, 5, 5); 9221 int rd = extract32(insn, 0, 5); 9222 9223 if (is_u) { 9224 unallocated_encoding(s); 9225 return; 9226 } 9227 9228 switch (opcode) { 9229 case 0x9: /* SQDMLAL, SQDMLAL2 */ 9230 case 0xb: /* SQDMLSL, SQDMLSL2 */ 9231 case 0xd: /* SQDMULL, SQDMULL2 */ 9232 if (size == 0 || size == 3) { 9233 unallocated_encoding(s); 9234 return; 9235 } 9236 break; 9237 default: 9238 unallocated_encoding(s); 9239 return; 9240 } 9241 9242 if (!fp_access_check(s)) { 9243 return; 9244 } 9245 9246 if (size == 2) { 9247 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 9248 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 9249 TCGv_i64 tcg_res = tcg_temp_new_i64(); 9250 9251 read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN); 9252 read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN); 9253 9254 tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2); 9255 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res); 9256 9257 switch (opcode) { 9258 case 0xd: /* SQDMULL, SQDMULL2 */ 9259 break; 9260 case 0xb: /* SQDMLSL, SQDMLSL2 */ 9261 tcg_gen_neg_i64(tcg_res, tcg_res); 9262 /* fall through */ 9263 case 0x9: /* SQDMLAL, SQDMLAL2 */ 9264 read_vec_element(s, tcg_op1, rd, 0, MO_64); 9265 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, 9266 tcg_res, tcg_op1); 9267 break; 9268 default: 9269 g_assert_not_reached(); 9270 } 9271 9272 write_fp_dreg(s, rd, tcg_res); 9273 9274 tcg_temp_free_i64(tcg_op1); 9275 tcg_temp_free_i64(tcg_op2); 9276 tcg_temp_free_i64(tcg_res); 9277 } else { 9278 TCGv_i32 tcg_op1 = read_fp_hreg(s, rn); 9279 TCGv_i32 tcg_op2 = read_fp_hreg(s, rm); 9280 TCGv_i64 tcg_res = tcg_temp_new_i64(); 9281 9282 gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2); 9283 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res); 9284 9285 switch (opcode) { 9286 case 0xd: /* SQDMULL, SQDMULL2 */ 9287 break; 9288 case 0xb: /* SQDMLSL, SQDMLSL2 */ 9289 gen_helper_neon_negl_u32(tcg_res, tcg_res); 9290 /* fall through */ 9291 case 0x9: /* SQDMLAL, SQDMLAL2 */ 9292 { 9293 TCGv_i64 tcg_op3 = tcg_temp_new_i64(); 9294 read_vec_element(s, tcg_op3, rd, 0, MO_32); 9295 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, 9296 tcg_res, tcg_op3); 9297 tcg_temp_free_i64(tcg_op3); 9298 break; 9299 } 9300 default: 9301 g_assert_not_reached(); 9302 } 9303 9304 tcg_gen_ext32u_i64(tcg_res, tcg_res); 9305 write_fp_dreg(s, rd, tcg_res); 9306 9307 tcg_temp_free_i32(tcg_op1); 9308 tcg_temp_free_i32(tcg_op2); 9309 tcg_temp_free_i64(tcg_res); 9310 } 9311 } 9312 9313 static void handle_3same_64(DisasContext *s, int opcode, bool u, 9314 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) 9315 { 9316 /* Handle 64x64->64 opcodes which are shared between the scalar 9317 * and vector 3-same groups. We cover every opcode where size == 3 9318 * is valid in either the three-reg-same (integer, not pairwise) 9319 * or scalar-three-reg-same groups. 
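     * The comparisons (CMGT, CMHI, CMGE, CMHS, CMTST, CMEQ) all
     * produce all-ones for a true result and all-zeroes for false;
     * CMTST in particular is true when (Rn & Rm) != 0.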
9320 */ 9321 TCGCond cond; 9322 9323 switch (opcode) { 9324 case 0x1: /* SQADD */ 9325 if (u) { 9326 gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9327 } else { 9328 gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9329 } 9330 break; 9331 case 0x5: /* SQSUB */ 9332 if (u) { 9333 gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9334 } else { 9335 gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9336 } 9337 break; 9338 case 0x6: /* CMGT, CMHI */ 9339 /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0. 9340 * We implement this using setcond (test) and then negating. 9341 */ 9342 cond = u ? TCG_COND_GTU : TCG_COND_GT; 9343 do_cmop: 9344 tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm); 9345 tcg_gen_neg_i64(tcg_rd, tcg_rd); 9346 break; 9347 case 0x7: /* CMGE, CMHS */ 9348 cond = u ? TCG_COND_GEU : TCG_COND_GE; 9349 goto do_cmop; 9350 case 0x11: /* CMTST, CMEQ */ 9351 if (u) { 9352 cond = TCG_COND_EQ; 9353 goto do_cmop; 9354 } 9355 gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm); 9356 break; 9357 case 0x8: /* SSHL, USHL */ 9358 if (u) { 9359 gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm); 9360 } else { 9361 gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm); 9362 } 9363 break; 9364 case 0x9: /* SQSHL, UQSHL */ 9365 if (u) { 9366 gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9367 } else { 9368 gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9369 } 9370 break; 9371 case 0xa: /* SRSHL, URSHL */ 9372 if (u) { 9373 gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm); 9374 } else { 9375 gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm); 9376 } 9377 break; 9378 case 0xb: /* SQRSHL, UQRSHL */ 9379 if (u) { 9380 gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9381 } else { 9382 gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9383 } 9384 break; 9385 case 0x10: /* ADD, SUB */ 9386 if (u) { 9387 tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm); 9388 } else { 9389 tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm); 9390 } 9391 break; 9392 default: 9393 g_assert_not_reached(); 9394 } 9395 } 9396 9397 /* Handle the 3-same-operands float operations; shared by the scalar 9398 * and vector encodings. The caller must filter out any encodings 9399 * not allocated for the encoding it is dealing with. 
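 * Note that the fpopcode values seen here have U folded in as bit 6
 * and size<1> as bit 5 of the base opcode, so for example FDIV
 * (opcode 0x1f with U set) arrives as 0x5f.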
9400 */ 9401 static void handle_3same_float(DisasContext *s, int size, int elements, 9402 int fpopcode, int rd, int rn, int rm) 9403 { 9404 int pass; 9405 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9406 9407 for (pass = 0; pass < elements; pass++) { 9408 if (size) { 9409 /* Double */ 9410 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 9411 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 9412 TCGv_i64 tcg_res = tcg_temp_new_i64(); 9413 9414 read_vec_element(s, tcg_op1, rn, pass, MO_64); 9415 read_vec_element(s, tcg_op2, rm, pass, MO_64); 9416 9417 switch (fpopcode) { 9418 case 0x39: /* FMLS */ 9419 /* As usual for ARM, separate negation for fused multiply-add */ 9420 gen_helper_vfp_negd(tcg_op1, tcg_op1); 9421 /* fall through */ 9422 case 0x19: /* FMLA */ 9423 read_vec_element(s, tcg_res, rd, pass, MO_64); 9424 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, 9425 tcg_res, fpst); 9426 break; 9427 case 0x18: /* FMAXNM */ 9428 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); 9429 break; 9430 case 0x1a: /* FADD */ 9431 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); 9432 break; 9433 case 0x1b: /* FMULX */ 9434 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst); 9435 break; 9436 case 0x1c: /* FCMEQ */ 9437 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9438 break; 9439 case 0x1e: /* FMAX */ 9440 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); 9441 break; 9442 case 0x1f: /* FRECPS */ 9443 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9444 break; 9445 case 0x38: /* FMINNM */ 9446 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); 9447 break; 9448 case 0x3a: /* FSUB */ 9449 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 9450 break; 9451 case 0x3e: /* FMIN */ 9452 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); 9453 break; 9454 case 0x3f: /* FRSQRTS */ 9455 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9456 break; 9457 case 0x5b: /* FMUL */ 9458 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 9459 break; 9460 case 0x5c: /* FCMGE */ 9461 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9462 break; 9463 case 0x5d: /* FACGE */ 9464 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9465 break; 9466 case 0x5f: /* FDIV */ 9467 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); 9468 break; 9469 case 0x7a: /* FABD */ 9470 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 9471 gen_helper_vfp_absd(tcg_res, tcg_res); 9472 break; 9473 case 0x7c: /* FCMGT */ 9474 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9475 break; 9476 case 0x7d: /* FACGT */ 9477 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9478 break; 9479 default: 9480 g_assert_not_reached(); 9481 } 9482 9483 write_vec_element(s, tcg_res, rd, pass, MO_64); 9484 9485 tcg_temp_free_i64(tcg_res); 9486 tcg_temp_free_i64(tcg_op1); 9487 tcg_temp_free_i64(tcg_op2); 9488 } else { 9489 /* Single */ 9490 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 9491 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 9492 TCGv_i32 tcg_res = tcg_temp_new_i32(); 9493 9494 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); 9495 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); 9496 9497 switch (fpopcode) { 9498 case 0x39: /* FMLS */ 9499 /* As usual for ARM, separate negation for fused multiply-add */ 9500 gen_helper_vfp_negs(tcg_op1, tcg_op1); 9501 /* fall through */ 9502 case 0x19: /* FMLA */ 9503 read_vec_element_i32(s, tcg_res, rd, pass, MO_32); 9504 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, 9505 tcg_res, fpst); 9506 break; 9507 case 0x1a: /* FADD */ 9508 
gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); 9509 break; 9510 case 0x1b: /* FMULX */ 9511 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst); 9512 break; 9513 case 0x1c: /* FCMEQ */ 9514 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9515 break; 9516 case 0x1e: /* FMAX */ 9517 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); 9518 break; 9519 case 0x1f: /* FRECPS */ 9520 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9521 break; 9522 case 0x18: /* FMAXNM */ 9523 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); 9524 break; 9525 case 0x38: /* FMINNM */ 9526 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); 9527 break; 9528 case 0x3a: /* FSUB */ 9529 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 9530 break; 9531 case 0x3e: /* FMIN */ 9532 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); 9533 break; 9534 case 0x3f: /* FRSQRTS */ 9535 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9536 break; 9537 case 0x5b: /* FMUL */ 9538 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 9539 break; 9540 case 0x5c: /* FCMGE */ 9541 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9542 break; 9543 case 0x5d: /* FACGE */ 9544 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9545 break; 9546 case 0x5f: /* FDIV */ 9547 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); 9548 break; 9549 case 0x7a: /* FABD */ 9550 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 9551 gen_helper_vfp_abss(tcg_res, tcg_res); 9552 break; 9553 case 0x7c: /* FCMGT */ 9554 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9555 break; 9556 case 0x7d: /* FACGT */ 9557 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9558 break; 9559 default: 9560 g_assert_not_reached(); 9561 } 9562 9563 if (elements == 1) { 9564 /* scalar single so clear high part */ 9565 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 9566 9567 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res); 9568 write_vec_element(s, tcg_tmp, rd, pass, MO_64); 9569 tcg_temp_free_i64(tcg_tmp); 9570 } else { 9571 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 9572 } 9573 9574 tcg_temp_free_i32(tcg_res); 9575 tcg_temp_free_i32(tcg_op1); 9576 tcg_temp_free_i32(tcg_op2); 9577 } 9578 } 9579 9580 tcg_temp_free_ptr(fpst); 9581 9582 clear_vec_high(s, elements * (size ? 
8 : 4) > 8, rd); 9583 } 9584 9585 /* AdvSIMD scalar three same 9586 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 9587 * +-----+---+-----------+------+---+------+--------+---+------+------+ 9588 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | 9589 * +-----+---+-----------+------+---+------+--------+---+------+------+ 9590 */ 9591 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) 9592 { 9593 int rd = extract32(insn, 0, 5); 9594 int rn = extract32(insn, 5, 5); 9595 int opcode = extract32(insn, 11, 5); 9596 int rm = extract32(insn, 16, 5); 9597 int size = extract32(insn, 22, 2); 9598 bool u = extract32(insn, 29, 1); 9599 TCGv_i64 tcg_rd; 9600 9601 if (opcode >= 0x18) { 9602 /* Floating point: U, size[1] and opcode indicate operation */ 9603 int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6); 9604 switch (fpopcode) { 9605 case 0x1b: /* FMULX */ 9606 case 0x1f: /* FRECPS */ 9607 case 0x3f: /* FRSQRTS */ 9608 case 0x5d: /* FACGE */ 9609 case 0x7d: /* FACGT */ 9610 case 0x1c: /* FCMEQ */ 9611 case 0x5c: /* FCMGE */ 9612 case 0x7c: /* FCMGT */ 9613 case 0x7a: /* FABD */ 9614 break; 9615 default: 9616 unallocated_encoding(s); 9617 return; 9618 } 9619 9620 if (!fp_access_check(s)) { 9621 return; 9622 } 9623 9624 handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm); 9625 return; 9626 } 9627 9628 switch (opcode) { 9629 case 0x1: /* SQADD, UQADD */ 9630 case 0x5: /* SQSUB, UQSUB */ 9631 case 0x9: /* SQSHL, UQSHL */ 9632 case 0xb: /* SQRSHL, UQRSHL */ 9633 break; 9634 case 0x8: /* SSHL, USHL */ 9635 case 0xa: /* SRSHL, URSHL */ 9636 case 0x6: /* CMGT, CMHI */ 9637 case 0x7: /* CMGE, CMHS */ 9638 case 0x11: /* CMTST, CMEQ */ 9639 case 0x10: /* ADD, SUB (vector) */ 9640 if (size != 3) { 9641 unallocated_encoding(s); 9642 return; 9643 } 9644 break; 9645 case 0x16: /* SQDMULH, SQRDMULH (vector) */ 9646 if (size != 1 && size != 2) { 9647 unallocated_encoding(s); 9648 return; 9649 } 9650 break; 9651 default: 9652 unallocated_encoding(s); 9653 return; 9654 } 9655 9656 if (!fp_access_check(s)) { 9657 return; 9658 } 9659 9660 tcg_rd = tcg_temp_new_i64(); 9661 9662 if (size == 3) { 9663 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 9664 TCGv_i64 tcg_rm = read_fp_dreg(s, rm); 9665 9666 handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm); 9667 tcg_temp_free_i64(tcg_rn); 9668 tcg_temp_free_i64(tcg_rm); 9669 } else { 9670 /* Do a single operation on the lowest element in the vector. 9671 * We use the standard Neon helpers and rely on 0 OP 0 == 0 with 9672 * no side effects for all these operations. 9673 * OPTME: special-purpose helpers would avoid doing some 9674 * unnecessary work in the helper for the 8 and 16 bit cases. 
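         * (The helpers compute all the 8 or 16 bit lanes packed into
         * a 32 bit value; the element read below leaves the unused
         * lanes zero, and those lanes simply compute 0 OP 0 == 0.)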
9675 */ 9676 NeonGenTwoOpEnvFn *genenvfn; 9677 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 9678 TCGv_i32 tcg_rm = tcg_temp_new_i32(); 9679 TCGv_i32 tcg_rd32 = tcg_temp_new_i32(); 9680 9681 read_vec_element_i32(s, tcg_rn, rn, 0, size); 9682 read_vec_element_i32(s, tcg_rm, rm, 0, size); 9683 9684 switch (opcode) { 9685 case 0x1: /* SQADD, UQADD */ 9686 { 9687 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9688 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, 9689 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, 9690 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, 9691 }; 9692 genenvfn = fns[size][u]; 9693 break; 9694 } 9695 case 0x5: /* SQSUB, UQSUB */ 9696 { 9697 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9698 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, 9699 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, 9700 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 }, 9701 }; 9702 genenvfn = fns[size][u]; 9703 break; 9704 } 9705 case 0x9: /* SQSHL, UQSHL */ 9706 { 9707 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9708 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, 9709 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, 9710 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, 9711 }; 9712 genenvfn = fns[size][u]; 9713 break; 9714 } 9715 case 0xb: /* SQRSHL, UQRSHL */ 9716 { 9717 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9718 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, 9719 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, 9720 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, 9721 }; 9722 genenvfn = fns[size][u]; 9723 break; 9724 } 9725 case 0x16: /* SQDMULH, SQRDMULH */ 9726 { 9727 static NeonGenTwoOpEnvFn * const fns[2][2] = { 9728 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, 9729 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, 9730 }; 9731 assert(size == 1 || size == 2); 9732 genenvfn = fns[size - 1][u]; 9733 break; 9734 } 9735 default: 9736 g_assert_not_reached(); 9737 } 9738 9739 genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm); 9740 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32); 9741 tcg_temp_free_i32(tcg_rd32); 9742 tcg_temp_free_i32(tcg_rn); 9743 tcg_temp_free_i32(tcg_rm); 9744 } 9745 9746 write_fp_dreg(s, rd, tcg_rd); 9747 9748 tcg_temp_free_i64(tcg_rd); 9749 } 9750 9751 /* AdvSIMD scalar three same FP16 9752 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 9753 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ 9754 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | 9755 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ 9756 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400 9757 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400 9758 */ 9759 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, 9760 uint32_t insn) 9761 { 9762 int rd = extract32(insn, 0, 5); 9763 int rn = extract32(insn, 5, 5); 9764 int opcode = extract32(insn, 11, 3); 9765 int rm = extract32(insn, 16, 5); 9766 bool u = extract32(insn, 29, 1); 9767 bool a = extract32(insn, 23, 1); 9768 int fpopcode = opcode | (a << 3) | (u << 4); 9769 TCGv_ptr fpst; 9770 TCGv_i32 tcg_op1; 9771 TCGv_i32 tcg_op2; 9772 TCGv_i32 tcg_res; 9773 9774 switch (fpopcode) { 9775 case 0x03: /* FMULX */ 9776 case 0x04: /* FCMEQ (reg) */ 9777 case 0x07: /* FRECPS */ 9778 case 0x0f: /* FRSQRTS */ 9779 case 0x14: /* FCMGE (reg) */ 9780 case 0x15: /* FACGE */ 9781 case 0x1a: /* FABD */ 9782 case 0x1c: /* FCMGT (reg) */ 9783 case 0x1d: /* FACGT */ 9784 break; 
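    /*
     * fpopcode folds 'a' in as bit 3 and U as bit 4 of the 3-bit
     * opcode field: FABD, for example, is opcode 0b010 with a = 1
     * and U = 1, giving 0x1a.
     */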
    default:
        unallocated_encoding(s);
        return;
    }

    if (!dc_isar_feature(aa64_fp16, s)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    fpst = fpstatus_ptr(FPST_FPCR_F16);

    tcg_op1 = read_fp_hreg(s, rn);
    tcg_op2 = read_fp_hreg(s, rm);
    tcg_res = tcg_temp_new_i32();

    switch (fpopcode) {
    case 0x03: /* FMULX */
        gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x04: /* FCMEQ (reg) */
        gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x07: /* FRECPS */
        gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x0f: /* FRSQRTS */
        gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x14: /* FCMGE (reg) */
        gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x15: /* FACGE */
        gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1a: /* FABD */
        gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
        tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
        break;
    case 0x1c: /* FCMGT (reg) */
        gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1d: /* FACGT */
        gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_i32(tcg_res);
    tcg_temp_free_i32(tcg_op1);
    tcg_temp_free_i32(tcg_op2);
    tcg_temp_free_ptr(fpst);
}

/* AdvSIMD scalar three same extra
 *  31 30 29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
 * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
 */
static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
                                                   uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    TCGv_i32 ele1, ele2, ele3;
    TCGv_i64 res;
    bool feature;

    switch (u * 16 + opcode) {
    case 0x10: /* SQRDMLAH (vector) */
    case 0x11: /* SQRDMLSH (vector) */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_rdm, s);
        break;
    default:
        unallocated_encoding(s);
        return;
    }
    if (!feature) {
        unallocated_encoding(s);
        return;
    }
    if (!fp_access_check(s)) {
        return;
    }

    /* Do a single operation on the lowest element in the vector.
     * We use the standard Neon helpers and rely on 0 OP 0 == 0
     * with no side effects for all these operations.
     * OPTME: special-purpose helpers would avoid doing some
     * unnecessary work in the helper for the 16 bit cases.
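     * (For SQRDMLAH and SQRDMLSH the unused packed lane computes
     * 0 * 0 +/- 0 == 0, so the high half of the result stays zero.)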
     */
    ele1 = tcg_temp_new_i32();
    ele2 = tcg_temp_new_i32();
    ele3 = tcg_temp_new_i32();

    read_vec_element_i32(s, ele1, rn, 0, size);
    read_vec_element_i32(s, ele2, rm, 0, size);
    read_vec_element_i32(s, ele3, rd, 0, size);

    switch (opcode) {
    case 0x0: /* SQRDMLAH */
        if (size == 1) {
            gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
        } else {
            gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
        }
        break;
    case 0x1: /* SQRDMLSH */
        if (size == 1) {
            gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
        } else {
            gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
        }
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_i32(ele1);
    tcg_temp_free_i32(ele2);

    res = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(res, ele3);
    tcg_temp_free_i32(ele3);

    write_fp_dreg(s, rd, res);
    tcg_temp_free_i64(res);
}

static void handle_2misc_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
{
    /* Handle 64->64 opcodes which are shared between the scalar and
     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
     * is valid in either group and also the double-precision fp ops.
     * The caller need only provide tcg_rmode and tcg_fpstatus if the op
     * requires them.
     */
    TCGCond cond;

    switch (opcode) {
    case 0x4: /* CLS, CLZ */
        if (u) {
            tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
        } else {
            tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
        }
        break;
    case 0x5: /* NOT */
        /* This opcode is shared with CNT and RBIT but we have earlier
         * enforced that size == 3 if and only if this is the NOT insn.
         */
        tcg_gen_not_i64(tcg_rd, tcg_rn);
        break;
    case 0x7: /* SQABS, SQNEG */
        if (u) {
            gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
        } else {
            gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
        }
        break;
    case 0xa: /* CMLT */
        /* 64 bit integer comparison against zero, result is
         * test ? (2^64 - 1) : 0. We implement this using setcond (test)
         * and then negating, as for the three-reg-same comparisons.
         */
        cond = TCG_COND_LT;
    do_cmop:
        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x8: /* CMGT, CMGE */
        cond = u ? TCG_COND_GE : TCG_COND_GT;
        goto do_cmop;
    case 0x9: /* CMEQ, CMLE */
        cond = u ?
TCG_COND_LE : TCG_COND_EQ; 9977 goto do_cmop; 9978 case 0xb: /* ABS, NEG */ 9979 if (u) { 9980 tcg_gen_neg_i64(tcg_rd, tcg_rn); 9981 } else { 9982 tcg_gen_abs_i64(tcg_rd, tcg_rn); 9983 } 9984 break; 9985 case 0x2f: /* FABS */ 9986 gen_helper_vfp_absd(tcg_rd, tcg_rn); 9987 break; 9988 case 0x6f: /* FNEG */ 9989 gen_helper_vfp_negd(tcg_rd, tcg_rn); 9990 break; 9991 case 0x7f: /* FSQRT */ 9992 gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env); 9993 break; 9994 case 0x1a: /* FCVTNS */ 9995 case 0x1b: /* FCVTMS */ 9996 case 0x1c: /* FCVTAS */ 9997 case 0x3a: /* FCVTPS */ 9998 case 0x3b: /* FCVTZS */ 9999 gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 10000 break; 10001 case 0x5a: /* FCVTNU */ 10002 case 0x5b: /* FCVTMU */ 10003 case 0x5c: /* FCVTAU */ 10004 case 0x7a: /* FCVTPU */ 10005 case 0x7b: /* FCVTZU */ 10006 gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 10007 break; 10008 case 0x18: /* FRINTN */ 10009 case 0x19: /* FRINTM */ 10010 case 0x38: /* FRINTP */ 10011 case 0x39: /* FRINTZ */ 10012 case 0x58: /* FRINTA */ 10013 case 0x79: /* FRINTI */ 10014 gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus); 10015 break; 10016 case 0x59: /* FRINTX */ 10017 gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus); 10018 break; 10019 case 0x1e: /* FRINT32Z */ 10020 case 0x5e: /* FRINT32X */ 10021 gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus); 10022 break; 10023 case 0x1f: /* FRINT64Z */ 10024 case 0x5f: /* FRINT64X */ 10025 gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus); 10026 break; 10027 default: 10028 g_assert_not_reached(); 10029 } 10030 } 10031 10032 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, 10033 bool is_scalar, bool is_u, bool is_q, 10034 int size, int rn, int rd) 10035 { 10036 bool is_double = (size == MO_64); 10037 TCGv_ptr fpst; 10038 10039 if (!fp_access_check(s)) { 10040 return; 10041 } 10042 10043 fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 10044 10045 if (is_double) { 10046 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10047 TCGv_i64 tcg_zero = tcg_constant_i64(0); 10048 TCGv_i64 tcg_res = tcg_temp_new_i64(); 10049 NeonGenTwoDoubleOpFn *genfn; 10050 bool swap = false; 10051 int pass; 10052 10053 switch (opcode) { 10054 case 0x2e: /* FCMLT (zero) */ 10055 swap = true; 10056 /* fallthrough */ 10057 case 0x2c: /* FCMGT (zero) */ 10058 genfn = gen_helper_neon_cgt_f64; 10059 break; 10060 case 0x2d: /* FCMEQ (zero) */ 10061 genfn = gen_helper_neon_ceq_f64; 10062 break; 10063 case 0x6d: /* FCMLE (zero) */ 10064 swap = true; 10065 /* fall through */ 10066 case 0x6c: /* FCMGE (zero) */ 10067 genfn = gen_helper_neon_cge_f64; 10068 break; 10069 default: 10070 g_assert_not_reached(); 10071 } 10072 10073 for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { 10074 read_vec_element(s, tcg_op, rn, pass, MO_64); 10075 if (swap) { 10076 genfn(tcg_res, tcg_zero, tcg_op, fpst); 10077 } else { 10078 genfn(tcg_res, tcg_op, tcg_zero, fpst); 10079 } 10080 write_vec_element(s, tcg_res, rd, pass, MO_64); 10081 } 10082 tcg_temp_free_i64(tcg_res); 10083 tcg_temp_free_i64(tcg_op); 10084 10085 clear_vec_high(s, !is_scalar, rd); 10086 } else { 10087 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10088 TCGv_i32 tcg_zero = tcg_constant_i32(0); 10089 TCGv_i32 tcg_res = tcg_temp_new_i32(); 10090 NeonGenTwoSingleOpFn *genfn; 10091 bool swap = false; 10092 int pass, maxpasses; 10093 10094 if (size == MO_16) { 10095 switch (opcode) { 10096 case 0x2e: /* FCMLT (zero) */ 10097 swap = true; 10098 /* fall through */ 10099 case 0x2c: /* FCMGT (zero) */ 10100 genfn = gen_helper_advsimd_cgt_f16; 10101 break; 10102 case 0x2d: /* FCMEQ (zero) */ 10103 genfn = gen_helper_advsimd_ceq_f16; 10104 break; 10105 case 0x6d: /* FCMLE (zero) */ 10106 swap = true; 10107 /* fall through */ 10108 case 0x6c: /* FCMGE (zero) */ 10109 genfn = gen_helper_advsimd_cge_f16; 10110 break; 10111 default: 10112 g_assert_not_reached(); 10113 } 10114 } else { 10115 switch (opcode) { 10116 case 0x2e: /* FCMLT (zero) */ 10117 swap = true; 10118 /* fall through */ 10119 case 0x2c: /* FCMGT (zero) */ 10120 genfn = gen_helper_neon_cgt_f32; 10121 break; 10122 case 0x2d: /* FCMEQ (zero) */ 10123 genfn = gen_helper_neon_ceq_f32; 10124 break; 10125 case 0x6d: /* FCMLE (zero) */ 10126 swap = true; 10127 /* fall through */ 10128 case 0x6c: /* FCMGE (zero) */ 10129 genfn = gen_helper_neon_cge_f32; 10130 break; 10131 default: 10132 g_assert_not_reached(); 10133 } 10134 } 10135 10136 if (is_scalar) { 10137 maxpasses = 1; 10138 } else { 10139 int vector_size = 8 << is_q; 10140 maxpasses = vector_size >> size; 10141 } 10142 10143 for (pass = 0; pass < maxpasses; pass++) { 10144 read_vec_element_i32(s, tcg_op, rn, pass, size); 10145 if (swap) { 10146 genfn(tcg_res, tcg_zero, tcg_op, fpst); 10147 } else { 10148 genfn(tcg_res, tcg_op, tcg_zero, fpst); 10149 } 10150 if (is_scalar) { 10151 write_fp_sreg(s, rd, tcg_res); 10152 } else { 10153 write_vec_element_i32(s, tcg_res, rd, pass, size); 10154 } 10155 } 10156 tcg_temp_free_i32(tcg_res); 10157 tcg_temp_free_i32(tcg_op); 10158 if (!is_scalar) { 10159 clear_vec_high(s, is_q, rd); 10160 } 10161 } 10162 10163 tcg_temp_free_ptr(fpst); 10164 } 10165 10166 static void handle_2misc_reciprocal(DisasContext *s, int opcode, 10167 bool is_scalar, bool is_u, bool is_q, 10168 int size, int rn, int rd) 10169 { 10170 bool is_double = (size == 3); 10171 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10172 10173 if (is_double) { 10174 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10175 TCGv_i64 tcg_res = tcg_temp_new_i64(); 10176 int pass; 10177 10178 for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { 10179 read_vec_element(s, tcg_op, rn, pass, MO_64); 10180 switch (opcode) { 10181 case 0x3d: /* FRECPE */ 10182 gen_helper_recpe_f64(tcg_res, tcg_op, fpst); 10183 break; 10184 case 0x3f: /* FRECPX */ 10185 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst); 10186 break; 10187 case 0x7d: /* FRSQRTE */ 10188 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst); 10189 break; 10190 default: 10191 g_assert_not_reached(); 10192 } 10193 write_vec_element(s, tcg_res, rd, pass, MO_64); 10194 } 10195 tcg_temp_free_i64(tcg_res); 10196 tcg_temp_free_i64(tcg_op); 10197 clear_vec_high(s, !is_scalar, rd); 10198 } else { 10199 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10200 TCGv_i32 tcg_res = tcg_temp_new_i32(); 10201 int pass, maxpasses; 10202 10203 if (is_scalar) { 10204 maxpasses = 1; 10205 } else { 10206 maxpasses = is_q ? 4 : 2; 10207 } 10208 10209 for (pass = 0; pass < maxpasses; pass++) { 10210 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 10211 10212 switch (opcode) { 10213 case 0x3c: /* URECPE */ 10214 gen_helper_recpe_u32(tcg_res, tcg_op); 10215 break; 10216 case 0x3d: /* FRECPE */ 10217 gen_helper_recpe_f32(tcg_res, tcg_op, fpst); 10218 break; 10219 case 0x3f: /* FRECPX */ 10220 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst); 10221 break; 10222 case 0x7d: /* FRSQRTE */ 10223 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst); 10224 break; 10225 default: 10226 g_assert_not_reached(); 10227 } 10228 10229 if (is_scalar) { 10230 write_fp_sreg(s, rd, tcg_res); 10231 } else { 10232 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 10233 } 10234 } 10235 tcg_temp_free_i32(tcg_res); 10236 tcg_temp_free_i32(tcg_op); 10237 if (!is_scalar) { 10238 clear_vec_high(s, is_q, rd); 10239 } 10240 } 10241 tcg_temp_free_ptr(fpst); 10242 } 10243 10244 static void handle_2misc_narrow(DisasContext *s, bool scalar, 10245 int opcode, bool u, bool is_q, 10246 int size, int rn, int rd) 10247 { 10248 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element 10249 * in the source becomes a size element in the destination). 10250 */ 10251 int pass; 10252 TCGv_i32 tcg_res[2]; 10253 int destelt = is_q ? 2 : 0; 10254 int passes = scalar ? 
1 : 2; 10255 10256 if (scalar) { 10257 tcg_res[1] = tcg_constant_i32(0); 10258 } 10259 10260 for (pass = 0; pass < passes; pass++) { 10261 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10262 NeonGenNarrowFn *genfn = NULL; 10263 NeonGenNarrowEnvFn *genenvfn = NULL; 10264 10265 if (scalar) { 10266 read_vec_element(s, tcg_op, rn, pass, size + 1); 10267 } else { 10268 read_vec_element(s, tcg_op, rn, pass, MO_64); 10269 } 10270 tcg_res[pass] = tcg_temp_new_i32(); 10271 10272 switch (opcode) { 10273 case 0x12: /* XTN, SQXTUN */ 10274 { 10275 static NeonGenNarrowFn * const xtnfns[3] = { 10276 gen_helper_neon_narrow_u8, 10277 gen_helper_neon_narrow_u16, 10278 tcg_gen_extrl_i64_i32, 10279 }; 10280 static NeonGenNarrowEnvFn * const sqxtunfns[3] = { 10281 gen_helper_neon_unarrow_sat8, 10282 gen_helper_neon_unarrow_sat16, 10283 gen_helper_neon_unarrow_sat32, 10284 }; 10285 if (u) { 10286 genenvfn = sqxtunfns[size]; 10287 } else { 10288 genfn = xtnfns[size]; 10289 } 10290 break; 10291 } 10292 case 0x14: /* SQXTN, UQXTN */ 10293 { 10294 static NeonGenNarrowEnvFn * const fns[3][2] = { 10295 { gen_helper_neon_narrow_sat_s8, 10296 gen_helper_neon_narrow_sat_u8 }, 10297 { gen_helper_neon_narrow_sat_s16, 10298 gen_helper_neon_narrow_sat_u16 }, 10299 { gen_helper_neon_narrow_sat_s32, 10300 gen_helper_neon_narrow_sat_u32 }, 10301 }; 10302 genenvfn = fns[size][u]; 10303 break; 10304 } 10305 case 0x16: /* FCVTN, FCVTN2 */ 10306 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ 10307 if (size == 2) { 10308 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env); 10309 } else { 10310 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 10311 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 10312 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10313 TCGv_i32 ahp = get_ahp_flag(); 10314 10315 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op); 10316 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 10317 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 10318 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16); 10319 tcg_temp_free_i32(tcg_lo); 10320 tcg_temp_free_i32(tcg_hi); 10321 tcg_temp_free_ptr(fpst); 10322 tcg_temp_free_i32(ahp); 10323 } 10324 break; 10325 case 0x36: /* BFCVTN, BFCVTN2 */ 10326 { 10327 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10328 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst); 10329 tcg_temp_free_ptr(fpst); 10330 } 10331 break; 10332 case 0x56: /* FCVTXN, FCVTXN2 */ 10333 /* 64 bit to 32 bit float conversion 10334 * with von Neumann rounding (round to odd) 10335 */ 10336 assert(size == 2); 10337 gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env); 10338 break; 10339 default: 10340 g_assert_not_reached(); 10341 } 10342 10343 if (genfn) { 10344 genfn(tcg_res[pass], tcg_op); 10345 } else if (genenvfn) { 10346 genenvfn(tcg_res[pass], cpu_env, tcg_op); 10347 } 10348 10349 tcg_temp_free_i64(tcg_op); 10350 } 10351 10352 for (pass = 0; pass < 2; pass++) { 10353 write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32); 10354 tcg_temp_free_i32(tcg_res[pass]); 10355 } 10356 clear_vec_high(s, is_q, rd); 10357 } 10358 10359 /* Remaining saturating accumulating ops */ 10360 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, 10361 bool is_q, int size, int rn, int rd) 10362 { 10363 bool is_double = (size == 3); 10364 10365 if (is_double) { 10366 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 10367 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 10368 int pass; 10369 10370 for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { 10371 read_vec_element(s, tcg_rn, rn, pass, MO_64); 10372 read_vec_element(s, tcg_rd, rd, pass, MO_64); 10373 10374 if (is_u) { /* USQADD */ 10375 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10376 } else { /* SUQADD */ 10377 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10378 } 10379 write_vec_element(s, tcg_rd, rd, pass, MO_64); 10380 } 10381 tcg_temp_free_i64(tcg_rd); 10382 tcg_temp_free_i64(tcg_rn); 10383 clear_vec_high(s, !is_scalar, rd); 10384 } else { 10385 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 10386 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 10387 int pass, maxpasses; 10388 10389 if (is_scalar) { 10390 maxpasses = 1; 10391 } else { 10392 maxpasses = is_q ? 4 : 2; 10393 } 10394 10395 for (pass = 0; pass < maxpasses; pass++) { 10396 if (is_scalar) { 10397 read_vec_element_i32(s, tcg_rn, rn, pass, size); 10398 read_vec_element_i32(s, tcg_rd, rd, pass, size); 10399 } else { 10400 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32); 10401 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32); 10402 } 10403 10404 if (is_u) { /* USQADD */ 10405 switch (size) { 10406 case 0: 10407 gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10408 break; 10409 case 1: 10410 gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10411 break; 10412 case 2: 10413 gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10414 break; 10415 default: 10416 g_assert_not_reached(); 10417 } 10418 } else { /* SUQADD */ 10419 switch (size) { 10420 case 0: 10421 gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10422 break; 10423 case 1: 10424 gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10425 break; 10426 case 2: 10427 gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10428 break; 10429 default: 10430 g_assert_not_reached(); 10431 } 10432 } 10433 10434 if (is_scalar) { 10435 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64); 10436 } 10437 write_vec_element_i32(s, tcg_rd, rd, pass, MO_32); 10438 } 10439 tcg_temp_free_i32(tcg_rd); 10440 tcg_temp_free_i32(tcg_rn); 10441 clear_vec_high(s, is_q, rd); 10442 } 10443 } 10444 10445 /* AdvSIMD scalar two reg misc 10446 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 10447 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 10448 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 10449 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 10450 */ 10451 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) 10452 { 10453 int rd = extract32(insn, 0, 5); 10454 int rn = extract32(insn, 5, 5); 10455 int opcode = extract32(insn, 12, 5); 10456 int size = extract32(insn, 22, 2); 10457 bool u = extract32(insn, 29, 1); 10458 bool is_fcvt = false; 10459 int rmode; 10460 TCGv_i32 tcg_rmode; 10461 TCGv_ptr tcg_fpstatus; 10462 10463 switch (opcode) { 10464 case 0x3: /* USQADD / SUQADD*/ 10465 if (!fp_access_check(s)) { 10466 return; 10467 } 10468 handle_2misc_satacc(s, true, u, false, size, rn, rd); 10469 return; 10470 case 0x7: /* SQABS / SQNEG */ 10471 break; 10472 case 0xa: /* CMLT */ 10473 if (u) { 10474 unallocated_encoding(s); 10475 return; 10476 } 10477 /* fall through */ 10478 case 0x8: /* CMGT, CMGE */ 10479 case 0x9: /* CMEQ, CMLE */ 10480 case 0xb: /* ABS, NEG */ 10481 if (size != 3) { 10482 unallocated_encoding(s); 10483 return; 10484 } 10485 break; 10486 case 0x12: /* SQXTUN */ 10487 if (!u) { 10488 unallocated_encoding(s); 10489 return; 10490 } 10491 /* fall through */ 10492 case 0x14: /* SQXTN, UQXTN 
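               * (saturating extract narrow). For example, SQXTN
               * narrowing the 16-bit element 0x7fff saturates to 0x7f
               * and sets FPSR.QC.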
*/ 10493 if (size == 3) { 10494 unallocated_encoding(s); 10495 return; 10496 } 10497 if (!fp_access_check(s)) { 10498 return; 10499 } 10500 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd); 10501 return; 10502 case 0xc ... 0xf: 10503 case 0x16 ... 0x1d: 10504 case 0x1f: 10505 /* Floating point: U, size[1] and opcode indicate operation; 10506 * size[0] indicates single or double precision. 10507 */ 10508 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 10509 size = extract32(size, 0, 1) ? 3 : 2; 10510 switch (opcode) { 10511 case 0x2c: /* FCMGT (zero) */ 10512 case 0x2d: /* FCMEQ (zero) */ 10513 case 0x2e: /* FCMLT (zero) */ 10514 case 0x6c: /* FCMGE (zero) */ 10515 case 0x6d: /* FCMLE (zero) */ 10516 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd); 10517 return; 10518 case 0x1d: /* SCVTF */ 10519 case 0x5d: /* UCVTF */ 10520 { 10521 bool is_signed = (opcode == 0x1d); 10522 if (!fp_access_check(s)) { 10523 return; 10524 } 10525 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size); 10526 return; 10527 } 10528 case 0x3d: /* FRECPE */ 10529 case 0x3f: /* FRECPX */ 10530 case 0x7d: /* FRSQRTE */ 10531 if (!fp_access_check(s)) { 10532 return; 10533 } 10534 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd); 10535 return; 10536 case 0x1a: /* FCVTNS */ 10537 case 0x1b: /* FCVTMS */ 10538 case 0x3a: /* FCVTPS */ 10539 case 0x3b: /* FCVTZS */ 10540 case 0x5a: /* FCVTNU */ 10541 case 0x5b: /* FCVTMU */ 10542 case 0x7a: /* FCVTPU */ 10543 case 0x7b: /* FCVTZU */ 10544 is_fcvt = true; 10545 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 10546 break; 10547 case 0x1c: /* FCVTAS */ 10548 case 0x5c: /* FCVTAU */ 10549 /* TIEAWAY doesn't fit in the usual rounding mode encoding */ 10550 is_fcvt = true; 10551 rmode = FPROUNDING_TIEAWAY; 10552 break; 10553 case 0x56: /* FCVTXN, FCVTXN2 */ 10554 if (size == 2) { 10555 unallocated_encoding(s); 10556 return; 10557 } 10558 if (!fp_access_check(s)) { 10559 return; 10560 } 10561 handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd); 10562 return; 10563 default: 10564 unallocated_encoding(s); 10565 return; 10566 } 10567 break; 10568 default: 10569 unallocated_encoding(s); 10570 return; 10571 } 10572 10573 if (!fp_access_check(s)) { 10574 return; 10575 } 10576 10577 if (is_fcvt) { 10578 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); 10579 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 10580 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 10581 } else { 10582 tcg_rmode = NULL; 10583 tcg_fpstatus = NULL; 10584 } 10585 10586 if (size == 3) { 10587 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 10588 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 10589 10590 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus); 10591 write_fp_dreg(s, rd, tcg_rd); 10592 tcg_temp_free_i64(tcg_rd); 10593 tcg_temp_free_i64(tcg_rn); 10594 } else { 10595 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 10596 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 10597 10598 read_vec_element_i32(s, tcg_rn, rn, 0, size); 10599 10600 switch (opcode) { 10601 case 0x7: /* SQABS, SQNEG */ 10602 { 10603 NeonGenOneOpEnvFn *genfn; 10604 static NeonGenOneOpEnvFn * const fns[3][2] = { 10605 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 10606 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 10607 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, 10608 }; 10609 genfn = fns[size][u]; 10610 genfn(tcg_rd, cpu_env, tcg_rn); 10611 break; 10612 } 10613 case 0x1a: /* FCVTNS */ 10614 case 0x1b: /* FCVTMS */ 10615 case 0x1c: /* FCVTAS */ 
10616 case 0x3a: /* FCVTPS */ 10617 case 0x3b: /* FCVTZS */ 10618 gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0), 10619 tcg_fpstatus); 10620 break; 10621 case 0x5a: /* FCVTNU */ 10622 case 0x5b: /* FCVTMU */ 10623 case 0x5c: /* FCVTAU */ 10624 case 0x7a: /* FCVTPU */ 10625 case 0x7b: /* FCVTZU */ 10626 gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0), 10627 tcg_fpstatus); 10628 break; 10629 default: 10630 g_assert_not_reached(); 10631 } 10632 10633 write_fp_sreg(s, rd, tcg_rd); 10634 tcg_temp_free_i32(tcg_rd); 10635 tcg_temp_free_i32(tcg_rn); 10636 } 10637 10638 if (is_fcvt) { 10639 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 10640 tcg_temp_free_i32(tcg_rmode); 10641 tcg_temp_free_ptr(tcg_fpstatus); 10642 } 10643 } 10644 10645 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */ 10646 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, 10647 int immh, int immb, int opcode, int rn, int rd) 10648 { 10649 int size = 32 - clz32(immh) - 1; 10650 int immhb = immh << 3 | immb; 10651 int shift = 2 * (8 << size) - immhb; 10652 GVecGen2iFn *gvec_fn; 10653 10654 if (extract32(immh, 3, 1) && !is_q) { 10655 unallocated_encoding(s); 10656 return; 10657 } 10658 tcg_debug_assert(size <= 3); 10659 10660 if (!fp_access_check(s)) { 10661 return; 10662 } 10663 10664 switch (opcode) { 10665 case 0x02: /* SSRA / USRA (accumulate) */ 10666 gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra; 10667 break; 10668 10669 case 0x08: /* SRI */ 10670 gvec_fn = gen_gvec_sri; 10671 break; 10672 10673 case 0x00: /* SSHR / USHR */ 10674 if (is_u) { 10675 if (shift == 8 << size) { 10676 /* Shift count the same size as element size produces zero. */ 10677 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd), 10678 is_q ? 16 : 8, vec_full_reg_size(s), 0); 10679 return; 10680 } 10681 gvec_fn = tcg_gen_gvec_shri; 10682 } else { 10683 /* Shift count the same size as element size produces all sign. */ 10684 if (shift == 8 << size) { 10685 shift -= 1; 10686 } 10687 gvec_fn = tcg_gen_gvec_sari; 10688 } 10689 break; 10690 10691 case 0x04: /* SRSHR / URSHR (rounding) */ 10692 gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr; 10693 break; 10694 10695 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 10696 gvec_fn = is_u ? 
gen_gvec_ursra : gen_gvec_srsra; 10697 break; 10698 10699 default: 10700 g_assert_not_reached(); 10701 } 10702 10703 gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size); 10704 } 10705 10706 /* SHL/SLI - Vector shift left */ 10707 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert, 10708 int immh, int immb, int opcode, int rn, int rd) 10709 { 10710 int size = 32 - clz32(immh) - 1; 10711 int immhb = immh << 3 | immb; 10712 int shift = immhb - (8 << size); 10713 10714 /* Range of size is limited by decode: immh is a non-zero 4 bit field */ 10715 assert(size >= 0 && size <= 3); 10716 10717 if (extract32(immh, 3, 1) && !is_q) { 10718 unallocated_encoding(s); 10719 return; 10720 } 10721 10722 if (!fp_access_check(s)) { 10723 return; 10724 } 10725 10726 if (insert) { 10727 gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size); 10728 } else { 10729 gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size); 10730 } 10731 } 10732 10733 /* USHLL/SHLL - Vector shift left with widening */ 10734 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, 10735 int immh, int immb, int opcode, int rn, int rd) 10736 { 10737 int size = 32 - clz32(immh) - 1; 10738 int immhb = immh << 3 | immb; 10739 int shift = immhb - (8 << size); 10740 int dsize = 64; 10741 int esize = 8 << size; 10742 int elements = dsize/esize; 10743 TCGv_i64 tcg_rn = new_tmp_a64(s); 10744 TCGv_i64 tcg_rd = new_tmp_a64(s); 10745 int i; 10746 10747 if (size >= 3) { 10748 unallocated_encoding(s); 10749 return; 10750 } 10751 10752 if (!fp_access_check(s)) { 10753 return; 10754 } 10755 10756 /* For the LL variants the store is larger than the load, 10757 * so if rd == rn we would overwrite parts of our input. 10758 * So load everything right now and use shifts in the main loop. 10759 */ 10760 read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64); 10761 10762 for (i = 0; i < elements; i++) { 10763 tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize); 10764 ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0); 10765 tcg_gen_shli_i64(tcg_rd, tcg_rd, shift); 10766 write_vec_element(s, tcg_rd, rd, i, size + 1); 10767 } 10768 } 10769 10770 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */ 10771 static void handle_vec_simd_shrn(DisasContext *s, bool is_q, 10772 int immh, int immb, int opcode, int rn, int rd) 10773 { 10774 int immhb = immh << 3 | immb; 10775 int size = 32 - clz32(immh) - 1; 10776 int dsize = 64; 10777 int esize = 8 << size; 10778 int elements = dsize/esize; 10779 int shift = (2 * esize) - immhb; 10780 bool round = extract32(opcode, 0, 1); 10781 TCGv_i64 tcg_rn, tcg_rd, tcg_final; 10782 TCGv_i64 tcg_round; 10783 int i; 10784 10785 if (extract32(immh, 3, 1)) { 10786 unallocated_encoding(s); 10787 return; 10788 } 10789 10790 if (!fp_access_check(s)) { 10791 return; 10792 } 10793 10794 tcg_rn = tcg_temp_new_i64(); 10795 tcg_rd = tcg_temp_new_i64(); 10796 tcg_final = tcg_temp_new_i64(); 10797 read_vec_element(s, tcg_final, rd, is_q ? 
1 : 0, MO_64); 10798 10799 if (round) { 10800 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 10801 } else { 10802 tcg_round = NULL; 10803 } 10804 10805 for (i = 0; i < elements; i++) { 10806 read_vec_element(s, tcg_rn, rn, i, size+1); 10807 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 10808 false, true, size+1, shift); 10809 10810 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); 10811 } 10812 10813 if (!is_q) { 10814 write_vec_element(s, tcg_final, rd, 0, MO_64); 10815 } else { 10816 write_vec_element(s, tcg_final, rd, 1, MO_64); 10817 } 10818 tcg_temp_free_i64(tcg_rn); 10819 tcg_temp_free_i64(tcg_rd); 10820 tcg_temp_free_i64(tcg_final); 10821 10822 clear_vec_high(s, is_q, rd); 10823 } 10824 10825 10826 /* AdvSIMD shift by immediate 10827 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 10828 * +---+---+---+-------------+------+------+--------+---+------+------+ 10829 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | 10830 * +---+---+---+-------------+------+------+--------+---+------+------+ 10831 */ 10832 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) 10833 { 10834 int rd = extract32(insn, 0, 5); 10835 int rn = extract32(insn, 5, 5); 10836 int opcode = extract32(insn, 11, 5); 10837 int immb = extract32(insn, 16, 3); 10838 int immh = extract32(insn, 19, 4); 10839 bool is_u = extract32(insn, 29, 1); 10840 bool is_q = extract32(insn, 30, 1); 10841 10842 /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */ 10843 assert(immh != 0); 10844 10845 switch (opcode) { 10846 case 0x08: /* SRI */ 10847 if (!is_u) { 10848 unallocated_encoding(s); 10849 return; 10850 } 10851 /* fall through */ 10852 case 0x00: /* SSHR / USHR */ 10853 case 0x02: /* SSRA / USRA (accumulate) */ 10854 case 0x04: /* SRSHR / URSHR (rounding) */ 10855 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 10856 handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd); 10857 break; 10858 case 0x0a: /* SHL / SLI */ 10859 handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd); 10860 break; 10861 case 0x10: /* SHRN */ 10862 case 0x11: /* RSHRN / SQRSHRUN */ 10863 if (is_u) { 10864 handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb, 10865 opcode, rn, rd); 10866 } else { 10867 handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd); 10868 } 10869 break; 10870 case 0x12: /* SQSHRN / UQSHRN */ 10871 case 0x13: /* SQRSHRN / UQRSHRN */ 10872 handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, 10873 opcode, rn, rd); 10874 break; 10875 case 0x14: /* SSHLL / USHLL */ 10876 handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); 10877 break; 10878 case 0x1c: /* SCVTF / UCVTF */ 10879 handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, 10880 opcode, rn, rd); 10881 break; 10882 case 0xc: /* SQSHLU */ 10883 if (!is_u) { 10884 unallocated_encoding(s); 10885 return; 10886 } 10887 handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd); 10888 break; 10889 case 0xe: /* SQSHL, UQSHL */ 10890 handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd); 10891 break; 10892 case 0x1f: /* FCVTZS/ FCVTZU */ 10893 handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd); 10894 return; 10895 default: 10896 unallocated_encoding(s); 10897 return; 10898 } 10899 } 10900 10901 /* Generate code to do a "long" addition or subtraction, ie one done in 10902 * TCGv_i64 on vector lanes twice the width specified by size. 
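 * For example, size == 0 means each TCGv_i64 holds four 16-bit lanes
 * (8-bit values already widened to 16 bits) and we use the lanewise
 * gen_helper_neon_addl_u16/gen_helper_neon_subl_u16 helpers, while
 * size == 2 is a plain 64-bit add or subtract.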
10903 */ 10904 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res, 10905 TCGv_i64 tcg_op1, TCGv_i64 tcg_op2) 10906 { 10907 static NeonGenTwo64OpFn * const fns[3][2] = { 10908 { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 }, 10909 { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 }, 10910 { tcg_gen_add_i64, tcg_gen_sub_i64 }, 10911 }; 10912 NeonGenTwo64OpFn *genfn; 10913 assert(size < 3); 10914 10915 genfn = fns[size][is_sub]; 10916 genfn(tcg_res, tcg_op1, tcg_op2); 10917 } 10918 10919 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, 10920 int opcode, int rd, int rn, int rm) 10921 { 10922 /* 3-reg-different widening insns: 64 x 64 -> 128 */ 10923 TCGv_i64 tcg_res[2]; 10924 int pass, accop; 10925 10926 tcg_res[0] = tcg_temp_new_i64(); 10927 tcg_res[1] = tcg_temp_new_i64(); 10928 10929 /* Does this op do an adding accumulate, a subtracting accumulate, 10930 * or no accumulate at all? 10931 */ 10932 switch (opcode) { 10933 case 5: 10934 case 8: 10935 case 9: 10936 accop = 1; 10937 break; 10938 case 10: 10939 case 11: 10940 accop = -1; 10941 break; 10942 default: 10943 accop = 0; 10944 break; 10945 } 10946 10947 if (accop != 0) { 10948 read_vec_element(s, tcg_res[0], rd, 0, MO_64); 10949 read_vec_element(s, tcg_res[1], rd, 1, MO_64); 10950 } 10951 10952 /* size == 2 means two 32x32->64 operations; this is worth special 10953 * casing because we can generally handle it inline. 10954 */ 10955 if (size == 2) { 10956 for (pass = 0; pass < 2; pass++) { 10957 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10958 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 10959 TCGv_i64 tcg_passres; 10960 MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN); 10961 10962 int elt = pass + is_q * 2; 10963 10964 read_vec_element(s, tcg_op1, rn, elt, memop); 10965 read_vec_element(s, tcg_op2, rm, elt, memop); 10966 10967 if (accop == 0) { 10968 tcg_passres = tcg_res[pass]; 10969 } else { 10970 tcg_passres = tcg_temp_new_i64(); 10971 } 10972 10973 switch (opcode) { 10974 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 10975 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2); 10976 break; 10977 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 10978 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2); 10979 break; 10980 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 10981 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 10982 { 10983 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64(); 10984 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64(); 10985 10986 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2); 10987 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1); 10988 tcg_gen_movcond_i64(is_u ? 
TCG_COND_GEU : TCG_COND_GE, 10989 tcg_passres, 10990 tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2); 10991 tcg_temp_free_i64(tcg_tmp1); 10992 tcg_temp_free_i64(tcg_tmp2); 10993 break; 10994 } 10995 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 10996 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 10997 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ 10998 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); 10999 break; 11000 case 9: /* SQDMLAL, SQDMLAL2 */ 11001 case 11: /* SQDMLSL, SQDMLSL2 */ 11002 case 13: /* SQDMULL, SQDMULL2 */ 11003 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); 11004 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env, 11005 tcg_passres, tcg_passres); 11006 break; 11007 default: 11008 g_assert_not_reached(); 11009 } 11010 11011 if (opcode == 9 || opcode == 11) { 11012 /* saturating accumulate ops */ 11013 if (accop < 0) { 11014 tcg_gen_neg_i64(tcg_passres, tcg_passres); 11015 } 11016 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env, 11017 tcg_res[pass], tcg_passres); 11018 } else if (accop > 0) { 11019 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 11020 } else if (accop < 0) { 11021 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 11022 } 11023 11024 if (accop != 0) { 11025 tcg_temp_free_i64(tcg_passres); 11026 } 11027 11028 tcg_temp_free_i64(tcg_op1); 11029 tcg_temp_free_i64(tcg_op2); 11030 } 11031 } else { 11032 /* size 0 or 1, generally helper functions */ 11033 for (pass = 0; pass < 2; pass++) { 11034 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11035 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11036 TCGv_i64 tcg_passres; 11037 int elt = pass + is_q * 2; 11038 11039 read_vec_element_i32(s, tcg_op1, rn, elt, MO_32); 11040 read_vec_element_i32(s, tcg_op2, rm, elt, MO_32); 11041 11042 if (accop == 0) { 11043 tcg_passres = tcg_res[pass]; 11044 } else { 11045 tcg_passres = tcg_temp_new_i64(); 11046 } 11047 11048 switch (opcode) { 11049 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 11050 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 11051 { 11052 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64(); 11053 static NeonGenWidenFn * const widenfns[2][2] = { 11054 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, 11055 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, 11056 }; 11057 NeonGenWidenFn *widenfn = widenfns[size][is_u]; 11058 11059 widenfn(tcg_op2_64, tcg_op2); 11060 widenfn(tcg_passres, tcg_op1); 11061 gen_neon_addl(size, (opcode == 2), tcg_passres, 11062 tcg_passres, tcg_op2_64); 11063 tcg_temp_free_i64(tcg_op2_64); 11064 break; 11065 } 11066 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 11067 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 11068 if (size == 0) { 11069 if (is_u) { 11070 gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2); 11071 } else { 11072 gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2); 11073 } 11074 } else { 11075 if (is_u) { 11076 gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2); 11077 } else { 11078 gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2); 11079 } 11080 } 11081 break; 11082 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 11083 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 11084 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ 11085 if (size == 0) { 11086 if (is_u) { 11087 gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2); 11088 } else { 11089 gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2); 11090 } 11091 } else { 11092 if (is_u) { 11093 gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2); 11094 } else { 11095 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); 11096 } 11097 } 11098 break; 11099 case 9: 
/* SQDMLAL, SQDMLAL2 */ 11100 case 11: /* SQDMLSL, SQDMLSL2 */ 11101 case 13: /* SQDMULL, SQDMULL2 */ 11102 assert(size == 1); 11103 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); 11104 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, 11105 tcg_passres, tcg_passres); 11106 break; 11107 default: 11108 g_assert_not_reached(); 11109 } 11110 tcg_temp_free_i32(tcg_op1); 11111 tcg_temp_free_i32(tcg_op2); 11112 11113 if (accop != 0) { 11114 if (opcode == 9 || opcode == 11) { 11115 /* saturating accumulate ops */ 11116 if (accop < 0) { 11117 gen_helper_neon_negl_u32(tcg_passres, tcg_passres); 11118 } 11119 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env, 11120 tcg_res[pass], 11121 tcg_passres); 11122 } else { 11123 gen_neon_addl(size, (accop < 0), tcg_res[pass], 11124 tcg_res[pass], tcg_passres); 11125 } 11126 tcg_temp_free_i64(tcg_passres); 11127 } 11128 } 11129 } 11130 11131 write_vec_element(s, tcg_res[0], rd, 0, MO_64); 11132 write_vec_element(s, tcg_res[1], rd, 1, MO_64); 11133 tcg_temp_free_i64(tcg_res[0]); 11134 tcg_temp_free_i64(tcg_res[1]); 11135 } 11136 11137 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, 11138 int opcode, int rd, int rn, int rm) 11139 { 11140 TCGv_i64 tcg_res[2]; 11141 int part = is_q ? 2 : 0; 11142 int pass; 11143 11144 for (pass = 0; pass < 2; pass++) { 11145 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11146 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11147 TCGv_i64 tcg_op2_wide = tcg_temp_new_i64(); 11148 static NeonGenWidenFn * const widenfns[3][2] = { 11149 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, 11150 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, 11151 { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 }, 11152 }; 11153 NeonGenWidenFn *widenfn = widenfns[size][is_u]; 11154 11155 read_vec_element(s, tcg_op1, rn, pass, MO_64); 11156 read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32); 11157 widenfn(tcg_op2_wide, tcg_op2); 11158 tcg_temp_free_i32(tcg_op2); 11159 tcg_res[pass] = tcg_temp_new_i64(); 11160 gen_neon_addl(size, (opcode == 3), 11161 tcg_res[pass], tcg_op1, tcg_op2_wide); 11162 tcg_temp_free_i64(tcg_op1); 11163 tcg_temp_free_i64(tcg_op2_wide); 11164 } 11165 11166 for (pass = 0; pass < 2; pass++) { 11167 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11168 tcg_temp_free_i64(tcg_res[pass]); 11169 } 11170 } 11171 11172 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in) 11173 { 11174 tcg_gen_addi_i64(in, in, 1U << 31); 11175 tcg_gen_extrh_i64_i32(res, in); 11176 } 11177 11178 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, 11179 int opcode, int rd, int rn, int rm) 11180 { 11181 TCGv_i32 tcg_res[2]; 11182 int part = is_q ? 
2 : 0; 11183 int pass; 11184 11185 for (pass = 0; pass < 2; pass++) { 11186 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11187 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 11188 TCGv_i64 tcg_wideres = tcg_temp_new_i64(); 11189 static NeonGenNarrowFn * const narrowfns[3][2] = { 11190 { gen_helper_neon_narrow_high_u8, 11191 gen_helper_neon_narrow_round_high_u8 }, 11192 { gen_helper_neon_narrow_high_u16, 11193 gen_helper_neon_narrow_round_high_u16 }, 11194 { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 }, 11195 }; 11196 NeonGenNarrowFn *gennarrow = narrowfns[size][is_u]; 11197 11198 read_vec_element(s, tcg_op1, rn, pass, MO_64); 11199 read_vec_element(s, tcg_op2, rm, pass, MO_64); 11200 11201 gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2); 11202 11203 tcg_temp_free_i64(tcg_op1); 11204 tcg_temp_free_i64(tcg_op2); 11205 11206 tcg_res[pass] = tcg_temp_new_i32(); 11207 gennarrow(tcg_res[pass], tcg_wideres); 11208 tcg_temp_free_i64(tcg_wideres); 11209 } 11210 11211 for (pass = 0; pass < 2; pass++) { 11212 write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32); 11213 tcg_temp_free_i32(tcg_res[pass]); 11214 } 11215 clear_vec_high(s, is_q, rd); 11216 } 11217 11218 /* AdvSIMD three different 11219 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 11220 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 11221 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | 11222 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 11223 */ 11224 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) 11225 { 11226 /* Instructions in this group fall into three basic classes 11227 * (in each case with the operation working on each element in 11228 * the input vectors): 11229 * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra 11230 * 128 bit input) 11231 * (2) wide 64 x 128 -> 128 11232 * (3) narrowing 128 x 128 -> 64 11233 * Here we do initial decode, catch unallocated cases and 11234 * dispatch to separate functions for each class. 11235 */ 11236 int is_q = extract32(insn, 30, 1); 11237 int is_u = extract32(insn, 29, 1); 11238 int size = extract32(insn, 22, 2); 11239 int opcode = extract32(insn, 12, 4); 11240 int rm = extract32(insn, 16, 5); 11241 int rn = extract32(insn, 5, 5); 11242 int rd = extract32(insn, 0, 5); 11243 11244 switch (opcode) { 11245 case 1: /* SADDW, SADDW2, UADDW, UADDW2 */ 11246 case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */ 11247 /* 64 x 128 -> 128 */ 11248 if (size == 3) { 11249 unallocated_encoding(s); 11250 return; 11251 } 11252 if (!fp_access_check(s)) { 11253 return; 11254 } 11255 handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm); 11256 break; 11257 case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */ 11258 case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */ 11259 /* 128 x 128 -> 64 */ 11260 if (size == 3) { 11261 unallocated_encoding(s); 11262 return; 11263 } 11264 if (!fp_access_check(s)) { 11265 return; 11266 } 11267 handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm); 11268 break; 11269 case 14: /* PMULL, PMULL2 */ 11270 if (is_u) { 11271 unallocated_encoding(s); 11272 return; 11273 } 11274 switch (size) { 11275 case 0: /* PMULL.P8 */ 11276 if (!fp_access_check(s)) { 11277 return; 11278 } 11279 /* The Q field specifies lo/hi half input for this insn. 
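             * Q == 0 (PMULL) takes the low doubleword of each source,
             * Q == 1 (PMULL2) the high doubleword; is_q is passed as
             * the helper's data argument to select the half. The
             * multiply is carry-less, e.g. 0b11 * 0b11 == 0b101,
             * because the cross terms combine with XOR rather than add.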
*/ 11280 gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, 11281 gen_helper_neon_pmull_h); 11282 break; 11283 11284 case 3: /* PMULL.P64 */ 11285 if (!dc_isar_feature(aa64_pmull, s)) { 11286 unallocated_encoding(s); 11287 return; 11288 } 11289 if (!fp_access_check(s)) { 11290 return; 11291 } 11292 /* The Q field specifies lo/hi half input for this insn. */ 11293 gen_gvec_op3_ool(s, true, rd, rn, rm, is_q, 11294 gen_helper_gvec_pmull_q); 11295 break; 11296 11297 default: 11298 unallocated_encoding(s); 11299 break; 11300 } 11301 return; 11302 case 9: /* SQDMLAL, SQDMLAL2 */ 11303 case 11: /* SQDMLSL, SQDMLSL2 */ 11304 case 13: /* SQDMULL, SQDMULL2 */ 11305 if (is_u || size == 0) { 11306 unallocated_encoding(s); 11307 return; 11308 } 11309 /* fall through */ 11310 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 11311 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 11312 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 11313 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 11314 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 11315 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 11316 case 12: /* SMULL, SMULL2, UMULL, UMULL2 */ 11317 /* 64 x 64 -> 128 */ 11318 if (size == 3) { 11319 unallocated_encoding(s); 11320 return; 11321 } 11322 if (!fp_access_check(s)) { 11323 return; 11324 } 11325 11326 handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm); 11327 break; 11328 default: 11329 /* opcode 15 not allocated */ 11330 unallocated_encoding(s); 11331 break; 11332 } 11333 } 11334 11335 /* Logic op (opcode == 3) subgroup of C3.6.16. */ 11336 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) 11337 { 11338 int rd = extract32(insn, 0, 5); 11339 int rn = extract32(insn, 5, 5); 11340 int rm = extract32(insn, 16, 5); 11341 int size = extract32(insn, 22, 2); 11342 bool is_u = extract32(insn, 29, 1); 11343 bool is_q = extract32(insn, 30, 1); 11344 11345 if (!fp_access_check(s)) { 11346 return; 11347 } 11348 11349 switch (size + 4 * is_u) { 11350 case 0: /* AND */ 11351 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0); 11352 return; 11353 case 1: /* BIC */ 11354 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0); 11355 return; 11356 case 2: /* ORR */ 11357 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0); 11358 return; 11359 case 3: /* ORN */ 11360 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0); 11361 return; 11362 case 4: /* EOR */ 11363 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0); 11364 return; 11365 11366 case 5: /* BSL bitwise select */ 11367 gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0); 11368 return; 11369 case 6: /* BIT, bitwise insert if true */ 11370 gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0); 11371 return; 11372 case 7: /* BIF, bitwise insert if false */ 11373 gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0); 11374 return; 11375 11376 default: 11377 g_assert_not_reached(); 11378 } 11379 } 11380 11381 /* Pairwise op subgroup of C3.6.16. 11382 * 11383 * This is called directly or via the handle_3same_float for float pairwise 11384 * operations where the opcode and size are calculated differently. 
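 * As an example of the pairwise layout: for ADDP on 4S vectors,
 * Rd.S[0] = Rn.S[0] + Rn.S[1], Rd.S[1] = Rn.S[2] + Rn.S[3],
 * Rd.S[2] = Rm.S[0] + Rm.S[1], Rd.S[3] = Rm.S[2] + Rm.S[3]:
 * the low half of the result comes from Rn, the high half from Rm.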
11385 */ 11386 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, 11387 int size, int rn, int rm, int rd) 11388 { 11389 TCGv_ptr fpst; 11390 int pass; 11391 11392 /* Floating point operations need fpst */ 11393 if (opcode >= 0x58) { 11394 fpst = fpstatus_ptr(FPST_FPCR); 11395 } else { 11396 fpst = NULL; 11397 } 11398 11399 if (!fp_access_check(s)) { 11400 return; 11401 } 11402 11403 /* These operations work on the concatenated rm:rn, with each pair of 11404 * adjacent elements being operated on to produce an element in the result. 11405 */ 11406 if (size == 3) { 11407 TCGv_i64 tcg_res[2]; 11408 11409 for (pass = 0; pass < 2; pass++) { 11410 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11411 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 11412 int passreg = (pass == 0) ? rn : rm; 11413 11414 read_vec_element(s, tcg_op1, passreg, 0, MO_64); 11415 read_vec_element(s, tcg_op2, passreg, 1, MO_64); 11416 tcg_res[pass] = tcg_temp_new_i64(); 11417 11418 switch (opcode) { 11419 case 0x17: /* ADDP */ 11420 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); 11421 break; 11422 case 0x58: /* FMAXNMP */ 11423 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11424 break; 11425 case 0x5a: /* FADDP */ 11426 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11427 break; 11428 case 0x5e: /* FMAXP */ 11429 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11430 break; 11431 case 0x78: /* FMINNMP */ 11432 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11433 break; 11434 case 0x7e: /* FMINP */ 11435 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11436 break; 11437 default: 11438 g_assert_not_reached(); 11439 } 11440 11441 tcg_temp_free_i64(tcg_op1); 11442 tcg_temp_free_i64(tcg_op2); 11443 } 11444 11445 for (pass = 0; pass < 2; pass++) { 11446 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11447 tcg_temp_free_i64(tcg_res[pass]); 11448 } 11449 } else { 11450 int maxpass = is_q ? 4 : 2; 11451 TCGv_i32 tcg_res[4]; 11452 11453 for (pass = 0; pass < maxpass; pass++) { 11454 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11455 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11456 NeonGenTwoOpFn *genfn = NULL; 11457 int passreg = pass < (maxpass / 2) ? rn : rm; 11458 int passelt = (is_q && (pass & 1)) ? 
2 : 0; 11459 11460 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32); 11461 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32); 11462 tcg_res[pass] = tcg_temp_new_i32(); 11463 11464 switch (opcode) { 11465 case 0x17: /* ADDP */ 11466 { 11467 static NeonGenTwoOpFn * const fns[3] = { 11468 gen_helper_neon_padd_u8, 11469 gen_helper_neon_padd_u16, 11470 tcg_gen_add_i32, 11471 }; 11472 genfn = fns[size]; 11473 break; 11474 } 11475 case 0x14: /* SMAXP, UMAXP */ 11476 { 11477 static NeonGenTwoOpFn * const fns[3][2] = { 11478 { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 }, 11479 { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 }, 11480 { tcg_gen_smax_i32, tcg_gen_umax_i32 }, 11481 }; 11482 genfn = fns[size][u]; 11483 break; 11484 } 11485 case 0x15: /* SMINP, UMINP */ 11486 { 11487 static NeonGenTwoOpFn * const fns[3][2] = { 11488 { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 }, 11489 { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 }, 11490 { tcg_gen_smin_i32, tcg_gen_umin_i32 }, 11491 }; 11492 genfn = fns[size][u]; 11493 break; 11494 } 11495 /* The FP operations are all on single floats (32 bit) */ 11496 case 0x58: /* FMAXNMP */ 11497 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11498 break; 11499 case 0x5a: /* FADDP */ 11500 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11501 break; 11502 case 0x5e: /* FMAXP */ 11503 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11504 break; 11505 case 0x78: /* FMINNMP */ 11506 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11507 break; 11508 case 0x7e: /* FMINP */ 11509 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11510 break; 11511 default: 11512 g_assert_not_reached(); 11513 } 11514 11515 /* FP ops called directly, otherwise call now */ 11516 if (genfn) { 11517 genfn(tcg_res[pass], tcg_op1, tcg_op2); 11518 } 11519 11520 tcg_temp_free_i32(tcg_op1); 11521 tcg_temp_free_i32(tcg_op2); 11522 } 11523 11524 for (pass = 0; pass < maxpass; pass++) { 11525 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); 11526 tcg_temp_free_i32(tcg_res[pass]); 11527 } 11528 clear_vec_high(s, is_q, rd); 11529 } 11530 11531 if (fpst) { 11532 tcg_temp_free_ptr(fpst); 11533 } 11534 } 11535 11536 /* Floating point op subgroup of C3.6.16. */ 11537 static void disas_simd_3same_float(DisasContext *s, uint32_t insn) 11538 { 11539 /* For floating point ops, the U, size[1] and opcode bits 11540 * together indicate the operation. size[0] indicates single 11541 * or double. 11542 */ 11543 int fpopcode = extract32(insn, 11, 5) 11544 | (extract32(insn, 23, 1) << 5) 11545 | (extract32(insn, 29, 1) << 6); 11546 int is_q = extract32(insn, 30, 1); 11547 int size = extract32(insn, 22, 1); 11548 int rm = extract32(insn, 16, 5); 11549 int rn = extract32(insn, 5, 5); 11550 int rd = extract32(insn, 0, 5); 11551 11552 int datasize = is_q ? 128 : 64; 11553 int esize = 32 << size; 11554 int elements = datasize / esize; 11555 11556 if (size == 1 && !is_q) { 11557 unallocated_encoding(s); 11558 return; 11559 } 11560 11561 switch (fpopcode) { 11562 case 0x58: /* FMAXNMP */ 11563 case 0x5a: /* FADDP */ 11564 case 0x5e: /* FMAXP */ 11565 case 0x78: /* FMINNMP */ 11566 case 0x7e: /* FMINP */ 11567 if (size && !is_q) { 11568 unallocated_encoding(s); 11569 return; 11570 } 11571 handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? 
MO_64 : MO_32, 11572 rn, rm, rd); 11573 return; 11574 case 0x1b: /* FMULX */ 11575 case 0x1f: /* FRECPS */ 11576 case 0x3f: /* FRSQRTS */ 11577 case 0x5d: /* FACGE */ 11578 case 0x7d: /* FACGT */ 11579 case 0x19: /* FMLA */ 11580 case 0x39: /* FMLS */ 11581 case 0x18: /* FMAXNM */ 11582 case 0x1a: /* FADD */ 11583 case 0x1c: /* FCMEQ */ 11584 case 0x1e: /* FMAX */ 11585 case 0x38: /* FMINNM */ 11586 case 0x3a: /* FSUB */ 11587 case 0x3e: /* FMIN */ 11588 case 0x5b: /* FMUL */ 11589 case 0x5c: /* FCMGE */ 11590 case 0x5f: /* FDIV */ 11591 case 0x7a: /* FABD */ 11592 case 0x7c: /* FCMGT */ 11593 if (!fp_access_check(s)) { 11594 return; 11595 } 11596 handle_3same_float(s, size, elements, fpopcode, rd, rn, rm); 11597 return; 11598 11599 case 0x1d: /* FMLAL */ 11600 case 0x3d: /* FMLSL */ 11601 case 0x59: /* FMLAL2 */ 11602 case 0x79: /* FMLSL2 */ 11603 if (size & 1 || !dc_isar_feature(aa64_fhm, s)) { 11604 unallocated_encoding(s); 11605 return; 11606 } 11607 if (fp_access_check(s)) { 11608 int is_s = extract32(insn, 23, 1); 11609 int is_2 = extract32(insn, 29, 1); 11610 int data = (is_2 << 1) | is_s; 11611 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 11612 vec_full_reg_offset(s, rn), 11613 vec_full_reg_offset(s, rm), cpu_env, 11614 is_q ? 16 : 8, vec_full_reg_size(s), 11615 data, gen_helper_gvec_fmlal_a64); 11616 } 11617 return; 11618 11619 default: 11620 unallocated_encoding(s); 11621 return; 11622 } 11623 } 11624 11625 /* Integer op subgroup of C3.6.16. */ 11626 static void disas_simd_3same_int(DisasContext *s, uint32_t insn) 11627 { 11628 int is_q = extract32(insn, 30, 1); 11629 int u = extract32(insn, 29, 1); 11630 int size = extract32(insn, 22, 2); 11631 int opcode = extract32(insn, 11, 5); 11632 int rm = extract32(insn, 16, 5); 11633 int rn = extract32(insn, 5, 5); 11634 int rd = extract32(insn, 0, 5); 11635 int pass; 11636 TCGCond cond; 11637 11638 switch (opcode) { 11639 case 0x13: /* MUL, PMUL */ 11640 if (u && size != 0) { 11641 unallocated_encoding(s); 11642 return; 11643 } 11644 /* fall through */ 11645 case 0x0: /* SHADD, UHADD */ 11646 case 0x2: /* SRHADD, URHADD */ 11647 case 0x4: /* SHSUB, UHSUB */ 11648 case 0xc: /* SMAX, UMAX */ 11649 case 0xd: /* SMIN, UMIN */ 11650 case 0xe: /* SABD, UABD */ 11651 case 0xf: /* SABA, UABA */ 11652 case 0x12: /* MLA, MLS */ 11653 if (size == 3) { 11654 unallocated_encoding(s); 11655 return; 11656 } 11657 break; 11658 case 0x16: /* SQDMULH, SQRDMULH */ 11659 if (size == 0 || size == 3) { 11660 unallocated_encoding(s); 11661 return; 11662 } 11663 break; 11664 default: 11665 if (size == 3 && !is_q) { 11666 unallocated_encoding(s); 11667 return; 11668 } 11669 break; 11670 } 11671 11672 if (!fp_access_check(s)) { 11673 return; 11674 } 11675 11676 switch (opcode) { 11677 case 0x01: /* SQADD, UQADD */ 11678 if (u) { 11679 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size); 11680 } else { 11681 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size); 11682 } 11683 return; 11684 case 0x05: /* SQSUB, UQSUB */ 11685 if (u) { 11686 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size); 11687 } else { 11688 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size); 11689 } 11690 return; 11691 case 0x08: /* SSHL, USHL */ 11692 if (u) { 11693 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size); 11694 } else { 11695 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size); 11696 } 11697 return; 11698 case 0x0c: /* SMAX, UMAX */ 11699 if (u) { 11700 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size); 11701 } else { 11702 
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size); 11703 } 11704 return; 11705 case 0x0d: /* SMIN, UMIN */ 11706 if (u) { 11707 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size); 11708 } else { 11709 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size); 11710 } 11711 return; 11712 case 0xe: /* SABD, UABD */ 11713 if (u) { 11714 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size); 11715 } else { 11716 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size); 11717 } 11718 return; 11719 case 0xf: /* SABA, UABA */ 11720 if (u) { 11721 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size); 11722 } else { 11723 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size); 11724 } 11725 return; 11726 case 0x10: /* ADD, SUB */ 11727 if (u) { 11728 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size); 11729 } else { 11730 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size); 11731 } 11732 return; 11733 case 0x13: /* MUL, PMUL */ 11734 if (!u) { /* MUL */ 11735 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size); 11736 } else { /* PMUL */ 11737 gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b); 11738 } 11739 return; 11740 case 0x12: /* MLA, MLS */ 11741 if (u) { 11742 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size); 11743 } else { 11744 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size); 11745 } 11746 return; 11747 case 0x16: /* SQDMULH, SQRDMULH */ 11748 { 11749 static gen_helper_gvec_3_ptr * const fns[2][2] = { 11750 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h }, 11751 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s }, 11752 }; 11753 gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]); 11754 } 11755 return; 11756 case 0x11: 11757 if (!u) { /* CMTST */ 11758 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size); 11759 return; 11760 } 11761 /* else CMEQ */ 11762 cond = TCG_COND_EQ; 11763 goto do_gvec_cmp; 11764 case 0x06: /* CMGT, CMHI */ 11765 cond = u ? TCG_COND_GTU : TCG_COND_GT; 11766 goto do_gvec_cmp; 11767 case 0x07: /* CMGE, CMHS */ 11768 cond = u ? TCG_COND_GEU : TCG_COND_GE; 11769 do_gvec_cmp: 11770 tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd), 11771 vec_full_reg_offset(s, rn), 11772 vec_full_reg_offset(s, rm), 11773 is_q ? 16 : 8, vec_full_reg_size(s)); 11774 return; 11775 } 11776 11777 if (size == 3) { 11778 assert(is_q); 11779 for (pass = 0; pass < 2; pass++) { 11780 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11781 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 11782 TCGv_i64 tcg_res = tcg_temp_new_i64(); 11783 11784 read_vec_element(s, tcg_op1, rn, pass, MO_64); 11785 read_vec_element(s, tcg_op2, rm, pass, MO_64); 11786 11787 handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2); 11788 11789 write_vec_element(s, tcg_res, rd, pass, MO_64); 11790 11791 tcg_temp_free_i64(tcg_res); 11792 tcg_temp_free_i64(tcg_op1); 11793 tcg_temp_free_i64(tcg_op2); 11794 } 11795 } else { 11796 for (pass = 0; pass < (is_q ? 
4 : 2); pass++) { 11797 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11798 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11799 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11800 NeonGenTwoOpFn *genfn = NULL; 11801 NeonGenTwoOpEnvFn *genenvfn = NULL; 11802 11803 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); 11804 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); 11805 11806 switch (opcode) { 11807 case 0x0: /* SHADD, UHADD */ 11808 { 11809 static NeonGenTwoOpFn * const fns[3][2] = { 11810 { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 }, 11811 { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 }, 11812 { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 }, 11813 }; 11814 genfn = fns[size][u]; 11815 break; 11816 } 11817 case 0x2: /* SRHADD, URHADD */ 11818 { 11819 static NeonGenTwoOpFn * const fns[3][2] = { 11820 { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 }, 11821 { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 }, 11822 { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 }, 11823 }; 11824 genfn = fns[size][u]; 11825 break; 11826 } 11827 case 0x4: /* SHSUB, UHSUB */ 11828 { 11829 static NeonGenTwoOpFn * const fns[3][2] = { 11830 { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 }, 11831 { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 }, 11832 { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 }, 11833 }; 11834 genfn = fns[size][u]; 11835 break; 11836 } 11837 case 0x9: /* SQSHL, UQSHL */ 11838 { 11839 static NeonGenTwoOpEnvFn * const fns[3][2] = { 11840 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, 11841 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, 11842 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, 11843 }; 11844 genenvfn = fns[size][u]; 11845 break; 11846 } 11847 case 0xa: /* SRSHL, URSHL */ 11848 { 11849 static NeonGenTwoOpFn * const fns[3][2] = { 11850 { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 }, 11851 { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 }, 11852 { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 }, 11853 }; 11854 genfn = fns[size][u]; 11855 break; 11856 } 11857 case 0xb: /* SQRSHL, UQRSHL */ 11858 { 11859 static NeonGenTwoOpEnvFn * const fns[3][2] = { 11860 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, 11861 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, 11862 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, 11863 }; 11864 genenvfn = fns[size][u]; 11865 break; 11866 } 11867 default: 11868 g_assert_not_reached(); 11869 } 11870 11871 if (genenvfn) { 11872 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2); 11873 } else { 11874 genfn(tcg_res, tcg_op1, tcg_op2); 11875 } 11876 11877 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 11878 11879 tcg_temp_free_i32(tcg_res); 11880 tcg_temp_free_i32(tcg_op1); 11881 tcg_temp_free_i32(tcg_op2); 11882 } 11883 } 11884 clear_vec_high(s, is_q, rd); 11885 } 11886 11887 /* AdvSIMD three same 11888 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 11889 * +---+---+---+-----------+------+---+------+--------+---+------+------+ 11890 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | 11891 * +---+---+---+-----------+------+---+------+--------+---+------+------+ 11892 */ 11893 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn) 11894 { 11895 int opcode = extract32(insn, 11, 5); 11896 11897 switch (opcode) { 11898 case 0x3: /* logic ops */ 11899 disas_simd_3same_logic(s, insn); 11900 break; 11901 case 0x17: /* ADDP */ 11902 case 0x14: /* SMAXP, UMAXP */ 11903 case 0x15: /* SMINP, UMINP */ 11904 { 11905 /* Pairwise operations 
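         * (ADDP, SMAXP/UMAXP, SMINP/UMINP). Note that ADDP has no
         * unsigned form, and its 64-bit-element form exists only for
         * Q == 1, while the max/min pairwise ops never allow size == 3;
         * hence the differing unallocated checks below.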
*/ 11906 int is_q = extract32(insn, 30, 1); 11907 int u = extract32(insn, 29, 1); 11908 int size = extract32(insn, 22, 2); 11909 int rm = extract32(insn, 16, 5); 11910 int rn = extract32(insn, 5, 5); 11911 int rd = extract32(insn, 0, 5); 11912 if (opcode == 0x17) { 11913 if (u || (size == 3 && !is_q)) { 11914 unallocated_encoding(s); 11915 return; 11916 } 11917 } else { 11918 if (size == 3) { 11919 unallocated_encoding(s); 11920 return; 11921 } 11922 } 11923 handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd); 11924 break; 11925 } 11926 case 0x18 ... 0x31: 11927 /* floating point ops, sz[1] and U are part of opcode */ 11928 disas_simd_3same_float(s, insn); 11929 break; 11930 default: 11931 disas_simd_3same_int(s, insn); 11932 break; 11933 } 11934 } 11935 11936 /* 11937 * Advanced SIMD three same (ARMv8.2 FP16 variants) 11938 * 11939 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 11940 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ 11941 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | 11942 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ 11943 * 11944 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE 11945 * (register), FACGE, FABD, FCMGT (register) and FACGT. 11946 * 11947 */ 11948 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) 11949 { 11950 int opcode = extract32(insn, 11, 3); 11951 int u = extract32(insn, 29, 1); 11952 int a = extract32(insn, 23, 1); 11953 int is_q = extract32(insn, 30, 1); 11954 int rm = extract32(insn, 16, 5); 11955 int rn = extract32(insn, 5, 5); 11956 int rd = extract32(insn, 0, 5); 11957 /* 11958 * For these floating point ops, the U, a and opcode bits 11959 * together indicate the operation. 11960 */ 11961 int fpopcode = opcode | (a << 3) | (u << 4); 11962 int datasize = is_q ? 128 : 64; 11963 int elements = datasize / 16; 11964 bool pairwise; 11965 TCGv_ptr fpst; 11966 int pass; 11967 11968 switch (fpopcode) { 11969 case 0x0: /* FMAXNM */ 11970 case 0x1: /* FMLA */ 11971 case 0x2: /* FADD */ 11972 case 0x3: /* FMULX */ 11973 case 0x4: /* FCMEQ */ 11974 case 0x6: /* FMAX */ 11975 case 0x7: /* FRECPS */ 11976 case 0x8: /* FMINNM */ 11977 case 0x9: /* FMLS */ 11978 case 0xa: /* FSUB */ 11979 case 0xe: /* FMIN */ 11980 case 0xf: /* FRSQRTS */ 11981 case 0x13: /* FMUL */ 11982 case 0x14: /* FCMGE */ 11983 case 0x15: /* FACGE */ 11984 case 0x17: /* FDIV */ 11985 case 0x1a: /* FABD */ 11986 case 0x1c: /* FCMGT */ 11987 case 0x1d: /* FACGT */ 11988 pairwise = false; 11989 break; 11990 case 0x10: /* FMAXNMP */ 11991 case 0x12: /* FADDP */ 11992 case 0x16: /* FMAXP */ 11993 case 0x18: /* FMINNMP */ 11994 case 0x1e: /* FMINP */ 11995 pairwise = true; 11996 break; 11997 default: 11998 unallocated_encoding(s); 11999 return; 12000 } 12001 12002 if (!dc_isar_feature(aa64_fp16, s)) { 12003 unallocated_encoding(s); 12004 return; 12005 } 12006 12007 if (!fp_access_check(s)) { 12008 return; 12009 } 12010 12011 fpst = fpstatus_ptr(FPST_FPCR_F16); 12012 12013 if (pairwise) { 12014 int maxpass = is_q ? 8 : 4; 12015 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 12016 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 12017 TCGv_i32 tcg_res[8]; 12018 12019 for (pass = 0; pass < maxpass; pass++) { 12020 int passreg = pass < (maxpass / 2) ? 
rn : rm; 12021 int passelt = (pass << 1) & (maxpass - 1); 12022 12023 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16); 12024 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16); 12025 tcg_res[pass] = tcg_temp_new_i32(); 12026 12027 switch (fpopcode) { 12028 case 0x10: /* FMAXNMP */ 12029 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2, 12030 fpst); 12031 break; 12032 case 0x12: /* FADDP */ 12033 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 12034 break; 12035 case 0x16: /* FMAXP */ 12036 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 12037 break; 12038 case 0x18: /* FMINNMP */ 12039 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2, 12040 fpst); 12041 break; 12042 case 0x1e: /* FMINP */ 12043 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 12044 break; 12045 default: 12046 g_assert_not_reached(); 12047 } 12048 } 12049 12050 for (pass = 0; pass < maxpass; pass++) { 12051 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16); 12052 tcg_temp_free_i32(tcg_res[pass]); 12053 } 12054 12055 tcg_temp_free_i32(tcg_op1); 12056 tcg_temp_free_i32(tcg_op2); 12057 12058 } else { 12059 for (pass = 0; pass < elements; pass++) { 12060 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 12061 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 12062 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12063 12064 read_vec_element_i32(s, tcg_op1, rn, pass, MO_16); 12065 read_vec_element_i32(s, tcg_op2, rm, pass, MO_16); 12066 12067 switch (fpopcode) { 12068 case 0x0: /* FMAXNM */ 12069 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 12070 break; 12071 case 0x1: /* FMLA */ 12072 read_vec_element_i32(s, tcg_res, rd, pass, MO_16); 12073 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, 12074 fpst); 12075 break; 12076 case 0x2: /* FADD */ 12077 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 12078 break; 12079 case 0x3: /* FMULX */ 12080 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); 12081 break; 12082 case 0x4: /* FCMEQ */ 12083 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12084 break; 12085 case 0x6: /* FMAX */ 12086 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 12087 break; 12088 case 0x7: /* FRECPS */ 12089 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12090 break; 12091 case 0x8: /* FMINNM */ 12092 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 12093 break; 12094 case 0x9: /* FMLS */ 12095 /* As usual for ARM, separate negation for fused multiply-add */ 12096 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); 12097 read_vec_element_i32(s, tcg_res, rd, pass, MO_16); 12098 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, 12099 fpst); 12100 break; 12101 case 0xa: /* FSUB */ 12102 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 12103 break; 12104 case 0xe: /* FMIN */ 12105 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 12106 break; 12107 case 0xf: /* FRSQRTS */ 12108 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12109 break; 12110 case 0x13: /* FMUL */ 12111 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 12112 break; 12113 case 0x14: /* FCMGE */ 12114 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12115 break; 12116 case 0x15: /* FACGE */ 12117 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12118 break; 12119 case 0x17: /* FDIV */ 12120 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); 12121 break; 12122 case 0x1a: /* FABD */ 12123 gen_helper_advsimd_subh(tcg_res, tcg_op1, 
tcg_op2, fpst); 12124 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff); 12125 break; 12126 case 0x1c: /* FCMGT */ 12127 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12128 break; 12129 case 0x1d: /* FACGT */ 12130 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12131 break; 12132 default: 12133 g_assert_not_reached(); 12134 } 12135 12136 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 12137 tcg_temp_free_i32(tcg_res); 12138 tcg_temp_free_i32(tcg_op1); 12139 tcg_temp_free_i32(tcg_op2); 12140 } 12141 } 12142 12143 tcg_temp_free_ptr(fpst); 12144 12145 clear_vec_high(s, is_q, rd); 12146 } 12147 12148 /* AdvSIMD three same extra 12149 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 12150 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ 12151 * | 0 | Q | U | 0 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd | 12152 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ 12153 */ 12154 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) 12155 { 12156 int rd = extract32(insn, 0, 5); 12157 int rn = extract32(insn, 5, 5); 12158 int opcode = extract32(insn, 11, 4); 12159 int rm = extract32(insn, 16, 5); 12160 int size = extract32(insn, 22, 2); 12161 bool u = extract32(insn, 29, 1); 12162 bool is_q = extract32(insn, 30, 1); 12163 bool feature; 12164 int rot; 12165 12166 switch (u * 16 + opcode) { 12167 case 0x10: /* SQRDMLAH (vector) */ 12168 case 0x11: /* SQRDMLSH (vector) */ 12169 if (size != 1 && size != 2) { 12170 unallocated_encoding(s); 12171 return; 12172 } 12173 feature = dc_isar_feature(aa64_rdm, s); 12174 break; 12175 case 0x02: /* SDOT (vector) */ 12176 case 0x12: /* UDOT (vector) */ 12177 if (size != MO_32) { 12178 unallocated_encoding(s); 12179 return; 12180 } 12181 feature = dc_isar_feature(aa64_dp, s); 12182 break; 12183 case 0x03: /* USDOT */ 12184 if (size != MO_32) { 12185 unallocated_encoding(s); 12186 return; 12187 } 12188 feature = dc_isar_feature(aa64_i8mm, s); 12189 break; 12190 case 0x04: /* SMMLA */ 12191 case 0x14: /* UMMLA */ 12192 case 0x05: /* USMMLA */ 12193 if (!is_q || size != MO_32) { 12194 unallocated_encoding(s); 12195 return; 12196 } 12197 feature = dc_isar_feature(aa64_i8mm, s); 12198 break; 12199 case 0x18: /* FCMLA, #0 */ 12200 case 0x19: /* FCMLA, #90 */ 12201 case 0x1a: /* FCMLA, #180 */ 12202 case 0x1b: /* FCMLA, #270 */ 12203 case 0x1c: /* FCADD, #90 */ 12204 case 0x1e: /* FCADD, #270 */ 12205 if (size == 0 12206 || (size == 1 && !dc_isar_feature(aa64_fp16, s)) 12207 || (size == 3 && !is_q)) { 12208 unallocated_encoding(s); 12209 return; 12210 } 12211 feature = dc_isar_feature(aa64_fcma, s); 12212 break; 12213 case 0x1d: /* BFMMLA */ 12214 if (size != MO_16 || !is_q) { 12215 unallocated_encoding(s); 12216 return; 12217 } 12218 feature = dc_isar_feature(aa64_bf16, s); 12219 break; 12220 case 0x1f: 12221 switch (size) { 12222 case 1: /* BFDOT */ 12223 case 3: /* BFMLAL{B,T} */ 12224 feature = dc_isar_feature(aa64_bf16, s); 12225 break; 12226 default: 12227 unallocated_encoding(s); 12228 return; 12229 } 12230 break; 12231 default: 12232 unallocated_encoding(s); 12233 return; 12234 } 12235 if (!feature) { 12236 unallocated_encoding(s); 12237 return; 12238 } 12239 if (!fp_access_check(s)) { 12240 return; 12241 } 12242 12243 switch (opcode) { 12244 case 0x0: /* SQRDMLAH (vector) */ 12245 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size); 12246 return; 12247 12248 case 0x1: /* SQRDMLSH (vector) */ 12249 gen_gvec_fn3(s, is_q, rd, rn, rm, 
gen_gvec_sqrdmlsh_qc, size);
12250 return;
12251
12252 case 0x2: /* SDOT / UDOT */
12253 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
12254 u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
12255 return;
12256
12257 case 0x3: /* USDOT */
12258 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
12259 return;
12260
12261 case 0x04: /* SMMLA, UMMLA */
12262 gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
12263 u ? gen_helper_gvec_ummla_b
12264 : gen_helper_gvec_smmla_b);
12265 return;
12266 case 0x05: /* USMMLA */
12267 gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
12268 return;
12269
12270 case 0x8: /* FCMLA, #0 */
12271 case 0x9: /* FCMLA, #90 */
12272 case 0xa: /* FCMLA, #180 */
12273 case 0xb: /* FCMLA, #270 */
12274 rot = extract32(opcode, 0, 2);
12275 switch (size) {
12276 case 1:
12277 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
12278 gen_helper_gvec_fcmlah);
12279 break;
12280 case 2:
12281 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
12282 gen_helper_gvec_fcmlas);
12283 break;
12284 case 3:
12285 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
12286 gen_helper_gvec_fcmlad);
12287 break;
12288 default:
12289 g_assert_not_reached();
12290 }
12291 return;
12292
12293 case 0xc: /* FCADD, #90 */
12294 case 0xe: /* FCADD, #270 */
12295 rot = extract32(opcode, 1, 1);
12296 switch (size) {
12297 case 1:
12298 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
12299 gen_helper_gvec_fcaddh);
12300 break;
12301 case 2:
12302 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
12303 gen_helper_gvec_fcadds);
12304 break;
12305 case 3:
12306 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
12307 gen_helper_gvec_fcaddd);
12308 break;
12309 default:
12310 g_assert_not_reached();
12311 }
12312 return;
12313
12314 case 0xd: /* BFMMLA */
12315 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
12316 return;
12317 case 0xf:
12318 switch (size) {
12319 case 1: /* BFDOT */
12320 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
12321 break;
12322 case 3: /* BFMLAL{B,T} */
12323 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
12324 gen_helper_gvec_bfmlal);
12325 break;
12326 default:
12327 g_assert_not_reached();
12328 }
12329 return;
12330
12331 default:
12332 g_assert_not_reached();
12333 }
12334 }
12335
12336 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
12337 int size, int rn, int rd)
12338 {
12339 /* Handle 2-reg-misc ops which are widening (so each size element
12340 * in the source becomes a 2*size element in the destination).
12341 * The only instruction like this is FCVTL.
12342 */
12343 int pass;
12344
12345 if (size == 3) {
12346 /* 32 -> 64 bit fp conversion */
12347 TCGv_i64 tcg_res[2];
12348 int srcelt = is_q ? 2 : 0;
12349
12350 for (pass = 0; pass < 2; pass++) {
12351 TCGv_i32 tcg_op = tcg_temp_new_i32();
12352 tcg_res[pass] = tcg_temp_new_i64();
12353
12354 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
12355 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
12356 tcg_temp_free_i32(tcg_op);
12357 }
12358 for (pass = 0; pass < 2; pass++) {
12359 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12360 tcg_temp_free_i64(tcg_res[pass]);
12361 }
12362 } else {
12363 /* 16 -> 32 bit fp conversion */
12364 int srcelt = is_q ?
4 : 0; 12365 TCGv_i32 tcg_res[4]; 12366 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 12367 TCGv_i32 ahp = get_ahp_flag(); 12368 12369 for (pass = 0; pass < 4; pass++) { 12370 tcg_res[pass] = tcg_temp_new_i32(); 12371 12372 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); 12373 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], 12374 fpst, ahp); 12375 } 12376 for (pass = 0; pass < 4; pass++) { 12377 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); 12378 tcg_temp_free_i32(tcg_res[pass]); 12379 } 12380 12381 tcg_temp_free_ptr(fpst); 12382 tcg_temp_free_i32(ahp); 12383 } 12384 } 12385 12386 static void handle_rev(DisasContext *s, int opcode, bool u, 12387 bool is_q, int size, int rn, int rd) 12388 { 12389 int op = (opcode << 1) | u; 12390 int opsz = op + size; 12391 int grp_size = 3 - opsz; 12392 int dsize = is_q ? 128 : 64; 12393 int i; 12394 12395 if (opsz >= 3) { 12396 unallocated_encoding(s); 12397 return; 12398 } 12399 12400 if (!fp_access_check(s)) { 12401 return; 12402 } 12403 12404 if (size == 0) { 12405 /* Special case bytes, use bswap op on each group of elements */ 12406 int groups = dsize / (8 << grp_size); 12407 12408 for (i = 0; i < groups; i++) { 12409 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 12410 12411 read_vec_element(s, tcg_tmp, rn, i, grp_size); 12412 switch (grp_size) { 12413 case MO_16: 12414 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 12415 break; 12416 case MO_32: 12417 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 12418 break; 12419 case MO_64: 12420 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp); 12421 break; 12422 default: 12423 g_assert_not_reached(); 12424 } 12425 write_vec_element(s, tcg_tmp, rd, i, grp_size); 12426 tcg_temp_free_i64(tcg_tmp); 12427 } 12428 clear_vec_high(s, is_q, rd); 12429 } else { 12430 int revmask = (1 << grp_size) - 1; 12431 int esize = 8 << size; 12432 int elements = dsize / esize; 12433 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 12434 TCGv_i64 tcg_rd = tcg_const_i64(0); 12435 TCGv_i64 tcg_rd_hi = tcg_const_i64(0); 12436 12437 for (i = 0; i < elements; i++) { 12438 int e_rev = (i & 0xf) ^ revmask; 12439 int off = e_rev * esize; 12440 read_vec_element(s, tcg_rn, rn, i, size); 12441 if (off >= 64) { 12442 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi, 12443 tcg_rn, off - 64, esize); 12444 } else { 12445 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize); 12446 } 12447 } 12448 write_vec_element(s, tcg_rd, rd, 0, MO_64); 12449 write_vec_element(s, tcg_rd_hi, rd, 1, MO_64); 12450 12451 tcg_temp_free_i64(tcg_rd_hi); 12452 tcg_temp_free_i64(tcg_rd); 12453 tcg_temp_free_i64(tcg_rn); 12454 } 12455 } 12456 12457 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, 12458 bool is_q, int size, int rn, int rd) 12459 { 12460 /* Implement the pairwise operations from 2-misc: 12461 * SADDLP, UADDLP, SADALP, UADALP. 12462 * These all add pairs of elements in the input to produce a 12463 * double-width result element in the output (possibly accumulating). 12464 */ 12465 bool accum = (opcode == 0x6); 12466 int maxpass = is_q ? 2 : 1; 12467 int pass; 12468 TCGv_i64 tcg_res[2]; 12469 12470 if (size == 2) { 12471 /* 32 + 32 -> 64 op */ 12472 MemOp memop = size + (u ? 
0 : MO_SIGN); 12473 12474 for (pass = 0; pass < maxpass; pass++) { 12475 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 12476 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 12477 12478 tcg_res[pass] = tcg_temp_new_i64(); 12479 12480 read_vec_element(s, tcg_op1, rn, pass * 2, memop); 12481 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop); 12482 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); 12483 if (accum) { 12484 read_vec_element(s, tcg_op1, rd, pass, MO_64); 12485 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1); 12486 } 12487 12488 tcg_temp_free_i64(tcg_op1); 12489 tcg_temp_free_i64(tcg_op2); 12490 } 12491 } else { 12492 for (pass = 0; pass < maxpass; pass++) { 12493 TCGv_i64 tcg_op = tcg_temp_new_i64(); 12494 NeonGenOne64OpFn *genfn; 12495 static NeonGenOne64OpFn * const fns[2][2] = { 12496 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, 12497 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, 12498 }; 12499 12500 genfn = fns[size][u]; 12501 12502 tcg_res[pass] = tcg_temp_new_i64(); 12503 12504 read_vec_element(s, tcg_op, rn, pass, MO_64); 12505 genfn(tcg_res[pass], tcg_op); 12506 12507 if (accum) { 12508 read_vec_element(s, tcg_op, rd, pass, MO_64); 12509 if (size == 0) { 12510 gen_helper_neon_addl_u16(tcg_res[pass], 12511 tcg_res[pass], tcg_op); 12512 } else { 12513 gen_helper_neon_addl_u32(tcg_res[pass], 12514 tcg_res[pass], tcg_op); 12515 } 12516 } 12517 tcg_temp_free_i64(tcg_op); 12518 } 12519 } 12520 if (!is_q) { 12521 tcg_res[1] = tcg_constant_i64(0); 12522 } 12523 for (pass = 0; pass < 2; pass++) { 12524 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 12525 tcg_temp_free_i64(tcg_res[pass]); 12526 } 12527 } 12528 12529 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) 12530 { 12531 /* Implement SHLL and SHLL2 */ 12532 int pass; 12533 int part = is_q ? 
2 : 0; 12534 TCGv_i64 tcg_res[2]; 12535 12536 for (pass = 0; pass < 2; pass++) { 12537 static NeonGenWidenFn * const widenfns[3] = { 12538 gen_helper_neon_widen_u8, 12539 gen_helper_neon_widen_u16, 12540 tcg_gen_extu_i32_i64, 12541 }; 12542 NeonGenWidenFn *widenfn = widenfns[size]; 12543 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12544 12545 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32); 12546 tcg_res[pass] = tcg_temp_new_i64(); 12547 widenfn(tcg_res[pass], tcg_op); 12548 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size); 12549 12550 tcg_temp_free_i32(tcg_op); 12551 } 12552 12553 for (pass = 0; pass < 2; pass++) { 12554 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 12555 tcg_temp_free_i64(tcg_res[pass]); 12556 } 12557 } 12558 12559 /* AdvSIMD two reg misc 12560 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 12561 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 12562 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 12563 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 12564 */ 12565 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) 12566 { 12567 int size = extract32(insn, 22, 2); 12568 int opcode = extract32(insn, 12, 5); 12569 bool u = extract32(insn, 29, 1); 12570 bool is_q = extract32(insn, 30, 1); 12571 int rn = extract32(insn, 5, 5); 12572 int rd = extract32(insn, 0, 5); 12573 bool need_fpstatus = false; 12574 bool need_rmode = false; 12575 int rmode = -1; 12576 TCGv_i32 tcg_rmode; 12577 TCGv_ptr tcg_fpstatus; 12578 12579 switch (opcode) { 12580 case 0x0: /* REV64, REV32 */ 12581 case 0x1: /* REV16 */ 12582 handle_rev(s, opcode, u, is_q, size, rn, rd); 12583 return; 12584 case 0x5: /* CNT, NOT, RBIT */ 12585 if (u && size == 0) { 12586 /* NOT */ 12587 break; 12588 } else if (u && size == 1) { 12589 /* RBIT */ 12590 break; 12591 } else if (!u && size == 0) { 12592 /* CNT */ 12593 break; 12594 } 12595 unallocated_encoding(s); 12596 return; 12597 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */ 12598 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */ 12599 if (size == 3) { 12600 unallocated_encoding(s); 12601 return; 12602 } 12603 if (!fp_access_check(s)) { 12604 return; 12605 } 12606 12607 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd); 12608 return; 12609 case 0x4: /* CLS, CLZ */ 12610 if (size == 3) { 12611 unallocated_encoding(s); 12612 return; 12613 } 12614 break; 12615 case 0x2: /* SADDLP, UADDLP */ 12616 case 0x6: /* SADALP, UADALP */ 12617 if (size == 3) { 12618 unallocated_encoding(s); 12619 return; 12620 } 12621 if (!fp_access_check(s)) { 12622 return; 12623 } 12624 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd); 12625 return; 12626 case 0x13: /* SHLL, SHLL2 */ 12627 if (u == 0 || size == 3) { 12628 unallocated_encoding(s); 12629 return; 12630 } 12631 if (!fp_access_check(s)) { 12632 return; 12633 } 12634 handle_shll(s, is_q, size, rn, rd); 12635 return; 12636 case 0xa: /* CMLT */ 12637 if (u == 1) { 12638 unallocated_encoding(s); 12639 return; 12640 } 12641 /* fall through */ 12642 case 0x8: /* CMGT, CMGE */ 12643 case 0x9: /* CMEQ, CMLE */ 12644 case 0xb: /* ABS, NEG */ 12645 if (size == 3 && !is_q) { 12646 unallocated_encoding(s); 12647 return; 12648 } 12649 break; 12650 case 0x3: /* SUQADD, USQADD */ 12651 if (size == 3 && !is_q) { 12652 unallocated_encoding(s); 12653 return; 12654 } 12655 if (!fp_access_check(s)) { 12656 return; 12657 } 12658 handle_2misc_satacc(s, false, u, is_q, size, rn, rd); 12659 return; 12660 case 0x7: /* SQABS, SQNEG */ 
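        /* As for ABS/NEG above, the only decode-time restriction on the
         * saturating forms is that 64-bit elements require Q == 1; the
         * actual expansion is shared with the element loops further down.
         */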
12661 if (size == 3 && !is_q) { 12662 unallocated_encoding(s); 12663 return; 12664 } 12665 break; 12666 case 0xc ... 0xf: 12667 case 0x16 ... 0x1f: 12668 { 12669 /* Floating point: U, size[1] and opcode indicate operation; 12670 * size[0] indicates single or double precision. 12671 */ 12672 int is_double = extract32(size, 0, 1); 12673 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 12674 size = is_double ? 3 : 2; 12675 switch (opcode) { 12676 case 0x2f: /* FABS */ 12677 case 0x6f: /* FNEG */ 12678 if (size == 3 && !is_q) { 12679 unallocated_encoding(s); 12680 return; 12681 } 12682 break; 12683 case 0x1d: /* SCVTF */ 12684 case 0x5d: /* UCVTF */ 12685 { 12686 bool is_signed = (opcode == 0x1d) ? true : false; 12687 int elements = is_double ? 2 : is_q ? 4 : 2; 12688 if (is_double && !is_q) { 12689 unallocated_encoding(s); 12690 return; 12691 } 12692 if (!fp_access_check(s)) { 12693 return; 12694 } 12695 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size); 12696 return; 12697 } 12698 case 0x2c: /* FCMGT (zero) */ 12699 case 0x2d: /* FCMEQ (zero) */ 12700 case 0x2e: /* FCMLT (zero) */ 12701 case 0x6c: /* FCMGE (zero) */ 12702 case 0x6d: /* FCMLE (zero) */ 12703 if (size == 3 && !is_q) { 12704 unallocated_encoding(s); 12705 return; 12706 } 12707 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd); 12708 return; 12709 case 0x7f: /* FSQRT */ 12710 if (size == 3 && !is_q) { 12711 unallocated_encoding(s); 12712 return; 12713 } 12714 break; 12715 case 0x1a: /* FCVTNS */ 12716 case 0x1b: /* FCVTMS */ 12717 case 0x3a: /* FCVTPS */ 12718 case 0x3b: /* FCVTZS */ 12719 case 0x5a: /* FCVTNU */ 12720 case 0x5b: /* FCVTMU */ 12721 case 0x7a: /* FCVTPU */ 12722 case 0x7b: /* FCVTZU */ 12723 need_fpstatus = true; 12724 need_rmode = true; 12725 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 12726 if (size == 3 && !is_q) { 12727 unallocated_encoding(s); 12728 return; 12729 } 12730 break; 12731 case 0x5c: /* FCVTAU */ 12732 case 0x1c: /* FCVTAS */ 12733 need_fpstatus = true; 12734 need_rmode = true; 12735 rmode = FPROUNDING_TIEAWAY; 12736 if (size == 3 && !is_q) { 12737 unallocated_encoding(s); 12738 return; 12739 } 12740 break; 12741 case 0x3c: /* URECPE */ 12742 if (size == 3) { 12743 unallocated_encoding(s); 12744 return; 12745 } 12746 /* fall through */ 12747 case 0x3d: /* FRECPE */ 12748 case 0x7d: /* FRSQRTE */ 12749 if (size == 3 && !is_q) { 12750 unallocated_encoding(s); 12751 return; 12752 } 12753 if (!fp_access_check(s)) { 12754 return; 12755 } 12756 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd); 12757 return; 12758 case 0x56: /* FCVTXN, FCVTXN2 */ 12759 if (size == 2) { 12760 unallocated_encoding(s); 12761 return; 12762 } 12763 /* fall through */ 12764 case 0x16: /* FCVTN, FCVTN2 */ 12765 /* handle_2misc_narrow does a 2*size -> size operation, but these 12766 * instructions encode the source size rather than dest size. 
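             * The remapped "size" at this point is the source element
             * width, so we pass size - 1 as the destination size: e.g.
             * FCVTN from double precision arrives here with size == 3
             * and is expanded as a 64 -> 32 bit narrow.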
12767 */ 12768 if (!fp_access_check(s)) { 12769 return; 12770 } 12771 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 12772 return; 12773 case 0x36: /* BFCVTN, BFCVTN2 */ 12774 if (!dc_isar_feature(aa64_bf16, s) || size != 2) { 12775 unallocated_encoding(s); 12776 return; 12777 } 12778 if (!fp_access_check(s)) { 12779 return; 12780 } 12781 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 12782 return; 12783 case 0x17: /* FCVTL, FCVTL2 */ 12784 if (!fp_access_check(s)) { 12785 return; 12786 } 12787 handle_2misc_widening(s, opcode, is_q, size, rn, rd); 12788 return; 12789 case 0x18: /* FRINTN */ 12790 case 0x19: /* FRINTM */ 12791 case 0x38: /* FRINTP */ 12792 case 0x39: /* FRINTZ */ 12793 need_rmode = true; 12794 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 12795 /* fall through */ 12796 case 0x59: /* FRINTX */ 12797 case 0x79: /* FRINTI */ 12798 need_fpstatus = true; 12799 if (size == 3 && !is_q) { 12800 unallocated_encoding(s); 12801 return; 12802 } 12803 break; 12804 case 0x58: /* FRINTA */ 12805 need_rmode = true; 12806 rmode = FPROUNDING_TIEAWAY; 12807 need_fpstatus = true; 12808 if (size == 3 && !is_q) { 12809 unallocated_encoding(s); 12810 return; 12811 } 12812 break; 12813 case 0x7c: /* URSQRTE */ 12814 if (size == 3) { 12815 unallocated_encoding(s); 12816 return; 12817 } 12818 break; 12819 case 0x1e: /* FRINT32Z */ 12820 case 0x1f: /* FRINT64Z */ 12821 need_rmode = true; 12822 rmode = FPROUNDING_ZERO; 12823 /* fall through */ 12824 case 0x5e: /* FRINT32X */ 12825 case 0x5f: /* FRINT64X */ 12826 need_fpstatus = true; 12827 if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) { 12828 unallocated_encoding(s); 12829 return; 12830 } 12831 break; 12832 default: 12833 unallocated_encoding(s); 12834 return; 12835 } 12836 break; 12837 } 12838 default: 12839 unallocated_encoding(s); 12840 return; 12841 } 12842 12843 if (!fp_access_check(s)) { 12844 return; 12845 } 12846 12847 if (need_fpstatus || need_rmode) { 12848 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 12849 } else { 12850 tcg_fpstatus = NULL; 12851 } 12852 if (need_rmode) { 12853 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); 12854 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 12855 } else { 12856 tcg_rmode = NULL; 12857 } 12858 12859 switch (opcode) { 12860 case 0x5: 12861 if (u && size == 0) { /* NOT */ 12862 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0); 12863 return; 12864 } 12865 break; 12866 case 0x8: /* CMGT, CMGE */ 12867 if (u) { 12868 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size); 12869 } else { 12870 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size); 12871 } 12872 return; 12873 case 0x9: /* CMEQ, CMLE */ 12874 if (u) { 12875 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size); 12876 } else { 12877 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size); 12878 } 12879 return; 12880 case 0xa: /* CMLT */ 12881 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size); 12882 return; 12883 case 0xb: 12884 if (u) { /* ABS, NEG */ 12885 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); 12886 } else { 12887 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size); 12888 } 12889 return; 12890 } 12891 12892 if (size == 3) { 12893 /* All 64-bit element operations can be shared with scalar 2misc */ 12894 int pass; 12895 12896 /* Coverity claims (size == 3 && !is_q) has been eliminated 12897 * from all paths leading to here. 
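         * (Every path through the decode switch above rejects the
         * size == 3 && !is_q combination via unallocated_encoding or
         * returns early, so the assert below merely documents that
         * invariant.)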
12898 */ 12899 tcg_debug_assert(is_q); 12900 for (pass = 0; pass < 2; pass++) { 12901 TCGv_i64 tcg_op = tcg_temp_new_i64(); 12902 TCGv_i64 tcg_res = tcg_temp_new_i64(); 12903 12904 read_vec_element(s, tcg_op, rn, pass, MO_64); 12905 12906 handle_2misc_64(s, opcode, u, tcg_res, tcg_op, 12907 tcg_rmode, tcg_fpstatus); 12908 12909 write_vec_element(s, tcg_res, rd, pass, MO_64); 12910 12911 tcg_temp_free_i64(tcg_res); 12912 tcg_temp_free_i64(tcg_op); 12913 } 12914 } else { 12915 int pass; 12916 12917 for (pass = 0; pass < (is_q ? 4 : 2); pass++) { 12918 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12919 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12920 12921 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 12922 12923 if (size == 2) { 12924 /* Special cases for 32 bit elements */ 12925 switch (opcode) { 12926 case 0x4: /* CLS */ 12927 if (u) { 12928 tcg_gen_clzi_i32(tcg_res, tcg_op, 32); 12929 } else { 12930 tcg_gen_clrsb_i32(tcg_res, tcg_op); 12931 } 12932 break; 12933 case 0x7: /* SQABS, SQNEG */ 12934 if (u) { 12935 gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op); 12936 } else { 12937 gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op); 12938 } 12939 break; 12940 case 0x2f: /* FABS */ 12941 gen_helper_vfp_abss(tcg_res, tcg_op); 12942 break; 12943 case 0x6f: /* FNEG */ 12944 gen_helper_vfp_negs(tcg_res, tcg_op); 12945 break; 12946 case 0x7f: /* FSQRT */ 12947 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); 12948 break; 12949 case 0x1a: /* FCVTNS */ 12950 case 0x1b: /* FCVTMS */ 12951 case 0x1c: /* FCVTAS */ 12952 case 0x3a: /* FCVTPS */ 12953 case 0x3b: /* FCVTZS */ 12954 gen_helper_vfp_tosls(tcg_res, tcg_op, 12955 tcg_constant_i32(0), tcg_fpstatus); 12956 break; 12957 case 0x5a: /* FCVTNU */ 12958 case 0x5b: /* FCVTMU */ 12959 case 0x5c: /* FCVTAU */ 12960 case 0x7a: /* FCVTPU */ 12961 case 0x7b: /* FCVTZU */ 12962 gen_helper_vfp_touls(tcg_res, tcg_op, 12963 tcg_constant_i32(0), tcg_fpstatus); 12964 break; 12965 case 0x18: /* FRINTN */ 12966 case 0x19: /* FRINTM */ 12967 case 0x38: /* FRINTP */ 12968 case 0x39: /* FRINTZ */ 12969 case 0x58: /* FRINTA */ 12970 case 0x79: /* FRINTI */ 12971 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus); 12972 break; 12973 case 0x59: /* FRINTX */ 12974 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus); 12975 break; 12976 case 0x7c: /* URSQRTE */ 12977 gen_helper_rsqrte_u32(tcg_res, tcg_op); 12978 break; 12979 case 0x1e: /* FRINT32Z */ 12980 case 0x5e: /* FRINT32X */ 12981 gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus); 12982 break; 12983 case 0x1f: /* FRINT64Z */ 12984 case 0x5f: /* FRINT64X */ 12985 gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus); 12986 break; 12987 default: 12988 g_assert_not_reached(); 12989 } 12990 } else { 12991 /* Use helpers for 8 and 16 bit elements */ 12992 switch (opcode) { 12993 case 0x5: /* CNT, RBIT */ 12994 /* For these two insns size is part of the opcode specifier 12995 * (handled earlier); they always operate on byte elements. 
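                     * (CNT is the u == 0 form and RBIT the u == 1 form
                     * here; the remaining size == 0 encoding, NOT, was
                     * already expanded as a gvec op before these loops.)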
12996 */ 12997 if (u) { 12998 gen_helper_neon_rbit_u8(tcg_res, tcg_op); 12999 } else { 13000 gen_helper_neon_cnt_u8(tcg_res, tcg_op); 13001 } 13002 break; 13003 case 0x7: /* SQABS, SQNEG */ 13004 { 13005 NeonGenOneOpEnvFn *genfn; 13006 static NeonGenOneOpEnvFn * const fns[2][2] = { 13007 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 13008 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 13009 }; 13010 genfn = fns[size][u]; 13011 genfn(tcg_res, cpu_env, tcg_op); 13012 break; 13013 } 13014 case 0x4: /* CLS, CLZ */ 13015 if (u) { 13016 if (size == 0) { 13017 gen_helper_neon_clz_u8(tcg_res, tcg_op); 13018 } else { 13019 gen_helper_neon_clz_u16(tcg_res, tcg_op); 13020 } 13021 } else { 13022 if (size == 0) { 13023 gen_helper_neon_cls_s8(tcg_res, tcg_op); 13024 } else { 13025 gen_helper_neon_cls_s16(tcg_res, tcg_op); 13026 } 13027 } 13028 break; 13029 default: 13030 g_assert_not_reached(); 13031 } 13032 } 13033 13034 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 13035 13036 tcg_temp_free_i32(tcg_res); 13037 tcg_temp_free_i32(tcg_op); 13038 } 13039 } 13040 clear_vec_high(s, is_q, rd); 13041 13042 if (need_rmode) { 13043 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 13044 tcg_temp_free_i32(tcg_rmode); 13045 } 13046 if (need_fpstatus) { 13047 tcg_temp_free_ptr(tcg_fpstatus); 13048 } 13049 } 13050 13051 /* AdvSIMD [scalar] two register miscellaneous (FP16) 13052 * 13053 * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0 13054 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 13055 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd | 13056 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 13057 * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00 13058 * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800 13059 * 13060 * This actually covers two groups where scalar access is governed by 13061 * bit 28. A bunch of the instructions (float to integral) only exist 13062 * in the vector form and are un-allocated for the scalar decode. Also 13063 * in the scalar decode Q is always 1. 13064 */ 13065 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) 13066 { 13067 int fpop, opcode, a, u; 13068 int rn, rd; 13069 bool is_q; 13070 bool is_scalar; 13071 bool only_in_vector = false; 13072 13073 int pass; 13074 TCGv_i32 tcg_rmode = NULL; 13075 TCGv_ptr tcg_fpstatus = NULL; 13076 bool need_rmode = false; 13077 bool need_fpst = true; 13078 int rmode; 13079 13080 if (!dc_isar_feature(aa64_fp16, s)) { 13081 unallocated_encoding(s); 13082 return; 13083 } 13084 13085 rd = extract32(insn, 0, 5); 13086 rn = extract32(insn, 5, 5); 13087 13088 a = extract32(insn, 23, 1); 13089 u = extract32(insn, 29, 1); 13090 is_scalar = extract32(insn, 28, 1); 13091 is_q = extract32(insn, 30, 1); 13092 13093 opcode = extract32(insn, 12, 5); 13094 fpop = deposit32(opcode, 5, 1, a); 13095 fpop = deposit32(fpop, 6, 1, u); 13096 13097 switch (fpop) { 13098 case 0x1d: /* SCVTF */ 13099 case 0x5d: /* UCVTF */ 13100 { 13101 int elements; 13102 13103 if (is_scalar) { 13104 elements = 1; 13105 } else { 13106 elements = (is_q ? 
8 : 4); 13107 } 13108 13109 if (!fp_access_check(s)) { 13110 return; 13111 } 13112 handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16); 13113 return; 13114 } 13115 break; 13116 case 0x2c: /* FCMGT (zero) */ 13117 case 0x2d: /* FCMEQ (zero) */ 13118 case 0x2e: /* FCMLT (zero) */ 13119 case 0x6c: /* FCMGE (zero) */ 13120 case 0x6d: /* FCMLE (zero) */ 13121 handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd); 13122 return; 13123 case 0x3d: /* FRECPE */ 13124 case 0x3f: /* FRECPX */ 13125 break; 13126 case 0x18: /* FRINTN */ 13127 need_rmode = true; 13128 only_in_vector = true; 13129 rmode = FPROUNDING_TIEEVEN; 13130 break; 13131 case 0x19: /* FRINTM */ 13132 need_rmode = true; 13133 only_in_vector = true; 13134 rmode = FPROUNDING_NEGINF; 13135 break; 13136 case 0x38: /* FRINTP */ 13137 need_rmode = true; 13138 only_in_vector = true; 13139 rmode = FPROUNDING_POSINF; 13140 break; 13141 case 0x39: /* FRINTZ */ 13142 need_rmode = true; 13143 only_in_vector = true; 13144 rmode = FPROUNDING_ZERO; 13145 break; 13146 case 0x58: /* FRINTA */ 13147 need_rmode = true; 13148 only_in_vector = true; 13149 rmode = FPROUNDING_TIEAWAY; 13150 break; 13151 case 0x59: /* FRINTX */ 13152 case 0x79: /* FRINTI */ 13153 only_in_vector = true; 13154 /* current rounding mode */ 13155 break; 13156 case 0x1a: /* FCVTNS */ 13157 need_rmode = true; 13158 rmode = FPROUNDING_TIEEVEN; 13159 break; 13160 case 0x1b: /* FCVTMS */ 13161 need_rmode = true; 13162 rmode = FPROUNDING_NEGINF; 13163 break; 13164 case 0x1c: /* FCVTAS */ 13165 need_rmode = true; 13166 rmode = FPROUNDING_TIEAWAY; 13167 break; 13168 case 0x3a: /* FCVTPS */ 13169 need_rmode = true; 13170 rmode = FPROUNDING_POSINF; 13171 break; 13172 case 0x3b: /* FCVTZS */ 13173 need_rmode = true; 13174 rmode = FPROUNDING_ZERO; 13175 break; 13176 case 0x5a: /* FCVTNU */ 13177 need_rmode = true; 13178 rmode = FPROUNDING_TIEEVEN; 13179 break; 13180 case 0x5b: /* FCVTMU */ 13181 need_rmode = true; 13182 rmode = FPROUNDING_NEGINF; 13183 break; 13184 case 0x5c: /* FCVTAU */ 13185 need_rmode = true; 13186 rmode = FPROUNDING_TIEAWAY; 13187 break; 13188 case 0x7a: /* FCVTPU */ 13189 need_rmode = true; 13190 rmode = FPROUNDING_POSINF; 13191 break; 13192 case 0x7b: /* FCVTZU */ 13193 need_rmode = true; 13194 rmode = FPROUNDING_ZERO; 13195 break; 13196 case 0x2f: /* FABS */ 13197 case 0x6f: /* FNEG */ 13198 need_fpst = false; 13199 break; 13200 case 0x7d: /* FRSQRTE */ 13201 case 0x7f: /* FSQRT (vector) */ 13202 break; 13203 default: 13204 unallocated_encoding(s); 13205 return; 13206 } 13207 13208 13209 /* Check additional constraints for the scalar encoding */ 13210 if (is_scalar) { 13211 if (!is_q) { 13212 unallocated_encoding(s); 13213 return; 13214 } 13215 /* FRINTxx is only in the vector form */ 13216 if (only_in_vector) { 13217 unallocated_encoding(s); 13218 return; 13219 } 13220 } 13221 13222 if (!fp_access_check(s)) { 13223 return; 13224 } 13225 13226 if (need_rmode || need_fpst) { 13227 tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16); 13228 } 13229 13230 if (need_rmode) { 13231 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); 13232 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 13233 } 13234 13235 if (is_scalar) { 13236 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 13237 TCGv_i32 tcg_res = tcg_temp_new_i32(); 13238 13239 switch (fpop) { 13240 case 0x1a: /* FCVTNS */ 13241 case 0x1b: /* FCVTMS */ 13242 case 0x1c: /* FCVTAS */ 13243 case 0x3a: /* FCVTPS */ 13244 case 0x3b: /* FCVTZS */ 13245 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 
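            /* All five FCVT*S opcodes share this helper; they differ only
             * in the rounding mode installed above via gen_helper_set_rmode.
             */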
13246 break; 13247 case 0x3d: /* FRECPE */ 13248 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 13249 break; 13250 case 0x3f: /* FRECPX */ 13251 gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus); 13252 break; 13253 case 0x5a: /* FCVTNU */ 13254 case 0x5b: /* FCVTMU */ 13255 case 0x5c: /* FCVTAU */ 13256 case 0x7a: /* FCVTPU */ 13257 case 0x7b: /* FCVTZU */ 13258 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 13259 break; 13260 case 0x6f: /* FNEG */ 13261 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 13262 break; 13263 case 0x7d: /* FRSQRTE */ 13264 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 13265 break; 13266 default: 13267 g_assert_not_reached(); 13268 } 13269 13270 /* limit any sign extension going on */ 13271 tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff); 13272 write_fp_sreg(s, rd, tcg_res); 13273 13274 tcg_temp_free_i32(tcg_res); 13275 tcg_temp_free_i32(tcg_op); 13276 } else { 13277 for (pass = 0; pass < (is_q ? 8 : 4); pass++) { 13278 TCGv_i32 tcg_op = tcg_temp_new_i32(); 13279 TCGv_i32 tcg_res = tcg_temp_new_i32(); 13280 13281 read_vec_element_i32(s, tcg_op, rn, pass, MO_16); 13282 13283 switch (fpop) { 13284 case 0x1a: /* FCVTNS */ 13285 case 0x1b: /* FCVTMS */ 13286 case 0x1c: /* FCVTAS */ 13287 case 0x3a: /* FCVTPS */ 13288 case 0x3b: /* FCVTZS */ 13289 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 13290 break; 13291 case 0x3d: /* FRECPE */ 13292 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 13293 break; 13294 case 0x5a: /* FCVTNU */ 13295 case 0x5b: /* FCVTMU */ 13296 case 0x5c: /* FCVTAU */ 13297 case 0x7a: /* FCVTPU */ 13298 case 0x7b: /* FCVTZU */ 13299 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 13300 break; 13301 case 0x18: /* FRINTN */ 13302 case 0x19: /* FRINTM */ 13303 case 0x38: /* FRINTP */ 13304 case 0x39: /* FRINTZ */ 13305 case 0x58: /* FRINTA */ 13306 case 0x79: /* FRINTI */ 13307 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus); 13308 break; 13309 case 0x59: /* FRINTX */ 13310 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus); 13311 break; 13312 case 0x2f: /* FABS */ 13313 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 13314 break; 13315 case 0x6f: /* FNEG */ 13316 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 13317 break; 13318 case 0x7d: /* FRSQRTE */ 13319 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 13320 break; 13321 case 0x7f: /* FSQRT */ 13322 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus); 13323 break; 13324 default: 13325 g_assert_not_reached(); 13326 } 13327 13328 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 13329 13330 tcg_temp_free_i32(tcg_res); 13331 tcg_temp_free_i32(tcg_op); 13332 } 13333 13334 clear_vec_high(s, is_q, rd); 13335 } 13336 13337 if (tcg_rmode) { 13338 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 13339 tcg_temp_free_i32(tcg_rmode); 13340 } 13341 13342 if (tcg_fpstatus) { 13343 tcg_temp_free_ptr(tcg_fpstatus); 13344 } 13345 } 13346 13347 /* AdvSIMD scalar x indexed element 13348 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 13349 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ 13350 * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | 13351 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ 13352 * AdvSIMD vector x indexed element 13353 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 13354 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ 13355 * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd 
| 13356 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ 13357 */ 13358 static void disas_simd_indexed(DisasContext *s, uint32_t insn) 13359 { 13360 /* This encoding has two kinds of instruction: 13361 * normal, where we perform elt x idxelt => elt for each 13362 * element in the vector 13363 * long, where we perform elt x idxelt and generate a result of 13364 * double the width of the input element 13365 * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs). 13366 */ 13367 bool is_scalar = extract32(insn, 28, 1); 13368 bool is_q = extract32(insn, 30, 1); 13369 bool u = extract32(insn, 29, 1); 13370 int size = extract32(insn, 22, 2); 13371 int l = extract32(insn, 21, 1); 13372 int m = extract32(insn, 20, 1); 13373 /* Note that the Rm field here is only 4 bits, not 5 as it usually is */ 13374 int rm = extract32(insn, 16, 4); 13375 int opcode = extract32(insn, 12, 4); 13376 int h = extract32(insn, 11, 1); 13377 int rn = extract32(insn, 5, 5); 13378 int rd = extract32(insn, 0, 5); 13379 bool is_long = false; 13380 int is_fp = 0; 13381 bool is_fp16 = false; 13382 int index; 13383 TCGv_ptr fpst; 13384 13385 switch (16 * u + opcode) { 13386 case 0x08: /* MUL */ 13387 case 0x10: /* MLA */ 13388 case 0x14: /* MLS */ 13389 if (is_scalar) { 13390 unallocated_encoding(s); 13391 return; 13392 } 13393 break; 13394 case 0x02: /* SMLAL, SMLAL2 */ 13395 case 0x12: /* UMLAL, UMLAL2 */ 13396 case 0x06: /* SMLSL, SMLSL2 */ 13397 case 0x16: /* UMLSL, UMLSL2 */ 13398 case 0x0a: /* SMULL, SMULL2 */ 13399 case 0x1a: /* UMULL, UMULL2 */ 13400 if (is_scalar) { 13401 unallocated_encoding(s); 13402 return; 13403 } 13404 is_long = true; 13405 break; 13406 case 0x03: /* SQDMLAL, SQDMLAL2 */ 13407 case 0x07: /* SQDMLSL, SQDMLSL2 */ 13408 case 0x0b: /* SQDMULL, SQDMULL2 */ 13409 is_long = true; 13410 break; 13411 case 0x0c: /* SQDMULH */ 13412 case 0x0d: /* SQRDMULH */ 13413 break; 13414 case 0x01: /* FMLA */ 13415 case 0x05: /* FMLS */ 13416 case 0x09: /* FMUL */ 13417 case 0x19: /* FMULX */ 13418 is_fp = 1; 13419 break; 13420 case 0x1d: /* SQRDMLAH */ 13421 case 0x1f: /* SQRDMLSH */ 13422 if (!dc_isar_feature(aa64_rdm, s)) { 13423 unallocated_encoding(s); 13424 return; 13425 } 13426 break; 13427 case 0x0e: /* SDOT */ 13428 case 0x1e: /* UDOT */ 13429 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) { 13430 unallocated_encoding(s); 13431 return; 13432 } 13433 break; 13434 case 0x0f: 13435 switch (size) { 13436 case 0: /* SUDOT */ 13437 case 2: /* USDOT */ 13438 if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) { 13439 unallocated_encoding(s); 13440 return; 13441 } 13442 size = MO_32; 13443 break; 13444 case 1: /* BFDOT */ 13445 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { 13446 unallocated_encoding(s); 13447 return; 13448 } 13449 size = MO_32; 13450 break; 13451 case 3: /* BFMLAL{B,T} */ 13452 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { 13453 unallocated_encoding(s); 13454 return; 13455 } 13456 /* can't set is_fp without other incorrect size checks */ 13457 size = MO_16; 13458 break; 13459 default: 13460 unallocated_encoding(s); 13461 return; 13462 } 13463 break; 13464 case 0x11: /* FCMLA #0 */ 13465 case 0x13: /* FCMLA #90 */ 13466 case 0x15: /* FCMLA #180 */ 13467 case 0x17: /* FCMLA #270 */ 13468 if (is_scalar || !dc_isar_feature(aa64_fcma, s)) { 13469 unallocated_encoding(s); 13470 return; 13471 } 13472 is_fp = 2; 13473 break; 13474 case 0x00: /* FMLAL */ 13475 case 0x04: /* FMLSL */ 13476 case 0x18: /* FMLAL2 */ 13477 case 0x1c: /* FMLSL2 */ 
13478 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) { 13479 unallocated_encoding(s); 13480 return; 13481 } 13482 size = MO_16; 13483 /* is_fp, but we pass cpu_env not fp_status. */ 13484 break; 13485 default: 13486 unallocated_encoding(s); 13487 return; 13488 } 13489 13490 switch (is_fp) { 13491 case 1: /* normal fp */ 13492 /* convert insn encoded size to MemOp size */ 13493 switch (size) { 13494 case 0: /* half-precision */ 13495 size = MO_16; 13496 is_fp16 = true; 13497 break; 13498 case MO_32: /* single precision */ 13499 case MO_64: /* double precision */ 13500 break; 13501 default: 13502 unallocated_encoding(s); 13503 return; 13504 } 13505 break; 13506 13507 case 2: /* complex fp */ 13508 /* Each indexable element is a complex pair. */ 13509 size += 1; 13510 switch (size) { 13511 case MO_32: 13512 if (h && !is_q) { 13513 unallocated_encoding(s); 13514 return; 13515 } 13516 is_fp16 = true; 13517 break; 13518 case MO_64: 13519 break; 13520 default: 13521 unallocated_encoding(s); 13522 return; 13523 } 13524 break; 13525 13526 default: /* integer */ 13527 switch (size) { 13528 case MO_8: 13529 case MO_64: 13530 unallocated_encoding(s); 13531 return; 13532 } 13533 break; 13534 } 13535 if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) { 13536 unallocated_encoding(s); 13537 return; 13538 } 13539 13540 /* Given MemOp size, adjust register and indexing. */ 13541 switch (size) { 13542 case MO_16: 13543 index = h << 2 | l << 1 | m; 13544 break; 13545 case MO_32: 13546 index = h << 1 | l; 13547 rm |= m << 4; 13548 break; 13549 case MO_64: 13550 if (l || !is_q) { 13551 unallocated_encoding(s); 13552 return; 13553 } 13554 index = h; 13555 rm |= m << 4; 13556 break; 13557 default: 13558 g_assert_not_reached(); 13559 } 13560 13561 if (!fp_access_check(s)) { 13562 return; 13563 } 13564 13565 if (is_fp) { 13566 fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 13567 } else { 13568 fpst = NULL; 13569 } 13570 13571 switch (16 * u + opcode) { 13572 case 0x0e: /* SDOT */ 13573 case 0x1e: /* UDOT */ 13574 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13575 u ? gen_helper_gvec_udot_idx_b 13576 : gen_helper_gvec_sdot_idx_b); 13577 return; 13578 case 0x0f: 13579 switch (extract32(insn, 22, 2)) { 13580 case 0: /* SUDOT */ 13581 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13582 gen_helper_gvec_sudot_idx_b); 13583 return; 13584 case 1: /* BFDOT */ 13585 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13586 gen_helper_gvec_bfdot_idx); 13587 return; 13588 case 2: /* USDOT */ 13589 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13590 gen_helper_gvec_usdot_idx_b); 13591 return; 13592 case 3: /* BFMLAL{B,T} */ 13593 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q, 13594 gen_helper_gvec_bfmlal_idx); 13595 return; 13596 } 13597 g_assert_not_reached(); 13598 case 0x11: /* FCMLA #0 */ 13599 case 0x13: /* FCMLA #90 */ 13600 case 0x15: /* FCMLA #180 */ 13601 case 0x17: /* FCMLA #270 */ 13602 { 13603 int rot = extract32(insn, 13, 2); 13604 int data = (index << 2) | rot; 13605 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 13606 vec_full_reg_offset(s, rn), 13607 vec_full_reg_offset(s, rm), 13608 vec_full_reg_offset(s, rd), fpst, 13609 is_q ? 16 : 8, vec_full_reg_size(s), data, 13610 size == MO_64 13611 ? 
gen_helper_gvec_fcmlas_idx 13612 : gen_helper_gvec_fcmlah_idx); 13613 tcg_temp_free_ptr(fpst); 13614 } 13615 return; 13616 13617 case 0x00: /* FMLAL */ 13618 case 0x04: /* FMLSL */ 13619 case 0x18: /* FMLAL2 */ 13620 case 0x1c: /* FMLSL2 */ 13621 { 13622 int is_s = extract32(opcode, 2, 1); 13623 int is_2 = u; 13624 int data = (index << 2) | (is_2 << 1) | is_s; 13625 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 13626 vec_full_reg_offset(s, rn), 13627 vec_full_reg_offset(s, rm), cpu_env, 13628 is_q ? 16 : 8, vec_full_reg_size(s), 13629 data, gen_helper_gvec_fmlal_idx_a64); 13630 } 13631 return; 13632 13633 case 0x08: /* MUL */ 13634 if (!is_long && !is_scalar) { 13635 static gen_helper_gvec_3 * const fns[3] = { 13636 gen_helper_gvec_mul_idx_h, 13637 gen_helper_gvec_mul_idx_s, 13638 gen_helper_gvec_mul_idx_d, 13639 }; 13640 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 13641 vec_full_reg_offset(s, rn), 13642 vec_full_reg_offset(s, rm), 13643 is_q ? 16 : 8, vec_full_reg_size(s), 13644 index, fns[size - 1]); 13645 return; 13646 } 13647 break; 13648 13649 case 0x10: /* MLA */ 13650 if (!is_long && !is_scalar) { 13651 static gen_helper_gvec_4 * const fns[3] = { 13652 gen_helper_gvec_mla_idx_h, 13653 gen_helper_gvec_mla_idx_s, 13654 gen_helper_gvec_mla_idx_d, 13655 }; 13656 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 13657 vec_full_reg_offset(s, rn), 13658 vec_full_reg_offset(s, rm), 13659 vec_full_reg_offset(s, rd), 13660 is_q ? 16 : 8, vec_full_reg_size(s), 13661 index, fns[size - 1]); 13662 return; 13663 } 13664 break; 13665 13666 case 0x14: /* MLS */ 13667 if (!is_long && !is_scalar) { 13668 static gen_helper_gvec_4 * const fns[3] = { 13669 gen_helper_gvec_mls_idx_h, 13670 gen_helper_gvec_mls_idx_s, 13671 gen_helper_gvec_mls_idx_d, 13672 }; 13673 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 13674 vec_full_reg_offset(s, rn), 13675 vec_full_reg_offset(s, rm), 13676 vec_full_reg_offset(s, rd), 13677 is_q ? 16 : 8, vec_full_reg_size(s), 13678 index, fns[size - 1]); 13679 return; 13680 } 13681 break; 13682 } 13683 13684 if (size == 3) { 13685 TCGv_i64 tcg_idx = tcg_temp_new_i64(); 13686 int pass; 13687 13688 assert(is_fp && is_q && !is_long); 13689 13690 read_vec_element(s, tcg_idx, rm, index, MO_64); 13691 13692 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 13693 TCGv_i64 tcg_op = tcg_temp_new_i64(); 13694 TCGv_i64 tcg_res = tcg_temp_new_i64(); 13695 13696 read_vec_element(s, tcg_op, rn, pass, MO_64); 13697 13698 switch (16 * u + opcode) { 13699 case 0x05: /* FMLS */ 13700 /* As usual for ARM, separate negation for fused multiply-add */ 13701 gen_helper_vfp_negd(tcg_op, tcg_op); 13702 /* fall through */ 13703 case 0x01: /* FMLA */ 13704 read_vec_element(s, tcg_res, rd, pass, MO_64); 13705 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst); 13706 break; 13707 case 0x09: /* FMUL */ 13708 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst); 13709 break; 13710 case 0x19: /* FMULX */ 13711 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst); 13712 break; 13713 default: 13714 g_assert_not_reached(); 13715 } 13716 13717 write_vec_element(s, tcg_res, rd, pass, MO_64); 13718 tcg_temp_free_i64(tcg_op); 13719 tcg_temp_free_i64(tcg_res); 13720 } 13721 13722 tcg_temp_free_i64(tcg_idx); 13723 clear_vec_high(s, !is_scalar, rd); 13724 } else if (!is_long) { 13725 /* 32 bit floating point, or 16 or 32 bit integer. 13726 * For the 16 bit scalar case we use the usual Neon helpers and 13727 * rely on the fact that 0 op 0 == 0 with no side effects. 
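     * (Only lane 0 of the 32-bit temp holds real data in the scalar
     * case; the packed helpers also process the zero upper lane, but
     * 0 op 0 yields 0 and cannot set QC, so that extra work is
     * harmless.)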
13728 */ 13729 TCGv_i32 tcg_idx = tcg_temp_new_i32(); 13730 int pass, maxpasses; 13731 13732 if (is_scalar) { 13733 maxpasses = 1; 13734 } else { 13735 maxpasses = is_q ? 4 : 2; 13736 } 13737 13738 read_vec_element_i32(s, tcg_idx, rm, index, size); 13739 13740 if (size == 1 && !is_scalar) { 13741 /* The simplest way to handle the 16x16 indexed ops is to duplicate 13742 * the index into both halves of the 32 bit tcg_idx and then use 13743 * the usual Neon helpers. 13744 */ 13745 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); 13746 } 13747 13748 for (pass = 0; pass < maxpasses; pass++) { 13749 TCGv_i32 tcg_op = tcg_temp_new_i32(); 13750 TCGv_i32 tcg_res = tcg_temp_new_i32(); 13751 13752 read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32); 13753 13754 switch (16 * u + opcode) { 13755 case 0x08: /* MUL */ 13756 case 0x10: /* MLA */ 13757 case 0x14: /* MLS */ 13758 { 13759 static NeonGenTwoOpFn * const fns[2][2] = { 13760 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, 13761 { tcg_gen_add_i32, tcg_gen_sub_i32 }, 13762 }; 13763 NeonGenTwoOpFn *genfn; 13764 bool is_sub = opcode == 0x4; 13765 13766 if (size == 1) { 13767 gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx); 13768 } else { 13769 tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx); 13770 } 13771 if (opcode == 0x8) { 13772 break; 13773 } 13774 read_vec_element_i32(s, tcg_op, rd, pass, MO_32); 13775 genfn = fns[size - 1][is_sub]; 13776 genfn(tcg_res, tcg_op, tcg_res); 13777 break; 13778 } 13779 case 0x05: /* FMLS */ 13780 case 0x01: /* FMLA */ 13781 read_vec_element_i32(s, tcg_res, rd, pass, 13782 is_scalar ? size : MO_32); 13783 switch (size) { 13784 case 1: 13785 if (opcode == 0x5) { 13786 /* As usual for ARM, separate negation for fused 13787 * multiply-add */ 13788 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000); 13789 } 13790 if (is_scalar) { 13791 gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx, 13792 tcg_res, fpst); 13793 } else { 13794 gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx, 13795 tcg_res, fpst); 13796 } 13797 break; 13798 case 2: 13799 if (opcode == 0x5) { 13800 /* As usual for ARM, separate negation for 13801 * fused multiply-add */ 13802 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000); 13803 } 13804 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, 13805 tcg_res, fpst); 13806 break; 13807 default: 13808 g_assert_not_reached(); 13809 } 13810 break; 13811 case 0x09: /* FMUL */ 13812 switch (size) { 13813 case 1: 13814 if (is_scalar) { 13815 gen_helper_advsimd_mulh(tcg_res, tcg_op, 13816 tcg_idx, fpst); 13817 } else { 13818 gen_helper_advsimd_mul2h(tcg_res, tcg_op, 13819 tcg_idx, fpst); 13820 } 13821 break; 13822 case 2: 13823 gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst); 13824 break; 13825 default: 13826 g_assert_not_reached(); 13827 } 13828 break; 13829 case 0x19: /* FMULX */ 13830 switch (size) { 13831 case 1: 13832 if (is_scalar) { 13833 gen_helper_advsimd_mulxh(tcg_res, tcg_op, 13834 tcg_idx, fpst); 13835 } else { 13836 gen_helper_advsimd_mulx2h(tcg_res, tcg_op, 13837 tcg_idx, fpst); 13838 } 13839 break; 13840 case 2: 13841 gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst); 13842 break; 13843 default: 13844 g_assert_not_reached(); 13845 } 13846 break; 13847 case 0x0c: /* SQDMULH */ 13848 if (size == 1) { 13849 gen_helper_neon_qdmulh_s16(tcg_res, cpu_env, 13850 tcg_op, tcg_idx); 13851 } else { 13852 gen_helper_neon_qdmulh_s32(tcg_res, cpu_env, 13853 tcg_op, tcg_idx); 13854 } 13855 break; 13856 case 0x0d: /* SQRDMULH */ 13857 if (size == 1) { 13858 gen_helper_neon_qrdmulh_s16(tcg_res, 
cpu_env, 13859 tcg_op, tcg_idx); 13860 } else { 13861 gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env, 13862 tcg_op, tcg_idx); 13863 } 13864 break; 13865 case 0x1d: /* SQRDMLAH */ 13866 read_vec_element_i32(s, tcg_res, rd, pass, 13867 is_scalar ? size : MO_32); 13868 if (size == 1) { 13869 gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env, 13870 tcg_op, tcg_idx, tcg_res); 13871 } else { 13872 gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env, 13873 tcg_op, tcg_idx, tcg_res); 13874 } 13875 break; 13876 case 0x1f: /* SQRDMLSH */ 13877 read_vec_element_i32(s, tcg_res, rd, pass, 13878 is_scalar ? size : MO_32); 13879 if (size == 1) { 13880 gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env, 13881 tcg_op, tcg_idx, tcg_res); 13882 } else { 13883 gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env, 13884 tcg_op, tcg_idx, tcg_res); 13885 } 13886 break; 13887 default: 13888 g_assert_not_reached(); 13889 } 13890 13891 if (is_scalar) { 13892 write_fp_sreg(s, rd, tcg_res); 13893 } else { 13894 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 13895 } 13896 13897 tcg_temp_free_i32(tcg_op); 13898 tcg_temp_free_i32(tcg_res); 13899 } 13900 13901 tcg_temp_free_i32(tcg_idx); 13902 clear_vec_high(s, is_q, rd); 13903 } else { 13904 /* long ops: 16x16->32 or 32x32->64 */ 13905 TCGv_i64 tcg_res[2]; 13906 int pass; 13907 bool satop = extract32(opcode, 0, 1); 13908 MemOp memop = MO_32; 13909 13910 if (satop || !u) { 13911 memop |= MO_SIGN; 13912 } 13913 13914 if (size == 2) { 13915 TCGv_i64 tcg_idx = tcg_temp_new_i64(); 13916 13917 read_vec_element(s, tcg_idx, rm, index, memop); 13918 13919 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 13920 TCGv_i64 tcg_op = tcg_temp_new_i64(); 13921 TCGv_i64 tcg_passres; 13922 int passelt; 13923 13924 if (is_scalar) { 13925 passelt = 0; 13926 } else { 13927 passelt = pass + (is_q * 2); 13928 } 13929 13930 read_vec_element(s, tcg_op, rn, passelt, memop); 13931 13932 tcg_res[pass] = tcg_temp_new_i64(); 13933 13934 if (opcode == 0xa || opcode == 0xb) { 13935 /* Non-accumulating ops */ 13936 tcg_passres = tcg_res[pass]; 13937 } else { 13938 tcg_passres = tcg_temp_new_i64(); 13939 } 13940 13941 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx); 13942 tcg_temp_free_i64(tcg_op); 13943 13944 if (satop) { 13945 /* saturating, doubling */ 13946 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env, 13947 tcg_passres, tcg_passres); 13948 } 13949 13950 if (opcode == 0xa || opcode == 0xb) { 13951 continue; 13952 } 13953 13954 /* Accumulating op: handle accumulate step */ 13955 read_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13956 13957 switch (opcode) { 13958 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 13959 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 13960 break; 13961 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 13962 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 13963 break; 13964 case 0x7: /* SQDMLSL, SQDMLSL2 */ 13965 tcg_gen_neg_i64(tcg_passres, tcg_passres); 13966 /* fall through */ 13967 case 0x3: /* SQDMLAL, SQDMLAL2 */ 13968 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env, 13969 tcg_res[pass], 13970 tcg_passres); 13971 break; 13972 default: 13973 g_assert_not_reached(); 13974 } 13975 tcg_temp_free_i64(tcg_passres); 13976 } 13977 tcg_temp_free_i64(tcg_idx); 13978 13979 clear_vec_high(s, !is_scalar, rd); 13980 } else { 13981 TCGv_i32 tcg_idx = tcg_temp_new_i32(); 13982 13983 assert(size == 1); 13984 read_vec_element_i32(s, tcg_idx, rm, index, size); 13985 13986 if (!is_scalar) { 13987 /* The simplest way to handle the 16x16 indexed ops is to 13988 * 

                if (opcode == 0xa || opcode == 0xb) {
                    continue;
                }

                /* Accumulating op: handle accumulate step */
                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);

                switch (opcode) {
                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
                    break;
                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
                    break;
                case 0x7: /* SQDMLSL, SQDMLSL2 */
                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
                    /* fall through */
                case 0x3: /* SQDMLAL, SQDMLAL2 */
                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                    break;
                default:
                    g_assert_not_reached();
                }
                tcg_temp_free_i64(tcg_passres);
            }
            tcg_temp_free_i64(tcg_idx);

            clear_vec_high(s, !is_scalar, rd);
        } else {
            TCGv_i32 tcg_idx = tcg_temp_new_i32();

            assert(size == 1);
            read_vec_element_i32(s, tcg_idx, rm, index, size);

            if (!is_scalar) {
                /* The simplest way to handle the 16x16 indexed ops is to
                 * duplicate the index into both halves of the 32 bit tcg_idx
                 * and then use the usual Neon helpers.
                 */
                tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
            }

            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
                TCGv_i32 tcg_op = tcg_temp_new_i32();
                TCGv_i64 tcg_passres;

                if (is_scalar) {
                    read_vec_element_i32(s, tcg_op, rn, pass, size);
                } else {
                    read_vec_element_i32(s, tcg_op, rn,
                                         pass + (is_q * 2), MO_32);
                }

                tcg_res[pass] = tcg_temp_new_i64();

                if (opcode == 0xa || opcode == 0xb) {
                    /* Non-accumulating ops */
                    tcg_passres = tcg_res[pass];
                } else {
                    tcg_passres = tcg_temp_new_i64();
                }

                if (memop & MO_SIGN) {
                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
                } else {
                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
                }
                if (satop) {
                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
                                                      tcg_passres, tcg_passres);
                }
                tcg_temp_free_i32(tcg_op);

                if (opcode == 0xa || opcode == 0xb) {
                    continue;
                }

                /* Accumulating op: handle accumulate step */
                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);

                switch (opcode) {
                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
                                             tcg_passres);
                    break;
                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
                                             tcg_passres);
                    break;
                case 0x7: /* SQDMLSL, SQDMLSL2 */
                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
                    /* fall through */
                case 0x3: /* SQDMLAL, SQDMLAL2 */
                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                    break;
                default:
                    g_assert_not_reached();
                }
                tcg_temp_free_i64(tcg_passres);
            }
            tcg_temp_free_i32(tcg_idx);

            if (is_scalar) {
                tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
            }
        }

        if (is_scalar) {
            tcg_res[1] = tcg_constant_i64(0);
        }

        for (pass = 0; pass < 2; pass++) {
            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
            tcg_temp_free_i64(tcg_res[pass]);
        }
    }

    if (fpst) {
        tcg_temp_free_ptr(fpst);
    }
}
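
/*
 * The crypto decoders below mostly defer to the gen_gvec_op2_ool and
 * gen_gvec_op3_ool wrappers defined earlier in this file, which expand
 * an out-of-line helper call over the full vector register contents;
 * the integer "data" argument (e.g. the AES decrypt flag) is passed
 * through to the helper in the simd_data field of the descriptor.
 */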

/* Crypto AES
 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----------------+------+-----------+--------+-----+------+------+
 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----------------+------+-----------+--------+-----+------+------+
 */
static void disas_crypto_aes(DisasContext *s, uint32_t insn)
{
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int decrypt;
    gen_helper_gvec_2 *genfn2 = NULL;
    gen_helper_gvec_3 *genfn3 = NULL;

    if (!dc_isar_feature(aa64_aes, s) || size != 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x4: /* AESE */
        decrypt = 0;
        genfn3 = gen_helper_crypto_aese;
        break;
    case 0x6: /* AESMC */
        decrypt = 0;
        genfn2 = gen_helper_crypto_aesmc;
        break;
    case 0x5: /* AESD */
        decrypt = 1;
        genfn3 = gen_helper_crypto_aese;
        break;
    case 0x7: /* AESIMC */
        decrypt = 1;
        genfn2 = gen_helper_crypto_aesmc;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }
    if (genfn2) {
        gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2);
    } else {
        gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3);
    }
}
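
/*
 * Note that AESD and AESIMC reuse the AESE and AESMC helpers above:
 * the encrypt and decrypt directions differ essentially only in the
 * lookup tables used, so the direction is passed to the helper as the
 * "decrypt" immediate rather than duplicating the helpers.
 */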

/* Crypto three-reg SHA
 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
 * +-----------------+------+---+------+---+--------+-----+------+------+
 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
 * +-----------------+------+---+------+---+--------+-----+------+------+
 */
static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
{
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 3);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    gen_helper_gvec_3 *genfn;
    bool feature;

    if (size != 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0: /* SHA1C */
        genfn = gen_helper_crypto_sha1c;
        feature = dc_isar_feature(aa64_sha1, s);
        break;
    case 1: /* SHA1P */
        genfn = gen_helper_crypto_sha1p;
        feature = dc_isar_feature(aa64_sha1, s);
        break;
    case 2: /* SHA1M */
        genfn = gen_helper_crypto_sha1m;
        feature = dc_isar_feature(aa64_sha1, s);
        break;
    case 3: /* SHA1SU0 */
        genfn = gen_helper_crypto_sha1su0;
        feature = dc_isar_feature(aa64_sha1, s);
        break;
    case 4: /* SHA256H */
        genfn = gen_helper_crypto_sha256h;
        feature = dc_isar_feature(aa64_sha256, s);
        break;
    case 5: /* SHA256H2 */
        genfn = gen_helper_crypto_sha256h2;
        feature = dc_isar_feature(aa64_sha256, s);
        break;
    case 6: /* SHA256SU1 */
        genfn = gen_helper_crypto_sha256su1;
        feature = dc_isar_feature(aa64_sha256, s);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }
    gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
}

/* Crypto two-reg SHA
 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----------------+------+-----------+--------+-----+------+------+
 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----------------+------+-----------+--------+-----+------+------+
 */
static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
{
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    gen_helper_gvec_2 *genfn;
    bool feature;

    if (size != 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0: /* SHA1H */
        feature = dc_isar_feature(aa64_sha1, s);
        genfn = gen_helper_crypto_sha1h;
        break;
    case 1: /* SHA1SU1 */
        feature = dc_isar_feature(aa64_sha1, s);
        genfn = gen_helper_crypto_sha1su1;
        break;
    case 2: /* SHA256SU0 */
        feature = dc_isar_feature(aa64_sha256, s);
        genfn = gen_helper_crypto_sha256su0;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }
    gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
}

static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    tcg_gen_rotli_i64(d, m, 1);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
{
    tcg_gen_rotli_vec(vece, d, m, 1);
    tcg_gen_xor_vec(vece, d, d, n);
}

void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3 op = {
        .fni8 = gen_rax1_i64,
        .fniv = gen_rax1_vec,
        .opt_opc = vecop_list,
        .fno = gen_helper_crypto_rax1,
        .vece = MO_64,
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
}
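
/*
 * A GVecGen3 like the one above gives tcg_gen_gvec_3 several expansion
 * strategies to choose from: the .fniv host-vector version is used when
 * the TCG backend supports every opcode listed in .opt_opc (here the
 * vector rotate-left-immediate), otherwise the .fni8 per-64-bit-lane
 * version, and as a last resort the .fno out-of-line helper call.
 */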

/* Crypto three-reg SHA512
 *  31                   21 20  16 15 14 13 12  11 10 9    5 4    0
 * +-----------------------+------+---+---+-----+--------+------+------+
 * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
 * +-----------------------+------+---+---+-----+--------+------+------+
 */
static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
{
    int opcode = extract32(insn, 10, 2);
    int o = extract32(insn, 14, 1);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool feature;
    gen_helper_gvec_3 *oolfn = NULL;
    GVecGen3Fn *gvecfn = NULL;

    if (o == 0) {
        switch (opcode) {
        case 0: /* SHA512H */
            feature = dc_isar_feature(aa64_sha512, s);
            oolfn = gen_helper_crypto_sha512h;
            break;
        case 1: /* SHA512H2 */
            feature = dc_isar_feature(aa64_sha512, s);
            oolfn = gen_helper_crypto_sha512h2;
            break;
        case 2: /* SHA512SU1 */
            feature = dc_isar_feature(aa64_sha512, s);
            oolfn = gen_helper_crypto_sha512su1;
            break;
        case 3: /* RAX1 */
            feature = dc_isar_feature(aa64_sha3, s);
            gvecfn = gen_gvec_rax1;
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        switch (opcode) {
        case 0: /* SM3PARTW1 */
            feature = dc_isar_feature(aa64_sm3, s);
            oolfn = gen_helper_crypto_sm3partw1;
            break;
        case 1: /* SM3PARTW2 */
            feature = dc_isar_feature(aa64_sm3, s);
            oolfn = gen_helper_crypto_sm3partw2;
            break;
        case 2: /* SM4EKEY */
            feature = dc_isar_feature(aa64_sm4, s);
            oolfn = gen_helper_crypto_sm4ekey;
            break;
        default:
            unallocated_encoding(s);
            return;
        }
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (oolfn) {
        gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
    } else {
        gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
    }
}

/* Crypto two-reg SHA512
 *  31                                     12  11 10 9    5 4    0
 * +-----------------------------------------+--------+------+------+
 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
 * +-----------------------------------------+--------+------+------+
 */
static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
{
    int opcode = extract32(insn, 10, 2);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool feature;

    switch (opcode) {
    case 0: /* SHA512SU0 */
        feature = dc_isar_feature(aa64_sha512, s);
        break;
    case 1: /* SM4E */
        feature = dc_isar_feature(aa64_sm4, s);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    switch (opcode) {
    case 0: /* SHA512SU0 */
        gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
        break;
    case 1: /* SM4E */
        gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Crypto four-register
 *  31               23 22 21 20  16 15  14  10 9    5 4    0
 * +-------------------+-----+------+---+------+------+------+
 * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
 * +-------------------+-----+------+---+------+------+------+
 */
static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
{
    int op0 = extract32(insn, 21, 2);
    int rm = extract32(insn, 16, 5);
    int ra = extract32(insn, 10, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool feature;

    switch (op0) {
    case 0: /* EOR3 */
    case 1: /* BCAX */
        feature = dc_isar_feature(aa64_sha3, s);
        break;
    case 2: /* SM3SS1 */
        feature = dc_isar_feature(aa64_sm3, s);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (op0 < 2) {
        TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
        int pass;

        tcg_op1 = tcg_temp_new_i64();
        tcg_op2 = tcg_temp_new_i64();
        tcg_op3 = tcg_temp_new_i64();
        tcg_res[0] = tcg_temp_new_i64();
        tcg_res[1] = tcg_temp_new_i64();

        for (pass = 0; pass < 2; pass++) {
            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);
            read_vec_element(s, tcg_op3, ra, pass, MO_64);

            if (op0 == 0) {
                /* EOR3 */
                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
            } else {
                /* BCAX */
                tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
            }
            tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
        }
        write_vec_element(s, tcg_res[0], rd, 0, MO_64);
        write_vec_element(s, tcg_res[1], rd, 1, MO_64);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_op3);
        tcg_temp_free_i64(tcg_res[0]);
        tcg_temp_free_i64(tcg_res[1]);
    } else {
        TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;

        tcg_op1 = tcg_temp_new_i32();
        tcg_op2 = tcg_temp_new_i32();
        tcg_op3 = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        tcg_zero = tcg_constant_i32(0);

        read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
        read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
        read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);

        tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
        tcg_gen_rotri_i32(tcg_res, tcg_res, 25);

        write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
        write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
        write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
        write_vec_element_i32(s, tcg_res, rd, 3, MO_32);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i32(tcg_op3);
        tcg_temp_free_i32(tcg_res);
    }
}
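
/*
 * In terms of the architectural operands, the expansion above computes
 * EOR3 as Vd = Vn ^ Vm ^ Va and BCAX as Vd = Vn ^ (Vm & ~Va), per
 * 64-bit lane, while SM3SS1 only produces a result in the top 32-bit
 * element of Vd and zeroes the other three.
 */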

/* Crypto XAR
 *  31                   21 20  16 15    10 9    5 4    0
 * +-----------------------+------+--------+------+------+
 * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
 * +-----------------------+------+--------+------+------+
 */
static void disas_crypto_xar(DisasContext *s, uint32_t insn)
{
    int rm = extract32(insn, 16, 5);
    int imm6 = extract32(insn, 10, 6);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    if (!dc_isar_feature(aa64_sha3, s)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
                 vec_full_reg_offset(s, rn),
                 vec_full_reg_offset(s, rm), imm6, 16,
                 vec_full_reg_size(s));
}

/* Crypto three-reg imm2
 *  31                   21 20  16 15  14 13 12  11 10 9    5 4    0
 * +-----------------------+------+-----+------+--------+------+------+
 * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
 * +-----------------------+------+-----+------+--------+------+------+
 */
static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
        gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
    };
    int opcode = extract32(insn, 10, 2);
    int imm2 = extract32(insn, 12, 2);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    if (!dc_isar_feature(aa64_sm3, s)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
}

/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 */
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
    { 0xce000000, 0xff808000, disas_crypto_four_reg },
    { 0xce800000, 0xffe00000, disas_crypto_xar },
    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
    { 0x00000000, 0x00000000, NULL }
};

static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
{
    /* Note that this is called with all non-FP cases from
     * table C3-6 so it must UNDEF for entries not specifically
     * allocated to instructions in that table.
     */
    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }
}
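
/*
 * For reference, lookup_disas_fn (from translate-a64.h) is a simple
 * first-match linear scan over the table, terminated by the all-zero
 * mask entry; conceptually:
 *
 *     for (p = table; p->mask; p++) {
 *         if ((insn & p->mask) == p->pattern) {
 *             return p->disas_fn;
 *         }
 *     }
 *     return NULL;
 *
 * which is why the more specific simd_mod_imm pattern above must be
 * listed before the simd_shift_imm pattern that subsumes it.
 */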

/* C3.6 Data processing - SIMD and floating point */
static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
{
    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
        disas_data_proc_fp(s, insn);
    } else {
        /* SIMD, including crypto */
        disas_data_proc_simd(s, insn);
    }
}

/*
 * Include the generated SME FA64 decoder.
 */

#include "decode-sme-fa64.c.inc"

static bool trans_OK(DisasContext *s, arg_OK *a)
{
    return true;
}

static bool trans_FAIL(DisasContext *s, arg_OK *a)
{
    s->is_nonstreaming = true;
    return true;
}
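
/*
 * When PSTATE.SM is set and FEAT_SME_FA64 is not enabled, most
 * non-streaming AdvSIMD instructions must trap.  The generated decoder
 * above matches those encodings via trans_FAIL, which only records the
 * fact in is_nonstreaming; the actual exception is raised later by the
 * fp/sve access check for the instruction.
 */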

/**
 * is_guarded_page:
 * @env: The cpu environment
 * @s: The DisasContext
 *
 * Return true if the page is guarded.
 */
static bool is_guarded_page(CPUARMState *env, DisasContext *s)
{
    uint64_t addr = s->base.pc_first;
#ifdef CONFIG_USER_ONLY
    return page_get_flags(addr) & PAGE_BTI;
#else
    CPUTLBEntryFull *full;
    void *host;
    int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
    int flags;

    /*
     * We test this immediately after reading an insn, which means
     * that the TLB entry must be present and valid, and thus this
     * access will never raise an exception.
     */
    flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
                              false, &host, &full, 0);
    assert(!(flags & TLB_INVALID_MASK));

    return full->guarded;
#endif
}

/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, known to be non-zero
 *
 * On a guarded page, there are a limited number of insns
 * that may be present at the branch target:
 *   - branch target identifiers,
 *   - paciasp, pacibsp,
 *   - BRK insn,
 *   - HLT insn.
 * Anything else causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    if ((insn & 0xfffff01fu) == 0xd503201fu) {
        /* HINT space */
        switch (extract32(insn, 5, 7)) {
        case 0b011001: /* PACIASP */
        case 0b011011: /* PACIBSP */
            /*
             * If SCTLR_ELx.BT, then PACI*SP are not compatible
             * with btype == 3.  Otherwise all btype are ok.
             */
            return !bt || btype != 3;
        case 0b100000: /* BTI */
            /* Not compatible with any btype. */
            return false;
        case 0b100010: /* BTI c */
            /* Not compatible with btype == 3 */
            return btype != 3;
        case 0b100100: /* BTI j */
            /* Not compatible with btype == 2 */
            return btype != 2;
        case 0b100110: /* BTI jc */
            /* Compatible with any btype. */
            return true;
        }
    } else {
        switch (insn & 0xffe0001fu) {
        case 0xd4200000u: /* BRK */
        case 0xd4400000u: /* HLT */
            /* Give priority to the breakpoint exception. */
            return true;
        }
    }
    return false;
}
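
/*
 * As a concrete example: BLR sets PSTATE.BTYPE to 2, so if it lands on
 * a guarded page the target may be BTI c (HINT #34, compatible with
 * btype 1 and 2) but not BTI j (HINT #36, which rejects btype == 2);
 * if no compatible instruction is found at the target, the fall-through
 * "return false" above leads to the Branch Target Exception.
 */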

static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    dc->aarch64 = true;
    dc->thumb = false;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
    dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
    dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
    dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata = EX_TBFLAG_A64(tb_flags, ATA);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
    dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
    dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;

    /*
     * Bound the number of insns to execute to those left on the page.
     * (TARGET_PAGE_MASK is sign-extended, so pc | TARGET_PAGE_MASK is
     * the negation of the number of bytes remaining in the page.)
     */
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;

    /* If architectural single step active, limit to 1. */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);

    init_tmp_a64_array(dc);
}

static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}

static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_arg = dc->base.pc_next;

    if (tb_cflags(dcbase->tb) & CF_PCREL) {
        pc_arg &= ~TARGET_PAGE_MASK;
    }
    tcg_gen_insn_start(pc_arg, 0, 0);
    dc->insn_start = tcg_last_op();
}
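
/*
 * With CF_PCREL only the page offset of the PC is recorded in the
 * insn_start arguments (e.g. 0x234 for pc 0x40001234 with 4K pages),
 * since the translated code may be reused at a different virtual
 * address; the full PC is recombined with the page base of the current
 * PC when state is restored.
 */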

static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *s = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    uint64_t pc = s->base.pc_next;
    uint32_t insn;

    /* Singlestep exceptions have the highest priority. */
    if (s->ss_active && !s->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(s->base.num_insns == 1);
        gen_swstep_exception(s, 0, 0);
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = pc + 4;
        return;
    }

    if (pc & 3) {
        /*
         * PC alignment fault.  This has priority over the instruction abort
         * that we would receive from a translation fault via arm_ldl_code.
         * This should only be possible after an indirect branch, at the
         * start of the TB.
         */
        assert(s->base.num_insns == 1);
        gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
        return;
    }

    s->pc_curr = pc;
    insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next = pc + 4;

    s->fp_access_checked = false;
    s->sve_access_checked = false;

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /*
             * At the first insn of the TB, compute s->guarded_page.
             * We delayed computing this until successfully reading
             * the first insn of the TB, above.  This (mostly) ensures
             * that the softmmu tlb entry has been populated, and the
             * page table GP bit is available.
             *
             * Note that we need to compute this even if btype == 0,
             * because this value is used for BR instructions later
             * where ENV is not available.
             */
            s->guarded_page = is_guarded_page(env, s);

            /* First insn can have btype set to non-zero. */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above almost
             * everything else.  This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             */
            if (s->btype != 0
                && s->guarded_page
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
                return;
            }
        } else {
            /* Not the first insn: btype must be 0. */
            tcg_debug_assert(s->btype == 0);
        }
    }

    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }

    switch (extract32(insn, 25, 4)) {
    case 0x0:
        if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
            unallocated_encoding(s);
        }
        break;
    case 0x1: case 0x3: /* UNALLOCATED */
        unallocated_encoding(s);
        break;
    case 0x2:
        if (!disas_sve(s, insn)) {
            unallocated_encoding(s);
        }
        break;
    case 0x8: case 0x9: /* Data processing - immediate */
        disas_data_proc_imm(s, insn);
        break;
    case 0xa: case 0xb: /* Branch, exception generation and system insns */
        disas_b_exc_sys(s, insn);
        break;
    case 0x4:
    case 0x6:
    case 0xc:
    case 0xe: /* Loads and stores */
        disas_ldst(s, insn);
        break;
    case 0x5:
    case 0xd: /* Data processing - register */
        disas_data_proc_reg(s, insn);
        break;
    case 0x7:
    case 0xf: /* Data processing - SIMD and floating point */
        disas_data_proc_simd_fp(s, insn);
        break;
    default:
        assert(FALSE); /* all 16 cases should be handled above */
        break;
    }

    /* if we allocated any temporaries, free them here */
    free_tmp_a64(s);

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }

    translator_loop_temp_check(&s->base);
}
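
/*
 * To take one path through the top-level switch above: a NOP encodes
 * as 0xd503201f, for which bits [28:25] are 0b1010 (0xa), so it is
 * dispatched to disas_b_exc_sys as a system/hint instruction.
 */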

static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            gen_step_complete_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(cpu_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
            gen_helper_wfi(cpu_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}

static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
                                 CPUState *cpu, FILE *logfile)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
    target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
    .disas_log          = aarch64_tr_disas_log,
};
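
/*
 * These hooks are driven by the common translator_loop (see
 * accel/tcg/translator.c): init_disas_context and tb_start run once
 * per translation block, then insn_start and translate_insn run for
 * each instruction until translate_insn sets is_jmp, and finally
 * tb_stop emits the block epilogue selected by that is_jmp value.
 */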