/*
 * AArch64 translation
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"
#include "semihosting/semihost.h"
#include "exec/gen-icount.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "cpregs.h"
#include "translate-a64.h"
#include "qemu/atomic128.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* initialize TCG globals. */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}
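/*
 * Note that regnames[] maps index 31 to "sp": cpu_X[31] holds the stack
 * pointer, and zero-register (XZR/WZR) accesses never read cpu_X[31];
 * they are materialised separately in cpu_reg() below.
 */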
/*
 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
 */
static int get_a64_user_mem_index(DisasContext *s)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), cpu_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55. */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}
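/*
 * Worked example of the AND/OR trick above (illustrative values only):
 * after the sextract, dst has its top byte equal to 0x00 when bit 55 of
 * src is 0 and 0xff when bit 55 is 1.  For tbi == 1 (TBI0 only), AND-ing
 * with src therefore clears the tag byte of a bit-55 == 0 address but
 * leaves a bit-55 == 1 address unmodified, which is exactly the TBI0
 * behaviour; tbi == 2 uses OR to get the mirror-image TBI1 behaviour.
 */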
/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing.  Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register.  But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(cpu_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access.  This probes a single
 * address, the exact one specified.  The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      int log2_size, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int log2_size)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int size)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;
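/*
 * Typical use of the MTE checks (a sketch of the pattern used by the
 * load/store handlers later in this file; variable names illustrative):
 *
 *     clean_addr = gen_mte_check1(s, dirty_addr, is_store,
 *                                 tag_checked, size);
 *     ...tcg_gen_qemu_ld/st on clean_addr...
 *
 * The returned temporary has any tag byte stripped and can be offset
 * freely without disturbing the register-writeback value.
 */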
static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly.  The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(cpu_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments.  For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path.  A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register in cases
 * where changes to the final register value are likely to be made. If
 * you need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
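/*
 * Illustration: in "add x0, sp, #4" (ADD immediate) the Rn value of 31
 * means SP and is read with cpu_reg_sp(), while in "orr x0, xzr, x1"
 * (ORR shifted register, the encoding of MOV) the same Rn value of 31
 * means XZR and is read with cpu_reg(), yielding a discardable zero
 * temporary.
 */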
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_movi_i64(t, 0);
        return t;
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}
/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper. */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper. */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    tcg_temp_free_ptr(fpst);
}

/* Expand a 3-operand + qc + operation using an out-of-line helper. */
static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
                            int rm, gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), qc_ptr,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
    tcg_temp_free_ptr(qc_ptr);
}

/* Expand a 4-operand operation using an out-of-line helper. */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
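/*
 * Usage sketch for the expanders above (hypothetical caller): a vector
 * integer ADD over the low 8 or full 16 bytes of the Q registers can be
 * emitted as
 *
 *     gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
 *
 * with any tail bytes between oprsz and maxsz cleared by the gvec
 * machinery itself.
 */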
/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    tcg_temp_free_ptr(fpst);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}
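/*
 * Overflow rationale for gen_add_CC: V must be set when both addends
 * have the same sign but the result's sign differs, which is exactly
 * when (result ^ t0) & ~(t0 ^ t1) has its top bit set.  For example,
 * in 32-bit mode 0x7fffffff + 1 = 0x80000000 sets V, while
 * 0xffffffff + 1 = 0 sets C and Z but not V.
 */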
/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);
    tcg_temp_free_i64(flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}

/*
 * Load/Store generators
 */
/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}
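/*
 * The ISS syndrome recorded above is what lets a hypervisor emulate a
 * single-register load/store that faults at stage 2 without fetching
 * and decoding the guest instruction itself: iss_srt names the transfer
 * register and iss_sf its width in the ESR_ELx.ISS encoding.
 */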
/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    MemOp mop;

    tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));

    if (size < 4) {
        mop = finalize_memop(s, size);
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
        TCGv_i64 tmphi = tcg_temp_new_i64();

        tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));

        mop = s->be_data | MO_UQ;
        tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr,
                            get_mem_index(s), mop);

        tcg_temp_free_i64(tcg_hiaddr);
        tcg_temp_free_i64(tmphi);
    }

    tcg_temp_free_i64(tmplo);
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;
    MemOp mop;

    if (size < 4) {
        mop = finalize_memop(s, size);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        mop = s->be_data | MO_UQ;
        tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr,
                            get_mem_index(s), mop);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
    tcg_temp_free_i64(tmplo);

    if (tmphi) {
        tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
        tcg_temp_free_i64(tmphi);
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}
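/*
 * Note on the two helpers above: a 128-bit access is emitted as two
 * 8-byte operations (high/low order chosen by endianness), with the
 * 16-byte alignment check, when enabled, requested only on the first
 * access; the second access at +8 then needs no separate check.
 */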
/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}
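/*
 * Example: read_vec_element(s, tmp, rn, 1, MO_32) fetches lane 1 of Vn
 * (bits [63:32]) zero-extended to 64 bits, while MO_32 | MO_SIGN
 * sign-extends the same lane; write_vec_element with MO_32 stores back
 * to that lane without touching the rest of the register.
 */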
/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);

    tcg_temp_free_i64(tcg_tmp);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);

    tcg_temp_free_i64(tcg_tmp);
}

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Check that SVE access is enabled.  If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority.  This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        s->fp_access_checked = true;
        return sme_access_check(s);
    }
    return fp_access_check_only(s);
}
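/*
 * Per the comment on fp_access_check_only() above, the decode-time
 * ordering for an FP/vector instruction is: reject unallocated
 * encodings first, then call fp_access_check()/sve_access_check(),
 * and only then start emitting code for the instruction itself.
 */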
/* Common subroutine for CheckSMEAnd*Enabled. */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is intended
 * to be used when the relevant bits for decode are too awkwardly
 * placed and switch/if based logic would be confusing and deeply
 * nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
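/*
 * Usage sketch with a hypothetical table (the real tables appear with
 * the SIMD decoders later in the file); note the zero-mask terminator:
 *
 *     static const AArch64DecodeTable example_table[] = {
 *         { 0x0e200400, 0x9f200400, disas_foo },   [ pattern, mask, fn ]
 *         { 0x00000000, 0x00000000, NULL }
 *     };
 *     AArch64DecodeFn *fn = lookup_disas_fn(example_table, insn);
 *     if (fn) {
 *         fn(s, insn);
 *     } else {
 *         unallocated_encoding(s);
 *     }
 */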
/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

/* Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    int64_t diff = sextract32(insn, 0, 26) * 4;

    if (insn & (1U << 31)) {
        /* BL Branch with link */
        gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    }

    /* B Branch / BL Branch with link */
    reset_btype(s);
    gen_goto_tb(s, 0, diff);
}

/* Compare and branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    int64_t diff;
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    diff = sextract32(insn, 5, 19) * 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, diff);
}
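/*
 * Note the two-exit pattern used by disas_comp_b_imm and the other
 * conditional branches below: TB exit 0 is the fall-through path
 * (pc + 4) and exit 1 is the taken path (pc + diff), so both sides
 * of the branch can be chained with goto_tb.
 */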
/* Test and branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    int64_t diff;
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    diff = sextract32(insn, 5, 14) * 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    tcg_temp_free_i64(tcg_cmp);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, diff);
}

/* Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    int64_t diff;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    diff = sextract32(insn, 5, 19) * 4;
    cond = extract32(insn, 0, 4);

    reset_btype(s);
    if (cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, diff);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, diff);
    }
}
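/*
 * In the HINT decode below the selector is CRm:op2, so for example
 * NOP (CRm=0, op2=0) is selector 0b00000, WFI (CRm=0, op2=3) is
 * 0b00011, and PACIASP (CRm=3, op2=1) is 0b11001.
 */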
/* HINT instruction group, including various allocated HINTs */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0b00000: /* NOP */
        break;
    case 0b00011: /* WFI */
        s->base.is_jmp = DISAS_WFI;
        break;
    case 0b00001: /* YIELD */
        /* When running in MTTCG we don't generate jumps to the yield and
         * WFE helpers as it won't affect the scheduling of other vCPUs.
         * If we wanted to more completely model WFE/SEV so we don't busy
         * spin unnecessarily we would need to do something more involved.
         */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_YIELD;
        }
        break;
    case 0b00010: /* WFE */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_WFE;
        }
        break;
    case 0b00100: /* SEV */
    case 0b00101: /* SEVL */
    case 0b00110: /* DGH */
        /* we treat all as NOP at least for now */
        break;
    case 0b00111: /* XPACLRI */
        if (s->pauth_active) {
            gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
        }
        break;
    case 0b01000: /* PACIA1716 */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01010: /* PACIB1716 */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01100: /* AUTIA1716 */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01110: /* AUTIB1716 */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b10000: /* ESB */
        /* Without RAS, we must implement this as NOP. */
        if (dc_isar_feature(aa64_ras, s)) {
            /*
             * QEMU does not have a source of physical SErrors,
             * so we are only concerned with virtual SErrors.
             * The pseudocode in the ARM for this case is
             *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
             *     AArch64.vESBOperation();
             * Most of the condition can be evaluated at translation time.
             * Test for EL2 present, and defer test for SEL2 to runtime.
             */
            if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
                gen_helper_vesb(cpu_env);
            }
        }
        break;
    case 0b11000: /* PACIAZ */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11001: /* PACIASP */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11010: /* PACIBZ */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11011: /* PACIBSP */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11100: /* AUTIAZ */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11101: /* AUTIASP */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11110: /* AUTIBZ */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
                             tcg_constant_i64(0));
        }
        break;
    case 0b11111: /* AUTIBSP */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    default:
        /* default specified as NOP equivalent */
        break;
    }
}

static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}

/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGBar bar;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        switch (crm & 3) {
        case 1: /* MBReqTypes_Reads */
            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
            break;
        case 2: /* MBReqTypes_Writes */
            bar = TCG_BAR_SC | TCG_MO_ST_ST;
            break;
        default: /* MBReqTypes_All */
            bar = TCG_BAR_SC | TCG_MO_ALL;
            break;
        }
        tcg_gen_mb(bar);
        return;
    case 6: /* ISB */
        /* We need to break the TB after this insn to execute
         * self-modifying code correctly and also to take
         * any pending interrupts immediately.
         */
        reset_btype(s);
        gen_goto_tb(s, 0, 4);
        return;

    case 7: /* SB */
        if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
            goto do_unallocated;
        }
        /*
         * TODO: There is no speculation barrier opcode for TCG;
         * MB and end the TB instead.
         */
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
        gen_goto_tb(s, 0, 4);
        return;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }
}
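/*
 * As an example of the barrier mapping above: "dmb ishld" has
 * (crm & 3) == 1 and becomes a TCG barrier ordering earlier loads
 * against later loads and stores, while "dmb ishst" has
 * (crm & 3) == 2 and only orders stores against later stores.
 */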
static void gen_xaflag(void)
{
    TCGv_i32 z = tcg_temp_new_i32();

    tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);

    /*
     * (!C & !Z) << 31
     * (!(C | Z)) << 31
     * ~((C | Z) << 31)
     * ~-(C | Z)
     * (C | Z) - 1
     */
    tcg_gen_or_i32(cpu_NF, cpu_CF, z);
    tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);

    /* !(Z & C) */
    tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
    tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);

    /* (!C & Z) << 31 -> -(Z & ~C) */
    tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);

    /* C | Z */
    tcg_gen_or_i32(cpu_CF, cpu_CF, z);

    tcg_temp_free_i32(z);
}

static void gen_axflag(void)
{
    tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
    tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */

    /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
    tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);

    tcg_gen_movi_i32(cpu_NF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* MSR (immediate) - move immediate to processor state field */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    int op = op1 << 3 | op2;

    /* End the TB by default, chaining is ok. */
    s->base.is_jmp = DISAS_TOO_MANY;

    switch (op) {
    case 0x00: /* CFINV */
        if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
            goto do_unallocated;
        }
        tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x01: /* XAFlag */
        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
            goto do_unallocated;
        }
        gen_xaflag();
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x02: /* AXFlag */
        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
            goto do_unallocated;
        }
        gen_axflag();
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x03: /* UAO */
        if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_UAO);
        } else {
            clear_pstate_bits(PSTATE_UAO);
        }
        gen_rebuild_hflags(s);
        break;

    case 0x04: /* PAN */
        if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_PAN);
        } else {
            clear_pstate_bits(PSTATE_PAN);
        }
        gen_rebuild_hflags(s);
        break;

    case 0x05: /* SPSel */
        if (s->current_el == 0) {
            goto do_unallocated;
        }
        gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(crm & PSTATE_SP));
        break;

    case 0x19: /* SSBS */
        if (!dc_isar_feature(aa64_ssbs, s)) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_SSBS);
        } else {
            clear_pstate_bits(PSTATE_SSBS);
        }
        /* Don't need to rebuild hflags since SSBS is a nop */
        break;

    case 0x1a: /* DIT */
        if (!dc_isar_feature(aa64_dit, s)) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_DIT);
        } else {
            clear_pstate_bits(PSTATE_DIT);
        }
        /* There's no need to rebuild hflags because DIT is a nop */
        break;
    case 0x1e: /* DAIFSet */
        gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(crm));
        break;

    case 0x1f: /* DAIFClear */
        gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(crm));
        /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */
        s->base.is_jmp = DISAS_UPDATE_EXIT;
        break;

    case 0x1c: /* TCO */
        if (dc_isar_feature(aa64_mte, s)) {
            /* Full MTE is enabled -- set the TCO bit as directed. */
            if (crm & 1) {
                set_pstate_bits(PSTATE_TCO);
            } else {
                clear_pstate_bits(PSTATE_TCO);
            }
            gen_rebuild_hflags(s);
            /* Many factors, including TCO, go into MTE_ACTIVE. */
            s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
        } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
            /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */
            s->base.is_jmp = DISAS_NEXT;
        } else {
            goto do_unallocated;
        }
        break;

    case 0x1b: /* SVCR* */
        if (!dc_isar_feature(aa64_sme, s) || crm < 2 || crm > 7) {
            goto do_unallocated;
        }
        if (sme_access_check(s)) {
            int old = s->pstate_sm | (s->pstate_za << 1);
            int new = (crm & 1) * 3;
            int msk = (crm >> 1) & 3;

            if ((old ^ new) & msk) {
                /* At least one bit changes. */
                gen_helper_set_svcr(cpu_env, tcg_constant_i32(new),
                                    tcg_constant_i32(msk));
            } else {
                s->base.is_jmp = DISAS_NEXT;
            }
        }
        break;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }
}

static void gen_get_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* build bit 31, N */
    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
    /* build bit 30, Z */
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
    /* build bit 29, C */
    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
    /* build bit 28, V */
    tcg_gen_shri_i32(tmp, cpu_VF, 31);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
    /* generate result */
    tcg_gen_extu_i32_i64(tcg_rt, nzcv);

    tcg_temp_free_i32(nzcv);
    tcg_temp_free_i32(tmp);
}

static void gen_set_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* take NZCV from R[t] */
    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);

    /* bit 31, N */
    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
    /* bit 30, Z */
    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
    /* bit 29, C */
    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
    /* bit 28, V */
    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
    tcg_temp_free_i32(nzcv);
}
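/*
 * Layout used by the two helpers above: in the 64-bit value, N, Z, C
 * and V occupy bits 31, 30, 29 and 28 respectively, so e.g. a state
 * with N and C set (and Z, V clear) reads back as 0xa0000000 in Rt.
 */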
static void gen_sysreg_undef(DisasContext *s, bool isread,
                             uint8_t op0, uint8_t op1, uint8_t op2,
                             uint8_t crn, uint8_t crm, uint8_t rt)
{
    /*
     * Generate code to emit an UNDEF with correct syndrome
     * information for a failed system register access.
     * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
     * but if FEAT_IDST is implemented then read accesses to registers
     * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
     * syndrome.
     */
    uint32_t syndrome;

    if (isread && dc_isar_feature(aa64_ids, s) &&
        arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
    } else {
        syndrome = syn_uncategorized();
    }
    gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
}

/* MRS - move from system register
 * MSR (register) - move to system register
 * SYS
 * SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                      crn, crm, op0, op1, op2);
    const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
    TCGv_ptr tcg_ri = NULL;
    TCGv_i64 tcg_rt;

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
        return;
    }

    if (ri->accessfn || (ri->fgt && s->fgt_active)) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        uint32_t syndrome;

        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        gen_a64_update_pc(s, 0);
        tcg_ri = tcg_temp_new_ptr();
        gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
                                       tcg_constant_i32(key),
                                       tcg_constant_i32(syndrome),
                                       tcg_constant_i32(isread));
    } else if (ri->type & ARM_CP_RAISES_EXC) {
        /*
         * The readfn or writefn might raise an exception;
         * synchronize the CPU state in case it does.
         */
        gen_a64_update_pc(s, 0);
    }

    /* Handle special cases first */
    switch (ri->type & ARM_CP_SPECIAL_MASK) {
    case 0:
        break;
    case ARM_CP_NOP:
        goto exit;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        goto exit;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        goto exit;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
*/ 1970 if (s->mte_active[0]) { 1971 int desc = 0; 1972 1973 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 1974 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 1975 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 1976 1977 tcg_rt = tcg_temp_new_i64(); 1978 gen_helper_mte_check_zva(tcg_rt, cpu_env, 1979 tcg_constant_i32(desc), cpu_reg(s, rt)); 1980 } else { 1981 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 1982 } 1983 gen_helper_dc_zva(cpu_env, tcg_rt); 1984 goto exit; 1985 case ARM_CP_DC_GVA: 1986 { 1987 TCGv_i64 clean_addr, tag; 1988 1989 /* 1990 * DC_GVA, like DC_ZVA, requires that we supply the original 1991 * pointer for an invalid page. Probe that address first. 1992 */ 1993 tcg_rt = cpu_reg(s, rt); 1994 clean_addr = clean_data_tbi(s, tcg_rt); 1995 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 1996 1997 if (s->ata) { 1998 /* Extract the tag from the register to match STZGM. */ 1999 tag = tcg_temp_new_i64(); 2000 tcg_gen_shri_i64(tag, tcg_rt, 56); 2001 gen_helper_stzgm_tags(cpu_env, clean_addr, tag); 2002 tcg_temp_free_i64(tag); 2003 } 2004 } 2005 goto exit; 2006 case ARM_CP_DC_GZVA: 2007 { 2008 TCGv_i64 clean_addr, tag; 2009 2010 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2011 tcg_rt = cpu_reg(s, rt); 2012 clean_addr = clean_data_tbi(s, tcg_rt); 2013 gen_helper_dc_zva(cpu_env, clean_addr); 2014 2015 if (s->ata) { 2016 /* Extract the tag from the register to match STZGM. */ 2017 tag = tcg_temp_new_i64(); 2018 tcg_gen_shri_i64(tag, tcg_rt, 56); 2019 gen_helper_stzgm_tags(cpu_env, clean_addr, tag); 2020 tcg_temp_free_i64(tag); 2021 } 2022 } 2023 goto exit; 2024 default: 2025 g_assert_not_reached(); 2026 } 2027 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2028 goto exit; 2029 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2030 goto exit; 2031 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2032 goto exit; 2033 } 2034 2035 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { 2036 gen_io_start(); 2037 } 2038 2039 tcg_rt = cpu_reg(s, rt); 2040 2041 if (isread) { 2042 if (ri->type & ARM_CP_CONST) { 2043 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2044 } else if (ri->readfn) { 2045 if (!tcg_ri) { 2046 tcg_ri = gen_lookup_cp_reg(key); 2047 } 2048 gen_helper_get_cp_reg64(tcg_rt, cpu_env, tcg_ri); 2049 } else { 2050 tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset); 2051 } 2052 } else { 2053 if (ri->type & ARM_CP_CONST) { 2054 /* If not forbidden by access permissions, treat as WI */ 2055 goto exit; 2056 } else if (ri->writefn) { 2057 if (!tcg_ri) { 2058 tcg_ri = gen_lookup_cp_reg(key); 2059 } 2060 gen_helper_set_cp_reg64(cpu_env, tcg_ri, tcg_rt); 2061 } else { 2062 tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset); 2063 } 2064 } 2065 2066 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { 2067 /* I/O operations must end the TB here (whether read or write) */ 2068 s->base.is_jmp = DISAS_UPDATE_EXIT; 2069 } 2070 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2071 /* 2072 * A write to any coprocessor register that ends a TB 2073 * must rebuild the hflags for the next TB. 2074 */ 2075 gen_rebuild_hflags(s); 2076 /* 2077 * We default to ending the TB on a coprocessor register write, 2078 * but allow this to be suppressed by the register definition 2079 * (usually only necessary to work around guest bugs).
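 * For instance, an MSR write to SCTLR_EL1 may enable or disable the MMU,
 * state which is baked into the cached hflags, so translation must stop
 * here and restart with freshly computed flags.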
2080 */ 2081 s->base.is_jmp = DISAS_UPDATE_EXIT; 2082 } 2083 2084 exit: 2085 if (tcg_ri) { 2086 tcg_temp_free_ptr(tcg_ri); 2087 } 2088 } 2089 2090 /* System 2091 * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0 2092 * +---------------------+---+-----+-----+-------+-------+-----+------+ 2093 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt | 2094 * +---------------------+---+-----+-----+-------+-------+-----+------+ 2095 */ 2096 static void disas_system(DisasContext *s, uint32_t insn) 2097 { 2098 unsigned int l, op0, op1, crn, crm, op2, rt; 2099 l = extract32(insn, 21, 1); 2100 op0 = extract32(insn, 19, 2); 2101 op1 = extract32(insn, 16, 3); 2102 crn = extract32(insn, 12, 4); 2103 crm = extract32(insn, 8, 4); 2104 op2 = extract32(insn, 5, 3); 2105 rt = extract32(insn, 0, 5); 2106 2107 if (op0 == 0) { 2108 if (l || rt != 31) { 2109 unallocated_encoding(s); 2110 return; 2111 } 2112 switch (crn) { 2113 case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */ 2114 handle_hint(s, insn, op1, op2, crm); 2115 break; 2116 case 3: /* CLREX, DSB, DMB, ISB */ 2117 handle_sync(s, insn, op1, op2, crm); 2118 break; 2119 case 4: /* MSR (immediate) */ 2120 handle_msr_i(s, insn, op1, op2, crm); 2121 break; 2122 default: 2123 unallocated_encoding(s); 2124 break; 2125 } 2126 return; 2127 } 2128 handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt); 2129 } 2130 2131 /* Exception generation 2132 * 2133 * 31 24 23 21 20 5 4 2 1 0 2134 * +-----------------+-----+------------------------+-----+----+ 2135 * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL | 2136 * +-----------------------+------------------------+----------+ 2137 */ 2138 static void disas_exc(DisasContext *s, uint32_t insn) 2139 { 2140 int opc = extract32(insn, 21, 3); 2141 int op2_ll = extract32(insn, 0, 5); 2142 int imm16 = extract32(insn, 5, 16); 2143 uint32_t syndrome; 2144 2145 switch (opc) { 2146 case 0: 2147 /* For SVC, HVC and SMC we advance the single-step state 2148 * machine before taking the exception. This is architecturally 2149 * mandated, to ensure that single-stepping a system call 2150 * instruction works properly. 2151 */ 2152 switch (op2_ll) { 2153 case 1: /* SVC */ 2154 syndrome = syn_aa64_svc(imm16); 2155 if (s->fgt_svc) { 2156 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2157 break; 2158 } 2159 gen_ss_advance(s); 2160 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2161 break; 2162 case 2: /* HVC */ 2163 if (s->current_el == 0) { 2164 unallocated_encoding(s); 2165 break; 2166 } 2167 /* The pre HVC helper handles cases when HVC gets trapped 2168 * as an undefined insn by runtime configuration. 2169 */ 2170 gen_a64_update_pc(s, 0); 2171 gen_helper_pre_hvc(cpu_env); 2172 gen_ss_advance(s); 2173 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(imm16), 2); 2174 break; 2175 case 3: /* SMC */ 2176 if (s->current_el == 0) { 2177 unallocated_encoding(s); 2178 break; 2179 } 2180 gen_a64_update_pc(s, 0); 2181 gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(imm16))); 2182 gen_ss_advance(s); 2183 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(imm16), 3); 2184 break; 2185 default: 2186 unallocated_encoding(s); 2187 break; 2188 } 2189 break; 2190 case 1: 2191 if (op2_ll != 0) { 2192 unallocated_encoding(s); 2193 break; 2194 } 2195 /* BRK */ 2196 gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16)); 2197 break; 2198 case 2: 2199 if (op2_ll != 0) { 2200 unallocated_encoding(s); 2201 break; 2202 } 2203 /* HLT. This has two purposes. 2204 * Architecturally, it is an external halting debug instruction. 
Since QEMU doesn't implement external debug, we treat this as 2206 * the architecture requires when halting debug is disabled: it will UNDEF. 2207 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2208 */ 2209 if (semihosting_enabled(s->current_el == 0) && imm16 == 0xf000) { 2210 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2211 } else { 2212 unallocated_encoding(s); 2213 } 2214 break; 2215 case 5: 2216 if (op2_ll < 1 || op2_ll > 3) { 2217 unallocated_encoding(s); 2218 break; 2219 } 2220 /* DCPS1, DCPS2, DCPS3 */ 2221 unallocated_encoding(s); 2222 break; 2223 default: 2224 unallocated_encoding(s); 2225 break; 2226 } 2227 } 2228 2229 /* Unconditional branch (register) 2230 * 31 25 24 21 20 16 15 10 9 5 4 0 2231 * +---------------+-------+-------+-------+------+-------+ 2232 * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 | 2233 * +---------------+-------+-------+-------+------+-------+ 2234 */ 2235 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) 2236 { 2237 unsigned int opc, op2, op3, rn, op4; 2238 unsigned btype_mod = 2; /* 0: BR, 1: BLR, 2: other */ 2239 TCGv_i64 dst; 2240 TCGv_i64 modifier; 2241 2242 opc = extract32(insn, 21, 4); 2243 op2 = extract32(insn, 16, 5); 2244 op3 = extract32(insn, 10, 6); 2245 rn = extract32(insn, 5, 5); 2246 op4 = extract32(insn, 0, 5); 2247 2248 if (op2 != 0x1f) { 2249 goto do_unallocated; 2250 } 2251 2252 switch (opc) { 2253 case 0: /* BR */ 2254 case 1: /* BLR */ 2255 case 2: /* RET */ 2256 btype_mod = opc; 2257 switch (op3) { 2258 case 0: 2259 /* BR, BLR, RET */ 2260 if (op4 != 0) { 2261 goto do_unallocated; 2262 } 2263 dst = cpu_reg(s, rn); 2264 break; 2265 2266 case 2: 2267 case 3: 2268 if (!dc_isar_feature(aa64_pauth, s)) { 2269 goto do_unallocated; 2270 } 2271 if (opc == 2) { 2272 /* RETAA, RETAB */ 2273 if (rn != 0x1f || op4 != 0x1f) { 2274 goto do_unallocated; 2275 } 2276 rn = 30; 2277 modifier = cpu_X[31]; 2278 } else { 2279 /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */ 2280 if (op4 != 0x1f) { 2281 goto do_unallocated; 2282 } 2283 modifier = tcg_constant_i64(0); 2284 } 2285 if (s->pauth_active) { 2286 dst = tcg_temp_new_i64(); 2287 if (op3 == 2) { 2288 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier); 2289 } else { 2290 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier); 2291 } 2292 } else { 2293 dst = cpu_reg(s, rn); 2294 } 2295 break; 2296 2297 default: 2298 goto do_unallocated; 2299 } 2300 /* BLR also needs to load return address */ 2301 if (opc == 1) { 2302 TCGv_i64 lr = cpu_reg(s, 30); 2303 if (dst == lr) { 2304 TCGv_i64 tmp = tcg_temp_new_i64(); 2305 tcg_gen_mov_i64(tmp, dst); 2306 dst = tmp; 2307 } 2308 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 2309 } 2310 gen_a64_set_pc(s, dst); 2311 break; 2312 2313 case 8: /* BRAA */ 2314 case 9: /* BLRAA */ 2315 if (!dc_isar_feature(aa64_pauth, s)) { 2316 goto do_unallocated; 2317 } 2318 if ((op3 & ~1) != 2) { 2319 goto do_unallocated; 2320 } 2321 btype_mod = opc & 1; 2322 if (s->pauth_active) { 2323 dst = tcg_temp_new_i64(); 2324 modifier = cpu_reg_sp(s, op4); 2325 if (op3 == 2) { 2326 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier); 2327 } else { 2328 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier); 2329 } 2330 } else { 2331 dst = cpu_reg(s, rn); 2332 } 2333 /* BLRAA also needs to load return address */ 2334 if (opc == 9) { 2335 TCGv_i64 lr = cpu_reg(s, 30); 2336 if (dst == lr) { 2337 TCGv_i64 tmp = tcg_temp_new_i64(); 2338 tcg_gen_mov_i64(tmp, dst); 2339 dst = tmp; 2340 } 2341 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 2342 } 2343 gen_a64_set_pc(s, dst);
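        /*
         * Note the tmp copy above: for e.g. BLRAA x30, x1 the branch target
         * is the old (authenticated) x30 value, so it must be moved to a
         * temporary before the return address overwrites the register.
         */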
2344 break; 2345 2346 case 4: /* ERET */ 2347 if (s->current_el == 0) { 2348 goto do_unallocated; 2349 } 2350 switch (op3) { 2351 case 0: /* ERET */ 2352 if (op4 != 0) { 2353 goto do_unallocated; 2354 } 2355 if (s->fgt_eret) { 2356 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(op3), 2); 2357 return; 2358 } 2359 dst = tcg_temp_new_i64(); 2360 tcg_gen_ld_i64(dst, cpu_env, 2361 offsetof(CPUARMState, elr_el[s->current_el])); 2362 break; 2363 2364 case 2: /* ERETAA */ 2365 case 3: /* ERETAB */ 2366 if (!dc_isar_feature(aa64_pauth, s)) { 2367 goto do_unallocated; 2368 } 2369 if (rn != 0x1f || op4 != 0x1f) { 2370 goto do_unallocated; 2371 } 2372 /* The FGT trap takes precedence over an auth trap. */ 2373 if (s->fgt_eret) { 2374 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(op3), 2); 2375 return; 2376 } 2377 dst = tcg_temp_new_i64(); 2378 tcg_gen_ld_i64(dst, cpu_env, 2379 offsetof(CPUARMState, elr_el[s->current_el])); 2380 if (s->pauth_active) { 2381 modifier = cpu_X[31]; 2382 if (op3 == 2) { 2383 gen_helper_autia(dst, cpu_env, dst, modifier); 2384 } else { 2385 gen_helper_autib(dst, cpu_env, dst, modifier); 2386 } 2387 } 2388 break; 2389 2390 default: 2391 goto do_unallocated; 2392 } 2393 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { 2394 gen_io_start(); 2395 } 2396 2397 gen_helper_exception_return(cpu_env, dst); 2398 tcg_temp_free_i64(dst); 2399 /* Must exit loop to check un-masked IRQs */ 2400 s->base.is_jmp = DISAS_EXIT; 2401 return; 2402 2403 case 5: /* DRPS */ 2404 if (op3 != 0 || op4 != 0 || rn != 0x1f) { 2405 goto do_unallocated; 2406 } else { 2407 unallocated_encoding(s); 2408 } 2409 return; 2410 2411 default: 2412 do_unallocated: 2413 unallocated_encoding(s); 2414 return; 2415 } 2416 2417 switch (btype_mod) { 2418 case 0: /* BR */ 2419 if (dc_isar_feature(aa64_bti, s)) { 2420 /* BR to {x16,x17} or !guard -> 1, else 3. */ 2421 set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3); 2422 } 2423 break; 2424 2425 case 1: /* BLR */ 2426 if (dc_isar_feature(aa64_bti, s)) { 2427 /* BLR sets BTYPE to 2, regardless of source guarded page. */ 2428 set_btype(s, 2); 2429 } 2430 break; 2431 2432 default: /* RET or none of the above. */ 2433 /* BTYPE will be set to 0 by normal end-of-insn processing. 
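 * (Worked example of the BR rule above: BR x17 always sets BTYPE to 1,
 * while BR x5 from a guarded page sets BTYPE to 3, so a BTI landing pad
 * can tell the two kinds of indirect branch apart.)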
*/ 2434 break; 2435 } 2436 2437 s->base.is_jmp = DISAS_JUMP; 2438 } 2439 2440 /* Branches, exception generating and system instructions */ 2441 static void disas_b_exc_sys(DisasContext *s, uint32_t insn) 2442 { 2443 switch (extract32(insn, 25, 7)) { 2444 case 0x0a: case 0x0b: 2445 case 0x4a: case 0x4b: /* Unconditional branch (immediate) */ 2446 disas_uncond_b_imm(s, insn); 2447 break; 2448 case 0x1a: case 0x5a: /* Compare & branch (immediate) */ 2449 disas_comp_b_imm(s, insn); 2450 break; 2451 case 0x1b: case 0x5b: /* Test & branch (immediate) */ 2452 disas_test_b_imm(s, insn); 2453 break; 2454 case 0x2a: /* Conditional branch (immediate) */ 2455 disas_cond_b_imm(s, insn); 2456 break; 2457 case 0x6a: /* Exception generation / System */ 2458 if (insn & (1 << 24)) { 2459 if (extract32(insn, 22, 2) == 0) { 2460 disas_system(s, insn); 2461 } else { 2462 unallocated_encoding(s); 2463 } 2464 } else { 2465 disas_exc(s, insn); 2466 } 2467 break; 2468 case 0x6b: /* Unconditional branch (register) */ 2469 disas_uncond_b_reg(s, insn); 2470 break; 2471 default: 2472 unallocated_encoding(s); 2473 break; 2474 } 2475 } 2476 2477 /* 2478 * Load/Store exclusive instructions are implemented by remembering 2479 * the value/address loaded, and seeing if these are the same 2480 * when the store is performed. This is not actually the architecturally 2481 * mandated semantics, but it works for typical guest code sequences 2482 * and avoids having to monitor regular stores. 2483 * 2484 * The store exclusive uses the atomic cmpxchg primitives to avoid 2485 * races in multi-threaded linux-user and when MTTCG softmmu is 2486 * enabled. 2487 */ 2488 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, 2489 TCGv_i64 addr, int size, bool is_pair) 2490 { 2491 int idx = get_mem_index(s); 2492 MemOp memop = s->be_data; 2493 2494 g_assert(size <= 3); 2495 if (is_pair) { 2496 g_assert(size >= 2); 2497 if (size == 2) { 2498 /* The pair must be single-copy atomic for the doubleword. */ 2499 memop |= MO_64 | MO_ALIGN; 2500 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop); 2501 if (s->be_data == MO_LE) { 2502 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2503 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2504 } else { 2505 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2506 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2507 } 2508 } else { 2509 /* The pair must be single-copy atomic for *each* doubleword, not 2510 the entire quadword, however it must be quadword aligned. 
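 * (So an LDXP x0, x1, [x2] of 16 bytes may be split into two 8-byte
 * single-copy-atomic loads, which is why the code below asks only for
 * MO_ALIGN_16 on the first load instead of a full MO_128 access.)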
*/ 2511 memop |= MO_64; 2512 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, 2513 memop | MO_ALIGN_16); 2514 2515 TCGv_i64 addr2 = tcg_temp_new_i64(); 2516 tcg_gen_addi_i64(addr2, addr, 8); 2517 tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop); 2518 tcg_temp_free_i64(addr2); 2519 2520 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2521 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2522 } 2523 } else { 2524 memop |= size | MO_ALIGN; 2525 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop); 2526 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2527 } 2528 tcg_gen_mov_i64(cpu_exclusive_addr, addr); 2529 } 2530 2531 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2532 TCGv_i64 addr, int size, int is_pair) 2533 { 2534 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2535 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2536 * [addr] = {Rt}; 2537 * if (is_pair) { 2538 * [addr + datasize] = {Rt2}; 2539 * } 2540 * {Rd} = 0; 2541 * } else { 2542 * {Rd} = 1; 2543 * } 2544 * env->exclusive_addr = -1; 2545 */ 2546 TCGLabel *fail_label = gen_new_label(); 2547 TCGLabel *done_label = gen_new_label(); 2548 TCGv_i64 tmp; 2549 2550 tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label); 2551 2552 tmp = tcg_temp_new_i64(); 2553 if (is_pair) { 2554 if (size == 2) { 2555 if (s->be_data == MO_LE) { 2556 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2557 } else { 2558 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2559 } 2560 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2561 cpu_exclusive_val, tmp, 2562 get_mem_index(s), 2563 MO_64 | MO_ALIGN | s->be_data); 2564 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2565 } else { 2566 TCGv_i128 t16 = tcg_temp_new_i128(); 2567 TCGv_i128 c16 = tcg_temp_new_i128(); 2568 TCGv_i64 a, b; 2569 2570 if (s->be_data == MO_LE) { 2571 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2572 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2573 cpu_exclusive_high); 2574 } else { 2575 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2576 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 2577 cpu_exclusive_val); 2578 } 2579 2580 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 2581 get_mem_index(s), 2582 MO_128 | MO_ALIGN | s->be_data); 2583 tcg_temp_free_i128(c16); 2584 2585 a = tcg_temp_new_i64(); 2586 b = tcg_temp_new_i64(); 2587 if (s->be_data == MO_LE) { 2588 tcg_gen_extr_i128_i64(a, b, t16); 2589 } else { 2590 tcg_gen_extr_i128_i64(b, a, t16); 2591 } 2592 2593 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 2594 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 2595 tcg_gen_or_i64(tmp, a, b); 2596 tcg_temp_free_i64(a); 2597 tcg_temp_free_i64(b); 2598 tcg_temp_free_i128(t16); 2599 2600 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 2601 } 2602 } else { 2603 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 2604 cpu_reg(s, rt), get_mem_index(s), 2605 size | MO_ALIGN | s->be_data); 2606 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2607 } 2608 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 2609 tcg_temp_free_i64(tmp); 2610 tcg_gen_br(done_label); 2611 2612 gen_set_label(fail_label); 2613 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 2614 gen_set_label(done_label); 2615 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2616 } 2617 2618 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 2619 int rn, int size) 2620 { 2621 TCGv_i64 tcg_rs = cpu_reg(s, rs); 2622 TCGv_i64 tcg_rt = 
cpu_reg(s, rt); 2623 int memidx = get_mem_index(s); 2624 TCGv_i64 clean_addr; 2625 2626 if (rn == 31) { 2627 gen_check_sp_alignment(s); 2628 } 2629 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); 2630 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx, 2631 size | MO_ALIGN | s->be_data); 2632 } 2633 2634 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 2635 int rn, int size) 2636 { 2637 TCGv_i64 s1 = cpu_reg(s, rs); 2638 TCGv_i64 s2 = cpu_reg(s, rs + 1); 2639 TCGv_i64 t1 = cpu_reg(s, rt); 2640 TCGv_i64 t2 = cpu_reg(s, rt + 1); 2641 TCGv_i64 clean_addr; 2642 int memidx = get_mem_index(s); 2643 2644 if (rn == 31) { 2645 gen_check_sp_alignment(s); 2646 } 2647 2648 /* This is a single atomic access, despite the "pair". */ 2649 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1); 2650 2651 if (size == 2) { 2652 TCGv_i64 cmp = tcg_temp_new_i64(); 2653 TCGv_i64 val = tcg_temp_new_i64(); 2654 2655 if (s->be_data == MO_LE) { 2656 tcg_gen_concat32_i64(val, t1, t2); 2657 tcg_gen_concat32_i64(cmp, s1, s2); 2658 } else { 2659 tcg_gen_concat32_i64(val, t2, t1); 2660 tcg_gen_concat32_i64(cmp, s2, s1); 2661 } 2662 2663 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, 2664 MO_64 | MO_ALIGN | s->be_data); 2665 tcg_temp_free_i64(val); 2666 2667 if (s->be_data == MO_LE) { 2668 tcg_gen_extr32_i64(s1, s2, cmp); 2669 } else { 2670 tcg_gen_extr32_i64(s2, s1, cmp); 2671 } 2672 tcg_temp_free_i64(cmp); 2673 } else { 2674 TCGv_i128 cmp = tcg_temp_new_i128(); 2675 TCGv_i128 val = tcg_temp_new_i128(); 2676 2677 if (s->be_data == MO_LE) { 2678 tcg_gen_concat_i64_i128(val, t1, t2); 2679 tcg_gen_concat_i64_i128(cmp, s1, s2); 2680 } else { 2681 tcg_gen_concat_i64_i128(val, t2, t1); 2682 tcg_gen_concat_i64_i128(cmp, s2, s1); 2683 } 2684 2685 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, 2686 MO_128 | MO_ALIGN | s->be_data); 2687 tcg_temp_free_i128(val); 2688 2689 if (s->be_data == MO_LE) { 2690 tcg_gen_extr_i128_i64(s1, s2, cmp); 2691 } else { 2692 tcg_gen_extr_i128_i64(s2, s1, cmp); 2693 } 2694 tcg_temp_free_i128(cmp); 2695 } 2696 } 2697 2698 /* Update the Sixty-Four bit (SF) registersize. This logic is derived 2699 * from the ARMv8 specs for LDR (Shared decode for all encodings). 2700 */ 2701 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc) 2702 { 2703 int opc0 = extract32(opc, 0, 1); 2704 int regsize; 2705 2706 if (is_signed) { 2707 regsize = opc0 ? 32 : 64; 2708 } else { 2709 regsize = size == 3 ? 
64 : 32; 2710 } 2711 return regsize == 64; 2712 } 2713 2714 /* Load/store exclusive 2715 * 2716 * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0 2717 * +-----+-------------+----+---+----+------+----+-------+------+------+ 2718 * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt | 2719 * +-----+-------------+----+---+----+------+----+-------+------+------+ 2720 * 2721 * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit 2722 * L: 0 -> store, 1 -> load 2723 * o2: 0 -> exclusive, 1 -> not 2724 * o1: 0 -> single register, 1 -> register pair 2725 * o0: 1 -> load-acquire/store-release, 0 -> not 2726 */ 2727 static void disas_ldst_excl(DisasContext *s, uint32_t insn) 2728 { 2729 int rt = extract32(insn, 0, 5); 2730 int rn = extract32(insn, 5, 5); 2731 int rt2 = extract32(insn, 10, 5); 2732 int rs = extract32(insn, 16, 5); 2733 int is_lasr = extract32(insn, 15, 1); 2734 int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr; 2735 int size = extract32(insn, 30, 2); 2736 TCGv_i64 clean_addr; 2737 2738 switch (o2_L_o1_o0) { 2739 case 0x0: /* STXR */ 2740 case 0x1: /* STLXR */ 2741 if (rn == 31) { 2742 gen_check_sp_alignment(s); 2743 } 2744 if (is_lasr) { 2745 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2746 } 2747 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2748 true, rn != 31, size); 2749 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false); 2750 return; 2751 2752 case 0x4: /* LDXR */ 2753 case 0x5: /* LDAXR */ 2754 if (rn == 31) { 2755 gen_check_sp_alignment(s); 2756 } 2757 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2758 false, rn != 31, size); 2759 s->is_ldex = true; 2760 gen_load_exclusive(s, rt, rt2, clean_addr, size, false); 2761 if (is_lasr) { 2762 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2763 } 2764 return; 2765 2766 case 0x8: /* STLLR */ 2767 if (!dc_isar_feature(aa64_lor, s)) { 2768 break; 2769 } 2770 /* StoreLORelease is the same as Store-Release for QEMU. */ 2771 /* fall through */ 2772 case 0x9: /* STLR */ 2773 /* Generate ISS for non-exclusive accesses including LASR. */ 2774 if (rn == 31) { 2775 gen_check_sp_alignment(s); 2776 } 2777 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2778 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2779 true, rn != 31, size); 2780 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 2781 do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt, 2782 disas_ldst_compute_iss_sf(size, false, 0), is_lasr); 2783 return; 2784 2785 case 0xc: /* LDLAR */ 2786 if (!dc_isar_feature(aa64_lor, s)) { 2787 break; 2788 } 2789 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 2790 /* fall through */ 2791 case 0xd: /* LDAR */ 2792 /* Generate ISS for non-exclusive accesses including LASR. 
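 * (The trailing is_lasr argument below becomes the ISS AR bit in the
 * syndrome, so a hypervisor emulating a faulted LDAR/STLR can preserve
 * its acquire/release ordering.)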
*/ 2793 if (rn == 31) { 2794 gen_check_sp_alignment(s); 2795 } 2796 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2797 false, rn != 31, size); 2798 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 2799 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true, 2800 rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); 2801 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2802 return; 2803 2804 case 0x2: case 0x3: /* CASP / STXP */ 2805 if (size & 2) { /* STXP / STLXP */ 2806 if (rn == 31) { 2807 gen_check_sp_alignment(s); 2808 } 2809 if (is_lasr) { 2810 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2811 } 2812 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2813 true, rn != 31, size); 2814 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true); 2815 return; 2816 } 2817 if (rt2 == 31 2818 && ((rt | rs) & 1) == 0 2819 && dc_isar_feature(aa64_atomics, s)) { 2820 /* CASP / CASPL */ 2821 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); 2822 return; 2823 } 2824 break; 2825 2826 case 0x6: case 0x7: /* CASPA / LDXP */ 2827 if (size & 2) { /* LDXP / LDAXP */ 2828 if (rn == 31) { 2829 gen_check_sp_alignment(s); 2830 } 2831 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2832 false, rn != 31, size); 2833 s->is_ldex = true; 2834 gen_load_exclusive(s, rt, rt2, clean_addr, size, true); 2835 if (is_lasr) { 2836 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2837 } 2838 return; 2839 } 2840 if (rt2 == 31 2841 && ((rt | rs) & 1) == 0 2842 && dc_isar_feature(aa64_atomics, s)) { 2843 /* CASPA / CASPAL */ 2844 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); 2845 return; 2846 } 2847 break; 2848 2849 case 0xa: /* CAS */ 2850 case 0xb: /* CASL */ 2851 case 0xe: /* CASA */ 2852 case 0xf: /* CASAL */ 2853 if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) { 2854 gen_compare_and_swap(s, rs, rt, rn, size); 2855 return; 2856 } 2857 break; 2858 } 2859 unallocated_encoding(s); 2860 } 2861 2862 /* 2863 * Load register (literal) 2864 * 2865 * 31 30 29 27 26 25 24 23 5 4 0 2866 * +-----+-------+---+-----+-------------------+-------+ 2867 * | opc | 0 1 1 | V | 0 0 | imm19 | Rt | 2868 * +-----+-------+---+-----+-------------------+-------+ 2869 * 2870 * V: 1 -> vector (simd/fp) 2871 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit, 2872 * 10-> 32 bit signed, 11 -> prefetch 2873 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated) 2874 */ 2875 static void disas_ld_lit(DisasContext *s, uint32_t insn) 2876 { 2877 int rt = extract32(insn, 0, 5); 2878 int64_t imm = sextract32(insn, 5, 19) << 2; 2879 bool is_vector = extract32(insn, 26, 1); 2880 int opc = extract32(insn, 30, 2); 2881 bool is_signed = false; 2882 int size = 2; 2883 TCGv_i64 tcg_rt, clean_addr; 2884 2885 if (is_vector) { 2886 if (opc == 3) { 2887 unallocated_encoding(s); 2888 return; 2889 } 2890 size = 2 + opc; 2891 if (!fp_access_check(s)) { 2892 return; 2893 } 2894 } else { 2895 if (opc == 3) { 2896 /* PRFM (literal) : prefetch */ 2897 return; 2898 } 2899 size = 2 + extract32(opc, 0, 1); 2900 is_signed = extract32(opc, 1, 1); 2901 } 2902 2903 tcg_rt = cpu_reg(s, rt); 2904 2905 clean_addr = tcg_temp_new_i64(); 2906 gen_pc_plus_diff(s, clean_addr, imm); 2907 if (is_vector) { 2908 do_fp_ld(s, rt, clean_addr, size); 2909 } else { 2910 /* Only unsigned 32bit loads target 32bit registers. 
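 * (Concretely: the 32-bit LDR literal has opc == 0 and reports SF == 0,
 * while the 64-bit and signed-word forms have opc != 0 and report
 * SF == 1, matching the iss_sf computation below.)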
*/ 2911 bool iss_sf = opc != 0; 2912 2913 do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 2914 false, true, rt, iss_sf, false); 2915 } 2916 } 2917 2918 /* 2919 * LDNP (Load Pair - non-temporal hint) 2920 * LDP (Load Pair - non vector) 2921 * LDPSW (Load Pair Signed Word - non vector) 2922 * STNP (Store Pair - non-temporal hint) 2923 * STP (Store Pair - non vector) 2924 * LDNP (Load Pair of SIMD&FP - non-temporal hint) 2925 * LDP (Load Pair of SIMD&FP) 2926 * STNP (Store Pair of SIMD&FP - non-temporal hint) 2927 * STP (Store Pair of SIMD&FP) 2928 * 2929 * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0 2930 * +-----+-------+---+---+-------+---+-----------------------------+ 2931 * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt | 2932 * +-----+-------+---+---+-------+---+-------+-------+------+------+ 2933 * 2934 * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit 2935 * LDPSW/STGP 01 2936 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit 2937 * V: 0 -> GPR, 1 -> Vector 2938 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index, 2939 * 10 -> signed offset, 11 -> pre-index 2940 * L: 0 -> Store 1 -> Load 2941 * 2942 * Rt, Rt2 = GPR or SIMD registers to be stored 2943 * Rn = general purpose register containing address 2944 * imm7 = signed offset (multiple of 4 or 8 depending on size) 2945 */ 2946 static void disas_ldst_pair(DisasContext *s, uint32_t insn) 2947 { 2948 int rt = extract32(insn, 0, 5); 2949 int rn = extract32(insn, 5, 5); 2950 int rt2 = extract32(insn, 10, 5); 2951 uint64_t offset = sextract64(insn, 15, 7); 2952 int index = extract32(insn, 23, 2); 2953 bool is_vector = extract32(insn, 26, 1); 2954 bool is_load = extract32(insn, 22, 1); 2955 int opc = extract32(insn, 30, 2); 2956 2957 bool is_signed = false; 2958 bool postindex = false; 2959 bool wback = false; 2960 bool set_tag = false; 2961 2962 TCGv_i64 clean_addr, dirty_addr; 2963 2964 int size; 2965 2966 if (opc == 3) { 2967 unallocated_encoding(s); 2968 return; 2969 } 2970 2971 if (is_vector) { 2972 size = 2 + opc; 2973 } else if (opc == 1 && !is_load) { 2974 /* STGP */ 2975 if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) { 2976 unallocated_encoding(s); 2977 return; 2978 } 2979 size = 3; 2980 set_tag = true; 2981 } else { 2982 size = 2 + extract32(opc, 1, 1); 2983 is_signed = extract32(opc, 0, 1); 2984 if (!is_load && is_signed) { 2985 unallocated_encoding(s); 2986 return; 2987 } 2988 } 2989 2990 switch (index) { 2991 case 1: /* post-index */ 2992 postindex = true; 2993 wback = true; 2994 break; 2995 case 0: 2996 /* signed offset with "non-temporal" hint. Since we don't emulate 2997 * caches we don't care about hints to the cache system about 2998 * data access patterns, and handle this identically to plain 2999 * signed offset. 3000 */ 3001 if (is_signed) { 3002 /* There is no non-temporal-hint version of LDPSW */ 3003 unallocated_encoding(s); 3004 return; 3005 } 3006 postindex = false; 3007 break; 3008 case 2: /* signed offset, rn not updated */ 3009 postindex = false; 3010 break; 3011 case 3: /* pre-index */ 3012 postindex = false; 3013 wback = true; 3014 break; 3015 } 3016 3017 if (is_vector && !fp_access_check(s)) { 3018 return; 3019 } 3020 3021 offset <<= (set_tag ? 
LOG2_TAG_GRANULE : size); 3022 3023 if (rn == 31) { 3024 gen_check_sp_alignment(s); 3025 } 3026 3027 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3028 if (!postindex) { 3029 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3030 } 3031 3032 if (set_tag) { 3033 if (!s->ata) { 3034 /* 3035 * TODO: We could rely on the stores below, at least for 3036 * system mode, if we arrange to add MO_ALIGN_16. 3037 */ 3038 gen_helper_stg_stub(cpu_env, dirty_addr); 3039 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3040 gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr); 3041 } else { 3042 gen_helper_stg(cpu_env, dirty_addr, dirty_addr); 3043 } 3044 } 3045 3046 clean_addr = gen_mte_checkN(s, dirty_addr, !is_load, 3047 (wback || rn != 31) && !set_tag, 2 << size); 3048 3049 if (is_vector) { 3050 if (is_load) { 3051 do_fp_ld(s, rt, clean_addr, size); 3052 } else { 3053 do_fp_st(s, rt, clean_addr, size); 3054 } 3055 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); 3056 if (is_load) { 3057 do_fp_ld(s, rt2, clean_addr, size); 3058 } else { 3059 do_fp_st(s, rt2, clean_addr, size); 3060 } 3061 } else { 3062 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3063 TCGv_i64 tcg_rt2 = cpu_reg(s, rt2); 3064 3065 if (is_load) { 3066 TCGv_i64 tmp = tcg_temp_new_i64(); 3067 3068 /* Do not modify tcg_rt before recognizing any exception 3069 * from the second load. 3070 */ 3071 do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN, 3072 false, false, 0, false, false); 3073 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); 3074 do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN, 3075 false, false, 0, false, false); 3076 3077 tcg_gen_mov_i64(tcg_rt, tmp); 3078 tcg_temp_free_i64(tmp); 3079 } else { 3080 do_gpr_st(s, tcg_rt, clean_addr, size, 3081 false, 0, false, false); 3082 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); 3083 do_gpr_st(s, tcg_rt2, clean_addr, size, 3084 false, 0, false, false); 3085 } 3086 } 3087 3088 if (wback) { 3089 if (postindex) { 3090 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3091 } 3092 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr); 3093 } 3094 } 3095 3096 /* 3097 * Load/store (immediate post-indexed) 3098 * Load/store (immediate pre-indexed) 3099 * Load/store (unscaled immediate) 3100 * 3101 * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0 3102 * +----+-------+---+-----+-----+---+--------+-----+------+------+ 3103 * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt | 3104 * +----+-------+---+-----+-----+---+--------+-----+------+------+ 3105 * 3106 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. 
(no writeback) 3107 10 -> unprivileged 3108 * V = 0 -> non-vector 3109 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit 3110 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 3111 */ 3112 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, 3113 int opc, 3114 int size, 3115 int rt, 3116 bool is_vector) 3117 { 3118 int rn = extract32(insn, 5, 5); 3119 int imm9 = sextract32(insn, 12, 9); 3120 int idx = extract32(insn, 10, 2); 3121 bool is_signed = false; 3122 bool is_store = false; 3123 bool is_extended = false; 3124 bool is_unpriv = (idx == 2); 3125 bool iss_valid; 3126 bool post_index; 3127 bool writeback; 3128 int memidx; 3129 3130 TCGv_i64 clean_addr, dirty_addr; 3131 3132 if (is_vector) { 3133 size |= (opc & 2) << 1; 3134 if (size > 4 || is_unpriv) { 3135 unallocated_encoding(s); 3136 return; 3137 } 3138 is_store = ((opc & 1) == 0); 3139 if (!fp_access_check(s)) { 3140 return; 3141 } 3142 } else { 3143 if (size == 3 && opc == 2) { 3144 /* PRFM - prefetch */ 3145 if (idx != 0) { 3146 unallocated_encoding(s); 3147 return; 3148 } 3149 return; 3150 } 3151 if (opc == 3 && size > 1) { 3152 unallocated_encoding(s); 3153 return; 3154 } 3155 is_store = (opc == 0); 3156 is_signed = extract32(opc, 1, 1); 3157 is_extended = (size < 3) && extract32(opc, 0, 1); 3158 } 3159 3160 switch (idx) { 3161 case 0: 3162 case 2: 3163 post_index = false; 3164 writeback = false; 3165 break; 3166 case 1: 3167 post_index = true; 3168 writeback = true; 3169 break; 3170 case 3: 3171 post_index = false; 3172 writeback = true; 3173 break; 3174 default: 3175 g_assert_not_reached(); 3176 } 3177 3178 iss_valid = !is_vector && !writeback; 3179 3180 if (rn == 31) { 3181 gen_check_sp_alignment(s); 3182 } 3183 3184 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3185 if (!post_index) { 3186 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); 3187 } 3188 3189 memidx = is_unpriv ? 
get_a64_user_mem_index(s) : get_mem_index(s); 3190 clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store, 3191 writeback || rn != 31, 3192 size, is_unpriv, memidx); 3193 3194 if (is_vector) { 3195 if (is_store) { 3196 do_fp_st(s, rt, clean_addr, size); 3197 } else { 3198 do_fp_ld(s, rt, clean_addr, size); 3199 } 3200 } else { 3201 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3202 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); 3203 3204 if (is_store) { 3205 do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx, 3206 iss_valid, rt, iss_sf, false); 3207 } else { 3208 do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 3209 is_extended, memidx, 3210 iss_valid, rt, iss_sf, false); 3211 } 3212 } 3213 3214 if (writeback) { 3215 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn); 3216 if (post_index) { 3217 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); 3218 } 3219 tcg_gen_mov_i64(tcg_rn, dirty_addr); 3220 } 3221 } 3222 3223 /* 3224 * Load/store (register offset) 3225 * 3226 * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0 3227 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ 3228 * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt | 3229 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ 3230 * 3231 * For non-vector: 3232 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit 3233 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 3234 * For vector: 3235 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated 3236 * opc<0>: 0 -> store, 1 -> load 3237 * V: 1 -> vector/simd 3238 * opt: extend encoding (see DecodeRegExtend) 3239 * S: if S=1 then scale (essentially index by sizeof(size)) 3240 * Rt: register to transfer into/out of 3241 * Rn: address register or SP for base 3242 * Rm: offset register or ZR for offset 3243 */ 3244 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, 3245 int opc, 3246 int size, 3247 int rt, 3248 bool is_vector) 3249 { 3250 int rn = extract32(insn, 5, 5); 3251 int shift = extract32(insn, 12, 1); 3252 int rm = extract32(insn, 16, 5); 3253 int opt = extract32(insn, 13, 3); 3254 bool is_signed = false; 3255 bool is_store = false; 3256 bool is_extended = false; 3257 3258 TCGv_i64 tcg_rm, clean_addr, dirty_addr; 3259 3260 if (extract32(opt, 1, 1) == 0) { 3261 unallocated_encoding(s); 3262 return; 3263 } 3264 3265 if (is_vector) { 3266 size |= (opc & 2) << 1; 3267 if (size > 4) { 3268 unallocated_encoding(s); 3269 return; 3270 } 3271 is_store = !extract32(opc, 0, 1); 3272 if (!fp_access_check(s)) { 3273 return; 3274 } 3275 } else { 3276 if (size == 3 && opc == 2) { 3277 /* PRFM - prefetch */ 3278 return; 3279 } 3280 if (opc == 3 && size > 1) { 3281 unallocated_encoding(s); 3282 return; 3283 } 3284 is_store = (opc == 0); 3285 is_signed = extract32(opc, 1, 1); 3286 is_extended = (size < 3) && extract32(opc, 0, 1); 3287 } 3288 3289 if (rn == 31) { 3290 gen_check_sp_alignment(s); 3291 } 3292 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3293 3294 tcg_rm = read_cpu_reg(s, rm, 1); 3295 ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? 
size : 0); 3296 3297 tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm); 3298 clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size); 3299 3300 if (is_vector) { 3301 if (is_store) { 3302 do_fp_st(s, rt, clean_addr, size); 3303 } else { 3304 do_fp_ld(s, rt, clean_addr, size); 3305 } 3306 } else { 3307 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3308 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); 3309 if (is_store) { 3310 do_gpr_st(s, tcg_rt, clean_addr, size, 3311 true, rt, iss_sf, false); 3312 } else { 3313 do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 3314 is_extended, true, rt, iss_sf, false); 3315 } 3316 } 3317 } 3318 3319 /* 3320 * Load/store (unsigned immediate) 3321 * 3322 * 31 30 29 27 26 25 24 23 22 21 10 9 5 3323 * +----+-------+---+-----+-----+------------+-------+------+ 3324 * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt | 3325 * +----+-------+---+-----+-----+------------+-------+------+ 3326 * 3327 * For non-vector: 3328 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit 3329 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 3330 * For vector: 3331 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated 3332 * opc<0>: 0 -> store, 1 -> load 3333 * Rn: base address register (inc SP) 3334 * Rt: target register 3335 */ 3336 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, 3337 int opc, 3338 int size, 3339 int rt, 3340 bool is_vector) 3341 { 3342 int rn = extract32(insn, 5, 5); 3343 unsigned int imm12 = extract32(insn, 10, 12); 3344 unsigned int offset; 3345 3346 TCGv_i64 clean_addr, dirty_addr; 3347 3348 bool is_store; 3349 bool is_signed = false; 3350 bool is_extended = false; 3351 3352 if (is_vector) { 3353 size |= (opc & 2) << 1; 3354 if (size > 4) { 3355 unallocated_encoding(s); 3356 return; 3357 } 3358 is_store = !extract32(opc, 0, 1); 3359 if (!fp_access_check(s)) { 3360 return; 3361 } 3362 } else { 3363 if (size == 3 && opc == 2) { 3364 /* PRFM - prefetch */ 3365 return; 3366 } 3367 if (opc == 3 && size > 1) { 3368 unallocated_encoding(s); 3369 return; 3370 } 3371 is_store = (opc == 0); 3372 is_signed = extract32(opc, 1, 1); 3373 is_extended = (size < 3) && extract32(opc, 0, 1); 3374 } 3375 3376 if (rn == 31) { 3377 gen_check_sp_alignment(s); 3378 } 3379 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3380 offset = imm12 << size; 3381 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3382 clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size); 3383 3384 if (is_vector) { 3385 if (is_store) { 3386 do_fp_st(s, rt, clean_addr, size); 3387 } else { 3388 do_fp_ld(s, rt, clean_addr, size); 3389 } 3390 } else { 3391 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3392 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); 3393 if (is_store) { 3394 do_gpr_st(s, tcg_rt, clean_addr, size, 3395 true, rt, iss_sf, false); 3396 } else { 3397 do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 3398 is_extended, true, rt, iss_sf, false); 3399 } 3400 } 3401 } 3402 3403 /* Atomic memory operations 3404 * 3405 * 31 30 27 26 24 22 21 16 15 12 10 5 0 3406 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+ 3407 * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt | 3408 * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+ 3409 * 3410 * Rt: the result register 3411 * Rn: base address or SP 3412 * Rs: the source register for the operation 3413 * V: vector flag (always 0 as of v8.3) 3414 * A: acquire flag 3415 * R: release flag 3416 */ 3417 static 
void disas_ldst_atomic(DisasContext *s, uint32_t insn, 3418 int size, int rt, bool is_vector) 3419 { 3420 int rs = extract32(insn, 16, 5); 3421 int rn = extract32(insn, 5, 5); 3422 int o3_opc = extract32(insn, 12, 4); 3423 bool r = extract32(insn, 22, 1); 3424 bool a = extract32(insn, 23, 1); 3425 TCGv_i64 tcg_rs, tcg_rt, clean_addr; 3426 AtomicThreeOpFn *fn = NULL; 3427 MemOp mop = s->be_data | size | MO_ALIGN; 3428 3429 if (is_vector || !dc_isar_feature(aa64_atomics, s)) { 3430 unallocated_encoding(s); 3431 return; 3432 } 3433 switch (o3_opc) { 3434 case 000: /* LDADD */ 3435 fn = tcg_gen_atomic_fetch_add_i64; 3436 break; 3437 case 001: /* LDCLR */ 3438 fn = tcg_gen_atomic_fetch_and_i64; 3439 break; 3440 case 002: /* LDEOR */ 3441 fn = tcg_gen_atomic_fetch_xor_i64; 3442 break; 3443 case 003: /* LDSET */ 3444 fn = tcg_gen_atomic_fetch_or_i64; 3445 break; 3446 case 004: /* LDSMAX */ 3447 fn = tcg_gen_atomic_fetch_smax_i64; 3448 mop |= MO_SIGN; 3449 break; 3450 case 005: /* LDSMIN */ 3451 fn = tcg_gen_atomic_fetch_smin_i64; 3452 mop |= MO_SIGN; 3453 break; 3454 case 006: /* LDUMAX */ 3455 fn = tcg_gen_atomic_fetch_umax_i64; 3456 break; 3457 case 007: /* LDUMIN */ 3458 fn = tcg_gen_atomic_fetch_umin_i64; 3459 break; 3460 case 010: /* SWP */ 3461 fn = tcg_gen_atomic_xchg_i64; 3462 break; 3463 case 014: /* LDAPR, LDAPRH, LDAPRB */ 3464 if (!dc_isar_feature(aa64_rcpc_8_3, s) || 3465 rs != 31 || a != 1 || r != 0) { 3466 unallocated_encoding(s); 3467 return; 3468 } 3469 break; 3470 default: 3471 unallocated_encoding(s); 3472 return; 3473 } 3474 3475 if (rn == 31) { 3476 gen_check_sp_alignment(s); 3477 } 3478 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); 3479 3480 if (o3_opc == 014) { 3481 /* 3482 * LDAPR* are a special case because they are a simple load, not a 3483 * fetch-and-do-something op. 3484 * The architectural consistency requirements here are weaker than 3485 * full load-acquire (we only need "load-acquire processor consistent"), 3486 * but we choose to implement them as full LDAQ. 3487 */ 3488 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, 3489 true, rt, disas_ldst_compute_iss_sf(size, false, 0), true); 3490 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3491 return; 3492 } 3493 3494 tcg_rs = read_cpu_reg(s, rs, true); 3495 tcg_rt = cpu_reg(s, rt); 3496 3497 if (o3_opc == 1) { /* LDCLR */ 3498 tcg_gen_not_i64(tcg_rs, tcg_rs); 3499 } 3500 3501 /* The tcg atomic primitives are all full barriers. Therefore we 3502 * can ignore the Acquire and Release bits of this instruction. 3503 */ 3504 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3505 3506 if ((mop & MO_SIGN) && size != MO_64) { 3507 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3508 } 3509 } 3510 3511 /* 3512 * PAC memory operations 3513 * 3514 * 31 30 27 26 24 22 21 12 11 10 5 0 3515 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+ 3516 * | size | 1 1 1 | V | 0 0 | M S | 1 | imm9 | W | 1 | Rn | Rt | 3517 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+ 3518 * 3519 * Rt: the result register 3520 * Rn: base address or SP 3521 * V: vector flag (always 0 as of v8.3) 3522 * M: clear for key DA, set for key DB 3523 * W: pre-indexing flag 3524 * S: sign for imm9. 
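 * Offset example (matching the sextract32 computation below): S:imm9 =
 * 0x3ff is the 10-bit value -1, scaled by the 8-byte access size to a
 * byte offset of -8; the reachable range is -4096..+4088 in steps of 8.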
3525 */ 3526 static void disas_ldst_pac(DisasContext *s, uint32_t insn, 3527 int size, int rt, bool is_vector) 3528 { 3529 int rn = extract32(insn, 5, 5); 3530 bool is_wback = extract32(insn, 11, 1); 3531 bool use_key_a = !extract32(insn, 23, 1); 3532 int offset; 3533 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3534 3535 if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) { 3536 unallocated_encoding(s); 3537 return; 3538 } 3539 3540 if (rn == 31) { 3541 gen_check_sp_alignment(s); 3542 } 3543 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3544 3545 if (s->pauth_active) { 3546 if (use_key_a) { 3547 gen_helper_autda(dirty_addr, cpu_env, dirty_addr, 3548 tcg_constant_i64(0)); 3549 } else { 3550 gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, 3551 tcg_constant_i64(0)); 3552 } 3553 } 3554 3555 /* Form the 10-bit signed, scaled offset. */ 3556 offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9); 3557 offset = sextract32(offset << size, 0, 10 + size); 3558 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3559 3560 /* Note that "clean" and "dirty" here refer to TBI not PAC. */ 3561 clean_addr = gen_mte_check1(s, dirty_addr, false, 3562 is_wback || rn != 31, size); 3563 3564 tcg_rt = cpu_reg(s, rt); 3565 do_gpr_ld(s, tcg_rt, clean_addr, size, 3566 /* extend */ false, /* iss_valid */ !is_wback, 3567 /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false); 3568 3569 if (is_wback) { 3570 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr); 3571 } 3572 } 3573 3574 /* 3575 * LDAPR/STLR (unscaled immediate) 3576 * 3577 * 31 30 24 22 21 12 10 5 0 3578 * +------+-------------+-----+---+--------+-----+----+-----+ 3579 * | size | 0 1 1 0 0 1 | opc | 0 | imm9 | 0 0 | Rn | Rt | 3580 * +------+-------------+-----+---+--------+-----+----+-----+ 3581 * 3582 * Rt: source or destination register 3583 * Rn: base register 3584 * imm9: unscaled immediate offset 3585 * opc: 00: STLUR*, 01/10/11: various LDAPUR* 3586 * size: size of load/store 3587 */ 3588 static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) 3589 { 3590 int rt = extract32(insn, 0, 5); 3591 int rn = extract32(insn, 5, 5); 3592 int offset = sextract32(insn, 12, 9); 3593 int opc = extract32(insn, 22, 2); 3594 int size = extract32(insn, 30, 2); 3595 TCGv_i64 clean_addr, dirty_addr; 3596 bool is_store = false; 3597 bool extend = false; 3598 bool iss_sf; 3599 MemOp mop; 3600 3601 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3602 unallocated_encoding(s); 3603 return; 3604 } 3605 3606 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3607 mop = size | MO_ALIGN; 3608 3609 switch (opc) { 3610 case 0: /* STLURB */ 3611 is_store = true; 3612 break; 3613 case 1: /* LDAPUR* */ 3614 break; 3615 case 2: /* LDAPURS* 64-bit variant */ 3616 if (size == 3) { 3617 unallocated_encoding(s); 3618 return; 3619 } 3620 mop |= MO_SIGN; 3621 break; 3622 case 3: /* LDAPURS* 32-bit variant */ 3623 if (size > 1) { 3624 unallocated_encoding(s); 3625 return; 3626 } 3627 mop |= MO_SIGN; 3628 extend = true; /* zero-extend 32->64 after signed load */ 3629 break; 3630 default: 3631 g_assert_not_reached(); 3632 } 3633 3634 iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc); 3635 3636 if (rn == 31) { 3637 gen_check_sp_alignment(s); 3638 } 3639 3640 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3641 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3642 clean_addr = clean_data_tbi(s, dirty_addr); 3643 3644 if (is_store) { 3645 /* Store-Release semantics */ 3646 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3647 do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true); 
3648 } else { 3649 /* 3650 * Load-AcquirePC semantics; we implement as the slightly more 3651 * restrictive Load-Acquire. 3652 */ 3653 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, 3654 extend, true, rt, iss_sf, true); 3655 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3656 } 3657 } 3658 3659 /* Load/store register (all forms) */ 3660 static void disas_ldst_reg(DisasContext *s, uint32_t insn) 3661 { 3662 int rt = extract32(insn, 0, 5); 3663 int opc = extract32(insn, 22, 2); 3664 bool is_vector = extract32(insn, 26, 1); 3665 int size = extract32(insn, 30, 2); 3666 3667 switch (extract32(insn, 24, 2)) { 3668 case 0: 3669 if (extract32(insn, 21, 1) == 0) { 3670 /* Load/store register (unscaled immediate) 3671 * Load/store immediate pre/post-indexed 3672 * Load/store register unprivileged 3673 */ 3674 disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector); 3675 return; 3676 } 3677 switch (extract32(insn, 10, 2)) { 3678 case 0: 3679 disas_ldst_atomic(s, insn, size, rt, is_vector); 3680 return; 3681 case 2: 3682 disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector); 3683 return; 3684 default: 3685 disas_ldst_pac(s, insn, size, rt, is_vector); 3686 return; 3687 } 3688 break; 3689 case 1: 3690 disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector); 3691 return; 3692 } 3693 unallocated_encoding(s); 3694 } 3695 3696 /* AdvSIMD load/store multiple structures 3697 * 3698 * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0 3699 * +---+---+---------------+---+-------------+--------+------+------+------+ 3700 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt | 3701 * +---+---+---------------+---+-------------+--------+------+------+------+ 3702 * 3703 * AdvSIMD load/store multiple structures (post-indexed) 3704 * 3705 * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0 3706 * +---+---+---------------+---+---+---------+--------+------+------+------+ 3707 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt | 3708 * +---+---+---------------+---+---+---------+--------+------+------+------+ 3709 * 3710 * Rt: first (or only) SIMD&FP register to be transferred 3711 * Rn: base address or SP 3712 * Rm (post-index only): post-index register (when !31) or size dependent #imm 3713 */ 3714 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) 3715 { 3716 int rt = extract32(insn, 0, 5); 3717 int rn = extract32(insn, 5, 5); 3718 int rm = extract32(insn, 16, 5); 3719 int size = extract32(insn, 10, 2); 3720 int opcode = extract32(insn, 12, 4); 3721 bool is_store = !extract32(insn, 22, 1); 3722 bool is_postidx = extract32(insn, 23, 1); 3723 bool is_q = extract32(insn, 30, 1); 3724 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3725 MemOp endian, align, mop; 3726 3727 int total; /* total bytes */ 3728 int elements; /* elements per vector */ 3729 int rpt; /* num iterations */ 3730 int selem; /* structure elements */ 3731 int r; 3732 3733 if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) { 3734 unallocated_encoding(s); 3735 return; 3736 } 3737 3738 if (!is_postidx && rm != 0) { 3739 unallocated_encoding(s); 3740 return; 3741 } 3742 3743 /* From the shared decode logic */ 3744 switch (opcode) { 3745 case 0x0: 3746 rpt = 1; 3747 selem = 4; 3748 break; 3749 case 0x2: 3750 rpt = 4; 3751 selem = 1; 3752 break; 3753 case 0x4: 3754 rpt = 1; 3755 selem = 3; 3756 break; 3757 case 0x6: 3758 rpt = 3; 3759 selem = 1; 3760 break; 3761 case 0x7: 3762 rpt = 1; 3763 selem = 1; 3764 break; 3765 case 0x8: 3766 rpt = 1; 3767 selem = 2; 3768 break; 3769 case 0xa: 3770 rpt = 2; 3771 selem = 1; 3772 break; 
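            /*
             * I.e. (from the shared AdvSIMD decode): opcode 0 is LD4/ST4,
             * 0x2 is LD1/ST1 with four registers, 0x4 is LD3/ST3, 0x6 is
             * LD1/ST1 with three, 0x7 with one, 0x8 is LD2/ST2 and 0xa is
             * LD1/ST1 with two; anything else falls to unallocated below.
             */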
3773 default: 3774 unallocated_encoding(s); 3775 return; 3776 } 3777 3778 if (size == 3 && !is_q && selem != 1) { 3779 /* reserved */ 3780 unallocated_encoding(s); 3781 return; 3782 } 3783 3784 if (!fp_access_check(s)) { 3785 return; 3786 } 3787 3788 if (rn == 31) { 3789 gen_check_sp_alignment(s); 3790 } 3791 3792 /* For our purposes, bytes are always little-endian. */ 3793 endian = s->be_data; 3794 if (size == 0) { 3795 endian = MO_LE; 3796 } 3797 3798 total = rpt * selem * (is_q ? 16 : 8); 3799 tcg_rn = cpu_reg_sp(s, rn); 3800 3801 /* 3802 * Issue the MTE check vs the logical repeat count, before we 3803 * promote consecutive little-endian elements below. 3804 */ 3805 clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, 3806 total); 3807 3808 /* 3809 * Consecutive little-endian elements from a single register 3810 * can be promoted to a larger little-endian operation. 3811 */ 3812 align = MO_ALIGN; 3813 if (selem == 1 && endian == MO_LE) { 3814 align = pow2_align(size); 3815 size = 3; 3816 } 3817 if (!s->align_mem) { 3818 align = 0; 3819 } 3820 mop = endian | size | align; 3821 3822 elements = (is_q ? 16 : 8) >> size; 3823 tcg_ebytes = tcg_constant_i64(1 << size); 3824 for (r = 0; r < rpt; r++) { 3825 int e; 3826 for (e = 0; e < elements; e++) { 3827 int xs; 3828 for (xs = 0; xs < selem; xs++) { 3829 int tt = (rt + r + xs) % 32; 3830 if (is_store) { 3831 do_vec_st(s, tt, e, clean_addr, mop); 3832 } else { 3833 do_vec_ld(s, tt, e, clean_addr, mop); 3834 } 3835 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3836 } 3837 } 3838 } 3839 3840 if (!is_store) { 3841 /* For non-quad operations, setting a slice of the low 3842 * 64 bits of the register clears the high 64 bits (in 3843 * the ARM ARM pseudocode this is implicit in the fact 3844 * that 'rval' is a 64 bit wide variable). 3845 * For quad operations, we might still need to zero the 3846 * high bits of SVE. 
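 * (E.g. LD1 {v0.8b}, [x0] writes only 8 bytes but leaves v0 with bits
 * [127:64] zeroed, and on an SVE implementation the rest of the Z
 * register is zeroed as well.)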
3847 */ 3848 for (r = 0; r < rpt * selem; r++) { 3849 int tt = (rt + r) % 32; 3850 clear_vec_high(s, is_q, tt); 3851 } 3852 } 3853 3854 if (is_postidx) { 3855 if (rm == 31) { 3856 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3857 } else { 3858 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); 3859 } 3860 } 3861 } 3862 3863 /* AdvSIMD load/store single structure 3864 * 3865 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 3866 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3867 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt | 3868 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3869 * 3870 * AdvSIMD load/store single structure (post-indexed) 3871 * 3872 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 3873 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3874 * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt | 3875 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3876 * 3877 * Rt: first (or only) SIMD&FP register to be transferred 3878 * Rn: base address or SP 3879 * Rm (post-index only): post-index register (when !31) or size dependent #imm 3880 * index = encoded in Q:S:size dependent on size 3881 * 3882 * lane_size = encoded in R, opc 3883 * transfer width = encoded in opc, S, size 3884 */ 3885 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) 3886 { 3887 int rt = extract32(insn, 0, 5); 3888 int rn = extract32(insn, 5, 5); 3889 int rm = extract32(insn, 16, 5); 3890 int size = extract32(insn, 10, 2); 3891 int S = extract32(insn, 12, 1); 3892 int opc = extract32(insn, 13, 3); 3893 int R = extract32(insn, 21, 1); 3894 int is_load = extract32(insn, 22, 1); 3895 int is_postidx = extract32(insn, 23, 1); 3896 int is_q = extract32(insn, 30, 1); 3897 3898 int scale = extract32(opc, 1, 2); 3899 int selem = (extract32(opc, 0, 1) << 1 | R) + 1; 3900 bool replicate = false; 3901 int index = is_q << 3 | S << 2 | size; 3902 int xs, total; 3903 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3904 MemOp mop; 3905 3906 if (extract32(insn, 31, 1)) { 3907 unallocated_encoding(s); 3908 return; 3909 } 3910 if (!is_postidx && rm != 0) { 3911 unallocated_encoding(s); 3912 return; 3913 } 3914 3915 switch (scale) { 3916 case 3: 3917 if (!is_load || S) { 3918 unallocated_encoding(s); 3919 return; 3920 } 3921 scale = size; 3922 replicate = true; 3923 break; 3924 case 0: 3925 break; 3926 case 1: 3927 if (extract32(size, 0, 1)) { 3928 unallocated_encoding(s); 3929 return; 3930 } 3931 index >>= 1; 3932 break; 3933 case 2: 3934 if (extract32(size, 1, 1)) { 3935 unallocated_encoding(s); 3936 return; 3937 } 3938 if (!extract32(size, 0, 1)) { 3939 index >>= 2; 3940 } else { 3941 if (S) { 3942 unallocated_encoding(s); 3943 return; 3944 } 3945 index >>= 3; 3946 scale = 3; 3947 } 3948 break; 3949 default: 3950 g_assert_not_reached(); 3951 } 3952 3953 if (!fp_access_check(s)) { 3954 return; 3955 } 3956 3957 if (rn == 31) { 3958 gen_check_sp_alignment(s); 3959 } 3960 3961 total = selem << scale; 3962 tcg_rn = cpu_reg_sp(s, rn); 3963 3964 clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, 3965 total); 3966 mop = finalize_memop(s, scale); 3967 3968 tcg_ebytes = tcg_constant_i64(1 << scale); 3969 for (xs = 0; xs < selem; xs++) { 3970 if (replicate) { 3971 /* Load and replicate to all elements */ 3972 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 3973 3974 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 3975 tcg_gen_gvec_dup_i64(scale, 
vec_full_reg_offset(s, rt), 3976 (is_q + 1) * 8, vec_full_reg_size(s), 3977 tcg_tmp); 3978 tcg_temp_free_i64(tcg_tmp); 3979 } else { 3980 /* Load/store one element per register */ 3981 if (is_load) { 3982 do_vec_ld(s, rt, index, clean_addr, mop); 3983 } else { 3984 do_vec_st(s, rt, index, clean_addr, mop); 3985 } 3986 } 3987 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3988 rt = (rt + 1) % 32; 3989 } 3990 3991 if (is_postidx) { 3992 if (rm == 31) { 3993 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3994 } else { 3995 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); 3996 } 3997 } 3998 } 3999 4000 /* 4001 * Load/Store memory tags 4002 * 4003 * 31 30 29 24 22 21 12 10 5 0 4004 * +-----+-------------+-----+---+------+-----+------+------+ 4005 * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 | Rn | Rt | 4006 * +-----+-------------+-----+---+------+-----+------+------+ 4007 */ 4008 static void disas_ldst_tag(DisasContext *s, uint32_t insn) 4009 { 4010 int rt = extract32(insn, 0, 5); 4011 int rn = extract32(insn, 5, 5); 4012 uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE; 4013 int op2 = extract32(insn, 10, 2); 4014 int op1 = extract32(insn, 22, 2); 4015 bool is_load = false, is_pair = false, is_zero = false, is_mult = false; 4016 int index = 0; 4017 TCGv_i64 addr, clean_addr, tcg_rt; 4018 4019 /* We checked insn bits [29:24,21] in the caller. */ 4020 if (extract32(insn, 30, 2) != 3) { 4021 goto do_unallocated; 4022 } 4023 4024 /* 4025 * @index is a tri-state variable which has 3 states: 4026 * < 0 : post-index, writeback 4027 * = 0 : signed offset 4028 * > 0 : pre-index, writeback 4029 */ 4030 switch (op1) { 4031 case 0: 4032 if (op2 != 0) { 4033 /* STG */ 4034 index = op2 - 2; 4035 } else { 4036 /* STZGM */ 4037 if (s->current_el == 0 || offset != 0) { 4038 goto do_unallocated; 4039 } 4040 is_mult = is_zero = true; 4041 } 4042 break; 4043 case 1: 4044 if (op2 != 0) { 4045 /* STZG */ 4046 is_zero = true; 4047 index = op2 - 2; 4048 } else { 4049 /* LDG */ 4050 is_load = true; 4051 } 4052 break; 4053 case 2: 4054 if (op2 != 0) { 4055 /* ST2G */ 4056 is_pair = true; 4057 index = op2 - 2; 4058 } else { 4059 /* STGM */ 4060 if (s->current_el == 0 || offset != 0) { 4061 goto do_unallocated; 4062 } 4063 is_mult = true; 4064 } 4065 break; 4066 case 3: 4067 if (op2 != 0) { 4068 /* STZ2G */ 4069 is_pair = is_zero = true; 4070 index = op2 - 2; 4071 } else { 4072 /* LDGM */ 4073 if (s->current_el == 0 || offset != 0) { 4074 goto do_unallocated; 4075 } 4076 is_mult = is_load = true; 4077 } 4078 break; 4079 4080 default: 4081 do_unallocated: 4082 unallocated_encoding(s); 4083 return; 4084 } 4085 4086 if (is_mult 4087 ? !dc_isar_feature(aa64_mte, s) 4088 : !dc_isar_feature(aa64_mte_insn_reg, s)) { 4089 goto do_unallocated; 4090 } 4091 4092 if (rn == 31) { 4093 gen_check_sp_alignment(s); 4094 } 4095 4096 addr = read_cpu_reg_sp(s, rn, true); 4097 if (index >= 0) { 4098 /* pre-index or signed offset */ 4099 tcg_gen_addi_i64(addr, addr, offset); 4100 } 4101 4102 if (is_mult) { 4103 tcg_rt = cpu_reg(s, rt); 4104 4105 if (is_zero) { 4106 int size = 4 << s->dcz_blocksize; 4107 4108 if (s->ata) { 4109 gen_helper_stzgm_tags(cpu_env, addr, tcg_rt); 4110 } 4111 /* 4112 * The non-tags portion of STZGM is mostly like DC_ZVA, 4113 * except the alignment happens before the access. 
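             * For example, assuming a typical s->dcz_blocksize of 4, the
             * block below is 4 << 4 == 64 bytes, and the address is
             * aligned down to a 64-byte boundary before the dc_zva
             * helper zeroes it.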
4114 */ 4115 clean_addr = clean_data_tbi(s, addr); 4116 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4117 gen_helper_dc_zva(cpu_env, clean_addr); 4118 } else if (s->ata) { 4119 if (is_load) { 4120 gen_helper_ldgm(tcg_rt, cpu_env, addr); 4121 } else { 4122 gen_helper_stgm(cpu_env, addr, tcg_rt); 4123 } 4124 } else { 4125 MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; 4126 int size = 4 << GMID_EL1_BS; 4127 4128 clean_addr = clean_data_tbi(s, addr); 4129 tcg_gen_andi_i64(clean_addr, clean_addr, -size); 4130 gen_probe_access(s, clean_addr, acc, size); 4131 4132 if (is_load) { 4133 /* The result tags are zeros. */ 4134 tcg_gen_movi_i64(tcg_rt, 0); 4135 } 4136 } 4137 return; 4138 } 4139 4140 if (is_load) { 4141 tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); 4142 tcg_rt = cpu_reg(s, rt); 4143 if (s->ata) { 4144 gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt); 4145 } else { 4146 clean_addr = clean_data_tbi(s, addr); 4147 gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); 4148 gen_address_with_allocation_tag0(tcg_rt, addr); 4149 } 4150 } else { 4151 tcg_rt = cpu_reg_sp(s, rt); 4152 if (!s->ata) { 4153 /* 4154 * For STG and ST2G, we need to check alignment and probe memory. 4155 * TODO: For STZG and STZ2G, we could rely on the stores below, 4156 * at least for system mode; user-only won't enforce alignment. 4157 */ 4158 if (is_pair) { 4159 gen_helper_st2g_stub(cpu_env, addr); 4160 } else { 4161 gen_helper_stg_stub(cpu_env, addr); 4162 } 4163 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 4164 if (is_pair) { 4165 gen_helper_st2g_parallel(cpu_env, addr, tcg_rt); 4166 } else { 4167 gen_helper_stg_parallel(cpu_env, addr, tcg_rt); 4168 } 4169 } else { 4170 if (is_pair) { 4171 gen_helper_st2g(cpu_env, addr, tcg_rt); 4172 } else { 4173 gen_helper_stg(cpu_env, addr, tcg_rt); 4174 } 4175 } 4176 } 4177 4178 if (is_zero) { 4179 TCGv_i64 clean_addr = clean_data_tbi(s, addr); 4180 TCGv_i64 tcg_zero = tcg_constant_i64(0); 4181 int mem_index = get_mem_index(s); 4182 int i, n = (1 + is_pair) << LOG2_TAG_GRANULE; 4183 4184 tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, 4185 MO_UQ | MO_ALIGN_16); 4186 for (i = 8; i < n; i += 8) { 4187 tcg_gen_addi_i64(clean_addr, clean_addr, 8); 4188 tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_UQ); 4189 } 4190 } 4191 4192 if (index != 0) { 4193 /* pre-index or post-index */ 4194 if (index < 0) { 4195 /* post-index */ 4196 tcg_gen_addi_i64(addr, addr, offset); 4197 } 4198 tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr); 4199 } 4200 } 4201 4202 /* Loads and stores */ 4203 static void disas_ldst(DisasContext *s, uint32_t insn) 4204 { 4205 switch (extract32(insn, 24, 6)) { 4206 case 0x08: /* Load/store exclusive */ 4207 disas_ldst_excl(s, insn); 4208 break; 4209 case 0x18: case 0x1c: /* Load register (literal) */ 4210 disas_ld_lit(s, insn); 4211 break; 4212 case 0x28: case 0x29: 4213 case 0x2c: case 0x2d: /* Load/store pair (all forms) */ 4214 disas_ldst_pair(s, insn); 4215 break; 4216 case 0x38: case 0x39: 4217 case 0x3c: case 0x3d: /* Load/store register (all forms) */ 4218 disas_ldst_reg(s, insn); 4219 break; 4220 case 0x0c: /* AdvSIMD load/store multiple structures */ 4221 disas_ldst_multiple_struct(s, insn); 4222 break; 4223 case 0x0d: /* AdvSIMD load/store single structure */ 4224 disas_ldst_single_struct(s, insn); 4225 break; 4226 case 0x19: 4227 if (extract32(insn, 21, 1) != 0) { 4228 disas_ldst_tag(s, insn); 4229 } else if (extract32(insn, 10, 2) == 0) { 4230 disas_ldst_ldapr_stlr(s, insn); 4231 } else { 4232 unallocated_encoding(s); 4233 } 4234 
break; 4235 default: 4236 unallocated_encoding(s); 4237 break; 4238 } 4239 } 4240 4241 /* PC-rel. addressing 4242 * 31 30 29 28 24 23 5 4 0 4243 * +----+-------+-----------+-------------------+------+ 4244 * | op | immlo | 1 0 0 0 0 | immhi | Rd | 4245 * +----+-------+-----------+-------------------+------+ 4246 */ 4247 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn) 4248 { 4249 unsigned int page, rd; 4250 int64_t offset; 4251 4252 page = extract32(insn, 31, 1); 4253 /* SignExtend(immhi:immlo) -> offset */ 4254 offset = sextract64(insn, 5, 19); 4255 offset = offset << 2 | extract32(insn, 29, 2); 4256 rd = extract32(insn, 0, 5); 4257 4258 if (page) { 4259 /* ADRP (page based) */ 4260 offset <<= 12; 4261 /* The page offset is ok for CF_PCREL. */ 4262 offset -= s->pc_curr & 0xfff; 4263 } 4264 4265 gen_pc_plus_diff(s, cpu_reg(s, rd), offset); 4266 } 4267 4268 /* 4269 * Add/subtract (immediate) 4270 * 4271 * 31 30 29 28 23 22 21 10 9 5 4 0 4272 * +--+--+--+-------------+--+-------------+-----+-----+ 4273 * |sf|op| S| 1 0 0 0 1 0 |sh| imm12 | Rn | Rd | 4274 * +--+--+--+-------------+--+-------------+-----+-----+ 4275 * 4276 * sf: 0 -> 32bit, 1 -> 64bit 4277 * op: 0 -> add , 1 -> sub 4278 * S: 1 -> set flags 4279 * sh: 1 -> LSL imm by 12 4280 */ 4281 static void disas_add_sub_imm(DisasContext *s, uint32_t insn) 4282 { 4283 int rd = extract32(insn, 0, 5); 4284 int rn = extract32(insn, 5, 5); 4285 uint64_t imm = extract32(insn, 10, 12); 4286 bool shift = extract32(insn, 22, 1); 4287 bool setflags = extract32(insn, 29, 1); 4288 bool sub_op = extract32(insn, 30, 1); 4289 bool is_64bit = extract32(insn, 31, 1); 4290 4291 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn); 4292 TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd); 4293 TCGv_i64 tcg_result; 4294 4295 if (shift) { 4296 imm <<= 12; 4297 } 4298 4299 tcg_result = tcg_temp_new_i64(); 4300 if (!setflags) { 4301 if (sub_op) { 4302 tcg_gen_subi_i64(tcg_result, tcg_rn, imm); 4303 } else { 4304 tcg_gen_addi_i64(tcg_result, tcg_rn, imm); 4305 } 4306 } else { 4307 TCGv_i64 tcg_imm = tcg_constant_i64(imm); 4308 if (sub_op) { 4309 gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm); 4310 } else { 4311 gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm); 4312 } 4313 } 4314 4315 if (is_64bit) { 4316 tcg_gen_mov_i64(tcg_rd, tcg_result); 4317 } else { 4318 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 4319 } 4320 4321 tcg_temp_free_i64(tcg_result); 4322 } 4323 4324 /* 4325 * Add/subtract (immediate, with tags) 4326 * 4327 * 31 30 29 28 23 22 21 16 14 10 9 5 4 0 4328 * +--+--+--+-------------+--+---------+--+-------+-----+-----+ 4329 * |sf|op| S| 1 0 0 0 1 1 |o2| uimm6 |o3| uimm4 | Rn | Rd | 4330 * +--+--+--+-------------+--+---------+--+-------+-----+-----+ 4331 * 4332 * op: 0 -> add, 1 -> sub 4333 */ 4334 static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn) 4335 { 4336 int rd = extract32(insn, 0, 5); 4337 int rn = extract32(insn, 5, 5); 4338 int uimm4 = extract32(insn, 10, 4); 4339 int uimm6 = extract32(insn, 16, 6); 4340 bool sub_op = extract32(insn, 30, 1); 4341 TCGv_i64 tcg_rn, tcg_rd; 4342 int imm; 4343 4344 /* Test all of sf=1, S=0, o2=0, o3=0. 
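     * The mask 0xa040c000 covers sf (bit 31), S (bit 29), o2 (bit 22)
     * and o3 (bits 15:14); of those only sf may be set, hence the
     * required value 0x80000000.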
     */
4345     if ((insn & 0xa040c000u) != 0x80000000u ||
4346         !dc_isar_feature(aa64_mte_insn_reg, s)) {
4347         unallocated_encoding(s);
4348         return;
4349     }
4350
4351     imm = uimm6 << LOG2_TAG_GRANULE;
4352     if (sub_op) {
4353         imm = -imm;
4354     }
4355
4356     tcg_rn = cpu_reg_sp(s, rn);
4357     tcg_rd = cpu_reg_sp(s, rd);
4358
4359     if (s->ata) {
4360         gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn,
4361                            tcg_constant_i32(imm),
4362                            tcg_constant_i32(uimm4));
4363     } else {
4364         tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
4365         gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
4366     }
4367 }
4368
4369 /* The input should be a value in the bottom e bits (with higher
4370  * bits zero); returns that value replicated into every element
4371  * of size e in a 64 bit integer.
4372  */
4373 static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
4374 {
4375     assert(e != 0);
4376     while (e < 64) {
4377         mask |= mask << e;
4378         e *= 2;
4379     }
4380     return mask;
4381 }
4382
4383 /* Return a value with the bottom len bits set (where 0 < len <= 64) */
4384 static inline uint64_t bitmask64(unsigned int length)
4385 {
4386     assert(length > 0 && length <= 64);
4387     return ~0ULL >> (64 - length);
4388 }
4389
4390 /* Simplified variant of pseudocode DecodeBitMasks() for the case where we
4391  * only require the wmask. Returns false if the imms/immr/immn are a reserved
4392  * value (ie should cause a guest UNDEF exception), and true if they are
4393  * valid, in which case the decoded bit pattern is written to result.
4394  */
4395 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
4396                             unsigned int imms, unsigned int immr)
4397 {
4398     uint64_t mask;
4399     unsigned e, levels, s, r;
4400     int len;
4401
4402     assert(immn < 2 && imms < 64 && immr < 64);
4403
4404     /* The bit patterns we create here are 64 bit patterns which
4405      * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
4406      * 64 bits each. Each element contains the same value: a run
4407      * of between 1 and e-1 non-zero bits, rotated within the
4408      * element by between 0 and e-1 bits.
4409      *
4410      * The element size and run length are encoded into immn (1 bit)
4411      * and imms (6 bits) as follows:
4412      * 64 bit elements: immn = 1, imms = <length of run - 1>
4413      * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
4414      * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
4415      * 8 bit elements: immn = 0, imms = 110 : <length of run - 1>
4416      * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
4417      * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
4418      * Notice that immn = 0, imms = 11111x is the only combination
4419      * not covered by one of the above options; this is reserved.
4420      * Further, <length of run - 1> all-ones is a reserved pattern.
4421      *
4422      * In all cases the rotation is by immr % e (and immr is 6 bits).
4423      */
4424
4425     /* First determine the element size */
4426     len = 31 - clz32((immn << 6) | (~imms & 0x3f));
4427     if (len < 1) {
4428         /* This is the immn == 0, imms == 11111x (binary) case */
4429         return false;
4430     }
4431     e = 1 << len;
4432
4433     levels = e - 1;
4434     s = imms & levels;
4435     r = immr & levels;
4436
4437     if (s == levels) {
4438         /* <length of run - 1> mustn't be all-ones. */
4439         return false;
4440     }
4441
4442     /* Create the value of one element: s+1 set bits rotated
4443      * by r within the element (which is e bits wide)...
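     *
     * As an illustrative worked example: immn = 0, imms = 0b111100,
     * immr = 0 gives len = 1 above, so e = 2, s = 0 and r = 0; one set
     * bit per 2-bit element then replicates to 0x5555555555555555.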
4444      */
4445     mask = bitmask64(s + 1);
4446     if (r) {
4447         mask = (mask >> r) | (mask << (e - r));
4448         mask &= bitmask64(e);
4449     }
4450     /* ...then replicate the element over the whole 64 bit value */
4451     mask = bitfield_replicate(mask, e);
4452     *result = mask;
4453     return true;
4454 }
4455
4456 /* Logical (immediate)
4457  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
4458  * +----+-----+-------------+---+------+------+------+------+
4459  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
4460  * +----+-----+-------------+---+------+------+------+------+
4461  */
4462 static void disas_logic_imm(DisasContext *s, uint32_t insn)
4463 {
4464     unsigned int sf, opc, is_n, immr, imms, rn, rd;
4465     TCGv_i64 tcg_rd, tcg_rn;
4466     uint64_t wmask;
4467     bool is_and = false;
4468
4469     sf = extract32(insn, 31, 1);
4470     opc = extract32(insn, 29, 2);
4471     is_n = extract32(insn, 22, 1);
4472     immr = extract32(insn, 16, 6);
4473     imms = extract32(insn, 10, 6);
4474     rn = extract32(insn, 5, 5);
4475     rd = extract32(insn, 0, 5);
4476
4477     if (!sf && is_n) {
4478         unallocated_encoding(s);
4479         return;
4480     }
4481
4482     if (opc == 0x3) { /* ANDS */
4483         tcg_rd = cpu_reg(s, rd);
4484     } else {
4485         tcg_rd = cpu_reg_sp(s, rd);
4486     }
4487     tcg_rn = cpu_reg(s, rn);
4488
4489     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
4490         /* some immediate field values are reserved */
4491         unallocated_encoding(s);
4492         return;
4493     }
4494
4495     if (!sf) {
4496         wmask &= 0xffffffff;
4497     }
4498
4499     switch (opc) {
4500     case 0x3: /* ANDS */
4501     case 0x0: /* AND */
4502         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
4503         is_and = true;
4504         break;
4505     case 0x1: /* ORR */
4506         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
4507         break;
4508     case 0x2: /* EOR */
4509         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
4510         break;
4511     default:
4512         g_assert_not_reached(); /* must handle all above */
4513         break;
4514     }
4515
4516     if (!sf && !is_and) {
4517         /* zero extend final result; we know we can skip this for AND
4518          * since the immediate had the high 32 bits clear.
         */
4520         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4521     }
4522
4523     if (opc == 3) { /* ANDS */
4524         gen_logic_CC(sf, tcg_rd);
4525     }
4526 }
4527
4528 /*
4529  * Move wide (immediate)
4530  *
4531  *  31 30 29 28         23 22 21 20             5 4    0
4532  * +--+-----+-------------+-----+----------------+------+
4533  * |sf| opc | 1 0 0 1 0 1 |  hw |      imm16     |  Rd  |
4534  * +--+-----+-------------+-----+----------------+------+
4535  *
4536  * sf: 0 -> 32 bit, 1 -> 64 bit
4537  * opc: 00 -> MOVN, 10 -> MOVZ, 11 -> MOVK
4538  * hw: shift amount / 16 (0 or 16; 32 and 48 are valid only when sf == 1)
4539  */
4540 static void disas_movw_imm(DisasContext *s, uint32_t insn)
4541 {
4542     int rd = extract32(insn, 0, 5);
4543     uint64_t imm = extract32(insn, 5, 16);
4544     int sf = extract32(insn, 31, 1);
4545     int opc = extract32(insn, 29, 2);
4546     int pos = extract32(insn, 21, 2) << 4;
4547     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4548
4549     if (!sf && (pos >= 32)) {
4550         unallocated_encoding(s);
4551         return;
4552     }
4553
4554     switch (opc) {
4555     case 0: /* MOVN */
4556     case 2: /* MOVZ */
4557         imm <<= pos;
4558         if (opc == 0) {
4559             imm = ~imm;
4560         }
4561         if (!sf) {
4562             imm &= 0xffffffffu;
4563         }
4564         tcg_gen_movi_i64(tcg_rd, imm);
4565         break;
4566     case 3: /* MOVK */
4567         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_constant_i64(imm), pos, 16);
4568         if (!sf) {
4569             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4570         }
4571         break;
4572     default:
4573         unallocated_encoding(s);
4574         break;
4575     }
4576 }
4577
4578 /* Bitfield
4579  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
4580  * +----+-----+-------------+---+------+------+------+------+
4581  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
4582  * +----+-----+-------------+---+------+------+------+------+
4583  */
4584 static void disas_bitfield(DisasContext *s, uint32_t insn)
4585 {
4586     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
4587     TCGv_i64 tcg_rd, tcg_tmp;
4588
4589     sf = extract32(insn, 31, 1);
4590     opc = extract32(insn, 29, 2);
4591     n = extract32(insn, 22, 1);
4592     ri = extract32(insn, 16, 6);
4593     si = extract32(insn, 10, 6);
4594     rn = extract32(insn, 5, 5);
4595     rd = extract32(insn, 0, 5);
4596     bitsize = sf ? 64 : 32;
4597
4598     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
4599         unallocated_encoding(s);
4600         return;
4601     }
4602
4603     tcg_rd = cpu_reg(s, rd);
4604
4605     /* Suppress the zero-extend for !sf. Since RI and SI are constrained
4606        to be smaller than bitsize, we'll never reference data outside the
4607        low 32-bits anyway. */
4608     tcg_tmp = read_cpu_reg(s, rn, 1);
4609
4610     /* Recognize simple(r) extractions. */
4611     if (si >= ri) {
4612         /* Wd<s-r:0> = Wn<s:r> */
4613         len = (si - ri) + 1;
4614         if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
4615             tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
4616             goto done;
4617         } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
4618             tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
4619             return;
4620         }
4621         /* opc == 1, BFXIL fall through to deposit */
4622         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
4623         pos = 0;
4624     } else {
4625         /* Handle the ri > si case with a deposit
4626          * Wd<32+s-r,32-r> = Wn<s:0>
4627          */
4628         len = si + 1;
4629         pos = (bitsize - ri) & (bitsize - 1);
4630     }
4631
4632     if (opc == 0 && len < ri) {
4633         /* SBFM: sign extend the destination field from len to fill
4634            the balance of the word. Let the deposit below insert all
4635            of those sign bits.
*/ 4636 tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len); 4637 len = ri; 4638 } 4639 4640 if (opc == 1) { /* BFM, BFXIL */ 4641 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len); 4642 } else { 4643 /* SBFM or UBFM: We start with zero, and we haven't modified 4644 any bits outside bitsize, therefore the zero-extension 4645 below is unneeded. */ 4646 tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len); 4647 return; 4648 } 4649 4650 done: 4651 if (!sf) { /* zero extend final result */ 4652 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 4653 } 4654 } 4655 4656 /* Extract 4657 * 31 30 29 28 23 22 21 20 16 15 10 9 5 4 0 4658 * +----+------+-------------+---+----+------+--------+------+------+ 4659 * | sf | op21 | 1 0 0 1 1 1 | N | o0 | Rm | imms | Rn | Rd | 4660 * +----+------+-------------+---+----+------+--------+------+------+ 4661 */ 4662 static void disas_extract(DisasContext *s, uint32_t insn) 4663 { 4664 unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0; 4665 4666 sf = extract32(insn, 31, 1); 4667 n = extract32(insn, 22, 1); 4668 rm = extract32(insn, 16, 5); 4669 imm = extract32(insn, 10, 6); 4670 rn = extract32(insn, 5, 5); 4671 rd = extract32(insn, 0, 5); 4672 op21 = extract32(insn, 29, 2); 4673 op0 = extract32(insn, 21, 1); 4674 bitsize = sf ? 64 : 32; 4675 4676 if (sf != n || op21 || op0 || imm >= bitsize) { 4677 unallocated_encoding(s); 4678 } else { 4679 TCGv_i64 tcg_rd, tcg_rm, tcg_rn; 4680 4681 tcg_rd = cpu_reg(s, rd); 4682 4683 if (unlikely(imm == 0)) { 4684 /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts, 4685 * so an extract from bit 0 is a special case. 4686 */ 4687 if (sf) { 4688 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm)); 4689 } else { 4690 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm)); 4691 } 4692 } else { 4693 tcg_rm = cpu_reg(s, rm); 4694 tcg_rn = cpu_reg(s, rn); 4695 4696 if (sf) { 4697 /* Specialization to ROR happens in EXTRACT2. */ 4698 tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm); 4699 } else { 4700 TCGv_i32 t0 = tcg_temp_new_i32(); 4701 4702 tcg_gen_extrl_i64_i32(t0, tcg_rm); 4703 if (rm == rn) { 4704 tcg_gen_rotri_i32(t0, t0, imm); 4705 } else { 4706 TCGv_i32 t1 = tcg_temp_new_i32(); 4707 tcg_gen_extrl_i64_i32(t1, tcg_rn); 4708 tcg_gen_extract2_i32(t0, t0, t1, imm); 4709 tcg_temp_free_i32(t1); 4710 } 4711 tcg_gen_extu_i32_i64(tcg_rd, t0); 4712 tcg_temp_free_i32(t0); 4713 } 4714 } 4715 } 4716 } 4717 4718 /* Data processing - immediate */ 4719 static void disas_data_proc_imm(DisasContext *s, uint32_t insn) 4720 { 4721 switch (extract32(insn, 23, 6)) { 4722 case 0x20: case 0x21: /* PC-rel. addressing */ 4723 disas_pc_rel_adr(s, insn); 4724 break; 4725 case 0x22: /* Add/subtract (immediate) */ 4726 disas_add_sub_imm(s, insn); 4727 break; 4728 case 0x23: /* Add/subtract (immediate, with tags) */ 4729 disas_add_sub_imm_with_tags(s, insn); 4730 break; 4731 case 0x24: /* Logical (immediate) */ 4732 disas_logic_imm(s, insn); 4733 break; 4734 case 0x25: /* Move wide (immediate) */ 4735 disas_movw_imm(s, insn); 4736 break; 4737 case 0x26: /* Bitfield */ 4738 disas_bitfield(s, insn); 4739 break; 4740 case 0x27: /* Extract */ 4741 disas_extract(s, insn); 4742 break; 4743 default: 4744 unallocated_encoding(s); 4745 break; 4746 } 4747 } 4748 4749 /* Shift a TCGv src by TCGv shift_amount, put result in dst. 4750 * Note that it is the caller's responsibility to ensure that the 4751 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM 4752 * mandated semantics for out of range shifts. 
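 * (The variable-shift instructions do this by masking the shift amount
 * with 31 or 63 in handle_shift_reg before calling here.)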
 */
4754 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
4755                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
4756 {
4757     switch (shift_type) {
4758     case A64_SHIFT_TYPE_LSL:
4759         tcg_gen_shl_i64(dst, src, shift_amount);
4760         break;
4761     case A64_SHIFT_TYPE_LSR:
4762         tcg_gen_shr_i64(dst, src, shift_amount);
4763         break;
4764     case A64_SHIFT_TYPE_ASR:
4765         if (!sf) {
4766             tcg_gen_ext32s_i64(dst, src);
4767         }
4768         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
4769         break;
4770     case A64_SHIFT_TYPE_ROR:
4771         if (sf) {
4772             tcg_gen_rotr_i64(dst, src, shift_amount);
4773         } else {
4774             TCGv_i32 t0, t1;
4775             t0 = tcg_temp_new_i32();
4776             t1 = tcg_temp_new_i32();
4777             tcg_gen_extrl_i64_i32(t0, src);
4778             tcg_gen_extrl_i64_i32(t1, shift_amount);
4779             tcg_gen_rotr_i32(t0, t0, t1);
4780             tcg_gen_extu_i32_i64(dst, t0);
4781             tcg_temp_free_i32(t0);
4782             tcg_temp_free_i32(t1);
4783         }
4784         break;
4785     default:
4786         g_assert_not_reached(); /* all shift types should be handled */
4787         break;
4788     }
4789
4790     if (!sf) { /* zero extend final result */
4791         tcg_gen_ext32u_i64(dst, dst);
4792     }
4793 }
4794
4795 /* Shift a TCGv src by immediate, put result in dst.
4796  * The shift amount must be in range (this should always be true as the
4797  * relevant instructions will UNDEF on bad shift immediates).
4798  */
4799 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
4800                           enum a64_shift_type shift_type, unsigned int shift_i)
4801 {
4802     assert(shift_i < (sf ? 64 : 32));
4803
4804     if (shift_i == 0) {
4805         tcg_gen_mov_i64(dst, src);
4806     } else {
4807         shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
4808     }
4809 }
4810
4811 /* Logical (shifted register)
4812  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
4813  * +----+-----+-----------+-------+---+------+--------+------+------+
4814  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
4815  * +----+-----+-----------+-------+---+------+--------+------+------+
4816  */
4817 static void disas_logic_reg(DisasContext *s, uint32_t insn)
4818 {
4819     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
4820     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
4821
4822     sf = extract32(insn, 31, 1);
4823     opc = extract32(insn, 29, 2);
4824     shift_type = extract32(insn, 22, 2);
4825     invert = extract32(insn, 21, 1);
4826     rm = extract32(insn, 16, 5);
4827     shift_amount = extract32(insn, 10, 6);
4828     rn = extract32(insn, 5, 5);
4829     rd = extract32(insn, 0, 5);
4830
4831     if (!sf && (shift_amount & (1 << 5))) {
4832         unallocated_encoding(s);
4833         return;
4834     }
4835
4836     tcg_rd = cpu_reg(s, rd);
4837
4838     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
4839         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
4840          * register-register MOV and MVN, so it is worth special casing.
         */
4842         tcg_rm = cpu_reg(s, rm);
4843         if (invert) {
4844             tcg_gen_not_i64(tcg_rd, tcg_rm);
4845             if (!sf) {
4846                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4847             }
4848         } else {
4849             if (sf) {
4850                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
4851             } else {
4852                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
4853             }
4854         }
4855         return;
4856     }
4857
4858     tcg_rm = read_cpu_reg(s, rm, sf);
4859
4860     if (shift_amount) {
4861         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
4862     }
4863
4864     tcg_rn = cpu_reg(s, rn);
4865
4866     switch (opc | (invert << 2)) {
4867     case 0: /* AND */
4868     case 3: /* ANDS */
4869         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
4870         break;
4871     case 1: /* ORR */
4872         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
4873         break;
4874     case 2: /* EOR */
4875         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
4876         break;
4877     case 4: /* BIC */
4878     case 7: /* BICS */
4879         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
4880         break;
4881     case 5: /* ORN */
4882         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
4883         break;
4884     case 6: /* EON */
4885         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
4886         break;
4887     default:
4888         g_assert_not_reached();
4889         break;
4890     }
4891
4892     if (!sf) {
4893         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4894     }
4895
4896     if (opc == 3) {
4897         gen_logic_CC(sf, tcg_rd);
4898     }
4899 }
4900
4901 /*
4902  * Add/subtract (extended register)
4903  *
4904  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
4905  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4906  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
4907  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
4908  *
4909  *  sf: 0 -> 32bit, 1 -> 64bit
4910  *  op: 0 -> add  , 1 -> sub
4911  *   S: 1 -> set flags
4912  * opt: 00
4913  * option: extension type (see DecodeRegExtend)
4914  * imm3: optional shift to Rm
4915  *
4916  * Rd = Rn + LSL(extend(Rm), amount)
4917  */
4918 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
4919 {
4920     int rd = extract32(insn, 0, 5);
4921     int rn = extract32(insn, 5, 5);
4922     int imm3 = extract32(insn, 10, 3);
4923     int option = extract32(insn, 13, 3);
4924     int rm = extract32(insn, 16, 5);
4925     int opt = extract32(insn, 22, 2);
4926     bool setflags = extract32(insn, 29, 1);
4927     bool sub_op = extract32(insn, 30, 1);
4928     bool sf = extract32(insn, 31, 1);
4929
4930     TCGv_i64 tcg_rm, tcg_rn; /* temps */
4931     TCGv_i64 tcg_rd;
4932     TCGv_i64 tcg_result;
4933
4934     if (imm3 > 4 || opt != 0) {
4935         unallocated_encoding(s);
4936         return;
4937     }
4938
4939     /* non-flag setting ops may use SP */
4940     if (!setflags) {
4941         tcg_rd = cpu_reg_sp(s, rd);
4942     } else {
4943         tcg_rd = cpu_reg(s, rd);
4944     }
4945     tcg_rn = read_cpu_reg_sp(s, rn, sf);
4946
4947     tcg_rm = read_cpu_reg(s, rm, sf);
4948     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
4949
4950     tcg_result = tcg_temp_new_i64();
4951
4952     if (!setflags) {
4953         if (sub_op) {
4954             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
4955         } else {
4956             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
4957         }
4958     } else {
4959         if (sub_op) {
4960             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
4961         } else {
4962             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
4963         }
4964     }
4965
4966     if (sf) {
4967         tcg_gen_mov_i64(tcg_rd, tcg_result);
4968     } else {
4969         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
4970     }
4971
4972     tcg_temp_free_i64(tcg_result);
4973 }
4974
4975 /*
4976  * Add/subtract (shifted register)
4977  *
4978  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
4979  * +--+--+--+-----------+-----+--+-------+---------+------+------+
4980  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
4981
* +--+--+--+-----------+-----+--+-------+---------+------+------+ 4982 * 4983 * sf: 0 -> 32bit, 1 -> 64bit 4984 * op: 0 -> add , 1 -> sub 4985 * S: 1 -> set flags 4986 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED 4987 * imm6: Shift amount to apply to Rm before the add/sub 4988 */ 4989 static void disas_add_sub_reg(DisasContext *s, uint32_t insn) 4990 { 4991 int rd = extract32(insn, 0, 5); 4992 int rn = extract32(insn, 5, 5); 4993 int imm6 = extract32(insn, 10, 6); 4994 int rm = extract32(insn, 16, 5); 4995 int shift_type = extract32(insn, 22, 2); 4996 bool setflags = extract32(insn, 29, 1); 4997 bool sub_op = extract32(insn, 30, 1); 4998 bool sf = extract32(insn, 31, 1); 4999 5000 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5001 TCGv_i64 tcg_rn, tcg_rm; 5002 TCGv_i64 tcg_result; 5003 5004 if ((shift_type == 3) || (!sf && (imm6 > 31))) { 5005 unallocated_encoding(s); 5006 return; 5007 } 5008 5009 tcg_rn = read_cpu_reg(s, rn, sf); 5010 tcg_rm = read_cpu_reg(s, rm, sf); 5011 5012 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6); 5013 5014 tcg_result = tcg_temp_new_i64(); 5015 5016 if (!setflags) { 5017 if (sub_op) { 5018 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm); 5019 } else { 5020 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm); 5021 } 5022 } else { 5023 if (sub_op) { 5024 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm); 5025 } else { 5026 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm); 5027 } 5028 } 5029 5030 if (sf) { 5031 tcg_gen_mov_i64(tcg_rd, tcg_result); 5032 } else { 5033 tcg_gen_ext32u_i64(tcg_rd, tcg_result); 5034 } 5035 5036 tcg_temp_free_i64(tcg_result); 5037 } 5038 5039 /* Data-processing (3 source) 5040 * 5041 * 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0 5042 * +--+------+-----------+------+------+----+------+------+------+ 5043 * |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd | 5044 * +--+------+-----------+------+------+----+------+------+------+ 5045 */ 5046 static void disas_data_proc_3src(DisasContext *s, uint32_t insn) 5047 { 5048 int rd = extract32(insn, 0, 5); 5049 int rn = extract32(insn, 5, 5); 5050 int ra = extract32(insn, 10, 5); 5051 int rm = extract32(insn, 16, 5); 5052 int op_id = (extract32(insn, 29, 3) << 4) | 5053 (extract32(insn, 21, 3) << 1) | 5054 extract32(insn, 15, 1); 5055 bool sf = extract32(insn, 31, 1); 5056 bool is_sub = extract32(op_id, 0, 1); 5057 bool is_high = extract32(op_id, 2, 1); 5058 bool is_signed = false; 5059 TCGv_i64 tcg_op1; 5060 TCGv_i64 tcg_op2; 5061 TCGv_i64 tcg_tmp; 5062 5063 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */ 5064 switch (op_id) { 5065 case 0x42: /* SMADDL */ 5066 case 0x43: /* SMSUBL */ 5067 case 0x44: /* SMULH */ 5068 is_signed = true; 5069 break; 5070 case 0x0: /* MADD (32bit) */ 5071 case 0x1: /* MSUB (32bit) */ 5072 case 0x40: /* MADD (64bit) */ 5073 case 0x41: /* MSUB (64bit) */ 5074 case 0x4a: /* UMADDL */ 5075 case 0x4b: /* UMSUBL */ 5076 case 0x4c: /* UMULH */ 5077 break; 5078 default: 5079 unallocated_encoding(s); 5080 return; 5081 } 5082 5083 if (is_high) { 5084 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */ 5085 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5086 TCGv_i64 tcg_rn = cpu_reg(s, rn); 5087 TCGv_i64 tcg_rm = cpu_reg(s, rm); 5088 5089 if (is_signed) { 5090 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 5091 } else { 5092 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm); 5093 } 5094 5095 tcg_temp_free_i64(low_bits); 5096 return; 5097 } 5098 5099 tcg_op1 = tcg_temp_new_i64(); 5100 tcg_op2 = tcg_temp_new_i64(); 5101 tcg_tmp = tcg_temp_new_i64(); 5102 5103 
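    /* MADD/MSUB (op_id < 0x42) operate on the full 64-bit registers,
     * while the widening SMADDL/SMSUBL/UMADDL/UMSUBL forms first
     * extend their 32-bit source operands, signed or unsigned as
     * appropriate.
     */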
if (op_id < 0x42) { 5104 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn)); 5105 tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm)); 5106 } else { 5107 if (is_signed) { 5108 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn)); 5109 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm)); 5110 } else { 5111 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn)); 5112 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm)); 5113 } 5114 } 5115 5116 if (ra == 31 && !is_sub) { 5117 /* Special-case MADD with rA == XZR; it is the standard MUL alias */ 5118 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2); 5119 } else { 5120 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2); 5121 if (is_sub) { 5122 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 5123 } else { 5124 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp); 5125 } 5126 } 5127 5128 if (!sf) { 5129 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd)); 5130 } 5131 5132 tcg_temp_free_i64(tcg_op1); 5133 tcg_temp_free_i64(tcg_op2); 5134 tcg_temp_free_i64(tcg_tmp); 5135 } 5136 5137 /* Add/subtract (with carry) 5138 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0 5139 * +--+--+--+------------------------+------+-------------+------+-----+ 5140 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | 0 0 0 0 0 0 | Rn | Rd | 5141 * +--+--+--+------------------------+------+-------------+------+-----+ 5142 */ 5143 5144 static void disas_adc_sbc(DisasContext *s, uint32_t insn) 5145 { 5146 unsigned int sf, op, setflags, rm, rn, rd; 5147 TCGv_i64 tcg_y, tcg_rn, tcg_rd; 5148 5149 sf = extract32(insn, 31, 1); 5150 op = extract32(insn, 30, 1); 5151 setflags = extract32(insn, 29, 1); 5152 rm = extract32(insn, 16, 5); 5153 rn = extract32(insn, 5, 5); 5154 rd = extract32(insn, 0, 5); 5155 5156 tcg_rd = cpu_reg(s, rd); 5157 tcg_rn = cpu_reg(s, rn); 5158 5159 if (op) { 5160 tcg_y = tcg_temp_new_i64(); 5161 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm)); 5162 } else { 5163 tcg_y = cpu_reg(s, rm); 5164 } 5165 5166 if (setflags) { 5167 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y); 5168 } else { 5169 gen_adc(sf, tcg_rd, tcg_rn, tcg_y); 5170 } 5171 } 5172 5173 /* 5174 * Rotate right into flags 5175 * 31 30 29 21 15 10 5 4 0 5176 * +--+--+--+-----------------+--------+-----------+------+--+------+ 5177 * |sf|op| S| 1 1 0 1 0 0 0 0 | imm6 | 0 0 0 0 1 | Rn |o2| mask | 5178 * +--+--+--+-----------------+--------+-----------+------+--+------+ 5179 */ 5180 static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn) 5181 { 5182 int mask = extract32(insn, 0, 4); 5183 int o2 = extract32(insn, 4, 1); 5184 int rn = extract32(insn, 5, 5); 5185 int imm6 = extract32(insn, 15, 6); 5186 int sf_op_s = extract32(insn, 29, 3); 5187 TCGv_i64 tcg_rn; 5188 TCGv_i32 nzcv; 5189 5190 if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) { 5191 unallocated_encoding(s); 5192 return; 5193 } 5194 5195 tcg_rn = read_cpu_reg(s, rn, 1); 5196 tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6); 5197 5198 nzcv = tcg_temp_new_i32(); 5199 tcg_gen_extrl_i64_i32(nzcv, tcg_rn); 5200 5201 if (mask & 8) { /* N */ 5202 tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3); 5203 } 5204 if (mask & 4) { /* Z */ 5205 tcg_gen_not_i32(cpu_ZF, nzcv); 5206 tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4); 5207 } 5208 if (mask & 2) { /* C */ 5209 tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1); 5210 } 5211 if (mask & 1) { /* V */ 5212 tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0); 5213 } 5214 5215 tcg_temp_free_i32(nzcv); 5216 } 5217 5218 /* 5219 * Evaluate into flags 5220 * 31 30 29 21 15 14 10 5 4 0 5221 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 5222 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz 
| 0 0 1 0 | Rn |o3| mask | 5223 * +--+--+--+-----------------+---------+----+---------+------+--+------+ 5224 */ 5225 static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn) 5226 { 5227 int o3_mask = extract32(insn, 0, 5); 5228 int rn = extract32(insn, 5, 5); 5229 int o2 = extract32(insn, 15, 6); 5230 int sz = extract32(insn, 14, 1); 5231 int sf_op_s = extract32(insn, 29, 3); 5232 TCGv_i32 tmp; 5233 int shift; 5234 5235 if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd || 5236 !dc_isar_feature(aa64_condm_4, s)) { 5237 unallocated_encoding(s); 5238 return; 5239 } 5240 shift = sz ? 16 : 24; /* SETF16 or SETF8 */ 5241 5242 tmp = tcg_temp_new_i32(); 5243 tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn)); 5244 tcg_gen_shli_i32(cpu_NF, tmp, shift); 5245 tcg_gen_shli_i32(cpu_VF, tmp, shift - 1); 5246 tcg_gen_mov_i32(cpu_ZF, cpu_NF); 5247 tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF); 5248 tcg_temp_free_i32(tmp); 5249 } 5250 5251 /* Conditional compare (immediate / register) 5252 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 5253 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 5254 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv | 5255 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+ 5256 * [1] y [0] [0] 5257 */ 5258 static void disas_cc(DisasContext *s, uint32_t insn) 5259 { 5260 unsigned int sf, op, y, cond, rn, nzcv, is_imm; 5261 TCGv_i32 tcg_t0, tcg_t1, tcg_t2; 5262 TCGv_i64 tcg_tmp, tcg_y, tcg_rn; 5263 DisasCompare c; 5264 5265 if (!extract32(insn, 29, 1)) { 5266 unallocated_encoding(s); 5267 return; 5268 } 5269 if (insn & (1 << 10 | 1 << 4)) { 5270 unallocated_encoding(s); 5271 return; 5272 } 5273 sf = extract32(insn, 31, 1); 5274 op = extract32(insn, 30, 1); 5275 is_imm = extract32(insn, 11, 1); 5276 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */ 5277 cond = extract32(insn, 12, 4); 5278 rn = extract32(insn, 5, 5); 5279 nzcv = extract32(insn, 0, 4); 5280 5281 /* Set T0 = !COND. */ 5282 tcg_t0 = tcg_temp_new_i32(); 5283 arm_test_cc(&c, cond); 5284 tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0); 5285 5286 /* Load the arguments for the new comparison. */ 5287 if (is_imm) { 5288 tcg_y = tcg_temp_new_i64(); 5289 tcg_gen_movi_i64(tcg_y, y); 5290 } else { 5291 tcg_y = cpu_reg(s, y); 5292 } 5293 tcg_rn = cpu_reg(s, rn); 5294 5295 /* Set the flags for the new comparison. */ 5296 tcg_tmp = tcg_temp_new_i64(); 5297 if (op) { 5298 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y); 5299 } else { 5300 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y); 5301 } 5302 tcg_temp_free_i64(tcg_tmp); 5303 5304 /* If COND was false, force the flags to #nzcv. Compute two masks 5305 * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0). 5306 * For tcg hosts that support ANDC, we can make do with just T1. 5307 * In either case, allow the tcg optimizer to delete any unused mask. 
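     * For instance, when the N bit of #nzcv is set, NF |= T1 forces NF
     * negative when T1 == -1 (COND failed) and leaves the freshly
     * computed flags untouched when T1 == 0 (COND held).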
5308 */ 5309 tcg_t1 = tcg_temp_new_i32(); 5310 tcg_t2 = tcg_temp_new_i32(); 5311 tcg_gen_neg_i32(tcg_t1, tcg_t0); 5312 tcg_gen_subi_i32(tcg_t2, tcg_t0, 1); 5313 5314 if (nzcv & 8) { /* N */ 5315 tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1); 5316 } else { 5317 if (TCG_TARGET_HAS_andc_i32) { 5318 tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1); 5319 } else { 5320 tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2); 5321 } 5322 } 5323 if (nzcv & 4) { /* Z */ 5324 if (TCG_TARGET_HAS_andc_i32) { 5325 tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1); 5326 } else { 5327 tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2); 5328 } 5329 } else { 5330 tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0); 5331 } 5332 if (nzcv & 2) { /* C */ 5333 tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0); 5334 } else { 5335 if (TCG_TARGET_HAS_andc_i32) { 5336 tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1); 5337 } else { 5338 tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2); 5339 } 5340 } 5341 if (nzcv & 1) { /* V */ 5342 tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1); 5343 } else { 5344 if (TCG_TARGET_HAS_andc_i32) { 5345 tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1); 5346 } else { 5347 tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2); 5348 } 5349 } 5350 tcg_temp_free_i32(tcg_t0); 5351 tcg_temp_free_i32(tcg_t1); 5352 tcg_temp_free_i32(tcg_t2); 5353 } 5354 5355 /* Conditional select 5356 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0 5357 * +----+----+---+-----------------+------+------+-----+------+------+ 5358 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd | 5359 * +----+----+---+-----------------+------+------+-----+------+------+ 5360 */ 5361 static void disas_cond_select(DisasContext *s, uint32_t insn) 5362 { 5363 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd; 5364 TCGv_i64 tcg_rd, zero; 5365 DisasCompare64 c; 5366 5367 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) { 5368 /* S == 1 or op2<1> == 1 */ 5369 unallocated_encoding(s); 5370 return; 5371 } 5372 sf = extract32(insn, 31, 1); 5373 else_inv = extract32(insn, 30, 1); 5374 rm = extract32(insn, 16, 5); 5375 cond = extract32(insn, 12, 4); 5376 else_inc = extract32(insn, 10, 1); 5377 rn = extract32(insn, 5, 5); 5378 rd = extract32(insn, 0, 5); 5379 5380 tcg_rd = cpu_reg(s, rd); 5381 5382 a64_test_cc(&c, cond); 5383 zero = tcg_constant_i64(0); 5384 5385 if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) { 5386 /* CSET & CSETM. 
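         * With Rn == Rm == ZR, CSINC yields (cond ? 0 : 1) and CSINV
         * yields (cond ? 0 : -1), so both reduce to a setcond on the
         * inverted condition, with CSETM then negating the 0/1 result.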
*/ 5387 tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero); 5388 if (else_inv) { 5389 tcg_gen_neg_i64(tcg_rd, tcg_rd); 5390 } 5391 } else { 5392 TCGv_i64 t_true = cpu_reg(s, rn); 5393 TCGv_i64 t_false = read_cpu_reg(s, rm, 1); 5394 if (else_inv && else_inc) { 5395 tcg_gen_neg_i64(t_false, t_false); 5396 } else if (else_inv) { 5397 tcg_gen_not_i64(t_false, t_false); 5398 } else if (else_inc) { 5399 tcg_gen_addi_i64(t_false, t_false, 1); 5400 } 5401 tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false); 5402 } 5403 5404 if (!sf) { 5405 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5406 } 5407 } 5408 5409 static void handle_clz(DisasContext *s, unsigned int sf, 5410 unsigned int rn, unsigned int rd) 5411 { 5412 TCGv_i64 tcg_rd, tcg_rn; 5413 tcg_rd = cpu_reg(s, rd); 5414 tcg_rn = cpu_reg(s, rn); 5415 5416 if (sf) { 5417 tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64); 5418 } else { 5419 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5420 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5421 tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32); 5422 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5423 tcg_temp_free_i32(tcg_tmp32); 5424 } 5425 } 5426 5427 static void handle_cls(DisasContext *s, unsigned int sf, 5428 unsigned int rn, unsigned int rd) 5429 { 5430 TCGv_i64 tcg_rd, tcg_rn; 5431 tcg_rd = cpu_reg(s, rd); 5432 tcg_rn = cpu_reg(s, rn); 5433 5434 if (sf) { 5435 tcg_gen_clrsb_i64(tcg_rd, tcg_rn); 5436 } else { 5437 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5438 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5439 tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32); 5440 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5441 tcg_temp_free_i32(tcg_tmp32); 5442 } 5443 } 5444 5445 static void handle_rbit(DisasContext *s, unsigned int sf, 5446 unsigned int rn, unsigned int rd) 5447 { 5448 TCGv_i64 tcg_rd, tcg_rn; 5449 tcg_rd = cpu_reg(s, rd); 5450 tcg_rn = cpu_reg(s, rn); 5451 5452 if (sf) { 5453 gen_helper_rbit64(tcg_rd, tcg_rn); 5454 } else { 5455 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); 5456 tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); 5457 gen_helper_rbit(tcg_tmp32, tcg_tmp32); 5458 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); 5459 tcg_temp_free_i32(tcg_tmp32); 5460 } 5461 } 5462 5463 /* REV with sf==1, opcode==3 ("REV64") */ 5464 static void handle_rev64(DisasContext *s, unsigned int sf, 5465 unsigned int rn, unsigned int rd) 5466 { 5467 if (!sf) { 5468 unallocated_encoding(s); 5469 return; 5470 } 5471 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn)); 5472 } 5473 5474 /* REV with sf==0, opcode==2 5475 * REV32 (sf==1, opcode==2) 5476 */ 5477 static void handle_rev32(DisasContext *s, unsigned int sf, 5478 unsigned int rn, unsigned int rd) 5479 { 5480 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5481 TCGv_i64 tcg_rn = cpu_reg(s, rn); 5482 5483 if (sf) { 5484 tcg_gen_bswap64_i64(tcg_rd, tcg_rn); 5485 tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32); 5486 } else { 5487 tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ); 5488 } 5489 } 5490 5491 /* REV16 (opcode==1) */ 5492 static void handle_rev16(DisasContext *s, unsigned int sf, 5493 unsigned int rn, unsigned int rd) 5494 { 5495 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5496 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 5497 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 5498 TCGv_i64 mask = tcg_constant_i64(sf ? 
0x00ff00ff00ff00ffull : 0x00ff00ff); 5499 5500 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8); 5501 tcg_gen_and_i64(tcg_rd, tcg_rn, mask); 5502 tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask); 5503 tcg_gen_shli_i64(tcg_rd, tcg_rd, 8); 5504 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp); 5505 5506 tcg_temp_free_i64(tcg_tmp); 5507 } 5508 5509 /* Data-processing (1 source) 5510 * 31 30 29 28 21 20 16 15 10 9 5 4 0 5511 * +----+---+---+-----------------+---------+--------+------+------+ 5512 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd | 5513 * +----+---+---+-----------------+---------+--------+------+------+ 5514 */ 5515 static void disas_data_proc_1src(DisasContext *s, uint32_t insn) 5516 { 5517 unsigned int sf, opcode, opcode2, rn, rd; 5518 TCGv_i64 tcg_rd; 5519 5520 if (extract32(insn, 29, 1)) { 5521 unallocated_encoding(s); 5522 return; 5523 } 5524 5525 sf = extract32(insn, 31, 1); 5526 opcode = extract32(insn, 10, 6); 5527 opcode2 = extract32(insn, 16, 5); 5528 rn = extract32(insn, 5, 5); 5529 rd = extract32(insn, 0, 5); 5530 5531 #define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7)) 5532 5533 switch (MAP(sf, opcode2, opcode)) { 5534 case MAP(0, 0x00, 0x00): /* RBIT */ 5535 case MAP(1, 0x00, 0x00): 5536 handle_rbit(s, sf, rn, rd); 5537 break; 5538 case MAP(0, 0x00, 0x01): /* REV16 */ 5539 case MAP(1, 0x00, 0x01): 5540 handle_rev16(s, sf, rn, rd); 5541 break; 5542 case MAP(0, 0x00, 0x02): /* REV/REV32 */ 5543 case MAP(1, 0x00, 0x02): 5544 handle_rev32(s, sf, rn, rd); 5545 break; 5546 case MAP(1, 0x00, 0x03): /* REV64 */ 5547 handle_rev64(s, sf, rn, rd); 5548 break; 5549 case MAP(0, 0x00, 0x04): /* CLZ */ 5550 case MAP(1, 0x00, 0x04): 5551 handle_clz(s, sf, rn, rd); 5552 break; 5553 case MAP(0, 0x00, 0x05): /* CLS */ 5554 case MAP(1, 0x00, 0x05): 5555 handle_cls(s, sf, rn, rd); 5556 break; 5557 case MAP(1, 0x01, 0x00): /* PACIA */ 5558 if (s->pauth_active) { 5559 tcg_rd = cpu_reg(s, rd); 5560 gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5561 } else if (!dc_isar_feature(aa64_pauth, s)) { 5562 goto do_unallocated; 5563 } 5564 break; 5565 case MAP(1, 0x01, 0x01): /* PACIB */ 5566 if (s->pauth_active) { 5567 tcg_rd = cpu_reg(s, rd); 5568 gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5569 } else if (!dc_isar_feature(aa64_pauth, s)) { 5570 goto do_unallocated; 5571 } 5572 break; 5573 case MAP(1, 0x01, 0x02): /* PACDA */ 5574 if (s->pauth_active) { 5575 tcg_rd = cpu_reg(s, rd); 5576 gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5577 } else if (!dc_isar_feature(aa64_pauth, s)) { 5578 goto do_unallocated; 5579 } 5580 break; 5581 case MAP(1, 0x01, 0x03): /* PACDB */ 5582 if (s->pauth_active) { 5583 tcg_rd = cpu_reg(s, rd); 5584 gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5585 } else if (!dc_isar_feature(aa64_pauth, s)) { 5586 goto do_unallocated; 5587 } 5588 break; 5589 case MAP(1, 0x01, 0x04): /* AUTIA */ 5590 if (s->pauth_active) { 5591 tcg_rd = cpu_reg(s, rd); 5592 gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5593 } else if (!dc_isar_feature(aa64_pauth, s)) { 5594 goto do_unallocated; 5595 } 5596 break; 5597 case MAP(1, 0x01, 0x05): /* AUTIB */ 5598 if (s->pauth_active) { 5599 tcg_rd = cpu_reg(s, rd); 5600 gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5601 } else if (!dc_isar_feature(aa64_pauth, s)) { 5602 goto do_unallocated; 5603 } 5604 break; 5605 case MAP(1, 0x01, 0x06): /* AUTDA */ 5606 if (s->pauth_active) { 5607 tcg_rd = cpu_reg(s, rd); 5608 gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, 
rn)); 5609 } else if (!dc_isar_feature(aa64_pauth, s)) { 5610 goto do_unallocated; 5611 } 5612 break; 5613 case MAP(1, 0x01, 0x07): /* AUTDB */ 5614 if (s->pauth_active) { 5615 tcg_rd = cpu_reg(s, rd); 5616 gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn)); 5617 } else if (!dc_isar_feature(aa64_pauth, s)) { 5618 goto do_unallocated; 5619 } 5620 break; 5621 case MAP(1, 0x01, 0x08): /* PACIZA */ 5622 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5623 goto do_unallocated; 5624 } else if (s->pauth_active) { 5625 tcg_rd = cpu_reg(s, rd); 5626 gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5627 } 5628 break; 5629 case MAP(1, 0x01, 0x09): /* PACIZB */ 5630 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5631 goto do_unallocated; 5632 } else if (s->pauth_active) { 5633 tcg_rd = cpu_reg(s, rd); 5634 gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5635 } 5636 break; 5637 case MAP(1, 0x01, 0x0a): /* PACDZA */ 5638 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5639 goto do_unallocated; 5640 } else if (s->pauth_active) { 5641 tcg_rd = cpu_reg(s, rd); 5642 gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5643 } 5644 break; 5645 case MAP(1, 0x01, 0x0b): /* PACDZB */ 5646 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5647 goto do_unallocated; 5648 } else if (s->pauth_active) { 5649 tcg_rd = cpu_reg(s, rd); 5650 gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5651 } 5652 break; 5653 case MAP(1, 0x01, 0x0c): /* AUTIZA */ 5654 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5655 goto do_unallocated; 5656 } else if (s->pauth_active) { 5657 tcg_rd = cpu_reg(s, rd); 5658 gen_helper_autia(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5659 } 5660 break; 5661 case MAP(1, 0x01, 0x0d): /* AUTIZB */ 5662 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5663 goto do_unallocated; 5664 } else if (s->pauth_active) { 5665 tcg_rd = cpu_reg(s, rd); 5666 gen_helper_autib(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5667 } 5668 break; 5669 case MAP(1, 0x01, 0x0e): /* AUTDZA */ 5670 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5671 goto do_unallocated; 5672 } else if (s->pauth_active) { 5673 tcg_rd = cpu_reg(s, rd); 5674 gen_helper_autda(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5675 } 5676 break; 5677 case MAP(1, 0x01, 0x0f): /* AUTDZB */ 5678 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5679 goto do_unallocated; 5680 } else if (s->pauth_active) { 5681 tcg_rd = cpu_reg(s, rd); 5682 gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, tcg_constant_i64(0)); 5683 } 5684 break; 5685 case MAP(1, 0x01, 0x10): /* XPACI */ 5686 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5687 goto do_unallocated; 5688 } else if (s->pauth_active) { 5689 tcg_rd = cpu_reg(s, rd); 5690 gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd); 5691 } 5692 break; 5693 case MAP(1, 0x01, 0x11): /* XPACD */ 5694 if (!dc_isar_feature(aa64_pauth, s) || rn != 31) { 5695 goto do_unallocated; 5696 } else if (s->pauth_active) { 5697 tcg_rd = cpu_reg(s, rd); 5698 gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd); 5699 } 5700 break; 5701 default: 5702 do_unallocated: 5703 unallocated_encoding(s); 5704 break; 5705 } 5706 5707 #undef MAP 5708 } 5709 5710 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf, 5711 unsigned int rm, unsigned int rn, unsigned int rd) 5712 { 5713 TCGv_i64 tcg_n, tcg_m, tcg_rd; 5714 tcg_rd = cpu_reg(s, rd); 5715 5716 if (!sf && is_signed) { 5717 tcg_n = tcg_temp_new_i64(); 5718 tcg_m = tcg_temp_new_i64(); 5719 
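        /* The division itself is done on 64-bit values, so sign-extend
         * the 32-bit operands first to get a correct signed quotient
         * (e.g. -6 / 2 must give -3, not a large positive value).
         */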
tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn)); 5720 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm)); 5721 } else { 5722 tcg_n = read_cpu_reg(s, rn, sf); 5723 tcg_m = read_cpu_reg(s, rm, sf); 5724 } 5725 5726 if (is_signed) { 5727 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m); 5728 } else { 5729 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m); 5730 } 5731 5732 if (!sf) { /* zero extend final result */ 5733 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 5734 } 5735 } 5736 5737 /* LSLV, LSRV, ASRV, RORV */ 5738 static void handle_shift_reg(DisasContext *s, 5739 enum a64_shift_type shift_type, unsigned int sf, 5740 unsigned int rm, unsigned int rn, unsigned int rd) 5741 { 5742 TCGv_i64 tcg_shift = tcg_temp_new_i64(); 5743 TCGv_i64 tcg_rd = cpu_reg(s, rd); 5744 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf); 5745 5746 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31); 5747 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift); 5748 tcg_temp_free_i64(tcg_shift); 5749 } 5750 5751 /* CRC32[BHWX], CRC32C[BHWX] */ 5752 static void handle_crc32(DisasContext *s, 5753 unsigned int sf, unsigned int sz, bool crc32c, 5754 unsigned int rm, unsigned int rn, unsigned int rd) 5755 { 5756 TCGv_i64 tcg_acc, tcg_val; 5757 TCGv_i32 tcg_bytes; 5758 5759 if (!dc_isar_feature(aa64_crc32, s) 5760 || (sf == 1 && sz != 3) 5761 || (sf == 0 && sz == 3)) { 5762 unallocated_encoding(s); 5763 return; 5764 } 5765 5766 if (sz == 3) { 5767 tcg_val = cpu_reg(s, rm); 5768 } else { 5769 uint64_t mask; 5770 switch (sz) { 5771 case 0: 5772 mask = 0xFF; 5773 break; 5774 case 1: 5775 mask = 0xFFFF; 5776 break; 5777 case 2: 5778 mask = 0xFFFFFFFF; 5779 break; 5780 default: 5781 g_assert_not_reached(); 5782 } 5783 tcg_val = tcg_temp_new_i64(); 5784 tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask); 5785 } 5786 5787 tcg_acc = cpu_reg(s, rn); 5788 tcg_bytes = tcg_constant_i32(1 << sz); 5789 5790 if (crc32c) { 5791 gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 5792 } else { 5793 gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes); 5794 } 5795 } 5796 5797 /* Data-processing (2 source) 5798 * 31 30 29 28 21 20 16 15 10 9 5 4 0 5799 * +----+---+---+-----------------+------+--------+------+------+ 5800 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd | 5801 * +----+---+---+-----------------+------+--------+------+------+ 5802 */ 5803 static void disas_data_proc_2src(DisasContext *s, uint32_t insn) 5804 { 5805 unsigned int sf, rm, opcode, rn, rd, setflag; 5806 sf = extract32(insn, 31, 1); 5807 setflag = extract32(insn, 29, 1); 5808 rm = extract32(insn, 16, 5); 5809 opcode = extract32(insn, 10, 6); 5810 rn = extract32(insn, 5, 5); 5811 rd = extract32(insn, 0, 5); 5812 5813 if (setflag && opcode != 0) { 5814 unallocated_encoding(s); 5815 return; 5816 } 5817 5818 switch (opcode) { 5819 case 0: /* SUBP(S) */ 5820 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5821 goto do_unallocated; 5822 } else { 5823 TCGv_i64 tcg_n, tcg_m, tcg_d; 5824 5825 tcg_n = read_cpu_reg_sp(s, rn, true); 5826 tcg_m = read_cpu_reg_sp(s, rm, true); 5827 tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); 5828 tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); 5829 tcg_d = cpu_reg(s, rd); 5830 5831 if (setflag) { 5832 gen_sub_CC(true, tcg_d, tcg_n, tcg_m); 5833 } else { 5834 tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); 5835 } 5836 } 5837 break; 5838 case 2: /* UDIV */ 5839 handle_div(s, false, sf, rm, rn, rd); 5840 break; 5841 case 3: /* SDIV */ 5842 handle_div(s, true, sf, rm, rn, rd); 5843 break; 5844 case 4: /* IRG */ 5845 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 
5846 goto do_unallocated; 5847 } 5848 if (s->ata) { 5849 gen_helper_irg(cpu_reg_sp(s, rd), cpu_env, 5850 cpu_reg_sp(s, rn), cpu_reg(s, rm)); 5851 } else { 5852 gen_address_with_allocation_tag0(cpu_reg_sp(s, rd), 5853 cpu_reg_sp(s, rn)); 5854 } 5855 break; 5856 case 5: /* GMI */ 5857 if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { 5858 goto do_unallocated; 5859 } else { 5860 TCGv_i64 t = tcg_temp_new_i64(); 5861 5862 tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4); 5863 tcg_gen_shl_i64(t, tcg_constant_i64(1), t); 5864 tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t); 5865 5866 tcg_temp_free_i64(t); 5867 } 5868 break; 5869 case 8: /* LSLV */ 5870 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); 5871 break; 5872 case 9: /* LSRV */ 5873 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd); 5874 break; 5875 case 10: /* ASRV */ 5876 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd); 5877 break; 5878 case 11: /* RORV */ 5879 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd); 5880 break; 5881 case 12: /* PACGA */ 5882 if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) { 5883 goto do_unallocated; 5884 } 5885 gen_helper_pacga(cpu_reg(s, rd), cpu_env, 5886 cpu_reg(s, rn), cpu_reg_sp(s, rm)); 5887 break; 5888 case 16: 5889 case 17: 5890 case 18: 5891 case 19: 5892 case 20: 5893 case 21: 5894 case 22: 5895 case 23: /* CRC32 */ 5896 { 5897 int sz = extract32(opcode, 0, 2); 5898 bool crc32c = extract32(opcode, 2, 1); 5899 handle_crc32(s, sf, sz, crc32c, rm, rn, rd); 5900 break; 5901 } 5902 default: 5903 do_unallocated: 5904 unallocated_encoding(s); 5905 break; 5906 } 5907 } 5908 5909 /* 5910 * Data processing - register 5911 * 31 30 29 28 25 21 20 16 10 0 5912 * +--+---+--+---+-------+-----+-------+-------+---------+ 5913 * | |op0| |op1| 1 0 1 | op2 | | op3 | | 5914 * +--+---+--+---+-------+-----+-------+-------+---------+ 5915 */ 5916 static void disas_data_proc_reg(DisasContext *s, uint32_t insn) 5917 { 5918 int op0 = extract32(insn, 30, 1); 5919 int op1 = extract32(insn, 28, 1); 5920 int op2 = extract32(insn, 21, 4); 5921 int op3 = extract32(insn, 10, 6); 5922 5923 if (!op1) { 5924 if (op2 & 8) { 5925 if (op2 & 1) { 5926 /* Add/sub (extended register) */ 5927 disas_add_sub_ext_reg(s, insn); 5928 } else { 5929 /* Add/sub (shifted register) */ 5930 disas_add_sub_reg(s, insn); 5931 } 5932 } else { 5933 /* Logical (shifted register) */ 5934 disas_logic_reg(s, insn); 5935 } 5936 return; 5937 } 5938 5939 switch (op2) { 5940 case 0x0: 5941 switch (op3) { 5942 case 0x00: /* Add/subtract (with carry) */ 5943 disas_adc_sbc(s, insn); 5944 break; 5945 5946 case 0x01: /* Rotate right into flags */ 5947 case 0x21: 5948 disas_rotate_right_into_flags(s, insn); 5949 break; 5950 5951 case 0x02: /* Evaluate into flags */ 5952 case 0x12: 5953 case 0x22: 5954 case 0x32: 5955 disas_evaluate_into_flags(s, insn); 5956 break; 5957 5958 default: 5959 goto do_unallocated; 5960 } 5961 break; 5962 5963 case 0x2: /* Conditional compare */ 5964 disas_cc(s, insn); /* both imm and reg forms */ 5965 break; 5966 5967 case 0x4: /* Conditional select */ 5968 disas_cond_select(s, insn); 5969 break; 5970 5971 case 0x6: /* Data-processing */ 5972 if (op0) { /* (1 source) */ 5973 disas_data_proc_1src(s, insn); 5974 } else { /* (2 source) */ 5975 disas_data_proc_2src(s, insn); 5976 } 5977 break; 5978 case 0x8 ... 
/*
 * Data processing - register
 *  31  30 29  28      25  21  20  16      10         0
 * +--+---+--+---+-------+-----+-------+-------+---------+
 * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
 * +--+---+--+---+-------+-----+-------+-------+---------+
 */
static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
{
    int op0 = extract32(insn, 30, 1);
    int op1 = extract32(insn, 28, 1);
    int op2 = extract32(insn, 21, 4);
    int op3 = extract32(insn, 10, 6);

    if (!op1) {
        if (op2 & 8) {
            if (op2 & 1) {
                /* Add/sub (extended register) */
                disas_add_sub_ext_reg(s, insn);
            } else {
                /* Add/sub (shifted register) */
                disas_add_sub_reg(s, insn);
            }
        } else {
            /* Logical (shifted register) */
            disas_logic_reg(s, insn);
        }
        return;
    }

    switch (op2) {
    case 0x0:
        switch (op3) {
        case 0x00: /* Add/subtract (with carry) */
            disas_adc_sbc(s, insn);
            break;

        case 0x01: /* Rotate right into flags */
        case 0x21:
            disas_rotate_right_into_flags(s, insn);
            break;

        case 0x02: /* Evaluate into flags */
        case 0x12:
        case 0x22:
        case 0x32:
            disas_evaluate_into_flags(s, insn);
            break;

        default:
            goto do_unallocated;
        }
        break;

    case 0x2: /* Conditional compare */
        disas_cc(s, insn); /* both imm and reg forms */
        break;

    case 0x4: /* Conditional select */
        disas_cond_select(s, insn);
        break;

    case 0x6: /* Data-processing */
        if (op0) {    /* (1 source) */
            disas_data_proc_1src(s, insn);
        } else {      /* (2 source) */
            disas_data_proc_2src(s, insn);
        }
        break;
    case 0x8 ... 0xf: /* (3 source) */
        disas_data_proc_3src(s, insn);
        break;

    default:
    do_unallocated:
        unallocated_encoding(s);
        break;
    }
}

static void handle_fp_compare(DisasContext *s, int size,
                              unsigned int rn, unsigned int rm,
                              bool cmp_with_zero, bool signal_all_nans)
{
    TCGv_i64 tcg_flags = tcg_temp_new_i64();
    TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    if (size == MO_64) {
        TCGv_i64 tcg_vn, tcg_vm;

        tcg_vn = read_fp_dreg(s, rn);
        if (cmp_with_zero) {
            tcg_vm = tcg_constant_i64(0);
        } else {
            tcg_vm = read_fp_dreg(s, rm);
        }
        if (signal_all_nans) {
            gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        } else {
            gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
        }
        tcg_temp_free_i64(tcg_vn);
        tcg_temp_free_i64(tcg_vm);
    } else {
        TCGv_i32 tcg_vn = tcg_temp_new_i32();
        TCGv_i32 tcg_vm = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_vn, rn, 0, size);
        if (cmp_with_zero) {
            tcg_gen_movi_i32(tcg_vm, 0);
        } else {
            read_vec_element_i32(s, tcg_vm, rm, 0, size);
        }

        switch (size) {
        case MO_32:
            if (signal_all_nans) {
                gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
            } else {
                gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
            }
            break;
        case MO_16:
            if (signal_all_nans) {
                gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
            } else {
                gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
            }
            break;
        default:
            g_assert_not_reached();
        }

        tcg_temp_free_i32(tcg_vn);
        tcg_temp_free_i32(tcg_vm);
    }

    tcg_temp_free_ptr(fpst);

    gen_set_nzcv(tcg_flags);

    tcg_temp_free_i64(tcg_flags);
}

/* Floating point compare
 *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
 */
static void disas_fp_compare(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, op, rn, opc, op2r;
    int size;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2);
    rm = extract32(insn, 16, 5);
    op = extract32(insn, 14, 2);
    rn = extract32(insn, 5, 5);
    opc = extract32(insn, 3, 2);
    op2r = extract32(insn, 0, 3);

    if (mos || op || op2r) {
        unallocated_encoding(s);
        return;
    }

    switch (type) {
    case 0:
        size = MO_32;
        break;
    case 1:
        size = MO_64;
        break;
    case 3:
        size = MO_16;
        if (dc_isar_feature(aa64_fp16, s)) {
            break;
        }
        /* fallthru */
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
}
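/*
 * Illustrative mapping of the opc field above: bit 0 selects the
 * compare-with-zero form (FCMP Vn, #0.0 rather than FCMP Vn, Vm),
 * and bit 1 selects the signalling variants FCMPE, which raise an
 * Invalid Operation exception for quiet NaN inputs as well as
 * signalling ones.
 */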
/* Floating point conditional compare
 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
 */
static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, cond, rn, op, nzcv;
    TCGLabel *label_continue = NULL;
    int size;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2);
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    op = extract32(insn, 4, 1);
    nzcv = extract32(insn, 0, 4);

    if (mos) {
        unallocated_encoding(s);
        return;
    }

    switch (type) {
    case 0:
        size = MO_32;
        break;
    case 1:
        size = MO_64;
        break;
    case 3:
        size = MO_16;
        if (dc_isar_feature(aa64_fp16, s)) {
            break;
        }
        /* fallthru */
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (cond < 0x0e) { /* not always */
        TCGLabel *label_match = gen_new_label();
        label_continue = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        /* nomatch: */
        gen_set_nzcv(tcg_constant_i64(nzcv << 28));
        tcg_gen_br(label_continue);
        gen_set_label(label_match);
    }

    handle_fp_compare(s, size, rn, rm, false, op);

    if (cond < 0x0e) {
        gen_set_label(label_continue);
    }
}
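/*
 * Illustrative expansion: FCCMP D0, D1, #0x4, NE emits a branch on
 * the NE condition; on the no-match path NZCV is loaded directly
 * with the immediate (here 0b0100, i.e. Z set), and on the match
 * path the ordinary FP compare of D0 against D1 runs instead.
 */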
/* Floating point conditional select
 *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+------+-----+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+------+-----+------+------+
 */
static void disas_fp_csel(DisasContext *s, uint32_t insn)
{
    unsigned int mos, type, rm, cond, rn, rd;
    TCGv_i64 t_true, t_false;
    DisasCompare64 c;
    MemOp sz;

    mos = extract32(insn, 29, 3);
    type = extract32(insn, 22, 2);
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (mos) {
        unallocated_encoding(s);
        return;
    }

    switch (type) {
    case 0:
        sz = MO_32;
        break;
    case 1:
        sz = MO_64;
        break;
    case 3:
        sz = MO_16;
        if (dc_isar_feature(aa64_fp16, s)) {
            break;
        }
        /* fallthru */
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* Zero extend sreg & hreg inputs to 64 bits now.  */
    t_true = tcg_temp_new_i64();
    t_false = tcg_temp_new_i64();
    read_vec_element(s, t_true, rn, 0, sz);
    read_vec_element(s, t_false, rm, 0, sz);

    a64_test_cc(&c, cond);
    tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0),
                        t_true, t_false);
    tcg_temp_free_i64(t_false);

    /* Note that sregs & hregs write back zeros to the high bits,
       and we've already done the zero-extension.  */
    write_fp_dreg(s, rd, t_true);
    tcg_temp_free_i64(t_true);
}

/* Floating-point data-processing (1 source) - half precision */
static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
{
    TCGv_ptr fpst = NULL;
    TCGv_i32 tcg_op = read_fp_hreg(s, rn);
    TCGv_i32 tcg_res = tcg_temp_new_i32();

    switch (opcode) {
    case 0x0: /* FMOV */
        tcg_gen_mov_i32(tcg_res, tcg_op);
        break;
    case 0x1: /* FABS */
        tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
        break;
    case 0x2: /* FNEG */
        tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
        break;
    case 0x3: /* FSQRT */
        fpst = fpstatus_ptr(FPST_FPCR_F16);
        gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
        break;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
    {
        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
        fpst = fpstatus_ptr(FPST_FPCR_F16);

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
        tcg_temp_free_i32(tcg_rmode);
        break;
    }
    case 0xe: /* FRINTX */
        fpst = fpstatus_ptr(FPST_FPCR_F16);
        gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
        break;
    case 0xf: /* FRINTI */
        fpst = fpstatus_ptr(FPST_FPCR_F16);
        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    write_fp_sreg(s, rd, tcg_res);

    if (fpst) {
        tcg_temp_free_ptr(fpst);
    }
    tcg_temp_free_i32(tcg_op);
    tcg_temp_free_i32(tcg_res);
}
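/*
 * Bit-level example (illustrative): half-precision FABS and FNEG
 * need no FP status at all, they just mask or flip the sign bit.
 * FNEG of 0xc500 (-5.0 as a float16) gives 0x4500 (+5.0); FABS of
 * either value gives 0x4500.
 */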
/* Floating-point data-processing (1 source) - single precision */
static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
{
    void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr);
    TCGv_i32 tcg_op, tcg_res;
    TCGv_ptr fpst;
    int rmode = -1;

    tcg_op = read_fp_sreg(s, rn);
    tcg_res = tcg_temp_new_i32();

    switch (opcode) {
    case 0x0: /* FMOV */
        tcg_gen_mov_i32(tcg_res, tcg_op);
        goto done;
    case 0x1: /* FABS */
        gen_helper_vfp_abss(tcg_res, tcg_op);
        goto done;
    case 0x2: /* FNEG */
        gen_helper_vfp_negs(tcg_res, tcg_op);
        goto done;
    case 0x3: /* FSQRT */
        gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
        goto done;
    case 0x6: /* BFCVT */
        gen_fpst = gen_helper_bfcvt;
        break;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
        rmode = arm_rmode_to_sf(opcode & 7);
        gen_fpst = gen_helper_rints;
        break;
    case 0xe: /* FRINTX */
        gen_fpst = gen_helper_rints_exact;
        break;
    case 0xf: /* FRINTI */
        gen_fpst = gen_helper_rints;
        break;
    case 0x10: /* FRINT32Z */
        rmode = float_round_to_zero;
        gen_fpst = gen_helper_frint32_s;
        break;
    case 0x11: /* FRINT32X */
        gen_fpst = gen_helper_frint32_s;
        break;
    case 0x12: /* FRINT64Z */
        rmode = float_round_to_zero;
        gen_fpst = gen_helper_frint64_s;
        break;
    case 0x13: /* FRINT64X */
        gen_fpst = gen_helper_frint64_s;
        break;
    default:
        g_assert_not_reached();
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    if (rmode >= 0) {
        TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
        gen_fpst(tcg_res, tcg_op, fpst);
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
        tcg_temp_free_i32(tcg_rmode);
    } else {
        gen_fpst(tcg_res, tcg_op, fpst);
    }
    tcg_temp_free_ptr(fpst);

 done:
    write_fp_sreg(s, rd, tcg_res);
    tcg_temp_free_i32(tcg_op);
    tcg_temp_free_i32(tcg_res);
}

/* Floating-point data-processing (1 source) - double precision */
static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
{
    void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr);
    TCGv_i64 tcg_op, tcg_res;
    TCGv_ptr fpst;
    int rmode = -1;

    switch (opcode) {
    case 0x0: /* FMOV */
        gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
        return;
    }

    tcg_op = read_fp_dreg(s, rn);
    tcg_res = tcg_temp_new_i64();

    switch (opcode) {
    case 0x1: /* FABS */
        gen_helper_vfp_absd(tcg_res, tcg_op);
        goto done;
    case 0x2: /* FNEG */
        gen_helper_vfp_negd(tcg_res, tcg_op);
        goto done;
    case 0x3: /* FSQRT */
        gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
        goto done;
    case 0x8: /* FRINTN */
    case 0x9: /* FRINTP */
    case 0xa: /* FRINTM */
    case 0xb: /* FRINTZ */
    case 0xc: /* FRINTA */
        rmode = arm_rmode_to_sf(opcode & 7);
        gen_fpst = gen_helper_rintd;
        break;
    case 0xe: /* FRINTX */
        gen_fpst = gen_helper_rintd_exact;
        break;
    case 0xf: /* FRINTI */
        gen_fpst = gen_helper_rintd;
        break;
    case 0x10: /* FRINT32Z */
        rmode = float_round_to_zero;
        gen_fpst = gen_helper_frint32_d;
        break;
    case 0x11: /* FRINT32X */
        gen_fpst = gen_helper_frint32_d;
        break;
    case 0x12: /* FRINT64Z */
        rmode = float_round_to_zero;
        gen_fpst = gen_helper_frint64_d;
        break;
    case 0x13: /* FRINT64X */
        gen_fpst = gen_helper_frint64_d;
        break;
    default:
        g_assert_not_reached();
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    if (rmode >= 0) {
        TCGv_i32 tcg_rmode = tcg_const_i32(rmode);
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
        gen_fpst(tcg_res, tcg_op, fpst);
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
        tcg_temp_free_i32(tcg_rmode);
    } else {
        gen_fpst(tcg_res, tcg_op, fpst);
    }
    tcg_temp_free_ptr(fpst);

 done:
    write_fp_dreg(s, rd, tcg_res);
    tcg_temp_free_i64(tcg_op);
    tcg_temp_free_i64(tcg_res);
}
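/*
 * Note on the rmode sequences above: gen_helper_set_rmode returns the
 * previous rounding mode in its first argument, so calling it twice
 * with the same TCG temp installs the explicit mode for the operation
 * and then restores whatever mode FPCR had selected.
 */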
static void handle_fp_fcvt(DisasContext *s, int opcode,
                           int rd, int rn, int dtype, int ntype)
{
    switch (ntype) {
    case 0x0:
    {
        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
        if (dtype == 1) {
            /* Single to double */
            TCGv_i64 tcg_rd = tcg_temp_new_i64();
            gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
            write_fp_dreg(s, rd, tcg_rd);
            tcg_temp_free_i64(tcg_rd);
        } else {
            /* Single to half */
            TCGv_i32 tcg_rd = tcg_temp_new_i32();
            TCGv_i32 ahp = get_ahp_flag();
            TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);

            gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
            write_fp_sreg(s, rd, tcg_rd);
            tcg_temp_free_i32(tcg_rd);
            tcg_temp_free_i32(ahp);
            tcg_temp_free_ptr(fpst);
        }
        tcg_temp_free_i32(tcg_rn);
        break;
    }
    case 0x1:
    {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
        if (dtype == 0) {
            /* Double to single */
            gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
        } else {
            TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
            TCGv_i32 ahp = get_ahp_flag();
            /* Double to half */
            gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
            /* write_fp_sreg is OK here because top half of tcg_rd is zero */
            tcg_temp_free_ptr(fpst);
            tcg_temp_free_i32(ahp);
        }
        write_fp_sreg(s, rd, tcg_rd);
        tcg_temp_free_i32(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
        break;
    }
    case 0x3:
    {
        TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
        TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR);
        TCGv_i32 tcg_ahp = get_ahp_flag();
        tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
        if (dtype == 0) {
            /* Half to single */
            TCGv_i32 tcg_rd = tcg_temp_new_i32();
            gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
            write_fp_sreg(s, rd, tcg_rd);
            tcg_temp_free_i32(tcg_rd);
        } else {
            /* Half to double */
            TCGv_i64 tcg_rd = tcg_temp_new_i64();
            gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
            write_fp_dreg(s, rd, tcg_rd);
            tcg_temp_free_i64(tcg_rd);
        }
        tcg_temp_free_i32(tcg_rn);
        tcg_temp_free_ptr(tcg_fpst);
        tcg_temp_free_i32(tcg_ahp);
        break;
    }
    default:
        g_assert_not_reached();
    }
}
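/*
 * Illustrative decode for the FCVT cases above: ntype is the source
 * precision and dtype the destination, using the same 0 = single,
 * 1 = double, 3 = half encoding as the "type" field, so FCVT D0, S1
 * arrives here with ntype == 0 and dtype == 1. The disas_fp_1src
 * caller below has already rejected dtype == ntype and type == 2.
 */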
/* Floating point data-processing (1 source)
 *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
 * +---+---+---+-----------+------+---+--------+-----------+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+--------+-----------+------+------+
 */
static void disas_fp_1src(DisasContext *s, uint32_t insn)
{
    int mos = extract32(insn, 29, 3);
    int type = extract32(insn, 22, 2);
    int opcode = extract32(insn, 15, 6);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    if (mos) {
        goto do_unallocated;
    }

    switch (opcode) {
    case 0x4: case 0x5: case 0x7:
    {
        /* FCVT between half, single and double precision */
        int dtype = extract32(opcode, 0, 2);
        if (type == 2 || dtype == type) {
            goto do_unallocated;
        }
        if (!fp_access_check(s)) {
            return;
        }

        handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
        break;
    }

    case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */
        if (type > 1 || !dc_isar_feature(aa64_frint, s)) {
            goto do_unallocated;
        }
        /* fall through */
    case 0x0 ... 0x3:
    case 0x8 ... 0xc:
    case 0xe ... 0xf:
        /* 32-to-32 and 64-to-64 ops */
        switch (type) {
        case 0:
            if (!fp_access_check(s)) {
                return;
            }
            handle_fp_1src_single(s, opcode, rd, rn);
            break;
        case 1:
            if (!fp_access_check(s)) {
                return;
            }
            handle_fp_1src_double(s, opcode, rd, rn);
            break;
        case 3:
            if (!dc_isar_feature(aa64_fp16, s)) {
                goto do_unallocated;
            }

            if (!fp_access_check(s)) {
                return;
            }
            handle_fp_1src_half(s, opcode, rd, rn);
            break;
        default:
            goto do_unallocated;
        }
        break;

    case 0x6:
        switch (type) {
        case 1: /* BFCVT */
            if (!dc_isar_feature(aa64_bf16, s)) {
                goto do_unallocated;
            }
            if (!fp_access_check(s)) {
                return;
            }
            handle_fp_1src_single(s, opcode, rd, rn);
            break;
        default:
            goto do_unallocated;
        }
        break;

    default:
    do_unallocated:
        unallocated_encoding(s);
        break;
    }
}

/* Floating-point data-processing (2 source) - single precision */
static void handle_fp_2src_single(DisasContext *s, int opcode,
                                  int rd, int rn, int rm)
{
    TCGv_i32 tcg_op1;
    TCGv_i32 tcg_op2;
    TCGv_i32 tcg_res;
    TCGv_ptr fpst;

    tcg_res = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_op1 = read_fp_sreg(s, rn);
    tcg_op2 = read_fp_sreg(s, rm);

    switch (opcode) {
    case 0x0: /* FMUL */
        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1: /* FDIV */
        gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x2: /* FADD */
        gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x3: /* FSUB */
        gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x4: /* FMAX */
        gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x5: /* FMIN */
        gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x6: /* FMAXNM */
        gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x7: /* FMINNM */
        gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x8: /* FNMUL */
        gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
        gen_helper_vfp_negs(tcg_res, tcg_res);
        break;
    }

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_op1);
    tcg_temp_free_i32(tcg_op2);
    tcg_temp_free_i32(tcg_res);
}
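/*
 * Note on FNMUL above: the architecture defines it as FPNeg applied
 * to the product, which flips the sign bit unconditionally, so
 * multiplying and then negating as two separate steps is the correct
 * expansion (e.g. FNMUL of 2.0 and 3.0 yields -6.0, and a NaN
 * product comes out with its sign bit inverted).
 */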
/* Floating-point data-processing (2 source) - double precision */
static void handle_fp_2src_double(DisasContext *s, int opcode,
                                  int rd, int rn, int rm)
{
    TCGv_i64 tcg_op1;
    TCGv_i64 tcg_op2;
    TCGv_i64 tcg_res;
    TCGv_ptr fpst;

    tcg_res = tcg_temp_new_i64();
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_op1 = read_fp_dreg(s, rn);
    tcg_op2 = read_fp_dreg(s, rm);

    switch (opcode) {
    case 0x0: /* FMUL */
        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1: /* FDIV */
        gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x2: /* FADD */
        gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x3: /* FSUB */
        gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x4: /* FMAX */
        gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x5: /* FMIN */
        gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x6: /* FMAXNM */
        gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x7: /* FMINNM */
        gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x8: /* FNMUL */
        gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
        gen_helper_vfp_negd(tcg_res, tcg_res);
        break;
    }

    write_fp_dreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_res);
}

/* Floating-point data-processing (2 source) - half precision */
static void handle_fp_2src_half(DisasContext *s, int opcode,
                                int rd, int rn, int rm)
{
    TCGv_i32 tcg_op1;
    TCGv_i32 tcg_op2;
    TCGv_i32 tcg_res;
    TCGv_ptr fpst;

    tcg_res = tcg_temp_new_i32();
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    tcg_op1 = read_fp_hreg(s, rn);
    tcg_op2 = read_fp_hreg(s, rm);

    switch (opcode) {
    case 0x0: /* FMUL */
        gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1: /* FDIV */
        gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x2: /* FADD */
        gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x3: /* FSUB */
        gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x4: /* FMAX */
        gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x5: /* FMIN */
        gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x6: /* FMAXNM */
        gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x7: /* FMINNM */
        gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x8: /* FNMUL */
        gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
        tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
        break;
    default:
        g_assert_not_reached();
    }

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_op1);
    tcg_temp_free_i32(tcg_op2);
    tcg_temp_free_i32(tcg_res);
}

/* Floating point data-processing (2 source)
 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 */
static void disas_fp_2src(DisasContext *s, uint32_t insn)
{
    int mos = extract32(insn, 29, 3);
    int type = extract32(insn, 22, 2);
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int opcode = extract32(insn, 12, 4);

    if (opcode > 8 || mos) {
        unallocated_encoding(s);
        return;
    }

    switch (type) {
    case 0:
        if (!fp_access_check(s)) {
            return;
        }
        handle_fp_2src_single(s, opcode, rd, rn, rm);
        break;
    case 1:
        if (!fp_access_check(s)) {
            return;
        }
        handle_fp_2src_double(s, opcode, rd, rn, rm);
        break;
    case 3:
        if (!dc_isar_feature(aa64_fp16, s)) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_fp_2src_half(s, opcode, rd, rn, rm);
        break;
    default:
        unallocated_encoding(s);
    }
}
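/*
 * Worked decode example (illustrative): insn 0x1e222820 has M == 0,
 * S == 0, type == 0b00, rm == 2, opcode == 0b0010, rn == 1, rd == 0,
 * i.e. FADD S0, S1, S2, which the switch above routes to
 * handle_fp_2src_single.
 */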
/* Floating-point data-processing (3 source) - single precision */
static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
                                  int rd, int rn, int rm, int ra)
{
    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
    TCGv_i32 tcg_res = tcg_temp_new_i32();
    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);

    tcg_op1 = read_fp_sreg(s, rn);
    tcg_op2 = read_fp_sreg(s, rm);
    tcg_op3 = read_fp_sreg(s, ra);

    /* These are fused multiply-add, and must be done as one
     * floating point operation with no rounding between the
     * multiplication and addition steps.
     * NB that doing the negations here as separate steps is
     * correct: an input NaN should come out with its sign bit
     * flipped if it is a negated input.
     */
    if (o1 == true) {
        gen_helper_vfp_negs(tcg_op3, tcg_op3);
    }

    if (o0 != o1) {
        gen_helper_vfp_negs(tcg_op1, tcg_op1);
    }

    gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_op1);
    tcg_temp_free_i32(tcg_op2);
    tcg_temp_free_i32(tcg_op3);
    tcg_temp_free_i32(tcg_res);
}

/* Floating-point data-processing (3 source) - double precision */
static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
                                  int rd, int rn, int rm, int ra)
{
    TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
    TCGv_i64 tcg_res = tcg_temp_new_i64();
    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);

    tcg_op1 = read_fp_dreg(s, rn);
    tcg_op2 = read_fp_dreg(s, rm);
    tcg_op3 = read_fp_dreg(s, ra);

    /* These are fused multiply-add, and must be done as one
     * floating point operation with no rounding between the
     * multiplication and addition steps.
     * NB that doing the negations here as separate steps is
     * correct: an input NaN should come out with its sign bit
     * flipped if it is a negated input.
     */
    if (o1 == true) {
        gen_helper_vfp_negd(tcg_op3, tcg_op3);
    }

    if (o0 != o1) {
        gen_helper_vfp_negd(tcg_op1, tcg_op1);
    }

    gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);

    write_fp_dreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_op3);
    tcg_temp_free_i64(tcg_res);
}
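/*
 * Illustrative o1/o0 mapping for the three handlers in this group:
 *   o1 == 0, o0 == 0 is FMADD  (rd = ra + rn * rm)
 *   o1 == 0, o0 == 1 is FMSUB  (rd = ra - rn * rm, rn negated)
 *   o1 == 1, o0 == 0 is FNMADD (rd = -ra - rn * rm, both negated)
 *   o1 == 1, o0 == 1 is FNMSUB (rd = -ra + rn * rm, ra negated)
 */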
/* Floating-point data-processing (3 source) - half precision */
static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
                                int rd, int rn, int rm, int ra)
{
    TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
    TCGv_i32 tcg_res = tcg_temp_new_i32();
    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16);

    tcg_op1 = read_fp_hreg(s, rn);
    tcg_op2 = read_fp_hreg(s, rm);
    tcg_op3 = read_fp_hreg(s, ra);

    /* These are fused multiply-add, and must be done as one
     * floating point operation with no rounding between the
     * multiplication and addition steps.
     * NB that doing the negations here as separate steps is
     * correct: an input NaN should come out with its sign bit
     * flipped if it is a negated input.
     */
    if (o1 == true) {
        tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
    }

    if (o0 != o1) {
        tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
    }

    gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_ptr(fpst);
    tcg_temp_free_i32(tcg_op1);
    tcg_temp_free_i32(tcg_op2);
    tcg_temp_free_i32(tcg_op3);
    tcg_temp_free_i32(tcg_res);
}

/* Floating point data-processing (3 source)
 *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
 * +---+---+---+-----------+------+----+------+----+------+------+------+
 * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
 * +---+---+---+-----------+------+----+------+----+------+------+------+
 */
static void disas_fp_3src(DisasContext *s, uint32_t insn)
{
    int mos = extract32(insn, 29, 3);
    int type = extract32(insn, 22, 2);
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int ra = extract32(insn, 10, 5);
    int rm = extract32(insn, 16, 5);
    bool o0 = extract32(insn, 15, 1);
    bool o1 = extract32(insn, 21, 1);

    if (mos) {
        unallocated_encoding(s);
        return;
    }

    switch (type) {
    case 0:
        if (!fp_access_check(s)) {
            return;
        }
        handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
        break;
    case 1:
        if (!fp_access_check(s)) {
            return;
        }
        handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
        break;
    case 3:
        if (!dc_isar_feature(aa64_fp16, s)) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
        break;
    default:
        unallocated_encoding(s);
    }
}

/* Floating point immediate
 *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
 * +---+---+---+-----------+------+---+------------+-------+------+------+
 * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
 * +---+---+---+-----------+------+---+------------+-------+------+------+
 */
static void disas_fp_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int imm5 = extract32(insn, 5, 5);
    int imm8 = extract32(insn, 13, 8);
    int type = extract32(insn, 22, 2);
    int mos = extract32(insn, 29, 3);
    uint64_t imm;
    MemOp sz;

    if (mos || imm5) {
        unallocated_encoding(s);
        return;
    }

    switch (type) {
    case 0:
        sz = MO_32;
        break;
    case 1:
        sz = MO_64;
        break;
    case 3:
        sz = MO_16;
        if (dc_isar_feature(aa64_fp16, s)) {
            break;
        }
        /* fallthru */
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    imm = vfp_expand_imm(sz, imm8);
    write_fp_dreg(s, rd, tcg_constant_i64(imm));
}
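/*
 * Worked example (illustrative): FMOV S0, #1.0 encodes imm8 as 0x70;
 * vfp_expand_imm(MO_32, 0x70) reconstructs the full 32-bit pattern
 * 0x3f800000 from the sign/exponent/fraction fields packed into
 * those eight bits.
 */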
/* Handle floating point <=> fixed point conversions. Note that we can
 * also deal with fp <=> integer conversions as a special case (scale == 64).
 * OPTME: consider handling that special case specially or at least skipping
 * the call to scalbn in the helpers for zero shifts.
 */
static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
                           bool itof, int rmode, int scale, int sf, int type)
{
    bool is_signed = !(opcode & 1);
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_shift, tcg_single;
    TCGv_i64 tcg_double;

    tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR);

    tcg_shift = tcg_constant_i32(64 - scale);

    if (itof) {
        TCGv_i64 tcg_int = cpu_reg(s, rn);
        if (!sf) {
            TCGv_i64 tcg_extend = tcg_temp_new_i64();

            if (is_signed) {
                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
            } else {
                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
            }

            tcg_int = tcg_extend;
        }

        switch (type) {
        case 1: /* float64 */
            tcg_double = tcg_temp_new_i64();
            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_dreg(s, rd, tcg_double);
            tcg_temp_free_i64(tcg_double);
            break;

        case 0: /* float32 */
            tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_sreg(s, rd, tcg_single);
            tcg_temp_free_i32(tcg_single);
            break;

        case 3: /* float16 */
            tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtoh(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtoh(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_sreg(s, rd, tcg_single);
            tcg_temp_free_i32(tcg_single);
            break;

        default:
            g_assert_not_reached();
        }
    } else {
        TCGv_i64 tcg_int = cpu_reg(s, rd);
        TCGv_i32 tcg_rmode;

        if (extract32(opcode, 2, 1)) {
            /* There are too many rounding modes to all fit into rmode,
             * so FCVTA[US] is a special case.
             */
            rmode = FPROUNDING_TIEAWAY;
        }

        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);

        switch (type) {
        case 1: /* float64 */
            tcg_double = read_fp_dreg(s, rn);
            if (is_signed) {
                if (!sf) {
                    gen_helper_vfp_tosld(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                if (!sf) {
                    gen_helper_vfp_tould(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            }
            if (!sf) {
                tcg_gen_ext32u_i64(tcg_int, tcg_int);
            }
            tcg_temp_free_i64(tcg_double);
            break;

        case 0: /* float32 */
            tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
                if (is_signed) {
                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
                tcg_temp_free_i32(tcg_dest);
            }
            tcg_temp_free_i32(tcg_single);
            break;

        case 3: /* float16 */
            tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqh(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqh(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
                if (is_signed) {
                    gen_helper_vfp_toslh(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_toulh(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
                tcg_temp_free_i32(tcg_dest);
            }
            tcg_temp_free_i32(tcg_single);
            break;

        default:
            g_assert_not_reached();
        }

        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
        tcg_temp_free_i32(tcg_rmode);
    }

    tcg_temp_free_ptr(tcg_fpstatus);
}
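/*
 * Worked example (illustrative): SCVTF D0, X1, #16 reaches
 * handle_fpfpcvt with scale == 48, because the scale field encodes
 * 64 - fbits; tcg_shift is then 16 and the helper scales the
 * converted value down by 2^16. The pure integer forms pass
 * scale == 64, making the shift zero.
 */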
/* Floating point <-> fixed point conversions
 *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
 */
static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int scale = extract32(insn, 10, 6);
    int opcode = extract32(insn, 16, 3);
    int rmode = extract32(insn, 19, 2);
    int type = extract32(insn, 22, 2);
    bool sbit = extract32(insn, 29, 1);
    bool sf = extract32(insn, 31, 1);
    bool itof;

    if (sbit || (!sf && scale < 32)) {
        unallocated_encoding(s);
        return;
    }

    switch (type) {
    case 0: /* float32 */
    case 1: /* float64 */
        break;
    case 3: /* float16 */
        if (dc_isar_feature(aa64_fp16, s)) {
            break;
        }
        /* fallthru */
    default:
        unallocated_encoding(s);
        return;
    }

    switch ((rmode << 3) | opcode) {
    case 0x2: /* SCVTF */
    case 0x3: /* UCVTF */
        itof = true;
        break;
    case 0x18: /* FCVTZS */
    case 0x19: /* FCVTZU */
        itof = false;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
}

static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
{
    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
     * without conversion.
     */

    if (itof) {
        TCGv_i64 tcg_rn = cpu_reg(s, rn);
        TCGv_i64 tmp;

        switch (type) {
        case 0:
            /* 32 bit */
            tmp = tcg_temp_new_i64();
            tcg_gen_ext32u_i64(tmp, tcg_rn);
            write_fp_dreg(s, rd, tmp);
            tcg_temp_free_i64(tmp);
            break;
        case 1:
            /* 64 bit */
            write_fp_dreg(s, rd, tcg_rn);
            break;
        case 2:
            /* 64 bit to top half. */
            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
            clear_vec_high(s, true, rd);
            break;
        case 3:
            /* 16 bit */
            tmp = tcg_temp_new_i64();
            tcg_gen_ext16u_i64(tmp, tcg_rn);
            write_fp_dreg(s, rd, tmp);
            tcg_temp_free_i64(tmp);
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        TCGv_i64 tcg_rd = cpu_reg(s, rd);

        switch (type) {
        case 0:
            /* 32 bit */
            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
            break;
        case 1:
            /* 64 bit */
            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
            break;
        case 2:
            /* 64 bits from top half */
            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
            break;
        case 3:
            /* 16 bit */
            tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
{
    TCGv_i64 t = read_fp_dreg(s, rn);
    TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR);

    gen_helper_fjcvtzs(t, t, fpstatus);

    tcg_temp_free_ptr(fpstatus);

    tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
    tcg_gen_extrh_i64_i32(cpu_ZF, t);
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_NF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);

    tcg_temp_free_i64(t);
}
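/*
 * Note on handle_fjcvtzs above (illustrative): the helper packs the
 * 32-bit result into the low half of t and the flag information into
 * the high half, so the extract-high feeds cpu_ZF directly while N,
 * C and V are simply zeroed; architecturally FJCVTZS reports only
 * whether the conversion was exact, via the Z flag.
 */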
/* Floating point <-> integer conversions
 *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
 */
static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 16, 3);
    int rmode = extract32(insn, 19, 2);
    int type = extract32(insn, 22, 2);
    bool sbit = extract32(insn, 29, 1);
    bool sf = extract32(insn, 31, 1);
    bool itof = false;

    if (sbit) {
        goto do_unallocated;
    }

    switch (opcode) {
    case 2: /* SCVTF */
    case 3: /* UCVTF */
        itof = true;
        /* fallthru */
    case 4: /* FCVTAS */
    case 5: /* FCVTAU */
        if (rmode != 0) {
            goto do_unallocated;
        }
        /* fallthru */
    case 0: /* FCVT[NPMZ]S */
    case 1: /* FCVT[NPMZ]U */
        switch (type) {
        case 0: /* float32 */
        case 1: /* float64 */
            break;
        case 3: /* float16 */
            if (!dc_isar_feature(aa64_fp16, s)) {
                goto do_unallocated;
            }
            break;
        default:
            goto do_unallocated;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
        break;

    default:
        switch (sf << 7 | type << 5 | rmode << 3 | opcode) {
        case 0b01100110: /* FMOV half <-> 32-bit int */
        case 0b01100111:
        case 0b11100110: /* FMOV half <-> 64-bit int */
        case 0b11100111:
            if (!dc_isar_feature(aa64_fp16, s)) {
                goto do_unallocated;
            }
            /* fallthru */
        case 0b00000110: /* FMOV 32-bit */
        case 0b00000111:
        case 0b10100110: /* FMOV 64-bit */
        case 0b10100111:
        case 0b11001110: /* FMOV top half of 128-bit */
        case 0b11001111:
            if (!fp_access_check(s)) {
                return;
            }
            itof = opcode & 1;
            handle_fmov(s, rd, rn, type, itof);
            break;

        case 0b00111110: /* FJCVTZS */
            if (!dc_isar_feature(aa64_jscvt, s)) {
                goto do_unallocated;
            } else if (fp_access_check(s)) {
                handle_fjcvtzs(s, rd, rn);
            }
            break;

        default:
        do_unallocated:
            unallocated_encoding(s);
            return;
        }
        break;
    }
}

/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
 *   31  30  29 28     25 24                          0
 * +---+---+---+---------+-----------------------------+
 * |   | 0 |   | 1 1 1 1 |                             |
 * +---+---+---+---------+-----------------------------+
 */
static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
{
    if (extract32(insn, 24, 1)) {
        /* Floating point data-processing (3 source) */
        disas_fp_3src(s, insn);
    } else if (extract32(insn, 21, 1) == 0) {
        /* Floating point to fixed point conversions */
        disas_fp_fixed_conv(s, insn);
    } else {
        switch (extract32(insn, 10, 2)) {
        case 1:
            /* Floating point conditional compare */
            disas_fp_ccomp(s, insn);
            break;
        case 2:
            /* Floating point data-processing (2 source) */
            disas_fp_2src(s, insn);
            break;
        case 3:
            /* Floating point conditional select */
            disas_fp_csel(s, insn);
            break;
        case 0:
            switch (ctz32(extract32(insn, 12, 4))) {
            case 0: /* [15:12] == xxx1 */
                /* Floating point immediate */
                disas_fp_imm(s, insn);
                break;
            case 1: /* [15:12] == xx10 */
                /* Floating point compare */
                disas_fp_compare(s, insn);
                break;
            case 2: /* [15:12] == x100 */
                /* Floating point data-processing (1 source) */
                disas_fp_1src(s, insn);
                break;
            case 3: /* [15:12] == 1000 */
                unallocated_encoding(s);
                break;
            default: /* [15:12] == 0000 */
                /* Floating point <-> integer conversions */
                disas_fp_int_conv(s, insn);
                break;
            }
            break;
        }
    }
}
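/*
 * Illustrative trace of the ctz32 dispatch above: for insn bits
 * [15:12] == 0b0100, ctz32 returns 2, selecting FP data-processing
 * (1 source); for 0b0000, ctz32(0) is 32, which falls through to the
 * default case and the FP <-> integer conversion group.
 */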
static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
                     int pos)
{
    /* Extract 64 bits from the middle of two concatenated 64 bit
     * vector register slices left:right. The extracted bits start
     * at 'pos' bits into the right (least significant) side.
     * We return the result in tcg_right, and guarantee not to
     * trash tcg_left.
     */
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
    assert(pos > 0 && pos < 64);

    tcg_gen_shri_i64(tcg_right, tcg_right, pos);
    tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
    tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);

    tcg_temp_free_i64(tcg_tmp);
}

/* EXT
 *   31  30 29         24 23 22  21  20  16 15  14  11 10  9    5 4    0
 * +---+---+-------------+-----+---+------+---+------+---+------+------+
 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
 * +---+---+-------------+-----+---+------+---+------+---+------+------+
 */
static void disas_simd_ext(DisasContext *s, uint32_t insn)
{
    int is_q = extract32(insn, 30, 1);
    int op2 = extract32(insn, 22, 2);
    int imm4 = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int pos = imm4 << 3;
    TCGv_i64 tcg_resl, tcg_resh;

    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_resh = tcg_temp_new_i64();
    tcg_resl = tcg_temp_new_i64();

    /* Vd gets bits starting at pos bits into Vm:Vn. This is
     * either extracting 128 bits from a 128:128 concatenation, or
     * extracting 64 bits from a 64:64 concatenation.
     */
    if (!is_q) {
        read_vec_element(s, tcg_resl, rn, 0, MO_64);
        if (pos != 0) {
            read_vec_element(s, tcg_resh, rm, 0, MO_64);
            do_ext64(s, tcg_resh, tcg_resl, pos);
        }
    } else {
        TCGv_i64 tcg_hh;
        typedef struct {
            int reg;
            int elt;
        } EltPosns;
        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
        EltPosns *elt = eltposns;

        if (pos >= 64) {
            elt++;
            pos -= 64;
        }

        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
        elt++;
        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
        elt++;
        if (pos != 0) {
            do_ext64(s, tcg_resh, tcg_resl, pos);
            tcg_hh = tcg_temp_new_i64();
            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
            do_ext64(s, tcg_hh, tcg_resh, pos);
            tcg_temp_free_i64(tcg_hh);
        }
    }

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_resl);
    if (is_q) {
        write_vec_element(s, tcg_resh, rd, 1, MO_64);
    }
    tcg_temp_free_i64(tcg_resh);
    clear_vec_high(s, is_q, rd);
}
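/*
 * Worked example (illustrative): EXT V0.16B, V1.16B, V2.16B, #3 has
 * imm4 == 3, so pos == 24 and the result is bytes 3..15 of Vn
 * followed by bytes 0..2 of Vm, i.e. a 128-bit extract starting 24
 * bits into the Vm:Vn concatenation.
 */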
/* TBL/TBX
 *   31  30 29         24 23 22  21  20  16 15  14 13  12  11 10 9    5 4    0
 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
 */
static void disas_simd_tb(DisasContext *s, uint32_t insn)
{
    int op2 = extract32(insn, 22, 2);
    int is_q = extract32(insn, 30, 1);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int is_tbx = extract32(insn, 12, 1);
    int len = (extract32(insn, 13, 2) + 1) * 16;

    if (op2 != 0) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rm), cpu_env,
                       is_q ? 16 : 8, vec_full_reg_size(s),
                       (len << 6) | (is_tbx << 5) | rn,
                       gen_helper_simd_tblx);
}

/* ZIP/UZP/TRN
 *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
 * +---+---+-------------+------+---+------+---+-----+-----+------+------+
 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
 * +---+---+-------------+------+---+------+---+-----+-----+------+------+
 */
static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
     * bit 2 indicates 1 vs 2 variant of the insn.
     */
    int opcode = extract32(insn, 12, 2);
    bool part = extract32(insn, 14, 1);
    bool is_q = extract32(insn, 30, 1);
    int esize = 8 << size;
    int i, ofs;
    int datasize = is_q ? 128 : 64;
    int elements = datasize / esize;
    TCGv_i64 tcg_res, tcg_resl, tcg_resh;

    if (opcode == 0 || (size == 3 && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_resl = tcg_const_i64(0);
    tcg_resh = is_q ? tcg_const_i64(0) : NULL;
    tcg_res = tcg_temp_new_i64();

    for (i = 0; i < elements; i++) {
        switch (opcode) {
        case 1: /* UZP1/2 */
        {
            int midpoint = elements / 2;
            if (i < midpoint) {
                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
            } else {
                read_vec_element(s, tcg_res, rm,
                                 2 * (i - midpoint) + part, size);
            }
            break;
        }
        case 2: /* TRN1/2 */
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
            } else {
                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
            }
            break;
        case 3: /* ZIP1/2 */
        {
            int base = part * elements / 2;
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
            } else {
                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
            }
            break;
        }
        default:
            g_assert_not_reached();
        }

        ofs = i * esize;
        if (ofs < 64) {
            tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
            tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
        } else {
            tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
            tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
        }
    }

    tcg_temp_free_i64(tcg_res);

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_resl);

    if (is_q) {
        write_vec_element(s, tcg_resh, rd, 1, MO_64);
        tcg_temp_free_i64(tcg_resh);
    }
    clear_vec_high(s, is_q, rd);
}
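/*
 * Worked example (illustrative): ZIP1 V0.8B, V1.8B, V2.8B has
 * part == 0, so base == 0 and the loop interleaves the low halves,
 * producing { Vn[0], Vm[0], Vn[1], Vm[1], Vn[2], Vm[2], Vn[3],
 * Vm[3] }; ZIP2 (part == 1) does the same with base == 4, using the
 * upper halves.
 */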
/*
 * do_reduction_op helper
 *
 * This mirrors the Reduce() pseudocode in the ARM ARM. It is
 * important for correct NaN propagation that we do these
 * operations in exactly the order specified by the pseudocode.
 *
 * This is a recursive function, TCG temps should be freed by the
 * calling function once it is done with the values.
 */
static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
                                int esize, int size, int vmap, TCGv_ptr fpst)
{
    if (esize == size) {
        int element;
        MemOp msize = esize == 16 ? MO_16 : MO_32;
        TCGv_i32 tcg_elem;

        /* We should have one register left here */
        assert(ctpop8(vmap) == 1);
        element = ctz32(vmap);
        assert(element < 8);

        tcg_elem = tcg_temp_new_i32();
        read_vec_element_i32(s, tcg_elem, rn, element, msize);
        return tcg_elem;
    } else {
        int bits = size / 2;
        int shift = ctpop8(vmap) / 2;
        int vmap_lo = (vmap >> shift) & vmap;
        int vmap_hi = (vmap & ~vmap_lo);
        TCGv_i32 tcg_hi, tcg_lo, tcg_res;

        tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
        tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
        tcg_res = tcg_temp_new_i32();

        switch (fpopcode) {
        case 0x0c: /* fmaxnmv half-precision */
            gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x0f: /* fmaxv half-precision */
            gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x1c: /* fminnmv half-precision */
            gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x1f: /* fminv half-precision */
            gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x2c: /* fmaxnmv */
            gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x2f: /* fmaxv */
            gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x3c: /* fminnmv */
            gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        case 0x3f: /* fminv */
            gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        tcg_temp_free_i32(tcg_hi);
        tcg_temp_free_i32(tcg_lo);
        return tcg_res;
    }
}
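/*
 * Illustrative recursion for a 4-element reduction: the initial call
 * has vmap == 0b1111; shift is 2, so vmap_lo == 0b0011 and
 * vmap_hi == 0b1100, and each half recurses down to single-element
 * leaves, giving the pairwise tree ((e0 op e1) op (e2 op e3)) that
 * the Reduce() pseudocode requires.
 */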
7830 */ 7831 is_min = extract32(size, 1, 1); 7832 is_fp = true; 7833 if (!is_u && dc_isar_feature(aa64_fp16, s)) { 7834 size = 1; 7835 } else if (!is_u || !is_q || extract32(size, 0, 1)) { 7836 unallocated_encoding(s); 7837 return; 7838 } else { 7839 size = 2; 7840 } 7841 break; 7842 default: 7843 unallocated_encoding(s); 7844 return; 7845 } 7846 7847 if (!fp_access_check(s)) { 7848 return; 7849 } 7850 7851 esize = 8 << size; 7852 elements = (is_q ? 128 : 64) / esize; 7853 7854 tcg_res = tcg_temp_new_i64(); 7855 tcg_elt = tcg_temp_new_i64(); 7856 7857 /* These instructions operate across all lanes of a vector 7858 * to produce a single result. We can guarantee that a 64 7859 * bit intermediate is sufficient: 7860 * + for [US]ADDLV the maximum element size is 32 bits, and 7861 * the result type is 64 bits 7862 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the 7863 * same as the element size, which is 32 bits at most 7864 * For the integer operations we can choose to work at 64 7865 * or 32 bits and truncate at the end; for simplicity 7866 * we use 64 bits always. The floating point 7867 * ops do require 32 bit intermediates, though. 7868 */ 7869 if (!is_fp) { 7870 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN)); 7871 7872 for (i = 1; i < elements; i++) { 7873 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN)); 7874 7875 switch (opcode) { 7876 case 0x03: /* SADDLV / UADDLV */ 7877 case 0x1b: /* ADDV */ 7878 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt); 7879 break; 7880 case 0x0a: /* SMAXV / UMAXV */ 7881 if (is_u) { 7882 tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt); 7883 } else { 7884 tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt); 7885 } 7886 break; 7887 case 0x1a: /* SMINV / UMINV */ 7888 if (is_u) { 7889 tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt); 7890 } else { 7891 tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt); 7892 } 7893 break; 7894 default: 7895 g_assert_not_reached(); 7896 } 7897 7898 } 7899 } else { 7900 /* Floating point vector reduction ops which work across 32 7901 * bit (single) or 16 bit (half-precision) intermediates. 7902 * Note that correct NaN propagation requires that we do these 7903 * operations in exactly the order specified by the pseudocode. 7904 */ 7905 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 7906 int fpopcode = opcode | is_min << 4 | is_u << 5; 7907 int vmap = (1 << elements) - 1; 7908 TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize, 7909 (is_q ? 
/* DUP (Element, Vector)
 *
 *  31  30   29              21 20    16 15        10  9    5 4    0
 * +---+---+-------------------+--------+-------------+------+------+
 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
 * +---+---+-------------------+--------+-------------+------+------+
 *
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 */
static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
                             int imm5)
{
    int size = ctz32(imm5);
    int index;

    if (size > 3 || (size == 3 && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    index = imm5 >> (size + 1);
    tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
                         vec_reg_offset(s, rn, index, size),
                         is_q ? 16 : 8, vec_full_reg_size(s));
}

/* DUP (element, scalar)
 *  31                   21 20    16 15        10  9    5 4    0
 * +-----------------------+--------+-------------+------+------+
 * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
 * +-----------------------+--------+-------------+------+------+
 */
static void handle_simd_dupes(DisasContext *s, int rd, int rn,
                              int imm5)
{
    int size = ctz32(imm5);
    int index;
    TCGv_i64 tmp;

    if (size > 3) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    index = imm5 >> (size + 1);

    /* This instruction just extracts the specified element and
     * zero-extends it into the bottom of the destination register.
     */
    tmp = tcg_temp_new_i64();
    read_vec_element(s, tmp, rn, index, size);
    write_fp_dreg(s, rd, tmp);
    tcg_temp_free_i64(tmp);
}

/* DUP (General)
 *
 *  31  30   29              21 20    16 15        10  9    5 4    0
 * +---+---+-------------------+--------+-------------+------+------+
 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
 * +---+---+-------------------+--------+-------------+------+------+
 *
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 */
static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
                             int imm5)
{
    int size = ctz32(imm5);
    uint32_t dofs, oprsz, maxsz;

    if (size > 3 || ((size == 3) && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    dofs = vec_full_reg_offset(s, rd);
    oprsz = is_q ? 16 : 8;
    maxsz = vec_full_reg_size(s);

    tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
}
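/*
 * Worked imm5 example (illustrative): DUP V0.4S, V1.S[1] encodes
 * imm5 as 0b01100; ctz32 gives size == 2 (32-bit elements) and
 * imm5 >> 3 gives index == 1. An imm5 with no set bit in its low
 * four positions decodes to size > 3 and is rejected above.
 */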
16 : 8; 8032 maxsz = vec_full_reg_size(s); 8033 8034 tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn)); 8035 } 8036 8037 /* INS (Element) 8038 * 8039 * 31 21 20 16 15 14 11 10 9 5 4 0 8040 * +-----------------------+--------+------------+---+------+------+ 8041 * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 8042 * +-----------------------+--------+------------+---+------+------+ 8043 * 8044 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 8045 * index: encoded in imm5<4:size+1> 8046 */ 8047 static void handle_simd_inse(DisasContext *s, int rd, int rn, 8048 int imm4, int imm5) 8049 { 8050 int size = ctz32(imm5); 8051 int src_index, dst_index; 8052 TCGv_i64 tmp; 8053 8054 if (size > 3) { 8055 unallocated_encoding(s); 8056 return; 8057 } 8058 8059 if (!fp_access_check(s)) { 8060 return; 8061 } 8062 8063 dst_index = extract32(imm5, 1+size, 5); 8064 src_index = extract32(imm4, size, 4); 8065 8066 tmp = tcg_temp_new_i64(); 8067 8068 read_vec_element(s, tmp, rn, src_index, size); 8069 write_vec_element(s, tmp, rd, dst_index, size); 8070 8071 tcg_temp_free_i64(tmp); 8072 8073 /* INS is considered a 128-bit write for SVE. */ 8074 clear_vec_high(s, true, rd); 8075 } 8076 8077 8078 /* INS (General) 8079 * 8080 * 31 21 20 16 15 10 9 5 4 0 8081 * +-----------------------+--------+-------------+------+------+ 8082 * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd | 8083 * +-----------------------+--------+-------------+------+------+ 8084 * 8085 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 8086 * index: encoded in imm5<4:size+1> 8087 */ 8088 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5) 8089 { 8090 int size = ctz32(imm5); 8091 int idx; 8092 8093 if (size > 3) { 8094 unallocated_encoding(s); 8095 return; 8096 } 8097 8098 if (!fp_access_check(s)) { 8099 return; 8100 } 8101 8102 idx = extract32(imm5, 1 + size, 4 - size); 8103 write_vec_element(s, cpu_reg(s, rn), rd, idx, size); 8104 8105 /* INS is considered a 128-bit write for SVE. */ 8106 clear_vec_high(s, true, rd); 8107 } 8108 8109 /* 8110 * UMOV (General) 8111 * SMOV (General) 8112 * 8113 * 31 30 29 21 20 16 15 12 10 9 5 4 0 8114 * +---+---+-------------------+--------+-------------+------+------+ 8115 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd | 8116 * +---+---+-------------------+--------+-------------+------+------+ 8117 * 8118 * U: unsigned when set 8119 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 8120 */ 8121 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, 8122 int rn, int rd, int imm5) 8123 { 8124 int size = ctz32(imm5); 8125 int element; 8126 TCGv_i64 tcg_rd; 8127 8128 /* Check for UnallocatedEncodings */ 8129 if (is_signed) { 8130 if (size > 2 || (size == 2 && !is_q)) { 8131 unallocated_encoding(s); 8132 return; 8133 } 8134 } else { 8135 if (size > 3 8136 || (size < 3 && is_q) 8137 || (size == 3 && !is_q)) { 8138 unallocated_encoding(s); 8139 return; 8140 } 8141 } 8142 8143 if (!fp_access_check(s)) { 8144 return; 8145 } 8146 8147 element = extract32(imm5, 1+size, 4); 8148 8149 tcg_rd = cpu_reg(s, rd); 8150 read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? 
MO_SIGN : 0)); 8151 if (is_signed && !is_q) { 8152 tcg_gen_ext32u_i64(tcg_rd, tcg_rd); 8153 } 8154 } 8155 8156 /* AdvSIMD copy 8157 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 8158 * +---+---+----+-----------------+------+---+------+---+------+------+ 8159 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 8160 * +---+---+----+-----------------+------+---+------+---+------+------+ 8161 */ 8162 static void disas_simd_copy(DisasContext *s, uint32_t insn) 8163 { 8164 int rd = extract32(insn, 0, 5); 8165 int rn = extract32(insn, 5, 5); 8166 int imm4 = extract32(insn, 11, 4); 8167 int op = extract32(insn, 29, 1); 8168 int is_q = extract32(insn, 30, 1); 8169 int imm5 = extract32(insn, 16, 5); 8170 8171 if (op) { 8172 if (is_q) { 8173 /* INS (element) */ 8174 handle_simd_inse(s, rd, rn, imm4, imm5); 8175 } else { 8176 unallocated_encoding(s); 8177 } 8178 } else { 8179 switch (imm4) { 8180 case 0: 8181 /* DUP (element - vector) */ 8182 handle_simd_dupe(s, is_q, rd, rn, imm5); 8183 break; 8184 case 1: 8185 /* DUP (general) */ 8186 handle_simd_dupg(s, is_q, rd, rn, imm5); 8187 break; 8188 case 3: 8189 if (is_q) { 8190 /* INS (general) */ 8191 handle_simd_insg(s, rd, rn, imm5); 8192 } else { 8193 unallocated_encoding(s); 8194 } 8195 break; 8196 case 5: 8197 case 7: 8198 /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */ 8199 handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5); 8200 break; 8201 default: 8202 unallocated_encoding(s); 8203 break; 8204 } 8205 } 8206 } 8207 8208 /* AdvSIMD modified immediate 8209 * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0 8210 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ 8211 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd | 8212 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ 8213 * 8214 * There are a number of operations that can be carried out here: 8215 * MOVI - move (shifted) imm into register 8216 * MVNI - move inverted (shifted) imm into register 8217 * ORR - bitwise OR of (shifted) imm with register 8218 * BIC - bitwise clear of (shifted) imm with register 8219 * With ARMv8.2 we also have: 8220 * FMOV half-precision 8221 */ 8222 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) 8223 { 8224 int rd = extract32(insn, 0, 5); 8225 int cmode = extract32(insn, 12, 4); 8226 int o2 = extract32(insn, 11, 1); 8227 uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5); 8228 bool is_neg = extract32(insn, 29, 1); 8229 bool is_q = extract32(insn, 30, 1); 8230 uint64_t imm = 0; 8231 8232 if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) { 8233 /* Check for FMOV (vector, immediate) - half-precision */ 8234 if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) { 8235 unallocated_encoding(s); 8236 return; 8237 } 8238 } 8239 8240 if (!fp_access_check(s)) { 8241 return; 8242 } 8243 8244 if (cmode == 15 && o2 && !is_neg) { 8245 /* FMOV (vector, immediate) - half-precision */ 8246 imm = vfp_expand_imm(MO_16, abcdefgh); 8247 /* now duplicate across the lanes */ 8248 imm = dup_const(MO_16, imm); 8249 } else { 8250 imm = asimd_imm_const(abcdefgh, cmode, is_neg); 8251 } 8252 8253 if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) { 8254 /* MOVI or MVNI, with MVNI negation handled above. */ 8255 tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8, 8256 vec_full_reg_size(s), imm); 8257 } else { 8258 /* ORR or BIC, with BIC negation to AND handled above. 
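         *
         * An illustrative sketch (values not taken from this function)
         * of what asimd_imm_const() hands us: with cmode = 0b0000 the
         * imm8 value abcdefgh lands in the low byte of each 32-bit
         * lane, so abcdefgh = 0x42 expands to the 64-bit constant
         * 0x0000004200000042, which the ORR/BIC path below then ORs
         * into or clears from every lane of the destination.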
*/ 8259 if (is_neg) { 8260 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64); 8261 } else { 8262 gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64); 8263 } 8264 } 8265 } 8266 8267 /* AdvSIMD scalar copy 8268 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 8269 * +-----+----+-----------------+------+---+------+---+------+------+ 8270 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | 8271 * +-----+----+-----------------+------+---+------+---+------+------+ 8272 */ 8273 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn) 8274 { 8275 int rd = extract32(insn, 0, 5); 8276 int rn = extract32(insn, 5, 5); 8277 int imm4 = extract32(insn, 11, 4); 8278 int imm5 = extract32(insn, 16, 5); 8279 int op = extract32(insn, 29, 1); 8280 8281 if (op != 0 || imm4 != 0) { 8282 unallocated_encoding(s); 8283 return; 8284 } 8285 8286 /* DUP (element, scalar) */ 8287 handle_simd_dupes(s, rd, rn, imm5); 8288 } 8289 8290 /* AdvSIMD scalar pairwise 8291 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 8292 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 8293 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | 8294 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 8295 */ 8296 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) 8297 { 8298 int u = extract32(insn, 29, 1); 8299 int size = extract32(insn, 22, 2); 8300 int opcode = extract32(insn, 12, 5); 8301 int rn = extract32(insn, 5, 5); 8302 int rd = extract32(insn, 0, 5); 8303 TCGv_ptr fpst; 8304 8305 /* For some ops (the FP ones), size[1] is part of the encoding. 8306 * For ADDP strictly it is not but size[1] is always 1 for valid 8307 * encodings. 8308 */ 8309 opcode |= (extract32(size, 1, 1) << 5); 8310 8311 switch (opcode) { 8312 case 0x3b: /* ADDP */ 8313 if (u || size != 3) { 8314 unallocated_encoding(s); 8315 return; 8316 } 8317 if (!fp_access_check(s)) { 8318 return; 8319 } 8320 8321 fpst = NULL; 8322 break; 8323 case 0xc: /* FMAXNMP */ 8324 case 0xd: /* FADDP */ 8325 case 0xf: /* FMAXP */ 8326 case 0x2c: /* FMINNMP */ 8327 case 0x2f: /* FMINP */ 8328 /* FP op, size[0] is 32 or 64 bit*/ 8329 if (!u) { 8330 if (!dc_isar_feature(aa64_fp16, s)) { 8331 unallocated_encoding(s); 8332 return; 8333 } else { 8334 size = MO_16; 8335 } 8336 } else { 8337 size = extract32(size, 0, 1) ? MO_64 : MO_32; 8338 } 8339 8340 if (!fp_access_check(s)) { 8341 return; 8342 } 8343 8344 fpst = fpstatus_ptr(size == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == MO_64) {
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_64);
        read_vec_element(s, tcg_op2, rn, 1, MO_64);

        switch (opcode) {
        case 0x3b: /* ADDP */
            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
            break;
        case 0xc: /* FMAXNMP */
            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xd: /* FADDP */
            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xf: /* FMAXP */
            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2c: /* FMINNMP */
            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2f: /* FMINP */
            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    } else {
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i32 tcg_res = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_op1, rn, 0, size);
        read_vec_element_i32(s, tcg_op2, rn, 1, size);

        if (size == MO_16) {
            switch (opcode) {
            case 0xc: /* FMAXNMP */
                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xd: /* FADDP */
                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xf: /* FMAXP */
                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2c: /* FMINNMP */
                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2f: /* FMINP */
                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        } else {
            switch (opcode) {
            case 0xc: /* FMAXNMP */
                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xd: /* FADDP */
                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xf: /* FMAXP */
                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2c: /* FMINNMP */
                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2f: /* FMINP */
                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        }

        write_fp_sreg(s, rd, tcg_res);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i32(tcg_res);
    }

    if (fpst) {
        tcg_temp_free_ptr(fpst);
    }
}

/*
 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
 *
 * This handles the common shifting code and is used by both
 * the vector and scalar code.
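 *
 * A worked example of the rounding step (illustrative): with the usual
 * rounding constant of 1 << (shift - 1), URSHR #2 on the value 11
 * computes (11 + 2) >> 2 = 3, where plain USHR #2 gives 11 >> 2 = 2.
 * For size == 3 the addition of the rounding constant can carry out of
 * 64 bits, which is why the code below tracks a high word (tcg_src_hi)
 * with tcg_gen_add2_i64 before shifting.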
8454 */ 8455 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src, 8456 TCGv_i64 tcg_rnd, bool accumulate, 8457 bool is_u, int size, int shift) 8458 { 8459 bool extended_result = false; 8460 bool round = tcg_rnd != NULL; 8461 int ext_lshift = 0; 8462 TCGv_i64 tcg_src_hi; 8463 8464 if (round && size == 3) { 8465 extended_result = true; 8466 ext_lshift = 64 - shift; 8467 tcg_src_hi = tcg_temp_new_i64(); 8468 } else if (shift == 64) { 8469 if (!accumulate && is_u) { 8470 /* result is zero */ 8471 tcg_gen_movi_i64(tcg_res, 0); 8472 return; 8473 } 8474 } 8475 8476 /* Deal with the rounding step */ 8477 if (round) { 8478 if (extended_result) { 8479 TCGv_i64 tcg_zero = tcg_constant_i64(0); 8480 if (!is_u) { 8481 /* take care of sign extending tcg_res */ 8482 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63); 8483 tcg_gen_add2_i64(tcg_src, tcg_src_hi, 8484 tcg_src, tcg_src_hi, 8485 tcg_rnd, tcg_zero); 8486 } else { 8487 tcg_gen_add2_i64(tcg_src, tcg_src_hi, 8488 tcg_src, tcg_zero, 8489 tcg_rnd, tcg_zero); 8490 } 8491 } else { 8492 tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd); 8493 } 8494 } 8495 8496 /* Now do the shift right */ 8497 if (round && extended_result) { 8498 /* extended case, >64 bit precision required */ 8499 if (ext_lshift == 0) { 8500 /* special case, only high bits matter */ 8501 tcg_gen_mov_i64(tcg_src, tcg_src_hi); 8502 } else { 8503 tcg_gen_shri_i64(tcg_src, tcg_src, shift); 8504 tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift); 8505 tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi); 8506 } 8507 } else { 8508 if (is_u) { 8509 if (shift == 64) { 8510 /* essentially shifting in 64 zeros */ 8511 tcg_gen_movi_i64(tcg_src, 0); 8512 } else { 8513 tcg_gen_shri_i64(tcg_src, tcg_src, shift); 8514 } 8515 } else { 8516 if (shift == 64) { 8517 /* effectively extending the sign-bit */ 8518 tcg_gen_sari_i64(tcg_src, tcg_src, 63); 8519 } else { 8520 tcg_gen_sari_i64(tcg_src, tcg_src, shift); 8521 } 8522 } 8523 } 8524 8525 if (accumulate) { 8526 tcg_gen_add_i64(tcg_res, tcg_res, tcg_src); 8527 } else { 8528 tcg_gen_mov_i64(tcg_res, tcg_src); 8529 } 8530 8531 if (extended_result) { 8532 tcg_temp_free_i64(tcg_src_hi); 8533 } 8534 } 8535 8536 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ 8537 static void handle_scalar_simd_shri(DisasContext *s, 8538 bool is_u, int immh, int immb, 8539 int opcode, int rn, int rd) 8540 { 8541 const int size = 3; 8542 int immhb = immh << 3 | immb; 8543 int shift = 2 * (8 << size) - immhb; 8544 bool accumulate = false; 8545 bool round = false; 8546 bool insert = false; 8547 TCGv_i64 tcg_rn; 8548 TCGv_i64 tcg_rd; 8549 TCGv_i64 tcg_round; 8550 8551 if (!extract32(immh, 3, 1)) { 8552 unallocated_encoding(s); 8553 return; 8554 } 8555 8556 if (!fp_access_check(s)) { 8557 return; 8558 } 8559 8560 switch (opcode) { 8561 case 0x02: /* SSRA / USRA (accumulate) */ 8562 accumulate = true; 8563 break; 8564 case 0x04: /* SRSHR / URSHR (rounding) */ 8565 round = true; 8566 break; 8567 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 8568 accumulate = round = true; 8569 break; 8570 case 0x08: /* SRI */ 8571 insert = true; 8572 break; 8573 } 8574 8575 if (round) { 8576 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 8577 } else { 8578 tcg_round = NULL; 8579 } 8580 8581 tcg_rn = read_fp_dreg(s, rn); 8582 tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); 8583 8584 if (insert) { 8585 /* shift count same as element size is valid but does nothing; 8586 * special case to avoid potential shift by 64. 
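     *
     * In the normal case SRI #shift keeps the top 'shift' bits of Rd
     * and inserts the remaining bits of Rn shifted down: with
     * esize = 64, SRI #8 deposits (rn >> 8) into bits [55:0] of rd,
     * leaving rd bits [63:56] untouched. When shift == esize the
     * deposit width would be zero, so Rd is left unchanged entirely.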
8587 */ 8588 int esize = 8 << size; 8589 if (shift != esize) { 8590 tcg_gen_shri_i64(tcg_rn, tcg_rn, shift); 8591 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift); 8592 } 8593 } else { 8594 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 8595 accumulate, is_u, size, shift); 8596 } 8597 8598 write_fp_dreg(s, rd, tcg_rd); 8599 8600 tcg_temp_free_i64(tcg_rn); 8601 tcg_temp_free_i64(tcg_rd); 8602 } 8603 8604 /* SHL/SLI - Scalar shift left */ 8605 static void handle_scalar_simd_shli(DisasContext *s, bool insert, 8606 int immh, int immb, int opcode, 8607 int rn, int rd) 8608 { 8609 int size = 32 - clz32(immh) - 1; 8610 int immhb = immh << 3 | immb; 8611 int shift = immhb - (8 << size); 8612 TCGv_i64 tcg_rn; 8613 TCGv_i64 tcg_rd; 8614 8615 if (!extract32(immh, 3, 1)) { 8616 unallocated_encoding(s); 8617 return; 8618 } 8619 8620 if (!fp_access_check(s)) { 8621 return; 8622 } 8623 8624 tcg_rn = read_fp_dreg(s, rn); 8625 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); 8626 8627 if (insert) { 8628 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift); 8629 } else { 8630 tcg_gen_shli_i64(tcg_rd, tcg_rn, shift); 8631 } 8632 8633 write_fp_dreg(s, rd, tcg_rd); 8634 8635 tcg_temp_free_i64(tcg_rn); 8636 tcg_temp_free_i64(tcg_rd); 8637 } 8638 8639 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with 8640 * (signed/unsigned) narrowing */ 8641 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, 8642 bool is_u_shift, bool is_u_narrow, 8643 int immh, int immb, int opcode, 8644 int rn, int rd) 8645 { 8646 int immhb = immh << 3 | immb; 8647 int size = 32 - clz32(immh) - 1; 8648 int esize = 8 << size; 8649 int shift = (2 * esize) - immhb; 8650 int elements = is_scalar ? 1 : (64 / esize); 8651 bool round = extract32(opcode, 0, 1); 8652 MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN); 8653 TCGv_i64 tcg_rn, tcg_rd, tcg_round; 8654 TCGv_i32 tcg_rd_narrowed; 8655 TCGv_i64 tcg_final; 8656 8657 static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = { 8658 { gen_helper_neon_narrow_sat_s8, 8659 gen_helper_neon_unarrow_sat8 }, 8660 { gen_helper_neon_narrow_sat_s16, 8661 gen_helper_neon_unarrow_sat16 }, 8662 { gen_helper_neon_narrow_sat_s32, 8663 gen_helper_neon_unarrow_sat32 }, 8664 { NULL, NULL }, 8665 }; 8666 static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = { 8667 gen_helper_neon_narrow_sat_u8, 8668 gen_helper_neon_narrow_sat_u16, 8669 gen_helper_neon_narrow_sat_u32, 8670 NULL 8671 }; 8672 NeonGenNarrowEnvFn *narrowfn; 8673 8674 int i; 8675 8676 assert(size < 4); 8677 8678 if (extract32(immh, 3, 1)) { 8679 unallocated_encoding(s); 8680 return; 8681 } 8682 8683 if (!fp_access_check(s)) { 8684 return; 8685 } 8686 8687 if (is_u_shift) { 8688 narrowfn = unsigned_narrow_fns[size]; 8689 } else { 8690 narrowfn = signed_narrow_fns[size][is_u_narrow ? 
1 : 0]; 8691 } 8692 8693 tcg_rn = tcg_temp_new_i64(); 8694 tcg_rd = tcg_temp_new_i64(); 8695 tcg_rd_narrowed = tcg_temp_new_i32(); 8696 tcg_final = tcg_const_i64(0); 8697 8698 if (round) { 8699 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 8700 } else { 8701 tcg_round = NULL; 8702 } 8703 8704 for (i = 0; i < elements; i++) { 8705 read_vec_element(s, tcg_rn, rn, i, ldop); 8706 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 8707 false, is_u_shift, size+1, shift); 8708 narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd); 8709 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed); 8710 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); 8711 } 8712 8713 if (!is_q) { 8714 write_vec_element(s, tcg_final, rd, 0, MO_64); 8715 } else { 8716 write_vec_element(s, tcg_final, rd, 1, MO_64); 8717 } 8718 8719 tcg_temp_free_i64(tcg_rn); 8720 tcg_temp_free_i64(tcg_rd); 8721 tcg_temp_free_i32(tcg_rd_narrowed); 8722 tcg_temp_free_i64(tcg_final); 8723 8724 clear_vec_high(s, is_q, rd); 8725 } 8726 8727 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */ 8728 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, 8729 bool src_unsigned, bool dst_unsigned, 8730 int immh, int immb, int rn, int rd) 8731 { 8732 int immhb = immh << 3 | immb; 8733 int size = 32 - clz32(immh) - 1; 8734 int shift = immhb - (8 << size); 8735 int pass; 8736 8737 assert(immh != 0); 8738 assert(!(scalar && is_q)); 8739 8740 if (!scalar) { 8741 if (!is_q && extract32(immh, 3, 1)) { 8742 unallocated_encoding(s); 8743 return; 8744 } 8745 8746 /* Since we use the variable-shift helpers we must 8747 * replicate the shift count into each element of 8748 * the tcg_shift value. 8749 */ 8750 switch (size) { 8751 case 0: 8752 shift |= shift << 8; 8753 /* fall through */ 8754 case 1: 8755 shift |= shift << 16; 8756 break; 8757 case 2: 8758 case 3: 8759 break; 8760 default: 8761 g_assert_not_reached(); 8762 } 8763 } 8764 8765 if (!fp_access_check(s)) { 8766 return; 8767 } 8768 8769 if (size == 3) { 8770 TCGv_i64 tcg_shift = tcg_constant_i64(shift); 8771 static NeonGenTwo64OpEnvFn * const fns[2][2] = { 8772 { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 }, 8773 { NULL, gen_helper_neon_qshl_u64 }, 8774 }; 8775 NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned]; 8776 int maxpass = is_q ? 2 : 1; 8777 8778 for (pass = 0; pass < maxpass; pass++) { 8779 TCGv_i64 tcg_op = tcg_temp_new_i64(); 8780 8781 read_vec_element(s, tcg_op, rn, pass, MO_64); 8782 genfn(tcg_op, cpu_env, tcg_op, tcg_shift); 8783 write_vec_element(s, tcg_op, rd, pass, MO_64); 8784 8785 tcg_temp_free_i64(tcg_op); 8786 } 8787 clear_vec_high(s, is_q, rd); 8788 } else { 8789 TCGv_i32 tcg_shift = tcg_constant_i32(shift); 8790 static NeonGenTwoOpEnvFn * const fns[2][2][3] = { 8791 { 8792 { gen_helper_neon_qshl_s8, 8793 gen_helper_neon_qshl_s16, 8794 gen_helper_neon_qshl_s32 }, 8795 { gen_helper_neon_qshlu_s8, 8796 gen_helper_neon_qshlu_s16, 8797 gen_helper_neon_qshlu_s32 } 8798 }, { 8799 { NULL, NULL, NULL }, 8800 { gen_helper_neon_qshl_u8, 8801 gen_helper_neon_qshl_u16, 8802 gen_helper_neon_qshl_u32 } 8803 } 8804 }; 8805 NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size]; 8806 MemOp memop = scalar ? size : MO_32; 8807 int maxpass = scalar ? 1 : is_q ? 
4 : 2; 8808 8809 for (pass = 0; pass < maxpass; pass++) { 8810 TCGv_i32 tcg_op = tcg_temp_new_i32(); 8811 8812 read_vec_element_i32(s, tcg_op, rn, pass, memop); 8813 genfn(tcg_op, cpu_env, tcg_op, tcg_shift); 8814 if (scalar) { 8815 switch (size) { 8816 case 0: 8817 tcg_gen_ext8u_i32(tcg_op, tcg_op); 8818 break; 8819 case 1: 8820 tcg_gen_ext16u_i32(tcg_op, tcg_op); 8821 break; 8822 case 2: 8823 break; 8824 default: 8825 g_assert_not_reached(); 8826 } 8827 write_fp_sreg(s, rd, tcg_op); 8828 } else { 8829 write_vec_element_i32(s, tcg_op, rd, pass, MO_32); 8830 } 8831 8832 tcg_temp_free_i32(tcg_op); 8833 } 8834 8835 if (!scalar) { 8836 clear_vec_high(s, is_q, rd); 8837 } 8838 } 8839 } 8840 8841 /* Common vector code for handling integer to FP conversion */ 8842 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, 8843 int elements, int is_signed, 8844 int fracbits, int size) 8845 { 8846 TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 8847 TCGv_i32 tcg_shift = NULL; 8848 8849 MemOp mop = size | (is_signed ? MO_SIGN : 0); 8850 int pass; 8851 8852 if (fracbits || size == MO_64) { 8853 tcg_shift = tcg_constant_i32(fracbits); 8854 } 8855 8856 if (size == MO_64) { 8857 TCGv_i64 tcg_int64 = tcg_temp_new_i64(); 8858 TCGv_i64 tcg_double = tcg_temp_new_i64(); 8859 8860 for (pass = 0; pass < elements; pass++) { 8861 read_vec_element(s, tcg_int64, rn, pass, mop); 8862 8863 if (is_signed) { 8864 gen_helper_vfp_sqtod(tcg_double, tcg_int64, 8865 tcg_shift, tcg_fpst); 8866 } else { 8867 gen_helper_vfp_uqtod(tcg_double, tcg_int64, 8868 tcg_shift, tcg_fpst); 8869 } 8870 if (elements == 1) { 8871 write_fp_dreg(s, rd, tcg_double); 8872 } else { 8873 write_vec_element(s, tcg_double, rd, pass, MO_64); 8874 } 8875 } 8876 8877 tcg_temp_free_i64(tcg_int64); 8878 tcg_temp_free_i64(tcg_double); 8879 8880 } else { 8881 TCGv_i32 tcg_int32 = tcg_temp_new_i32(); 8882 TCGv_i32 tcg_float = tcg_temp_new_i32(); 8883 8884 for (pass = 0; pass < elements; pass++) { 8885 read_vec_element_i32(s, tcg_int32, rn, pass, mop); 8886 8887 switch (size) { 8888 case MO_32: 8889 if (fracbits) { 8890 if (is_signed) { 8891 gen_helper_vfp_sltos(tcg_float, tcg_int32, 8892 tcg_shift, tcg_fpst); 8893 } else { 8894 gen_helper_vfp_ultos(tcg_float, tcg_int32, 8895 tcg_shift, tcg_fpst); 8896 } 8897 } else { 8898 if (is_signed) { 8899 gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst); 8900 } else { 8901 gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst); 8902 } 8903 } 8904 break; 8905 case MO_16: 8906 if (fracbits) { 8907 if (is_signed) { 8908 gen_helper_vfp_sltoh(tcg_float, tcg_int32, 8909 tcg_shift, tcg_fpst); 8910 } else { 8911 gen_helper_vfp_ultoh(tcg_float, tcg_int32, 8912 tcg_shift, tcg_fpst); 8913 } 8914 } else { 8915 if (is_signed) { 8916 gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst); 8917 } else { 8918 gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst); 8919 } 8920 } 8921 break; 8922 default: 8923 g_assert_not_reached(); 8924 } 8925 8926 if (elements == 1) { 8927 write_fp_sreg(s, rd, tcg_float); 8928 } else { 8929 write_vec_element_i32(s, tcg_float, rd, pass, size); 8930 } 8931 } 8932 8933 tcg_temp_free_i32(tcg_int32); 8934 tcg_temp_free_i32(tcg_float); 8935 } 8936 8937 tcg_temp_free_ptr(tcg_fpst); 8938 8939 clear_vec_high(s, elements << size == 16, rd); 8940 } 8941 8942 /* UCVTF/SCVTF - Integer to FP conversion */ 8943 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, 8944 bool is_q, bool is_u, 8945 int immh, int immb, int opcode, 8946 int rn, int rd) 8947 { 
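    /*
     * Worked decode example (illustrative): the immediate is
     * immhb = immh:immb, the element size comes from the highest set
     * bit of immh, and fracbits = (16 << size) - immhb. So immh = 0b0100
     * with immb = 0b110 gives immhb = 38, size = MO_32 and
     * fracbits = 64 - 38 = 26 fraction bits for the conversion.
     */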
    int size, elements, fracbits;
    int immhb = immh << 3 | immb;

    if (immh & 8) {
        size = MO_64;
        if (!is_scalar && !is_q) {
            unallocated_encoding(s);
            return;
        }
    } else if (immh & 4) {
        size = MO_32;
    } else if (immh & 2) {
        size = MO_16;
        if (!dc_isar_feature(aa64_fp16, s)) {
            unallocated_encoding(s);
            return;
        }
    } else {
        /* immh == 0 would be a failure of the decode logic */
        g_assert(immh == 1);
        unallocated_encoding(s);
        return;
    }

    if (is_scalar) {
        elements = 1;
    } else {
        elements = (8 << is_q) >> size;
    }
    fracbits = (16 << size) - immhb;

    if (!fp_access_check(s)) {
        return;
    }

    handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
}

/* FCVTZS, FCVTZU - FP to fixed-point conversion */
static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
                                         bool is_q, bool is_u,
                                         int immh, int immb, int rn, int rd)
{
    int immhb = immh << 3 | immb;
    int pass, size, fracbits;
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_rmode, tcg_shift;

    if (immh & 0x8) {
        size = MO_64;
        if (!is_scalar && !is_q) {
            unallocated_encoding(s);
            return;
        }
    } else if (immh & 0x4) {
        size = MO_32;
    } else if (immh & 0x2) {
        size = MO_16;
        if (!dc_isar_feature(aa64_fp16, s)) {
            unallocated_encoding(s);
            return;
        }
    } else {
        /* Should have split out AdvSIMD modified immediate earlier. */
        assert(immh == 1);
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    assert(!(is_scalar && is_q));

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
    tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
    fracbits = (16 << size) - immhb;
    tcg_shift = tcg_constant_i32(fracbits);

    if (size == MO_64) {
        int maxpass = is_scalar ? 1 : 2;

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            if (is_u) {
                gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            }
            write_vec_element(s, tcg_op, rd, pass, MO_64);
            tcg_temp_free_i64(tcg_op);
        }
        clear_vec_high(s, is_q, rd);
    } else {
        void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
        int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);

        switch (size) {
        case MO_16:
            if (is_u) {
                fn = gen_helper_vfp_touhh;
            } else {
                fn = gen_helper_vfp_toshh;
            }
            break;
        case MO_32:
            if (is_u) {
                fn = gen_helper_vfp_touls;
            } else {
                fn = gen_helper_vfp_tosls;
            }
            break;
        default:
            g_assert_not_reached();
        }

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op, rn, pass, size);
            fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            if (is_scalar) {
                write_fp_sreg(s, rd, tcg_op);
            } else {
                write_vec_element_i32(s, tcg_op, rd, pass, size);
            }
            tcg_temp_free_i32(tcg_op);
        }
        if (!is_scalar) {
            clear_vec_high(s, is_q, rd);
        }
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
    tcg_temp_free_ptr(tcg_fpstatus);
    tcg_temp_free_i32(tcg_rmode);
}

/* AdvSIMD scalar shift by immediate
 *  31 30 29 28         23 22  19 18  16 15    11 10 9    5 4    0
 * +-----+---+-------------+------+------+--------+---+------+------+
 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
 * +-----+---+-------------+------+------+--------+---+------+------+
 *
 * This is the scalar version, so it works on fixed-size registers.
 */
static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int immb = extract32(insn, 16, 3);
    int immh = extract32(insn, 19, 4);
    bool is_u = extract32(insn, 29, 1);

    if (immh == 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x08: /* SRI */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x00: /* SSHR / USHR */
    case 0x02: /* SSRA / USRA */
    case 0x04: /* SRSHR / URSHR */
    case 0x06: /* SRSRA / URSRA */
        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x0a: /* SHL / SLI */
        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x1c: /* SCVTF, UCVTF */
        handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
                                     opcode, rn, rd);
        break;
    case 0x10: /* SQSHRUN, SQSHRUN2 */
    case 0x11: /* SQRSHRUN, SQRSHRUN2 */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        handle_vec_simd_sqshrn(s, true, false, false, true,
                               immh, immb, opcode, rn, rd);
        break;
    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
    case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
        handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
                               immh, immb, opcode, rn, rd);
        break;
    case 0xc: /* SQSHLU */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
        break;
    case 0xe: /* SQSHL, UQSHL */
        handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
        break;
    case 0x1f: /* FCVTZS, FCVTZU */
        handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* AdvSIMD scalar three different
 *  31 30 29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  |
opcode | 0 0 | Rn | Rd | 9169 * +-----+---+-----------+------+---+------+--------+-----+------+------+ 9170 */ 9171 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) 9172 { 9173 bool is_u = extract32(insn, 29, 1); 9174 int size = extract32(insn, 22, 2); 9175 int opcode = extract32(insn, 12, 4); 9176 int rm = extract32(insn, 16, 5); 9177 int rn = extract32(insn, 5, 5); 9178 int rd = extract32(insn, 0, 5); 9179 9180 if (is_u) { 9181 unallocated_encoding(s); 9182 return; 9183 } 9184 9185 switch (opcode) { 9186 case 0x9: /* SQDMLAL, SQDMLAL2 */ 9187 case 0xb: /* SQDMLSL, SQDMLSL2 */ 9188 case 0xd: /* SQDMULL, SQDMULL2 */ 9189 if (size == 0 || size == 3) { 9190 unallocated_encoding(s); 9191 return; 9192 } 9193 break; 9194 default: 9195 unallocated_encoding(s); 9196 return; 9197 } 9198 9199 if (!fp_access_check(s)) { 9200 return; 9201 } 9202 9203 if (size == 2) { 9204 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 9205 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 9206 TCGv_i64 tcg_res = tcg_temp_new_i64(); 9207 9208 read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN); 9209 read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN); 9210 9211 tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2); 9212 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res); 9213 9214 switch (opcode) { 9215 case 0xd: /* SQDMULL, SQDMULL2 */ 9216 break; 9217 case 0xb: /* SQDMLSL, SQDMLSL2 */ 9218 tcg_gen_neg_i64(tcg_res, tcg_res); 9219 /* fall through */ 9220 case 0x9: /* SQDMLAL, SQDMLAL2 */ 9221 read_vec_element(s, tcg_op1, rd, 0, MO_64); 9222 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, 9223 tcg_res, tcg_op1); 9224 break; 9225 default: 9226 g_assert_not_reached(); 9227 } 9228 9229 write_fp_dreg(s, rd, tcg_res); 9230 9231 tcg_temp_free_i64(tcg_op1); 9232 tcg_temp_free_i64(tcg_op2); 9233 tcg_temp_free_i64(tcg_res); 9234 } else { 9235 TCGv_i32 tcg_op1 = read_fp_hreg(s, rn); 9236 TCGv_i32 tcg_op2 = read_fp_hreg(s, rm); 9237 TCGv_i64 tcg_res = tcg_temp_new_i64(); 9238 9239 gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2); 9240 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res); 9241 9242 switch (opcode) { 9243 case 0xd: /* SQDMULL, SQDMULL2 */ 9244 break; 9245 case 0xb: /* SQDMLSL, SQDMLSL2 */ 9246 gen_helper_neon_negl_u32(tcg_res, tcg_res); 9247 /* fall through */ 9248 case 0x9: /* SQDMLAL, SQDMLAL2 */ 9249 { 9250 TCGv_i64 tcg_op3 = tcg_temp_new_i64(); 9251 read_vec_element(s, tcg_op3, rd, 0, MO_32); 9252 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, 9253 tcg_res, tcg_op3); 9254 tcg_temp_free_i64(tcg_op3); 9255 break; 9256 } 9257 default: 9258 g_assert_not_reached(); 9259 } 9260 9261 tcg_gen_ext32u_i64(tcg_res, tcg_res); 9262 write_fp_dreg(s, rd, tcg_res); 9263 9264 tcg_temp_free_i32(tcg_op1); 9265 tcg_temp_free_i32(tcg_op2); 9266 tcg_temp_free_i64(tcg_res); 9267 } 9268 } 9269 9270 static void handle_3same_64(DisasContext *s, int opcode, bool u, 9271 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) 9272 { 9273 /* Handle 64x64->64 opcodes which are shared between the scalar 9274 * and vector 3-same groups. We cover every opcode where size == 3 9275 * is valid in either the three-reg-same (integer, not pairwise) 9276 * or scalar-three-reg-same groups. 
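     *
     * One behavioural note by way of example: the saturating ops are
     * routed through cpu_env so the helpers can set QC in FPSR on
     * saturation; e.g. SQADD of 0x7fffffffffffffff and 1 yields
     * 0x7fffffffffffffff with QC set, rather than wrapping.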
9277 */ 9278 TCGCond cond; 9279 9280 switch (opcode) { 9281 case 0x1: /* SQADD */ 9282 if (u) { 9283 gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9284 } else { 9285 gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9286 } 9287 break; 9288 case 0x5: /* SQSUB */ 9289 if (u) { 9290 gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9291 } else { 9292 gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9293 } 9294 break; 9295 case 0x6: /* CMGT, CMHI */ 9296 /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0. 9297 * We implement this using setcond (test) and then negating. 9298 */ 9299 cond = u ? TCG_COND_GTU : TCG_COND_GT; 9300 do_cmop: 9301 tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm); 9302 tcg_gen_neg_i64(tcg_rd, tcg_rd); 9303 break; 9304 case 0x7: /* CMGE, CMHS */ 9305 cond = u ? TCG_COND_GEU : TCG_COND_GE; 9306 goto do_cmop; 9307 case 0x11: /* CMTST, CMEQ */ 9308 if (u) { 9309 cond = TCG_COND_EQ; 9310 goto do_cmop; 9311 } 9312 gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm); 9313 break; 9314 case 0x8: /* SSHL, USHL */ 9315 if (u) { 9316 gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm); 9317 } else { 9318 gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm); 9319 } 9320 break; 9321 case 0x9: /* SQSHL, UQSHL */ 9322 if (u) { 9323 gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9324 } else { 9325 gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9326 } 9327 break; 9328 case 0xa: /* SRSHL, URSHL */ 9329 if (u) { 9330 gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm); 9331 } else { 9332 gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm); 9333 } 9334 break; 9335 case 0xb: /* SQRSHL, UQRSHL */ 9336 if (u) { 9337 gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9338 } else { 9339 gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); 9340 } 9341 break; 9342 case 0x10: /* ADD, SUB */ 9343 if (u) { 9344 tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm); 9345 } else { 9346 tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm); 9347 } 9348 break; 9349 default: 9350 g_assert_not_reached(); 9351 } 9352 } 9353 9354 /* Handle the 3-same-operands float operations; shared by the scalar 9355 * and vector encodings. The caller must filter out any encodings 9356 * not allocated for the encoding it is dealing with. 
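 *
 * A note on the FMLS cases below: the architected operation negates
 * the multiplicand before the fused multiply-add, computing
 * Rd + (-Rn) * Rm in one rounding step; negating the result of the
 * fused op instead would also flip the sign of the Rd addend and
 * mishandle NaN sign propagation, hence the explicit vfp_neg first.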
9357 */ 9358 static void handle_3same_float(DisasContext *s, int size, int elements, 9359 int fpopcode, int rd, int rn, int rm) 9360 { 9361 int pass; 9362 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 9363 9364 for (pass = 0; pass < elements; pass++) { 9365 if (size) { 9366 /* Double */ 9367 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 9368 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 9369 TCGv_i64 tcg_res = tcg_temp_new_i64(); 9370 9371 read_vec_element(s, tcg_op1, rn, pass, MO_64); 9372 read_vec_element(s, tcg_op2, rm, pass, MO_64); 9373 9374 switch (fpopcode) { 9375 case 0x39: /* FMLS */ 9376 /* As usual for ARM, separate negation for fused multiply-add */ 9377 gen_helper_vfp_negd(tcg_op1, tcg_op1); 9378 /* fall through */ 9379 case 0x19: /* FMLA */ 9380 read_vec_element(s, tcg_res, rd, pass, MO_64); 9381 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, 9382 tcg_res, fpst); 9383 break; 9384 case 0x18: /* FMAXNM */ 9385 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); 9386 break; 9387 case 0x1a: /* FADD */ 9388 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); 9389 break; 9390 case 0x1b: /* FMULX */ 9391 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst); 9392 break; 9393 case 0x1c: /* FCMEQ */ 9394 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9395 break; 9396 case 0x1e: /* FMAX */ 9397 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); 9398 break; 9399 case 0x1f: /* FRECPS */ 9400 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9401 break; 9402 case 0x38: /* FMINNM */ 9403 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); 9404 break; 9405 case 0x3a: /* FSUB */ 9406 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 9407 break; 9408 case 0x3e: /* FMIN */ 9409 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); 9410 break; 9411 case 0x3f: /* FRSQRTS */ 9412 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9413 break; 9414 case 0x5b: /* FMUL */ 9415 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 9416 break; 9417 case 0x5c: /* FCMGE */ 9418 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9419 break; 9420 case 0x5d: /* FACGE */ 9421 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9422 break; 9423 case 0x5f: /* FDIV */ 9424 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); 9425 break; 9426 case 0x7a: /* FABD */ 9427 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 9428 gen_helper_vfp_absd(tcg_res, tcg_res); 9429 break; 9430 case 0x7c: /* FCMGT */ 9431 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9432 break; 9433 case 0x7d: /* FACGT */ 9434 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); 9435 break; 9436 default: 9437 g_assert_not_reached(); 9438 } 9439 9440 write_vec_element(s, tcg_res, rd, pass, MO_64); 9441 9442 tcg_temp_free_i64(tcg_res); 9443 tcg_temp_free_i64(tcg_op1); 9444 tcg_temp_free_i64(tcg_op2); 9445 } else { 9446 /* Single */ 9447 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 9448 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 9449 TCGv_i32 tcg_res = tcg_temp_new_i32(); 9450 9451 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); 9452 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); 9453 9454 switch (fpopcode) { 9455 case 0x39: /* FMLS */ 9456 /* As usual for ARM, separate negation for fused multiply-add */ 9457 gen_helper_vfp_negs(tcg_op1, tcg_op1); 9458 /* fall through */ 9459 case 0x19: /* FMLA */ 9460 read_vec_element_i32(s, tcg_res, rd, pass, MO_32); 9461 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, 9462 tcg_res, fpst); 9463 break; 9464 case 0x1a: /* FADD */ 9465 
gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); 9466 break; 9467 case 0x1b: /* FMULX */ 9468 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst); 9469 break; 9470 case 0x1c: /* FCMEQ */ 9471 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9472 break; 9473 case 0x1e: /* FMAX */ 9474 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); 9475 break; 9476 case 0x1f: /* FRECPS */ 9477 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9478 break; 9479 case 0x18: /* FMAXNM */ 9480 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); 9481 break; 9482 case 0x38: /* FMINNM */ 9483 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); 9484 break; 9485 case 0x3a: /* FSUB */ 9486 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 9487 break; 9488 case 0x3e: /* FMIN */ 9489 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); 9490 break; 9491 case 0x3f: /* FRSQRTS */ 9492 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9493 break; 9494 case 0x5b: /* FMUL */ 9495 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 9496 break; 9497 case 0x5c: /* FCMGE */ 9498 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9499 break; 9500 case 0x5d: /* FACGE */ 9501 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9502 break; 9503 case 0x5f: /* FDIV */ 9504 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); 9505 break; 9506 case 0x7a: /* FABD */ 9507 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 9508 gen_helper_vfp_abss(tcg_res, tcg_res); 9509 break; 9510 case 0x7c: /* FCMGT */ 9511 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9512 break; 9513 case 0x7d: /* FACGT */ 9514 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); 9515 break; 9516 default: 9517 g_assert_not_reached(); 9518 } 9519 9520 if (elements == 1) { 9521 /* scalar single so clear high part */ 9522 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 9523 9524 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res); 9525 write_vec_element(s, tcg_tmp, rd, pass, MO_64); 9526 tcg_temp_free_i64(tcg_tmp); 9527 } else { 9528 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 9529 } 9530 9531 tcg_temp_free_i32(tcg_res); 9532 tcg_temp_free_i32(tcg_op1); 9533 tcg_temp_free_i32(tcg_op2); 9534 } 9535 } 9536 9537 tcg_temp_free_ptr(fpst); 9538 9539 clear_vec_high(s, elements * (size ? 
8 : 4) > 8, rd); 9540 } 9541 9542 /* AdvSIMD scalar three same 9543 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 9544 * +-----+---+-----------+------+---+------+--------+---+------+------+ 9545 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | 9546 * +-----+---+-----------+------+---+------+--------+---+------+------+ 9547 */ 9548 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) 9549 { 9550 int rd = extract32(insn, 0, 5); 9551 int rn = extract32(insn, 5, 5); 9552 int opcode = extract32(insn, 11, 5); 9553 int rm = extract32(insn, 16, 5); 9554 int size = extract32(insn, 22, 2); 9555 bool u = extract32(insn, 29, 1); 9556 TCGv_i64 tcg_rd; 9557 9558 if (opcode >= 0x18) { 9559 /* Floating point: U, size[1] and opcode indicate operation */ 9560 int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6); 9561 switch (fpopcode) { 9562 case 0x1b: /* FMULX */ 9563 case 0x1f: /* FRECPS */ 9564 case 0x3f: /* FRSQRTS */ 9565 case 0x5d: /* FACGE */ 9566 case 0x7d: /* FACGT */ 9567 case 0x1c: /* FCMEQ */ 9568 case 0x5c: /* FCMGE */ 9569 case 0x7c: /* FCMGT */ 9570 case 0x7a: /* FABD */ 9571 break; 9572 default: 9573 unallocated_encoding(s); 9574 return; 9575 } 9576 9577 if (!fp_access_check(s)) { 9578 return; 9579 } 9580 9581 handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm); 9582 return; 9583 } 9584 9585 switch (opcode) { 9586 case 0x1: /* SQADD, UQADD */ 9587 case 0x5: /* SQSUB, UQSUB */ 9588 case 0x9: /* SQSHL, UQSHL */ 9589 case 0xb: /* SQRSHL, UQRSHL */ 9590 break; 9591 case 0x8: /* SSHL, USHL */ 9592 case 0xa: /* SRSHL, URSHL */ 9593 case 0x6: /* CMGT, CMHI */ 9594 case 0x7: /* CMGE, CMHS */ 9595 case 0x11: /* CMTST, CMEQ */ 9596 case 0x10: /* ADD, SUB (vector) */ 9597 if (size != 3) { 9598 unallocated_encoding(s); 9599 return; 9600 } 9601 break; 9602 case 0x16: /* SQDMULH, SQRDMULH (vector) */ 9603 if (size != 1 && size != 2) { 9604 unallocated_encoding(s); 9605 return; 9606 } 9607 break; 9608 default: 9609 unallocated_encoding(s); 9610 return; 9611 } 9612 9613 if (!fp_access_check(s)) { 9614 return; 9615 } 9616 9617 tcg_rd = tcg_temp_new_i64(); 9618 9619 if (size == 3) { 9620 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 9621 TCGv_i64 tcg_rm = read_fp_dreg(s, rm); 9622 9623 handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm); 9624 tcg_temp_free_i64(tcg_rn); 9625 tcg_temp_free_i64(tcg_rm); 9626 } else { 9627 /* Do a single operation on the lowest element in the vector. 9628 * We use the standard Neon helpers and rely on 0 OP 0 == 0 with 9629 * no side effects for all these operations. 9630 * OPTME: special-purpose helpers would avoid doing some 9631 * unnecessary work in the helper for the 8 and 16 bit cases. 
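     *
     * Concretely (a sketch): for size == 0 the element is zero
     * extended into a 32-bit temp and e.g. gen_helper_neon_qadd_s8
     * operates on all four byte lanes of it. The upper three lanes
     * are zero on both inputs, and 0 OP 0 == 0 keeps them zero, so
     * only the low byte of tcg_rd32 ends up significant, as required.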
9632 */ 9633 NeonGenTwoOpEnvFn *genenvfn; 9634 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 9635 TCGv_i32 tcg_rm = tcg_temp_new_i32(); 9636 TCGv_i32 tcg_rd32 = tcg_temp_new_i32(); 9637 9638 read_vec_element_i32(s, tcg_rn, rn, 0, size); 9639 read_vec_element_i32(s, tcg_rm, rm, 0, size); 9640 9641 switch (opcode) { 9642 case 0x1: /* SQADD, UQADD */ 9643 { 9644 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9645 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, 9646 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, 9647 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, 9648 }; 9649 genenvfn = fns[size][u]; 9650 break; 9651 } 9652 case 0x5: /* SQSUB, UQSUB */ 9653 { 9654 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9655 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, 9656 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, 9657 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 }, 9658 }; 9659 genenvfn = fns[size][u]; 9660 break; 9661 } 9662 case 0x9: /* SQSHL, UQSHL */ 9663 { 9664 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9665 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, 9666 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, 9667 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, 9668 }; 9669 genenvfn = fns[size][u]; 9670 break; 9671 } 9672 case 0xb: /* SQRSHL, UQRSHL */ 9673 { 9674 static NeonGenTwoOpEnvFn * const fns[3][2] = { 9675 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, 9676 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, 9677 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, 9678 }; 9679 genenvfn = fns[size][u]; 9680 break; 9681 } 9682 case 0x16: /* SQDMULH, SQRDMULH */ 9683 { 9684 static NeonGenTwoOpEnvFn * const fns[2][2] = { 9685 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, 9686 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, 9687 }; 9688 assert(size == 1 || size == 2); 9689 genenvfn = fns[size - 1][u]; 9690 break; 9691 } 9692 default: 9693 g_assert_not_reached(); 9694 } 9695 9696 genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm); 9697 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32); 9698 tcg_temp_free_i32(tcg_rd32); 9699 tcg_temp_free_i32(tcg_rn); 9700 tcg_temp_free_i32(tcg_rm); 9701 } 9702 9703 write_fp_dreg(s, rd, tcg_rd); 9704 9705 tcg_temp_free_i64(tcg_rd); 9706 } 9707 9708 /* AdvSIMD scalar three same FP16 9709 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 9710 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ 9711 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | 9712 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ 9713 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400 9714 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400 9715 */ 9716 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, 9717 uint32_t insn) 9718 { 9719 int rd = extract32(insn, 0, 5); 9720 int rn = extract32(insn, 5, 5); 9721 int opcode = extract32(insn, 11, 3); 9722 int rm = extract32(insn, 16, 5); 9723 bool u = extract32(insn, 29, 1); 9724 bool a = extract32(insn, 23, 1); 9725 int fpopcode = opcode | (a << 3) | (u << 4); 9726 TCGv_ptr fpst; 9727 TCGv_i32 tcg_op1; 9728 TCGv_i32 tcg_op2; 9729 TCGv_i32 tcg_res; 9730 9731 switch (fpopcode) { 9732 case 0x03: /* FMULX */ 9733 case 0x04: /* FCMEQ (reg) */ 9734 case 0x07: /* FRECPS */ 9735 case 0x0f: /* FRSQRTS */ 9736 case 0x14: /* FCMGE (reg) */ 9737 case 0x15: /* FACGE */ 9738 case 0x1a: /* FABD */ 9739 case 0x1c: /* FCMGT (reg) */ 9740 case 0x1d: /* FACGT */ 9741 break; 
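    /*
     * Worked example of the fpopcode composition above (illustrative):
     * FABD has opcode = 0b010, a = 1 and u = 1, so
     * fpopcode = 0b010 | (1 << 3) | (1 << 4) = 0x1a, matching its case
     * label both here and in the generation switch below.
     */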
    default:
        unallocated_encoding(s);
        return;
    }

    if (!dc_isar_feature(aa64_fp16, s)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    fpst = fpstatus_ptr(FPST_FPCR_F16);

    tcg_op1 = read_fp_hreg(s, rn);
    tcg_op2 = read_fp_hreg(s, rm);
    tcg_res = tcg_temp_new_i32();

    switch (fpopcode) {
    case 0x03: /* FMULX */
        gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x04: /* FCMEQ (reg) */
        gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x07: /* FRECPS */
        gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x0f: /* FRSQRTS */
        gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x14: /* FCMGE (reg) */
        gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x15: /* FACGE */
        gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1a: /* FABD */
        gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
        tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
        break;
    case 0x1c: /* FCMGT (reg) */
        gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1d: /* FACGT */
        gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_i32(tcg_res);
    tcg_temp_free_i32(tcg_op1);
    tcg_temp_free_i32(tcg_op2);
    tcg_temp_free_ptr(fpst);
}

/* AdvSIMD scalar three same extra
 *  31 30 29 28       24 23  22  21 20  16 15 14    11 10 9  5 4  0
 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
 * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
 */
static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
                                                   uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    TCGv_i32 ele1, ele2, ele3;
    TCGv_i64 res;
    bool feature;

    switch (u * 16 + opcode) {
    case 0x10: /* SQRDMLAH (vector) */
    case 0x11: /* SQRDMLSH (vector) */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_rdm, s);
        break;
    default:
        unallocated_encoding(s);
        return;
    }
    if (!feature) {
        unallocated_encoding(s);
        return;
    }
    if (!fp_access_check(s)) {
        return;
    }

    /* Do a single operation on the lowest element in the vector.
     * We use the standard Neon helpers and rely on 0 OP 0 == 0
     * with no side effects for all these operations.
     * OPTME: special-purpose helpers would avoid doing some
     * unnecessary work in the helper for the 16 bit cases.
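     *
     * For reference, a sketch of the architected per-element operation
     * (not the helper's exact internals): SQRDMLAH is roughly
     *   sat((2 * Rn * Rm + (Rd << esize) + (1 << (esize - 1))) >> esize)
     * with QC set on saturation, and SQRDMLSH subtracts the doubled
     * product instead of adding it.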
     */
    ele1 = tcg_temp_new_i32();
    ele2 = tcg_temp_new_i32();
    ele3 = tcg_temp_new_i32();

    read_vec_element_i32(s, ele1, rn, 0, size);
    read_vec_element_i32(s, ele2, rm, 0, size);
    read_vec_element_i32(s, ele3, rd, 0, size);

    switch (opcode) {
    case 0x0: /* SQRDMLAH */
        if (size == 1) {
            gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
        } else {
            gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
        }
        break;
    case 0x1: /* SQRDMLSH */
        if (size == 1) {
            gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
        } else {
            gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
        }
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_i32(ele1);
    tcg_temp_free_i32(ele2);

    res = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(res, ele3);
    tcg_temp_free_i32(ele3);

    write_fp_dreg(s, rd, res);
    tcg_temp_free_i64(res);
}

static void handle_2misc_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
{
    /* Handle 64->64 opcodes which are shared between the scalar and
     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
     * is valid in either group and also the double-precision fp ops.
     * The caller need only provide tcg_rmode and tcg_fpstatus if the op
     * requires them.
     */
    TCGCond cond;

    switch (opcode) {
    case 0x4: /* CLS, CLZ */
        if (u) {
            tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
        } else {
            tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
        }
        break;
    case 0x5: /* NOT */
        /* This opcode is shared with CNT and RBIT but we have earlier
         * enforced that size == 3 if and only if this is the NOT insn.
         */
        tcg_gen_not_i64(tcg_rd, tcg_rn);
        break;
    case 0x7: /* SQABS, SQNEG */
        if (u) {
            gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
        } else {
            gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
        }
        break;
    case 0xa: /* CMLT */
        /* 64 bit integer comparison against zero, result is
         * test ? (2^64 - 1) : 0. We implement this using setcond (test)
         * and then negating.
         */
        cond = TCG_COND_LT;
    do_cmop:
        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x8: /* CMGT, CMGE */
        cond = u ? TCG_COND_GE : TCG_COND_GT;
        goto do_cmop;
    case 0x9: /* CMEQ, CMLE */
        cond = u ?
TCG_COND_LE : TCG_COND_EQ; 9934 goto do_cmop; 9935 case 0xb: /* ABS, NEG */ 9936 if (u) { 9937 tcg_gen_neg_i64(tcg_rd, tcg_rn); 9938 } else { 9939 tcg_gen_abs_i64(tcg_rd, tcg_rn); 9940 } 9941 break; 9942 case 0x2f: /* FABS */ 9943 gen_helper_vfp_absd(tcg_rd, tcg_rn); 9944 break; 9945 case 0x6f: /* FNEG */ 9946 gen_helper_vfp_negd(tcg_rd, tcg_rn); 9947 break; 9948 case 0x7f: /* FSQRT */ 9949 gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env); 9950 break; 9951 case 0x1a: /* FCVTNS */ 9952 case 0x1b: /* FCVTMS */ 9953 case 0x1c: /* FCVTAS */ 9954 case 0x3a: /* FCVTPS */ 9955 case 0x3b: /* FCVTZS */ 9956 gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 9957 break; 9958 case 0x5a: /* FCVTNU */ 9959 case 0x5b: /* FCVTMU */ 9960 case 0x5c: /* FCVTAU */ 9961 case 0x7a: /* FCVTPU */ 9962 case 0x7b: /* FCVTZU */ 9963 gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus); 9964 break; 9965 case 0x18: /* FRINTN */ 9966 case 0x19: /* FRINTM */ 9967 case 0x38: /* FRINTP */ 9968 case 0x39: /* FRINTZ */ 9969 case 0x58: /* FRINTA */ 9970 case 0x79: /* FRINTI */ 9971 gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus); 9972 break; 9973 case 0x59: /* FRINTX */ 9974 gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus); 9975 break; 9976 case 0x1e: /* FRINT32Z */ 9977 case 0x5e: /* FRINT32X */ 9978 gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus); 9979 break; 9980 case 0x1f: /* FRINT64Z */ 9981 case 0x5f: /* FRINT64X */ 9982 gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus); 9983 break; 9984 default: 9985 g_assert_not_reached(); 9986 } 9987 } 9988 9989 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, 9990 bool is_scalar, bool is_u, bool is_q, 9991 int size, int rn, int rd) 9992 { 9993 bool is_double = (size == MO_64); 9994 TCGv_ptr fpst; 9995 9996 if (!fp_access_check(s)) { 9997 return; 9998 } 9999 10000 fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 10001 10002 if (is_double) { 10003 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10004 TCGv_i64 tcg_zero = tcg_constant_i64(0); 10005 TCGv_i64 tcg_res = tcg_temp_new_i64(); 10006 NeonGenTwoDoubleOpFn *genfn; 10007 bool swap = false; 10008 int pass; 10009 10010 switch (opcode) { 10011 case 0x2e: /* FCMLT (zero) */ 10012 swap = true; 10013 /* fallthrough */ 10014 case 0x2c: /* FCMGT (zero) */ 10015 genfn = gen_helper_neon_cgt_f64; 10016 break; 10017 case 0x2d: /* FCMEQ (zero) */ 10018 genfn = gen_helper_neon_ceq_f64; 10019 break; 10020 case 0x6d: /* FCMLE (zero) */ 10021 swap = true; 10022 /* fall through */ 10023 case 0x6c: /* FCMGE (zero) */ 10024 genfn = gen_helper_neon_cge_f64; 10025 break; 10026 default: 10027 g_assert_not_reached(); 10028 } 10029 10030 for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { 10031 read_vec_element(s, tcg_op, rn, pass, MO_64); 10032 if (swap) { 10033 genfn(tcg_res, tcg_zero, tcg_op, fpst); 10034 } else { 10035 genfn(tcg_res, tcg_op, tcg_zero, fpst); 10036 } 10037 write_vec_element(s, tcg_res, rd, pass, MO_64); 10038 } 10039 tcg_temp_free_i64(tcg_res); 10040 tcg_temp_free_i64(tcg_op); 10041 10042 clear_vec_high(s, !is_scalar, rd); 10043 } else { 10044 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10045 TCGv_i32 tcg_zero = tcg_constant_i32(0); 10046 TCGv_i32 tcg_res = tcg_temp_new_i32(); 10047 NeonGenTwoSingleOpFn *genfn; 10048 bool swap = false; 10049 int pass, maxpasses; 10050 10051 if (size == MO_16) { 10052 switch (opcode) { 10053 case 0x2e: /* FCMLT (zero) */ 10054 swap = true; 10055 /* fall through */ 10056 case 0x2c: /* FCMGT (zero) */ 10057 genfn = gen_helper_advsimd_cgt_f16; 10058 break; 10059 case 0x2d: /* FCMEQ (zero) */ 10060 genfn = gen_helper_advsimd_ceq_f16; 10061 break; 10062 case 0x6d: /* FCMLE (zero) */ 10063 swap = true; 10064 /* fall through */ 10065 case 0x6c: /* FCMGE (zero) */ 10066 genfn = gen_helper_advsimd_cge_f16; 10067 break; 10068 default: 10069 g_assert_not_reached(); 10070 } 10071 } else { 10072 switch (opcode) { 10073 case 0x2e: /* FCMLT (zero) */ 10074 swap = true; 10075 /* fall through */ 10076 case 0x2c: /* FCMGT (zero) */ 10077 genfn = gen_helper_neon_cgt_f32; 10078 break; 10079 case 0x2d: /* FCMEQ (zero) */ 10080 genfn = gen_helper_neon_ceq_f32; 10081 break; 10082 case 0x6d: /* FCMLE (zero) */ 10083 swap = true; 10084 /* fall through */ 10085 case 0x6c: /* FCMGE (zero) */ 10086 genfn = gen_helper_neon_cge_f32; 10087 break; 10088 default: 10089 g_assert_not_reached(); 10090 } 10091 } 10092 10093 if (is_scalar) { 10094 maxpasses = 1; 10095 } else { 10096 int vector_size = 8 << is_q; 10097 maxpasses = vector_size >> size; 10098 } 10099 10100 for (pass = 0; pass < maxpasses; pass++) { 10101 read_vec_element_i32(s, tcg_op, rn, pass, size); 10102 if (swap) { 10103 genfn(tcg_res, tcg_zero, tcg_op, fpst); 10104 } else { 10105 genfn(tcg_res, tcg_op, tcg_zero, fpst); 10106 } 10107 if (is_scalar) { 10108 write_fp_sreg(s, rd, tcg_res); 10109 } else { 10110 write_vec_element_i32(s, tcg_res, rd, pass, size); 10111 } 10112 } 10113 tcg_temp_free_i32(tcg_res); 10114 tcg_temp_free_i32(tcg_op); 10115 if (!is_scalar) { 10116 clear_vec_high(s, is_q, rd); 10117 } 10118 } 10119 10120 tcg_temp_free_ptr(fpst); 10121 } 10122 10123 static void handle_2misc_reciprocal(DisasContext *s, int opcode, 10124 bool is_scalar, bool is_u, bool is_q, 10125 int size, int rn, int rd) 10126 { 10127 bool is_double = (size == 3); 10128 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10129 10130 if (is_double) { 10131 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10132 TCGv_i64 tcg_res = tcg_temp_new_i64(); 10133 int pass; 10134 10135 for (pass = 0; pass < (is_scalar ? 
1 : 2); pass++) { 10136 read_vec_element(s, tcg_op, rn, pass, MO_64); 10137 switch (opcode) { 10138 case 0x3d: /* FRECPE */ 10139 gen_helper_recpe_f64(tcg_res, tcg_op, fpst); 10140 break; 10141 case 0x3f: /* FRECPX */ 10142 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst); 10143 break; 10144 case 0x7d: /* FRSQRTE */ 10145 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst); 10146 break; 10147 default: 10148 g_assert_not_reached(); 10149 } 10150 write_vec_element(s, tcg_res, rd, pass, MO_64); 10151 } 10152 tcg_temp_free_i64(tcg_res); 10153 tcg_temp_free_i64(tcg_op); 10154 clear_vec_high(s, !is_scalar, rd); 10155 } else { 10156 TCGv_i32 tcg_op = tcg_temp_new_i32(); 10157 TCGv_i32 tcg_res = tcg_temp_new_i32(); 10158 int pass, maxpasses; 10159 10160 if (is_scalar) { 10161 maxpasses = 1; 10162 } else { 10163 maxpasses = is_q ? 4 : 2; 10164 } 10165 10166 for (pass = 0; pass < maxpasses; pass++) { 10167 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 10168 10169 switch (opcode) { 10170 case 0x3c: /* URECPE */ 10171 gen_helper_recpe_u32(tcg_res, tcg_op); 10172 break; 10173 case 0x3d: /* FRECPE */ 10174 gen_helper_recpe_f32(tcg_res, tcg_op, fpst); 10175 break; 10176 case 0x3f: /* FRECPX */ 10177 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst); 10178 break; 10179 case 0x7d: /* FRSQRTE */ 10180 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst); 10181 break; 10182 default: 10183 g_assert_not_reached(); 10184 } 10185 10186 if (is_scalar) { 10187 write_fp_sreg(s, rd, tcg_res); 10188 } else { 10189 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 10190 } 10191 } 10192 tcg_temp_free_i32(tcg_res); 10193 tcg_temp_free_i32(tcg_op); 10194 if (!is_scalar) { 10195 clear_vec_high(s, is_q, rd); 10196 } 10197 } 10198 tcg_temp_free_ptr(fpst); 10199 } 10200 10201 static void handle_2misc_narrow(DisasContext *s, bool scalar, 10202 int opcode, bool u, bool is_q, 10203 int size, int rn, int rd) 10204 { 10205 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element 10206 * in the source becomes a size element in the destination). 10207 */ 10208 int pass; 10209 TCGv_i32 tcg_res[2]; 10210 int destelt = is_q ? 2 : 0; 10211 int passes = scalar ? 
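    /* For example, XTN (scalar == false, is_q == false) reads two 64-bit
     * passes from rn, narrows each 2*size element to size, and packs the
     * results into the low 64 bits of rd; the "2" variants (is_q) instead
     * pack into the high half (destelt == 2), leaving the low half of rd
     * intact.
     */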
1 : 2; 10212 10213 if (scalar) { 10214 tcg_res[1] = tcg_constant_i32(0); 10215 } 10216 10217 for (pass = 0; pass < passes; pass++) { 10218 TCGv_i64 tcg_op = tcg_temp_new_i64(); 10219 NeonGenNarrowFn *genfn = NULL; 10220 NeonGenNarrowEnvFn *genenvfn = NULL; 10221 10222 if (scalar) { 10223 read_vec_element(s, tcg_op, rn, pass, size + 1); 10224 } else { 10225 read_vec_element(s, tcg_op, rn, pass, MO_64); 10226 } 10227 tcg_res[pass] = tcg_temp_new_i32(); 10228 10229 switch (opcode) { 10230 case 0x12: /* XTN, SQXTUN */ 10231 { 10232 static NeonGenNarrowFn * const xtnfns[3] = { 10233 gen_helper_neon_narrow_u8, 10234 gen_helper_neon_narrow_u16, 10235 tcg_gen_extrl_i64_i32, 10236 }; 10237 static NeonGenNarrowEnvFn * const sqxtunfns[3] = { 10238 gen_helper_neon_unarrow_sat8, 10239 gen_helper_neon_unarrow_sat16, 10240 gen_helper_neon_unarrow_sat32, 10241 }; 10242 if (u) { 10243 genenvfn = sqxtunfns[size]; 10244 } else { 10245 genfn = xtnfns[size]; 10246 } 10247 break; 10248 } 10249 case 0x14: /* SQXTN, UQXTN */ 10250 { 10251 static NeonGenNarrowEnvFn * const fns[3][2] = { 10252 { gen_helper_neon_narrow_sat_s8, 10253 gen_helper_neon_narrow_sat_u8 }, 10254 { gen_helper_neon_narrow_sat_s16, 10255 gen_helper_neon_narrow_sat_u16 }, 10256 { gen_helper_neon_narrow_sat_s32, 10257 gen_helper_neon_narrow_sat_u32 }, 10258 }; 10259 genenvfn = fns[size][u]; 10260 break; 10261 } 10262 case 0x16: /* FCVTN, FCVTN2 */ 10263 /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ 10264 if (size == 2) { 10265 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env); 10266 } else { 10267 TCGv_i32 tcg_lo = tcg_temp_new_i32(); 10268 TCGv_i32 tcg_hi = tcg_temp_new_i32(); 10269 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10270 TCGv_i32 ahp = get_ahp_flag(); 10271 10272 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op); 10273 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp); 10274 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp); 10275 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16); 10276 tcg_temp_free_i32(tcg_lo); 10277 tcg_temp_free_i32(tcg_hi); 10278 tcg_temp_free_ptr(fpst); 10279 tcg_temp_free_i32(ahp); 10280 } 10281 break; 10282 case 0x36: /* BFCVTN, BFCVTN2 */ 10283 { 10284 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 10285 gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst); 10286 tcg_temp_free_ptr(fpst); 10287 } 10288 break; 10289 case 0x56: /* FCVTXN, FCVTXN2 */ 10290 /* 64 bit to 32 bit float conversion 10291 * with von Neumann rounding (round to odd) 10292 */ 10293 assert(size == 2); 10294 gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env); 10295 break; 10296 default: 10297 g_assert_not_reached(); 10298 } 10299 10300 if (genfn) { 10301 genfn(tcg_res[pass], tcg_op); 10302 } else if (genenvfn) { 10303 genenvfn(tcg_res[pass], cpu_env, tcg_op); 10304 } 10305 10306 tcg_temp_free_i64(tcg_op); 10307 } 10308 10309 for (pass = 0; pass < 2; pass++) { 10310 write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32); 10311 tcg_temp_free_i32(tcg_res[pass]); 10312 } 10313 clear_vec_high(s, is_q, rd); 10314 } 10315 10316 /* Remaining saturating accumulating ops */ 10317 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, 10318 bool is_q, int size, int rn, int rd) 10319 { 10320 bool is_double = (size == 3); 10321 10322 if (is_double) { 10323 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 10324 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 10325 int pass; 10326 10327 for (pass = 0; pass < (is_scalar ? 
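        /* These insns add an operand of the opposite signedness to the
         * accumulator, with saturation: e.g. for 8-bit USQADD, an unsigned
         * accumulator of 0xf0 (240) plus a signed operand of 0x20 (+32)
         * saturates to 0xff, while a signed operand of 0x80 (-128) yields
         * 0x70 (112).
         */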
1 : 2); pass++) { 10328 read_vec_element(s, tcg_rn, rn, pass, MO_64); 10329 read_vec_element(s, tcg_rd, rd, pass, MO_64); 10330 10331 if (is_u) { /* USQADD */ 10332 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10333 } else { /* SUQADD */ 10334 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10335 } 10336 write_vec_element(s, tcg_rd, rd, pass, MO_64); 10337 } 10338 tcg_temp_free_i64(tcg_rd); 10339 tcg_temp_free_i64(tcg_rn); 10340 clear_vec_high(s, !is_scalar, rd); 10341 } else { 10342 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 10343 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 10344 int pass, maxpasses; 10345 10346 if (is_scalar) { 10347 maxpasses = 1; 10348 } else { 10349 maxpasses = is_q ? 4 : 2; 10350 } 10351 10352 for (pass = 0; pass < maxpasses; pass++) { 10353 if (is_scalar) { 10354 read_vec_element_i32(s, tcg_rn, rn, pass, size); 10355 read_vec_element_i32(s, tcg_rd, rd, pass, size); 10356 } else { 10357 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32); 10358 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32); 10359 } 10360 10361 if (is_u) { /* USQADD */ 10362 switch (size) { 10363 case 0: 10364 gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10365 break; 10366 case 1: 10367 gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10368 break; 10369 case 2: 10370 gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10371 break; 10372 default: 10373 g_assert_not_reached(); 10374 } 10375 } else { /* SUQADD */ 10376 switch (size) { 10377 case 0: 10378 gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10379 break; 10380 case 1: 10381 gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10382 break; 10383 case 2: 10384 gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd); 10385 break; 10386 default: 10387 g_assert_not_reached(); 10388 } 10389 } 10390 10391 if (is_scalar) { 10392 write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64); 10393 } 10394 write_vec_element_i32(s, tcg_rd, rd, pass, MO_32); 10395 } 10396 tcg_temp_free_i32(tcg_rd); 10397 tcg_temp_free_i32(tcg_rn); 10398 clear_vec_high(s, is_q, rd); 10399 } 10400 } 10401 10402 /* AdvSIMD scalar two reg misc 10403 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 10404 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 10405 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 10406 * +-----+---+-----------+------+-----------+--------+-----+------+------+ 10407 */ 10408 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) 10409 { 10410 int rd = extract32(insn, 0, 5); 10411 int rn = extract32(insn, 5, 5); 10412 int opcode = extract32(insn, 12, 5); 10413 int size = extract32(insn, 22, 2); 10414 bool u = extract32(insn, 29, 1); 10415 bool is_fcvt = false; 10416 int rmode; 10417 TCGv_i32 tcg_rmode; 10418 TCGv_ptr tcg_fpstatus; 10419 10420 switch (opcode) { 10421 case 0x3: /* USQADD / SUQADD*/ 10422 if (!fp_access_check(s)) { 10423 return; 10424 } 10425 handle_2misc_satacc(s, true, u, false, size, rn, rd); 10426 return; 10427 case 0x7: /* SQABS / SQNEG */ 10428 break; 10429 case 0xa: /* CMLT */ 10430 if (u) { 10431 unallocated_encoding(s); 10432 return; 10433 } 10434 /* fall through */ 10435 case 0x8: /* CMGT, CMGE */ 10436 case 0x9: /* CMEQ, CMLE */ 10437 case 0xb: /* ABS, NEG */ 10438 if (size != 3) { 10439 unallocated_encoding(s); 10440 return; 10441 } 10442 break; 10443 case 0x12: /* SQXTUN */ 10444 if (!u) { 10445 unallocated_encoding(s); 10446 return; 10447 } 10448 /* fall through */ 10449 case 0x14: /* SQXTN, UQXTN 
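                       (saturating narrow: e.g. for size == 1, SQXTN
                       narrows a 32-bit element to 16 bits, so an input
                       of 0x00012345 saturates to 0x7fff)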
*/ 10450 if (size == 3) { 10451 unallocated_encoding(s); 10452 return; 10453 } 10454 if (!fp_access_check(s)) { 10455 return; 10456 } 10457 handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd); 10458 return; 10459 case 0xc ... 0xf: 10460 case 0x16 ... 0x1d: 10461 case 0x1f: 10462 /* Floating point: U, size[1] and opcode indicate operation; 10463 * size[0] indicates single or double precision. 10464 */ 10465 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 10466 size = extract32(size, 0, 1) ? 3 : 2; 10467 switch (opcode) { 10468 case 0x2c: /* FCMGT (zero) */ 10469 case 0x2d: /* FCMEQ (zero) */ 10470 case 0x2e: /* FCMLT (zero) */ 10471 case 0x6c: /* FCMGE (zero) */ 10472 case 0x6d: /* FCMLE (zero) */ 10473 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd); 10474 return; 10475 case 0x1d: /* SCVTF */ 10476 case 0x5d: /* UCVTF */ 10477 { 10478 bool is_signed = (opcode == 0x1d); 10479 if (!fp_access_check(s)) { 10480 return; 10481 } 10482 handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size); 10483 return; 10484 } 10485 case 0x3d: /* FRECPE */ 10486 case 0x3f: /* FRECPX */ 10487 case 0x7d: /* FRSQRTE */ 10488 if (!fp_access_check(s)) { 10489 return; 10490 } 10491 handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd); 10492 return; 10493 case 0x1a: /* FCVTNS */ 10494 case 0x1b: /* FCVTMS */ 10495 case 0x3a: /* FCVTPS */ 10496 case 0x3b: /* FCVTZS */ 10497 case 0x5a: /* FCVTNU */ 10498 case 0x5b: /* FCVTMU */ 10499 case 0x7a: /* FCVTPU */ 10500 case 0x7b: /* FCVTZU */ 10501 is_fcvt = true; 10502 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 10503 break; 10504 case 0x1c: /* FCVTAS */ 10505 case 0x5c: /* FCVTAU */ 10506 /* TIEAWAY doesn't fit in the usual rounding mode encoding */ 10507 is_fcvt = true; 10508 rmode = FPROUNDING_TIEAWAY; 10509 break; 10510 case 0x56: /* FCVTXN, FCVTXN2 */ 10511 if (size == 2) { 10512 unallocated_encoding(s); 10513 return; 10514 } 10515 if (!fp_access_check(s)) { 10516 return; 10517 } 10518 handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd); 10519 return; 10520 default: 10521 unallocated_encoding(s); 10522 return; 10523 } 10524 break; 10525 default: 10526 unallocated_encoding(s); 10527 return; 10528 } 10529 10530 if (!fp_access_check(s)) { 10531 return; 10532 } 10533 10534 if (is_fcvt) { 10535 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); 10536 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 10537 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 10538 } else { 10539 tcg_rmode = NULL; 10540 tcg_fpstatus = NULL; 10541 } 10542 10543 if (size == 3) { 10544 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 10545 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 10546 10547 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus); 10548 write_fp_dreg(s, rd, tcg_rd); 10549 tcg_temp_free_i64(tcg_rd); 10550 tcg_temp_free_i64(tcg_rn); 10551 } else { 10552 TCGv_i32 tcg_rn = tcg_temp_new_i32(); 10553 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 10554 10555 read_vec_element_i32(s, tcg_rn, rn, 0, size); 10556 10557 switch (opcode) { 10558 case 0x7: /* SQABS, SQNEG */ 10559 { 10560 NeonGenOneOpEnvFn *genfn; 10561 static NeonGenOneOpEnvFn * const fns[3][2] = { 10562 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 10563 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 10564 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, 10565 }; 10566 genfn = fns[size][u]; 10567 genfn(tcg_rd, cpu_env, tcg_rn); 10568 break; 10569 } 10570 case 0x1a: /* FCVTNS */ 10571 case 0x1b: /* FCVTMS */ 10572 case 0x1c: /* FCVTAS */ 
10573 case 0x3a: /* FCVTPS */ 10574 case 0x3b: /* FCVTZS */ 10575 gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0), 10576 tcg_fpstatus); 10577 break; 10578 case 0x5a: /* FCVTNU */ 10579 case 0x5b: /* FCVTMU */ 10580 case 0x5c: /* FCVTAU */ 10581 case 0x7a: /* FCVTPU */ 10582 case 0x7b: /* FCVTZU */ 10583 gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0), 10584 tcg_fpstatus); 10585 break; 10586 default: 10587 g_assert_not_reached(); 10588 } 10589 10590 write_fp_sreg(s, rd, tcg_rd); 10591 tcg_temp_free_i32(tcg_rd); 10592 tcg_temp_free_i32(tcg_rn); 10593 } 10594 10595 if (is_fcvt) { 10596 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 10597 tcg_temp_free_i32(tcg_rmode); 10598 tcg_temp_free_ptr(tcg_fpstatus); 10599 } 10600 } 10601 10602 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */ 10603 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, 10604 int immh, int immb, int opcode, int rn, int rd) 10605 { 10606 int size = 32 - clz32(immh) - 1; 10607 int immhb = immh << 3 | immb; 10608 int shift = 2 * (8 << size) - immhb; 10609 GVecGen2iFn *gvec_fn; 10610 10611 if (extract32(immh, 3, 1) && !is_q) { 10612 unallocated_encoding(s); 10613 return; 10614 } 10615 tcg_debug_assert(size <= 3); 10616 10617 if (!fp_access_check(s)) { 10618 return; 10619 } 10620 10621 switch (opcode) { 10622 case 0x02: /* SSRA / USRA (accumulate) */ 10623 gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra; 10624 break; 10625 10626 case 0x08: /* SRI */ 10627 gvec_fn = gen_gvec_sri; 10628 break; 10629 10630 case 0x00: /* SSHR / USHR */ 10631 if (is_u) { 10632 if (shift == 8 << size) { 10633 /* Shift count the same size as element size produces zero. */ 10634 tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd), 10635 is_q ? 16 : 8, vec_full_reg_size(s), 0); 10636 return; 10637 } 10638 gvec_fn = tcg_gen_gvec_shri; 10639 } else { 10640 /* Shift count the same size as element size produces all sign. */ 10641 if (shift == 8 << size) { 10642 shift -= 1; 10643 } 10644 gvec_fn = tcg_gen_gvec_sari; 10645 } 10646 break; 10647 10648 case 0x04: /* SRSHR / URSHR (rounding) */ 10649 gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr; 10650 break; 10651 10652 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 10653 gvec_fn = is_u ? 
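                  /* Decode note: for these right shifts the immediate is
                   * encoded as immh:immb == 2 * esize - shift, so e.g. a
                   * shift of 3 on .8B lanes (esize 8) is immh:immb == 13,
                   * i.e. immh == 0b0001, immb == 0b101.
                   */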
gen_gvec_ursra : gen_gvec_srsra;
        break;

    default:
        g_assert_not_reached();
    }

    gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
}

/* SHL/SLI - Vector shift left */
static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = immhb - (8 << size);

    /* Range of size is limited by decode: immh is a non-zero 4 bit field */
    assert(size >= 0 && size <= 3);

    if (extract32(immh, 3, 1) && !is_q) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (insert) {
        gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
    } else {
        gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
    }
}

/* SSHLL/USHLL - Vector shift left with widening */
static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
                                  int immh, int immb, int opcode, int rn, int rd)
{
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = immhb - (8 << size);
    int dsize = 64;
    int esize = 8 << size;
    int elements = dsize / esize;
    TCGv_i64 tcg_rn = tcg_temp_new_i64();
    TCGv_i64 tcg_rd = tcg_temp_new_i64();
    int i;

    if (size >= 3) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* For the LL variants the store is larger than the load,
     * so if rd == rn we would overwrite parts of our input.
     * So load everything right now and use shifts in the main loop.
     */
    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);

    for (i = 0; i < elements; i++) {
        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
        write_vec_element(s, tcg_rd, rd, i, size + 1);
    }
}

/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    int immhb = immh << 3 | immb;
    int size = 32 - clz32(immh) - 1;
    int dsize = 64;
    int esize = 8 << size;
    int elements = dsize / esize;
    int shift = (2 * esize) - immhb;
    bool round = extract32(opcode, 0, 1);
    TCGv_i64 tcg_rn, tcg_rd, tcg_final;
    TCGv_i64 tcg_round;
    int i;

    if (extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_rn = tcg_temp_new_i64();
    tcg_rd = tcg_temp_new_i64();
    tcg_final = tcg_temp_new_i64();
    read_vec_element(s, tcg_final, rd, is_q ?
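    /* The rounding form works by adding 1 << (shift - 1) before shifting,
     * below: e.g. for shift == 4, RSHRN turns 0x18 into (0x18 + 8) >> 4 == 2
     * where the truncating SHRN would give 1.
     */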
1 : 0, MO_64); 10755 10756 if (round) { 10757 tcg_round = tcg_constant_i64(1ULL << (shift - 1)); 10758 } else { 10759 tcg_round = NULL; 10760 } 10761 10762 for (i = 0; i < elements; i++) { 10763 read_vec_element(s, tcg_rn, rn, i, size+1); 10764 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, 10765 false, true, size+1, shift); 10766 10767 tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); 10768 } 10769 10770 if (!is_q) { 10771 write_vec_element(s, tcg_final, rd, 0, MO_64); 10772 } else { 10773 write_vec_element(s, tcg_final, rd, 1, MO_64); 10774 } 10775 tcg_temp_free_i64(tcg_rn); 10776 tcg_temp_free_i64(tcg_rd); 10777 tcg_temp_free_i64(tcg_final); 10778 10779 clear_vec_high(s, is_q, rd); 10780 } 10781 10782 10783 /* AdvSIMD shift by immediate 10784 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 10785 * +---+---+---+-------------+------+------+--------+---+------+------+ 10786 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | 10787 * +---+---+---+-------------+------+------+--------+---+------+------+ 10788 */ 10789 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) 10790 { 10791 int rd = extract32(insn, 0, 5); 10792 int rn = extract32(insn, 5, 5); 10793 int opcode = extract32(insn, 11, 5); 10794 int immb = extract32(insn, 16, 3); 10795 int immh = extract32(insn, 19, 4); 10796 bool is_u = extract32(insn, 29, 1); 10797 bool is_q = extract32(insn, 30, 1); 10798 10799 /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */ 10800 assert(immh != 0); 10801 10802 switch (opcode) { 10803 case 0x08: /* SRI */ 10804 if (!is_u) { 10805 unallocated_encoding(s); 10806 return; 10807 } 10808 /* fall through */ 10809 case 0x00: /* SSHR / USHR */ 10810 case 0x02: /* SSRA / USRA (accumulate) */ 10811 case 0x04: /* SRSHR / URSHR (rounding) */ 10812 case 0x06: /* SRSRA / URSRA (accum + rounding) */ 10813 handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd); 10814 break; 10815 case 0x0a: /* SHL / SLI */ 10816 handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd); 10817 break; 10818 case 0x10: /* SHRN */ 10819 case 0x11: /* RSHRN / SQRSHRUN */ 10820 if (is_u) { 10821 handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb, 10822 opcode, rn, rd); 10823 } else { 10824 handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd); 10825 } 10826 break; 10827 case 0x12: /* SQSHRN / UQSHRN */ 10828 case 0x13: /* SQRSHRN / UQRSHRN */ 10829 handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, 10830 opcode, rn, rd); 10831 break; 10832 case 0x14: /* SSHLL / USHLL */ 10833 handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); 10834 break; 10835 case 0x1c: /* SCVTF / UCVTF */ 10836 handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, 10837 opcode, rn, rd); 10838 break; 10839 case 0xc: /* SQSHLU */ 10840 if (!is_u) { 10841 unallocated_encoding(s); 10842 return; 10843 } 10844 handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd); 10845 break; 10846 case 0xe: /* SQSHL, UQSHL */ 10847 handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd); 10848 break; 10849 case 0x1f: /* FCVTZS/ FCVTZU */ 10850 handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd); 10851 return; 10852 default: 10853 unallocated_encoding(s); 10854 return; 10855 } 10856 } 10857 10858 /* Generate code to do a "long" addition or subtraction, ie one done in 10859 * TCGv_i64 on vector lanes twice the width specified by size. 
10860 */ 10861 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res, 10862 TCGv_i64 tcg_op1, TCGv_i64 tcg_op2) 10863 { 10864 static NeonGenTwo64OpFn * const fns[3][2] = { 10865 { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 }, 10866 { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 }, 10867 { tcg_gen_add_i64, tcg_gen_sub_i64 }, 10868 }; 10869 NeonGenTwo64OpFn *genfn; 10870 assert(size < 3); 10871 10872 genfn = fns[size][is_sub]; 10873 genfn(tcg_res, tcg_op1, tcg_op2); 10874 } 10875 10876 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, 10877 int opcode, int rd, int rn, int rm) 10878 { 10879 /* 3-reg-different widening insns: 64 x 64 -> 128 */ 10880 TCGv_i64 tcg_res[2]; 10881 int pass, accop; 10882 10883 tcg_res[0] = tcg_temp_new_i64(); 10884 tcg_res[1] = tcg_temp_new_i64(); 10885 10886 /* Does this op do an adding accumulate, a subtracting accumulate, 10887 * or no accumulate at all? 10888 */ 10889 switch (opcode) { 10890 case 5: 10891 case 8: 10892 case 9: 10893 accop = 1; 10894 break; 10895 case 10: 10896 case 11: 10897 accop = -1; 10898 break; 10899 default: 10900 accop = 0; 10901 break; 10902 } 10903 10904 if (accop != 0) { 10905 read_vec_element(s, tcg_res[0], rd, 0, MO_64); 10906 read_vec_element(s, tcg_res[1], rd, 1, MO_64); 10907 } 10908 10909 /* size == 2 means two 32x32->64 operations; this is worth special 10910 * casing because we can generally handle it inline. 10911 */ 10912 if (size == 2) { 10913 for (pass = 0; pass < 2; pass++) { 10914 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 10915 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 10916 TCGv_i64 tcg_passres; 10917 MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN); 10918 10919 int elt = pass + is_q * 2; 10920 10921 read_vec_element(s, tcg_op1, rn, elt, memop); 10922 read_vec_element(s, tcg_op2, rm, elt, memop); 10923 10924 if (accop == 0) { 10925 tcg_passres = tcg_res[pass]; 10926 } else { 10927 tcg_passres = tcg_temp_new_i64(); 10928 } 10929 10930 switch (opcode) { 10931 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 10932 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2); 10933 break; 10934 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 10935 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2); 10936 break; 10937 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 10938 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 10939 { 10940 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64(); 10941 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64(); 10942 10943 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2); 10944 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1); 10945 tcg_gen_movcond_i64(is_u ? 
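            /* Branchless absolute difference: both a - b and b - a are
             * computed and movcond picks one, i.e. the equivalent of
             *   r = (a >= b) ? a - b : b - a;
             * using an unsigned or signed comparison as appropriate.
             */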
TCG_COND_GEU : TCG_COND_GE, 10946 tcg_passres, 10947 tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2); 10948 tcg_temp_free_i64(tcg_tmp1); 10949 tcg_temp_free_i64(tcg_tmp2); 10950 break; 10951 } 10952 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 10953 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 10954 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ 10955 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); 10956 break; 10957 case 9: /* SQDMLAL, SQDMLAL2 */ 10958 case 11: /* SQDMLSL, SQDMLSL2 */ 10959 case 13: /* SQDMULL, SQDMULL2 */ 10960 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2); 10961 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env, 10962 tcg_passres, tcg_passres); 10963 break; 10964 default: 10965 g_assert_not_reached(); 10966 } 10967 10968 if (opcode == 9 || opcode == 11) { 10969 /* saturating accumulate ops */ 10970 if (accop < 0) { 10971 tcg_gen_neg_i64(tcg_passres, tcg_passres); 10972 } 10973 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env, 10974 tcg_res[pass], tcg_passres); 10975 } else if (accop > 0) { 10976 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 10977 } else if (accop < 0) { 10978 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 10979 } 10980 10981 if (accop != 0) { 10982 tcg_temp_free_i64(tcg_passres); 10983 } 10984 10985 tcg_temp_free_i64(tcg_op1); 10986 tcg_temp_free_i64(tcg_op2); 10987 } 10988 } else { 10989 /* size 0 or 1, generally helper functions */ 10990 for (pass = 0; pass < 2; pass++) { 10991 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 10992 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 10993 TCGv_i64 tcg_passres; 10994 int elt = pass + is_q * 2; 10995 10996 read_vec_element_i32(s, tcg_op1, rn, elt, MO_32); 10997 read_vec_element_i32(s, tcg_op2, rm, elt, MO_32); 10998 10999 if (accop == 0) { 11000 tcg_passres = tcg_res[pass]; 11001 } else { 11002 tcg_passres = tcg_temp_new_i64(); 11003 } 11004 11005 switch (opcode) { 11006 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */ 11007 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */ 11008 { 11009 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64(); 11010 static NeonGenWidenFn * const widenfns[2][2] = { 11011 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, 11012 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, 11013 }; 11014 NeonGenWidenFn *widenfn = widenfns[size][is_u]; 11015 11016 widenfn(tcg_op2_64, tcg_op2); 11017 widenfn(tcg_passres, tcg_op1); 11018 gen_neon_addl(size, (opcode == 2), tcg_passres, 11019 tcg_passres, tcg_op2_64); 11020 tcg_temp_free_i64(tcg_op2_64); 11021 break; 11022 } 11023 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */ 11024 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */ 11025 if (size == 0) { 11026 if (is_u) { 11027 gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2); 11028 } else { 11029 gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2); 11030 } 11031 } else { 11032 if (is_u) { 11033 gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2); 11034 } else { 11035 gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2); 11036 } 11037 } 11038 break; 11039 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 11040 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 11041 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */ 11042 if (size == 0) { 11043 if (is_u) { 11044 gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2); 11045 } else { 11046 gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2); 11047 } 11048 } else { 11049 if (is_u) { 11050 gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2); 11051 } else { 11052 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); 11053 } 11054 } 11055 break; 11056 case 9: 
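            /* For the SQD* ops below, the "doubling" in saturating doubling
             * multiply is done by the saturating self-add: e.g. SQDMULL of
             * INT16_MIN * INT16_MIN gives 0x40000000, and adding that to
             * itself saturates to INT32_MAX and sets QC.
             */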
/* SQDMLAL, SQDMLAL2 */ 11057 case 11: /* SQDMLSL, SQDMLSL2 */ 11058 case 13: /* SQDMULL, SQDMULL2 */ 11059 assert(size == 1); 11060 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2); 11061 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, 11062 tcg_passres, tcg_passres); 11063 break; 11064 default: 11065 g_assert_not_reached(); 11066 } 11067 tcg_temp_free_i32(tcg_op1); 11068 tcg_temp_free_i32(tcg_op2); 11069 11070 if (accop != 0) { 11071 if (opcode == 9 || opcode == 11) { 11072 /* saturating accumulate ops */ 11073 if (accop < 0) { 11074 gen_helper_neon_negl_u32(tcg_passres, tcg_passres); 11075 } 11076 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env, 11077 tcg_res[pass], 11078 tcg_passres); 11079 } else { 11080 gen_neon_addl(size, (accop < 0), tcg_res[pass], 11081 tcg_res[pass], tcg_passres); 11082 } 11083 tcg_temp_free_i64(tcg_passres); 11084 } 11085 } 11086 } 11087 11088 write_vec_element(s, tcg_res[0], rd, 0, MO_64); 11089 write_vec_element(s, tcg_res[1], rd, 1, MO_64); 11090 tcg_temp_free_i64(tcg_res[0]); 11091 tcg_temp_free_i64(tcg_res[1]); 11092 } 11093 11094 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, 11095 int opcode, int rd, int rn, int rm) 11096 { 11097 TCGv_i64 tcg_res[2]; 11098 int part = is_q ? 2 : 0; 11099 int pass; 11100 11101 for (pass = 0; pass < 2; pass++) { 11102 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11103 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11104 TCGv_i64 tcg_op2_wide = tcg_temp_new_i64(); 11105 static NeonGenWidenFn * const widenfns[3][2] = { 11106 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 }, 11107 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 }, 11108 { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 }, 11109 }; 11110 NeonGenWidenFn *widenfn = widenfns[size][is_u]; 11111 11112 read_vec_element(s, tcg_op1, rn, pass, MO_64); 11113 read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32); 11114 widenfn(tcg_op2_wide, tcg_op2); 11115 tcg_temp_free_i32(tcg_op2); 11116 tcg_res[pass] = tcg_temp_new_i64(); 11117 gen_neon_addl(size, (opcode == 3), 11118 tcg_res[pass], tcg_op1, tcg_op2_wide); 11119 tcg_temp_free_i64(tcg_op1); 11120 tcg_temp_free_i64(tcg_op2_wide); 11121 } 11122 11123 for (pass = 0; pass < 2; pass++) { 11124 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11125 tcg_temp_free_i64(tcg_res[pass]); 11126 } 11127 } 11128 11129 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in) 11130 { 11131 tcg_gen_addi_i64(in, in, 1U << 31); 11132 tcg_gen_extrh_i64_i32(res, in); 11133 } 11134 11135 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, 11136 int opcode, int rd, int rn, int rm) 11137 { 11138 TCGv_i32 tcg_res[2]; 11139 int part = is_q ? 
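               /* The rounding forms add half of the discarded low half
                * first: do_narrow_round_high_u32() above adds 1 << 31 and
                * keeps bits [63:32], so e.g. a 64-bit sum of
                * 0x00000001_80000000 rounds up to a narrowed result of 2.
                * "part" selects the destination half for the "2" variants.
                */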
2 : 0; 11140 int pass; 11141 11142 for (pass = 0; pass < 2; pass++) { 11143 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11144 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 11145 TCGv_i64 tcg_wideres = tcg_temp_new_i64(); 11146 static NeonGenNarrowFn * const narrowfns[3][2] = { 11147 { gen_helper_neon_narrow_high_u8, 11148 gen_helper_neon_narrow_round_high_u8 }, 11149 { gen_helper_neon_narrow_high_u16, 11150 gen_helper_neon_narrow_round_high_u16 }, 11151 { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 }, 11152 }; 11153 NeonGenNarrowFn *gennarrow = narrowfns[size][is_u]; 11154 11155 read_vec_element(s, tcg_op1, rn, pass, MO_64); 11156 read_vec_element(s, tcg_op2, rm, pass, MO_64); 11157 11158 gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2); 11159 11160 tcg_temp_free_i64(tcg_op1); 11161 tcg_temp_free_i64(tcg_op2); 11162 11163 tcg_res[pass] = tcg_temp_new_i32(); 11164 gennarrow(tcg_res[pass], tcg_wideres); 11165 tcg_temp_free_i64(tcg_wideres); 11166 } 11167 11168 for (pass = 0; pass < 2; pass++) { 11169 write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32); 11170 tcg_temp_free_i32(tcg_res[pass]); 11171 } 11172 clear_vec_high(s, is_q, rd); 11173 } 11174 11175 /* AdvSIMD three different 11176 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 11177 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 11178 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | 11179 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 11180 */ 11181 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) 11182 { 11183 /* Instructions in this group fall into three basic classes 11184 * (in each case with the operation working on each element in 11185 * the input vectors): 11186 * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra 11187 * 128 bit input) 11188 * (2) wide 64 x 128 -> 128 11189 * (3) narrowing 128 x 128 -> 64 11190 * Here we do initial decode, catch unallocated cases and 11191 * dispatch to separate functions for each class. 11192 */ 11193 int is_q = extract32(insn, 30, 1); 11194 int is_u = extract32(insn, 29, 1); 11195 int size = extract32(insn, 22, 2); 11196 int opcode = extract32(insn, 12, 4); 11197 int rm = extract32(insn, 16, 5); 11198 int rn = extract32(insn, 5, 5); 11199 int rd = extract32(insn, 0, 5); 11200 11201 switch (opcode) { 11202 case 1: /* SADDW, SADDW2, UADDW, UADDW2 */ 11203 case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */ 11204 /* 64 x 128 -> 128 */ 11205 if (size == 3) { 11206 unallocated_encoding(s); 11207 return; 11208 } 11209 if (!fp_access_check(s)) { 11210 return; 11211 } 11212 handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm); 11213 break; 11214 case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */ 11215 case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */ 11216 /* 128 x 128 -> 64 */ 11217 if (size == 3) { 11218 unallocated_encoding(s); 11219 return; 11220 } 11221 if (!fp_access_check(s)) { 11222 return; 11223 } 11224 handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm); 11225 break; 11226 case 14: /* PMULL, PMULL2 */ 11227 if (is_u) { 11228 unallocated_encoding(s); 11229 return; 11230 } 11231 switch (size) { 11232 case 0: /* PMULL.P8 */ 11233 if (!fp_access_check(s)) { 11234 return; 11235 } 11236 /* The Q field specifies lo/hi half input for this insn. 
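             * PMULL is a carry-less (polynomial) multiply over GF(2):
             * partial products are combined with XOR rather than addition,
             * so e.g. 0b0011 * 0b0011 == 0b0101, since in GF(2)[x] we have
             * (x + 1) * (x + 1) == x^2 + 1.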
             */
            gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
                             gen_helper_neon_pmull_h);
            break;

        case 3: /* PMULL.P64 */
            if (!dc_isar_feature(aa64_pmull, s)) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            /* The Q field specifies lo/hi half input for this insn. */
            gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
                             gen_helper_gvec_pmull_q);
            break;

        default:
            unallocated_encoding(s);
            break;
        }
        return;
    case 9: /* SQDMLAL, SQDMLAL2 */
    case 11: /* SQDMLSL, SQDMLSL2 */
    case 13: /* SQDMULL, SQDMULL2 */
        if (is_u || size == 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
        /* 64 x 64 -> 128 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    default:
        /* opcode 15 not allocated */
        unallocated_encoding(s);
        break;
    }
}

/* Logic op (opcode == 3) subgroup of C3.6.16. */
static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool is_u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);

    if (!fp_access_check(s)) {
        return;
    }

    switch (size + 4 * is_u) {
    case 0: /* AND */
        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
        return;
    case 1: /* BIC */
        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
        return;
    case 2: /* ORR */
        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
        return;
    case 3: /* ORN */
        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
        return;
    case 4: /* EOR */
        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
        return;

    case 5: /* BSL bitwise select */
        gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
        return;
    case 6: /* BIT, bitwise insert if true */
        gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
        return;
    case 7: /* BIF, bitwise insert if false */
        gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
        return;

    default:
        g_assert_not_reached();
    }
}

/* Pairwise op subgroup of C3.6.16.
 *
 * This is called directly and also from disas_simd_3same_float for float
 * pairwise operations, where the opcode and size are calculated differently.
11342 */ 11343 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, 11344 int size, int rn, int rm, int rd) 11345 { 11346 TCGv_ptr fpst; 11347 int pass; 11348 11349 /* Floating point operations need fpst */ 11350 if (opcode >= 0x58) { 11351 fpst = fpstatus_ptr(FPST_FPCR); 11352 } else { 11353 fpst = NULL; 11354 } 11355 11356 if (!fp_access_check(s)) { 11357 return; 11358 } 11359 11360 /* These operations work on the concatenated rm:rn, with each pair of 11361 * adjacent elements being operated on to produce an element in the result. 11362 */ 11363 if (size == 3) { 11364 TCGv_i64 tcg_res[2]; 11365 11366 for (pass = 0; pass < 2; pass++) { 11367 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11368 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 11369 int passreg = (pass == 0) ? rn : rm; 11370 11371 read_vec_element(s, tcg_op1, passreg, 0, MO_64); 11372 read_vec_element(s, tcg_op2, passreg, 1, MO_64); 11373 tcg_res[pass] = tcg_temp_new_i64(); 11374 11375 switch (opcode) { 11376 case 0x17: /* ADDP */ 11377 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); 11378 break; 11379 case 0x58: /* FMAXNMP */ 11380 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11381 break; 11382 case 0x5a: /* FADDP */ 11383 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11384 break; 11385 case 0x5e: /* FMAXP */ 11386 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11387 break; 11388 case 0x78: /* FMINNMP */ 11389 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11390 break; 11391 case 0x7e: /* FMINP */ 11392 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11393 break; 11394 default: 11395 g_assert_not_reached(); 11396 } 11397 11398 tcg_temp_free_i64(tcg_op1); 11399 tcg_temp_free_i64(tcg_op2); 11400 } 11401 11402 for (pass = 0; pass < 2; pass++) { 11403 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 11404 tcg_temp_free_i64(tcg_res[pass]); 11405 } 11406 } else { 11407 int maxpass = is_q ? 4 : 2; 11408 TCGv_i32 tcg_res[4]; 11409 11410 for (pass = 0; pass < maxpass; pass++) { 11411 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11412 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11413 NeonGenTwoOpFn *genfn = NULL; 11414 int passreg = pass < (maxpass / 2) ? rn : rm; 11415 int passelt = (is_q && (pass & 1)) ? 
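            /* E.g. ADDP Vd.2D, Vn.2D, Vm.2D computes
             * Vd[0] = Vn[1] + Vn[0] and Vd[1] = Vm[1] + Vm[0]. For the
             * 32-bit passes here, passes in the first half read rn and the
             * rest read rm, pairing elements passelt and passelt + 1.
             */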
                          2 : 0;

            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
            tcg_res[pass] = tcg_temp_new_i32();

            switch (opcode) {
            case 0x17: /* ADDP */
            {
                static NeonGenTwoOpFn * const fns[3] = {
                    gen_helper_neon_padd_u8,
                    gen_helper_neon_padd_u16,
                    tcg_gen_add_i32,
                };
                genfn = fns[size];
                break;
            }
            case 0x14: /* SMAXP, UMAXP */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
                    { tcg_gen_smax_i32, tcg_gen_umax_i32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x15: /* SMINP, UMINP */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
                    { tcg_gen_smin_i32, tcg_gen_umin_i32 },
                };
                genfn = fns[size][u];
                break;
            }
            /* The FP operations are all on single floats (32 bit) */
            case 0x58: /* FMAXNMP */
                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5a: /* FADDP */
                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5e: /* FMAXP */
                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x78: /* FMINNMP */
                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x7e: /* FMINP */
                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            /* The FP ops were generated directly above; for the integer
             * ops, call the selected genfn now.
             */
            if (genfn) {
                genfn(tcg_res[pass], tcg_op1, tcg_op2);
            }

            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }

        for (pass = 0; pass < maxpass; pass++) {
            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
            tcg_temp_free_i32(tcg_res[pass]);
        }
        clear_vec_high(s, is_q, rd);
    }

    if (fpst) {
        tcg_temp_free_ptr(fpst);
    }
}

/* Floating point op subgroup of C3.6.16. */
static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
{
    /* For floating point ops, the U, size[1] and opcode bits
     * together indicate the operation. size[0] indicates single
     * or double.
     */
    int fpopcode = extract32(insn, 11, 5)
        | (extract32(insn, 23, 1) << 5)
        | (extract32(insn, 29, 1) << 6);
    int is_q = extract32(insn, 30, 1);
    int size = extract32(insn, 22, 1);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    int datasize = is_q ? 128 : 64;
    int esize = 32 << size;
    int elements = datasize / esize;

    if (size == 1 && !is_q) {
        unallocated_encoding(s);
        return;
    }

    switch (fpopcode) {
    case 0x58: /* FMAXNMP */
    case 0x5a: /* FADDP */
    case 0x5e: /* FMAXP */
    case 0x78: /* FMINNMP */
    case 0x7e: /* FMINP */
        if (size && !is_q) {
            unallocated_encoding(s);
            return;
        }
        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ?
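                               /* Worked example of the fpopcode packing:
                                * FADDP has opcode 0x1a, size[1] == 0 and
                                * U == 1, giving fpopcode 0x5a == 0b1011010.
                                */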
MO_64 : MO_32, 11529 rn, rm, rd); 11530 return; 11531 case 0x1b: /* FMULX */ 11532 case 0x1f: /* FRECPS */ 11533 case 0x3f: /* FRSQRTS */ 11534 case 0x5d: /* FACGE */ 11535 case 0x7d: /* FACGT */ 11536 case 0x19: /* FMLA */ 11537 case 0x39: /* FMLS */ 11538 case 0x18: /* FMAXNM */ 11539 case 0x1a: /* FADD */ 11540 case 0x1c: /* FCMEQ */ 11541 case 0x1e: /* FMAX */ 11542 case 0x38: /* FMINNM */ 11543 case 0x3a: /* FSUB */ 11544 case 0x3e: /* FMIN */ 11545 case 0x5b: /* FMUL */ 11546 case 0x5c: /* FCMGE */ 11547 case 0x5f: /* FDIV */ 11548 case 0x7a: /* FABD */ 11549 case 0x7c: /* FCMGT */ 11550 if (!fp_access_check(s)) { 11551 return; 11552 } 11553 handle_3same_float(s, size, elements, fpopcode, rd, rn, rm); 11554 return; 11555 11556 case 0x1d: /* FMLAL */ 11557 case 0x3d: /* FMLSL */ 11558 case 0x59: /* FMLAL2 */ 11559 case 0x79: /* FMLSL2 */ 11560 if (size & 1 || !dc_isar_feature(aa64_fhm, s)) { 11561 unallocated_encoding(s); 11562 return; 11563 } 11564 if (fp_access_check(s)) { 11565 int is_s = extract32(insn, 23, 1); 11566 int is_2 = extract32(insn, 29, 1); 11567 int data = (is_2 << 1) | is_s; 11568 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 11569 vec_full_reg_offset(s, rn), 11570 vec_full_reg_offset(s, rm), cpu_env, 11571 is_q ? 16 : 8, vec_full_reg_size(s), 11572 data, gen_helper_gvec_fmlal_a64); 11573 } 11574 return; 11575 11576 default: 11577 unallocated_encoding(s); 11578 return; 11579 } 11580 } 11581 11582 /* Integer op subgroup of C3.6.16. */ 11583 static void disas_simd_3same_int(DisasContext *s, uint32_t insn) 11584 { 11585 int is_q = extract32(insn, 30, 1); 11586 int u = extract32(insn, 29, 1); 11587 int size = extract32(insn, 22, 2); 11588 int opcode = extract32(insn, 11, 5); 11589 int rm = extract32(insn, 16, 5); 11590 int rn = extract32(insn, 5, 5); 11591 int rd = extract32(insn, 0, 5); 11592 int pass; 11593 TCGCond cond; 11594 11595 switch (opcode) { 11596 case 0x13: /* MUL, PMUL */ 11597 if (u && size != 0) { 11598 unallocated_encoding(s); 11599 return; 11600 } 11601 /* fall through */ 11602 case 0x0: /* SHADD, UHADD */ 11603 case 0x2: /* SRHADD, URHADD */ 11604 case 0x4: /* SHSUB, UHSUB */ 11605 case 0xc: /* SMAX, UMAX */ 11606 case 0xd: /* SMIN, UMIN */ 11607 case 0xe: /* SABD, UABD */ 11608 case 0xf: /* SABA, UABA */ 11609 case 0x12: /* MLA, MLS */ 11610 if (size == 3) { 11611 unallocated_encoding(s); 11612 return; 11613 } 11614 break; 11615 case 0x16: /* SQDMULH, SQRDMULH */ 11616 if (size == 0 || size == 3) { 11617 unallocated_encoding(s); 11618 return; 11619 } 11620 break; 11621 default: 11622 if (size == 3 && !is_q) { 11623 unallocated_encoding(s); 11624 return; 11625 } 11626 break; 11627 } 11628 11629 if (!fp_access_check(s)) { 11630 return; 11631 } 11632 11633 switch (opcode) { 11634 case 0x01: /* SQADD, UQADD */ 11635 if (u) { 11636 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size); 11637 } else { 11638 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size); 11639 } 11640 return; 11641 case 0x05: /* SQSUB, UQSUB */ 11642 if (u) { 11643 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size); 11644 } else { 11645 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size); 11646 } 11647 return; 11648 case 0x08: /* SSHL, USHL */ 11649 if (u) { 11650 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size); 11651 } else { 11652 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size); 11653 } 11654 return; 11655 case 0x0c: /* SMAX, UMAX */ 11656 if (u) { 11657 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size); 11658 } else { 11659 
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size); 11660 } 11661 return; 11662 case 0x0d: /* SMIN, UMIN */ 11663 if (u) { 11664 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size); 11665 } else { 11666 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size); 11667 } 11668 return; 11669 case 0xe: /* SABD, UABD */ 11670 if (u) { 11671 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size); 11672 } else { 11673 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size); 11674 } 11675 return; 11676 case 0xf: /* SABA, UABA */ 11677 if (u) { 11678 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size); 11679 } else { 11680 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size); 11681 } 11682 return; 11683 case 0x10: /* ADD, SUB */ 11684 if (u) { 11685 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size); 11686 } else { 11687 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size); 11688 } 11689 return; 11690 case 0x13: /* MUL, PMUL */ 11691 if (!u) { /* MUL */ 11692 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size); 11693 } else { /* PMUL */ 11694 gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b); 11695 } 11696 return; 11697 case 0x12: /* MLA, MLS */ 11698 if (u) { 11699 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size); 11700 } else { 11701 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size); 11702 } 11703 return; 11704 case 0x16: /* SQDMULH, SQRDMULH */ 11705 { 11706 static gen_helper_gvec_3_ptr * const fns[2][2] = { 11707 { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h }, 11708 { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s }, 11709 }; 11710 gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]); 11711 } 11712 return; 11713 case 0x11: 11714 if (!u) { /* CMTST */ 11715 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size); 11716 return; 11717 } 11718 /* else CMEQ */ 11719 cond = TCG_COND_EQ; 11720 goto do_gvec_cmp; 11721 case 0x06: /* CMGT, CMHI */ 11722 cond = u ? TCG_COND_GTU : TCG_COND_GT; 11723 goto do_gvec_cmp; 11724 case 0x07: /* CMGE, CMHS */ 11725 cond = u ? TCG_COND_GEU : TCG_COND_GE; 11726 do_gvec_cmp: 11727 tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd), 11728 vec_full_reg_offset(s, rn), 11729 vec_full_reg_offset(s, rm), 11730 is_q ? 16 : 8, vec_full_reg_size(s)); 11731 return; 11732 } 11733 11734 if (size == 3) { 11735 assert(is_q); 11736 for (pass = 0; pass < 2; pass++) { 11737 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 11738 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 11739 TCGv_i64 tcg_res = tcg_temp_new_i64(); 11740 11741 read_vec_element(s, tcg_op1, rn, pass, MO_64); 11742 read_vec_element(s, tcg_op2, rm, pass, MO_64); 11743 11744 handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2); 11745 11746 write_vec_element(s, tcg_res, rd, pass, MO_64); 11747 11748 tcg_temp_free_i64(tcg_res); 11749 tcg_temp_free_i64(tcg_op1); 11750 tcg_temp_free_i64(tcg_op2); 11751 } 11752 } else { 11753 for (pass = 0; pass < (is_q ? 
4 : 2); pass++) { 11754 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11755 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11756 TCGv_i32 tcg_res = tcg_temp_new_i32(); 11757 NeonGenTwoOpFn *genfn = NULL; 11758 NeonGenTwoOpEnvFn *genenvfn = NULL; 11759 11760 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); 11761 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); 11762 11763 switch (opcode) { 11764 case 0x0: /* SHADD, UHADD */ 11765 { 11766 static NeonGenTwoOpFn * const fns[3][2] = { 11767 { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 }, 11768 { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 }, 11769 { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 }, 11770 }; 11771 genfn = fns[size][u]; 11772 break; 11773 } 11774 case 0x2: /* SRHADD, URHADD */ 11775 { 11776 static NeonGenTwoOpFn * const fns[3][2] = { 11777 { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 }, 11778 { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 }, 11779 { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 }, 11780 }; 11781 genfn = fns[size][u]; 11782 break; 11783 } 11784 case 0x4: /* SHSUB, UHSUB */ 11785 { 11786 static NeonGenTwoOpFn * const fns[3][2] = { 11787 { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 }, 11788 { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 }, 11789 { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 }, 11790 }; 11791 genfn = fns[size][u]; 11792 break; 11793 } 11794 case 0x9: /* SQSHL, UQSHL */ 11795 { 11796 static NeonGenTwoOpEnvFn * const fns[3][2] = { 11797 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, 11798 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, 11799 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, 11800 }; 11801 genenvfn = fns[size][u]; 11802 break; 11803 } 11804 case 0xa: /* SRSHL, URSHL */ 11805 { 11806 static NeonGenTwoOpFn * const fns[3][2] = { 11807 { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 }, 11808 { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 }, 11809 { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 }, 11810 }; 11811 genfn = fns[size][u]; 11812 break; 11813 } 11814 case 0xb: /* SQRSHL, UQRSHL */ 11815 { 11816 static NeonGenTwoOpEnvFn * const fns[3][2] = { 11817 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, 11818 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, 11819 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, 11820 }; 11821 genenvfn = fns[size][u]; 11822 break; 11823 } 11824 default: 11825 g_assert_not_reached(); 11826 } 11827 11828 if (genenvfn) { 11829 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2); 11830 } else { 11831 genfn(tcg_res, tcg_op1, tcg_op2); 11832 } 11833 11834 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 11835 11836 tcg_temp_free_i32(tcg_res); 11837 tcg_temp_free_i32(tcg_op1); 11838 tcg_temp_free_i32(tcg_op2); 11839 } 11840 } 11841 clear_vec_high(s, is_q, rd); 11842 } 11843 11844 /* AdvSIMD three same 11845 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 11846 * +---+---+---+-----------+------+---+------+--------+---+------+------+ 11847 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | 11848 * +---+---+---+-----------+------+---+------+--------+---+------+------+ 11849 */ 11850 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn) 11851 { 11852 int opcode = extract32(insn, 11, 5); 11853 11854 switch (opcode) { 11855 case 0x3: /* logic ops */ 11856 disas_simd_3same_logic(s, insn); 11857 break; 11858 case 0x17: /* ADDP */ 11859 case 0x14: /* SMAXP, UMAXP */ 11860 case 0x15: /* SMINP, UMINP */ 11861 { 11862 /* Pairwise operations 
*/ 11863 int is_q = extract32(insn, 30, 1); 11864 int u = extract32(insn, 29, 1); 11865 int size = extract32(insn, 22, 2); 11866 int rm = extract32(insn, 16, 5); 11867 int rn = extract32(insn, 5, 5); 11868 int rd = extract32(insn, 0, 5); 11869 if (opcode == 0x17) { 11870 if (u || (size == 3 && !is_q)) { 11871 unallocated_encoding(s); 11872 return; 11873 } 11874 } else { 11875 if (size == 3) { 11876 unallocated_encoding(s); 11877 return; 11878 } 11879 } 11880 handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd); 11881 break; 11882 } 11883 case 0x18 ... 0x31: 11884 /* floating point ops, sz[1] and U are part of opcode */ 11885 disas_simd_3same_float(s, insn); 11886 break; 11887 default: 11888 disas_simd_3same_int(s, insn); 11889 break; 11890 } 11891 } 11892 11893 /* 11894 * Advanced SIMD three same (ARMv8.2 FP16 variants) 11895 * 11896 * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 11897 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ 11898 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | 11899 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ 11900 * 11901 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE 11902 * (register), FACGE, FABD, FCMGT (register) and FACGT. 11903 * 11904 */ 11905 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) 11906 { 11907 int opcode = extract32(insn, 11, 3); 11908 int u = extract32(insn, 29, 1); 11909 int a = extract32(insn, 23, 1); 11910 int is_q = extract32(insn, 30, 1); 11911 int rm = extract32(insn, 16, 5); 11912 int rn = extract32(insn, 5, 5); 11913 int rd = extract32(insn, 0, 5); 11914 /* 11915 * For these floating point ops, the U, a and opcode bits 11916 * together indicate the operation. 11917 */ 11918 int fpopcode = opcode | (a << 3) | (u << 4); 11919 int datasize = is_q ? 128 : 64; 11920 int elements = datasize / 16; 11921 bool pairwise; 11922 TCGv_ptr fpst; 11923 int pass; 11924 11925 switch (fpopcode) { 11926 case 0x0: /* FMAXNM */ 11927 case 0x1: /* FMLA */ 11928 case 0x2: /* FADD */ 11929 case 0x3: /* FMULX */ 11930 case 0x4: /* FCMEQ */ 11931 case 0x6: /* FMAX */ 11932 case 0x7: /* FRECPS */ 11933 case 0x8: /* FMINNM */ 11934 case 0x9: /* FMLS */ 11935 case 0xa: /* FSUB */ 11936 case 0xe: /* FMIN */ 11937 case 0xf: /* FRSQRTS */ 11938 case 0x13: /* FMUL */ 11939 case 0x14: /* FCMGE */ 11940 case 0x15: /* FACGE */ 11941 case 0x17: /* FDIV */ 11942 case 0x1a: /* FABD */ 11943 case 0x1c: /* FCMGT */ 11944 case 0x1d: /* FACGT */ 11945 pairwise = false; 11946 break; 11947 case 0x10: /* FMAXNMP */ 11948 case 0x12: /* FADDP */ 11949 case 0x16: /* FMAXP */ 11950 case 0x18: /* FMINNMP */ 11951 case 0x1e: /* FMINP */ 11952 pairwise = true; 11953 break; 11954 default: 11955 unallocated_encoding(s); 11956 return; 11957 } 11958 11959 if (!dc_isar_feature(aa64_fp16, s)) { 11960 unallocated_encoding(s); 11961 return; 11962 } 11963 11964 if (!fp_access_check(s)) { 11965 return; 11966 } 11967 11968 fpst = fpstatus_ptr(FPST_FPCR_F16); 11969 11970 if (pairwise) { 11971 int maxpass = is_q ? 8 : 4; 11972 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 11973 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 11974 TCGv_i32 tcg_res[8]; 11975 11976 for (pass = 0; pass < maxpass; pass++) { 11977 int passreg = pass < (maxpass / 2) ? 
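            /* With Q set, maxpass == 8: passes 0..3 read rn and 4..7 read
             * rm, and passelt below, (pass << 1) & (maxpass - 1), walks the
             * element pairs (0,1), (2,3), (4,5), (6,7) within each source.
             */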
rn : rm; 11978 int passelt = (pass << 1) & (maxpass - 1); 11979 11980 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16); 11981 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16); 11982 tcg_res[pass] = tcg_temp_new_i32(); 11983 11984 switch (fpopcode) { 11985 case 0x10: /* FMAXNMP */ 11986 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2, 11987 fpst); 11988 break; 11989 case 0x12: /* FADDP */ 11990 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11991 break; 11992 case 0x16: /* FMAXP */ 11993 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11994 break; 11995 case 0x18: /* FMINNMP */ 11996 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2, 11997 fpst); 11998 break; 11999 case 0x1e: /* FMINP */ 12000 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 12001 break; 12002 default: 12003 g_assert_not_reached(); 12004 } 12005 } 12006 12007 for (pass = 0; pass < maxpass; pass++) { 12008 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16); 12009 tcg_temp_free_i32(tcg_res[pass]); 12010 } 12011 12012 tcg_temp_free_i32(tcg_op1); 12013 tcg_temp_free_i32(tcg_op2); 12014 12015 } else { 12016 for (pass = 0; pass < elements; pass++) { 12017 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 12018 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 12019 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12020 12021 read_vec_element_i32(s, tcg_op1, rn, pass, MO_16); 12022 read_vec_element_i32(s, tcg_op2, rm, pass, MO_16); 12023 12024 switch (fpopcode) { 12025 case 0x0: /* FMAXNM */ 12026 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 12027 break; 12028 case 0x1: /* FMLA */ 12029 read_vec_element_i32(s, tcg_res, rd, pass, MO_16); 12030 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, 12031 fpst); 12032 break; 12033 case 0x2: /* FADD */ 12034 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 12035 break; 12036 case 0x3: /* FMULX */ 12037 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); 12038 break; 12039 case 0x4: /* FCMEQ */ 12040 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12041 break; 12042 case 0x6: /* FMAX */ 12043 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 12044 break; 12045 case 0x7: /* FRECPS */ 12046 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12047 break; 12048 case 0x8: /* FMINNM */ 12049 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 12050 break; 12051 case 0x9: /* FMLS */ 12052 /* As usual for ARM, separate negation for fused multiply-add */ 12053 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); 12054 read_vec_element_i32(s, tcg_res, rd, pass, MO_16); 12055 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, 12056 fpst); 12057 break; 12058 case 0xa: /* FSUB */ 12059 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 12060 break; 12061 case 0xe: /* FMIN */ 12062 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 12063 break; 12064 case 0xf: /* FRSQRTS */ 12065 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12066 break; 12067 case 0x13: /* FMUL */ 12068 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 12069 break; 12070 case 0x14: /* FCMGE */ 12071 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12072 break; 12073 case 0x15: /* FACGE */ 12074 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12075 break; 12076 case 0x17: /* FDIV */ 12077 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); 12078 break; 12079 case 0x1a: /* FABD */ 12080 gen_helper_advsimd_subh(tcg_res, tcg_op1, 
tcg_op2, fpst); 12081 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff); 12082 break; 12083 case 0x1c: /* FCMGT */ 12084 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12085 break; 12086 case 0x1d: /* FACGT */ 12087 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12088 break; 12089 default: 12090 g_assert_not_reached(); 12091 } 12092 12093 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 12094 tcg_temp_free_i32(tcg_res); 12095 tcg_temp_free_i32(tcg_op1); 12096 tcg_temp_free_i32(tcg_op2); 12097 } 12098 } 12099 12100 tcg_temp_free_ptr(fpst); 12101 12102 clear_vec_high(s, is_q, rd); 12103 } 12104 12105 /* AdvSIMD three same extra 12106 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 12107 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ 12108 * | 0 | Q | U | 0 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd | 12109 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ 12110 */ 12111 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) 12112 { 12113 int rd = extract32(insn, 0, 5); 12114 int rn = extract32(insn, 5, 5); 12115 int opcode = extract32(insn, 11, 4); 12116 int rm = extract32(insn, 16, 5); 12117 int size = extract32(insn, 22, 2); 12118 bool u = extract32(insn, 29, 1); 12119 bool is_q = extract32(insn, 30, 1); 12120 bool feature; 12121 int rot; 12122 12123 switch (u * 16 + opcode) { 12124 case 0x10: /* SQRDMLAH (vector) */ 12125 case 0x11: /* SQRDMLSH (vector) */ 12126 if (size != 1 && size != 2) { 12127 unallocated_encoding(s); 12128 return; 12129 } 12130 feature = dc_isar_feature(aa64_rdm, s); 12131 break; 12132 case 0x02: /* SDOT (vector) */ 12133 case 0x12: /* UDOT (vector) */ 12134 if (size != MO_32) { 12135 unallocated_encoding(s); 12136 return; 12137 } 12138 feature = dc_isar_feature(aa64_dp, s); 12139 break; 12140 case 0x03: /* USDOT */ 12141 if (size != MO_32) { 12142 unallocated_encoding(s); 12143 return; 12144 } 12145 feature = dc_isar_feature(aa64_i8mm, s); 12146 break; 12147 case 0x04: /* SMMLA */ 12148 case 0x14: /* UMMLA */ 12149 case 0x05: /* USMMLA */ 12150 if (!is_q || size != MO_32) { 12151 unallocated_encoding(s); 12152 return; 12153 } 12154 feature = dc_isar_feature(aa64_i8mm, s); 12155 break; 12156 case 0x18: /* FCMLA, #0 */ 12157 case 0x19: /* FCMLA, #90 */ 12158 case 0x1a: /* FCMLA, #180 */ 12159 case 0x1b: /* FCMLA, #270 */ 12160 case 0x1c: /* FCADD, #90 */ 12161 case 0x1e: /* FCADD, #270 */ 12162 if (size == 0 12163 || (size == 1 && !dc_isar_feature(aa64_fp16, s)) 12164 || (size == 3 && !is_q)) { 12165 unallocated_encoding(s); 12166 return; 12167 } 12168 feature = dc_isar_feature(aa64_fcma, s); 12169 break; 12170 case 0x1d: /* BFMMLA */ 12171 if (size != MO_16 || !is_q) { 12172 unallocated_encoding(s); 12173 return; 12174 } 12175 feature = dc_isar_feature(aa64_bf16, s); 12176 break; 12177 case 0x1f: 12178 switch (size) { 12179 case 1: /* BFDOT */ 12180 case 3: /* BFMLAL{B,T} */ 12181 feature = dc_isar_feature(aa64_bf16, s); 12182 break; 12183 default: 12184 unallocated_encoding(s); 12185 return; 12186 } 12187 break; 12188 default: 12189 unallocated_encoding(s); 12190 return; 12191 } 12192 if (!feature) { 12193 unallocated_encoding(s); 12194 return; 12195 } 12196 if (!fp_access_check(s)) { 12197 return; 12198 } 12199 12200 switch (opcode) { 12201 case 0x0: /* SQRDMLAH (vector) */ 12202 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size); 12203 return; 12204 12205 case 0x1: /* SQRDMLSH (vector) */ 12206 gen_gvec_fn3(s, is_q, rd, rn, rm, 
gen_gvec_sqrdmlsh_qc, size);
12207         return;
12208
12209     case 0x2: /* SDOT / UDOT */
12210         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0,
12211                          u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
12212         return;
12213
12214     case 0x3: /* USDOT */
12215         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b);
12216         return;
12217
12218     case 0x04: /* SMMLA, UMMLA */
12219         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0,
12220                          u ? gen_helper_gvec_ummla_b
12221                            : gen_helper_gvec_smmla_b);
12222         return;
12223     case 0x05: /* USMMLA */
12224         gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b);
12225         return;
12226
12227     case 0x8: /* FCMLA, #0 */
12228     case 0x9: /* FCMLA, #90 */
12229     case 0xa: /* FCMLA, #180 */
12230     case 0xb: /* FCMLA, #270 */
12231         rot = extract32(opcode, 0, 2);
12232         switch (size) {
12233         case 1:
12234             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot,
12235                               gen_helper_gvec_fcmlah);
12236             break;
12237         case 2:
12238             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
12239                               gen_helper_gvec_fcmlas);
12240             break;
12241         case 3:
12242             gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot,
12243                               gen_helper_gvec_fcmlad);
12244             break;
12245         default:
12246             g_assert_not_reached();
12247         }
12248         return;
12249
12250     case 0xc: /* FCADD, #90 */
12251     case 0xe: /* FCADD, #270 */
12252         rot = extract32(opcode, 1, 1);
12253         switch (size) {
12254         case 1:
12255             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
12256                               gen_helper_gvec_fcaddh);
12257             break;
12258         case 2:
12259             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
12260                               gen_helper_gvec_fcadds);
12261             break;
12262         case 3:
12263             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
12264                               gen_helper_gvec_fcaddd);
12265             break;
12266         default:
12267             g_assert_not_reached();
12268         }
12269         return;
12270
12271     case 0xd: /* BFMMLA */
12272         gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla);
12273         return;
12274     case 0xf:
12275         switch (size) {
12276         case 1: /* BFDOT */
12277             gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot);
12278             break;
12279         case 3: /* BFMLAL{B,T} */
12280             gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q,
12281                               gen_helper_gvec_bfmlal);
12282             break;
12283         default:
12284             g_assert_not_reached();
12285         }
12286         return;
12287
12288     default:
12289         g_assert_not_reached();
12290     }
12291 }
12292
12293 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
12294                                   int size, int rn, int rd)
12295 {
12296     /* Handle 2-reg-misc ops which are widening (so each size element
12297      * in the source becomes a 2*size element in the destination).
12298      * The only instruction like this is FCVTL.
12299      */
12300     int pass;
12301
12302     if (size == 3) {
12303         /* 32 -> 64 bit fp conversion */
12304         TCGv_i64 tcg_res[2];
12305         int srcelt = is_q ? 2 : 0;
12306
12307         for (pass = 0; pass < 2; pass++) {
12308             TCGv_i32 tcg_op = tcg_temp_new_i32();
12309             tcg_res[pass] = tcg_temp_new_i64();
12310
12311             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
12312             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
12313             tcg_temp_free_i32(tcg_op);
12314         }
12315         for (pass = 0; pass < 2; pass++) {
12316             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12317             tcg_temp_free_i64(tcg_res[pass]);
12318         }
12319     } else {
12320         /* 16 -> 32 bit fp conversion */
12321         int srcelt = is_q ?
4 : 0; 12322 TCGv_i32 tcg_res[4]; 12323 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 12324 TCGv_i32 ahp = get_ahp_flag(); 12325 12326 for (pass = 0; pass < 4; pass++) { 12327 tcg_res[pass] = tcg_temp_new_i32(); 12328 12329 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); 12330 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], 12331 fpst, ahp); 12332 } 12333 for (pass = 0; pass < 4; pass++) { 12334 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); 12335 tcg_temp_free_i32(tcg_res[pass]); 12336 } 12337 12338 tcg_temp_free_ptr(fpst); 12339 tcg_temp_free_i32(ahp); 12340 } 12341 } 12342 12343 static void handle_rev(DisasContext *s, int opcode, bool u, 12344 bool is_q, int size, int rn, int rd) 12345 { 12346 int op = (opcode << 1) | u; 12347 int opsz = op + size; 12348 int grp_size = 3 - opsz; 12349 int dsize = is_q ? 128 : 64; 12350 int i; 12351 12352 if (opsz >= 3) { 12353 unallocated_encoding(s); 12354 return; 12355 } 12356 12357 if (!fp_access_check(s)) { 12358 return; 12359 } 12360 12361 if (size == 0) { 12362 /* Special case bytes, use bswap op on each group of elements */ 12363 int groups = dsize / (8 << grp_size); 12364 12365 for (i = 0; i < groups; i++) { 12366 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 12367 12368 read_vec_element(s, tcg_tmp, rn, i, grp_size); 12369 switch (grp_size) { 12370 case MO_16: 12371 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 12372 break; 12373 case MO_32: 12374 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 12375 break; 12376 case MO_64: 12377 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp); 12378 break; 12379 default: 12380 g_assert_not_reached(); 12381 } 12382 write_vec_element(s, tcg_tmp, rd, i, grp_size); 12383 tcg_temp_free_i64(tcg_tmp); 12384 } 12385 clear_vec_high(s, is_q, rd); 12386 } else { 12387 int revmask = (1 << grp_size) - 1; 12388 int esize = 8 << size; 12389 int elements = dsize / esize; 12390 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 12391 TCGv_i64 tcg_rd = tcg_const_i64(0); 12392 TCGv_i64 tcg_rd_hi = tcg_const_i64(0); 12393 12394 for (i = 0; i < elements; i++) { 12395 int e_rev = (i & 0xf) ^ revmask; 12396 int off = e_rev * esize; 12397 read_vec_element(s, tcg_rn, rn, i, size); 12398 if (off >= 64) { 12399 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi, 12400 tcg_rn, off - 64, esize); 12401 } else { 12402 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize); 12403 } 12404 } 12405 write_vec_element(s, tcg_rd, rd, 0, MO_64); 12406 write_vec_element(s, tcg_rd_hi, rd, 1, MO_64); 12407 12408 tcg_temp_free_i64(tcg_rd_hi); 12409 tcg_temp_free_i64(tcg_rd); 12410 tcg_temp_free_i64(tcg_rn); 12411 } 12412 } 12413 12414 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, 12415 bool is_q, int size, int rn, int rd) 12416 { 12417 /* Implement the pairwise operations from 2-misc: 12418 * SADDLP, UADDLP, SADALP, UADALP. 12419 * These all add pairs of elements in the input to produce a 12420 * double-width result element in the output (possibly accumulating). 12421 */ 12422 bool accum = (opcode == 0x6); 12423 int maxpass = is_q ? 2 : 1; 12424 int pass; 12425 TCGv_i64 tcg_res[2]; 12426 12427 if (size == 2) { 12428 /* 32 + 32 -> 64 op */ 12429 MemOp memop = size + (u ? 
0 : MO_SIGN); 12430 12431 for (pass = 0; pass < maxpass; pass++) { 12432 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 12433 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 12434 12435 tcg_res[pass] = tcg_temp_new_i64(); 12436 12437 read_vec_element(s, tcg_op1, rn, pass * 2, memop); 12438 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop); 12439 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); 12440 if (accum) { 12441 read_vec_element(s, tcg_op1, rd, pass, MO_64); 12442 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1); 12443 } 12444 12445 tcg_temp_free_i64(tcg_op1); 12446 tcg_temp_free_i64(tcg_op2); 12447 } 12448 } else { 12449 for (pass = 0; pass < maxpass; pass++) { 12450 TCGv_i64 tcg_op = tcg_temp_new_i64(); 12451 NeonGenOne64OpFn *genfn; 12452 static NeonGenOne64OpFn * const fns[2][2] = { 12453 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, 12454 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, 12455 }; 12456 12457 genfn = fns[size][u]; 12458 12459 tcg_res[pass] = tcg_temp_new_i64(); 12460 12461 read_vec_element(s, tcg_op, rn, pass, MO_64); 12462 genfn(tcg_res[pass], tcg_op); 12463 12464 if (accum) { 12465 read_vec_element(s, tcg_op, rd, pass, MO_64); 12466 if (size == 0) { 12467 gen_helper_neon_addl_u16(tcg_res[pass], 12468 tcg_res[pass], tcg_op); 12469 } else { 12470 gen_helper_neon_addl_u32(tcg_res[pass], 12471 tcg_res[pass], tcg_op); 12472 } 12473 } 12474 tcg_temp_free_i64(tcg_op); 12475 } 12476 } 12477 if (!is_q) { 12478 tcg_res[1] = tcg_constant_i64(0); 12479 } 12480 for (pass = 0; pass < 2; pass++) { 12481 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 12482 tcg_temp_free_i64(tcg_res[pass]); 12483 } 12484 } 12485 12486 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) 12487 { 12488 /* Implement SHLL and SHLL2 */ 12489 int pass; 12490 int part = is_q ? 
2 : 0; 12491 TCGv_i64 tcg_res[2]; 12492 12493 for (pass = 0; pass < 2; pass++) { 12494 static NeonGenWidenFn * const widenfns[3] = { 12495 gen_helper_neon_widen_u8, 12496 gen_helper_neon_widen_u16, 12497 tcg_gen_extu_i32_i64, 12498 }; 12499 NeonGenWidenFn *widenfn = widenfns[size]; 12500 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12501 12502 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32); 12503 tcg_res[pass] = tcg_temp_new_i64(); 12504 widenfn(tcg_res[pass], tcg_op); 12505 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size); 12506 12507 tcg_temp_free_i32(tcg_op); 12508 } 12509 12510 for (pass = 0; pass < 2; pass++) { 12511 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 12512 tcg_temp_free_i64(tcg_res[pass]); 12513 } 12514 } 12515 12516 /* AdvSIMD two reg misc 12517 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 12518 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 12519 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 12520 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 12521 */ 12522 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) 12523 { 12524 int size = extract32(insn, 22, 2); 12525 int opcode = extract32(insn, 12, 5); 12526 bool u = extract32(insn, 29, 1); 12527 bool is_q = extract32(insn, 30, 1); 12528 int rn = extract32(insn, 5, 5); 12529 int rd = extract32(insn, 0, 5); 12530 bool need_fpstatus = false; 12531 bool need_rmode = false; 12532 int rmode = -1; 12533 TCGv_i32 tcg_rmode; 12534 TCGv_ptr tcg_fpstatus; 12535 12536 switch (opcode) { 12537 case 0x0: /* REV64, REV32 */ 12538 case 0x1: /* REV16 */ 12539 handle_rev(s, opcode, u, is_q, size, rn, rd); 12540 return; 12541 case 0x5: /* CNT, NOT, RBIT */ 12542 if (u && size == 0) { 12543 /* NOT */ 12544 break; 12545 } else if (u && size == 1) { 12546 /* RBIT */ 12547 break; 12548 } else if (!u && size == 0) { 12549 /* CNT */ 12550 break; 12551 } 12552 unallocated_encoding(s); 12553 return; 12554 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */ 12555 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */ 12556 if (size == 3) { 12557 unallocated_encoding(s); 12558 return; 12559 } 12560 if (!fp_access_check(s)) { 12561 return; 12562 } 12563 12564 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd); 12565 return; 12566 case 0x4: /* CLS, CLZ */ 12567 if (size == 3) { 12568 unallocated_encoding(s); 12569 return; 12570 } 12571 break; 12572 case 0x2: /* SADDLP, UADDLP */ 12573 case 0x6: /* SADALP, UADALP */ 12574 if (size == 3) { 12575 unallocated_encoding(s); 12576 return; 12577 } 12578 if (!fp_access_check(s)) { 12579 return; 12580 } 12581 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd); 12582 return; 12583 case 0x13: /* SHLL, SHLL2 */ 12584 if (u == 0 || size == 3) { 12585 unallocated_encoding(s); 12586 return; 12587 } 12588 if (!fp_access_check(s)) { 12589 return; 12590 } 12591 handle_shll(s, is_q, size, rn, rd); 12592 return; 12593 case 0xa: /* CMLT */ 12594 if (u == 1) { 12595 unallocated_encoding(s); 12596 return; 12597 } 12598 /* fall through */ 12599 case 0x8: /* CMGT, CMGE */ 12600 case 0x9: /* CMEQ, CMLE */ 12601 case 0xb: /* ABS, NEG */ 12602 if (size == 3 && !is_q) { 12603 unallocated_encoding(s); 12604 return; 12605 } 12606 break; 12607 case 0x3: /* SUQADD, USQADD */ 12608 if (size == 3 && !is_q) { 12609 unallocated_encoding(s); 12610 return; 12611 } 12612 if (!fp_access_check(s)) { 12613 return; 12614 } 12615 handle_2misc_satacc(s, false, u, is_q, size, rn, rd); 12616 return; 12617 case 0x7: /* SQABS, SQNEG */ 
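        /*
         * SQABS/SQNEG saturate rather than wrap: e.g. for byte elements
         * SQABS(0x80) yields 0x7f and sets QC, so these are implemented
         * via helpers that take cpu_env and can update the saturation flag.
         */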
12618 if (size == 3 && !is_q) { 12619 unallocated_encoding(s); 12620 return; 12621 } 12622 break; 12623 case 0xc ... 0xf: 12624 case 0x16 ... 0x1f: 12625 { 12626 /* Floating point: U, size[1] and opcode indicate operation; 12627 * size[0] indicates single or double precision. 12628 */ 12629 int is_double = extract32(size, 0, 1); 12630 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 12631 size = is_double ? 3 : 2; 12632 switch (opcode) { 12633 case 0x2f: /* FABS */ 12634 case 0x6f: /* FNEG */ 12635 if (size == 3 && !is_q) { 12636 unallocated_encoding(s); 12637 return; 12638 } 12639 break; 12640 case 0x1d: /* SCVTF */ 12641 case 0x5d: /* UCVTF */ 12642 { 12643 bool is_signed = (opcode == 0x1d) ? true : false; 12644 int elements = is_double ? 2 : is_q ? 4 : 2; 12645 if (is_double && !is_q) { 12646 unallocated_encoding(s); 12647 return; 12648 } 12649 if (!fp_access_check(s)) { 12650 return; 12651 } 12652 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size); 12653 return; 12654 } 12655 case 0x2c: /* FCMGT (zero) */ 12656 case 0x2d: /* FCMEQ (zero) */ 12657 case 0x2e: /* FCMLT (zero) */ 12658 case 0x6c: /* FCMGE (zero) */ 12659 case 0x6d: /* FCMLE (zero) */ 12660 if (size == 3 && !is_q) { 12661 unallocated_encoding(s); 12662 return; 12663 } 12664 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd); 12665 return; 12666 case 0x7f: /* FSQRT */ 12667 if (size == 3 && !is_q) { 12668 unallocated_encoding(s); 12669 return; 12670 } 12671 break; 12672 case 0x1a: /* FCVTNS */ 12673 case 0x1b: /* FCVTMS */ 12674 case 0x3a: /* FCVTPS */ 12675 case 0x3b: /* FCVTZS */ 12676 case 0x5a: /* FCVTNU */ 12677 case 0x5b: /* FCVTMU */ 12678 case 0x7a: /* FCVTPU */ 12679 case 0x7b: /* FCVTZU */ 12680 need_fpstatus = true; 12681 need_rmode = true; 12682 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 12683 if (size == 3 && !is_q) { 12684 unallocated_encoding(s); 12685 return; 12686 } 12687 break; 12688 case 0x5c: /* FCVTAU */ 12689 case 0x1c: /* FCVTAS */ 12690 need_fpstatus = true; 12691 need_rmode = true; 12692 rmode = FPROUNDING_TIEAWAY; 12693 if (size == 3 && !is_q) { 12694 unallocated_encoding(s); 12695 return; 12696 } 12697 break; 12698 case 0x3c: /* URECPE */ 12699 if (size == 3) { 12700 unallocated_encoding(s); 12701 return; 12702 } 12703 /* fall through */ 12704 case 0x3d: /* FRECPE */ 12705 case 0x7d: /* FRSQRTE */ 12706 if (size == 3 && !is_q) { 12707 unallocated_encoding(s); 12708 return; 12709 } 12710 if (!fp_access_check(s)) { 12711 return; 12712 } 12713 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd); 12714 return; 12715 case 0x56: /* FCVTXN, FCVTXN2 */ 12716 if (size == 2) { 12717 unallocated_encoding(s); 12718 return; 12719 } 12720 /* fall through */ 12721 case 0x16: /* FCVTN, FCVTN2 */ 12722 /* handle_2misc_narrow does a 2*size -> size operation, but these 12723 * instructions encode the source size rather than dest size. 
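         * For example FCVTN from double precision has size == 3 (the
         * 64-bit source element size) but writes 32-bit results, so we
         * pass size - 1 below as the destination size.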
12724 */ 12725 if (!fp_access_check(s)) { 12726 return; 12727 } 12728 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 12729 return; 12730 case 0x36: /* BFCVTN, BFCVTN2 */ 12731 if (!dc_isar_feature(aa64_bf16, s) || size != 2) { 12732 unallocated_encoding(s); 12733 return; 12734 } 12735 if (!fp_access_check(s)) { 12736 return; 12737 } 12738 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 12739 return; 12740 case 0x17: /* FCVTL, FCVTL2 */ 12741 if (!fp_access_check(s)) { 12742 return; 12743 } 12744 handle_2misc_widening(s, opcode, is_q, size, rn, rd); 12745 return; 12746 case 0x18: /* FRINTN */ 12747 case 0x19: /* FRINTM */ 12748 case 0x38: /* FRINTP */ 12749 case 0x39: /* FRINTZ */ 12750 need_rmode = true; 12751 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 12752 /* fall through */ 12753 case 0x59: /* FRINTX */ 12754 case 0x79: /* FRINTI */ 12755 need_fpstatus = true; 12756 if (size == 3 && !is_q) { 12757 unallocated_encoding(s); 12758 return; 12759 } 12760 break; 12761 case 0x58: /* FRINTA */ 12762 need_rmode = true; 12763 rmode = FPROUNDING_TIEAWAY; 12764 need_fpstatus = true; 12765 if (size == 3 && !is_q) { 12766 unallocated_encoding(s); 12767 return; 12768 } 12769 break; 12770 case 0x7c: /* URSQRTE */ 12771 if (size == 3) { 12772 unallocated_encoding(s); 12773 return; 12774 } 12775 break; 12776 case 0x1e: /* FRINT32Z */ 12777 case 0x1f: /* FRINT64Z */ 12778 need_rmode = true; 12779 rmode = FPROUNDING_ZERO; 12780 /* fall through */ 12781 case 0x5e: /* FRINT32X */ 12782 case 0x5f: /* FRINT64X */ 12783 need_fpstatus = true; 12784 if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) { 12785 unallocated_encoding(s); 12786 return; 12787 } 12788 break; 12789 default: 12790 unallocated_encoding(s); 12791 return; 12792 } 12793 break; 12794 } 12795 default: 12796 unallocated_encoding(s); 12797 return; 12798 } 12799 12800 if (!fp_access_check(s)) { 12801 return; 12802 } 12803 12804 if (need_fpstatus || need_rmode) { 12805 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 12806 } else { 12807 tcg_fpstatus = NULL; 12808 } 12809 if (need_rmode) { 12810 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); 12811 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 12812 } else { 12813 tcg_rmode = NULL; 12814 } 12815 12816 switch (opcode) { 12817 case 0x5: 12818 if (u && size == 0) { /* NOT */ 12819 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0); 12820 return; 12821 } 12822 break; 12823 case 0x8: /* CMGT, CMGE */ 12824 if (u) { 12825 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size); 12826 } else { 12827 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size); 12828 } 12829 return; 12830 case 0x9: /* CMEQ, CMLE */ 12831 if (u) { 12832 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size); 12833 } else { 12834 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size); 12835 } 12836 return; 12837 case 0xa: /* CMLT */ 12838 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size); 12839 return; 12840 case 0xb: 12841 if (u) { /* ABS, NEG */ 12842 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); 12843 } else { 12844 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size); 12845 } 12846 return; 12847 } 12848 12849 if (size == 3) { 12850 /* All 64-bit element operations can be shared with scalar 2misc */ 12851 int pass; 12852 12853 /* Coverity claims (size == 3 && !is_q) has been eliminated 12854 * from all paths leading to here. 
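         * (Every case above either rejected size == 3 && !is_q as an
         * unallocated encoding or returned early, so only the Q form
         * can reach this 64-bit element loop.)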
12855 */ 12856 tcg_debug_assert(is_q); 12857 for (pass = 0; pass < 2; pass++) { 12858 TCGv_i64 tcg_op = tcg_temp_new_i64(); 12859 TCGv_i64 tcg_res = tcg_temp_new_i64(); 12860 12861 read_vec_element(s, tcg_op, rn, pass, MO_64); 12862 12863 handle_2misc_64(s, opcode, u, tcg_res, tcg_op, 12864 tcg_rmode, tcg_fpstatus); 12865 12866 write_vec_element(s, tcg_res, rd, pass, MO_64); 12867 12868 tcg_temp_free_i64(tcg_res); 12869 tcg_temp_free_i64(tcg_op); 12870 } 12871 } else { 12872 int pass; 12873 12874 for (pass = 0; pass < (is_q ? 4 : 2); pass++) { 12875 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12876 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12877 12878 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 12879 12880 if (size == 2) { 12881 /* Special cases for 32 bit elements */ 12882 switch (opcode) { 12883 case 0x4: /* CLS */ 12884 if (u) { 12885 tcg_gen_clzi_i32(tcg_res, tcg_op, 32); 12886 } else { 12887 tcg_gen_clrsb_i32(tcg_res, tcg_op); 12888 } 12889 break; 12890 case 0x7: /* SQABS, SQNEG */ 12891 if (u) { 12892 gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op); 12893 } else { 12894 gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op); 12895 } 12896 break; 12897 case 0x2f: /* FABS */ 12898 gen_helper_vfp_abss(tcg_res, tcg_op); 12899 break; 12900 case 0x6f: /* FNEG */ 12901 gen_helper_vfp_negs(tcg_res, tcg_op); 12902 break; 12903 case 0x7f: /* FSQRT */ 12904 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); 12905 break; 12906 case 0x1a: /* FCVTNS */ 12907 case 0x1b: /* FCVTMS */ 12908 case 0x1c: /* FCVTAS */ 12909 case 0x3a: /* FCVTPS */ 12910 case 0x3b: /* FCVTZS */ 12911 gen_helper_vfp_tosls(tcg_res, tcg_op, 12912 tcg_constant_i32(0), tcg_fpstatus); 12913 break; 12914 case 0x5a: /* FCVTNU */ 12915 case 0x5b: /* FCVTMU */ 12916 case 0x5c: /* FCVTAU */ 12917 case 0x7a: /* FCVTPU */ 12918 case 0x7b: /* FCVTZU */ 12919 gen_helper_vfp_touls(tcg_res, tcg_op, 12920 tcg_constant_i32(0), tcg_fpstatus); 12921 break; 12922 case 0x18: /* FRINTN */ 12923 case 0x19: /* FRINTM */ 12924 case 0x38: /* FRINTP */ 12925 case 0x39: /* FRINTZ */ 12926 case 0x58: /* FRINTA */ 12927 case 0x79: /* FRINTI */ 12928 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus); 12929 break; 12930 case 0x59: /* FRINTX */ 12931 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus); 12932 break; 12933 case 0x7c: /* URSQRTE */ 12934 gen_helper_rsqrte_u32(tcg_res, tcg_op); 12935 break; 12936 case 0x1e: /* FRINT32Z */ 12937 case 0x5e: /* FRINT32X */ 12938 gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus); 12939 break; 12940 case 0x1f: /* FRINT64Z */ 12941 case 0x5f: /* FRINT64X */ 12942 gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus); 12943 break; 12944 default: 12945 g_assert_not_reached(); 12946 } 12947 } else { 12948 /* Use helpers for 8 and 16 bit elements */ 12949 switch (opcode) { 12950 case 0x5: /* CNT, RBIT */ 12951 /* For these two insns size is part of the opcode specifier 12952 * (handled earlier); they always operate on byte elements. 
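                     * For example RBIT reverses the bits within each byte
                     * (0x80 -> 0x01) and CNT is a per-byte population count.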
12953 */ 12954 if (u) { 12955 gen_helper_neon_rbit_u8(tcg_res, tcg_op); 12956 } else { 12957 gen_helper_neon_cnt_u8(tcg_res, tcg_op); 12958 } 12959 break; 12960 case 0x7: /* SQABS, SQNEG */ 12961 { 12962 NeonGenOneOpEnvFn *genfn; 12963 static NeonGenOneOpEnvFn * const fns[2][2] = { 12964 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 12965 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 12966 }; 12967 genfn = fns[size][u]; 12968 genfn(tcg_res, cpu_env, tcg_op); 12969 break; 12970 } 12971 case 0x4: /* CLS, CLZ */ 12972 if (u) { 12973 if (size == 0) { 12974 gen_helper_neon_clz_u8(tcg_res, tcg_op); 12975 } else { 12976 gen_helper_neon_clz_u16(tcg_res, tcg_op); 12977 } 12978 } else { 12979 if (size == 0) { 12980 gen_helper_neon_cls_s8(tcg_res, tcg_op); 12981 } else { 12982 gen_helper_neon_cls_s16(tcg_res, tcg_op); 12983 } 12984 } 12985 break; 12986 default: 12987 g_assert_not_reached(); 12988 } 12989 } 12990 12991 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 12992 12993 tcg_temp_free_i32(tcg_res); 12994 tcg_temp_free_i32(tcg_op); 12995 } 12996 } 12997 clear_vec_high(s, is_q, rd); 12998 12999 if (need_rmode) { 13000 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 13001 tcg_temp_free_i32(tcg_rmode); 13002 } 13003 if (need_fpstatus) { 13004 tcg_temp_free_ptr(tcg_fpstatus); 13005 } 13006 } 13007 13008 /* AdvSIMD [scalar] two register miscellaneous (FP16) 13009 * 13010 * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0 13011 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 13012 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd | 13013 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 13014 * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00 13015 * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800 13016 * 13017 * This actually covers two groups where scalar access is governed by 13018 * bit 28. A bunch of the instructions (float to integral) only exist 13019 * in the vector form and are un-allocated for the scalar decode. Also 13020 * in the scalar decode Q is always 1. 13021 */ 13022 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) 13023 { 13024 int fpop, opcode, a, u; 13025 int rn, rd; 13026 bool is_q; 13027 bool is_scalar; 13028 bool only_in_vector = false; 13029 13030 int pass; 13031 TCGv_i32 tcg_rmode = NULL; 13032 TCGv_ptr tcg_fpstatus = NULL; 13033 bool need_rmode = false; 13034 bool need_fpst = true; 13035 int rmode; 13036 13037 if (!dc_isar_feature(aa64_fp16, s)) { 13038 unallocated_encoding(s); 13039 return; 13040 } 13041 13042 rd = extract32(insn, 0, 5); 13043 rn = extract32(insn, 5, 5); 13044 13045 a = extract32(insn, 23, 1); 13046 u = extract32(insn, 29, 1); 13047 is_scalar = extract32(insn, 28, 1); 13048 is_q = extract32(insn, 30, 1); 13049 13050 opcode = extract32(insn, 12, 5); 13051 fpop = deposit32(opcode, 5, 1, a); 13052 fpop = deposit32(fpop, 6, 1, u); 13053 13054 switch (fpop) { 13055 case 0x1d: /* SCVTF */ 13056 case 0x5d: /* UCVTF */ 13057 { 13058 int elements; 13059 13060 if (is_scalar) { 13061 elements = 1; 13062 } else { 13063 elements = (is_q ? 
8 : 4); 13064 } 13065 13066 if (!fp_access_check(s)) { 13067 return; 13068 } 13069 handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16); 13070 return; 13071 } 13072 break; 13073 case 0x2c: /* FCMGT (zero) */ 13074 case 0x2d: /* FCMEQ (zero) */ 13075 case 0x2e: /* FCMLT (zero) */ 13076 case 0x6c: /* FCMGE (zero) */ 13077 case 0x6d: /* FCMLE (zero) */ 13078 handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd); 13079 return; 13080 case 0x3d: /* FRECPE */ 13081 case 0x3f: /* FRECPX */ 13082 break; 13083 case 0x18: /* FRINTN */ 13084 need_rmode = true; 13085 only_in_vector = true; 13086 rmode = FPROUNDING_TIEEVEN; 13087 break; 13088 case 0x19: /* FRINTM */ 13089 need_rmode = true; 13090 only_in_vector = true; 13091 rmode = FPROUNDING_NEGINF; 13092 break; 13093 case 0x38: /* FRINTP */ 13094 need_rmode = true; 13095 only_in_vector = true; 13096 rmode = FPROUNDING_POSINF; 13097 break; 13098 case 0x39: /* FRINTZ */ 13099 need_rmode = true; 13100 only_in_vector = true; 13101 rmode = FPROUNDING_ZERO; 13102 break; 13103 case 0x58: /* FRINTA */ 13104 need_rmode = true; 13105 only_in_vector = true; 13106 rmode = FPROUNDING_TIEAWAY; 13107 break; 13108 case 0x59: /* FRINTX */ 13109 case 0x79: /* FRINTI */ 13110 only_in_vector = true; 13111 /* current rounding mode */ 13112 break; 13113 case 0x1a: /* FCVTNS */ 13114 need_rmode = true; 13115 rmode = FPROUNDING_TIEEVEN; 13116 break; 13117 case 0x1b: /* FCVTMS */ 13118 need_rmode = true; 13119 rmode = FPROUNDING_NEGINF; 13120 break; 13121 case 0x1c: /* FCVTAS */ 13122 need_rmode = true; 13123 rmode = FPROUNDING_TIEAWAY; 13124 break; 13125 case 0x3a: /* FCVTPS */ 13126 need_rmode = true; 13127 rmode = FPROUNDING_POSINF; 13128 break; 13129 case 0x3b: /* FCVTZS */ 13130 need_rmode = true; 13131 rmode = FPROUNDING_ZERO; 13132 break; 13133 case 0x5a: /* FCVTNU */ 13134 need_rmode = true; 13135 rmode = FPROUNDING_TIEEVEN; 13136 break; 13137 case 0x5b: /* FCVTMU */ 13138 need_rmode = true; 13139 rmode = FPROUNDING_NEGINF; 13140 break; 13141 case 0x5c: /* FCVTAU */ 13142 need_rmode = true; 13143 rmode = FPROUNDING_TIEAWAY; 13144 break; 13145 case 0x7a: /* FCVTPU */ 13146 need_rmode = true; 13147 rmode = FPROUNDING_POSINF; 13148 break; 13149 case 0x7b: /* FCVTZU */ 13150 need_rmode = true; 13151 rmode = FPROUNDING_ZERO; 13152 break; 13153 case 0x2f: /* FABS */ 13154 case 0x6f: /* FNEG */ 13155 need_fpst = false; 13156 break; 13157 case 0x7d: /* FRSQRTE */ 13158 case 0x7f: /* FSQRT (vector) */ 13159 break; 13160 default: 13161 unallocated_encoding(s); 13162 return; 13163 } 13164 13165 13166 /* Check additional constraints for the scalar encoding */ 13167 if (is_scalar) { 13168 if (!is_q) { 13169 unallocated_encoding(s); 13170 return; 13171 } 13172 /* FRINTxx is only in the vector form */ 13173 if (only_in_vector) { 13174 unallocated_encoding(s); 13175 return; 13176 } 13177 } 13178 13179 if (!fp_access_check(s)) { 13180 return; 13181 } 13182 13183 if (need_rmode || need_fpst) { 13184 tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16); 13185 } 13186 13187 if (need_rmode) { 13188 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); 13189 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 13190 } 13191 13192 if (is_scalar) { 13193 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 13194 TCGv_i32 tcg_res = tcg_temp_new_i32(); 13195 13196 switch (fpop) { 13197 case 0x1a: /* FCVTNS */ 13198 case 0x1b: /* FCVTMS */ 13199 case 0x1c: /* FCVTAS */ 13200 case 0x3a: /* FCVTPS */ 13201 case 0x3b: /* FCVTZS */ 13202 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 
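            /* The rounding mode selected above has already been installed
             * via tcg_fpstatus, so this one helper covers all five of the
             * signed FCVT* variants. */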
13203 break; 13204 case 0x3d: /* FRECPE */ 13205 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 13206 break; 13207 case 0x3f: /* FRECPX */ 13208 gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus); 13209 break; 13210 case 0x5a: /* FCVTNU */ 13211 case 0x5b: /* FCVTMU */ 13212 case 0x5c: /* FCVTAU */ 13213 case 0x7a: /* FCVTPU */ 13214 case 0x7b: /* FCVTZU */ 13215 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 13216 break; 13217 case 0x6f: /* FNEG */ 13218 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 13219 break; 13220 case 0x7d: /* FRSQRTE */ 13221 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 13222 break; 13223 default: 13224 g_assert_not_reached(); 13225 } 13226 13227 /* limit any sign extension going on */ 13228 tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff); 13229 write_fp_sreg(s, rd, tcg_res); 13230 13231 tcg_temp_free_i32(tcg_res); 13232 tcg_temp_free_i32(tcg_op); 13233 } else { 13234 for (pass = 0; pass < (is_q ? 8 : 4); pass++) { 13235 TCGv_i32 tcg_op = tcg_temp_new_i32(); 13236 TCGv_i32 tcg_res = tcg_temp_new_i32(); 13237 13238 read_vec_element_i32(s, tcg_op, rn, pass, MO_16); 13239 13240 switch (fpop) { 13241 case 0x1a: /* FCVTNS */ 13242 case 0x1b: /* FCVTMS */ 13243 case 0x1c: /* FCVTAS */ 13244 case 0x3a: /* FCVTPS */ 13245 case 0x3b: /* FCVTZS */ 13246 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 13247 break; 13248 case 0x3d: /* FRECPE */ 13249 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 13250 break; 13251 case 0x5a: /* FCVTNU */ 13252 case 0x5b: /* FCVTMU */ 13253 case 0x5c: /* FCVTAU */ 13254 case 0x7a: /* FCVTPU */ 13255 case 0x7b: /* FCVTZU */ 13256 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 13257 break; 13258 case 0x18: /* FRINTN */ 13259 case 0x19: /* FRINTM */ 13260 case 0x38: /* FRINTP */ 13261 case 0x39: /* FRINTZ */ 13262 case 0x58: /* FRINTA */ 13263 case 0x79: /* FRINTI */ 13264 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus); 13265 break; 13266 case 0x59: /* FRINTX */ 13267 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus); 13268 break; 13269 case 0x2f: /* FABS */ 13270 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 13271 break; 13272 case 0x6f: /* FNEG */ 13273 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 13274 break; 13275 case 0x7d: /* FRSQRTE */ 13276 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 13277 break; 13278 case 0x7f: /* FSQRT */ 13279 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus); 13280 break; 13281 default: 13282 g_assert_not_reached(); 13283 } 13284 13285 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 13286 13287 tcg_temp_free_i32(tcg_res); 13288 tcg_temp_free_i32(tcg_op); 13289 } 13290 13291 clear_vec_high(s, is_q, rd); 13292 } 13293 13294 if (tcg_rmode) { 13295 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 13296 tcg_temp_free_i32(tcg_rmode); 13297 } 13298 13299 if (tcg_fpstatus) { 13300 tcg_temp_free_ptr(tcg_fpstatus); 13301 } 13302 } 13303 13304 /* AdvSIMD scalar x indexed element 13305 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 13306 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ 13307 * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | 13308 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ 13309 * AdvSIMD vector x indexed element 13310 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 13311 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ 13312 * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd 
| 13313 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ 13314 */ 13315 static void disas_simd_indexed(DisasContext *s, uint32_t insn) 13316 { 13317 /* This encoding has two kinds of instruction: 13318 * normal, where we perform elt x idxelt => elt for each 13319 * element in the vector 13320 * long, where we perform elt x idxelt and generate a result of 13321 * double the width of the input element 13322 * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs). 13323 */ 13324 bool is_scalar = extract32(insn, 28, 1); 13325 bool is_q = extract32(insn, 30, 1); 13326 bool u = extract32(insn, 29, 1); 13327 int size = extract32(insn, 22, 2); 13328 int l = extract32(insn, 21, 1); 13329 int m = extract32(insn, 20, 1); 13330 /* Note that the Rm field here is only 4 bits, not 5 as it usually is */ 13331 int rm = extract32(insn, 16, 4); 13332 int opcode = extract32(insn, 12, 4); 13333 int h = extract32(insn, 11, 1); 13334 int rn = extract32(insn, 5, 5); 13335 int rd = extract32(insn, 0, 5); 13336 bool is_long = false; 13337 int is_fp = 0; 13338 bool is_fp16 = false; 13339 int index; 13340 TCGv_ptr fpst; 13341 13342 switch (16 * u + opcode) { 13343 case 0x08: /* MUL */ 13344 case 0x10: /* MLA */ 13345 case 0x14: /* MLS */ 13346 if (is_scalar) { 13347 unallocated_encoding(s); 13348 return; 13349 } 13350 break; 13351 case 0x02: /* SMLAL, SMLAL2 */ 13352 case 0x12: /* UMLAL, UMLAL2 */ 13353 case 0x06: /* SMLSL, SMLSL2 */ 13354 case 0x16: /* UMLSL, UMLSL2 */ 13355 case 0x0a: /* SMULL, SMULL2 */ 13356 case 0x1a: /* UMULL, UMULL2 */ 13357 if (is_scalar) { 13358 unallocated_encoding(s); 13359 return; 13360 } 13361 is_long = true; 13362 break; 13363 case 0x03: /* SQDMLAL, SQDMLAL2 */ 13364 case 0x07: /* SQDMLSL, SQDMLSL2 */ 13365 case 0x0b: /* SQDMULL, SQDMULL2 */ 13366 is_long = true; 13367 break; 13368 case 0x0c: /* SQDMULH */ 13369 case 0x0d: /* SQRDMULH */ 13370 break; 13371 case 0x01: /* FMLA */ 13372 case 0x05: /* FMLS */ 13373 case 0x09: /* FMUL */ 13374 case 0x19: /* FMULX */ 13375 is_fp = 1; 13376 break; 13377 case 0x1d: /* SQRDMLAH */ 13378 case 0x1f: /* SQRDMLSH */ 13379 if (!dc_isar_feature(aa64_rdm, s)) { 13380 unallocated_encoding(s); 13381 return; 13382 } 13383 break; 13384 case 0x0e: /* SDOT */ 13385 case 0x1e: /* UDOT */ 13386 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) { 13387 unallocated_encoding(s); 13388 return; 13389 } 13390 break; 13391 case 0x0f: 13392 switch (size) { 13393 case 0: /* SUDOT */ 13394 case 2: /* USDOT */ 13395 if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) { 13396 unallocated_encoding(s); 13397 return; 13398 } 13399 size = MO_32; 13400 break; 13401 case 1: /* BFDOT */ 13402 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { 13403 unallocated_encoding(s); 13404 return; 13405 } 13406 size = MO_32; 13407 break; 13408 case 3: /* BFMLAL{B,T} */ 13409 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { 13410 unallocated_encoding(s); 13411 return; 13412 } 13413 /* can't set is_fp without other incorrect size checks */ 13414 size = MO_16; 13415 break; 13416 default: 13417 unallocated_encoding(s); 13418 return; 13419 } 13420 break; 13421 case 0x11: /* FCMLA #0 */ 13422 case 0x13: /* FCMLA #90 */ 13423 case 0x15: /* FCMLA #180 */ 13424 case 0x17: /* FCMLA #270 */ 13425 if (is_scalar || !dc_isar_feature(aa64_fcma, s)) { 13426 unallocated_encoding(s); 13427 return; 13428 } 13429 is_fp = 2; 13430 break; 13431 case 0x00: /* FMLAL */ 13432 case 0x04: /* FMLSL */ 13433 case 0x18: /* FMLAL2 */ 13434 case 0x1c: /* FMLSL2 */ 
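        /*
         * FMLAL/FMLSL are widening half-to-single fused multiply-add ops;
         * the multiplicand index selects a 16-bit element, which is why
         * size is forced to MO_16 once the checks below have passed.
         */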
13435 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) { 13436 unallocated_encoding(s); 13437 return; 13438 } 13439 size = MO_16; 13440 /* is_fp, but we pass cpu_env not fp_status. */ 13441 break; 13442 default: 13443 unallocated_encoding(s); 13444 return; 13445 } 13446 13447 switch (is_fp) { 13448 case 1: /* normal fp */ 13449 /* convert insn encoded size to MemOp size */ 13450 switch (size) { 13451 case 0: /* half-precision */ 13452 size = MO_16; 13453 is_fp16 = true; 13454 break; 13455 case MO_32: /* single precision */ 13456 case MO_64: /* double precision */ 13457 break; 13458 default: 13459 unallocated_encoding(s); 13460 return; 13461 } 13462 break; 13463 13464 case 2: /* complex fp */ 13465 /* Each indexable element is a complex pair. */ 13466 size += 1; 13467 switch (size) { 13468 case MO_32: 13469 if (h && !is_q) { 13470 unallocated_encoding(s); 13471 return; 13472 } 13473 is_fp16 = true; 13474 break; 13475 case MO_64: 13476 break; 13477 default: 13478 unallocated_encoding(s); 13479 return; 13480 } 13481 break; 13482 13483 default: /* integer */ 13484 switch (size) { 13485 case MO_8: 13486 case MO_64: 13487 unallocated_encoding(s); 13488 return; 13489 } 13490 break; 13491 } 13492 if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) { 13493 unallocated_encoding(s); 13494 return; 13495 } 13496 13497 /* Given MemOp size, adjust register and indexing. */ 13498 switch (size) { 13499 case MO_16: 13500 index = h << 2 | l << 1 | m; 13501 break; 13502 case MO_32: 13503 index = h << 1 | l; 13504 rm |= m << 4; 13505 break; 13506 case MO_64: 13507 if (l || !is_q) { 13508 unallocated_encoding(s); 13509 return; 13510 } 13511 index = h; 13512 rm |= m << 4; 13513 break; 13514 default: 13515 g_assert_not_reached(); 13516 } 13517 13518 if (!fp_access_check(s)) { 13519 return; 13520 } 13521 13522 if (is_fp) { 13523 fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 13524 } else { 13525 fpst = NULL; 13526 } 13527 13528 switch (16 * u + opcode) { 13529 case 0x0e: /* SDOT */ 13530 case 0x1e: /* UDOT */ 13531 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13532 u ? gen_helper_gvec_udot_idx_b 13533 : gen_helper_gvec_sdot_idx_b); 13534 return; 13535 case 0x0f: 13536 switch (extract32(insn, 22, 2)) { 13537 case 0: /* SUDOT */ 13538 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13539 gen_helper_gvec_sudot_idx_b); 13540 return; 13541 case 1: /* BFDOT */ 13542 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13543 gen_helper_gvec_bfdot_idx); 13544 return; 13545 case 2: /* USDOT */ 13546 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13547 gen_helper_gvec_usdot_idx_b); 13548 return; 13549 case 3: /* BFMLAL{B,T} */ 13550 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q, 13551 gen_helper_gvec_bfmlal_idx); 13552 return; 13553 } 13554 g_assert_not_reached(); 13555 case 0x11: /* FCMLA #0 */ 13556 case 0x13: /* FCMLA #90 */ 13557 case 0x15: /* FCMLA #180 */ 13558 case 0x17: /* FCMLA #270 */ 13559 { 13560 int rot = extract32(insn, 13, 2); 13561 int data = (index << 2) | rot; 13562 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 13563 vec_full_reg_offset(s, rn), 13564 vec_full_reg_offset(s, rm), 13565 vec_full_reg_offset(s, rd), fpst, 13566 is_q ? 16 : 8, vec_full_reg_size(s), data, 13567 size == MO_64 13568 ? 
gen_helper_gvec_fcmlas_idx 13569 : gen_helper_gvec_fcmlah_idx); 13570 tcg_temp_free_ptr(fpst); 13571 } 13572 return; 13573 13574 case 0x00: /* FMLAL */ 13575 case 0x04: /* FMLSL */ 13576 case 0x18: /* FMLAL2 */ 13577 case 0x1c: /* FMLSL2 */ 13578 { 13579 int is_s = extract32(opcode, 2, 1); 13580 int is_2 = u; 13581 int data = (index << 2) | (is_2 << 1) | is_s; 13582 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 13583 vec_full_reg_offset(s, rn), 13584 vec_full_reg_offset(s, rm), cpu_env, 13585 is_q ? 16 : 8, vec_full_reg_size(s), 13586 data, gen_helper_gvec_fmlal_idx_a64); 13587 } 13588 return; 13589 13590 case 0x08: /* MUL */ 13591 if (!is_long && !is_scalar) { 13592 static gen_helper_gvec_3 * const fns[3] = { 13593 gen_helper_gvec_mul_idx_h, 13594 gen_helper_gvec_mul_idx_s, 13595 gen_helper_gvec_mul_idx_d, 13596 }; 13597 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 13598 vec_full_reg_offset(s, rn), 13599 vec_full_reg_offset(s, rm), 13600 is_q ? 16 : 8, vec_full_reg_size(s), 13601 index, fns[size - 1]); 13602 return; 13603 } 13604 break; 13605 13606 case 0x10: /* MLA */ 13607 if (!is_long && !is_scalar) { 13608 static gen_helper_gvec_4 * const fns[3] = { 13609 gen_helper_gvec_mla_idx_h, 13610 gen_helper_gvec_mla_idx_s, 13611 gen_helper_gvec_mla_idx_d, 13612 }; 13613 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 13614 vec_full_reg_offset(s, rn), 13615 vec_full_reg_offset(s, rm), 13616 vec_full_reg_offset(s, rd), 13617 is_q ? 16 : 8, vec_full_reg_size(s), 13618 index, fns[size - 1]); 13619 return; 13620 } 13621 break; 13622 13623 case 0x14: /* MLS */ 13624 if (!is_long && !is_scalar) { 13625 static gen_helper_gvec_4 * const fns[3] = { 13626 gen_helper_gvec_mls_idx_h, 13627 gen_helper_gvec_mls_idx_s, 13628 gen_helper_gvec_mls_idx_d, 13629 }; 13630 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 13631 vec_full_reg_offset(s, rn), 13632 vec_full_reg_offset(s, rm), 13633 vec_full_reg_offset(s, rd), 13634 is_q ? 16 : 8, vec_full_reg_size(s), 13635 index, fns[size - 1]); 13636 return; 13637 } 13638 break; 13639 } 13640 13641 if (size == 3) { 13642 TCGv_i64 tcg_idx = tcg_temp_new_i64(); 13643 int pass; 13644 13645 assert(is_fp && is_q && !is_long); 13646 13647 read_vec_element(s, tcg_idx, rm, index, MO_64); 13648 13649 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 13650 TCGv_i64 tcg_op = tcg_temp_new_i64(); 13651 TCGv_i64 tcg_res = tcg_temp_new_i64(); 13652 13653 read_vec_element(s, tcg_op, rn, pass, MO_64); 13654 13655 switch (16 * u + opcode) { 13656 case 0x05: /* FMLS */ 13657 /* As usual for ARM, separate negation for fused multiply-add */ 13658 gen_helper_vfp_negd(tcg_op, tcg_op); 13659 /* fall through */ 13660 case 0x01: /* FMLA */ 13661 read_vec_element(s, tcg_res, rd, pass, MO_64); 13662 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst); 13663 break; 13664 case 0x09: /* FMUL */ 13665 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst); 13666 break; 13667 case 0x19: /* FMULX */ 13668 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst); 13669 break; 13670 default: 13671 g_assert_not_reached(); 13672 } 13673 13674 write_vec_element(s, tcg_res, rd, pass, MO_64); 13675 tcg_temp_free_i64(tcg_op); 13676 tcg_temp_free_i64(tcg_res); 13677 } 13678 13679 tcg_temp_free_i64(tcg_idx); 13680 clear_vec_high(s, !is_scalar, rd); 13681 } else if (!is_long) { 13682 /* 32 bit floating point, or 16 or 32 bit integer. 13683 * For the 16 bit scalar case we use the usual Neon helpers and 13684 * rely on the fact that 0 op 0 == 0 with no side effects. 
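     * (The 16-bit scalar operand is read zero-extended, so the unused
     * high half is all zeroes and cannot perturb the result or set
     * spurious saturation flags.)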
13685 */ 13686 TCGv_i32 tcg_idx = tcg_temp_new_i32(); 13687 int pass, maxpasses; 13688 13689 if (is_scalar) { 13690 maxpasses = 1; 13691 } else { 13692 maxpasses = is_q ? 4 : 2; 13693 } 13694 13695 read_vec_element_i32(s, tcg_idx, rm, index, size); 13696 13697 if (size == 1 && !is_scalar) { 13698 /* The simplest way to handle the 16x16 indexed ops is to duplicate 13699 * the index into both halves of the 32 bit tcg_idx and then use 13700 * the usual Neon helpers. 13701 */ 13702 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); 13703 } 13704 13705 for (pass = 0; pass < maxpasses; pass++) { 13706 TCGv_i32 tcg_op = tcg_temp_new_i32(); 13707 TCGv_i32 tcg_res = tcg_temp_new_i32(); 13708 13709 read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32); 13710 13711 switch (16 * u + opcode) { 13712 case 0x08: /* MUL */ 13713 case 0x10: /* MLA */ 13714 case 0x14: /* MLS */ 13715 { 13716 static NeonGenTwoOpFn * const fns[2][2] = { 13717 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, 13718 { tcg_gen_add_i32, tcg_gen_sub_i32 }, 13719 }; 13720 NeonGenTwoOpFn *genfn; 13721 bool is_sub = opcode == 0x4; 13722 13723 if (size == 1) { 13724 gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx); 13725 } else { 13726 tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx); 13727 } 13728 if (opcode == 0x8) { 13729 break; 13730 } 13731 read_vec_element_i32(s, tcg_op, rd, pass, MO_32); 13732 genfn = fns[size - 1][is_sub]; 13733 genfn(tcg_res, tcg_op, tcg_res); 13734 break; 13735 } 13736 case 0x05: /* FMLS */ 13737 case 0x01: /* FMLA */ 13738 read_vec_element_i32(s, tcg_res, rd, pass, 13739 is_scalar ? size : MO_32); 13740 switch (size) { 13741 case 1: 13742 if (opcode == 0x5) { 13743 /* As usual for ARM, separate negation for fused 13744 * multiply-add */ 13745 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000); 13746 } 13747 if (is_scalar) { 13748 gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx, 13749 tcg_res, fpst); 13750 } else { 13751 gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx, 13752 tcg_res, fpst); 13753 } 13754 break; 13755 case 2: 13756 if (opcode == 0x5) { 13757 /* As usual for ARM, separate negation for 13758 * fused multiply-add */ 13759 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000); 13760 } 13761 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, 13762 tcg_res, fpst); 13763 break; 13764 default: 13765 g_assert_not_reached(); 13766 } 13767 break; 13768 case 0x09: /* FMUL */ 13769 switch (size) { 13770 case 1: 13771 if (is_scalar) { 13772 gen_helper_advsimd_mulh(tcg_res, tcg_op, 13773 tcg_idx, fpst); 13774 } else { 13775 gen_helper_advsimd_mul2h(tcg_res, tcg_op, 13776 tcg_idx, fpst); 13777 } 13778 break; 13779 case 2: 13780 gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst); 13781 break; 13782 default: 13783 g_assert_not_reached(); 13784 } 13785 break; 13786 case 0x19: /* FMULX */ 13787 switch (size) { 13788 case 1: 13789 if (is_scalar) { 13790 gen_helper_advsimd_mulxh(tcg_res, tcg_op, 13791 tcg_idx, fpst); 13792 } else { 13793 gen_helper_advsimd_mulx2h(tcg_res, tcg_op, 13794 tcg_idx, fpst); 13795 } 13796 break; 13797 case 2: 13798 gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst); 13799 break; 13800 default: 13801 g_assert_not_reached(); 13802 } 13803 break; 13804 case 0x0c: /* SQDMULH */ 13805 if (size == 1) { 13806 gen_helper_neon_qdmulh_s16(tcg_res, cpu_env, 13807 tcg_op, tcg_idx); 13808 } else { 13809 gen_helper_neon_qdmulh_s32(tcg_res, cpu_env, 13810 tcg_op, tcg_idx); 13811 } 13812 break; 13813 case 0x0d: /* SQRDMULH */ 13814 if (size == 1) { 13815 gen_helper_neon_qrdmulh_s16(tcg_res, 
cpu_env, 13816 tcg_op, tcg_idx); 13817 } else { 13818 gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env, 13819 tcg_op, tcg_idx); 13820 } 13821 break; 13822 case 0x1d: /* SQRDMLAH */ 13823 read_vec_element_i32(s, tcg_res, rd, pass, 13824 is_scalar ? size : MO_32); 13825 if (size == 1) { 13826 gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env, 13827 tcg_op, tcg_idx, tcg_res); 13828 } else { 13829 gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env, 13830 tcg_op, tcg_idx, tcg_res); 13831 } 13832 break; 13833 case 0x1f: /* SQRDMLSH */ 13834 read_vec_element_i32(s, tcg_res, rd, pass, 13835 is_scalar ? size : MO_32); 13836 if (size == 1) { 13837 gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env, 13838 tcg_op, tcg_idx, tcg_res); 13839 } else { 13840 gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env, 13841 tcg_op, tcg_idx, tcg_res); 13842 } 13843 break; 13844 default: 13845 g_assert_not_reached(); 13846 } 13847 13848 if (is_scalar) { 13849 write_fp_sreg(s, rd, tcg_res); 13850 } else { 13851 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 13852 } 13853 13854 tcg_temp_free_i32(tcg_op); 13855 tcg_temp_free_i32(tcg_res); 13856 } 13857 13858 tcg_temp_free_i32(tcg_idx); 13859 clear_vec_high(s, is_q, rd); 13860 } else { 13861 /* long ops: 16x16->32 or 32x32->64 */ 13862 TCGv_i64 tcg_res[2]; 13863 int pass; 13864 bool satop = extract32(opcode, 0, 1); 13865 MemOp memop = MO_32; 13866 13867 if (satop || !u) { 13868 memop |= MO_SIGN; 13869 } 13870 13871 if (size == 2) { 13872 TCGv_i64 tcg_idx = tcg_temp_new_i64(); 13873 13874 read_vec_element(s, tcg_idx, rm, index, memop); 13875 13876 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 13877 TCGv_i64 tcg_op = tcg_temp_new_i64(); 13878 TCGv_i64 tcg_passres; 13879 int passelt; 13880 13881 if (is_scalar) { 13882 passelt = 0; 13883 } else { 13884 passelt = pass + (is_q * 2); 13885 } 13886 13887 read_vec_element(s, tcg_op, rn, passelt, memop); 13888 13889 tcg_res[pass] = tcg_temp_new_i64(); 13890 13891 if (opcode == 0xa || opcode == 0xb) { 13892 /* Non-accumulating ops */ 13893 tcg_passres = tcg_res[pass]; 13894 } else { 13895 tcg_passres = tcg_temp_new_i64(); 13896 } 13897 13898 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx); 13899 tcg_temp_free_i64(tcg_op); 13900 13901 if (satop) { 13902 /* saturating, doubling */ 13903 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env, 13904 tcg_passres, tcg_passres); 13905 } 13906 13907 if (opcode == 0xa || opcode == 0xb) { 13908 continue; 13909 } 13910 13911 /* Accumulating op: handle accumulate step */ 13912 read_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13913 13914 switch (opcode) { 13915 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 13916 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 13917 break; 13918 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 13919 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 13920 break; 13921 case 0x7: /* SQDMLSL, SQDMLSL2 */ 13922 tcg_gen_neg_i64(tcg_passres, tcg_passres); 13923 /* fall through */ 13924 case 0x3: /* SQDMLAL, SQDMLAL2 */ 13925 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env, 13926 tcg_res[pass], 13927 tcg_passres); 13928 break; 13929 default: 13930 g_assert_not_reached(); 13931 } 13932 tcg_temp_free_i64(tcg_passres); 13933 } 13934 tcg_temp_free_i64(tcg_idx); 13935 13936 clear_vec_high(s, !is_scalar, rd); 13937 } else { 13938 TCGv_i32 tcg_idx = tcg_temp_new_i32(); 13939 13940 assert(size == 1); 13941 read_vec_element_i32(s, tcg_idx, rm, index, size); 13942 13943 if (!is_scalar) { 13944 /* The simplest way to handle the 16x16 indexed ops is to 13945 * 
duplicate the index into both halves of the 32 bit tcg_idx 13946 * and then use the usual Neon helpers. 13947 */ 13948 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); 13949 } 13950 13951 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 13952 TCGv_i32 tcg_op = tcg_temp_new_i32(); 13953 TCGv_i64 tcg_passres; 13954 13955 if (is_scalar) { 13956 read_vec_element_i32(s, tcg_op, rn, pass, size); 13957 } else { 13958 read_vec_element_i32(s, tcg_op, rn, 13959 pass + (is_q * 2), MO_32); 13960 } 13961 13962 tcg_res[pass] = tcg_temp_new_i64(); 13963 13964 if (opcode == 0xa || opcode == 0xb) { 13965 /* Non-accumulating ops */ 13966 tcg_passres = tcg_res[pass]; 13967 } else { 13968 tcg_passres = tcg_temp_new_i64(); 13969 } 13970 13971 if (memop & MO_SIGN) { 13972 gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx); 13973 } else { 13974 gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx); 13975 } 13976 if (satop) { 13977 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, 13978 tcg_passres, tcg_passres); 13979 } 13980 tcg_temp_free_i32(tcg_op); 13981 13982 if (opcode == 0xa || opcode == 0xb) { 13983 continue; 13984 } 13985 13986 /* Accumulating op: handle accumulate step */ 13987 read_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13988 13989 switch (opcode) { 13990 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 13991 gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass], 13992 tcg_passres); 13993 break; 13994 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 13995 gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass], 13996 tcg_passres); 13997 break; 13998 case 0x7: /* SQDMLSL, SQDMLSL2 */ 13999 gen_helper_neon_negl_u32(tcg_passres, tcg_passres); 14000 /* fall through */ 14001 case 0x3: /* SQDMLAL, SQDMLAL2 */ 14002 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env, 14003 tcg_res[pass], 14004 tcg_passres); 14005 break; 14006 default: 14007 g_assert_not_reached(); 14008 } 14009 tcg_temp_free_i64(tcg_passres); 14010 } 14011 tcg_temp_free_i32(tcg_idx); 14012 14013 if (is_scalar) { 14014 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]); 14015 } 14016 } 14017 14018 if (is_scalar) { 14019 tcg_res[1] = tcg_constant_i64(0); 14020 } 14021 14022 for (pass = 0; pass < 2; pass++) { 14023 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 14024 tcg_temp_free_i64(tcg_res[pass]); 14025 } 14026 } 14027 14028 if (fpst) { 14029 tcg_temp_free_ptr(fpst); 14030 } 14031 } 14032 14033 /* Crypto AES 14034 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 14035 * +-----------------+------+-----------+--------+-----+------+------+ 14036 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | 14037 * +-----------------+------+-----------+--------+-----+------+------+ 14038 */ 14039 static void disas_crypto_aes(DisasContext *s, uint32_t insn) 14040 { 14041 int size = extract32(insn, 22, 2); 14042 int opcode = extract32(insn, 12, 5); 14043 int rn = extract32(insn, 5, 5); 14044 int rd = extract32(insn, 0, 5); 14045 int decrypt; 14046 gen_helper_gvec_2 *genfn2 = NULL; 14047 gen_helper_gvec_3 *genfn3 = NULL; 14048 14049 if (!dc_isar_feature(aa64_aes, s) || size != 0) { 14050 unallocated_encoding(s); 14051 return; 14052 } 14053 14054 switch (opcode) { 14055 case 0x4: /* AESE */ 14056 decrypt = 0; 14057 genfn3 = gen_helper_crypto_aese; 14058 break; 14059 case 0x6: /* AESMC */ 14060 decrypt = 0; 14061 genfn2 = gen_helper_crypto_aesmc; 14062 break; 14063 case 0x5: /* AESD */ 14064 decrypt = 1; 14065 genfn3 = gen_helper_crypto_aese; 14066 break; 14067 case 0x7: /* AESIMC */ 14068 decrypt = 1; 14069 genfn2 = 

/* Crypto AES
 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----------------+------+-----------+--------+-----+------+------+
 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----------------+------+-----------+--------+-----+------+------+
 */
static void disas_crypto_aes(DisasContext *s, uint32_t insn)
{
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int decrypt;
    gen_helper_gvec_2 *genfn2 = NULL;
    gen_helper_gvec_3 *genfn3 = NULL;

    if (!dc_isar_feature(aa64_aes, s) || size != 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x4: /* AESE */
        decrypt = 0;
        genfn3 = gen_helper_crypto_aese;
        break;
    case 0x6: /* AESMC */
        decrypt = 0;
        genfn2 = gen_helper_crypto_aesmc;
        break;
    case 0x5: /* AESD */
        decrypt = 1;
        genfn3 = gen_helper_crypto_aese;
        break;
    case 0x7: /* AESIMC */
        decrypt = 1;
        genfn2 = gen_helper_crypto_aesmc;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }
    if (genfn2) {
        gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2);
    } else {
        gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3);
    }
}

/* Crypto three-reg SHA
 *  31             24 23  22 21 20  16 15 14    12 11 10 9    5 4    0
 * +-----------------+------+---+------+---+--------+-----+------+------+
 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
 * +-----------------+------+---+------+---+--------+-----+------+------+
 */
static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
{
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 3);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    gen_helper_gvec_3 *genfn;
    bool feature;

    if (size != 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0: /* SHA1C */
        genfn = gen_helper_crypto_sha1c;
        feature = dc_isar_feature(aa64_sha1, s);
        break;
    case 1: /* SHA1P */
        genfn = gen_helper_crypto_sha1p;
        feature = dc_isar_feature(aa64_sha1, s);
        break;
    case 2: /* SHA1M */
        genfn = gen_helper_crypto_sha1m;
        feature = dc_isar_feature(aa64_sha1, s);
        break;
    case 3: /* SHA1SU0 */
        genfn = gen_helper_crypto_sha1su0;
        feature = dc_isar_feature(aa64_sha1, s);
        break;
    case 4: /* SHA256H */
        genfn = gen_helper_crypto_sha256h;
        feature = dc_isar_feature(aa64_sha256, s);
        break;
    case 5: /* SHA256H2 */
        genfn = gen_helper_crypto_sha256h2;
        feature = dc_isar_feature(aa64_sha256, s);
        break;
    case 6: /* SHA256SU1 */
        genfn = gen_helper_crypto_sha256su1;
        feature = dc_isar_feature(aa64_sha256, s);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }
    gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
}

/* Crypto two-reg SHA
 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----------------+------+-----------+--------+-----+------+------+
 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----------------+------+-----------+--------+-----+------+------+
 */
static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
{
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    gen_helper_gvec_2 *genfn;
    bool feature;

    if (size != 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0: /* SHA1H */
        feature = dc_isar_feature(aa64_sha1, s);
        genfn = gen_helper_crypto_sha1h;
        break;
    case 1: /* SHA1SU1 */
        feature = dc_isar_feature(aa64_sha1, s);
        genfn = gen_helper_crypto_sha1su1;
        break;
    case 2: /* SHA256SU0 */
        feature = dc_isar_feature(aa64_sha256, s);
        genfn = gen_helper_crypto_sha256su0;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }
    gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
}

static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    tcg_gen_rotli_i64(d, m, 1);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
{
    tcg_gen_rotli_vec(vece, d, m, 1);
    tcg_gen_xor_vec(vece, d, d, n);
}

void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3 op = {
        .fni8 = gen_rax1_i64,
        .fniv = gen_rax1_vec,
        .opt_opc = vecop_list,
        .fno = gen_helper_crypto_rax1,
        .vece = MO_64,
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
}
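
/*
 * Per 64-bit lane, RAX1 computes d = n ^ rol64(m, 1), as the i64
 * expansion above shows.  An illustrative plain-C equivalent (not
 * used by the translator):
 *
 *     uint64_t rax1(uint64_t n, uint64_t m)
 *     {
 *         return n ^ ((m << 1) | (m >> 63));
 *     }
 */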

/* Crypto three-reg SHA512
 *  31                   21 20  16 15 14 13 12 11 10 9    5 4    0
 * +-----------------------+------+---+---+-----+--------+------+------+
 * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
 * +-----------------------+------+---+---+-----+--------+------+------+
 */
static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
{
    int opcode = extract32(insn, 10, 2);
    int o = extract32(insn, 14, 1);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool feature;
    gen_helper_gvec_3 *oolfn = NULL;
    GVecGen3Fn *gvecfn = NULL;

    if (o == 0) {
        switch (opcode) {
        case 0: /* SHA512H */
            feature = dc_isar_feature(aa64_sha512, s);
            oolfn = gen_helper_crypto_sha512h;
            break;
        case 1: /* SHA512H2 */
            feature = dc_isar_feature(aa64_sha512, s);
            oolfn = gen_helper_crypto_sha512h2;
            break;
        case 2: /* SHA512SU1 */
            feature = dc_isar_feature(aa64_sha512, s);
            oolfn = gen_helper_crypto_sha512su1;
            break;
        case 3: /* RAX1 */
            feature = dc_isar_feature(aa64_sha3, s);
            gvecfn = gen_gvec_rax1;
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        switch (opcode) {
        case 0: /* SM3PARTW1 */
            feature = dc_isar_feature(aa64_sm3, s);
            oolfn = gen_helper_crypto_sm3partw1;
            break;
        case 1: /* SM3PARTW2 */
            feature = dc_isar_feature(aa64_sm3, s);
            oolfn = gen_helper_crypto_sm3partw2;
            break;
        case 2: /* SM4EKEY */
            feature = dc_isar_feature(aa64_sm4, s);
            oolfn = gen_helper_crypto_sm4ekey;
            break;
        default:
            unallocated_encoding(s);
            return;
        }
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (oolfn) {
        gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
    } else {
        gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
    }
}

/* Crypto two-reg SHA512
 *  31                                     12 11 10 9    5 4    0
 * +-----------------------------------------+--------+------+------+
 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
 * +-----------------------------------------+--------+------+------+
 */
static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
{
    int opcode = extract32(insn, 10, 2);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool feature;

    switch (opcode) {
    case 0: /* SHA512SU0 */
        feature = dc_isar_feature(aa64_sha512, s);
        break;
    case 1: /* SM4E */
        feature = dc_isar_feature(aa64_sm4, s);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    switch (opcode) {
    case 0: /* SHA512SU0 */
        gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
        break;
    case 1: /* SM4E */
        gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
        break;
    default:
        g_assert_not_reached();
    }
}
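
/*
 * For reference, the four-register crypto ops handled below have
 * simple per-element semantics: EOR3 is d = n ^ m ^ a and BCAX is
 * d = n ^ (m & ~a), each applied to both 64-bit lanes, while SM3SS1
 * computes rol32(rol32(n, 12) + m + a, 7) on element 3 only, zeroing
 * the other three 32-bit elements of the destination.
 */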

/* Crypto four-register
 *  31               23 22 21 20  16 15  14  10 9    5 4    0
 * +-------------------+-----+------+---+------+------+------+
 * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
 * +-------------------+-----+------+---+------+------+------+
 */
static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
{
    int op0 = extract32(insn, 21, 2);
    int rm = extract32(insn, 16, 5);
    int ra = extract32(insn, 10, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool feature;

    switch (op0) {
    case 0: /* EOR3 */
    case 1: /* BCAX */
        feature = dc_isar_feature(aa64_sha3, s);
        break;
    case 2: /* SM3SS1 */
        feature = dc_isar_feature(aa64_sm3, s);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (op0 < 2) {
        TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
        int pass;

        tcg_op1 = tcg_temp_new_i64();
        tcg_op2 = tcg_temp_new_i64();
        tcg_op3 = tcg_temp_new_i64();
        tcg_res[0] = tcg_temp_new_i64();
        tcg_res[1] = tcg_temp_new_i64();

        for (pass = 0; pass < 2; pass++) {
            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);
            read_vec_element(s, tcg_op3, ra, pass, MO_64);

            if (op0 == 0) {
                /* EOR3 */
                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
            } else {
                /* BCAX */
                tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
            }
            tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
        }
        write_vec_element(s, tcg_res[0], rd, 0, MO_64);
        write_vec_element(s, tcg_res[1], rd, 1, MO_64);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_op3);
        tcg_temp_free_i64(tcg_res[0]);
        tcg_temp_free_i64(tcg_res[1]);
    } else {
        TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;

        tcg_op1 = tcg_temp_new_i32();
        tcg_op2 = tcg_temp_new_i32();
        tcg_op3 = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        tcg_zero = tcg_constant_i32(0);

        read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
        read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
        read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);

        tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
        tcg_gen_rotri_i32(tcg_res, tcg_res, 25);

        write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
        write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
        write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
        write_vec_element_i32(s, tcg_res, rd, 3, MO_32);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i32(tcg_op3);
        tcg_temp_free_i32(tcg_res);
    }
}

/* Crypto XAR
 *  31                   21 20  16 15    10 9    5 4    0
 * +-----------------------+------+--------+------+------+
 * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
 * +-----------------------+------+--------+------+------+
 */
static void disas_crypto_xar(DisasContext *s, uint32_t insn)
{
    int rm = extract32(insn, 16, 5);
    int imm6 = extract32(insn, 10, 6);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    if (!dc_isar_feature(aa64_sha3, s)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
                 vec_full_reg_offset(s, rn),
                 vec_full_reg_offset(s, rm), imm6, 16,
                 vec_full_reg_size(s));
}
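
/*
 * Per 64-bit lane, XAR computes d = ror64(n ^ m, imm6).  An
 * illustrative plain-C equivalent (not used by the translator):
 *
 *     uint64_t xar(uint64_t n, uint64_t m, unsigned imm6)
 *     {
 *         uint64_t t = n ^ m;
 *         return (t >> imm6) | (t << (-imm6 & 63));
 *     }
 */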

/* Crypto three-reg imm2
 *  31                   21 20  16 15 14 13 12 11 10 9    5 4    0
 * +-----------------------+------+-----+------+--------+------+------+
 * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
 * +-----------------------+------+-----+------+--------+------+------+
 */
static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
        gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
    };
    int opcode = extract32(insn, 10, 2);
    int imm2 = extract32(insn, 12, 2);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    if (!dc_isar_feature(aa64_sm3, s)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
}

/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table-based
 * approach for this part of the decode.
 */
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
    { 0xce000000, 0xff808000, disas_crypto_four_reg },
    { 0xce800000, 0xffe00000, disas_crypto_xar },
    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
    { 0x00000000, 0x00000000, NULL }
};
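
/*
 * An entry matches when (insn & mask) == pattern; lookup_disas_fn()
 * (defined earlier in this file) scans the table in order and returns
 * the first match, which is why more specific patterns such as
 * simd_mod_imm must be listed before the broader patterns they
 * overlap, as noted above.
 */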

static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
{
    /* Note that this is called with all non-FP cases from
     * table C3-6 so it must UNDEF for entries not specifically
     * allocated to instructions in that table.
     */
    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }
}

/* C3.6 Data processing - SIMD and floating point */
static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
{
    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
        disas_data_proc_fp(s, insn);
    } else {
        /* SIMD, including crypto */
        disas_data_proc_simd(s, insn);
    }
}

/*
 * Include the generated SME FA64 decoder.
 */

#include "decode-sme-fa64.c.inc"

static bool trans_OK(DisasContext *s, arg_OK *a)
{
    return true;
}

static bool trans_FAIL(DisasContext *s, arg_OK *a)
{
    s->is_nonstreaming = true;
    return true;
}

/**
 * is_guarded_page:
 * @env: The cpu environment
 * @s: The DisasContext
 *
 * Return true if the page is guarded.
 */
static bool is_guarded_page(CPUARMState *env, DisasContext *s)
{
    uint64_t addr = s->base.pc_first;
#ifdef CONFIG_USER_ONLY
    return page_get_flags(addr) & PAGE_BTI;
#else
    CPUTLBEntryFull *full;
    void *host;
    int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
    int flags;

    /*
     * We test this immediately after reading an insn, which means
     * that the TLB entry must be present and valid, and thus this
     * access will never raise an exception.
     */
    flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
                              false, &host, &full, 0);
    assert(!(flags & TLB_INVALID_MASK));

    return full->guarded;
#endif
}

/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, known to be non-zero
 *
 * On a guarded page, there are a limited number of insns
 * that may be present at the branch target:
 *   - branch target identifiers,
 *   - paciasp, pacibsp,
 *   - BRK insn
 *   - HLT insn
 * Anything else causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    if ((insn & 0xfffff01fu) == 0xd503201fu) {
        /* HINT space */
        switch (extract32(insn, 5, 7)) {
        case 0b011001: /* PACIASP */
        case 0b011011: /* PACIBSP */
            /*
             * If SCTLR_ELx.BT, then PACI*SP are not compatible
             * with btype == 3.  Otherwise all btype are ok.
             */
            return !bt || btype != 3;
        case 0b100000: /* BTI */
            /* Not compatible with any btype. */
            return false;
        case 0b100010: /* BTI c */
            /* Not compatible with btype == 3 */
            return btype != 3;
        case 0b100100: /* BTI j */
            /* Not compatible with btype == 2 */
            return btype != 2;
        case 0b100110: /* BTI jc */
            /* Compatible with any btype. */
            return true;
        }
    } else {
        switch (insn & 0xffe0001fu) {
        case 0xd4200000u: /* BRK */
        case 0xd4400000u: /* HLT */
            /* Give priority to the breakpoint exception. */
            return true;
        }
    }
    return false;
}
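
/*
 * For example, BLR sets PSTATE.BTYPE to 2, so on a guarded page the
 * branch target may start with BTI c or BTI jc (or PACIASP/PACIBSP),
 * but starting with BTI j would raise the Branch Target Exception.
 */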

static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    dc->aarch64 = true;
    dc->thumb = false;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
    dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
    dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
    dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata = EX_TBFLAG_A64(tb_flags, ATA);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
    dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
    dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;
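
    /*
     * TARGET_PAGE_MASK is negative, so -(pc | TARGET_PAGE_MASK) is the
     * number of bytes from pc to the end of its page; dividing by 4
     * converts that into a whole number of A64 insns.
     */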
    /* Bound the number of insns to execute to those left on the page. */
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;

    /* If architectural single step active, limit to 1. */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);
}

static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}

static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_arg = dc->base.pc_next;

    if (tb_cflags(dcbase->tb) & CF_PCREL) {
        pc_arg &= ~TARGET_PAGE_MASK;
    }
    tcg_gen_insn_start(pc_arg, 0, 0);
    dc->insn_start = tcg_last_op();
}

static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *s = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    uint64_t pc = s->base.pc_next;
    uint32_t insn;

    /* Singlestep exceptions have the highest priority. */
    if (s->ss_active && !s->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (e.g. by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(s->base.num_insns == 1);
        gen_swstep_exception(s, 0, 0);
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = pc + 4;
        return;
    }

    if (pc & 3) {
        /*
         * PC alignment fault.  This has priority over the instruction abort
         * that we would receive from a translation fault via arm_ldl_code.
         * This should only be possible after an indirect branch, at the
         * start of the TB.
         */
        assert(s->base.num_insns == 1);
        gen_helper_exception_pc_alignment(cpu_env, tcg_constant_tl(pc));
        s->base.is_jmp = DISAS_NORETURN;
        s->base.pc_next = QEMU_ALIGN_UP(pc, 4);
        return;
    }

    s->pc_curr = pc;
    insn = arm_ldl_code(env, &s->base, pc, s->sctlr_b);
    s->insn = insn;
    s->base.pc_next = pc + 4;

    s->fp_access_checked = false;
    s->sve_access_checked = false;

    if (s->pstate_il) {
        /*
         * Illegal execution state. This has priority over BTI
         * exceptions, but comes after instruction abort exceptions.
         */
        gen_exception_insn(s, 0, EXCP_UDEF, syn_illegalstate());
        return;
    }

    if (dc_isar_feature(aa64_bti, s)) {
        if (s->base.num_insns == 1) {
            /*
             * At the first insn of the TB, compute s->guarded_page.
             * We delayed computing this until successfully reading
             * the first insn of the TB, above.  This (mostly) ensures
             * that the softmmu tlb entry has been populated, and the
             * page table GP bit is available.
             *
             * Note that we need to compute this even if btype == 0,
             * because this value is used for BR instructions later
             * where ENV is not available.
             */
            s->guarded_page = is_guarded_page(env, s);

            /* First insn can have btype set to non-zero. */
            tcg_debug_assert(s->btype >= 0);

            /*
             * Note that the Branch Target Exception has fairly high
             * priority -- below debugging exceptions but above most
             * everything else.  This allows us to handle this now
             * instead of waiting until the insn is otherwise decoded.
             */
            if (s->btype != 0
                && s->guarded_page
                && !btype_destination_ok(insn, s->bt, s->btype)) {
                gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype));
                return;
            }
        } else {
            /* Not the first insn: btype must be 0. */
            tcg_debug_assert(s->btype == 0);
        }
    }

    s->is_nonstreaming = false;
    if (s->sme_trap_nonstreaming) {
        disas_sme_fa64(s, insn);
    }
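
    /*
     * insn[28:25] is the major opcode field from the Arm ARM's
     * top-level A64 instruction encoding table; each case below
     * corresponds to one or more rows of that table.
     */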
    switch (extract32(insn, 25, 4)) {
    case 0x0:
        if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
            unallocated_encoding(s);
        }
        break;
    case 0x1: case 0x3: /* UNALLOCATED */
        unallocated_encoding(s);
        break;
    case 0x2:
        if (!disas_sve(s, insn)) {
            unallocated_encoding(s);
        }
        break;
    case 0x8: case 0x9: /* Data processing - immediate */
        disas_data_proc_imm(s, insn);
        break;
    case 0xa: case 0xb: /* Branch, exception generation and system insns */
        disas_b_exc_sys(s, insn);
        break;
    case 0x4:
    case 0x6:
    case 0xc:
    case 0xe: /* Loads and stores */
        disas_ldst(s, insn);
        break;
    case 0x5:
    case 0xd: /* Data processing - register */
        disas_data_proc_reg(s, insn);
        break;
    case 0x7:
    case 0xf: /* Data processing - SIMD and floating point */
        disas_data_proc_simd_fp(s, insn);
        break;
    default:
        assert(FALSE); /* all 16 values of insn[28:25] are handled above */
        break;
    }

    /*
     * After execution of most insns, btype is reset to 0.
     * Note that we set btype == -1 when the insn sets btype.
     */
    if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) {
        reset_btype(s);
    }
}
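
/*
 * Roughly: DISAS_NEXT and DISAS_TOO_MANY chain to the next TB via
 * gen_goto_tb(); DISAS_JUMP takes the lookup-and-goto-ptr path;
 * DISAS_EXIT returns to the main loop, with the DISAS_UPDATE_*
 * variants first syncing the PC; DISAS_NORETURN means an exception
 * has already been generated and no epilogue is needed.
 */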
static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (unlikely(dc->ss_active)) {
        /* Note that this means single stepping WFI doesn't halt the CPU.
         * For conditional branch insns this is harmless unreachable code as
         * gen_goto_tb() has already handled emitting the debug exception
         * (and thus a tb-jump is not possible when singlestepping).
         */
        switch (dc->base.is_jmp) {
        default:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
        case DISAS_JUMP:
            gen_step_complete_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(cpu_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
            gen_helper_wfi(cpu_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}

static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
                                 CPUState *cpu, FILE *logfile)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
    target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
    .disas_log          = aarch64_tr_disas_log,
};