/*
 * AArch64 translation
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "qemu/host-utils.h"
#include "semihosting/semihost.h"
#include "exec/gen-icount.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "cpregs.h"
#include "translate-a64.h"
#include "qemu/atomic128.h"

static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;

static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};

/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* initialize TCG globals. */
void a64_translate_init(void)
{
    int i;

    cpu_pc = tcg_global_mem_new_i64(cpu_env,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
}

/*
 * Return the core mmu_idx to use for A64 "unprivileged load/store" insns
 */
static int get_a64_user_mem_index(DisasContext *s)
{
    /*
     * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL,
     * which is the usual mmu_idx for this cpu state.
     */
    ARMMMUIdx useridx = s->mmu_idx;

    if (s->unpriv) {
        /*
         * We have pre-computed the condition for AccType_UNPRIV.
         * Therefore we should never get here with a mmu_idx for
         * which we do not know the corresponding user mmu_idx.
         */
        switch (useridx) {
        case ARMMMUIdx_E10_1:
        case ARMMMUIdx_E10_1_PAN:
            useridx = ARMMMUIdx_E10_0;
            break;
        case ARMMMUIdx_E20_2:
        case ARMMMUIdx_E20_2_PAN:
            useridx = ARMMMUIdx_E20_0;
            break;
        default:
            g_assert_not_reached();
        }
    }
    return arm_to_core_mmu_idx(useridx);
}

static void set_btype_raw(int val)
{
    tcg_gen_st_i32(tcg_constant_i32(val), cpu_env,
                   offsetof(CPUARMState, btype));
}

static void set_btype(DisasContext *s, int val)
{
    /* BTYPE is a 2-bit field, and 0 should be done with reset_btype. */
    tcg_debug_assert(val >= 1 && val <= 3);
    set_btype_raw(val);
    s->btype = -1;
}

static void reset_btype(DisasContext *s)
{
    if (s->btype != 0) {
        set_btype_raw(0);
        s->btype = 0;
    }
}

static void gen_pc_plus_diff(DisasContext *s, TCGv_i64 dest, target_long diff)
{
    assert(s->pc_save != -1);
    if (tb_cflags(s->base.tb) & CF_PCREL) {
        tcg_gen_addi_i64(dest, cpu_pc, (s->pc_curr - s->pc_save) + diff);
    } else {
        tcg_gen_movi_i64(dest, s->pc_curr + diff);
    }
}

void gen_a64_update_pc(DisasContext *s, target_long diff)
{
    gen_pc_plus_diff(s, cpu_pc, diff);
    s->pc_save = s->pc_curr + diff;
}

/*
 * Handle Top Byte Ignore (TBI) bits.
 *
 * If address tagging is enabled via the TCR TBI bits:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 *    If the appropriate TBI bit is set for the address then
 *    the address is sign-extended from bit 55 into bits [63:56]
 *
 * Here we have concatenated TBI{1,0} into tbi.
 */
static void gen_top_byte_ignore(DisasContext *s, TCGv_i64 dst,
                                TCGv_i64 src, int tbi)
{
    if (tbi == 0) {
        /* Load unmodified address */
        tcg_gen_mov_i64(dst, src);
    } else if (!regime_has_2_ranges(s->mmu_idx)) {
        /* Force tag byte to all zero */
        tcg_gen_extract_i64(dst, src, 0, 56);
    } else {
        /* Sign-extend from bit 55. */
        tcg_gen_sextract_i64(dst, src, 0, 56);

        switch (tbi) {
        case 1:
            /* tbi0 but !tbi1: only use the extension if positive */
            tcg_gen_and_i64(dst, dst, src);
            break;
        case 2:
            /* !tbi0 but tbi1: only use the extension if negative */
            tcg_gen_or_i64(dst, dst, src);
            break;
        case 3:
            /* tbi0 and tbi1: always use the extension */
            break;
        default:
            g_assert_not_reached();
        }
    }
}
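
/*
 * Worked example for the two-range case above (values are illustrative
 * only): with tbi == 1 (TBI0 set, TBI1 clear), src = 0xAA00123456789ABC
 * has bit 55 == 0, so the sextract gives 0x0000123456789ABC and the AND
 * with src leaves the tag byte cleared; src = 0xAA80123456789ABC has
 * bit 55 == 1, the sextract gives 0xFF80123456789ABC, and the AND
 * returns the original value unchanged, since TBI1 does not apply.
 */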

static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
{
    /*
     * If address tagging is enabled for instructions via the TCR TBI bits,
     * then loading an address into the PC will clear out any tag.
     */
    gen_top_byte_ignore(s, cpu_pc, src, s->tbii);
    s->pc_save = -1;
}

/*
 * Handle MTE and/or TBI.
 *
 * For TBI, ideally, we would do nothing. Proper behaviour on fault is
 * for the tag to be present in the FAR_ELx register. But for user-only
 * mode we do not have a TLB with which to implement this, so we must
 * remove the top byte now.
 *
 * Always return a fresh temporary that we can increment independently
 * of the write-back address.
 */

TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
    TCGv_i64 clean = tcg_temp_new_i64();
#ifdef CONFIG_USER_ONLY
    gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
    tcg_gen_mov_i64(clean, addr);
#endif
    return clean;
}

/* Insert a zero tag into src, with the result at dst. */
static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
{
    tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
}

static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
                             MMUAccessType acc, int log2_size)
{
    gen_helper_probe_access(cpu_env, ptr,
                            tcg_constant_i32(acc),
                            tcg_constant_i32(get_mem_index(s)),
                            tcg_constant_i32(1 << log2_size));
}

/*
 * For MTE, check a single logical or atomic access. This probes a single
 * address, the exact one specified. The size and alignment of the access
 * is not relevant to MTE, per se, but watchpoints do require the size,
 * and we want to recognize those before making any other changes to state.
 */
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
                                      bool is_write, bool tag_checked,
                                      int log2_size, bool is_unpriv,
                                      int core_idx)
{
    if (tag_checked && s->mte_active[is_unpriv]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}

TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int log2_size)
{
    return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
                                 false, get_mem_index(s));
}

/*
 * For MTE, check multiple logical sequential accesses.
 */
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
                        bool tag_checked, int size)
{
    if (tag_checked && s->mte_active[0]) {
        TCGv_i64 ret;
        int desc = 0;

        desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
        desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
        desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
        desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
        desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1);

        ret = tcg_temp_new_i64();
        gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);

        return ret;
    }
    return clean_data_tbi(s, addr);
}
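
/*
 * Sketch of how a caller sees these checks (the values are an assumed
 * example, not taken from a real call site): a tag-checked 8-byte store
 * would reach gen_helper_mte_check() with a descriptor packing
 * MIDX = the core mmu index, WRITE = 1 and SIZEM1 = 7, i.e. SIZEM1 is
 * always the access length minus one, so a zero value still describes
 * a single-byte access.
 */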

typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;

static void a64_test_cc(DisasCompare64 *c64, int cc)
{
    DisasCompare c32;

    arm_test_cc(&c32, cc);

    /*
     * Sign-extend the 32-bit value so that the GE/LT comparisons work
     * properly. The NE/EQ comparisons are also fine with this choice.
     */
    c64->cond = c32.cond;
    c64->value = tcg_temp_new_i64();
    tcg_gen_ext_i32_i64(c64->value, c32.value);
}

static void gen_rebuild_hflags(DisasContext *s)
{
    gen_helper_rebuild_hflags_a64(cpu_env, tcg_constant_i32(s->current_el));
}

static void gen_exception_internal(int excp)
{
    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_constant_i32(excp));
}

static void gen_exception_internal_insn(DisasContext *s, int excp)
{
    gen_a64_update_pc(s, 0);
    gen_exception_internal(excp);
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syndrome)
{
    gen_a64_update_pc(s, 0);
    gen_helper_exception_bkpt_insn(cpu_env, tcg_constant_i32(syndrome));
    s->base.is_jmp = DISAS_NORETURN;
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_swstep_exception(s, 1, s->is_ldex);
    s->base.is_jmp = DISAS_NORETURN;
}

static inline bool use_goto_tb(DisasContext *s, uint64_t dest)
{
    if (s->ss_active) {
        return false;
    }
    return translator_use_goto_tb(&s->base, dest);
}

static void gen_goto_tb(DisasContext *s, int n, int64_t diff)
{
    if (use_goto_tb(s, s->pc_curr + diff)) {
        /*
         * For pcrel, the pc must always be up-to-date on entry to
         * the linked TB, so that it can use simple additions for all
         * further adjustments. For !pcrel, the linked TB is compiled
         * to know its full virtual address, so we can delay the
         * update to pc to the unlinked path. A long chain of links
         * can thus avoid many updates to the PC.
         */
        if (tb_cflags(s->base.tb) & CF_PCREL) {
            gen_a64_update_pc(s, diff);
            tcg_gen_goto_tb(n);
        } else {
            tcg_gen_goto_tb(n);
            gen_a64_update_pc(s, diff);
        }
        tcg_gen_exit_tb(s->base.tb, n);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_a64_update_pc(s, diff);
        if (s->ss_active) {
            gen_step_complete_exception(s);
        } else {
            tcg_gen_lookup_and_goto_ptr();
            s->base.is_jmp = DISAS_NORETURN;
        }
    }
}

TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
    TCGv_i64 t = tcg_temp_new_i64();
    tcg_gen_movi_i64(t, 0);
    return t;
}

/*
 * Register access functions
 *
 * These functions are used for directly accessing a register in cases
 * where changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
    if (reg == 31) {
        return new_tmp_a64_zero(s);
    } else {
        return cpu_X[reg];
    }
}

/* register access for when 31 == SP */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}

/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 * representing the register contents. This TCGv is an auto-freed
 * temporary so it need not be explicitly freed, and may be modified.
 */
TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (reg != 31) {
        if (sf) {
            tcg_gen_mov_i64(v, cpu_X[reg]);
        } else {
            tcg_gen_ext32u_i64(v, cpu_X[reg]);
        }
    } else {
        tcg_gen_movi_i64(v, 0);
    }
    return v;
}

TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
    TCGv_i64 v = tcg_temp_new_i64();
    if (sf) {
        tcg_gen_mov_i64(v, cpu_X[reg]);
    } else {
        tcg_gen_ext32u_i64(v, cpu_X[reg]);
    }
    return v;
}

/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(DisasContext *s, int regno, MemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}

/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}

/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
    return v;
}

static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
    return v;
}

static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
    return v;
}

/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 * If SVE is not enabled, then there are only 128 bits in the vector.
 */
static void clear_vec_high(DisasContext *s, bool is_q, int rd)
{
    unsigned ofs = fp_reg_offset(s, rd, MO_64);
    unsigned vsz = vec_full_reg_size(s);

    /* Nop move, with side effect of clearing the tail. */
    tcg_gen_gvec_mov(MO_64, ofs, ofs, is_q ? 16 : 8, vsz);
}

void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    unsigned ofs = fp_reg_offset(s, reg, MO_64);

    tcg_gen_st_i64(v, cpu_env, ofs);
    clear_vec_high(s, false, reg);
}

static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}
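
/*
 * Illustrative snippet (hypothetical decoder, not a function in this
 * file) showing the B1.2.1 mapping in practice: an SP-capable operand
 * uses the _sp accessor while a data-processing source treats x31 as
 * XZR:
 *
 *     TCGv_i64 dst = cpu_reg_sp(s, rd);       // encoding 31 => SP
 *     TCGv_i64 src = read_cpu_reg(s, rm, 1);  // encoding 31 => XZR
 */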

/* Expand a 2-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
                         GVecGen2Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand + immediate AdvSIMD vector operation using
 * an expander function.
 */
static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
                          int64_t imm, GVecGen2iFn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            imm, is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 3-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         GVecGen3Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 4-operand AdvSIMD vector operation using an expander function. */
static void gen_gvec_fn4(DisasContext *s, bool is_q, int rd, int rn, int rm,
                         int rx, GVecGen4Fn *gvec_fn, int vece)
{
    gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
            vec_full_reg_offset(s, rm), vec_full_reg_offset(s, rx),
            is_q ? 16 : 8, vec_full_reg_size(s));
}

/* Expand a 2-operand operation using an out-of-line helper. */
static void gen_gvec_op2_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int data, gen_helper_gvec_2 *fn)
{
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand operation using an out-of-line helper. */
static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
                             int rn, int rm, int data, gen_helper_gvec_3 *fn)
{
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}

/* Expand a 3-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, bool is_fp16, int data,
                              gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    tcg_temp_free_ptr(fpst);
}

/* Expand a 3-operand + qc + operation using an out-of-line helper. */
static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn,
                            int rm, gen_helper_gvec_3_ptr *fn)
{
    TCGv_ptr qc_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
    tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm), qc_ptr,
                       is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
    tcg_temp_free_ptr(qc_ptr);
}

/* Expand a 4-operand operation using an out-of-line helper. */
static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn,
                             int rm, int ra, int data, gen_helper_gvec_4 *fn)
{
    tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra),
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
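
/*
 * Usage sketch (hypothetical caller): a vector integer ADD can be
 * expanded entirely inline by passing the generic tcg expander to
 * gen_gvec_fn3:
 *
 *     gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
 *
 * The expander operates on the 8- or 16-byte column selected by is_q
 * and clears the tail of the register up to vec_full_reg_size(s).
 */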

/*
 * Expand a 4-operand + fpstatus pointer + simd data value operation using
 * an out-of-line helper.
 */
static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
                              int rm, int ra, bool is_fp16, int data,
                              gen_helper_gvec_4_ptr *fn)
{
    TCGv_ptr fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR);
    tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd),
                       vec_full_reg_offset(s, rn),
                       vec_full_reg_offset(s, rm),
                       vec_full_reg_offset(s, ra), fpst,
                       is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
    tcg_temp_free_ptr(fpst);
}

/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}

/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        tcg_gen_extrl_i64_i32(cpu_ZF, result);
        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}
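
/*
 * Note on the 32-bit path of gen_add_CC above: tcg_gen_add2_i32 adds
 * two (low, high) pairs, so adding (t0, 0) + (t1, 0) leaves the 32-bit
 * sum in cpu_NF and the carry-out directly in cpu_CF. For example
 * 0xffffffff + 1 produces NF = 0 (hence Z set via ZF) and CF = 1.
 */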

/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}

/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);
    tcg_temp_free_i64(flag);

    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}

/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result = tcg_temp_new_i64();
        TCGv_i64 cf_64 = tcg_temp_new_i64();
        TCGv_i64 vf_64 = tcg_temp_new_i64();
        TCGv_i64 tmp = tcg_temp_new_i64();
        TCGv_i64 zero = tcg_constant_i64(0);

        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, zero, cf_64, zero);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, zero);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();
        TCGv_i32 zero = tcg_constant_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, zero, cpu_CF, zero);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, zero);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}

/*
 * Load/Store generators
 */

/*
 * Store from GPR register to memory.
 */
static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
                             TCGv_i64 tcg_addr, MemOp memop, int memidx,
                             bool iss_valid,
                             unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      false,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, MemOp memop,
                      bool iss_valid,
                      unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_st_memidx(s, source, tcg_addr, memop, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Load from memory to GPR register
 */
static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                             MemOp memop, bool extend, int memidx,
                             bool iss_valid, unsigned int iss_srt,
                             bool iss_sf, bool iss_ar)
{
    memop = finalize_memop(s, memop);
    tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);

    if (extend && (memop & MO_SIGN)) {
        g_assert((memop & MO_SIZE) <= MO_32);
        tcg_gen_ext32u_i64(dest, dest);
    }

    if (iss_valid) {
        uint32_t syn;

        syn = syn_data_abort_with_iss(0,
                                      (memop & MO_SIZE),
                                      (memop & MO_SIGN) != 0,
                                      iss_srt,
                                      iss_sf,
                                      iss_ar,
                                      0, 0, 0, 0, 0, false);
        disas_set_insn_syndrome(s, syn);
    }
}

static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      MemOp memop, bool extend,
                      bool iss_valid, unsigned int iss_srt,
                      bool iss_sf, bool iss_ar)
{
    do_gpr_ld_memidx(s, dest, tcg_addr, memop, extend, get_mem_index(s),
                     iss_valid, iss_srt, iss_sf, iss_ar);
}

/*
 * Store from FP register to memory
 */
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
    /* This writes the bottom N bits of a 128 bit wide vector to memory */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    MemOp mop;

    tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));

    if (size < 4) {
        mop = finalize_memop(s, size);
        tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
        TCGv_i64 tmphi = tcg_temp_new_i64();

        tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));

        mop = s->be_data | MO_UQ;
        tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr,
                            get_mem_index(s), mop);

        tcg_temp_free_i64(tcg_hiaddr);
        tcg_temp_free_i64(tmphi);
    }

    tcg_temp_free_i64(tmplo);
}

/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi = NULL;
    MemOp mop;

    if (size < 4) {
        mop = finalize_memop(s, size);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
    } else {
        bool be = s->be_data == MO_BE;
        TCGv_i64 tcg_hiaddr;

        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        mop = s->be_data | MO_UQ;
        tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
                            mop | (s->align_mem ? MO_ALIGN_16 : 0));
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr,
                            get_mem_index(s), mop);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
    tcg_temp_free_i64(tmplo);

    if (tmphi) {
        tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
        tcg_temp_free_i64(tmphi);
    }
    clear_vec_high(s, tmphi != NULL, destidx);
}

/*
 * Vector load/store helpers.
 *
 * The principal difference between this and a FP load is that we don't
 * zero extend as we are filling a partial chunk of the vector register.
 * These functions don't support 128 bit loads/stores, which would be
 * normal load/store operations.
 *
 * The _i32 versions are useful when operating on 32 bit quantities
 * (eg for floating point single or using Neon helper functions).
 */

/* Get value of an element within a vector register */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch ((unsigned)memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Set value of an element within a vector register */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, MemOp memop)
{
    int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Store from vector register to memory */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, mop & MO_SIZE);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);

    tcg_temp_free_i64(tcg_tmp);
}

/* Load from memory to vector register */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, MemOp mop)
{
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), mop);
    write_vec_element(s, tcg_tmp, destidx, element, mop & MO_SIZE);

    tcg_temp_free_i64(tcg_tmp);
}
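
/*
 * Example (illustrative): reading lane 3 of Vn as a signed halfword
 * widens it into a 64-bit temporary:
 *
 *     read_vec_element(s, tcg_tmp, rn, 3, MO_16 | MO_SIGN);
 *
 * Only the MO_SIZE bits feed vec_reg_offset(); MO_SIGN merely selects
 * the sign- vs zero-extending load for the sub-64-bit cases.
 */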

/* Check that FP/Neon access is enabled. If it is, return
 * true. If not, emit code to generate an appropriate exception,
 * and return false; the caller should not emit any code for
 * the instruction. Note that this check must happen after all
 * unallocated-encoding checks (otherwise the syndrome information
 * for the resulting exception will be incorrect).
 */
static bool fp_access_check_only(DisasContext *s)
{
    if (s->fp_excp_el) {
        assert(!s->fp_access_checked);
        s->fp_access_checked = true;

        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_fp_access_trap(1, 0xe, false, 0),
                              s->fp_excp_el);
        return false;
    }
    s->fp_access_checked = true;
    return true;
}

static bool fp_access_check(DisasContext *s)
{
    if (!fp_access_check_only(s)) {
        return false;
    }
    if (s->sme_trap_nonstreaming && s->is_nonstreaming) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_Streaming, false));
        return false;
    }
    return true;
}

/*
 * Check that SVE access is enabled. If it is, return true.
 * If not, emit code to generate an appropriate exception and return false.
 * This function corresponds to CheckSVEEnabled().
 */
bool sve_access_check(DisasContext *s)
{
    if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
        assert(dc_isar_feature(aa64_sme, s));
        if (!sme_sm_enabled_check(s)) {
            goto fail_exit;
        }
    } else if (s->sve_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_sve_access_trap(), s->sve_excp_el);
        goto fail_exit;
    }
    s->sve_access_checked = true;
    return fp_access_check(s);

 fail_exit:
    /* Assert that we only raise one exception per instruction. */
    assert(!s->sve_access_checked);
    s->sve_access_checked = true;
    return false;
}

/*
 * Check that SME access is enabled, raise an exception if not.
 * Note that this function corresponds to CheckSMEAccess and is
 * only used directly for cpregs.
 */
static bool sme_access_check(DisasContext *s)
{
    if (s->sme_excp_el) {
        gen_exception_insn_el(s, 0, EXCP_UDEF,
                              syn_smetrap(SME_ET_AccessTrap, false),
                              s->sme_excp_el);
        return false;
    }
    return true;
}

/* This function corresponds to CheckSMEEnabled. */
bool sme_enabled_check(DisasContext *s)
{
    /*
     * Note that unlike sve_excp_el, we have not constrained sme_excp_el
     * to be zero when fp_excp_el has priority. This is because we need
     * sme_excp_el by itself for cpregs access checks.
     */
    if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
        s->fp_access_checked = true;
        return sme_access_check(s);
    }
    return fp_access_check_only(s);
}

/* Common subroutine for CheckSMEAnd*Enabled.
 */
bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
{
    if (!sme_enabled_check(s)) {
        return false;
    }
    if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_NotStreaming, false));
        return false;
    }
    if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
        gen_exception_insn(s, 0, EXCP_UDEF,
                           syn_smetrap(SME_ET_InactiveZA, false));
        return false;
    }
    return true;
}

/*
 * This utility function is for doing register extension with an
 * optional shift. You will likely want to pass a temporary for the
 * destination register. See DecodeRegExtend() in the ARM ARM.
 */
static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
                              int option, unsigned int shift)
{
    int extsize = extract32(option, 0, 2);
    bool is_signed = extract32(option, 2, 1);

    if (is_signed) {
        switch (extsize) {
        case 0:
            tcg_gen_ext8s_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16s_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32s_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    } else {
        switch (extsize) {
        case 0:
            tcg_gen_ext8u_i64(tcg_out, tcg_in);
            break;
        case 1:
            tcg_gen_ext16u_i64(tcg_out, tcg_in);
            break;
        case 2:
            tcg_gen_ext32u_i64(tcg_out, tcg_in);
            break;
        case 3:
            tcg_gen_mov_i64(tcg_out, tcg_in);
            break;
        }
    }

    if (shift) {
        tcg_gen_shli_i64(tcg_out, tcg_out, shift);
    }
}

static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}

/*
 * This provides a simple table-based lookup decoder. It is
 * intended to be used when the relevant bits for decode are too
 * awkwardly placed and switch/if based logic would be confusing and
 * deeply nested. Since it's a linear search through the table, tables
 * should be kept small.
 *
 * It returns the first handler where insn & mask == pattern, or
 * NULL if there is no match.
 * The table is terminated by an empty mask (i.e. 0)
 */
static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
                                               uint32_t insn)
{
    const AArch64DecodeTable *tptr = table;

    while (tptr->mask) {
        if ((insn & tptr->mask) == tptr->pattern) {
            return tptr->disas_fn;
        }
        tptr++;
    }
    return NULL;
}
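
/*
 * Illustrative use (the pattern/mask values and handler name here are
 * hypothetical; the real tables appear later in this file):
 *
 *     static const AArch64DecodeTable example_table[] = {
 *         { 0x0e200800, 0xbf3e0c00, disas_example_fn },
 *         { 0x00000000, 0x00000000, NULL }
 *     };
 *     AArch64DecodeFn *fn = lookup_disas_fn(example_table, insn);
 *     if (fn) {
 *         fn(s, insn);
 *     }
 */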

/*
 * The instruction disassembly implemented here matches
 * the instruction encoding classifications in chapter C4
 * of the ARM Architecture Reference Manual (DDI0487B_a);
 * classification names and decode diagrams here should generally
 * match up with those in the manual.
 */

/* Unconditional branch (immediate)
 *   31  30       26 25                                  0
 * +----+-----------+-------------------------------------+
 * | op | 0 0 1 0 1 |                 imm26               |
 * +----+-----------+-------------------------------------+
 */
static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
{
    int64_t diff = sextract32(insn, 0, 26) * 4;

    if (insn & (1U << 31)) {
        /* BL Branch with link */
        gen_pc_plus_diff(s, cpu_reg(s, 30), curr_insn_len(s));
    }

    /* B Branch / BL Branch with link */
    reset_btype(s);
    gen_goto_tb(s, 0, diff);
}

/* Compare and branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |        imm19        |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    int64_t diff;
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    diff = sextract32(insn, 5, 19) * 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, diff);
}

/* Test and branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    int64_t diff;
    DisasLabel match;
    TCGv_i64 tcg_cmp;

    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    diff = sextract32(insn, 5, 14) * 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));

    reset_btype(s);

    match = gen_disas_label(s);
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, match.label);
    tcg_temp_free_i64(tcg_cmp);
    gen_goto_tb(s, 0, 4);
    set_disas_label(s, match);
    gen_goto_tb(s, 1, diff);
}
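
/*
 * Offset arithmetic for the branch decoders above, for reference:
 * imm26/imm19/imm14 are signed word offsets, so they are scaled by 4
 * after sign extension. E.g. for B, imm26 = 0x3ffffff sign-extends to
 * -1 and gives diff = -4 (a branch to the previous insn), and the
 * reachable range is +/-128MB.
 */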

/* Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |        imm19        | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    int64_t diff;

    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    diff = sextract32(insn, 5, 19) * 4;
    cond = extract32(insn, 0, 4);

    reset_btype(s);
    if (cond < 0x0e) {
        /* genuinely conditional branches */
        DisasLabel match = gen_disas_label(s);
        arm_gen_test_cc(cond, match.label);
        gen_goto_tb(s, 0, 4);
        set_disas_label(s, match);
        gen_goto_tb(s, 1, diff);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, diff);
    }
}

/* HINT instruction group, including various allocated HINTs */
static void handle_hint(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsigned int selector = crm << 3 | op2;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (selector) {
    case 0b00000: /* NOP */
        break;
    case 0b00011: /* WFI */
        s->base.is_jmp = DISAS_WFI;
        break;
    case 0b00001: /* YIELD */
        /* When running in MTTCG we don't generate jumps to the yield and
         * WFE helpers as it won't affect the scheduling of other vCPUs.
         * If we wanted to more completely model WFE/SEV so we don't busy
         * spin unnecessarily we would need to do something more involved.
         */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_YIELD;
        }
        break;
    case 0b00010: /* WFE */
        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
            s->base.is_jmp = DISAS_WFE;
        }
        break;
    case 0b00100: /* SEV */
    case 0b00101: /* SEVL */
    case 0b00110: /* DGH */
        /* we treat all as NOP at least for now */
        break;
    case 0b00111: /* XPACLRI */
        if (s->pauth_active) {
            gen_helper_xpaci(cpu_X[30], cpu_env, cpu_X[30]);
        }
        break;
    case 0b01000: /* PACIA1716 */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01010: /* PACIB1716 */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01100: /* AUTIA1716 */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b01110: /* AUTIB1716 */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[17], cpu_env, cpu_X[17], cpu_X[16]);
        }
        break;
    case 0b10000: /* ESB */
        /* Without RAS, we must implement this as NOP. */
        if (dc_isar_feature(aa64_ras, s)) {
            /*
             * QEMU does not have a source of physical SErrors,
             * so we are only concerned with virtual SErrors.
             * The pseudocode in the ARM for this case is
             *   if PSTATE.EL IN {EL0, EL1} && EL2Enabled() then
             *     AArch64.vESBOperation();
             * Most of the condition can be evaluated at translation time.
             * Test for EL2 present, and defer test for SEL2 to runtime.
             */
            if (s->current_el <= 1 && arm_dc_feature(s, ARM_FEATURE_EL2)) {
                gen_helper_vesb(cpu_env);
            }
        }
        break;
    case 0b11000: /* PACIAZ */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11001: /* PACIASP */
        if (s->pauth_active) {
            gen_helper_pacia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11010: /* PACIBZ */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11011: /* PACIBSP */
        if (s->pauth_active) {
            gen_helper_pacib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11100: /* AUTIAZ */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11101: /* AUTIASP */
        if (s->pauth_active) {
            gen_helper_autia(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    case 0b11110: /* AUTIBZ */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30],
                             new_tmp_a64_zero(s));
        }
        break;
    case 0b11111: /* AUTIBSP */
        if (s->pauth_active) {
            gen_helper_autib(cpu_X[30], cpu_env, cpu_X[30], cpu_X[31]);
        }
        break;
    default:
        /* default specified as NOP equivalent */
        break;
    }
}

static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}

/* CLREX, DSB, DMB, ISB */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    TCGBar bar;

    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
        switch (crm & 3) {
        case 1: /* MBReqTypes_Reads */
            bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
            break;
        case 2: /* MBReqTypes_Writes */
            bar = TCG_BAR_SC | TCG_MO_ST_ST;
            break;
        default: /* MBReqTypes_All */
            bar = TCG_BAR_SC | TCG_MO_ALL;
            break;
        }
        tcg_gen_mb(bar);
        return;
    case 6: /* ISB */
        /* We need to break the TB after this insn to execute
         * self-modified code correctly and also to take
         * any pending interrupts immediately.
         */
        reset_btype(s);
        gen_goto_tb(s, 0, 4);
        return;

    case 7: /* SB */
        if (crm != 0 || !dc_isar_feature(aa64_sb, s)) {
            goto do_unallocated;
        }
        /*
         * TODO: There is no speculation barrier opcode for TCG;
         * MB and end the TB instead.
         */
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
        gen_goto_tb(s, 0, 4);
        return;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }
}

static void gen_xaflag(void)
{
    TCGv_i32 z = tcg_temp_new_i32();

    tcg_gen_setcondi_i32(TCG_COND_EQ, z, cpu_ZF, 0);

    /*
     * (!C & !Z) << 31
     * (!(C | Z)) << 31
     * ~((C | Z) << 31)
     * ~-(C | Z)
     * (C | Z) - 1
     */
    tcg_gen_or_i32(cpu_NF, cpu_CF, z);
    tcg_gen_subi_i32(cpu_NF, cpu_NF, 1);

    /* !(Z & C) */
    tcg_gen_and_i32(cpu_ZF, z, cpu_CF);
    tcg_gen_xori_i32(cpu_ZF, cpu_ZF, 1);

    /* (!C & Z) << 31 -> -(Z & ~C) */
    tcg_gen_andc_i32(cpu_VF, z, cpu_CF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);

    /* C | Z */
    tcg_gen_or_i32(cpu_CF, cpu_CF, z);

    tcg_temp_free_i32(z);
}

static void gen_axflag(void)
{
    tcg_gen_sari_i32(cpu_VF, cpu_VF, 31);         /* V ? -1 : 0 */
    tcg_gen_andc_i32(cpu_CF, cpu_CF, cpu_VF);     /* C & !V */

    /* !(Z | V) -> !(!ZF | V) -> ZF & !V -> ZF & ~VF */
    tcg_gen_andc_i32(cpu_ZF, cpu_ZF, cpu_VF);

    tcg_gen_movi_i32(cpu_NF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* MSR (immediate) - move immediate to processor state field */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    int op = op1 << 3 | op2;

    /* End the TB by default, chaining is ok. */
    s->base.is_jmp = DISAS_TOO_MANY;

    switch (op) {
    case 0x00: /* CFINV */
        if (crm != 0 || !dc_isar_feature(aa64_condm_4, s)) {
            goto do_unallocated;
        }
        tcg_gen_xori_i32(cpu_CF, cpu_CF, 1);
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x01: /* XAFlag */
        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
            goto do_unallocated;
        }
        gen_xaflag();
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x02: /* AXFlag */
        if (crm != 0 || !dc_isar_feature(aa64_condm_5, s)) {
            goto do_unallocated;
        }
        gen_axflag();
        s->base.is_jmp = DISAS_NEXT;
        break;

    case 0x03: /* UAO */
        if (!dc_isar_feature(aa64_uao, s) || s->current_el == 0) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_UAO);
        } else {
            clear_pstate_bits(PSTATE_UAO);
        }
        gen_rebuild_hflags(s);
        break;

    case 0x04: /* PAN */
        if (!dc_isar_feature(aa64_pan, s) || s->current_el == 0) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_PAN);
        } else {
            clear_pstate_bits(PSTATE_PAN);
        }
        gen_rebuild_hflags(s);
        break;

    case 0x05: /* SPSel */
        if (s->current_el == 0) {
            goto do_unallocated;
        }
        gen_helper_msr_i_spsel(cpu_env, tcg_constant_i32(crm & PSTATE_SP));
        break;

    case 0x19: /* SSBS */
        if (!dc_isar_feature(aa64_ssbs, s)) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_SSBS);
        } else {
            clear_pstate_bits(PSTATE_SSBS);
        }
        /* Don't need to rebuild hflags since SSBS is a nop */
        break;

    case 0x1a: /* DIT */
        if (!dc_isar_feature(aa64_dit, s)) {
            goto do_unallocated;
        }
        if (crm & 1) {
            set_pstate_bits(PSTATE_DIT);
        } else {
            clear_pstate_bits(PSTATE_DIT);
        }
        /* There's no need to rebuild hflags because DIT is a nop */
        break;

    case 0x1e: /* DAIFSet */
        gen_helper_msr_i_daifset(cpu_env, tcg_constant_i32(crm));
        break;

    case 0x1f: /* DAIFClear */
        gen_helper_msr_i_daifclear(cpu_env, tcg_constant_i32(crm));
        /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */
        s->base.is_jmp = DISAS_UPDATE_EXIT;
        break;

    case 0x1c: /* TCO */
        if (dc_isar_feature(aa64_mte, s)) {
            /* Full MTE is enabled -- set the TCO bit as directed. */
            if (crm & 1) {
                set_pstate_bits(PSTATE_TCO);
            } else {
                clear_pstate_bits(PSTATE_TCO);
            }
            gen_rebuild_hflags(s);
            /* Many factors, including TCO, go into MTE_ACTIVE. */
            s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
        } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
            /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */
            s->base.is_jmp = DISAS_NEXT;
        } else {
            goto do_unallocated;
        }
        break;

    case 0x1b: /* SVCR* */
        if (!dc_isar_feature(aa64_sme, s) || crm < 2 || crm > 7) {
            goto do_unallocated;
        }
        if (sme_access_check(s)) {
            int old = s->pstate_sm | (s->pstate_za << 1);
            int new = (crm & 1) * 3;
            int msk = (crm >> 1) & 3;

            if ((old ^ new) & msk) {
                /* At least one bit changes. */
                gen_helper_set_svcr(cpu_env, tcg_constant_i32(new),
                                    tcg_constant_i32(msk));
            } else {
                s->base.is_jmp = DISAS_NEXT;
            }
        }
        break;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }
}

static void gen_get_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* build bit 31, N */
    tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
    /* build bit 30, Z */
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
    /* build bit 29, C */
    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
    /* build bit 28, V */
    tcg_gen_shri_i32(tmp, cpu_VF, 31);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
    /* generate result */
    tcg_gen_extu_i32_i64(tcg_rt, nzcv);

    tcg_temp_free_i32(nzcv);
    tcg_temp_free_i32(tmp);
}

static void gen_set_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* take NZCV from R[t] */
    tcg_gen_extrl_i64_i32(nzcv, tcg_rt);

    /* bit 31, N */
    tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
    /* bit 30, Z */
    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
    /* bit 29, C */
    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
    /* bit 28, V */
    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
    tcg_temp_free_i32(nzcv);
}
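
/*
 * Layout used by gen_get_nzcv/gen_set_nzcv above: the value mirrors the
 * architectural NZCV register, bit 31 = N, 30 = Z, 29 = C, 28 = V.
 * E.g. N=1, Z=0, C=1, V=0 reads back as 0xa0000000; all other bits are
 * zero on read and ignored on write.
 */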

static void gen_sysreg_undef(DisasContext *s, bool isread,
                             uint8_t op0, uint8_t op1, uint8_t op2,
                             uint8_t crn, uint8_t crm, uint8_t rt)
{
    /*
     * Generate code to emit an UNDEF with correct syndrome
     * information for a failed system register access.
     * This is EC_UNCATEGORIZED (ie a standard UNDEF) in most cases,
     * but if FEAT_IDST is implemented then read accesses to registers
     * in the feature ID space are reported with the EC_SYSTEMREGISTERTRAP
     * syndrome.
     */
    uint32_t syndrome;

    if (isread && dc_isar_feature(aa64_ids, s) &&
        arm_cpreg_encoding_in_idspace(op0, op1, op2, crn, crm)) {
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
    } else {
        syndrome = syn_uncategorized();
    }
    gen_exception_insn(s, 0, EXCP_UDEF, syndrome);
}

/* MRS - move from system register
 * MSR (register) - move to system register
 * SYS
 * SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    uint32_t key = ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                      crn, crm, op0, op1, op2);
    const ARMCPRegInfo *ri = get_arm_cp_reginfo(s->cp_regs, key);
    TCGv_ptr tcg_ri = NULL;
    TCGv_i64 tcg_rt;

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        gen_sysreg_undef(s, isread, op0, op1, op2, crn, crm, rt);
        return;
    }

    if (ri->accessfn || (ri->fgt && s->fgt_active)) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        uint32_t syndrome;

        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        gen_a64_update_pc(s, 0);
        tcg_ri = tcg_temp_new_ptr();
        gen_helper_access_check_cp_reg(tcg_ri, cpu_env,
                                       tcg_constant_i32(key),
                                       tcg_constant_i32(syndrome),
                                       tcg_constant_i32(isread));
    } else if (ri->type & ARM_CP_RAISES_EXC) {
        /*
         * The readfn or writefn might raise an exception;
         * synchronize the CPU state in case it does.
         */
        gen_a64_update_pc(s, 0);
    }

    /* Handle special cases first */
    switch (ri->type & ARM_CP_SPECIAL_MASK) {
    case 0:
        break;
    case ARM_CP_NOP:
        goto exit;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        goto exit;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        goto exit;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into.
*/ 1975 if (s->mte_active[0]) { 1976 int desc = 0; 1977 1978 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); 1979 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); 1980 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); 1981 1982 tcg_rt = tcg_temp_new_i64(); 1983 gen_helper_mte_check_zva(tcg_rt, cpu_env, 1984 tcg_constant_i32(desc), cpu_reg(s, rt)); 1985 } else { 1986 tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); 1987 } 1988 gen_helper_dc_zva(cpu_env, tcg_rt); 1989 goto exit; 1990 case ARM_CP_DC_GVA: 1991 { 1992 TCGv_i64 clean_addr, tag; 1993 1994 /* 1995 * DC_GVA, like DC_ZVA, requires that we supply the original 1996 * pointer for an invalid page. Probe that address first. 1997 */ 1998 tcg_rt = cpu_reg(s, rt); 1999 clean_addr = clean_data_tbi(s, tcg_rt); 2000 gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); 2001 2002 if (s->ata) { 2003 /* Extract the tag from the register to match STZGM. */ 2004 tag = tcg_temp_new_i64(); 2005 tcg_gen_shri_i64(tag, tcg_rt, 56); 2006 gen_helper_stzgm_tags(cpu_env, clean_addr, tag); 2007 tcg_temp_free_i64(tag); 2008 } 2009 } 2010 goto exit; 2011 case ARM_CP_DC_GZVA: 2012 { 2013 TCGv_i64 clean_addr, tag; 2014 2015 /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ 2016 tcg_rt = cpu_reg(s, rt); 2017 clean_addr = clean_data_tbi(s, tcg_rt); 2018 gen_helper_dc_zva(cpu_env, clean_addr); 2019 2020 if (s->ata) { 2021 /* Extract the tag from the register to match STZGM. */ 2022 tag = tcg_temp_new_i64(); 2023 tcg_gen_shri_i64(tag, tcg_rt, 56); 2024 gen_helper_stzgm_tags(cpu_env, clean_addr, tag); 2025 tcg_temp_free_i64(tag); 2026 } 2027 } 2028 goto exit; 2029 default: 2030 g_assert_not_reached(); 2031 } 2032 if ((ri->type & ARM_CP_FPU) && !fp_access_check_only(s)) { 2033 goto exit; 2034 } else if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) { 2035 goto exit; 2036 } else if ((ri->type & ARM_CP_SME) && !sme_access_check(s)) { 2037 goto exit; 2038 } 2039 2040 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { 2041 gen_io_start(); 2042 } 2043 2044 tcg_rt = cpu_reg(s, rt); 2045 2046 if (isread) { 2047 if (ri->type & ARM_CP_CONST) { 2048 tcg_gen_movi_i64(tcg_rt, ri->resetvalue); 2049 } else if (ri->readfn) { 2050 if (!tcg_ri) { 2051 tcg_ri = gen_lookup_cp_reg(key); 2052 } 2053 gen_helper_get_cp_reg64(tcg_rt, cpu_env, tcg_ri); 2054 } else { 2055 tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset); 2056 } 2057 } else { 2058 if (ri->type & ARM_CP_CONST) { 2059 /* If not forbidden by access permissions, treat as WI */ 2060 goto exit; 2061 } else if (ri->writefn) { 2062 if (!tcg_ri) { 2063 tcg_ri = gen_lookup_cp_reg(key); 2064 } 2065 gen_helper_set_cp_reg64(cpu_env, tcg_ri, tcg_rt); 2066 } else { 2067 tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset); 2068 } 2069 } 2070 2071 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { 2072 /* I/O operations must end the TB here (whether read or write) */ 2073 s->base.is_jmp = DISAS_UPDATE_EXIT; 2074 } 2075 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { 2076 /* 2077 * A write to any coprocessor register that ends a TB 2078 * must rebuild the hflags for the next TB. 2079 */ 2080 gen_rebuild_hflags(s); 2081 /* 2082 * We default to ending the TB on a coprocessor register write, 2083 * but allow this to be suppressed by the register definition 2084 * (usually only necessary to work around guest bugs).
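 * (A suppressed write must not change anything that the hflags or
 * the remainder of the current TB depend on.)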
2085 */ 2086 s->base.is_jmp = DISAS_UPDATE_EXIT; 2087 } 2088 2089 exit: 2090 if (tcg_ri) { 2091 tcg_temp_free_ptr(tcg_ri); 2092 } 2093 } 2094 2095 /* System 2096 * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0 2097 * +---------------------+---+-----+-----+-------+-------+-----+------+ 2098 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt | 2099 * +---------------------+---+-----+-----+-------+-------+-----+------+ 2100 */ 2101 static void disas_system(DisasContext *s, uint32_t insn) 2102 { 2103 unsigned int l, op0, op1, crn, crm, op2, rt; 2104 l = extract32(insn, 21, 1); 2105 op0 = extract32(insn, 19, 2); 2106 op1 = extract32(insn, 16, 3); 2107 crn = extract32(insn, 12, 4); 2108 crm = extract32(insn, 8, 4); 2109 op2 = extract32(insn, 5, 3); 2110 rt = extract32(insn, 0, 5); 2111 2112 if (op0 == 0) { 2113 if (l || rt != 31) { 2114 unallocated_encoding(s); 2115 return; 2116 } 2117 switch (crn) { 2118 case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */ 2119 handle_hint(s, insn, op1, op2, crm); 2120 break; 2121 case 3: /* CLREX, DSB, DMB, ISB */ 2122 handle_sync(s, insn, op1, op2, crm); 2123 break; 2124 case 4: /* MSR (immediate) */ 2125 handle_msr_i(s, insn, op1, op2, crm); 2126 break; 2127 default: 2128 unallocated_encoding(s); 2129 break; 2130 } 2131 return; 2132 } 2133 handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt); 2134 } 2135 2136 /* Exception generation 2137 * 2138 * 31 24 23 21 20 5 4 2 1 0 2139 * +-----------------+-----+------------------------+-----+----+ 2140 * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL | 2141 * +-----------------------+------------------------+----------+ 2142 */ 2143 static void disas_exc(DisasContext *s, uint32_t insn) 2144 { 2145 int opc = extract32(insn, 21, 3); 2146 int op2_ll = extract32(insn, 0, 5); 2147 int imm16 = extract32(insn, 5, 16); 2148 uint32_t syndrome; 2149 2150 switch (opc) { 2151 case 0: 2152 /* For SVC, HVC and SMC we advance the single-step state 2153 * machine before taking the exception. This is architecturally 2154 * mandated, to ensure that single-stepping a system call 2155 * instruction works properly. 2156 */ 2157 switch (op2_ll) { 2158 case 1: /* SVC */ 2159 syndrome = syn_aa64_svc(imm16); 2160 if (s->fgt_svc) { 2161 gen_exception_insn_el(s, 0, EXCP_UDEF, syndrome, 2); 2162 break; 2163 } 2164 gen_ss_advance(s); 2165 gen_exception_insn(s, 4, EXCP_SWI, syndrome); 2166 break; 2167 case 2: /* HVC */ 2168 if (s->current_el == 0) { 2169 unallocated_encoding(s); 2170 break; 2171 } 2172 /* The pre HVC helper handles cases when HVC gets trapped 2173 * as an undefined insn by runtime configuration. 2174 */ 2175 gen_a64_update_pc(s, 0); 2176 gen_helper_pre_hvc(cpu_env); 2177 gen_ss_advance(s); 2178 gen_exception_insn_el(s, 4, EXCP_HVC, syn_aa64_hvc(imm16), 2); 2179 break; 2180 case 3: /* SMC */ 2181 if (s->current_el == 0) { 2182 unallocated_encoding(s); 2183 break; 2184 } 2185 gen_a64_update_pc(s, 0); 2186 gen_helper_pre_smc(cpu_env, tcg_constant_i32(syn_aa64_smc(imm16))); 2187 gen_ss_advance(s); 2188 gen_exception_insn_el(s, 4, EXCP_SMC, syn_aa64_smc(imm16), 3); 2189 break; 2190 default: 2191 unallocated_encoding(s); 2192 break; 2193 } 2194 break; 2195 case 1: 2196 if (op2_ll != 0) { 2197 unallocated_encoding(s); 2198 break; 2199 } 2200 /* BRK */ 2201 gen_exception_bkpt_insn(s, syn_aa64_bkpt(imm16)); 2202 break; 2203 case 2: 2204 if (op2_ll != 0) { 2205 unallocated_encoding(s); 2206 break; 2207 } 2208 /* HLT. This has two purposes. 2209 * Architecturally, it is an external halting debug instruction. 
2210 * Since QEMU doesn't implement external debug, we treat this as 2211 * it is required for halting debug disabled: it will UNDEF. 2212 * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction. 2213 */ 2214 if (semihosting_enabled(s->current_el == 0) && imm16 == 0xf000) { 2215 gen_exception_internal_insn(s, EXCP_SEMIHOST); 2216 } else { 2217 unallocated_encoding(s); 2218 } 2219 break; 2220 case 5: 2221 if (op2_ll < 1 || op2_ll > 3) { 2222 unallocated_encoding(s); 2223 break; 2224 } 2225 /* DCPS1, DCPS2, DCPS3 */ 2226 unallocated_encoding(s); 2227 break; 2228 default: 2229 unallocated_encoding(s); 2230 break; 2231 } 2232 } 2233 2234 /* Unconditional branch (register) 2235 * 31 25 24 21 20 16 15 10 9 5 4 0 2236 * +---------------+-------+-------+-------+------+-------+ 2237 * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 | 2238 * +---------------+-------+-------+-------+------+-------+ 2239 */ 2240 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) 2241 { 2242 unsigned int opc, op2, op3, rn, op4; 2243 unsigned btype_mod = 2; /* 0: BR, 1: BLR, 2: other */ 2244 TCGv_i64 dst; 2245 TCGv_i64 modifier; 2246 2247 opc = extract32(insn, 21, 4); 2248 op2 = extract32(insn, 16, 5); 2249 op3 = extract32(insn, 10, 6); 2250 rn = extract32(insn, 5, 5); 2251 op4 = extract32(insn, 0, 5); 2252 2253 if (op2 != 0x1f) { 2254 goto do_unallocated; 2255 } 2256 2257 switch (opc) { 2258 case 0: /* BR */ 2259 case 1: /* BLR */ 2260 case 2: /* RET */ 2261 btype_mod = opc; 2262 switch (op3) { 2263 case 0: 2264 /* BR, BLR, RET */ 2265 if (op4 != 0) { 2266 goto do_unallocated; 2267 } 2268 dst = cpu_reg(s, rn); 2269 break; 2270 2271 case 2: 2272 case 3: 2273 if (!dc_isar_feature(aa64_pauth, s)) { 2274 goto do_unallocated; 2275 } 2276 if (opc == 2) { 2277 /* RETAA, RETAB */ 2278 if (rn != 0x1f || op4 != 0x1f) { 2279 goto do_unallocated; 2280 } 2281 rn = 30; 2282 modifier = cpu_X[31]; 2283 } else { 2284 /* BRAAZ, BRABZ, BLRAAZ, BLRABZ */ 2285 if (op4 != 0x1f) { 2286 goto do_unallocated; 2287 } 2288 modifier = new_tmp_a64_zero(s); 2289 } 2290 if (s->pauth_active) { 2291 dst = tcg_temp_new_i64(); 2292 if (op3 == 2) { 2293 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier); 2294 } else { 2295 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier); 2296 } 2297 } else { 2298 dst = cpu_reg(s, rn); 2299 } 2300 break; 2301 2302 default: 2303 goto do_unallocated; 2304 } 2305 /* BLR also needs to load return address */ 2306 if (opc == 1) { 2307 TCGv_i64 lr = cpu_reg(s, 30); 2308 if (dst == lr) { 2309 TCGv_i64 tmp = tcg_temp_new_i64(); 2310 tcg_gen_mov_i64(tmp, dst); 2311 dst = tmp; 2312 } 2313 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 2314 } 2315 gen_a64_set_pc(s, dst); 2316 break; 2317 2318 case 8: /* BRAA */ 2319 case 9: /* BLRAA */ 2320 if (!dc_isar_feature(aa64_pauth, s)) { 2321 goto do_unallocated; 2322 } 2323 if ((op3 & ~1) != 2) { 2324 goto do_unallocated; 2325 } 2326 btype_mod = opc & 1; 2327 if (s->pauth_active) { 2328 dst = tcg_temp_new_i64(); 2329 modifier = cpu_reg_sp(s, op4); 2330 if (op3 == 2) { 2331 gen_helper_autia(dst, cpu_env, cpu_reg(s, rn), modifier); 2332 } else { 2333 gen_helper_autib(dst, cpu_env, cpu_reg(s, rn), modifier); 2334 } 2335 } else { 2336 dst = cpu_reg(s, rn); 2337 } 2338 /* BLRAA also needs to load return address */ 2339 if (opc == 9) { 2340 TCGv_i64 lr = cpu_reg(s, 30); 2341 if (dst == lr) { 2342 TCGv_i64 tmp = tcg_temp_new_i64(); 2343 tcg_gen_mov_i64(tmp, dst); 2344 dst = tmp; 2345 } 2346 gen_pc_plus_diff(s, lr, curr_insn_len(s)); 2347 } 2348 gen_a64_set_pc(s, dst); 
2349 break; 2350 2351 case 4: /* ERET */ 2352 if (s->current_el == 0) { 2353 goto do_unallocated; 2354 } 2355 switch (op3) { 2356 case 0: /* ERET */ 2357 if (op4 != 0) { 2358 goto do_unallocated; 2359 } 2360 if (s->fgt_eret) { 2361 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(op3), 2); 2362 return; 2363 } 2364 dst = tcg_temp_new_i64(); 2365 tcg_gen_ld_i64(dst, cpu_env, 2366 offsetof(CPUARMState, elr_el[s->current_el])); 2367 break; 2368 2369 case 2: /* ERETAA */ 2370 case 3: /* ERETAB */ 2371 if (!dc_isar_feature(aa64_pauth, s)) { 2372 goto do_unallocated; 2373 } 2374 if (rn != 0x1f || op4 != 0x1f) { 2375 goto do_unallocated; 2376 } 2377 /* The FGT trap takes precedence over an auth trap. */ 2378 if (s->fgt_eret) { 2379 gen_exception_insn_el(s, 0, EXCP_UDEF, syn_erettrap(op3), 2); 2380 return; 2381 } 2382 dst = tcg_temp_new_i64(); 2383 tcg_gen_ld_i64(dst, cpu_env, 2384 offsetof(CPUARMState, elr_el[s->current_el])); 2385 if (s->pauth_active) { 2386 modifier = cpu_X[31]; 2387 if (op3 == 2) { 2388 gen_helper_autia(dst, cpu_env, dst, modifier); 2389 } else { 2390 gen_helper_autib(dst, cpu_env, dst, modifier); 2391 } 2392 } 2393 break; 2394 2395 default: 2396 goto do_unallocated; 2397 } 2398 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { 2399 gen_io_start(); 2400 } 2401 2402 gen_helper_exception_return(cpu_env, dst); 2403 tcg_temp_free_i64(dst); 2404 /* Must exit loop to check un-masked IRQs */ 2405 s->base.is_jmp = DISAS_EXIT; 2406 return; 2407 2408 case 5: /* DRPS */ 2409 if (op3 != 0 || op4 != 0 || rn != 0x1f) { 2410 goto do_unallocated; 2411 } else { 2412 unallocated_encoding(s); 2413 } 2414 return; 2415 2416 default: 2417 do_unallocated: 2418 unallocated_encoding(s); 2419 return; 2420 } 2421 2422 switch (btype_mod) { 2423 case 0: /* BR */ 2424 if (dc_isar_feature(aa64_bti, s)) { 2425 /* BR to {x16,x17} or !guard -> 1, else 3. */ 2426 set_btype(s, rn == 16 || rn == 17 || !s->guarded_page ? 1 : 3); 2427 } 2428 break; 2429 2430 case 1: /* BLR */ 2431 if (dc_isar_feature(aa64_bti, s)) { 2432 /* BLR sets BTYPE to 2, regardless of source guarded page. */ 2433 set_btype(s, 2); 2434 } 2435 break; 2436 2437 default: /* RET or none of the above. */ 2438 /* BTYPE will be set to 0 by normal end-of-insn processing. 
*/ 2439 break; 2440 } 2441 2442 s->base.is_jmp = DISAS_JUMP; 2443 } 2444 2445 /* Branches, exception generating and system instructions */ 2446 static void disas_b_exc_sys(DisasContext *s, uint32_t insn) 2447 { 2448 switch (extract32(insn, 25, 7)) { 2449 case 0x0a: case 0x0b: 2450 case 0x4a: case 0x4b: /* Unconditional branch (immediate) */ 2451 disas_uncond_b_imm(s, insn); 2452 break; 2453 case 0x1a: case 0x5a: /* Compare & branch (immediate) */ 2454 disas_comp_b_imm(s, insn); 2455 break; 2456 case 0x1b: case 0x5b: /* Test & branch (immediate) */ 2457 disas_test_b_imm(s, insn); 2458 break; 2459 case 0x2a: /* Conditional branch (immediate) */ 2460 disas_cond_b_imm(s, insn); 2461 break; 2462 case 0x6a: /* Exception generation / System */ 2463 if (insn & (1 << 24)) { 2464 if (extract32(insn, 22, 2) == 0) { 2465 disas_system(s, insn); 2466 } else { 2467 unallocated_encoding(s); 2468 } 2469 } else { 2470 disas_exc(s, insn); 2471 } 2472 break; 2473 case 0x6b: /* Unconditional branch (register) */ 2474 disas_uncond_b_reg(s, insn); 2475 break; 2476 default: 2477 unallocated_encoding(s); 2478 break; 2479 } 2480 } 2481 2482 /* 2483 * Load/Store exclusive instructions are implemented by remembering 2484 * the value/address loaded, and seeing if these are the same 2485 * when the store is performed. This is not actually the architecturally 2486 * mandated semantics, but it works for typical guest code sequences 2487 * and avoids having to monitor regular stores. 2488 * 2489 * The store exclusive uses the atomic cmpxchg primitives to avoid 2490 * races in multi-threaded linux-user and when MTTCG softmmu is 2491 * enabled. 2492 */ 2493 static void gen_load_exclusive(DisasContext *s, int rt, int rt2, 2494 TCGv_i64 addr, int size, bool is_pair) 2495 { 2496 int idx = get_mem_index(s); 2497 MemOp memop = s->be_data; 2498 2499 g_assert(size <= 3); 2500 if (is_pair) { 2501 g_assert(size >= 2); 2502 if (size == 2) { 2503 /* The pair must be single-copy atomic for the doubleword. */ 2504 memop |= MO_64 | MO_ALIGN; 2505 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop); 2506 if (s->be_data == MO_LE) { 2507 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32); 2508 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32); 2509 } else { 2510 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32); 2511 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32); 2512 } 2513 } else { 2514 /* The pair must be single-copy atomic for *each* doubleword, not 2515 the entire quadword, however it must be quadword aligned. 
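 (Hence only the first of the two 8-byte loads below is marked
 MO_ALIGN_16; the second needs no alignment check of its own.)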
*/ 2516 memop |= MO_64; 2517 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, 2518 memop | MO_ALIGN_16); 2519 2520 TCGv_i64 addr2 = tcg_temp_new_i64(); 2521 tcg_gen_addi_i64(addr2, addr, 8); 2522 tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop); 2523 tcg_temp_free_i64(addr2); 2524 2525 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2526 tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high); 2527 } 2528 } else { 2529 memop |= size | MO_ALIGN; 2530 tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop); 2531 tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val); 2532 } 2533 tcg_gen_mov_i64(cpu_exclusive_addr, addr); 2534 } 2535 2536 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, 2537 TCGv_i64 addr, int size, int is_pair) 2538 { 2539 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr] 2540 * && (!is_pair || env->exclusive_high == [addr + datasize])) { 2541 * [addr] = {Rt}; 2542 * if (is_pair) { 2543 * [addr + datasize] = {Rt2}; 2544 * } 2545 * {Rd} = 0; 2546 * } else { 2547 * {Rd} = 1; 2548 * } 2549 * env->exclusive_addr = -1; 2550 */ 2551 TCGLabel *fail_label = gen_new_label(); 2552 TCGLabel *done_label = gen_new_label(); 2553 TCGv_i64 tmp; 2554 2555 tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label); 2556 2557 tmp = tcg_temp_new_i64(); 2558 if (is_pair) { 2559 if (size == 2) { 2560 if (s->be_data == MO_LE) { 2561 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); 2562 } else { 2563 tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt)); 2564 } 2565 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, 2566 cpu_exclusive_val, tmp, 2567 get_mem_index(s), 2568 MO_64 | MO_ALIGN | s->be_data); 2569 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2570 } else { 2571 TCGv_i128 t16 = tcg_temp_new_i128(); 2572 TCGv_i128 c16 = tcg_temp_new_i128(); 2573 TCGv_i64 a, b; 2574 2575 if (s->be_data == MO_LE) { 2576 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt), cpu_reg(s, rt2)); 2577 tcg_gen_concat_i64_i128(c16, cpu_exclusive_val, 2578 cpu_exclusive_high); 2579 } else { 2580 tcg_gen_concat_i64_i128(t16, cpu_reg(s, rt2), cpu_reg(s, rt)); 2581 tcg_gen_concat_i64_i128(c16, cpu_exclusive_high, 2582 cpu_exclusive_val); 2583 } 2584 2585 tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16, 2586 get_mem_index(s), 2587 MO_128 | MO_ALIGN | s->be_data); 2588 tcg_temp_free_i128(c16); 2589 2590 a = tcg_temp_new_i64(); 2591 b = tcg_temp_new_i64(); 2592 if (s->be_data == MO_LE) { 2593 tcg_gen_extr_i128_i64(a, b, t16); 2594 } else { 2595 tcg_gen_extr_i128_i64(b, a, t16); 2596 } 2597 2598 tcg_gen_xor_i64(a, a, cpu_exclusive_val); 2599 tcg_gen_xor_i64(b, b, cpu_exclusive_high); 2600 tcg_gen_or_i64(tmp, a, b); 2601 tcg_temp_free_i64(a); 2602 tcg_temp_free_i64(b); 2603 tcg_temp_free_i128(t16); 2604 2605 tcg_gen_setcondi_i64(TCG_COND_NE, tmp, tmp, 0); 2606 } 2607 } else { 2608 tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val, 2609 cpu_reg(s, rt), get_mem_index(s), 2610 size | MO_ALIGN | s->be_data); 2611 tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val); 2612 } 2613 tcg_gen_mov_i64(cpu_reg(s, rd), tmp); 2614 tcg_temp_free_i64(tmp); 2615 tcg_gen_br(done_label); 2616 2617 gen_set_label(fail_label); 2618 tcg_gen_movi_i64(cpu_reg(s, rd), 1); 2619 gen_set_label(done_label); 2620 tcg_gen_movi_i64(cpu_exclusive_addr, -1); 2621 } 2622 2623 static void gen_compare_and_swap(DisasContext *s, int rs, int rt, 2624 int rn, int size) 2625 { 2626 TCGv_i64 tcg_rs = cpu_reg(s, rs); 2627 TCGv_i64 tcg_rt = 
cpu_reg(s, rt); 2628 int memidx = get_mem_index(s); 2629 TCGv_i64 clean_addr; 2630 2631 if (rn == 31) { 2632 gen_check_sp_alignment(s); 2633 } 2634 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); 2635 tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx, 2636 size | MO_ALIGN | s->be_data); 2637 } 2638 2639 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, 2640 int rn, int size) 2641 { 2642 TCGv_i64 s1 = cpu_reg(s, rs); 2643 TCGv_i64 s2 = cpu_reg(s, rs + 1); 2644 TCGv_i64 t1 = cpu_reg(s, rt); 2645 TCGv_i64 t2 = cpu_reg(s, rt + 1); 2646 TCGv_i64 clean_addr; 2647 int memidx = get_mem_index(s); 2648 2649 if (rn == 31) { 2650 gen_check_sp_alignment(s); 2651 } 2652 2653 /* This is a single atomic access, despite the "pair". */ 2654 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1); 2655 2656 if (size == 2) { 2657 TCGv_i64 cmp = tcg_temp_new_i64(); 2658 TCGv_i64 val = tcg_temp_new_i64(); 2659 2660 if (s->be_data == MO_LE) { 2661 tcg_gen_concat32_i64(val, t1, t2); 2662 tcg_gen_concat32_i64(cmp, s1, s2); 2663 } else { 2664 tcg_gen_concat32_i64(val, t2, t1); 2665 tcg_gen_concat32_i64(cmp, s2, s1); 2666 } 2667 2668 tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, 2669 MO_64 | MO_ALIGN | s->be_data); 2670 tcg_temp_free_i64(val); 2671 2672 if (s->be_data == MO_LE) { 2673 tcg_gen_extr32_i64(s1, s2, cmp); 2674 } else { 2675 tcg_gen_extr32_i64(s2, s1, cmp); 2676 } 2677 tcg_temp_free_i64(cmp); 2678 } else { 2679 TCGv_i128 cmp = tcg_temp_new_i128(); 2680 TCGv_i128 val = tcg_temp_new_i128(); 2681 2682 if (s->be_data == MO_LE) { 2683 tcg_gen_concat_i64_i128(val, t1, t2); 2684 tcg_gen_concat_i64_i128(cmp, s1, s2); 2685 } else { 2686 tcg_gen_concat_i64_i128(val, t2, t1); 2687 tcg_gen_concat_i64_i128(cmp, s2, s1); 2688 } 2689 2690 tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, 2691 MO_128 | MO_ALIGN | s->be_data); 2692 tcg_temp_free_i128(val); 2693 2694 if (s->be_data == MO_LE) { 2695 tcg_gen_extr_i128_i64(s1, s2, cmp); 2696 } else { 2697 tcg_gen_extr_i128_i64(s2, s1, cmp); 2698 } 2699 tcg_temp_free_i128(cmp); 2700 } 2701 } 2702 2703 /* Compute the ISS.SF (Sixty-Four) bit from the register size. This logic 2704 * is derived from the ARMv8 specs for LDR (Shared decode for all encodings). 2705 */ 2706 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc) 2707 { 2708 int opc0 = extract32(opc, 0, 1); 2709 int regsize; 2710 2711 if (is_signed) { 2712 regsize = opc0 ? 32 : 64; 2713 } else { 2714 regsize = size == 3 ?
64 : 32; 2715 } 2716 return regsize == 64; 2717 } 2718 2719 /* Load/store exclusive 2720 * 2721 * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0 2722 * +-----+-------------+----+---+----+------+----+-------+------+------+ 2723 * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt | 2724 * +-----+-------------+----+---+----+------+----+-------+------+------+ 2725 * 2726 * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit 2727 * L: 0 -> store, 1 -> load 2728 * o2: 0 -> exclusive, 1 -> not 2729 * o1: 0 -> single register, 1 -> register pair 2730 * o0: 1 -> load-acquire/store-release, 0 -> not 2731 */ 2732 static void disas_ldst_excl(DisasContext *s, uint32_t insn) 2733 { 2734 int rt = extract32(insn, 0, 5); 2735 int rn = extract32(insn, 5, 5); 2736 int rt2 = extract32(insn, 10, 5); 2737 int rs = extract32(insn, 16, 5); 2738 int is_lasr = extract32(insn, 15, 1); 2739 int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr; 2740 int size = extract32(insn, 30, 2); 2741 TCGv_i64 clean_addr; 2742 2743 switch (o2_L_o1_o0) { 2744 case 0x0: /* STXR */ 2745 case 0x1: /* STLXR */ 2746 if (rn == 31) { 2747 gen_check_sp_alignment(s); 2748 } 2749 if (is_lasr) { 2750 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2751 } 2752 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2753 true, rn != 31, size); 2754 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false); 2755 return; 2756 2757 case 0x4: /* LDXR */ 2758 case 0x5: /* LDAXR */ 2759 if (rn == 31) { 2760 gen_check_sp_alignment(s); 2761 } 2762 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2763 false, rn != 31, size); 2764 s->is_ldex = true; 2765 gen_load_exclusive(s, rt, rt2, clean_addr, size, false); 2766 if (is_lasr) { 2767 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2768 } 2769 return; 2770 2771 case 0x8: /* STLLR */ 2772 if (!dc_isar_feature(aa64_lor, s)) { 2773 break; 2774 } 2775 /* StoreLORelease is the same as Store-Release for QEMU. */ 2776 /* fall through */ 2777 case 0x9: /* STLR */ 2778 /* Generate ISS for non-exclusive accesses including LASR. */ 2779 if (rn == 31) { 2780 gen_check_sp_alignment(s); 2781 } 2782 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2783 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2784 true, rn != 31, size); 2785 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 2786 do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt, 2787 disas_ldst_compute_iss_sf(size, false, 0), is_lasr); 2788 return; 2789 2790 case 0xc: /* LDLAR */ 2791 if (!dc_isar_feature(aa64_lor, s)) { 2792 break; 2793 } 2794 /* LoadLOAcquire is the same as Load-Acquire for QEMU. */ 2795 /* fall through */ 2796 case 0xd: /* LDAR */ 2797 /* Generate ISS for non-exclusive accesses including LASR. 
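 * The syndrome information allows a hypervisor to decode the access
 * from ESR_ELx on a data abort without fetching the instruction.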
*/ 2798 if (rn == 31) { 2799 gen_check_sp_alignment(s); 2800 } 2801 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2802 false, rn != 31, size); 2803 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 2804 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true, 2805 rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); 2806 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2807 return; 2808 2809 case 0x2: case 0x3: /* CASP / STXP */ 2810 if (size & 2) { /* STXP / STLXP */ 2811 if (rn == 31) { 2812 gen_check_sp_alignment(s); 2813 } 2814 if (is_lasr) { 2815 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 2816 } 2817 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2818 true, rn != 31, size); 2819 gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true); 2820 return; 2821 } 2822 if (rt2 == 31 2823 && ((rt | rs) & 1) == 0 2824 && dc_isar_feature(aa64_atomics, s)) { 2825 /* CASP / CASPL */ 2826 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); 2827 return; 2828 } 2829 break; 2830 2831 case 0x6: case 0x7: /* CASPA / LDXP */ 2832 if (size & 2) { /* LDXP / LDAXP */ 2833 if (rn == 31) { 2834 gen_check_sp_alignment(s); 2835 } 2836 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), 2837 false, rn != 31, size); 2838 s->is_ldex = true; 2839 gen_load_exclusive(s, rt, rt2, clean_addr, size, true); 2840 if (is_lasr) { 2841 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 2842 } 2843 return; 2844 } 2845 if (rt2 == 31 2846 && ((rt | rs) & 1) == 0 2847 && dc_isar_feature(aa64_atomics, s)) { 2848 /* CASPA / CASPAL */ 2849 gen_compare_and_swap_pair(s, rs, rt, rn, size | 2); 2850 return; 2851 } 2852 break; 2853 2854 case 0xa: /* CAS */ 2855 case 0xb: /* CASL */ 2856 case 0xe: /* CASA */ 2857 case 0xf: /* CASAL */ 2858 if (rt2 == 31 && dc_isar_feature(aa64_atomics, s)) { 2859 gen_compare_and_swap(s, rs, rt, rn, size); 2860 return; 2861 } 2862 break; 2863 } 2864 unallocated_encoding(s); 2865 } 2866 2867 /* 2868 * Load register (literal) 2869 * 2870 * 31 30 29 27 26 25 24 23 5 4 0 2871 * +-----+-------+---+-----+-------------------+-------+ 2872 * | opc | 0 1 1 | V | 0 0 | imm19 | Rt | 2873 * +-----+-------+---+-----+-------------------+-------+ 2874 * 2875 * V: 1 -> vector (simd/fp) 2876 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit, 2877 * 10-> 32 bit signed, 11 -> prefetch 2878 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated) 2879 */ 2880 static void disas_ld_lit(DisasContext *s, uint32_t insn) 2881 { 2882 int rt = extract32(insn, 0, 5); 2883 int64_t imm = sextract32(insn, 5, 19) << 2; 2884 bool is_vector = extract32(insn, 26, 1); 2885 int opc = extract32(insn, 30, 2); 2886 bool is_signed = false; 2887 int size = 2; 2888 TCGv_i64 tcg_rt, clean_addr; 2889 2890 if (is_vector) { 2891 if (opc == 3) { 2892 unallocated_encoding(s); 2893 return; 2894 } 2895 size = 2 + opc; 2896 if (!fp_access_check(s)) { 2897 return; 2898 } 2899 } else { 2900 if (opc == 3) { 2901 /* PRFM (literal) : prefetch */ 2902 return; 2903 } 2904 size = 2 + extract32(opc, 0, 1); 2905 is_signed = extract32(opc, 1, 1); 2906 } 2907 2908 tcg_rt = cpu_reg(s, rt); 2909 2910 clean_addr = tcg_temp_new_i64(); 2911 gen_pc_plus_diff(s, clean_addr, imm); 2912 if (is_vector) { 2913 do_fp_ld(s, rt, clean_addr, size); 2914 } else { 2915 /* Only unsigned 32bit loads target 32bit registers. 
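 * e.g. LDR w0, <label> (opc == 0) gives iss_sf == false, while
 * LDR x0, <label> and LDRSW x0, <label> both write a 64-bit register.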
*/ 2916 bool iss_sf = opc != 0; 2917 2918 do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 2919 false, true, rt, iss_sf, false); 2920 } 2921 } 2922 2923 /* 2924 * LDNP (Load Pair - non-temporal hint) 2925 * LDP (Load Pair - non vector) 2926 * LDPSW (Load Pair Signed Word - non vector) 2927 * STNP (Store Pair - non-temporal hint) 2928 * STP (Store Pair - non vector) 2929 * LDNP (Load Pair of SIMD&FP - non-temporal hint) 2930 * LDP (Load Pair of SIMD&FP) 2931 * STNP (Store Pair of SIMD&FP - non-temporal hint) 2932 * STP (Store Pair of SIMD&FP) 2933 * 2934 * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0 2935 * +-----+-------+---+---+-------+---+-----------------------------+ 2936 * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt | 2937 * +-----+-------+---+---+-------+---+-------+-------+------+------+ 2938 * 2939 * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit 2940 * LDPSW/STGP 01 2941 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit 2942 * V: 0 -> GPR, 1 -> Vector 2943 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index, 2944 * 10 -> signed offset, 11 -> pre-index 2945 * L: 0 -> Store 1 -> Load 2946 * 2947 * Rt, Rt2 = GPR or SIMD registers to be stored 2948 * Rn = general purpose register containing address 2949 * imm7 = signed offset (multiple of 4 or 8 depending on size) 2950 */ 2951 static void disas_ldst_pair(DisasContext *s, uint32_t insn) 2952 { 2953 int rt = extract32(insn, 0, 5); 2954 int rn = extract32(insn, 5, 5); 2955 int rt2 = extract32(insn, 10, 5); 2956 uint64_t offset = sextract64(insn, 15, 7); 2957 int index = extract32(insn, 23, 2); 2958 bool is_vector = extract32(insn, 26, 1); 2959 bool is_load = extract32(insn, 22, 1); 2960 int opc = extract32(insn, 30, 2); 2961 2962 bool is_signed = false; 2963 bool postindex = false; 2964 bool wback = false; 2965 bool set_tag = false; 2966 2967 TCGv_i64 clean_addr, dirty_addr; 2968 2969 int size; 2970 2971 if (opc == 3) { 2972 unallocated_encoding(s); 2973 return; 2974 } 2975 2976 if (is_vector) { 2977 size = 2 + opc; 2978 } else if (opc == 1 && !is_load) { 2979 /* STGP */ 2980 if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) { 2981 unallocated_encoding(s); 2982 return; 2983 } 2984 size = 3; 2985 set_tag = true; 2986 } else { 2987 size = 2 + extract32(opc, 1, 1); 2988 is_signed = extract32(opc, 0, 1); 2989 if (!is_load && is_signed) { 2990 unallocated_encoding(s); 2991 return; 2992 } 2993 } 2994 2995 switch (index) { 2996 case 1: /* post-index */ 2997 postindex = true; 2998 wback = true; 2999 break; 3000 case 0: 3001 /* signed offset with "non-temporal" hint. Since we don't emulate 3002 * caches we don't care about hints to the cache system about 3003 * data access patterns, and handle this identically to plain 3004 * signed offset. 3005 */ 3006 if (is_signed) { 3007 /* There is no non-temporal-hint version of LDPSW */ 3008 unallocated_encoding(s); 3009 return; 3010 } 3011 postindex = false; 3012 break; 3013 case 2: /* signed offset, rn not updated */ 3014 postindex = false; 3015 break; 3016 case 3: /* pre-index */ 3017 postindex = false; 3018 wback = true; 3019 break; 3020 } 3021 3022 if (is_vector && !fp_access_check(s)) { 3023 return; 3024 } 3025 3026 offset <<= (set_tag ? 
LOG2_TAG_GRANULE : size); 3027 3028 if (rn == 31) { 3029 gen_check_sp_alignment(s); 3030 } 3031 3032 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3033 if (!postindex) { 3034 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3035 } 3036 3037 if (set_tag) { 3038 if (!s->ata) { 3039 /* 3040 * TODO: We could rely on the stores below, at least for 3041 * system mode, if we arrange to add MO_ALIGN_16. 3042 */ 3043 gen_helper_stg_stub(cpu_env, dirty_addr); 3044 } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { 3045 gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr); 3046 } else { 3047 gen_helper_stg(cpu_env, dirty_addr, dirty_addr); 3048 } 3049 } 3050 3051 clean_addr = gen_mte_checkN(s, dirty_addr, !is_load, 3052 (wback || rn != 31) && !set_tag, 2 << size); 3053 3054 if (is_vector) { 3055 if (is_load) { 3056 do_fp_ld(s, rt, clean_addr, size); 3057 } else { 3058 do_fp_st(s, rt, clean_addr, size); 3059 } 3060 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); 3061 if (is_load) { 3062 do_fp_ld(s, rt2, clean_addr, size); 3063 } else { 3064 do_fp_st(s, rt2, clean_addr, size); 3065 } 3066 } else { 3067 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3068 TCGv_i64 tcg_rt2 = cpu_reg(s, rt2); 3069 3070 if (is_load) { 3071 TCGv_i64 tmp = tcg_temp_new_i64(); 3072 3073 /* Do not modify tcg_rt before recognizing any exception 3074 * from the second load. 3075 */ 3076 do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN, 3077 false, false, 0, false, false); 3078 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); 3079 do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN, 3080 false, false, 0, false, false); 3081 3082 tcg_gen_mov_i64(tcg_rt, tmp); 3083 tcg_temp_free_i64(tmp); 3084 } else { 3085 do_gpr_st(s, tcg_rt, clean_addr, size, 3086 false, 0, false, false); 3087 tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size); 3088 do_gpr_st(s, tcg_rt2, clean_addr, size, 3089 false, 0, false, false); 3090 } 3091 } 3092 3093 if (wback) { 3094 if (postindex) { 3095 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3096 } 3097 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr); 3098 } 3099 } 3100 3101 /* 3102 * Load/store (immediate post-indexed) 3103 * Load/store (immediate pre-indexed) 3104 * Load/store (unscaled immediate) 3105 * 3106 * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0 3107 * +----+-------+---+-----+-----+---+--------+-----+------+------+ 3108 * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt | 3109 * +----+-------+---+-----+-----+---+--------+-----+------+------+ 3110 * 3111 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. 
(no writeback) 3112 10 -> unprivileged 3113 * V = 0 -> non-vector 3114 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit 3115 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 3116 */ 3117 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, 3118 int opc, 3119 int size, 3120 int rt, 3121 bool is_vector) 3122 { 3123 int rn = extract32(insn, 5, 5); 3124 int imm9 = sextract32(insn, 12, 9); 3125 int idx = extract32(insn, 10, 2); 3126 bool is_signed = false; 3127 bool is_store = false; 3128 bool is_extended = false; 3129 bool is_unpriv = (idx == 2); 3130 bool iss_valid; 3131 bool post_index; 3132 bool writeback; 3133 int memidx; 3134 3135 TCGv_i64 clean_addr, dirty_addr; 3136 3137 if (is_vector) { 3138 size |= (opc & 2) << 1; 3139 if (size > 4 || is_unpriv) { 3140 unallocated_encoding(s); 3141 return; 3142 } 3143 is_store = ((opc & 1) == 0); 3144 if (!fp_access_check(s)) { 3145 return; 3146 } 3147 } else { 3148 if (size == 3 && opc == 2) { 3149 /* PRFM - prefetch */ 3150 if (idx != 0) { 3151 unallocated_encoding(s); 3152 return; 3153 } 3154 return; 3155 } 3156 if (opc == 3 && size > 1) { 3157 unallocated_encoding(s); 3158 return; 3159 } 3160 is_store = (opc == 0); 3161 is_signed = extract32(opc, 1, 1); 3162 is_extended = (size < 3) && extract32(opc, 0, 1); 3163 } 3164 3165 switch (idx) { 3166 case 0: 3167 case 2: 3168 post_index = false; 3169 writeback = false; 3170 break; 3171 case 1: 3172 post_index = true; 3173 writeback = true; 3174 break; 3175 case 3: 3176 post_index = false; 3177 writeback = true; 3178 break; 3179 default: 3180 g_assert_not_reached(); 3181 } 3182 3183 iss_valid = !is_vector && !writeback; 3184 3185 if (rn == 31) { 3186 gen_check_sp_alignment(s); 3187 } 3188 3189 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3190 if (!post_index) { 3191 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); 3192 } 3193 3194 memidx = is_unpriv ? 
get_a64_user_mem_index(s) : get_mem_index(s); 3195 clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store, 3196 writeback || rn != 31, 3197 size, is_unpriv, memidx); 3198 3199 if (is_vector) { 3200 if (is_store) { 3201 do_fp_st(s, rt, clean_addr, size); 3202 } else { 3203 do_fp_ld(s, rt, clean_addr, size); 3204 } 3205 } else { 3206 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3207 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); 3208 3209 if (is_store) { 3210 do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx, 3211 iss_valid, rt, iss_sf, false); 3212 } else { 3213 do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 3214 is_extended, memidx, 3215 iss_valid, rt, iss_sf, false); 3216 } 3217 } 3218 3219 if (writeback) { 3220 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn); 3221 if (post_index) { 3222 tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); 3223 } 3224 tcg_gen_mov_i64(tcg_rn, dirty_addr); 3225 } 3226 } 3227 3228 /* 3229 * Load/store (register offset) 3230 * 3231 * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0 3232 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ 3233 * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt | 3234 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+ 3235 * 3236 * For non-vector: 3237 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit 3238 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 3239 * For vector: 3240 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated 3241 * opc<0>: 0 -> store, 1 -> load 3242 * V: 1 -> vector/simd 3243 * opt: extend encoding (see DecodeRegExtend) 3244 * S: if S=1 then scale (essentially index by sizeof(size)) 3245 * Rt: register to transfer into/out of 3246 * Rn: address register or SP for base 3247 * Rm: offset register or ZR for offset 3248 */ 3249 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, 3250 int opc, 3251 int size, 3252 int rt, 3253 bool is_vector) 3254 { 3255 int rn = extract32(insn, 5, 5); 3256 int shift = extract32(insn, 12, 1); 3257 int rm = extract32(insn, 16, 5); 3258 int opt = extract32(insn, 13, 3); 3259 bool is_signed = false; 3260 bool is_store = false; 3261 bool is_extended = false; 3262 3263 TCGv_i64 tcg_rm, clean_addr, dirty_addr; 3264 3265 if (extract32(opt, 1, 1) == 0) { 3266 unallocated_encoding(s); 3267 return; 3268 } 3269 3270 if (is_vector) { 3271 size |= (opc & 2) << 1; 3272 if (size > 4) { 3273 unallocated_encoding(s); 3274 return; 3275 } 3276 is_store = !extract32(opc, 0, 1); 3277 if (!fp_access_check(s)) { 3278 return; 3279 } 3280 } else { 3281 if (size == 3 && opc == 2) { 3282 /* PRFM - prefetch */ 3283 return; 3284 } 3285 if (opc == 3 && size > 1) { 3286 unallocated_encoding(s); 3287 return; 3288 } 3289 is_store = (opc == 0); 3290 is_signed = extract32(opc, 1, 1); 3291 is_extended = (size < 3) && extract32(opc, 0, 1); 3292 } 3293 3294 if (rn == 31) { 3295 gen_check_sp_alignment(s); 3296 } 3297 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3298 3299 tcg_rm = read_cpu_reg(s, rm, 1); 3300 ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? 
size : 0); 3301 3302 tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm); 3303 clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size); 3304 3305 if (is_vector) { 3306 if (is_store) { 3307 do_fp_st(s, rt, clean_addr, size); 3308 } else { 3309 do_fp_ld(s, rt, clean_addr, size); 3310 } 3311 } else { 3312 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3313 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); 3314 if (is_store) { 3315 do_gpr_st(s, tcg_rt, clean_addr, size, 3316 true, rt, iss_sf, false); 3317 } else { 3318 do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 3319 is_extended, true, rt, iss_sf, false); 3320 } 3321 } 3322 } 3323 3324 /* 3325 * Load/store (unsigned immediate) 3326 * 3327 * 31 30 29 27 26 25 24 23 22 21 10 9 5 3328 * +----+-------+---+-----+-----+------------+-------+------+ 3329 * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt | 3330 * +----+-------+---+-----+-----+------------+-------+------+ 3331 * 3332 * For non-vector: 3333 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit 3334 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32 3335 * For vector: 3336 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated 3337 * opc<0>: 0 -> store, 1 -> load 3338 * Rn: base address register (inc SP) 3339 * Rt: target register 3340 */ 3341 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, 3342 int opc, 3343 int size, 3344 int rt, 3345 bool is_vector) 3346 { 3347 int rn = extract32(insn, 5, 5); 3348 unsigned int imm12 = extract32(insn, 10, 12); 3349 unsigned int offset; 3350 3351 TCGv_i64 clean_addr, dirty_addr; 3352 3353 bool is_store; 3354 bool is_signed = false; 3355 bool is_extended = false; 3356 3357 if (is_vector) { 3358 size |= (opc & 2) << 1; 3359 if (size > 4) { 3360 unallocated_encoding(s); 3361 return; 3362 } 3363 is_store = !extract32(opc, 0, 1); 3364 if (!fp_access_check(s)) { 3365 return; 3366 } 3367 } else { 3368 if (size == 3 && opc == 2) { 3369 /* PRFM - prefetch */ 3370 return; 3371 } 3372 if (opc == 3 && size > 1) { 3373 unallocated_encoding(s); 3374 return; 3375 } 3376 is_store = (opc == 0); 3377 is_signed = extract32(opc, 1, 1); 3378 is_extended = (size < 3) && extract32(opc, 0, 1); 3379 } 3380 3381 if (rn == 31) { 3382 gen_check_sp_alignment(s); 3383 } 3384 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3385 offset = imm12 << size; 3386 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3387 clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size); 3388 3389 if (is_vector) { 3390 if (is_store) { 3391 do_fp_st(s, rt, clean_addr, size); 3392 } else { 3393 do_fp_ld(s, rt, clean_addr, size); 3394 } 3395 } else { 3396 TCGv_i64 tcg_rt = cpu_reg(s, rt); 3397 bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); 3398 if (is_store) { 3399 do_gpr_st(s, tcg_rt, clean_addr, size, 3400 true, rt, iss_sf, false); 3401 } else { 3402 do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN, 3403 is_extended, true, rt, iss_sf, false); 3404 } 3405 } 3406 } 3407 3408 /* Atomic memory operations 3409 * 3410 * 31 30 27 26 24 22 21 16 15 12 10 5 0 3411 * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+ 3412 * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt | 3413 * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+ 3414 * 3415 * Rt: the result register 3416 * Rn: base address or SP 3417 * Rs: the source register for the operation 3418 * V: vector flag (always 0 as of v8.3) 3419 * A: acquire flag 3420 * R: release flag 3421 */ 3422 static 
void disas_ldst_atomic(DisasContext *s, uint32_t insn, 3423 int size, int rt, bool is_vector) 3424 { 3425 int rs = extract32(insn, 16, 5); 3426 int rn = extract32(insn, 5, 5); 3427 int o3_opc = extract32(insn, 12, 4); 3428 bool r = extract32(insn, 22, 1); 3429 bool a = extract32(insn, 23, 1); 3430 TCGv_i64 tcg_rs, tcg_rt, clean_addr; 3431 AtomicThreeOpFn *fn = NULL; 3432 MemOp mop = s->be_data | size | MO_ALIGN; 3433 3434 if (is_vector || !dc_isar_feature(aa64_atomics, s)) { 3435 unallocated_encoding(s); 3436 return; 3437 } 3438 switch (o3_opc) { 3439 case 000: /* LDADD */ 3440 fn = tcg_gen_atomic_fetch_add_i64; 3441 break; 3442 case 001: /* LDCLR */ 3443 fn = tcg_gen_atomic_fetch_and_i64; 3444 break; 3445 case 002: /* LDEOR */ 3446 fn = tcg_gen_atomic_fetch_xor_i64; 3447 break; 3448 case 003: /* LDSET */ 3449 fn = tcg_gen_atomic_fetch_or_i64; 3450 break; 3451 case 004: /* LDSMAX */ 3452 fn = tcg_gen_atomic_fetch_smax_i64; 3453 mop |= MO_SIGN; 3454 break; 3455 case 005: /* LDSMIN */ 3456 fn = tcg_gen_atomic_fetch_smin_i64; 3457 mop |= MO_SIGN; 3458 break; 3459 case 006: /* LDUMAX */ 3460 fn = tcg_gen_atomic_fetch_umax_i64; 3461 break; 3462 case 007: /* LDUMIN */ 3463 fn = tcg_gen_atomic_fetch_umin_i64; 3464 break; 3465 case 010: /* SWP */ 3466 fn = tcg_gen_atomic_xchg_i64; 3467 break; 3468 case 014: /* LDAPR, LDAPRH, LDAPRB */ 3469 if (!dc_isar_feature(aa64_rcpc_8_3, s) || 3470 rs != 31 || a != 1 || r != 0) { 3471 unallocated_encoding(s); 3472 return; 3473 } 3474 break; 3475 default: 3476 unallocated_encoding(s); 3477 return; 3478 } 3479 3480 if (rn == 31) { 3481 gen_check_sp_alignment(s); 3482 } 3483 clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); 3484 3485 if (o3_opc == 014) { 3486 /* 3487 * LDAPR* are a special case because they are a simple load, not a 3488 * fetch-and-do-something op. 3489 * The architectural consistency requirements here are weaker than 3490 * full load-acquire (we only need "load-acquire processor consistent"), 3491 * but we choose to implement them as full LDAQ. 3492 */ 3493 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, 3494 true, rt, disas_ldst_compute_iss_sf(size, false, 0), true); 3495 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3496 return; 3497 } 3498 3499 tcg_rs = read_cpu_reg(s, rs, true); 3500 tcg_rt = cpu_reg(s, rt); 3501 3502 if (o3_opc == 1) { /* LDCLR */ 3503 tcg_gen_not_i64(tcg_rs, tcg_rs); 3504 } 3505 3506 /* The tcg atomic primitives are all full barriers. Therefore we 3507 * can ignore the Acquire and Release bits of this instruction. 3508 */ 3509 fn(tcg_rt, clean_addr, tcg_rs, get_mem_index(s), mop); 3510 3511 if ((mop & MO_SIGN) && size != MO_64) { 3512 tcg_gen_ext32u_i64(tcg_rt, tcg_rt); 3513 } 3514 } 3515 3516 /* 3517 * PAC memory operations 3518 * 3519 * 31 30 27 26 24 22 21 12 11 10 5 0 3520 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+ 3521 * | size | 1 1 1 | V | 0 0 | M S | 1 | imm9 | W | 1 | Rn | Rt | 3522 * +------+-------+---+-----+-----+---+--------+---+---+----+-----+ 3523 * 3524 * Rt: the result register 3525 * Rn: base address or SP 3526 * V: vector flag (always 0 as of v8.3) 3527 * M: clear for key DA, set for key DB 3528 * W: pre-indexing flag 3529 * S: sign for imm9. 
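 *
 * e.g. with size == 3 (the only valid case here), S:imm9 = 1:000000000
 * encodes an offset of -4096 after scaling and sign extension, while
 * S:imm9 = 0:000000001 encodes +8.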
3530 */ 3531 static void disas_ldst_pac(DisasContext *s, uint32_t insn, 3532 int size, int rt, bool is_vector) 3533 { 3534 int rn = extract32(insn, 5, 5); 3535 bool is_wback = extract32(insn, 11, 1); 3536 bool use_key_a = !extract32(insn, 23, 1); 3537 int offset; 3538 TCGv_i64 clean_addr, dirty_addr, tcg_rt; 3539 3540 if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) { 3541 unallocated_encoding(s); 3542 return; 3543 } 3544 3545 if (rn == 31) { 3546 gen_check_sp_alignment(s); 3547 } 3548 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3549 3550 if (s->pauth_active) { 3551 if (use_key_a) { 3552 gen_helper_autda(dirty_addr, cpu_env, dirty_addr, 3553 new_tmp_a64_zero(s)); 3554 } else { 3555 gen_helper_autdb(dirty_addr, cpu_env, dirty_addr, 3556 new_tmp_a64_zero(s)); 3557 } 3558 } 3559 3560 /* Form the 10-bit signed, scaled offset. */ 3561 offset = (extract32(insn, 22, 1) << 9) | extract32(insn, 12, 9); 3562 offset = sextract32(offset << size, 0, 10 + size); 3563 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3564 3565 /* Note that "clean" and "dirty" here refer to TBI not PAC. */ 3566 clean_addr = gen_mte_check1(s, dirty_addr, false, 3567 is_wback || rn != 31, size); 3568 3569 tcg_rt = cpu_reg(s, rt); 3570 do_gpr_ld(s, tcg_rt, clean_addr, size, 3571 /* extend */ false, /* iss_valid */ !is_wback, 3572 /* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false); 3573 3574 if (is_wback) { 3575 tcg_gen_mov_i64(cpu_reg_sp(s, rn), dirty_addr); 3576 } 3577 } 3578 3579 /* 3580 * LDAPR/STLR (unscaled immediate) 3581 * 3582 * 31 30 24 22 21 12 10 5 0 3583 * +------+-------------+-----+---+--------+-----+----+-----+ 3584 * | size | 0 1 1 0 0 1 | opc | 0 | imm9 | 0 0 | Rn | Rt | 3585 * +------+-------------+-----+---+--------+-----+----+-----+ 3586 * 3587 * Rt: source or destination register 3588 * Rn: base register 3589 * imm9: unscaled immediate offset 3590 * opc: 00: STLUR*, 01/10/11: various LDAPUR* 3591 * size: size of load/store 3592 */ 3593 static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn) 3594 { 3595 int rt = extract32(insn, 0, 5); 3596 int rn = extract32(insn, 5, 5); 3597 int offset = sextract32(insn, 12, 9); 3598 int opc = extract32(insn, 22, 2); 3599 int size = extract32(insn, 30, 2); 3600 TCGv_i64 clean_addr, dirty_addr; 3601 bool is_store = false; 3602 bool extend = false; 3603 bool iss_sf; 3604 MemOp mop; 3605 3606 if (!dc_isar_feature(aa64_rcpc_8_4, s)) { 3607 unallocated_encoding(s); 3608 return; 3609 } 3610 3611 /* TODO: ARMv8.4-LSE SCTLR.nAA */ 3612 mop = size | MO_ALIGN; 3613 3614 switch (opc) { 3615 case 0: /* STLURB */ 3616 is_store = true; 3617 break; 3618 case 1: /* LDAPUR* */ 3619 break; 3620 case 2: /* LDAPURS* 64-bit variant */ 3621 if (size == 3) { 3622 unallocated_encoding(s); 3623 return; 3624 } 3625 mop |= MO_SIGN; 3626 break; 3627 case 3: /* LDAPURS* 32-bit variant */ 3628 if (size > 1) { 3629 unallocated_encoding(s); 3630 return; 3631 } 3632 mop |= MO_SIGN; 3633 extend = true; /* zero-extend 32->64 after signed load */ 3634 break; 3635 default: 3636 g_assert_not_reached(); 3637 } 3638 3639 iss_sf = disas_ldst_compute_iss_sf(size, (mop & MO_SIGN) != 0, opc); 3640 3641 if (rn == 31) { 3642 gen_check_sp_alignment(s); 3643 } 3644 3645 dirty_addr = read_cpu_reg_sp(s, rn, 1); 3646 tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); 3647 clean_addr = clean_data_tbi(s, dirty_addr); 3648 3649 if (is_store) { 3650 /* Store-Release semantics */ 3651 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); 3652 do_gpr_st(s, cpu_reg(s, rt), clean_addr, mop, true, rt, iss_sf, true); 
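/*
 * The TCG_BAR_STRL barrier emitted above orders all earlier
 * loads and stores before this store, as Store-Release requires.
 */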
3653 } else { 3654 /* 3655 * Load-AcquirePC semantics; we implement as the slightly more 3656 * restrictive Load-Acquire. 3657 */ 3658 do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, 3659 extend, true, rt, iss_sf, true); 3660 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); 3661 } 3662 } 3663 3664 /* Load/store register (all forms) */ 3665 static void disas_ldst_reg(DisasContext *s, uint32_t insn) 3666 { 3667 int rt = extract32(insn, 0, 5); 3668 int opc = extract32(insn, 22, 2); 3669 bool is_vector = extract32(insn, 26, 1); 3670 int size = extract32(insn, 30, 2); 3671 3672 switch (extract32(insn, 24, 2)) { 3673 case 0: 3674 if (extract32(insn, 21, 1) == 0) { 3675 /* Load/store register (unscaled immediate) 3676 * Load/store immediate pre/post-indexed 3677 * Load/store register unprivileged 3678 */ 3679 disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector); 3680 return; 3681 } 3682 switch (extract32(insn, 10, 2)) { 3683 case 0: 3684 disas_ldst_atomic(s, insn, size, rt, is_vector); 3685 return; 3686 case 2: 3687 disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector); 3688 return; 3689 default: 3690 disas_ldst_pac(s, insn, size, rt, is_vector); 3691 return; 3692 } 3693 break; 3694 case 1: 3695 disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector); 3696 return; 3697 } 3698 unallocated_encoding(s); 3699 } 3700 3701 /* AdvSIMD load/store multiple structures 3702 * 3703 * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0 3704 * +---+---+---------------+---+-------------+--------+------+------+------+ 3705 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt | 3706 * +---+---+---------------+---+-------------+--------+------+------+------+ 3707 * 3708 * AdvSIMD load/store multiple structures (post-indexed) 3709 * 3710 * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0 3711 * +---+---+---------------+---+---+---------+--------+------+------+------+ 3712 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt | 3713 * +---+---+---------------+---+---+---------+--------+------+------+------+ 3714 * 3715 * Rt: first (or only) SIMD&FP register to be transferred 3716 * Rn: base address or SP 3717 * Rm (post-index only): post-index register (when !31) or size dependent #imm 3718 */ 3719 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) 3720 { 3721 int rt = extract32(insn, 0, 5); 3722 int rn = extract32(insn, 5, 5); 3723 int rm = extract32(insn, 16, 5); 3724 int size = extract32(insn, 10, 2); 3725 int opcode = extract32(insn, 12, 4); 3726 bool is_store = !extract32(insn, 22, 1); 3727 bool is_postidx = extract32(insn, 23, 1); 3728 bool is_q = extract32(insn, 30, 1); 3729 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3730 MemOp endian, align, mop; 3731 3732 int total; /* total bytes */ 3733 int elements; /* elements per vector */ 3734 int rpt; /* num iterations */ 3735 int selem; /* structure elements */ 3736 int r; 3737 3738 if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) { 3739 unallocated_encoding(s); 3740 return; 3741 } 3742 3743 if (!is_postidx && rm != 0) { 3744 unallocated_encoding(s); 3745 return; 3746 } 3747 3748 /* From the shared decode logic */ 3749 switch (opcode) { 3750 case 0x0: 3751 rpt = 1; 3752 selem = 4; 3753 break; 3754 case 0x2: 3755 rpt = 4; 3756 selem = 1; 3757 break; 3758 case 0x4: 3759 rpt = 1; 3760 selem = 3; 3761 break; 3762 case 0x6: 3763 rpt = 3; 3764 selem = 1; 3765 break; 3766 case 0x7: 3767 rpt = 1; 3768 selem = 1; 3769 break; 3770 case 0x8: 3771 rpt = 1; 3772 selem = 2; 3773 break; 3774 case 0xa: 3775 rpt = 2; 3776 selem = 1; 3777 break; 
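/*
 * Per the shared decode: 0x0 is LD4/ST4, 0x2 is LD1/ST1 (4 regs),
 * 0x4 is LD3/ST3, 0x6 is LD1/ST1 (3 regs), 0x7 is LD1/ST1 (1 reg),
 * 0x8 is LD2/ST2, 0xa is LD1/ST1 (2 regs); all other opcode
 * values are unallocated.
 */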
3778 default: 3779 unallocated_encoding(s); 3780 return; 3781 } 3782 3783 if (size == 3 && !is_q && selem != 1) { 3784 /* reserved */ 3785 unallocated_encoding(s); 3786 return; 3787 } 3788 3789 if (!fp_access_check(s)) { 3790 return; 3791 } 3792 3793 if (rn == 31) { 3794 gen_check_sp_alignment(s); 3795 } 3796 3797 /* For our purposes, bytes are always little-endian. */ 3798 endian = s->be_data; 3799 if (size == 0) { 3800 endian = MO_LE; 3801 } 3802 3803 total = rpt * selem * (is_q ? 16 : 8); 3804 tcg_rn = cpu_reg_sp(s, rn); 3805 3806 /* 3807 * Issue the MTE check vs the logical repeat count, before we 3808 * promote consecutive little-endian elements below. 3809 */ 3810 clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, 3811 total); 3812 3813 /* 3814 * Consecutive little-endian elements from a single register 3815 * can be promoted to a larger little-endian operation. 3816 */ 3817 align = MO_ALIGN; 3818 if (selem == 1 && endian == MO_LE) { 3819 align = pow2_align(size); 3820 size = 3; 3821 } 3822 if (!s->align_mem) { 3823 align = 0; 3824 } 3825 mop = endian | size | align; 3826 3827 elements = (is_q ? 16 : 8) >> size; 3828 tcg_ebytes = tcg_constant_i64(1 << size); 3829 for (r = 0; r < rpt; r++) { 3830 int e; 3831 for (e = 0; e < elements; e++) { 3832 int xs; 3833 for (xs = 0; xs < selem; xs++) { 3834 int tt = (rt + r + xs) % 32; 3835 if (is_store) { 3836 do_vec_st(s, tt, e, clean_addr, mop); 3837 } else { 3838 do_vec_ld(s, tt, e, clean_addr, mop); 3839 } 3840 tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes); 3841 } 3842 } 3843 } 3844 3845 if (!is_store) { 3846 /* For non-quad operations, setting a slice of the low 3847 * 64 bits of the register clears the high 64 bits (in 3848 * the ARM ARM pseudocode this is implicit in the fact 3849 * that 'rval' is a 64 bit wide variable). 3850 * For quad operations, we might still need to zero the 3851 * high bits of SVE. 
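 * (i.e. bits above bit 127 of the Zn vectors, when the SVE
 * vector length is greater than 128 bits).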
3852 */ 3853 for (r = 0; r < rpt * selem; r++) { 3854 int tt = (rt + r) % 32; 3855 clear_vec_high(s, is_q, tt); 3856 } 3857 } 3858 3859 if (is_postidx) { 3860 if (rm == 31) { 3861 tcg_gen_addi_i64(tcg_rn, tcg_rn, total); 3862 } else { 3863 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); 3864 } 3865 } 3866 } 3867 3868 /* AdvSIMD load/store single structure 3869 * 3870 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 3871 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3872 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt | 3873 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3874 * 3875 * AdvSIMD load/store single structure (post-indexed) 3876 * 3877 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0 3878 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3879 * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt | 3880 * +---+---+---------------+-----+-----------+-----+---+------+------+------+ 3881 * 3882 * Rt: first (or only) SIMD&FP register to be transferred 3883 * Rn: base address or SP 3884 * Rm (post-index only): post-index register (when !31) or size dependent #imm 3885 * index = encoded in Q:S:size dependent on size 3886 * 3887 * lane_size = encoded in R, opc 3888 * transfer width = encoded in opc, S, size 3889 */ 3890 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) 3891 { 3892 int rt = extract32(insn, 0, 5); 3893 int rn = extract32(insn, 5, 5); 3894 int rm = extract32(insn, 16, 5); 3895 int size = extract32(insn, 10, 2); 3896 int S = extract32(insn, 12, 1); 3897 int opc = extract32(insn, 13, 3); 3898 int R = extract32(insn, 21, 1); 3899 int is_load = extract32(insn, 22, 1); 3900 int is_postidx = extract32(insn, 23, 1); 3901 int is_q = extract32(insn, 30, 1); 3902 3903 int scale = extract32(opc, 1, 2); 3904 int selem = (extract32(opc, 0, 1) << 1 | R) + 1; 3905 bool replicate = false; 3906 int index = is_q << 3 | S << 2 | size; 3907 int xs, total; 3908 TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; 3909 MemOp mop; 3910 3911 if (extract32(insn, 31, 1)) { 3912 unallocated_encoding(s); 3913 return; 3914 } 3915 if (!is_postidx && rm != 0) { 3916 unallocated_encoding(s); 3917 return; 3918 } 3919 3920 switch (scale) { 3921 case 3: 3922 if (!is_load || S) { 3923 unallocated_encoding(s); 3924 return; 3925 } 3926 scale = size; 3927 replicate = true; 3928 break; 3929 case 0: 3930 break; 3931 case 1: 3932 if (extract32(size, 0, 1)) { 3933 unallocated_encoding(s); 3934 return; 3935 } 3936 index >>= 1; 3937 break; 3938 case 2: 3939 if (extract32(size, 1, 1)) { 3940 unallocated_encoding(s); 3941 return; 3942 } 3943 if (!extract32(size, 0, 1)) { 3944 index >>= 2; 3945 } else { 3946 if (S) { 3947 unallocated_encoding(s); 3948 return; 3949 } 3950 index >>= 3; 3951 scale = 3; 3952 } 3953 break; 3954 default: 3955 g_assert_not_reached(); 3956 } 3957 3958 if (!fp_access_check(s)) { 3959 return; 3960 } 3961 3962 if (rn == 31) { 3963 gen_check_sp_alignment(s); 3964 } 3965 3966 total = selem << scale; 3967 tcg_rn = cpu_reg_sp(s, rn); 3968 3969 clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, 3970 total); 3971 mop = finalize_memop(s, scale); 3972 3973 tcg_ebytes = tcg_constant_i64(1 << scale); 3974 for (xs = 0; xs < selem; xs++) { 3975 if (replicate) { 3976 /* Load and replicate to all elements */ 3977 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 3978 3979 tcg_gen_qemu_ld_i64(tcg_tmp, clean_addr, get_mem_index(s), mop); 3980 tcg_gen_gvec_dup_i64(scale, 
                                 vec_full_reg_offset(s, rt),
                                 (is_q + 1) * 8, vec_full_reg_size(s),
                                 tcg_tmp);
            tcg_temp_free_i64(tcg_tmp);
        } else {
            /* Load/store one element per register */
            if (is_load) {
                do_vec_ld(s, rt, index, clean_addr, mop);
            } else {
                do_vec_st(s, rt, index, clean_addr, mop);
            }
        }
        tcg_gen_add_i64(clean_addr, clean_addr, tcg_ebytes);
        rt = (rt + 1) % 32;
    }

    if (is_postidx) {
        if (rm == 31) {
            tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
}

/*
 * Load/Store memory tags
 *
 *  31 30 29         24     22  21     12    10      5      0
 * +-----+-------------+-----+---+------+-----+------+------+
 * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 |  Rn  |  Rt  |
 * +-----+-------------+-----+---+------+-----+------+------+
 */
static void disas_ldst_tag(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
    int op2 = extract32(insn, 10, 2);
    int op1 = extract32(insn, 22, 2);
    bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
    int index = 0;
    TCGv_i64 addr, clean_addr, tcg_rt;

    /* We checked insn bits [29:24,21] in the caller.  */
    if (extract32(insn, 30, 2) != 3) {
        goto do_unallocated;
    }

    /*
     * @index is a tri-state variable which has 3 states:
     *   < 0 : post-index, writeback
     *   = 0 : signed offset
     *   > 0 : pre-index, writeback
     */
    switch (op1) {
    case 0:
        if (op2 != 0) {
            /* STG */
            index = op2 - 2;
        } else {
            /* STZGM */
            if (s->current_el == 0 || offset != 0) {
                goto do_unallocated;
            }
            is_mult = is_zero = true;
        }
        break;
    case 1:
        if (op2 != 0) {
            /* STZG */
            is_zero = true;
            index = op2 - 2;
        } else {
            /* LDG */
            is_load = true;
        }
        break;
    case 2:
        if (op2 != 0) {
            /* ST2G */
            is_pair = true;
            index = op2 - 2;
        } else {
            /* STGM */
            if (s->current_el == 0 || offset != 0) {
                goto do_unallocated;
            }
            is_mult = true;
        }
        break;
    case 3:
        if (op2 != 0) {
            /* STZ2G */
            is_pair = is_zero = true;
            index = op2 - 2;
        } else {
            /* LDGM */
            if (s->current_el == 0 || offset != 0) {
                goto do_unallocated;
            }
            is_mult = is_load = true;
        }
        break;

    default:
    do_unallocated:
        unallocated_encoding(s);
        return;
    }

    if (is_mult
        ? !dc_isar_feature(aa64_mte, s)
        : !dc_isar_feature(aa64_mte_insn_reg, s)) {
        goto do_unallocated;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    addr = read_cpu_reg_sp(s, rn, true);
    if (index >= 0) {
        /* pre-index or signed offset */
        tcg_gen_addi_i64(addr, addr, offset);
    }

    if (is_mult) {
        tcg_rt = cpu_reg(s, rt);

        if (is_zero) {
            int size = 4 << s->dcz_blocksize;

            if (s->ata) {
                gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
            }
            /*
             * The non-tags portion of STZGM is mostly like DC_ZVA,
             * except the alignment happens before the access.
             */
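            /*
             * (Illustrative: assuming the common dcz_blocksize of 4,
             * size is 64, so the masking below rounds the address down
             * to a 64-byte boundary before zeroing the whole block.)
             */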
            clean_addr = clean_data_tbi(s, addr);
            tcg_gen_andi_i64(clean_addr, clean_addr, -size);
            gen_helper_dc_zva(cpu_env, clean_addr);
        } else if (s->ata) {
            if (is_load) {
                gen_helper_ldgm(tcg_rt, cpu_env, addr);
            } else {
                gen_helper_stgm(cpu_env, addr, tcg_rt);
            }
        } else {
            MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
            int size = 4 << GMID_EL1_BS;

            clean_addr = clean_data_tbi(s, addr);
            tcg_gen_andi_i64(clean_addr, clean_addr, -size);
            gen_probe_access(s, clean_addr, acc, size);

            if (is_load) {
                /* The result tags are zeros.  */
                tcg_gen_movi_i64(tcg_rt, 0);
            }
        }
        return;
    }

    if (is_load) {
        tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
        tcg_rt = cpu_reg(s, rt);
        if (s->ata) {
            gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt);
        } else {
            clean_addr = clean_data_tbi(s, addr);
            gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
            gen_address_with_allocation_tag0(tcg_rt, addr);
        }
    } else {
        tcg_rt = cpu_reg_sp(s, rt);
        if (!s->ata) {
            /*
             * For STG and ST2G, we need to check alignment and probe memory.
             * TODO: For STZG and STZ2G, we could rely on the stores below,
             * at least for system mode; user-only won't enforce alignment.
             */
            if (is_pair) {
                gen_helper_st2g_stub(cpu_env, addr);
            } else {
                gen_helper_stg_stub(cpu_env, addr);
            }
        } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
            if (is_pair) {
                gen_helper_st2g_parallel(cpu_env, addr, tcg_rt);
            } else {
                gen_helper_stg_parallel(cpu_env, addr, tcg_rt);
            }
        } else {
            if (is_pair) {
                gen_helper_st2g(cpu_env, addr, tcg_rt);
            } else {
                gen_helper_stg(cpu_env, addr, tcg_rt);
            }
        }
    }

    if (is_zero) {
        TCGv_i64 clean_addr = clean_data_tbi(s, addr);
        TCGv_i64 tcg_zero = tcg_constant_i64(0);
        int mem_index = get_mem_index(s);
        int i, n = (1 + is_pair) << LOG2_TAG_GRANULE;

        tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index,
                            MO_UQ | MO_ALIGN_16);
        for (i = 8; i < n; i += 8) {
            tcg_gen_addi_i64(clean_addr, clean_addr, 8);
            tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_UQ);
        }
    }

    if (index != 0) {
        /* pre-index or post-index */
        if (index < 0) {
            /* post-index */
            tcg_gen_addi_i64(addr, addr, offset);
        }
        tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr);
    }
}

/* Loads and stores */
static void disas_ldst(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 24, 6)) {
    case 0x08: /* Load/store exclusive */
        disas_ldst_excl(s, insn);
        break;
    case 0x18: case 0x1c: /* Load register (literal) */
        disas_ld_lit(s, insn);
        break;
    case 0x28: case 0x29:
    case 0x2c: case 0x2d: /* Load/store pair (all forms) */
        disas_ldst_pair(s, insn);
        break;
    case 0x38: case 0x39:
    case 0x3c: case 0x3d: /* Load/store register (all forms) */
        disas_ldst_reg(s, insn);
        break;
    case 0x0c: /* AdvSIMD load/store multiple structures */
        disas_ldst_multiple_struct(s, insn);
        break;
    case 0x0d: /* AdvSIMD load/store single structure */
        disas_ldst_single_struct(s, insn);
        break;
    case 0x19:
        if (extract32(insn, 21, 1) != 0) {
            disas_ldst_tag(s, insn);
        } else if (extract32(insn, 10, 2) == 0) {
            disas_ldst_ldapr_stlr(s, insn);
        } else {
            unallocated_encoding(s);
        }
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* PC-rel. addressing
 *   31  30   29 28       24 23                5 4    0
 * +----+-------+-----------+-------------------+------+
 * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
 * +----+-------+-----------+-------------------+------+
 */
static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
{
    unsigned int page, rd;
    int64_t offset;

    page = extract32(insn, 31, 1);
    /* SignExtend(immhi:immlo) -> offset */
    offset = sextract64(insn, 5, 19);
    offset = offset << 2 | extract32(insn, 29, 2);
    rd = extract32(insn, 0, 5);

    if (page) {
        /* ADRP (page based) */
        offset <<= 12;
        /* The page offset is ok for CF_PCREL.  */
        offset -= s->pc_curr & 0xfff;
    }

    gen_pc_plus_diff(s, cpu_reg(s, rd), offset);
}

/*
 * Add/subtract (immediate)
 *
 *  31 30 29 28         23 22 21         10 9   5 4   0
 * +--+--+--+-------------+--+-------------+-----+-----+
 * |sf|op| S| 1 0 0 0 1 0 |sh|    imm12    |  Rn |  Rd |
 * +--+--+--+-------------+--+-------------+-----+-----+
 *
 *    sf: 0 -> 32bit, 1 -> 64bit
 *    op: 0 -> add  , 1 -> sub
 *     S: 1 -> set flags
 *    sh: 1 -> LSL imm by 12
 */
static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    uint64_t imm = extract32(insn, 10, 12);
    bool shift = extract32(insn, 22, 1);
    bool setflags = extract32(insn, 29, 1);
    bool sub_op = extract32(insn, 30, 1);
    bool is_64bit = extract32(insn, 31, 1);

    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
    TCGv_i64 tcg_result;

    if (shift) {
        imm <<= 12;
    }

    tcg_result = tcg_temp_new_i64();
    if (!setflags) {
        if (sub_op) {
            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
        } else {
            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
        }
    } else {
        TCGv_i64 tcg_imm = tcg_constant_i64(imm);
        if (sub_op) {
            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
        } else {
            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
        }
    }

    if (is_64bit) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }

    tcg_temp_free_i64(tcg_result);
}

/*
 * Add/subtract (immediate, with tags)
 *
 *  31 30 29 28         23 22 21     16 14      10 9   5 4   0
 * +--+--+--+-------------+--+---------+--+-------+-----+-----+
 * |sf|op| S| 1 0 0 0 1 1 |o2|  uimm6  |o3| uimm4 |  Rn |  Rd |
 * +--+--+--+-------------+--+---------+--+-------+-----+-----+
 *
 *    op: 0 -> add, 1 -> sub
 */
static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int uimm4 = extract32(insn, 10, 4);
    int uimm6 = extract32(insn, 16, 6);
    bool sub_op = extract32(insn, 30, 1);
    TCGv_i64 tcg_rn, tcg_rd;
    int imm;

    /* Test all of sf=1, S=0, o2=0, o3=0.  */
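    /*
     * (The mask 0xa040c000 selects bits 31 (sf), 29 (S), 22 (o2) and
     * 15:14 (o3); comparing against 0x80000000 requires sf == 1 with
     * the other three fields zero.)
     */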
    if ((insn & 0xa040c000u) != 0x80000000u ||
        !dc_isar_feature(aa64_mte_insn_reg, s)) {
        unallocated_encoding(s);
        return;
    }

    imm = uimm6 << LOG2_TAG_GRANULE;
    if (sub_op) {
        imm = -imm;
    }

    tcg_rn = cpu_reg_sp(s, rn);
    tcg_rd = cpu_reg_sp(s, rd);

    if (s->ata) {
        gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn,
                           tcg_constant_i32(imm),
                           tcg_constant_i32(uimm4));
    } else {
        tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
        gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
    }
}

/* The input should be a value in the bottom e bits (with higher
 * bits zero); returns that value replicated into every element
 * of size e in a 64 bit integer.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    assert(e != 0);
    while (e < 64) {
        mask |= mask << e;
        e *= 2;
    }
    return mask;
}

/* Return a value with the bottom len bits set (where 0 < len <= 64) */
static inline uint64_t bitmask64(unsigned int length)
{
    assert(length > 0 && length <= 64);
    return ~0ULL >> (64 - length);
}

/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask. Returns false if the imms/immr/immn are a reserved
 * value (ie should cause a guest UNDEF exception), and true if they are
 * valid, in which case the decoded bit pattern is written to result.
 */
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                            unsigned int imms, unsigned int immr)
{
    uint64_t mask;
    unsigned e, levels, s, r;
    int len;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The bit patterns we create here are 64 bit patterns which
     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
     * 64 bits each. Each element contains the same value: a run
     * of between 1 and e-1 non-zero bits, rotated within the
     * element by between 0 and e-1 bits.
     *
     * The element size and run length are encoded into immn (1 bit)
     * and imms (6 bits) as follows:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * Notice that immn = 0, imms = 11111x is the only combination
     * not covered by one of the above options; this is reserved.
     * Further, <length of run - 1> all-ones is a reserved pattern.
     *
     * In all cases the rotation is by immr % e (and immr is 6 bits).
     */

    /* First determine the element size */
    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (len < 1) {
        /* This is the immn == 0, imms == 0b11111x case */
        return false;
    }
    e = 1 << len;

    levels = e - 1;
    s = imms & levels;
    r = immr & levels;

    if (s == levels) {
        /* <length of run - 1> mustn't be all-ones.  */
        return false;
    }

    /* Create the value of one element: s+1 set bits rotated
     * by r within the element (which is e bits wide)...
     */
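    /*
     * (Worked example, illustrative rather than from the ARM ARM:
     * immn = 0, imms = 0b100101, immr = 0b000011 gives len = 4, so
     * e = 16, s = 5, r = 3: a run of six set bits (0x003f) rotated
     * right by three within each 16-bit element, i.e. 0xe007, which
     * replicates to 0xe007e007e007e007.)
     */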
    mask = bitmask64(s + 1);
    if (r) {
        mask = (mask >> r) | (mask << (e - r));
        mask &= bitmask64(e);
    }
    /* ...then replicate the element over the whole 64 bit value */
    mask = bitfield_replicate(mask, e);
    *result = mask;
    return true;
}

/* Logical (immediate)
 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
 * +----+-----+-------------+---+------+------+------+------+
 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
 * +----+-----+-------------+---+------+------+------+------+
 */
static void disas_logic_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, opc, is_n, immr, imms, rn, rd;
    TCGv_i64 tcg_rd, tcg_rn;
    uint64_t wmask;
    bool is_and = false;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    is_n = extract32(insn, 22, 1);
    immr = extract32(insn, 16, 6);
    imms = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (!sf && is_n) {
        unallocated_encoding(s);
        return;
    }

    if (opc == 0x3) { /* ANDS */
        tcg_rd = cpu_reg(s, rd);
    } else {
        tcg_rd = cpu_reg_sp(s, rd);
    }
    tcg_rn = cpu_reg(s, rn);

    if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
        /* some immediate field values are reserved */
        unallocated_encoding(s);
        return;
    }

    if (!sf) {
        wmask &= 0xffffffff;
    }

    switch (opc) {
    case 0x3: /* ANDS */
    case 0x0: /* AND */
        tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
        is_and = true;
        break;
    case 0x1: /* ORR */
        tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
        break;
    case 0x2: /* EOR */
        tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
        break;
    default:
        g_assert_not_reached(); /* must handle all above */
    }

    if (!sf && !is_and) {
        /* zero extend final result; we know we can skip this for AND
         * since the immediate had the high 32 bits clear.
         */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }

    if (opc == 3) { /* ANDS */
        gen_logic_CC(sf, tcg_rd);
    }
}

/*
 * Move wide (immediate)
 *
 *  31 30 29 28         23 22 21 20             5 4    0
 * +--+-----+-------------+-----+----------------+------+
 * |sf| opc | 1 0 0 1 0 1 |  hw |      imm16     |  Rd  |
 * +--+-----+-------------+-----+----------------+------+
 *
 * sf: 0 -> 32 bit, 1 -> 64 bit
 * opc: 00 -> N, 10 -> Z, 11 -> K
 * hw: shift/16 (0,16, and sf only 32, 48)
 */
static void disas_movw_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    uint64_t imm = extract32(insn, 5, 16);
    int sf = extract32(insn, 31, 1);
    int opc = extract32(insn, 29, 2);
    int pos = extract32(insn, 21, 2) << 4;
    TCGv_i64 tcg_rd = cpu_reg(s, rd);

    if (!sf && (pos >= 32)) {
        unallocated_encoding(s);
        return;
    }

    switch (opc) {
    case 0: /* MOVN */
    case 2: /* MOVZ */
        imm <<= pos;
        if (opc == 0) {
            imm = ~imm;
        }
        if (!sf) {
            imm &= 0xffffffffu;
        }
        tcg_gen_movi_i64(tcg_rd, imm);
        break;
    case 3: /* MOVK */
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_constant_i64(imm), pos, 16);
        if (!sf) {
            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
        }
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* Bitfield
 *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
 * +----+-----+-------------+---+------+------+------+------+
 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
 * +----+-----+-------------+---+------+------+------+------+
 */
static void disas_bitfield(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
    TCGv_i64 tcg_rd, tcg_tmp;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    n = extract32(insn, 22, 1);
    ri = extract32(insn, 16, 6);
    si = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    bitsize = sf ? 64 : 32;

    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd = cpu_reg(s, rd);

    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
       to be smaller than bitsize, we'll never reference data outside the
       low 32-bits anyway.  */
    tcg_tmp = read_cpu_reg(s, rn, 1);

    /* Recognize simple(r) extractions.  */
    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        len = (si - ri) + 1;
        if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
            tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
            goto done;
        } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
            tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
            return;
        }
        /* opc == 1, BFXIL fall through to deposit */
        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
        pos = 0;
    } else {
        /* Handle the ri > si case with a deposit
         * Wd<32+s-r,32-r> = Wn<s:0>
         */
        len = si + 1;
        pos = (bitsize - ri) & (bitsize - 1);
    }

    if (opc == 0 && len < ri) {
        /* SBFM: sign extend the destination field from len to fill
           the balance of the word.  Let the deposit below insert all
           of those sign bits.  */
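        /*
         * (Illustrative: SBFIZ X0, X1, #8, #4 decodes to ri = 56 and
         * si = 3, giving pos = 8 and len = 4; we sign-extend the 4-bit
         * field here and widen len to 56 so the deposit below fills
         * bits [63:8] with the extended value.)
         */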
        tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
        len = ri;
    }

    if (opc == 1) { /* BFM, BFXIL */
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
    } else {
        /* SBFM or UBFM: We start with zero, and we haven't modified
           any bits outside bitsize, therefore the zero-extension
           below is unneeded.  */
        tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
        return;
    }

 done:
    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}

/* Extract
 *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
 * +----+------+-------------+---+----+------+--------+------+------+
 * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
 * +----+------+-------------+---+----+------+--------+------+------+
 */
static void disas_extract(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;

    sf = extract32(insn, 31, 1);
    n = extract32(insn, 22, 1);
    rm = extract32(insn, 16, 5);
    imm = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    op21 = extract32(insn, 29, 2);
    op0 = extract32(insn, 21, 1);
    bitsize = sf ? 64 : 32;

    if (sf != n || op21 || op0 || imm >= bitsize) {
        unallocated_encoding(s);
    } else {
        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;

        tcg_rd = cpu_reg(s, rd);

        if (unlikely(imm == 0)) {
            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
             * so an extract from bit 0 is a special case.
             */
            if (sf) {
                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
            } else {
                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
            }
        } else {
            tcg_rm = cpu_reg(s, rm);
            tcg_rn = cpu_reg(s, rn);

            if (sf) {
                /* Specialization to ROR happens in EXTRACT2.  */
                tcg_gen_extract2_i64(tcg_rd, tcg_rm, tcg_rn, imm);
            } else {
                TCGv_i32 t0 = tcg_temp_new_i32();

                tcg_gen_extrl_i64_i32(t0, tcg_rm);
                if (rm == rn) {
                    tcg_gen_rotri_i32(t0, t0, imm);
                } else {
                    TCGv_i32 t1 = tcg_temp_new_i32();
                    tcg_gen_extrl_i64_i32(t1, tcg_rn);
                    tcg_gen_extract2_i32(t0, t0, t1, imm);
                    tcg_temp_free_i32(t1);
                }
                tcg_gen_extu_i32_i64(tcg_rd, t0);
                tcg_temp_free_i32(t0);
            }
        }
    }
}

/* Data processing - immediate */
static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
{
    switch (extract32(insn, 23, 6)) {
    case 0x20: case 0x21: /* PC-rel. addressing */
        disas_pc_rel_adr(s, insn);
        break;
    case 0x22: /* Add/subtract (immediate) */
        disas_add_sub_imm(s, insn);
        break;
    case 0x23: /* Add/subtract (immediate, with tags) */
        disas_add_sub_imm_with_tags(s, insn);
        break;
    case 0x24: /* Logical (immediate) */
        disas_logic_imm(s, insn);
        break;
    case 0x25: /* Move wide (immediate) */
        disas_movw_imm(s, insn);
        break;
    case 0x26: /* Bitfield */
        disas_bitfield(s, insn);
        break;
    case 0x27: /* Extract */
        disas_extract(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* Shift a TCGv src by TCGv shift_amount, put result in dst.
 * Note that it is the caller's responsibility to ensure that the
 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
 * mandated semantics for out of range shifts.
 */
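/* (For example, handle_shift_reg below masks the variable shift count
 * with "sf ? 63 : 31" before calling here, matching the LSLV/LSRV/ASRV/
 * RORV requirement that the amount is taken modulo the register width.)
 */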
static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
                      enum a64_shift_type shift_type, TCGv_i64 shift_amount)
{
    switch (shift_type) {
    case A64_SHIFT_TYPE_LSL:
        tcg_gen_shl_i64(dst, src, shift_amount);
        break;
    case A64_SHIFT_TYPE_LSR:
        tcg_gen_shr_i64(dst, src, shift_amount);
        break;
    case A64_SHIFT_TYPE_ASR:
        if (!sf) {
            tcg_gen_ext32s_i64(dst, src);
        }
        tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
        break;
    case A64_SHIFT_TYPE_ROR:
        if (sf) {
            tcg_gen_rotr_i64(dst, src, shift_amount);
        } else {
            TCGv_i32 t0, t1;
            t0 = tcg_temp_new_i32();
            t1 = tcg_temp_new_i32();
            tcg_gen_extrl_i64_i32(t0, src);
            tcg_gen_extrl_i64_i32(t1, shift_amount);
            tcg_gen_rotr_i32(t0, t0, t1);
            tcg_gen_extu_i32_i64(dst, t0);
            tcg_temp_free_i32(t0);
            tcg_temp_free_i32(t1);
        }
        break;
    default:
        g_assert_not_reached(); /* all shift types should be handled */
    }

    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(dst, dst);
    }
}

/* Shift a TCGv src by immediate, put result in dst.
 * The shift amount must be in range (this should always be true as the
 * relevant instructions will UNDEF on bad shift immediates).
 */
static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
                          enum a64_shift_type shift_type, unsigned int shift_i)
{
    assert(shift_i < (sf ? 64 : 32));

    if (shift_i == 0) {
        tcg_gen_mov_i64(dst, src);
    } else {
        shift_reg(dst, src, sf, shift_type, tcg_constant_i64(shift_i));
    }
}

/* Logical (shifted register)
 *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
 * +----+-----+-----------+-------+---+------+--------+------+------+
 * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
 * +----+-----+-----------+-------+---+------+--------+------+------+
 */
static void disas_logic_reg(DisasContext *s, uint32_t insn)
{
    TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
    unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);
    shift_type = extract32(insn, 22, 2);
    invert = extract32(insn, 21, 1);
    rm = extract32(insn, 16, 5);
    shift_amount = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (!sf && (shift_amount & (1 << 5))) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd = cpu_reg(s, rd);

    if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
        /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
         * register-register MOV and MVN, so it is worth special casing.
         */
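        /* (e.g. "MOV X0, X1" assembles as "ORR X0, XZR, X1".) */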
        tcg_rm = cpu_reg(s, rm);
        if (invert) {
            tcg_gen_not_i64(tcg_rd, tcg_rm);
            if (!sf) {
                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
            }
        } else {
            if (sf) {
                tcg_gen_mov_i64(tcg_rd, tcg_rm);
            } else {
                tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
            }
        }
        return;
    }

    tcg_rm = read_cpu_reg(s, rm, sf);

    if (shift_amount) {
        shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
    }

    tcg_rn = cpu_reg(s, rn);

    switch (opc | (invert << 2)) {
    case 0: /* AND */
    case 3: /* ANDS */
        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 1: /* ORR */
        tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 2: /* EOR */
        tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 4: /* BIC */
    case 7: /* BICS */
        tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 5: /* ORN */
        tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 6: /* EON */
        tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    default:
        g_assert_not_reached();
    }

    if (!sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }

    if (opc == 3) {
        gen_logic_CC(sf, tcg_rd);
    }
}

/*
 * Add/subtract (extended register)
 *
 *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
 * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
 *
 *  sf: 0 -> 32bit, 1 -> 64bit
 *  op: 0 -> add  , 1 -> sub
 *   S: 1 -> set flags
 * opt: 00
 * option: extension type (see DecodeRegExtend)
 * imm3: optional shift to Rm
 *
 * Rd = Rn + LSL(extend(Rm), amount)
 */
static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm3 = extract32(insn, 10, 3);
    int option = extract32(insn, 13, 3);
    int rm = extract32(insn, 16, 5);
    int opt = extract32(insn, 22, 2);
    bool setflags = extract32(insn, 29, 1);
    bool sub_op = extract32(insn, 30, 1);
    bool sf = extract32(insn, 31, 1);

    TCGv_i64 tcg_rm, tcg_rn; /* temps */
    TCGv_i64 tcg_rd;
    TCGv_i64 tcg_result;

    if (imm3 > 4 || opt != 0) {
        unallocated_encoding(s);
        return;
    }

    /* non-flag setting ops may use SP */
    if (!setflags) {
        tcg_rd = cpu_reg_sp(s, rd);
    } else {
        tcg_rd = cpu_reg(s, rd);
    }
    tcg_rn = read_cpu_reg_sp(s, rn, sf);

    tcg_rm = read_cpu_reg(s, rm, sf);
    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);

    tcg_result = tcg_temp_new_i64();

    if (!setflags) {
        if (sub_op) {
            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
        }
    } else {
        if (sub_op) {
            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
        } else {
            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
        }
    }

    if (sf) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }

    tcg_temp_free_i64(tcg_result);
}

/*
 * Add/subtract (shifted register)
 *
 *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
 * +--+--+--+-----------+-----+--+-------+---------+------+------+
 * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
 * +--+--+--+-----------+-----+--+-------+---------+------+------+
 *
 *    sf: 0 -> 32bit, 1 -> 64bit
 *    op: 0 -> add  , 1 -> sub
 *     S: 1 -> set flags
 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
 *  imm6: Shift amount to apply to Rm before the add/sub
 */
static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm6 = extract32(insn, 10, 6);
    int rm = extract32(insn, 16, 5);
    int shift_type = extract32(insn, 22, 2);
    bool setflags = extract32(insn, 29, 1);
    bool sub_op = extract32(insn, 30, 1);
    bool sf = extract32(insn, 31, 1);

    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_rn, tcg_rm;
    TCGv_i64 tcg_result;

    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
        unallocated_encoding(s);
        return;
    }

    tcg_rn = read_cpu_reg(s, rn, sf);
    tcg_rm = read_cpu_reg(s, rm, sf);

    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);

    tcg_result = tcg_temp_new_i64();

    if (!setflags) {
        if (sub_op) {
            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
        }
    } else {
        if (sub_op) {
            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
        } else {
            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
        }
    }

    if (sf) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }

    tcg_temp_free_i64(tcg_result);
}

/* Data-processing (3 source)
 *
 *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
 *  +--+------+-----------+------+------+----+------+------+------+
 *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
 *  +--+------+-----------+------+------+----+------+------+------+
 */
static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int ra = extract32(insn, 10, 5);
    int rm = extract32(insn, 16, 5);
    int op_id = (extract32(insn, 29, 3) << 4) |
                (extract32(insn, 21, 3) << 1) |
                extract32(insn, 15, 1);
    bool sf = extract32(insn, 31, 1);
    bool is_sub = extract32(op_id, 0, 1);
    bool is_high = extract32(op_id, 2, 1);
    bool is_signed = false;
    TCGv_i64 tcg_op1;
    TCGv_i64 tcg_op2;
    TCGv_i64 tcg_tmp;

    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
    switch (op_id) {
    case 0x42: /* SMADDL */
    case 0x43: /* SMSUBL */
    case 0x44: /* SMULH */
        is_signed = true;
        break;
    case 0x0: /* MADD (32bit) */
    case 0x1: /* MSUB (32bit) */
    case 0x40: /* MADD (64bit) */
    case 0x41: /* MSUB (64bit) */
    case 0x4a: /* UMADDL */
    case 0x4b: /* UMSUBL */
    case 0x4c: /* UMULH */
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (is_high) {
        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
        TCGv_i64 tcg_rd = cpu_reg(s, rd);
        TCGv_i64 tcg_rn = cpu_reg(s, rn);
        TCGv_i64 tcg_rm = cpu_reg(s, rm);

        if (is_signed) {
            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        } else {
            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        }

        tcg_temp_free_i64(low_bits);
        return;
    }

    tcg_op1 = tcg_temp_new_i64();
    tcg_op2 = tcg_temp_new_i64();
    tcg_tmp = tcg_temp_new_i64();

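    /*
     * (op_id values below 0x42 are the plain MADD/MSUB encodings, whose
     * operands are used as-is; 0x42 and up are the widening 32->64
     * multiplies, which need their operands explicitly extended.)
     */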
    if (op_id < 0x42) {
        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
    } else {
        if (is_signed) {
            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
        } else {
            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
        }
    }

    if (ra == 31 && !is_sub) {
        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
    } else {
        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
        if (is_sub) {
            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        } else {
            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        }
    }

    if (!sf) {
        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
    }

    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_tmp);
}

/* Add/subtract (with carry)
 *  31 30 29 28 27 26 25 24 23 22 21  20  16  15       10  9    5 4   0
 * +--+--+--+------------------------+------+-------------+------+-----+
 * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | 0 0 0 0 0 0 |  Rn  |  Rd |
 * +--+--+--+------------------------+------+-------------+------+-----+
 */

static void disas_adc_sbc(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, setflags, rm, rn, rd;
    TCGv_i64 tcg_y, tcg_rn, tcg_rd;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 30, 1);
    setflags = extract32(insn, 29, 1);
    rm = extract32(insn, 16, 5);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    tcg_rd = cpu_reg(s, rd);
    tcg_rn = cpu_reg(s, rn);

    if (op) {
        tcg_y = tcg_temp_new_i64();
        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
    } else {
        tcg_y = cpu_reg(s, rm);
    }

    if (setflags) {
        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
    } else {
        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
    }
}

/*
 * Rotate right into flags
 *  31 30 29                21       15          10      5  4      0
 * +--+--+--+-----------------+--------+-----------+------+--+------+
 * |sf|op| S| 1 1 0 1 0 0 0 0 |  imm6  | 0 0 0 0 1 |  Rn  |o2| mask |
 * +--+--+--+-----------------+--------+-----------+------+--+------+
 */
static void disas_rotate_right_into_flags(DisasContext *s, uint32_t insn)
{
    int mask = extract32(insn, 0, 4);
    int o2 = extract32(insn, 4, 1);
    int rn = extract32(insn, 5, 5);
    int imm6 = extract32(insn, 15, 6);
    int sf_op_s = extract32(insn, 29, 3);
    TCGv_i64 tcg_rn;
    TCGv_i32 nzcv;

    if (sf_op_s != 5 || o2 != 0 || !dc_isar_feature(aa64_condm_4, s)) {
        unallocated_encoding(s);
        return;
    }

    tcg_rn = read_cpu_reg(s, rn, 1);
    tcg_gen_rotri_i64(tcg_rn, tcg_rn, imm6);

    nzcv = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(nzcv, tcg_rn);

    if (mask & 8) { /* N */
        tcg_gen_shli_i32(cpu_NF, nzcv, 31 - 3);
    }
    if (mask & 4) { /* Z */
        tcg_gen_not_i32(cpu_ZF, nzcv);
        tcg_gen_andi_i32(cpu_ZF, cpu_ZF, 4);
    }
    if (mask & 2) { /* C */
        tcg_gen_extract_i32(cpu_CF, nzcv, 1, 1);
    }
    if (mask & 1) { /* V */
        tcg_gen_shli_i32(cpu_VF, nzcv, 31 - 0);
    }

    tcg_temp_free_i32(nzcv);
}

/*
 * Evaluate into flags
 *  31 30 29                21        15   14        10      5  4      0
 * +--+--+--+-----------------+---------+----+---------+------+--+------+
 * |sf|op| S| 1 1 0 1 0 0 0 0 | opcode2 | sz | 0 0 1 0 |  Rn  |o3| mask |
 * +--+--+--+-----------------+---------+----+---------+------+--+------+
 */
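/*
 * (Reminder of the flag storage convention used here: cpu_NF and cpu_VF
 * hold their flag in bit 31, cpu_ZF is zero iff Z is set, and cpu_CF
 * holds 0 or 1.  So for SETF8 below, shifting the source left by 24
 * supplies N and doubles as Z, while V becomes bit 8 XOR bit 7.)
 */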
static void disas_evaluate_into_flags(DisasContext *s, uint32_t insn)
{
    int o3_mask = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int o2 = extract32(insn, 15, 6);
    int sz = extract32(insn, 14, 1);
    int sf_op_s = extract32(insn, 29, 3);
    TCGv_i32 tmp;
    int shift;

    if (sf_op_s != 1 || o2 != 0 || o3_mask != 0xd ||
        !dc_isar_feature(aa64_condm_4, s)) {
        unallocated_encoding(s);
        return;
    }
    shift = sz ? 16 : 24; /* SETF16 or SETF8 */

    tmp = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(tmp, cpu_reg(s, rn));
    tcg_gen_shli_i32(cpu_NF, tmp, shift);
    tcg_gen_shli_i32(cpu_VF, tmp, shift - 1);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_VF, cpu_NF);
    tcg_temp_free_i32(tmp);
}

/* Conditional compare (immediate / register)
 *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
 * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
 *        [1]                             y                [0]       [0]
 */
static void disas_cc(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, y, cond, rn, nzcv, is_imm;
    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
    TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
    DisasCompare c;

    if (!extract32(insn, 29, 1)) {
        unallocated_encoding(s);
        return;
    }
    if (insn & (1 << 10 | 1 << 4)) {
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    op = extract32(insn, 30, 1);
    is_imm = extract32(insn, 11, 1);
    y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
    cond = extract32(insn, 12, 4);
    rn = extract32(insn, 5, 5);
    nzcv = extract32(insn, 0, 4);

    /* Set T0 = !COND.  */
    tcg_t0 = tcg_temp_new_i32();
    arm_test_cc(&c, cond);
    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);

    /* Load the arguments for the new comparison.  */
    if (is_imm) {
        tcg_y = tcg_temp_new_i64();
        tcg_gen_movi_i64(tcg_y, y);
    } else {
        tcg_y = cpu_reg(s, y);
    }
    tcg_rn = cpu_reg(s, rn);

    /* Set the flags for the new comparison.  */
    tcg_tmp = tcg_temp_new_i64();
    if (op) {
        gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    } else {
        gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
    }
    tcg_temp_free_i64(tcg_tmp);

    /* If COND was false, force the flags to #nzcv.  Compute two masks
     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
     * For tcg hosts that support ANDC, we can make do with just T1.
     * In either case, allow the tcg optimizer to delete any unused mask.
     */
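    /*
     * (Since T0 = !COND as a 0/1 value, T1 = -T0 and T2 = T0 - 1
     * produce exactly those masks: COND true gives T0=0, T1=0, T2=-1;
     * COND false gives T0=1, T1=-1, T2=0.)
     */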
    tcg_t1 = tcg_temp_new_i32();
    tcg_t2 = tcg_temp_new_i32();
    tcg_gen_neg_i32(tcg_t1, tcg_t0);
    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);

    if (nzcv & 8) { /* N */
        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
        }
    }
    if (nzcv & 4) { /* Z */
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
        }
    } else {
        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
    }
    if (nzcv & 2) { /* C */
        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
        }
    }
    if (nzcv & 1) { /* V */
        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
    } else {
        if (TCG_TARGET_HAS_andc_i32) {
            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
        } else {
            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
        }
    }
    tcg_temp_free_i32(tcg_t0);
    tcg_temp_free_i32(tcg_t1);
    tcg_temp_free_i32(tcg_t2);
}

/* Conditional select
 *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
 * +----+----+---+-----------------+------+------+-----+------+------+
 * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
 * +----+----+---+-----------------+------+------+-----+------+------+
 */
static void disas_cond_select(DisasContext *s, uint32_t insn)
{
    unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
    TCGv_i64 tcg_rd, zero;
    DisasCompare64 c;

    if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
        /* S == 1 or op2<1> == 1 */
        unallocated_encoding(s);
        return;
    }
    sf = extract32(insn, 31, 1);
    else_inv = extract32(insn, 30, 1);
    rm = extract32(insn, 16, 5);
    cond = extract32(insn, 12, 4);
    else_inc = extract32(insn, 10, 1);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    tcg_rd = cpu_reg(s, rd);

    a64_test_cc(&c, cond);
    zero = tcg_constant_i64(0);

    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
        /* CSET & CSETM.  */
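        /*
         * (CSET Rd, cc assembles as CSINC Rd, XZR, XZR, invert(cc), so
         * setcond on the inverse of the encoded condition below yields
         * the 0/1 result directly; CSETM then negates it to 0/-1.)
         */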
        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
        if (else_inv) {
            tcg_gen_neg_i64(tcg_rd, tcg_rd);
        }
    } else {
        TCGv_i64 t_true = cpu_reg(s, rn);
        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
        if (else_inv && else_inc) {
            tcg_gen_neg_i64(t_false, t_false);
        } else if (else_inv) {
            tcg_gen_not_i64(t_false, t_false);
        } else if (else_inc) {
            tcg_gen_addi_i64(t_false, t_false, 1);
        }
        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
    }

    if (!sf) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}

static void handle_clz(DisasContext *s, unsigned int sf,
                       unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd, tcg_rn;
    tcg_rd = cpu_reg(s, rd);
    tcg_rn = cpu_reg(s, rn);

    if (sf) {
        tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
    } else {
        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
        tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
        tcg_temp_free_i32(tcg_tmp32);
    }
}

static void handle_cls(DisasContext *s, unsigned int sf,
                       unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd, tcg_rn;
    tcg_rd = cpu_reg(s, rd);
    tcg_rn = cpu_reg(s, rn);

    if (sf) {
        tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
    } else {
        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
        tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
        tcg_temp_free_i32(tcg_tmp32);
    }
}

static void handle_rbit(DisasContext *s, unsigned int sf,
                        unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd, tcg_rn;
    tcg_rd = cpu_reg(s, rd);
    tcg_rn = cpu_reg(s, rn);

    if (sf) {
        gen_helper_rbit64(tcg_rd, tcg_rn);
    } else {
        TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
        gen_helper_rbit(tcg_tmp32, tcg_tmp32);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
        tcg_temp_free_i32(tcg_tmp32);
    }
}

/* REV with sf==1, opcode==3 ("REV64") */
static void handle_rev64(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    if (!sf) {
        unallocated_encoding(s);
        return;
    }
    tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
}

/* REV with sf==0, opcode==2
 * REV32 (sf==1, opcode==2)
 */
static void handle_rev32(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_rn = cpu_reg(s, rn);

    if (sf) {
        tcg_gen_bswap64_i64(tcg_rd, tcg_rn);
        tcg_gen_rotri_i64(tcg_rd, tcg_rd, 32);
    } else {
        tcg_gen_bswap32_i64(tcg_rd, tcg_rn, TCG_BSWAP_OZ);
    }
}

/* REV16 (opcode==1) */
static void handle_rev16(DisasContext *s, unsigned int sf,
                         unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();
    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
    TCGv_i64 mask = tcg_constant_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);

    tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
    tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
    tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
    tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
    tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);

    tcg_temp_free_i64(tcg_tmp);
}

/* Data-processing (1 source)
 *   31  30  29  28             21 20     16 15    10 9    5 4    0
 * +----+---+---+-----------------+---------+--------+------+------+
 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
 * +----+---+---+-----------------+---------+--------+------+------+
 */
static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
{
    unsigned int sf, opcode, opcode2, rn, rd;
    TCGv_i64 tcg_rd;

    if (extract32(insn, 29, 1)) {
        unallocated_encoding(s);
        return;
    }

    sf = extract32(insn, 31, 1);
    opcode = extract32(insn, 10, 6);
    opcode2 = extract32(insn, 16, 5);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

#define MAP(SF, O2, O1) ((SF) | (O1 << 1) | (O2 << 7))

    switch (MAP(sf, opcode2, opcode)) {
    case MAP(0, 0x00, 0x00): /* RBIT */
    case MAP(1, 0x00, 0x00):
        handle_rbit(s, sf, rn, rd);
        break;
    case MAP(0, 0x00, 0x01): /* REV16 */
    case MAP(1, 0x00, 0x01):
        handle_rev16(s, sf, rn, rd);
        break;
    case MAP(0, 0x00, 0x02): /* REV/REV32 */
    case MAP(1, 0x00, 0x02):
        handle_rev32(s, sf, rn, rd);
        break;
    case MAP(1, 0x00, 0x03): /* REV64 */
        handle_rev64(s, sf, rn, rd);
        break;
    case MAP(0, 0x00, 0x04): /* CLZ */
    case MAP(1, 0x00, 0x04):
        handle_clz(s, sf, rn, rd);
        break;
    case MAP(0, 0x00, 0x05): /* CLS */
    case MAP(1, 0x00, 0x05):
        handle_cls(s, sf, rn, rd);
        break;
    case MAP(1, 0x01, 0x00): /* PACIA */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x01): /* PACIB */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x02): /* PACDA */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x03): /* PACDB */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x04): /* AUTIA */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autia(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x05): /* AUTIB */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autib(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x06): /* AUTDA */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autda(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x07): /* AUTDB */
        if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, cpu_reg_sp(s, rn));
        } else if (!dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        break;
    case MAP(1, 0x01, 0x08): /* PACIZA */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    case MAP(1, 0x01, 0x09): /* PACIZB */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    case MAP(1, 0x01, 0x0a): /* PACDZA */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    case MAP(1, 0x01, 0x0b): /* PACDZB */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_pacdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    case MAP(1, 0x01, 0x0c): /* AUTIZA */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autia(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    case MAP(1, 0x01, 0x0d): /* AUTIZB */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autib(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    case MAP(1, 0x01, 0x0e): /* AUTDZA */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autda(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    case MAP(1, 0x01, 0x0f): /* AUTDZB */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_autdb(tcg_rd, cpu_env, tcg_rd, new_tmp_a64_zero(s));
        }
        break;
    case MAP(1, 0x01, 0x10): /* XPACI */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_xpaci(tcg_rd, cpu_env, tcg_rd);
        }
        break;
    case MAP(1, 0x01, 0x11): /* XPACD */
        if (!dc_isar_feature(aa64_pauth, s) || rn != 31) {
            goto do_unallocated;
        } else if (s->pauth_active) {
            tcg_rd = cpu_reg(s, rd);
            gen_helper_xpacd(tcg_rd, cpu_env, tcg_rd);
        }
        break;
    default:
    do_unallocated:
        unallocated_encoding(s);
        break;
    }

#undef MAP
}

static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
                       unsigned int rm, unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_n, tcg_m, tcg_rd;
    tcg_rd = cpu_reg(s, rd);

    if (!sf && is_signed) {
        tcg_n = tcg_temp_new_i64();
        tcg_m = tcg_temp_new_i64();
        tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
        tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
    } else {
        tcg_n = read_cpu_reg(s, rn, sf);
        tcg_m = read_cpu_reg(s, rm, sf);
    }

    if (is_signed) {
        gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
    } else {
        gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
    }

    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}

/* LSLV, LSRV, ASRV, RORV */
static void handle_shift_reg(DisasContext *s,
                             enum a64_shift_type shift_type, unsigned int sf,
                             unsigned int rm, unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_shift = tcg_temp_new_i64();
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);

    tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
    shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
    tcg_temp_free_i64(tcg_shift);
}

/* CRC32[BHWX], CRC32C[BHWX] */
static void handle_crc32(DisasContext *s,
                         unsigned int sf, unsigned int sz, bool crc32c,
                         unsigned int rm, unsigned int rn, unsigned int rd)
{
    TCGv_i64 tcg_acc, tcg_val;
    TCGv_i32 tcg_bytes;

    if (!dc_isar_feature(aa64_crc32, s)
        || (sf == 1 && sz != 3)
        || (sf == 0 && sz == 3)) {
        unallocated_encoding(s);
        return;
    }

    if (sz == 3) {
        tcg_val = cpu_reg(s, rm);
    } else {
        uint64_t mask;
        switch (sz) {
        case 0:
            mask = 0xFF;
            break;
        case 1:
            mask = 0xFFFF;
            break;
        case 2:
            mask = 0xFFFFFFFF;
            break;
        default:
            g_assert_not_reached();
        }
        tcg_val = tcg_temp_new_i64();
        tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
    }

    tcg_acc = cpu_reg(s, rn);
    tcg_bytes = tcg_constant_i32(1 << sz);

    if (crc32c) {
        gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
    } else {
        gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
    }
}

/* Data-processing (2 source)
 *   31   30  29 28             21 20  16 15    10 9    5 4    0
 * +----+---+---+-----------------+------+--------+------+------+
 * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
 * +----+---+---+-----------------+------+--------+------+------+
 */
static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
{
    unsigned int sf, rm, opcode, rn, rd, setflag;
    sf = extract32(insn, 31, 1);
    setflag = extract32(insn, 29, 1);
    rm = extract32(insn, 16, 5);
    opcode = extract32(insn, 10, 6);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    if (setflag && opcode != 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0: /* SUBP(S) */
        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
            goto do_unallocated;
        } else {
            TCGv_i64 tcg_n, tcg_m, tcg_d;

            tcg_n = read_cpu_reg_sp(s, rn, true);
            tcg_m = read_cpu_reg_sp(s, rm, true);
            tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
            tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
            tcg_d = cpu_reg(s, rd);

            if (setflag) {
                gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
            } else {
                tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
            }
        }
        break;
    case 2: /* UDIV */
        handle_div(s, false, sf, rm, rn, rd);
        break;
    case 3: /* SDIV */
        handle_div(s, true, sf, rm, rn, rd);
        break;
    case 4: /* IRG */
        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
            goto do_unallocated;
        }
        if (s->ata) {
            gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
                           cpu_reg_sp(s, rn), cpu_reg(s, rm));
        } else {
            gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
                                             cpu_reg_sp(s, rn));
        }
        break;
    case 5: /* GMI */
        if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
            goto do_unallocated;
        } else {
            TCGv_i64 t = tcg_temp_new_i64();

            tcg_gen_extract_i64(t, cpu_reg_sp(s, rn), 56, 4);
            tcg_gen_shl_i64(t, tcg_constant_i64(1), t);
            tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t);

            tcg_temp_free_i64(t);
        }
        break;
    case 8: /* LSLV */
        handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
        break;
    case 9: /* LSRV */
        handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
        break;
    case 10: /* ASRV */
        handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
        break;
    case 11: /* RORV */
        handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
        break;
    case 12: /* PACGA */
        if (sf == 0 || !dc_isar_feature(aa64_pauth, s)) {
            goto do_unallocated;
        }
        gen_helper_pacga(cpu_reg(s, rd), cpu_env,
                         cpu_reg(s, rn), cpu_reg_sp(s, rm));
        break;
    case 16:
    case 17:
    case 18:
    case 19:
    case 20:
    case 21:
    case 22:
    case 23: /* CRC32 */
    {
        int sz = extract32(opcode, 0, 2);
        bool crc32c = extract32(opcode, 2, 1);
        handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
        break;
    }
    default:
    do_unallocated:
        unallocated_encoding(s);
        break;
    }
}

/*
 * Data processing - register
 *  31  30 29  28      25    21  20  16      10         0
 * +--+---+--+---+-------+-----+-------+-------+---------+
 * |  |op0|  |op1| 1 0 1 | op2 |       |  op3  |         |
 * +--+---+--+---+-------+-----+-------+-------+---------+
 */
static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
{
    int op0 = extract32(insn, 30, 1);
    int op1 = extract32(insn, 28, 1);
    int op2 = extract32(insn, 21, 4);
    int op3 = extract32(insn, 10, 6);

    if (!op1) {
        if (op2 & 8) {
            if (op2 & 1) {
                /* Add/sub (extended register) */
                disas_add_sub_ext_reg(s, insn);
            } else {
                /* Add/sub (shifted register) */
                disas_add_sub_reg(s, insn);
            }
        } else {
            /* Logical (shifted register) */
            disas_logic_reg(s, insn);
        }
        return;
    }

    switch (op2) {
    case 0x0:
        switch (op3) {
        case 0x00: /* Add/subtract (with carry) */
            disas_adc_sbc(s, insn);
            break;

        case 0x01: /* Rotate right into flags */
        case 0x21:
            disas_rotate_right_into_flags(s, insn);
            break;

        case 0x02: /* Evaluate into flags */
        case 0x12:
        case 0x22:
        case 0x32:
            disas_evaluate_into_flags(s, insn);
            break;

        default:
            goto do_unallocated;
        }
        break;

    case 0x2: /* Conditional compare */
        disas_cc(s, insn); /* both imm and reg forms */
        break;

    case 0x4: /* Conditional select */
        disas_cond_select(s, insn);
        break;

    case 0x6: /* Data-processing */
        if (op0) {    /* (1 source) */
            disas_data_proc_1src(s, insn);
        } else {      /* (2 source) */
            disas_data_proc_2src(s, insn);
        }
        break;
    case 0x8 ... 0xf: /* (3 source) */
0xf: /* (3 source) */ 5984 disas_data_proc_3src(s, insn); 5985 break; 5986 5987 default: 5988 do_unallocated: 5989 unallocated_encoding(s); 5990 break; 5991 } 5992 } 5993 5994 static void handle_fp_compare(DisasContext *s, int size, 5995 unsigned int rn, unsigned int rm, 5996 bool cmp_with_zero, bool signal_all_nans) 5997 { 5998 TCGv_i64 tcg_flags = tcg_temp_new_i64(); 5999 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 6000 6001 if (size == MO_64) { 6002 TCGv_i64 tcg_vn, tcg_vm; 6003 6004 tcg_vn = read_fp_dreg(s, rn); 6005 if (cmp_with_zero) { 6006 tcg_vm = tcg_constant_i64(0); 6007 } else { 6008 tcg_vm = read_fp_dreg(s, rm); 6009 } 6010 if (signal_all_nans) { 6011 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6012 } else { 6013 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6014 } 6015 tcg_temp_free_i64(tcg_vn); 6016 tcg_temp_free_i64(tcg_vm); 6017 } else { 6018 TCGv_i32 tcg_vn = tcg_temp_new_i32(); 6019 TCGv_i32 tcg_vm = tcg_temp_new_i32(); 6020 6021 read_vec_element_i32(s, tcg_vn, rn, 0, size); 6022 if (cmp_with_zero) { 6023 tcg_gen_movi_i32(tcg_vm, 0); 6024 } else { 6025 read_vec_element_i32(s, tcg_vm, rm, 0, size); 6026 } 6027 6028 switch (size) { 6029 case MO_32: 6030 if (signal_all_nans) { 6031 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6032 } else { 6033 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6034 } 6035 break; 6036 case MO_16: 6037 if (signal_all_nans) { 6038 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6039 } else { 6040 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst); 6041 } 6042 break; 6043 default: 6044 g_assert_not_reached(); 6045 } 6046 6047 tcg_temp_free_i32(tcg_vn); 6048 tcg_temp_free_i32(tcg_vm); 6049 } 6050 6051 tcg_temp_free_ptr(fpst); 6052 6053 gen_set_nzcv(tcg_flags); 6054 6055 tcg_temp_free_i64(tcg_flags); 6056 } 6057 6058 /* Floating point compare 6059 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0 6060 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 6061 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 | 6062 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+ 6063 */ 6064 static void disas_fp_compare(DisasContext *s, uint32_t insn) 6065 { 6066 unsigned int mos, type, rm, op, rn, opc, op2r; 6067 int size; 6068 6069 mos = extract32(insn, 29, 3); 6070 type = extract32(insn, 22, 2); 6071 rm = extract32(insn, 16, 5); 6072 op = extract32(insn, 14, 2); 6073 rn = extract32(insn, 5, 5); 6074 opc = extract32(insn, 3, 2); 6075 op2r = extract32(insn, 0, 3); 6076 6077 if (mos || op || op2r) { 6078 unallocated_encoding(s); 6079 return; 6080 } 6081 6082 switch (type) { 6083 case 0: 6084 size = MO_32; 6085 break; 6086 case 1: 6087 size = MO_64; 6088 break; 6089 case 3: 6090 size = MO_16; 6091 if (dc_isar_feature(aa64_fp16, s)) { 6092 break; 6093 } 6094 /* fallthru */ 6095 default: 6096 unallocated_encoding(s); 6097 return; 6098 } 6099 6100 if (!fp_access_check(s)) { 6101 return; 6102 } 6103 6104 handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2); 6105 } 6106 6107 /* Floating point conditional compare 6108 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0 6109 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 6110 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv | 6111 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+ 6112 */ 6113 static void disas_fp_ccomp(DisasContext *s, uint32_t insn) 6114 { 6115 unsigned int 
mos, type, rm, cond, rn, op, nzcv; 6116 TCGLabel *label_continue = NULL; 6117 int size; 6118 6119 mos = extract32(insn, 29, 3); 6120 type = extract32(insn, 22, 2); 6121 rm = extract32(insn, 16, 5); 6122 cond = extract32(insn, 12, 4); 6123 rn = extract32(insn, 5, 5); 6124 op = extract32(insn, 4, 1); 6125 nzcv = extract32(insn, 0, 4); 6126 6127 if (mos) { 6128 unallocated_encoding(s); 6129 return; 6130 } 6131 6132 switch (type) { 6133 case 0: 6134 size = MO_32; 6135 break; 6136 case 1: 6137 size = MO_64; 6138 break; 6139 case 3: 6140 size = MO_16; 6141 if (dc_isar_feature(aa64_fp16, s)) { 6142 break; 6143 } 6144 /* fallthru */ 6145 default: 6146 unallocated_encoding(s); 6147 return; 6148 } 6149 6150 if (!fp_access_check(s)) { 6151 return; 6152 } 6153 6154 if (cond < 0x0e) { /* not always */ 6155 TCGLabel *label_match = gen_new_label(); 6156 label_continue = gen_new_label(); 6157 arm_gen_test_cc(cond, label_match); 6158 /* nomatch: */ 6159 gen_set_nzcv(tcg_constant_i64(nzcv << 28)); 6160 tcg_gen_br(label_continue); 6161 gen_set_label(label_match); 6162 } 6163 6164 handle_fp_compare(s, size, rn, rm, false, op); 6165 6166 if (cond < 0x0e) { 6167 gen_set_label(label_continue); 6168 } 6169 } 6170 6171 /* Floating point conditional select 6172 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 6173 * +---+---+---+-----------+------+---+------+------+-----+------+------+ 6174 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd | 6175 * +---+---+---+-----------+------+---+------+------+-----+------+------+ 6176 */ 6177 static void disas_fp_csel(DisasContext *s, uint32_t insn) 6178 { 6179 unsigned int mos, type, rm, cond, rn, rd; 6180 TCGv_i64 t_true, t_false; 6181 DisasCompare64 c; 6182 MemOp sz; 6183 6184 mos = extract32(insn, 29, 3); 6185 type = extract32(insn, 22, 2); 6186 rm = extract32(insn, 16, 5); 6187 cond = extract32(insn, 12, 4); 6188 rn = extract32(insn, 5, 5); 6189 rd = extract32(insn, 0, 5); 6190 6191 if (mos) { 6192 unallocated_encoding(s); 6193 return; 6194 } 6195 6196 switch (type) { 6197 case 0: 6198 sz = MO_32; 6199 break; 6200 case 1: 6201 sz = MO_64; 6202 break; 6203 case 3: 6204 sz = MO_16; 6205 if (dc_isar_feature(aa64_fp16, s)) { 6206 break; 6207 } 6208 /* fallthru */ 6209 default: 6210 unallocated_encoding(s); 6211 return; 6212 } 6213 6214 if (!fp_access_check(s)) { 6215 return; 6216 } 6217 6218 /* Zero extend sreg & hreg inputs to 64 bits now. */ 6219 t_true = tcg_temp_new_i64(); 6220 t_false = tcg_temp_new_i64(); 6221 read_vec_element(s, t_true, rn, 0, sz); 6222 read_vec_element(s, t_false, rm, 0, sz); 6223 6224 a64_test_cc(&c, cond); 6225 tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), 6226 t_true, t_false); 6227 tcg_temp_free_i64(t_false); 6228 6229 /* Note that sregs & hregs write back zeros to the high bits, 6230 and we've already done the zero-extension. 
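So a single write_fp_dreg below serves all three element sizes: the movcond result is already the selected value zero-extended to 64 bits, and write_fp_dreg itself clears the rest of the Q register.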
*/ 6231 write_fp_dreg(s, rd, t_true); 6232 tcg_temp_free_i64(t_true); 6233 } 6234 6235 /* Floating-point data-processing (1 source) - half precision */ 6236 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) 6237 { 6238 TCGv_ptr fpst = NULL; 6239 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 6240 TCGv_i32 tcg_res = tcg_temp_new_i32(); 6241 6242 switch (opcode) { 6243 case 0x0: /* FMOV */ 6244 tcg_gen_mov_i32(tcg_res, tcg_op); 6245 break; 6246 case 0x1: /* FABS */ 6247 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 6248 break; 6249 case 0x2: /* FNEG */ 6250 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 6251 break; 6252 case 0x3: /* FSQRT */ 6253 fpst = fpstatus_ptr(FPST_FPCR_F16); 6254 gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); 6255 break; 6256 case 0x8: /* FRINTN */ 6257 case 0x9: /* FRINTP */ 6258 case 0xa: /* FRINTM */ 6259 case 0xb: /* FRINTZ */ 6260 case 0xc: /* FRINTA */ 6261 { 6262 TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7)); 6263 fpst = fpstatus_ptr(FPST_FPCR_F16); 6264 6265 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 6266 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 6267 6268 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 6269 tcg_temp_free_i32(tcg_rmode); 6270 break; 6271 } 6272 case 0xe: /* FRINTX */ 6273 fpst = fpstatus_ptr(FPST_FPCR_F16); 6274 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst); 6275 break; 6276 case 0xf: /* FRINTI */ 6277 fpst = fpstatus_ptr(FPST_FPCR_F16); 6278 gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); 6279 break; 6280 default: 6281 g_assert_not_reached(); 6282 } 6283 6284 write_fp_sreg(s, rd, tcg_res); 6285 6286 if (fpst) { 6287 tcg_temp_free_ptr(fpst); 6288 } 6289 tcg_temp_free_i32(tcg_op); 6290 tcg_temp_free_i32(tcg_res); 6291 } 6292 6293 /* Floating-point data-processing (1 source) - single precision */ 6294 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) 6295 { 6296 void (*gen_fpst)(TCGv_i32, TCGv_i32, TCGv_ptr); 6297 TCGv_i32 tcg_op, tcg_res; 6298 TCGv_ptr fpst; 6299 int rmode = -1; 6300 6301 tcg_op = read_fp_sreg(s, rn); 6302 tcg_res = tcg_temp_new_i32(); 6303 6304 switch (opcode) { 6305 case 0x0: /* FMOV */ 6306 tcg_gen_mov_i32(tcg_res, tcg_op); 6307 goto done; 6308 case 0x1: /* FABS */ 6309 gen_helper_vfp_abss(tcg_res, tcg_op); 6310 goto done; 6311 case 0x2: /* FNEG */ 6312 gen_helper_vfp_negs(tcg_res, tcg_op); 6313 goto done; 6314 case 0x3: /* FSQRT */ 6315 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); 6316 goto done; 6317 case 0x6: /* BFCVT */ 6318 gen_fpst = gen_helper_bfcvt; 6319 break; 6320 case 0x8: /* FRINTN */ 6321 case 0x9: /* FRINTP */ 6322 case 0xa: /* FRINTM */ 6323 case 0xb: /* FRINTZ */ 6324 case 0xc: /* FRINTA */ 6325 rmode = arm_rmode_to_sf(opcode & 7); 6326 gen_fpst = gen_helper_rints; 6327 break; 6328 case 0xe: /* FRINTX */ 6329 gen_fpst = gen_helper_rints_exact; 6330 break; 6331 case 0xf: /* FRINTI */ 6332 gen_fpst = gen_helper_rints; 6333 break; 6334 case 0x10: /* FRINT32Z */ 6335 rmode = float_round_to_zero; 6336 gen_fpst = gen_helper_frint32_s; 6337 break; 6338 case 0x11: /* FRINT32X */ 6339 gen_fpst = gen_helper_frint32_s; 6340 break; 6341 case 0x12: /* FRINT64Z */ 6342 rmode = float_round_to_zero; 6343 gen_fpst = gen_helper_frint64_s; 6344 break; 6345 case 0x13: /* FRINT64X */ 6346 gen_fpst = gen_helper_frint64_s; 6347 break; 6348 default: 6349 g_assert_not_reached(); 6350 } 6351 6352 fpst = fpstatus_ptr(FPST_FPCR); 6353 if (rmode >= 0) { 6354 TCGv_i32 tcg_rmode = tcg_const_i32(rmode); 6355 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 
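/* Note: set_rmode swaps rounding modes, handing the previous mode back in tcg_rmode; passing the same temp to the second set_rmode call below is what restores the original FPCR setting. */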
6356 gen_fpst(tcg_res, tcg_op, fpst); 6357 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 6358 tcg_temp_free_i32(tcg_rmode); 6359 } else { 6360 gen_fpst(tcg_res, tcg_op, fpst); 6361 } 6362 tcg_temp_free_ptr(fpst); 6363 6364 done: 6365 write_fp_sreg(s, rd, tcg_res); 6366 tcg_temp_free_i32(tcg_op); 6367 tcg_temp_free_i32(tcg_res); 6368 } 6369 6370 /* Floating-point data-processing (1 source) - double precision */ 6371 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) 6372 { 6373 void (*gen_fpst)(TCGv_i64, TCGv_i64, TCGv_ptr); 6374 TCGv_i64 tcg_op, tcg_res; 6375 TCGv_ptr fpst; 6376 int rmode = -1; 6377 6378 switch (opcode) { 6379 case 0x0: /* FMOV */ 6380 gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0); 6381 return; 6382 } 6383 6384 tcg_op = read_fp_dreg(s, rn); 6385 tcg_res = tcg_temp_new_i64(); 6386 6387 switch (opcode) { 6388 case 0x1: /* FABS */ 6389 gen_helper_vfp_absd(tcg_res, tcg_op); 6390 goto done; 6391 case 0x2: /* FNEG */ 6392 gen_helper_vfp_negd(tcg_res, tcg_op); 6393 goto done; 6394 case 0x3: /* FSQRT */ 6395 gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env); 6396 goto done; 6397 case 0x8: /* FRINTN */ 6398 case 0x9: /* FRINTP */ 6399 case 0xa: /* FRINTM */ 6400 case 0xb: /* FRINTZ */ 6401 case 0xc: /* FRINTA */ 6402 rmode = arm_rmode_to_sf(opcode & 7); 6403 gen_fpst = gen_helper_rintd; 6404 break; 6405 case 0xe: /* FRINTX */ 6406 gen_fpst = gen_helper_rintd_exact; 6407 break; 6408 case 0xf: /* FRINTI */ 6409 gen_fpst = gen_helper_rintd; 6410 break; 6411 case 0x10: /* FRINT32Z */ 6412 rmode = float_round_to_zero; 6413 gen_fpst = gen_helper_frint32_d; 6414 break; 6415 case 0x11: /* FRINT32X */ 6416 gen_fpst = gen_helper_frint32_d; 6417 break; 6418 case 0x12: /* FRINT64Z */ 6419 rmode = float_round_to_zero; 6420 gen_fpst = gen_helper_frint64_d; 6421 break; 6422 case 0x13: /* FRINT64X */ 6423 gen_fpst = gen_helper_frint64_d; 6424 break; 6425 default: 6426 g_assert_not_reached(); 6427 } 6428 6429 fpst = fpstatus_ptr(FPST_FPCR); 6430 if (rmode >= 0) { 6431 TCGv_i32 tcg_rmode = tcg_const_i32(rmode); 6432 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 6433 gen_fpst(tcg_res, tcg_op, fpst); 6434 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst); 6435 tcg_temp_free_i32(tcg_rmode); 6436 } else { 6437 gen_fpst(tcg_res, tcg_op, fpst); 6438 } 6439 tcg_temp_free_ptr(fpst); 6440 6441 done: 6442 write_fp_dreg(s, rd, tcg_res); 6443 tcg_temp_free_i64(tcg_op); 6444 tcg_temp_free_i64(tcg_res); 6445 } 6446 6447 static void handle_fp_fcvt(DisasContext *s, int opcode, 6448 int rd, int rn, int dtype, int ntype) 6449 { 6450 switch (ntype) { 6451 case 0x0: 6452 { 6453 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 6454 if (dtype == 1) { 6455 /* Single to double */ 6456 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 6457 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env); 6458 write_fp_dreg(s, rd, tcg_rd); 6459 tcg_temp_free_i64(tcg_rd); 6460 } else { 6461 /* Single to half */ 6462 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6463 TCGv_i32 ahp = get_ahp_flag(); 6464 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6465 6466 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp); 6467 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 6468 write_fp_sreg(s, rd, tcg_rd); 6469 tcg_temp_free_i32(tcg_rd); 6470 tcg_temp_free_i32(ahp); 6471 tcg_temp_free_ptr(fpst); 6472 } 6473 tcg_temp_free_i32(tcg_rn); 6474 break; 6475 } 6476 case 0x1: 6477 { 6478 TCGv_i64 tcg_rn = read_fp_dreg(s, rn); 6479 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6480 if (dtype == 0) { 6481 /* Double to single */ 6482 
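/* The double<->single conversions pick up their rounding mode from the FPCR via cpu_env; the half-precision conversions instead take an explicit fpst plus the FPCR.AHP flag. */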
gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env); 6483 } else { 6484 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6485 TCGv_i32 ahp = get_ahp_flag(); 6486 /* Double to half */ 6487 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp); 6488 /* write_fp_sreg is OK here because top half of tcg_rd is zero */ 6489 tcg_temp_free_ptr(fpst); 6490 tcg_temp_free_i32(ahp); 6491 } 6492 write_fp_sreg(s, rd, tcg_rd); 6493 tcg_temp_free_i32(tcg_rd); 6494 tcg_temp_free_i64(tcg_rn); 6495 break; 6496 } 6497 case 0x3: 6498 { 6499 TCGv_i32 tcg_rn = read_fp_sreg(s, rn); 6500 TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR); 6501 TCGv_i32 tcg_ahp = get_ahp_flag(); 6502 tcg_gen_ext16u_i32(tcg_rn, tcg_rn); 6503 if (dtype == 0) { 6504 /* Half to single */ 6505 TCGv_i32 tcg_rd = tcg_temp_new_i32(); 6506 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 6507 write_fp_sreg(s, rd, tcg_rd); 6508 tcg_temp_free_i32(tcg_rd); 6509 } else { 6510 /* Half to double */ 6511 TCGv_i64 tcg_rd = tcg_temp_new_i64(); 6512 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp); 6513 write_fp_dreg(s, rd, tcg_rd); 6514 tcg_temp_free_i64(tcg_rd); 6515 } 6516 tcg_temp_free_i32(tcg_rn); 6517 tcg_temp_free_ptr(tcg_fpst); 6518 tcg_temp_free_i32(tcg_ahp); 6519 break; 6520 } 6521 default: 6522 g_assert_not_reached(); 6523 } 6524 } 6525 6526 /* Floating point data-processing (1 source) 6527 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0 6528 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 6529 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd | 6530 * +---+---+---+-----------+------+---+--------+-----------+------+------+ 6531 */ 6532 static void disas_fp_1src(DisasContext *s, uint32_t insn) 6533 { 6534 int mos = extract32(insn, 29, 3); 6535 int type = extract32(insn, 22, 2); 6536 int opcode = extract32(insn, 15, 6); 6537 int rn = extract32(insn, 5, 5); 6538 int rd = extract32(insn, 0, 5); 6539 6540 if (mos) { 6541 goto do_unallocated; 6542 } 6543 6544 switch (opcode) { 6545 case 0x4: case 0x5: case 0x7: 6546 { 6547 /* FCVT between half, single and double precision */ 6548 int dtype = extract32(opcode, 0, 2); 6549 if (type == 2 || dtype == type) { 6550 goto do_unallocated; 6551 } 6552 if (!fp_access_check(s)) { 6553 return; 6554 } 6555 6556 handle_fp_fcvt(s, opcode, rd, rn, dtype, type); 6557 break; 6558 } 6559 6560 case 0x10 ... 0x13: /* FRINT{32,64}{X,Z} */ 6561 if (type > 1 || !dc_isar_feature(aa64_frint, s)) { 6562 goto do_unallocated; 6563 } 6564 /* fall through */ 6565 case 0x0 ... 0x3: 6566 case 0x8 ... 0xc: 6567 case 0xe ... 
0xf: 6568 /* 32-to-32 and 64-to-64 ops */ 6569 switch (type) { 6570 case 0: 6571 if (!fp_access_check(s)) { 6572 return; 6573 } 6574 handle_fp_1src_single(s, opcode, rd, rn); 6575 break; 6576 case 1: 6577 if (!fp_access_check(s)) { 6578 return; 6579 } 6580 handle_fp_1src_double(s, opcode, rd, rn); 6581 break; 6582 case 3: 6583 if (!dc_isar_feature(aa64_fp16, s)) { 6584 goto do_unallocated; 6585 } 6586 6587 if (!fp_access_check(s)) { 6588 return; 6589 } 6590 handle_fp_1src_half(s, opcode, rd, rn); 6591 break; 6592 default: 6593 goto do_unallocated; 6594 } 6595 break; 6596 6597 case 0x6: 6598 switch (type) { 6599 case 1: /* BFCVT */ 6600 if (!dc_isar_feature(aa64_bf16, s)) { 6601 goto do_unallocated; 6602 } 6603 if (!fp_access_check(s)) { 6604 return; 6605 } 6606 handle_fp_1src_single(s, opcode, rd, rn); 6607 break; 6608 default: 6609 goto do_unallocated; 6610 } 6611 break; 6612 6613 default: 6614 do_unallocated: 6615 unallocated_encoding(s); 6616 break; 6617 } 6618 } 6619 6620 /* Floating-point data-processing (2 source) - single precision */ 6621 static void handle_fp_2src_single(DisasContext *s, int opcode, 6622 int rd, int rn, int rm) 6623 { 6624 TCGv_i32 tcg_op1; 6625 TCGv_i32 tcg_op2; 6626 TCGv_i32 tcg_res; 6627 TCGv_ptr fpst; 6628 6629 tcg_res = tcg_temp_new_i32(); 6630 fpst = fpstatus_ptr(FPST_FPCR); 6631 tcg_op1 = read_fp_sreg(s, rn); 6632 tcg_op2 = read_fp_sreg(s, rm); 6633 6634 switch (opcode) { 6635 case 0x0: /* FMUL */ 6636 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 6637 break; 6638 case 0x1: /* FDIV */ 6639 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); 6640 break; 6641 case 0x2: /* FADD */ 6642 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); 6643 break; 6644 case 0x3: /* FSUB */ 6645 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); 6646 break; 6647 case 0x4: /* FMAX */ 6648 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); 6649 break; 6650 case 0x5: /* FMIN */ 6651 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); 6652 break; 6653 case 0x6: /* FMAXNM */ 6654 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); 6655 break; 6656 case 0x7: /* FMINNM */ 6657 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); 6658 break; 6659 case 0x8: /* FNMUL */ 6660 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); 6661 gen_helper_vfp_negs(tcg_res, tcg_res); 6662 break; 6663 } 6664 6665 write_fp_sreg(s, rd, tcg_res); 6666 6667 tcg_temp_free_ptr(fpst); 6668 tcg_temp_free_i32(tcg_op1); 6669 tcg_temp_free_i32(tcg_op2); 6670 tcg_temp_free_i32(tcg_res); 6671 } 6672 6673 /* Floating-point data-processing (2 source) - double precision */ 6674 static void handle_fp_2src_double(DisasContext *s, int opcode, 6675 int rd, int rn, int rm) 6676 { 6677 TCGv_i64 tcg_op1; 6678 TCGv_i64 tcg_op2; 6679 TCGv_i64 tcg_res; 6680 TCGv_ptr fpst; 6681 6682 tcg_res = tcg_temp_new_i64(); 6683 fpst = fpstatus_ptr(FPST_FPCR); 6684 tcg_op1 = read_fp_dreg(s, rn); 6685 tcg_op2 = read_fp_dreg(s, rm); 6686 6687 switch (opcode) { 6688 case 0x0: /* FMUL */ 6689 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 6690 break; 6691 case 0x1: /* FDIV */ 6692 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); 6693 break; 6694 case 0x2: /* FADD */ 6695 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); 6696 break; 6697 case 0x3: /* FSUB */ 6698 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); 6699 break; 6700 case 0x4: /* FMAX */ 6701 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); 6702 break; 6703 case 0x5: /* FMIN */ 6704 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, 
fpst); 6705 break; 6706 case 0x6: /* FMAXNM */ 6707 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); 6708 break; 6709 case 0x7: /* FMINNM */ 6710 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); 6711 break; 6712 case 0x8: /* FNMUL */ 6713 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); 6714 gen_helper_vfp_negd(tcg_res, tcg_res); 6715 break; 6716 } 6717 6718 write_fp_dreg(s, rd, tcg_res); 6719 6720 tcg_temp_free_ptr(fpst); 6721 tcg_temp_free_i64(tcg_op1); 6722 tcg_temp_free_i64(tcg_op2); 6723 tcg_temp_free_i64(tcg_res); 6724 } 6725 6726 /* Floating-point data-processing (2 source) - half precision */ 6727 static void handle_fp_2src_half(DisasContext *s, int opcode, 6728 int rd, int rn, int rm) 6729 { 6730 TCGv_i32 tcg_op1; 6731 TCGv_i32 tcg_op2; 6732 TCGv_i32 tcg_res; 6733 TCGv_ptr fpst; 6734 6735 tcg_res = tcg_temp_new_i32(); 6736 fpst = fpstatus_ptr(FPST_FPCR_F16); 6737 tcg_op1 = read_fp_hreg(s, rn); 6738 tcg_op2 = read_fp_hreg(s, rm); 6739 6740 switch (opcode) { 6741 case 0x0: /* FMUL */ 6742 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 6743 break; 6744 case 0x1: /* FDIV */ 6745 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); 6746 break; 6747 case 0x2: /* FADD */ 6748 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 6749 break; 6750 case 0x3: /* FSUB */ 6751 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 6752 break; 6753 case 0x4: /* FMAX */ 6754 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 6755 break; 6756 case 0x5: /* FMIN */ 6757 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 6758 break; 6759 case 0x6: /* FMAXNM */ 6760 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 6761 break; 6762 case 0x7: /* FMINNM */ 6763 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 6764 break; 6765 case 0x8: /* FNMUL */ 6766 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 6767 tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000); 6768 break; 6769 default: 6770 g_assert_not_reached(); 6771 } 6772 6773 write_fp_sreg(s, rd, tcg_res); 6774 6775 tcg_temp_free_ptr(fpst); 6776 tcg_temp_free_i32(tcg_op1); 6777 tcg_temp_free_i32(tcg_op2); 6778 tcg_temp_free_i32(tcg_res); 6779 } 6780 6781 /* Floating point data-processing (2 source) 6782 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 6783 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 6784 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd | 6785 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ 6786 */ 6787 static void disas_fp_2src(DisasContext *s, uint32_t insn) 6788 { 6789 int mos = extract32(insn, 29, 3); 6790 int type = extract32(insn, 22, 2); 6791 int rd = extract32(insn, 0, 5); 6792 int rn = extract32(insn, 5, 5); 6793 int rm = extract32(insn, 16, 5); 6794 int opcode = extract32(insn, 12, 4); 6795 6796 if (opcode > 8 || mos) { 6797 unallocated_encoding(s); 6798 return; 6799 } 6800 6801 switch (type) { 6802 case 0: 6803 if (!fp_access_check(s)) { 6804 return; 6805 } 6806 handle_fp_2src_single(s, opcode, rd, rn, rm); 6807 break; 6808 case 1: 6809 if (!fp_access_check(s)) { 6810 return; 6811 } 6812 handle_fp_2src_double(s, opcode, rd, rn, rm); 6813 break; 6814 case 3: 6815 if (!dc_isar_feature(aa64_fp16, s)) { 6816 unallocated_encoding(s); 6817 return; 6818 } 6819 if (!fp_access_check(s)) { 6820 return; 6821 } 6822 handle_fp_2src_half(s, opcode, rd, rn, rm); 6823 break; 6824 default: 6825 unallocated_encoding(s); 6826 } 6827 } 6828 6829 /* Floating-point data-processing (3 
source) - single precision */ 6830 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, 6831 int rd, int rn, int rm, int ra) 6832 { 6833 TCGv_i32 tcg_op1, tcg_op2, tcg_op3; 6834 TCGv_i32 tcg_res = tcg_temp_new_i32(); 6835 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6836 6837 tcg_op1 = read_fp_sreg(s, rn); 6838 tcg_op2 = read_fp_sreg(s, rm); 6839 tcg_op3 = read_fp_sreg(s, ra); 6840 6841 /* These are fused multiply-add, and must be done as one 6842 * floating point operation with no rounding between the 6843 * multiplication and addition steps. 6844 * NB that doing the negations here as separate steps is 6845 * correct: an input NaN should come out with its sign bit 6846 * flipped if it is a negated-input. 6847 */ 6848 if (o1 == true) { 6849 gen_helper_vfp_negs(tcg_op3, tcg_op3); 6850 } 6851 6852 if (o0 != o1) { 6853 gen_helper_vfp_negs(tcg_op1, tcg_op1); 6854 } 6855 6856 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6857 6858 write_fp_sreg(s, rd, tcg_res); 6859 6860 tcg_temp_free_ptr(fpst); 6861 tcg_temp_free_i32(tcg_op1); 6862 tcg_temp_free_i32(tcg_op2); 6863 tcg_temp_free_i32(tcg_op3); 6864 tcg_temp_free_i32(tcg_res); 6865 } 6866 6867 /* Floating-point data-processing (3 source) - double precision */ 6868 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, 6869 int rd, int rn, int rm, int ra) 6870 { 6871 TCGv_i64 tcg_op1, tcg_op2, tcg_op3; 6872 TCGv_i64 tcg_res = tcg_temp_new_i64(); 6873 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 6874 6875 tcg_op1 = read_fp_dreg(s, rn); 6876 tcg_op2 = read_fp_dreg(s, rm); 6877 tcg_op3 = read_fp_dreg(s, ra); 6878 6879 /* These are fused multiply-add, and must be done as one 6880 * floating point operation with no rounding between the 6881 * multiplication and addition steps. 6882 * NB that doing the negations here as separate steps is 6883 * correct: an input NaN should come out with its sign bit 6884 * flipped if it is a negated-input. 6885 */ 6886 if (o1 == true) { 6887 gen_helper_vfp_negd(tcg_op3, tcg_op3); 6888 } 6889 6890 if (o0 != o1) { 6891 gen_helper_vfp_negd(tcg_op1, tcg_op1); 6892 } 6893 6894 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6895 6896 write_fp_dreg(s, rd, tcg_res); 6897 6898 tcg_temp_free_ptr(fpst); 6899 tcg_temp_free_i64(tcg_op1); 6900 tcg_temp_free_i64(tcg_op2); 6901 tcg_temp_free_i64(tcg_op3); 6902 tcg_temp_free_i64(tcg_res); 6903 } 6904 6905 /* Floating-point data-processing (3 source) - half precision */ 6906 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, 6907 int rd, int rn, int rm, int ra) 6908 { 6909 TCGv_i32 tcg_op1, tcg_op2, tcg_op3; 6910 TCGv_i32 tcg_res = tcg_temp_new_i32(); 6911 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16); 6912 6913 tcg_op1 = read_fp_hreg(s, rn); 6914 tcg_op2 = read_fp_hreg(s, rm); 6915 tcg_op3 = read_fp_hreg(s, ra); 6916 6917 /* These are fused multiply-add, and must be done as one 6918 * floating point operation with no rounding between the 6919 * multiplication and addition steps. 6920 * NB that doing the negations here as separate steps is 6921 * correct: an input NaN should come out with its sign bit 6922 * flipped if it is a negated-input.
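For reference, the o1:o0 pairs map to the four A64 mnemonics as follows: 0:0 is FMADD (ra + rn * rm), 0:1 is FMSUB (ra - rn * rm, i.e. rn negated), 1:0 is FNMADD (-ra - rn * rm), and 1:1 is FNMSUB (-ra + rn * rm).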
6923 */ 6924 if (o1 == true) { 6925 tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000); 6926 } 6927 6928 if (o0 != o1) { 6929 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); 6930 } 6931 6932 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); 6933 6934 write_fp_sreg(s, rd, tcg_res); 6935 6936 tcg_temp_free_ptr(fpst); 6937 tcg_temp_free_i32(tcg_op1); 6938 tcg_temp_free_i32(tcg_op2); 6939 tcg_temp_free_i32(tcg_op3); 6940 tcg_temp_free_i32(tcg_res); 6941 } 6942 6943 /* Floating point data-processing (3 source) 6944 * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0 6945 * +---+---+---+-----------+------+----+------+----+------+------+------+ 6946 * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd | 6947 * +---+---+---+-----------+------+----+------+----+------+------+------+ 6948 */ 6949 static void disas_fp_3src(DisasContext *s, uint32_t insn) 6950 { 6951 int mos = extract32(insn, 29, 3); 6952 int type = extract32(insn, 22, 2); 6953 int rd = extract32(insn, 0, 5); 6954 int rn = extract32(insn, 5, 5); 6955 int ra = extract32(insn, 10, 5); 6956 int rm = extract32(insn, 16, 5); 6957 bool o0 = extract32(insn, 15, 1); 6958 bool o1 = extract32(insn, 21, 1); 6959 6960 if (mos) { 6961 unallocated_encoding(s); 6962 return; 6963 } 6964 6965 switch (type) { 6966 case 0: 6967 if (!fp_access_check(s)) { 6968 return; 6969 } 6970 handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra); 6971 break; 6972 case 1: 6973 if (!fp_access_check(s)) { 6974 return; 6975 } 6976 handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra); 6977 break; 6978 case 3: 6979 if (!dc_isar_feature(aa64_fp16, s)) { 6980 unallocated_encoding(s); 6981 return; 6982 } 6983 if (!fp_access_check(s)) { 6984 return; 6985 } 6986 handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra); 6987 break; 6988 default: 6989 unallocated_encoding(s); 6990 } 6991 } 6992 6993 /* Floating point immediate 6994 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0 6995 * +---+---+---+-----------+------+---+------------+-------+------+------+ 6996 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd | 6997 * +---+---+---+-----------+------+---+------------+-------+------+------+ 6998 */ 6999 static void disas_fp_imm(DisasContext *s, uint32_t insn) 7000 { 7001 int rd = extract32(insn, 0, 5); 7002 int imm5 = extract32(insn, 5, 5); 7003 int imm8 = extract32(insn, 13, 8); 7004 int type = extract32(insn, 22, 2); 7005 int mos = extract32(insn, 29, 3); 7006 uint64_t imm; 7007 MemOp sz; 7008 7009 if (mos || imm5) { 7010 unallocated_encoding(s); 7011 return; 7012 } 7013 7014 switch (type) { 7015 case 0: 7016 sz = MO_32; 7017 break; 7018 case 1: 7019 sz = MO_64; 7020 break; 7021 case 3: 7022 sz = MO_16; 7023 if (dc_isar_feature(aa64_fp16, s)) { 7024 break; 7025 } 7026 /* fallthru */ 7027 default: 7028 unallocated_encoding(s); 7029 return; 7030 } 7031 7032 if (!fp_access_check(s)) { 7033 return; 7034 } 7035 7036 imm = vfp_expand_imm(sz, imm8); 7037 write_fp_dreg(s, rd, tcg_constant_i64(imm)); 7038 } 7039 7040 /* Handle floating point <=> fixed point conversions. Note that we can 7041 * also deal with fp <=> integer conversions as a special case (scale == 64) 7042 * OPTME: consider handling that special case specially or at least skipping 7043 * the call to scalbn in the helpers for zero shifts. 
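As a worked example: in the fixed-point encodings the scale field holds 64 - fbits, so FCVTZS with 8 fraction bits reaches this function with scale == 56 and tcg_shift below becomes 8, while the pure integer forms pass scale == 64 for a shift of zero.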
7044 */ 7045 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode, 7046 bool itof, int rmode, int scale, int sf, int type) 7047 { 7048 bool is_signed = !(opcode & 1); 7049 TCGv_ptr tcg_fpstatus; 7050 TCGv_i32 tcg_shift, tcg_single; 7051 TCGv_i64 tcg_double; 7052 7053 tcg_fpstatus = fpstatus_ptr(type == 3 ? FPST_FPCR_F16 : FPST_FPCR); 7054 7055 tcg_shift = tcg_constant_i32(64 - scale); 7056 7057 if (itof) { 7058 TCGv_i64 tcg_int = cpu_reg(s, rn); 7059 if (!sf) { 7060 TCGv_i64 tcg_extend = tcg_temp_new_i64(); 7061 7062 if (is_signed) { 7063 tcg_gen_ext32s_i64(tcg_extend, tcg_int); 7064 } else { 7065 tcg_gen_ext32u_i64(tcg_extend, tcg_int); 7066 } 7067 7068 tcg_int = tcg_extend; 7069 } 7070 7071 switch (type) { 7072 case 1: /* float64 */ 7073 tcg_double = tcg_temp_new_i64(); 7074 if (is_signed) { 7075 gen_helper_vfp_sqtod(tcg_double, tcg_int, 7076 tcg_shift, tcg_fpstatus); 7077 } else { 7078 gen_helper_vfp_uqtod(tcg_double, tcg_int, 7079 tcg_shift, tcg_fpstatus); 7080 } 7081 write_fp_dreg(s, rd, tcg_double); 7082 tcg_temp_free_i64(tcg_double); 7083 break; 7084 7085 case 0: /* float32 */ 7086 tcg_single = tcg_temp_new_i32(); 7087 if (is_signed) { 7088 gen_helper_vfp_sqtos(tcg_single, tcg_int, 7089 tcg_shift, tcg_fpstatus); 7090 } else { 7091 gen_helper_vfp_uqtos(tcg_single, tcg_int, 7092 tcg_shift, tcg_fpstatus); 7093 } 7094 write_fp_sreg(s, rd, tcg_single); 7095 tcg_temp_free_i32(tcg_single); 7096 break; 7097 7098 case 3: /* float16 */ 7099 tcg_single = tcg_temp_new_i32(); 7100 if (is_signed) { 7101 gen_helper_vfp_sqtoh(tcg_single, tcg_int, 7102 tcg_shift, tcg_fpstatus); 7103 } else { 7104 gen_helper_vfp_uqtoh(tcg_single, tcg_int, 7105 tcg_shift, tcg_fpstatus); 7106 } 7107 write_fp_sreg(s, rd, tcg_single); 7108 tcg_temp_free_i32(tcg_single); 7109 break; 7110 7111 default: 7112 g_assert_not_reached(); 7113 } 7114 } else { 7115 TCGv_i64 tcg_int = cpu_reg(s, rd); 7116 TCGv_i32 tcg_rmode; 7117 7118 if (extract32(opcode, 2, 1)) { 7119 /* There are too many rounding modes to all fit into rmode, 7120 * so FCVTA[US] is a special case. 
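(The two-bit rmode field only covers TIEEVEN, POSINF, NEGINF and ZERO, so round-to-nearest-with-ties-to-away cannot be encoded there; FCVTA[US] sets opcode bit 2 instead and we force the rounding mode here.)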
7121 */ 7122 rmode = FPROUNDING_TIEAWAY; 7123 } 7124 7125 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); 7126 7127 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 7128 7129 switch (type) { 7130 case 1: /* float64 */ 7131 tcg_double = read_fp_dreg(s, rn); 7132 if (is_signed) { 7133 if (!sf) { 7134 gen_helper_vfp_tosld(tcg_int, tcg_double, 7135 tcg_shift, tcg_fpstatus); 7136 } else { 7137 gen_helper_vfp_tosqd(tcg_int, tcg_double, 7138 tcg_shift, tcg_fpstatus); 7139 } 7140 } else { 7141 if (!sf) { 7142 gen_helper_vfp_tould(tcg_int, tcg_double, 7143 tcg_shift, tcg_fpstatus); 7144 } else { 7145 gen_helper_vfp_touqd(tcg_int, tcg_double, 7146 tcg_shift, tcg_fpstatus); 7147 } 7148 } 7149 if (!sf) { 7150 tcg_gen_ext32u_i64(tcg_int, tcg_int); 7151 } 7152 tcg_temp_free_i64(tcg_double); 7153 break; 7154 7155 case 0: /* float32 */ 7156 tcg_single = read_fp_sreg(s, rn); 7157 if (sf) { 7158 if (is_signed) { 7159 gen_helper_vfp_tosqs(tcg_int, tcg_single, 7160 tcg_shift, tcg_fpstatus); 7161 } else { 7162 gen_helper_vfp_touqs(tcg_int, tcg_single, 7163 tcg_shift, tcg_fpstatus); 7164 } 7165 } else { 7166 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 7167 if (is_signed) { 7168 gen_helper_vfp_tosls(tcg_dest, tcg_single, 7169 tcg_shift, tcg_fpstatus); 7170 } else { 7171 gen_helper_vfp_touls(tcg_dest, tcg_single, 7172 tcg_shift, tcg_fpstatus); 7173 } 7174 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 7175 tcg_temp_free_i32(tcg_dest); 7176 } 7177 tcg_temp_free_i32(tcg_single); 7178 break; 7179 7180 case 3: /* float16 */ 7181 tcg_single = read_fp_sreg(s, rn); 7182 if (sf) { 7183 if (is_signed) { 7184 gen_helper_vfp_tosqh(tcg_int, tcg_single, 7185 tcg_shift, tcg_fpstatus); 7186 } else { 7187 gen_helper_vfp_touqh(tcg_int, tcg_single, 7188 tcg_shift, tcg_fpstatus); 7189 } 7190 } else { 7191 TCGv_i32 tcg_dest = tcg_temp_new_i32(); 7192 if (is_signed) { 7193 gen_helper_vfp_toslh(tcg_dest, tcg_single, 7194 tcg_shift, tcg_fpstatus); 7195 } else { 7196 gen_helper_vfp_toulh(tcg_dest, tcg_single, 7197 tcg_shift, tcg_fpstatus); 7198 } 7199 tcg_gen_extu_i32_i64(tcg_int, tcg_dest); 7200 tcg_temp_free_i32(tcg_dest); 7201 } 7202 tcg_temp_free_i32(tcg_single); 7203 break; 7204 7205 default: 7206 g_assert_not_reached(); 7207 } 7208 7209 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 7210 tcg_temp_free_i32(tcg_rmode); 7211 } 7212 7213 tcg_temp_free_ptr(tcg_fpstatus); 7214 } 7215 7216 /* Floating point <-> fixed point conversions 7217 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 7218 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 7219 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd | 7220 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+ 7221 */ 7222 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn) 7223 { 7224 int rd = extract32(insn, 0, 5); 7225 int rn = extract32(insn, 5, 5); 7226 int scale = extract32(insn, 10, 6); 7227 int opcode = extract32(insn, 16, 3); 7228 int rmode = extract32(insn, 19, 2); 7229 int type = extract32(insn, 22, 2); 7230 bool sbit = extract32(insn, 29, 1); 7231 bool sf = extract32(insn, 31, 1); 7232 bool itof; 7233 7234 if (sbit || (!sf && scale < 32)) { 7235 unallocated_encoding(s); 7236 return; 7237 } 7238 7239 switch (type) { 7240 case 0: /* float32 */ 7241 case 1: /* float64 */ 7242 break; 7243 case 3: /* float16 */ 7244 if (dc_isar_feature(aa64_fp16, s)) { 7245 break; 7246 } 7247 /* fallthru */ 7248 default: 7249 unallocated_encoding(s); 7250 return; 7251 } 7252 7253 switch ((rmode << 
3) | opcode) { 7254 case 0x2: /* SCVTF */ 7255 case 0x3: /* UCVTF */ 7256 itof = true; 7257 break; 7258 case 0x18: /* FCVTZS */ 7259 case 0x19: /* FCVTZU */ 7260 itof = false; 7261 break; 7262 default: 7263 unallocated_encoding(s); 7264 return; 7265 } 7266 7267 if (!fp_access_check(s)) { 7268 return; 7269 } 7270 7271 handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type); 7272 } 7273 7274 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof) 7275 { 7276 /* FMOV: gpr to or from float, double, or top half of quad fp reg, 7277 * without conversion. 7278 */ 7279 7280 if (itof) { 7281 TCGv_i64 tcg_rn = cpu_reg(s, rn); 7282 TCGv_i64 tmp; 7283 7284 switch (type) { 7285 case 0: 7286 /* 32 bit */ 7287 tmp = tcg_temp_new_i64(); 7288 tcg_gen_ext32u_i64(tmp, tcg_rn); 7289 write_fp_dreg(s, rd, tmp); 7290 tcg_temp_free_i64(tmp); 7291 break; 7292 case 1: 7293 /* 64 bit */ 7294 write_fp_dreg(s, rd, tcg_rn); 7295 break; 7296 case 2: 7297 /* 64 bit to top half. */ 7298 tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd)); 7299 clear_vec_high(s, true, rd); 7300 break; 7301 case 3: 7302 /* 16 bit */ 7303 tmp = tcg_temp_new_i64(); 7304 tcg_gen_ext16u_i64(tmp, tcg_rn); 7305 write_fp_dreg(s, rd, tmp); 7306 tcg_temp_free_i64(tmp); 7307 break; 7308 default: 7309 g_assert_not_reached(); 7310 } 7311 } else { 7312 TCGv_i64 tcg_rd = cpu_reg(s, rd); 7313 7314 switch (type) { 7315 case 0: 7316 /* 32 bit */ 7317 tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32)); 7318 break; 7319 case 1: 7320 /* 64 bit */ 7321 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64)); 7322 break; 7323 case 2: 7324 /* 64 bits from top half */ 7325 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn)); 7326 break; 7327 case 3: 7328 /* 16 bit */ 7329 tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16)); 7330 break; 7331 default: 7332 g_assert_not_reached(); 7333 } 7334 } 7335 } 7336 7337 static void handle_fjcvtzs(DisasContext *s, int rd, int rn) 7338 { 7339 TCGv_i64 t = read_fp_dreg(s, rn); 7340 TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR); 7341 7342 gen_helper_fjcvtzs(t, t, fpstatus); 7343 7344 tcg_temp_free_ptr(fpstatus); 7345 7346 tcg_gen_ext32u_i64(cpu_reg(s, rd), t); 7347 tcg_gen_extrh_i64_i32(cpu_ZF, t); 7348 tcg_gen_movi_i32(cpu_CF, 0); 7349 tcg_gen_movi_i32(cpu_NF, 0); 7350 tcg_gen_movi_i32(cpu_VF, 0); 7351 7352 tcg_temp_free_i64(t); 7353 } 7354 7355 /* Floating point <-> integer conversions 7356 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0 7357 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 7358 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd | 7359 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+ 7360 */ 7361 static void disas_fp_int_conv(DisasContext *s, uint32_t insn) 7362 { 7363 int rd = extract32(insn, 0, 5); 7364 int rn = extract32(insn, 5, 5); 7365 int opcode = extract32(insn, 16, 3); 7366 int rmode = extract32(insn, 19, 2); 7367 int type = extract32(insn, 22, 2); 7368 bool sbit = extract32(insn, 29, 1); 7369 bool sf = extract32(insn, 31, 1); 7370 bool itof = false; 7371 7372 if (sbit) { 7373 goto do_unallocated; 7374 } 7375 7376 switch (opcode) { 7377 case 2: /* SCVTF */ 7378 case 3: /* UCVTF */ 7379 itof = true; 7380 /* fallthru */ 7381 case 4: /* FCVTAS */ 7382 case 5: /* FCVTAU */ 7383 if (rmode != 0) { 7384 goto do_unallocated; 7385 } 7386 /* fallthru */ 7387 case 0: /* FCVT[NPMZ]S */ 7388 case 1: /* FCVT[NPMZ]U */ 7389 switch (type) { 7390 case 0: /* 
float32 */ 7391 case 1: /* float64 */ 7392 break; 7393 case 3: /* float16 */ 7394 if (!dc_isar_feature(aa64_fp16, s)) { 7395 goto do_unallocated; 7396 } 7397 break; 7398 default: 7399 goto do_unallocated; 7400 } 7401 if (!fp_access_check(s)) { 7402 return; 7403 } 7404 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type); 7405 break; 7406 7407 default: 7408 switch (sf << 7 | type << 5 | rmode << 3 | opcode) { 7409 case 0b01100110: /* FMOV half <-> 32-bit int */ 7410 case 0b01100111: 7411 case 0b11100110: /* FMOV half <-> 64-bit int */ 7412 case 0b11100111: 7413 if (!dc_isar_feature(aa64_fp16, s)) { 7414 goto do_unallocated; 7415 } 7416 /* fallthru */ 7417 case 0b00000110: /* FMOV 32-bit */ 7418 case 0b00000111: 7419 case 0b10100110: /* FMOV 64-bit */ 7420 case 0b10100111: 7421 case 0b11001110: /* FMOV top half of 128-bit */ 7422 case 0b11001111: 7423 if (!fp_access_check(s)) { 7424 return; 7425 } 7426 itof = opcode & 1; 7427 handle_fmov(s, rd, rn, type, itof); 7428 break; 7429 7430 case 0b00111110: /* FJCVTZS */ 7431 if (!dc_isar_feature(aa64_jscvt, s)) { 7432 goto do_unallocated; 7433 } else if (fp_access_check(s)) { 7434 handle_fjcvtzs(s, rd, rn); 7435 } 7436 break; 7437 7438 default: 7439 do_unallocated: 7440 unallocated_encoding(s); 7441 return; 7442 } 7443 break; 7444 } 7445 } 7446 7447 /* FP-specific subcases of table C3-6 (SIMD and FP data processing) 7448 * 31 30 29 28 25 24 0 7449 * +---+---+---+---------+-----------------------------+ 7450 * | | 0 | | 1 1 1 1 | | 7451 * +---+---+---+---------+-----------------------------+ 7452 */ 7453 static void disas_data_proc_fp(DisasContext *s, uint32_t insn) 7454 { 7455 if (extract32(insn, 24, 1)) { 7456 /* Floating point data-processing (3 source) */ 7457 disas_fp_3src(s, insn); 7458 } else if (extract32(insn, 21, 1) == 0) { 7459 /* Floating point to fixed point conversions */ 7460 disas_fp_fixed_conv(s, insn); 7461 } else { 7462 switch (extract32(insn, 10, 2)) { 7463 case 1: 7464 /* Floating point conditional compare */ 7465 disas_fp_ccomp(s, insn); 7466 break; 7467 case 2: 7468 /* Floating point data-processing (2 source) */ 7469 disas_fp_2src(s, insn); 7470 break; 7471 case 3: 7472 /* Floating point conditional select */ 7473 disas_fp_csel(s, insn); 7474 break; 7475 case 0: 7476 switch (ctz32(extract32(insn, 12, 4))) { 7477 case 0: /* [15:12] == xxx1 */ 7478 /* Floating point immediate */ 7479 disas_fp_imm(s, insn); 7480 break; 7481 case 1: /* [15:12] == xx10 */ 7482 /* Floating point compare */ 7483 disas_fp_compare(s, insn); 7484 break; 7485 case 2: /* [15:12] == x100 */ 7486 /* Floating point data-processing (1 source) */ 7487 disas_fp_1src(s, insn); 7488 break; 7489 case 3: /* [15:12] == 1000 */ 7490 unallocated_encoding(s); 7491 break; 7492 default: /* [15:12] == 0000 */ 7493 /* Floating point <-> integer conversions */ 7494 disas_fp_int_conv(s, insn); 7495 break; 7496 } 7497 break; 7498 } 7499 } 7500 } 7501 7502 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right, 7503 int pos) 7504 { 7505 /* Extract 64 bits from the middle of two concatenated 64 bit 7506 * vector register slices left:right. The extracted bits start 7507 * at 'pos' bits into the right (least significant) side. 7508 * We return the result in tcg_right, and guarantee not to 7509 * trash tcg_left. 
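For example, with pos == 16 the result is (left:right) >> 16: the low 48 bits come from the top of tcg_right and the high 16 bits from the bottom of tcg_left, which is exactly the shri/shli/or sequence below.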
7510 */ 7511 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 7512 assert(pos > 0 && pos < 64); 7513 7514 tcg_gen_shri_i64(tcg_right, tcg_right, pos); 7515 tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos); 7516 tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp); 7517 7518 tcg_temp_free_i64(tcg_tmp); 7519 } 7520 7521 /* EXT 7522 * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0 7523 * +---+---+-------------+-----+---+------+---+------+---+------+------+ 7524 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd | 7525 * +---+---+-------------+-----+---+------+---+------+---+------+------+ 7526 */ 7527 static void disas_simd_ext(DisasContext *s, uint32_t insn) 7528 { 7529 int is_q = extract32(insn, 30, 1); 7530 int op2 = extract32(insn, 22, 2); 7531 int imm4 = extract32(insn, 11, 4); 7532 int rm = extract32(insn, 16, 5); 7533 int rn = extract32(insn, 5, 5); 7534 int rd = extract32(insn, 0, 5); 7535 int pos = imm4 << 3; 7536 TCGv_i64 tcg_resl, tcg_resh; 7537 7538 if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) { 7539 unallocated_encoding(s); 7540 return; 7541 } 7542 7543 if (!fp_access_check(s)) { 7544 return; 7545 } 7546 7547 tcg_resh = tcg_temp_new_i64(); 7548 tcg_resl = tcg_temp_new_i64(); 7549 7550 /* Vd gets bits starting at pos bits into Vm:Vn. This is 7551 * either extracting 128 bits from a 128:128 concatenation, or 7552 * extracting 64 bits from a 64:64 concatenation. 7553 */ 7554 if (!is_q) { 7555 read_vec_element(s, tcg_resl, rn, 0, MO_64); 7556 if (pos != 0) { 7557 read_vec_element(s, tcg_resh, rm, 0, MO_64); 7558 do_ext64(s, tcg_resh, tcg_resl, pos); 7559 } 7560 } else { 7561 TCGv_i64 tcg_hh; 7562 typedef struct { 7563 int reg; 7564 int elt; 7565 } EltPosns; 7566 EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} }; 7567 EltPosns *elt = eltposns; 7568 7569 if (pos >= 64) { 7570 elt++; 7571 pos -= 64; 7572 } 7573 7574 read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64); 7575 elt++; 7576 read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64); 7577 elt++; 7578 if (pos != 0) { 7579 do_ext64(s, tcg_resh, tcg_resl, pos); 7580 tcg_hh = tcg_temp_new_i64(); 7581 read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64); 7582 do_ext64(s, tcg_hh, tcg_resh, pos); 7583 tcg_temp_free_i64(tcg_hh); 7584 } 7585 } 7586 7587 write_vec_element(s, tcg_resl, rd, 0, MO_64); 7588 tcg_temp_free_i64(tcg_resl); 7589 if (is_q) { 7590 write_vec_element(s, tcg_resh, rd, 1, MO_64); 7591 } 7592 tcg_temp_free_i64(tcg_resh); 7593 clear_vec_high(s, is_q, rd); 7594 } 7595 7596 /* TBL/TBX 7597 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0 7598 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ 7599 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd | 7600 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+ 7601 */ 7602 static void disas_simd_tb(DisasContext *s, uint32_t insn) 7603 { 7604 int op2 = extract32(insn, 22, 2); 7605 int is_q = extract32(insn, 30, 1); 7606 int rm = extract32(insn, 16, 5); 7607 int rn = extract32(insn, 5, 5); 7608 int rd = extract32(insn, 0, 5); 7609 int is_tbx = extract32(insn, 12, 1); 7610 int len = (extract32(insn, 13, 2) + 1) * 16; 7611 7612 if (op2 != 0) { 7613 unallocated_encoding(s); 7614 return; 7615 } 7616 7617 if (!fp_access_check(s)) { 7618 return; 7619 } 7620 7621 tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, rd), 7622 vec_full_reg_offset(s, rm), cpu_env, 7623 is_q ? 
16 : 8, vec_full_reg_size(s), 7624 (len << 6) | (is_tbx << 5) | rn, 7625 gen_helper_simd_tblx); 7626 } 7627 7628 /* ZIP/UZP/TRN 7629 * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 7630 * +---+---+-------------+------+---+------+---+------------------+------+ 7631 * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd | 7632 * +---+---+-------------+------+---+------+---+------------------+------+ 7633 */ 7634 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn) 7635 { 7636 int rd = extract32(insn, 0, 5); 7637 int rn = extract32(insn, 5, 5); 7638 int rm = extract32(insn, 16, 5); 7639 int size = extract32(insn, 22, 2); 7640 /* opc field bits [1:0] indicate ZIP/UZP/TRN; 7641 * bit 2 indicates 1 vs 2 variant of the insn. 7642 */ 7643 int opcode = extract32(insn, 12, 2); 7644 bool part = extract32(insn, 14, 1); 7645 bool is_q = extract32(insn, 30, 1); 7646 int esize = 8 << size; 7647 int i, ofs; 7648 int datasize = is_q ? 128 : 64; 7649 int elements = datasize / esize; 7650 TCGv_i64 tcg_res, tcg_resl, tcg_resh; 7651 7652 if (opcode == 0 || (size == 3 && !is_q)) { 7653 unallocated_encoding(s); 7654 return; 7655 } 7656 7657 if (!fp_access_check(s)) { 7658 return; 7659 } 7660 7661 tcg_resl = tcg_const_i64(0); 7662 tcg_resh = is_q ? tcg_const_i64(0) : NULL; 7663 tcg_res = tcg_temp_new_i64(); 7664 7665 for (i = 0; i < elements; i++) { 7666 switch (opcode) { 7667 case 1: /* UZP1/2 */ 7668 { 7669 int midpoint = elements / 2; 7670 if (i < midpoint) { 7671 read_vec_element(s, tcg_res, rn, 2 * i + part, size); 7672 } else { 7673 read_vec_element(s, tcg_res, rm, 7674 2 * (i - midpoint) + part, size); 7675 } 7676 break; 7677 } 7678 case 2: /* TRN1/2 */ 7679 if (i & 1) { 7680 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size); 7681 } else { 7682 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size); 7683 } 7684 break; 7685 case 3: /* ZIP1/2 */ 7686 { 7687 int base = part * elements / 2; 7688 if (i & 1) { 7689 read_vec_element(s, tcg_res, rm, base + (i >> 1), size); 7690 } else { 7691 read_vec_element(s, tcg_res, rn, base + (i >> 1), size); 7692 } 7693 break; 7694 } 7695 default: 7696 g_assert_not_reached(); 7697 } 7698 7699 ofs = i * esize; 7700 if (ofs < 64) { 7701 tcg_gen_shli_i64(tcg_res, tcg_res, ofs); 7702 tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res); 7703 } else { 7704 tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64); 7705 tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res); 7706 } 7707 } 7708 7709 tcg_temp_free_i64(tcg_res); 7710 7711 write_vec_element(s, tcg_resl, rd, 0, MO_64); 7712 tcg_temp_free_i64(tcg_resl); 7713 7714 if (is_q) { 7715 write_vec_element(s, tcg_resh, rd, 1, MO_64); 7716 tcg_temp_free_i64(tcg_resh); 7717 } 7718 clear_vec_high(s, is_q, rd); 7719 } 7720 7721 /* 7722 * do_reduction_op helper 7723 * 7724 * This mirrors the Reduce() pseudocode in the ARM ARM. It is 7725 * important for correct NaN propagation that we do these 7726 * operations in exactly the order specified by the pseudocode. 7727 * 7728 * This is a recursive function, TCG temps should be freed by the 7729 * calling function once it is done with the values. 7730 */ 7731 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn, 7732 int esize, int size, int vmap, TCGv_ptr fpst) 7733 { 7734 if (esize == size) { 7735 int element; 7736 MemOp msize = esize == 16 ? 
MO_16 : MO_32; 7737 TCGv_i32 tcg_elem; 7738 7739 /* We should have one register left here */ 7740 assert(ctpop8(vmap) == 1); 7741 element = ctz32(vmap); 7742 assert(element < 8); 7743 7744 tcg_elem = tcg_temp_new_i32(); 7745 read_vec_element_i32(s, tcg_elem, rn, element, msize); 7746 return tcg_elem; 7747 } else { 7748 int bits = size / 2; 7749 int shift = ctpop8(vmap) / 2; 7750 int vmap_lo = (vmap >> shift) & vmap; 7751 int vmap_hi = (vmap & ~vmap_lo); 7752 TCGv_i32 tcg_hi, tcg_lo, tcg_res; 7753 7754 tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst); 7755 tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst); 7756 tcg_res = tcg_temp_new_i32(); 7757 7758 switch (fpopcode) { 7759 case 0x0c: /* fmaxnmv half-precision */ 7760 gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst); 7761 break; 7762 case 0x0f: /* fmaxv half-precision */ 7763 gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst); 7764 break; 7765 case 0x1c: /* fminnmv half-precision */ 7766 gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst); 7767 break; 7768 case 0x1f: /* fminv half-precision */ 7769 gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst); 7770 break; 7771 case 0x2c: /* fmaxnmv */ 7772 gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst); 7773 break; 7774 case 0x2f: /* fmaxv */ 7775 gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst); 7776 break; 7777 case 0x3c: /* fminnmv */ 7778 gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst); 7779 break; 7780 case 0x3f: /* fminv */ 7781 gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst); 7782 break; 7783 default: 7784 g_assert_not_reached(); 7785 } 7786 7787 tcg_temp_free_i32(tcg_hi); 7788 tcg_temp_free_i32(tcg_lo); 7789 return tcg_res; 7790 } 7791 } 7792 7793 /* AdvSIMD across lanes 7794 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 7795 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 7796 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | 7797 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 7798 */ 7799 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) 7800 { 7801 int rd = extract32(insn, 0, 5); 7802 int rn = extract32(insn, 5, 5); 7803 int size = extract32(insn, 22, 2); 7804 int opcode = extract32(insn, 12, 5); 7805 bool is_q = extract32(insn, 30, 1); 7806 bool is_u = extract32(insn, 29, 1); 7807 bool is_fp = false; 7808 bool is_min = false; 7809 int esize; 7810 int elements; 7811 int i; 7812 TCGv_i64 tcg_res, tcg_elt; 7813 7814 switch (opcode) { 7815 case 0x1b: /* ADDV */ 7816 if (is_u) { 7817 unallocated_encoding(s); 7818 return; 7819 } 7820 /* fall through */ 7821 case 0x3: /* SADDLV, UADDLV */ 7822 case 0xa: /* SMAXV, UMAXV */ 7823 case 0x1a: /* SMINV, UMINV */ 7824 if (size == 3 || (size == 2 && !is_q)) { 7825 unallocated_encoding(s); 7826 return; 7827 } 7828 break; 7829 case 0xc: /* FMAXNMV, FMINNMV */ 7830 case 0xf: /* FMAXV, FMINV */ 7831 /* Bit 1 of size field encodes min vs max and the actual size 7832 * depends on the encoding of the U bit. If not set (and FP16 7833 * enabled) then we do half-precision float instead of single 7834 * precision. 
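Concretely: U == 0 selects the half-precision form (size becomes MO_16 below, provided FP16 is implemented), U == 1 with Q == 1 and size<0> == 0 selects single precision, and size<1> distinguishes the FMIN*V ops from the FMAX*V ops.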
7835 */ 7836 is_min = extract32(size, 1, 1); 7837 is_fp = true; 7838 if (!is_u && dc_isar_feature(aa64_fp16, s)) { 7839 size = 1; 7840 } else if (!is_u || !is_q || extract32(size, 0, 1)) { 7841 unallocated_encoding(s); 7842 return; 7843 } else { 7844 size = 2; 7845 } 7846 break; 7847 default: 7848 unallocated_encoding(s); 7849 return; 7850 } 7851 7852 if (!fp_access_check(s)) { 7853 return; 7854 } 7855 7856 esize = 8 << size; 7857 elements = (is_q ? 128 : 64) / esize; 7858 7859 tcg_res = tcg_temp_new_i64(); 7860 tcg_elt = tcg_temp_new_i64(); 7861 7862 /* These instructions operate across all lanes of a vector 7863 * to produce a single result. We can guarantee that a 64 7864 * bit intermediate is sufficient: 7865 * + for [US]ADDLV the maximum element size is 32 bits, and 7866 * the result type is 64 bits 7867 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the 7868 * same as the element size, which is 32 bits at most 7869 * For the integer operations we can choose to work at 64 7870 * or 32 bits and truncate at the end; for simplicity 7871 * we use 64 bits always. The floating point 7872 * ops do require 32 bit intermediates, though. 7873 */ 7874 if (!is_fp) { 7875 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN)); 7876 7877 for (i = 1; i < elements; i++) { 7878 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN)); 7879 7880 switch (opcode) { 7881 case 0x03: /* SADDLV / UADDLV */ 7882 case 0x1b: /* ADDV */ 7883 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt); 7884 break; 7885 case 0x0a: /* SMAXV / UMAXV */ 7886 if (is_u) { 7887 tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt); 7888 } else { 7889 tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt); 7890 } 7891 break; 7892 case 0x1a: /* SMINV / UMINV */ 7893 if (is_u) { 7894 tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt); 7895 } else { 7896 tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt); 7897 } 7898 break; 7899 default: 7900 g_assert_not_reached(); 7901 } 7902 7903 } 7904 } else { 7905 /* Floating point vector reduction ops which work across 32 7906 * bit (single) or 16 bit (half-precision) intermediates. 7907 * Note that correct NaN propagation requires that we do these 7908 * operations in exactly the order specified by the pseudocode. 7909 */ 7910 TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR); 7911 int fpopcode = opcode | is_min << 4 | is_u << 5; 7912 int vmap = (1 << elements) - 1; 7913 TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize, 7914 (is_q ? 
128 : 64), vmap, fpst); 7915 tcg_gen_extu_i32_i64(tcg_res, tcg_res32); 7916 tcg_temp_free_i32(tcg_res32); 7917 tcg_temp_free_ptr(fpst); 7918 } 7919 7920 tcg_temp_free_i64(tcg_elt); 7921 7922 /* Now truncate the result to the width required for the final output */ 7923 if (opcode == 0x03) { 7924 /* SADDLV, UADDLV: result is 2*esize */ 7925 size++; 7926 } 7927 7928 switch (size) { 7929 case 0: 7930 tcg_gen_ext8u_i64(tcg_res, tcg_res); 7931 break; 7932 case 1: 7933 tcg_gen_ext16u_i64(tcg_res, tcg_res); 7934 break; 7935 case 2: 7936 tcg_gen_ext32u_i64(tcg_res, tcg_res); 7937 break; 7938 case 3: 7939 break; 7940 default: 7941 g_assert_not_reached(); 7942 } 7943 7944 write_fp_dreg(s, rd, tcg_res); 7945 tcg_temp_free_i64(tcg_res); 7946 } 7947 7948 /* DUP (Element, Vector) 7949 * 7950 * 31 30 29 21 20 16 15 10 9 5 4 0 7951 * +---+---+-------------------+--------+-------------+------+------+ 7952 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | 7953 * +---+---+-------------------+--------+-------------+------+------+ 7954 * 7955 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 7956 */ 7957 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, 7958 int imm5) 7959 { 7960 int size = ctz32(imm5); 7961 int index; 7962 7963 if (size > 3 || (size == 3 && !is_q)) { 7964 unallocated_encoding(s); 7965 return; 7966 } 7967 7968 if (!fp_access_check(s)) { 7969 return; 7970 } 7971 7972 index = imm5 >> (size + 1); 7973 tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd), 7974 vec_reg_offset(s, rn, index, size), 7975 is_q ? 16 : 8, vec_full_reg_size(s)); 7976 } 7977 7978 /* DUP (element, scalar) 7979 * 31 21 20 16 15 10 9 5 4 0 7980 * +-----------------------+--------+-------------+------+------+ 7981 * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | 7982 * +-----------------------+--------+-------------+------+------+ 7983 */ 7984 static void handle_simd_dupes(DisasContext *s, int rd, int rn, 7985 int imm5) 7986 { 7987 int size = ctz32(imm5); 7988 int index; 7989 TCGv_i64 tmp; 7990 7991 if (size > 3) { 7992 unallocated_encoding(s); 7993 return; 7994 } 7995 7996 if (!fp_access_check(s)) { 7997 return; 7998 } 7999 8000 index = imm5 >> (size + 1); 8001 8002 /* This instruction just extracts the specified element and 8003 * zero-extends it into the bottom of the destination register. 8004 */ 8005 tmp = tcg_temp_new_i64(); 8006 read_vec_element(s, tmp, rn, index, size); 8007 write_fp_dreg(s, rd, tmp); 8008 tcg_temp_free_i64(tmp); 8009 } 8010 8011 /* DUP (General) 8012 * 8013 * 31 30 29 21 20 16 15 10 9 5 4 0 8014 * +---+---+-------------------+--------+-------------+------+------+ 8015 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd | 8016 * +---+---+-------------------+--------+-------------+------+------+ 8017 * 8018 * size: encoded in imm5 (see ARM ARM LowestSetBit()) 8019 */ 8020 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn, 8021 int imm5) 8022 { 8023 int size = ctz32(imm5); 8024 uint32_t dofs, oprsz, maxsz; 8025 8026 if (size > 3 || ((size == 3) && !is_q)) { 8027 unallocated_encoding(s); 8028 return; 8029 } 8030 8031 if (!fp_access_check(s)) { 8032 return; 8033 } 8034 8035 dofs = vec_full_reg_offset(s, rd); 8036 oprsz = is_q ? 
/* INS (Element)
 *
 *  31                   21 20    16 15  14    11  10 9    5 4    0
 * +-----------------------+--------+------------+---+------+------+
 * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
 * +-----------------------+--------+------------+---+------+------+
 *
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 * index: encoded in imm5<4:size+1>
 */
static void handle_simd_inse(DisasContext *s, int rd, int rn,
                             int imm4, int imm5)
{
    int size = ctz32(imm5);
    int src_index, dst_index;
    TCGv_i64 tmp;

    if (size > 3) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    dst_index = extract32(imm5, 1 + size, 5);
    src_index = extract32(imm4, size, 4);

    tmp = tcg_temp_new_i64();

    read_vec_element(s, tmp, rn, src_index, size);
    write_vec_element(s, tmp, rd, dst_index, size);

    tcg_temp_free_i64(tmp);

    /* INS is considered a 128-bit write for SVE. */
    clear_vec_high(s, true, rd);
}


/* INS (General)
 *
 *  31                   21 20    16 15        10  9    5 4    0
 * +-----------------------+--------+-------------+------+------+
 * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
 * +-----------------------+--------+-------------+------+------+
 *
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 * index: encoded in imm5<4:size+1>
 */
static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
{
    int size = ctz32(imm5);
    int idx;

    if (size > 3) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    idx = extract32(imm5, 1 + size, 4 - size);
    write_vec_element(s, cpu_reg(s, rn), rd, idx, size);

    /* INS is considered a 128-bit write for SVE. */
    clear_vec_high(s, true, rd);
}

/*
 * UMOV (General)
 * SMOV (General)
 *
 *  31  30   29              21 20    16 15    12   10 9    5 4    0
 * +---+---+-------------------+--------+-------------+------+------+
 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
 * +---+---+-------------------+--------+-------------+------+------+
 *
 * U: unsigned when set
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 */
static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
                                  int rn, int rd, int imm5)
{
    int size = ctz32(imm5);
    int element;
    TCGv_i64 tcg_rd;

    /* Check for UnallocatedEncodings */
    if (is_signed) {
        if (size > 2 || (size == 2 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
    } else {
        if (size > 3
            || (size < 3 && is_q)
            || (size == 3 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
    }

    if (!fp_access_check(s)) {
        return;
    }

    element = extract32(imm5, 1 + size, 4);

    tcg_rd = cpu_reg(s, rd);
    read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
    if (is_signed && !is_q) {
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
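/*
 * Illustrative sketch (not used by the decoder): SMOV sign-extends the
 * selected lane to the destination width, and a 32-bit SMOV must still
 * leave Xd's upper 32 bits zero, which is what the final ext32u above
 * implements.  A plain C model with a hypothetical name:
 */
#if 0
static uint64_t example_smov_w(int16_t lane)
{
    int32_t w = lane;       /* sign-extend the lane to 32 bits */
    return (uint32_t)w;     /* writing Wd zeroes bits [63:32] of Xd */
}
/* example_smov_w(-2) == 0x00000000fffffffeull */
#endif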
/* AdvSIMD copy
 *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
 * +---+---+----+-----------------+------+---+------+---+------+------+
 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
 * +---+---+----+-----------------+------+---+------+---+------+------+
 */
static void disas_simd_copy(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm4 = extract32(insn, 11, 4);
    int op = extract32(insn, 29, 1);
    int is_q = extract32(insn, 30, 1);
    int imm5 = extract32(insn, 16, 5);

    if (op) {
        if (is_q) {
            /* INS (element) */
            handle_simd_inse(s, rd, rn, imm4, imm5);
        } else {
            unallocated_encoding(s);
        }
    } else {
        switch (imm4) {
        case 0:
            /* DUP (element - vector) */
            handle_simd_dupe(s, is_q, rd, rn, imm5);
            break;
        case 1:
            /* DUP (general) */
            handle_simd_dupg(s, is_q, rd, rn, imm5);
            break;
        case 3:
            if (is_q) {
                /* INS (general) */
                handle_simd_insg(s, rd, rn, imm5);
            } else {
                unallocated_encoding(s);
            }
            break;
        case 5:
        case 7:
            /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
            handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
    }
}

/* AdvSIMD modified immediate
 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
 *
 * There are a number of operations that can be carried out here:
 *   MOVI - move (shifted) imm into register
 *   MVNI - move inverted (shifted) imm into register
 *   ORR  - bitwise OR of (shifted) imm with register
 *   BIC  - bitwise clear of (shifted) imm with register
 * With ARMv8.2 we also have:
 *   FMOV half-precision
 */
static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int cmode = extract32(insn, 12, 4);
    int o2 = extract32(insn, 11, 1);
    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
    bool is_neg = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    uint64_t imm = 0;

    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
        /* Check for FMOV (vector, immediate) - half-precision */
        if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) {
            unallocated_encoding(s);
            return;
        }
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (cmode == 15 && o2 && !is_neg) {
        /* FMOV (vector, immediate) - half-precision */
        imm = vfp_expand_imm(MO_16, abcdefgh);
        /* now duplicate across the lanes */
        imm = dup_const(MO_16, imm);
    } else {
        imm = asimd_imm_const(abcdefgh, cmode, is_neg);
    }

    if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
        /* MOVI or MVNI, with MVNI negation handled above. */
        tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 16 : 8,
                             vec_full_reg_size(s), imm);
    } else {
        /* ORR or BIC, with BIC negation to AND handled above. */
        if (is_neg) {
            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
        } else {
            gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
        }
    }
}
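/*
 * Illustrative note (derived from the test above): the predicate
 * (cmode & 0x9) == 0x1 matches cmode 1/3/5/7 (32-bit lanes, LSL by
 * 0/8/16/24) and (cmode & 0xd) == 0x9 matches cmode 9/11 (16-bit
 * lanes, LSL by 0/8); those are exactly the ORR/BIC forms, everything
 * else is MOVI/MVNI/FMOV.  A quick self-check, hypothetical name:
 */
#if 0
static bool example_is_orr_bic_cmode(int cmode)
{
    return (cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9;
}
/* true for cmode 1, 3, 5, 7, 9 and 11; false for the even
 * cmode values and for 13 and 15. */
#endif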
/* AdvSIMD scalar copy
 *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
 * +-----+----+-----------------+------+---+------+---+------+------+
 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
 * +-----+----+-----------------+------+---+------+---+------+------+
 */
static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm4 = extract32(insn, 11, 4);
    int imm5 = extract32(insn, 16, 5);
    int op = extract32(insn, 29, 1);

    if (op != 0 || imm4 != 0) {
        unallocated_encoding(s);
        return;
    }

    /* DUP (element, scalar) */
    handle_simd_dupes(s, rd, rn, imm5);
}
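/*
 * Illustrative note for the pairwise decoder below: it folds size[1]
 * into bit 5 of the opcode so that one switch can separate the
 * min/max pairs.  For example FMAXNMP is opcode 0xc with size[1] == 0,
 * while FMINNMP is the same base opcode with size[1] == 1:
 */
#if 0
/* hypothetical worked example */
int opcode = 0xc;                      /* FMAXNMP encoding */
opcode |= (extract32(3, 1, 1) << 5);   /* size = 0b11 -> 0x2c, FMINNMP */
#endif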
/* AdvSIMD scalar pairwise
 *  31 30  29 28       24 23  22 21    17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 */
static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
{
    int u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    TCGv_ptr fpst;

    /* For some ops (the FP ones), size[1] is part of the encoding.
     * For ADDP strictly it is not but size[1] is always 1 for valid
     * encodings.
     */
    opcode |= (extract32(size, 1, 1) << 5);

    switch (opcode) {
    case 0x3b: /* ADDP */
        if (u || size != 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        fpst = NULL;
        break;
    case 0xc: /* FMAXNMP */
    case 0xd: /* FADDP */
    case 0xf: /* FMAXP */
    case 0x2c: /* FMINNMP */
    case 0x2f: /* FMINP */
        /* FP op; size[0] selects a 32-bit or 64-bit element */
        if (!u) {
            if (!dc_isar_feature(aa64_fp16, s)) {
                unallocated_encoding(s);
                return;
            } else {
                size = MO_16;
            }
        } else {
            size = extract32(size, 0, 1) ? MO_64 : MO_32;
        }

        if (!fp_access_check(s)) {
            return;
        }

        fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == MO_64) {
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_64);
        read_vec_element(s, tcg_op2, rn, 1, MO_64);

        switch (opcode) {
        case 0x3b: /* ADDP */
            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
            break;
        case 0xc: /* FMAXNMP */
            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xd: /* FADDP */
            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xf: /* FMAXP */
            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2c: /* FMINNMP */
            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2f: /* FMINP */
            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    } else {
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i32 tcg_res = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_op1, rn, 0, size);
        read_vec_element_i32(s, tcg_op2, rn, 1, size);

        if (size == MO_16) {
            switch (opcode) {
            case 0xc: /* FMAXNMP */
                gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xd: /* FADDP */
                gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xf: /* FMAXP */
                gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2c: /* FMINNMP */
                gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2f: /* FMINP */
                gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        } else {
            switch (opcode) {
            case 0xc: /* FMAXNMP */
                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xd: /* FADDP */
                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0xf: /* FMAXP */
                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2c: /* FMINNMP */
                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x2f: /* FMINP */
                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }
        }

        write_fp_sreg(s, rd, tcg_res);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i32(tcg_res);
    }

    if (fpst) {
        tcg_temp_free_ptr(fpst);
    }
}
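/*
 * Illustrative sketch (not used by the translator): the rounding step
 * in handle_shri_with_rndacc() below adds 1 << (shift - 1) before
 * shifting.  For a 64-bit element that sum can carry into bit 64,
 * which is why the helper widens to a two-word intermediate when
 * round && size == 3.  A plain C model using unsigned __int128:
 */
#if 0
static uint64_t example_urshr64(uint64_t x, int shift) /* 1 <= shift <= 64 */
{
    unsigned __int128 t = (unsigned __int128)x
                          + ((unsigned __int128)1 << (shift - 1));
    return (uint64_t)(t >> shift);
}
/* example_urshr64(UINT64_MAX, 1) == 0x8000000000000000ull */
#endif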
/*
 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
 *
 * This handles the common shift logic and is used by both
 * the vector and scalar code.
 */
static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
                                    TCGv_i64 tcg_rnd, bool accumulate,
                                    bool is_u, int size, int shift)
{
    bool extended_result = false;
    bool round = tcg_rnd != NULL;
    int ext_lshift = 0;
    TCGv_i64 tcg_src_hi;

    if (round && size == 3) {
        extended_result = true;
        ext_lshift = 64 - shift;
        tcg_src_hi = tcg_temp_new_i64();
    } else if (shift == 64) {
        if (!accumulate && is_u) {
            /* result is zero */
            tcg_gen_movi_i64(tcg_res, 0);
            return;
        }
    }

    /* Deal with the rounding step */
    if (round) {
        if (extended_result) {
            TCGv_i64 tcg_zero = tcg_constant_i64(0);
            if (!is_u) {
                /* take care of sign extending tcg_res */
                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
                                 tcg_src, tcg_src_hi,
                                 tcg_rnd, tcg_zero);
            } else {
                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
                                 tcg_src, tcg_zero,
                                 tcg_rnd, tcg_zero);
            }
        } else {
            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
        }
    }

    /* Now do the shift right */
    if (round && extended_result) {
        /* extended case, >64 bit precision required */
        if (ext_lshift == 0) {
            /* special case, only high bits matter */
            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
        } else {
            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
        }
    } else {
        if (is_u) {
            if (shift == 64) {
                /* essentially shifting in 64 zeros */
                tcg_gen_movi_i64(tcg_src, 0);
            } else {
                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
            }
        } else {
            if (shift == 64) {
                /* effectively extending the sign-bit */
                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
            } else {
                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
            }
        }
    }

    if (accumulate) {
        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
    } else {
        tcg_gen_mov_i64(tcg_res, tcg_src);
    }

    if (extended_result) {
        tcg_temp_free_i64(tcg_src_hi);
    }
}

/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
static void handle_scalar_simd_shri(DisasContext *s,
                                    bool is_u, int immh, int immb,
                                    int opcode, int rn, int rd)
{
    const int size = 3;
    int immhb = immh << 3 | immb;
    int shift = 2 * (8 << size) - immhb;
    bool accumulate = false;
    bool round = false;
    bool insert = false;
    TCGv_i64 tcg_rn;
    TCGv_i64 tcg_rd;
    TCGv_i64 tcg_round;

    if (!extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    switch (opcode) {
    case 0x02: /* SSRA / USRA (accumulate) */
        accumulate = true;
        break;
    case 0x04: /* SRSHR / URSHR (rounding) */
        round = true;
        break;
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        accumulate = round = true;
        break;
    case 0x08: /* SRI */
        insert = true;
        break;
    }

    if (round) {
        tcg_round = tcg_constant_i64(1ULL << (shift - 1));
    } else {
        tcg_round = NULL;
    }

    tcg_rn = read_fp_dreg(s, rn);
    tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();

    if (insert) {
        /* shift count same as element size is valid but does nothing;
         * special case to avoid potential shift by 64.
         */
        int esize = 8 << size;
        if (shift != esize) {
            tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
            tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
        }
    } else {
        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                accumulate, is_u, size, shift);
    }

    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_rn);
    tcg_temp_free_i64(tcg_rd);
}

/* SHL/SLI - Scalar shift left */
static void handle_scalar_simd_shli(DisasContext *s, bool insert,
                                    int immh, int immb, int opcode,
                                    int rn, int rd)
{
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = immhb - (8 << size);
    TCGv_i64 tcg_rn;
    TCGv_i64 tcg_rd;

    if (!extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_rn = read_fp_dreg(s, rn);
    tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();

    if (insert) {
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
    } else {
        tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
    }

    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_rn);
    tcg_temp_free_i64(tcg_rd);
}

/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
 * (signed/unsigned) narrowing */
static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
                                   bool is_u_shift, bool is_u_narrow,
                                   int immh, int immb, int opcode,
                                   int rn, int rd)
{
    int immhb = immh << 3 | immb;
    int size = 32 - clz32(immh) - 1;
    int esize = 8 << size;
    int shift = (2 * esize) - immhb;
    int elements = is_scalar ? 1 : (64 / esize);
    bool round = extract32(opcode, 0, 1);
    MemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
    TCGv_i64 tcg_rn, tcg_rd, tcg_round;
    TCGv_i32 tcg_rd_narrowed;
    TCGv_i64 tcg_final;

    static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
        { gen_helper_neon_narrow_sat_s8,
          gen_helper_neon_unarrow_sat8 },
        { gen_helper_neon_narrow_sat_s16,
          gen_helper_neon_unarrow_sat16 },
        { gen_helper_neon_narrow_sat_s32,
          gen_helper_neon_unarrow_sat32 },
        { NULL, NULL },
    };
    static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
        gen_helper_neon_narrow_sat_u8,
        gen_helper_neon_narrow_sat_u16,
        gen_helper_neon_narrow_sat_u32,
        NULL
    };
    NeonGenNarrowEnvFn *narrowfn;

    int i;

    assert(size < 4);

    if (extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (is_u_shift) {
        narrowfn = unsigned_narrow_fns[size];
    } else {
        narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
    }
    tcg_rn = tcg_temp_new_i64();
    tcg_rd = tcg_temp_new_i64();
    tcg_rd_narrowed = tcg_temp_new_i32();
    tcg_final = tcg_const_i64(0);

    if (round) {
        tcg_round = tcg_constant_i64(1ULL << (shift - 1));
    } else {
        tcg_round = NULL;
    }

    for (i = 0; i < elements; i++) {
        read_vec_element(s, tcg_rn, rn, i, ldop);
        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                false, is_u_shift, size + 1, shift);
        narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
    }

    if (!is_q) {
        write_vec_element(s, tcg_final, rd, 0, MO_64);
    } else {
        write_vec_element(s, tcg_final, rd, 1, MO_64);
    }

    tcg_temp_free_i64(tcg_rn);
    tcg_temp_free_i64(tcg_rd);
    tcg_temp_free_i32(tcg_rd_narrowed);
    tcg_temp_free_i64(tcg_final);

    clear_vec_high(s, is_q, rd);
}

/* SQSHLU, UQSHL, SQSHL: saturating left shifts */
static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
                             bool src_unsigned, bool dst_unsigned,
                             int immh, int immb, int rn, int rd)
{
    int immhb = immh << 3 | immb;
    int size = 32 - clz32(immh) - 1;
    int shift = immhb - (8 << size);
    int pass;

    assert(immh != 0);
    assert(!(scalar && is_q));

    if (!scalar) {
        if (!is_q && extract32(immh, 3, 1)) {
            unallocated_encoding(s);
            return;
        }

        /* Since we use the variable-shift helpers we must
         * replicate the shift count into each element of
         * the tcg_shift value.
         */
        switch (size) {
        case 0:
            shift |= shift << 8;
            /* fall through */
        case 1:
            shift |= shift << 16;
            break;
        case 2:
        case 3:
            break;
        default:
            g_assert_not_reached();
        }
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (size == 3) {
        TCGv_i64 tcg_shift = tcg_constant_i64(shift);
        static NeonGenTwo64OpEnvFn * const fns[2][2] = {
            { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
            { NULL, gen_helper_neon_qshl_u64 },
        };
        NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
        int maxpass = is_q ? 2 : 1;

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
            write_vec_element(s, tcg_op, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_op);
        }
        clear_vec_high(s, is_q, rd);
    } else {
        TCGv_i32 tcg_shift = tcg_constant_i32(shift);
        static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
            {
                { gen_helper_neon_qshl_s8,
                  gen_helper_neon_qshl_s16,
                  gen_helper_neon_qshl_s32 },
                { gen_helper_neon_qshlu_s8,
                  gen_helper_neon_qshlu_s16,
                  gen_helper_neon_qshlu_s32 }
            }, {
                { NULL, NULL, NULL },
                { gen_helper_neon_qshl_u8,
                  gen_helper_neon_qshl_u16,
                  gen_helper_neon_qshl_u32 }
            }
        };
        NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
        MemOp memop = scalar ? size : MO_32;
        int maxpass = scalar ? 1 : is_q ? 4 : 2;
        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op, rn, pass, memop);
            genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
            if (scalar) {
                switch (size) {
                case 0:
                    tcg_gen_ext8u_i32(tcg_op, tcg_op);
                    break;
                case 1:
                    tcg_gen_ext16u_i32(tcg_op, tcg_op);
                    break;
                case 2:
                    break;
                default:
                    g_assert_not_reached();
                }
                write_fp_sreg(s, rd, tcg_op);
            } else {
                write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_op);
        }

        if (!scalar) {
            clear_vec_high(s, is_q, rd);
        }
    }
}

/* Common vector code for handling integer to FP conversion */
static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
                                   int elements, int is_signed,
                                   int fracbits, int size)
{
    TCGv_ptr tcg_fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    TCGv_i32 tcg_shift = NULL;

    MemOp mop = size | (is_signed ? MO_SIGN : 0);
    int pass;

    if (fracbits || size == MO_64) {
        tcg_shift = tcg_constant_i32(fracbits);
    }

    if (size == MO_64) {
        TCGv_i64 tcg_int64 = tcg_temp_new_i64();
        TCGv_i64 tcg_double = tcg_temp_new_i64();

        for (pass = 0; pass < elements; pass++) {
            read_vec_element(s, tcg_int64, rn, pass, mop);

            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_double, tcg_int64,
                                     tcg_shift, tcg_fpst);
            } else {
                gen_helper_vfp_uqtod(tcg_double, tcg_int64,
                                     tcg_shift, tcg_fpst);
            }
            if (elements == 1) {
                write_fp_dreg(s, rd, tcg_double);
            } else {
                write_vec_element(s, tcg_double, rd, pass, MO_64);
            }
        }

        tcg_temp_free_i64(tcg_int64);
        tcg_temp_free_i64(tcg_double);

    } else {
        TCGv_i32 tcg_int32 = tcg_temp_new_i32();
        TCGv_i32 tcg_float = tcg_temp_new_i32();

        for (pass = 0; pass < elements; pass++) {
            read_vec_element_i32(s, tcg_int32, rn, pass, mop);

            switch (size) {
            case MO_32:
                if (fracbits) {
                    if (is_signed) {
                        gen_helper_vfp_sltos(tcg_float, tcg_int32,
                                             tcg_shift, tcg_fpst);
                    } else {
                        gen_helper_vfp_ultos(tcg_float, tcg_int32,
                                             tcg_shift, tcg_fpst);
                    }
                } else {
                    if (is_signed) {
                        gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
                    } else {
                        gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
                    }
                }
                break;
            case MO_16:
                if (fracbits) {
                    if (is_signed) {
                        gen_helper_vfp_sltoh(tcg_float, tcg_int32,
                                             tcg_shift, tcg_fpst);
                    } else {
                        gen_helper_vfp_ultoh(tcg_float, tcg_int32,
                                             tcg_shift, tcg_fpst);
                    }
                } else {
                    if (is_signed) {
                        gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
                    } else {
                        gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
                    }
                }
                break;
            default:
                g_assert_not_reached();
            }

            if (elements == 1) {
                write_fp_sreg(s, rd, tcg_float);
            } else {
                write_vec_element_i32(s, tcg_float, rd, pass, size);
            }
        }

        tcg_temp_free_i32(tcg_int32);
        tcg_temp_free_i32(tcg_float);
    }

    tcg_temp_free_ptr(tcg_fpst);

    clear_vec_high(s, elements << size == 16, rd);
}
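/*
 * Illustrative note: for the fixed-point converts, immh:immb encodes
 * the number of fraction bits as (2 * esize) - immhb, which the
 * functions below compute as (16 << size) - immhb.  For a 32-bit
 * element (size == MO_32, so 16 << 2 == 64): immh = 0b0111 and
 * immb = 0b110 give immhb = 62 and therefore fracbits = 2.  A
 * stand-alone check with a hypothetical name:
 */
#if 0
static int example_fracbits(int immh, int immb, int size)
{
    int immhb = immh << 3 | immb;
    return (16 << size) - immhb;   /* == 2 * esize - immhb */
}
/* example_fracbits(0x7, 0x6, MO_32) == 2 */
#endif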
/* UCVTF/SCVTF - Integer to FP conversion */
static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
                                         bool is_q, bool is_u,
                                         int immh, int immb, int opcode,
                                         int rn, int rd)
{
    int size, elements, fracbits;
    int immhb = immh << 3 | immb;

    if (immh & 8) {
        size = MO_64;
        if (!is_scalar && !is_q) {
            unallocated_encoding(s);
            return;
        }
    } else if (immh & 4) {
        size = MO_32;
    } else if (immh & 2) {
        size = MO_16;
        if (!dc_isar_feature(aa64_fp16, s)) {
            unallocated_encoding(s);
            return;
        }
    } else {
        /* immh == 0 would be a failure of the decode logic */
        g_assert(immh == 1);
        unallocated_encoding(s);
        return;
    }

    if (is_scalar) {
        elements = 1;
    } else {
        elements = (8 << is_q) >> size;
    }
    fracbits = (16 << size) - immhb;

    if (!fp_access_check(s)) {
        return;
    }

    handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
}

/* FCVTZS, FCVTZU - FP to fixed-point conversion */
static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
                                         bool is_q, bool is_u,
                                         int immh, int immb, int rn, int rd)
{
    int immhb = immh << 3 | immb;
    int pass, size, fracbits;
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_rmode, tcg_shift;

    if (immh & 0x8) {
        size = MO_64;
        if (!is_scalar && !is_q) {
            unallocated_encoding(s);
            return;
        }
    } else if (immh & 0x4) {
        size = MO_32;
    } else if (immh & 0x2) {
        size = MO_16;
        if (!dc_isar_feature(aa64_fp16, s)) {
            unallocated_encoding(s);
            return;
        }
    } else {
        /* Should have split out AdvSIMD modified immediate earlier. */
        assert(immh == 1);
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    assert(!(is_scalar && is_q));

    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
    tcg_fpstatus = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
    fracbits = (16 << size) - immhb;
    tcg_shift = tcg_constant_i32(fracbits);

    if (size == MO_64) {
        int maxpass = is_scalar ? 1 : 2;

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);
            if (is_u) {
                gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            }
            write_vec_element(s, tcg_op, rd, pass, MO_64);
            tcg_temp_free_i64(tcg_op);
        }
        clear_vec_high(s, is_q, rd);
    } else {
        void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
        int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
        switch (size) {
        case MO_16:
            if (is_u) {
                fn = gen_helper_vfp_touhh;
            } else {
                fn = gen_helper_vfp_toshh;
            }
            break;
        case MO_32:
            if (is_u) {
                fn = gen_helper_vfp_touls;
            } else {
                fn = gen_helper_vfp_tosls;
            }
            break;
        default:
            g_assert_not_reached();
        }

        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op, rn, pass, size);
            fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
            if (is_scalar) {
                write_fp_sreg(s, rd, tcg_op);
            } else {
                write_vec_element_i32(s, tcg_op, rd, pass, size);
            }
            tcg_temp_free_i32(tcg_op);
        }
        if (!is_scalar) {
            clear_vec_high(s, is_q, rd);
        }
    }

    gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
    tcg_temp_free_ptr(tcg_fpstatus);
    tcg_temp_free_i32(tcg_rmode);
}

/* AdvSIMD scalar shift by immediate
 *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
 * +-----+---+-------------+------+------+--------+---+------+------+
 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
 * +-----+---+-------------+------+------+--------+---+------+------+
 *
 * This is the scalar version, so it works on fixed-size registers.
 */
static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int immb = extract32(insn, 16, 3);
    int immh = extract32(insn, 19, 4);
    bool is_u = extract32(insn, 29, 1);

    if (immh == 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x08: /* SRI */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x00: /* SSHR / USHR */
    case 0x02: /* SSRA / USRA */
    case 0x04: /* SRSHR / URSHR */
    case 0x06: /* SRSRA / URSRA */
        handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x0a: /* SHL / SLI */
        handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x1c: /* SCVTF, UCVTF */
        handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
                                     opcode, rn, rd);
        break;
    case 0x10: /* SQSHRUN, SQSHRUN2 */
    case 0x11: /* SQRSHRUN, SQRSHRUN2 */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        handle_vec_simd_sqshrn(s, true, false, false, true,
                               immh, immb, opcode, rn, rd);
        break;
    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
    case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
        handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
                               immh, immb, opcode, rn, rd);
        break;
    case 0xc: /* SQSHLU */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
        break;
    case 0xe: /* SQSHL, UQSHL */
        handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
        break;
    case 0x1f: /* FCVTZS, FCVTZU */
        handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}

/* AdvSIMD scalar three different
 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 */
static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
{
    bool is_u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    if (is_u) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x9: /* SQDMLAL, SQDMLAL2 */
    case 0xb: /* SQDMLSL, SQDMLSL2 */
    case 0xd: /* SQDMULL, SQDMULL2 */
        if (size == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (size == 2) {
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);

        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);

        switch (opcode) {
        case 0xd: /* SQDMULL, SQDMULL2 */
            break;
        case 0xb: /* SQDMLSL, SQDMLSL2 */
            tcg_gen_neg_i64(tcg_res, tcg_res);
            /* fall through */
        case 0x9: /* SQDMLAL, SQDMLAL2 */
            read_vec_element(s, tcg_op1, rd, 0, MO_64);
            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
                                              tcg_res, tcg_op1);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    } else {
        TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
        TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);

        switch (opcode) {
        case 0xd: /* SQDMULL, SQDMULL2 */
            break;
        case 0xb: /* SQDMLSL, SQDMLSL2 */
            gen_helper_neon_negl_u32(tcg_res, tcg_res);
            /* fall through */
        case 0x9: /* SQDMLAL, SQDMLAL2 */
        {
            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
            read_vec_element(s, tcg_op3, rd, 0, MO_32);
            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
                                              tcg_res, tcg_op3);
            tcg_temp_free_i64(tcg_op3);
            break;
        }
        default:
            g_assert_not_reached();
        }

        tcg_gen_ext32u_i64(tcg_res, tcg_res);
        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    }
}
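/*
 * Illustrative sketch (not used by the translator): the comparison
 * cases in handle_3same_64() below materialise the all-ones/all-zeroes
 * result by negating a 0/1 setcond value, since -1 is 0xffff...ffff in
 * two's complement.  A plain C equivalent:
 */
#if 0
static uint64_t example_cmgt64(int64_t n, int64_t m)
{
    return -(uint64_t)(n > m);   /* 0 -> 0, 1 -> all ones */
}
#endif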
static void handle_3same_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
{
    /* Handle 64x64->64 opcodes which are shared between the scalar
     * and vector 3-same groups. We cover every opcode where size == 3
     * is valid in either the three-reg-same (integer, not pairwise)
     * or scalar-three-reg-same groups.
     */
    TCGCond cond;

    switch (opcode) {
    case 0x1: /* SQADD */
        if (u) {
            gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x5: /* SQSUB */
        if (u) {
            gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x6: /* CMGT, CMHI */
        /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
         * We implement this using setcond (test) and then negating.
         */
        cond = u ? TCG_COND_GTU : TCG_COND_GT;
    do_cmop:
        tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x7: /* CMGE, CMHS */
        cond = u ? TCG_COND_GEU : TCG_COND_GE;
        goto do_cmop;
    case 0x11: /* CMTST, CMEQ */
        if (u) {
            cond = TCG_COND_EQ;
            goto do_cmop;
        }
        gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
        break;
    case 0x8: /* SSHL, USHL */
        if (u) {
            gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm);
        } else {
            gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    case 0x9: /* SQSHL, UQSHL */
        if (u) {
            gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0xa: /* SRSHL, URSHL */
        if (u) {
            gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    case 0xb: /* SQRSHL, UQRSHL */
        if (u) {
            gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x10: /* ADD, SUB */
        if (u) {
            tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
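/*
 * Illustrative sketch (not used by the translator): the FMLS cases in
 * handle_3same_float() below negate the first operand and then reuse
 * the fused multiply-add path, i.e. they compute fma(-n, m, d) rather
 * than negating the product, matching the ARM pseudocode for NaN and
 * signed-zero behaviour.  A plain C model:
 */
#if 0
#include <math.h>
static double example_fmls(double d, double n, double m)
{
    return fma(-n, m, d);   /* d + (-n) * m, with a single rounding */
}
#endif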
/* Handle the 3-same-operands float operations; shared by the scalar
 * and vector encodings. The caller must filter out any encodings
 * not allocated for the group it is dealing with.
 */
static void handle_3same_float(DisasContext *s, int size, int elements,
                               int fpopcode, int rd, int rn, int rm)
{
    int pass;
    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);

    for (pass = 0; pass < elements; pass++) {
        if (size) {
            /* Double */
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negd(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                read_vec_element(s, tcg_res, rd, pass, MO_64);
                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_absd(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        } else {
            /* Single */
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negs(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_abss(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            if (elements == 1) {
                /* scalar single so clear high part */
                TCGv_i64 tcg_tmp = tcg_temp_new_i64();

                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
                tcg_temp_free_i64(tcg_tmp);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }

    tcg_temp_free_ptr(fpst);

    clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
}
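/*
 * Illustrative note for the decoder below: it builds a 7-bit fpopcode
 * as opcode | size[1] << 5 | U << 6, so one switch can distinguish all
 * the FP three-same operations.  For example FABD (opcode 0x1a,
 * size[1] == 1, U == 1) becomes 0x1a | 0x20 | 0x40 == 0x7a:
 */
#if 0
/* hypothetical worked example */
int fpopcode = 0x1a | (1 << 5) | (1 << 6);   /* == 0x7a, FABD */
#endif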
/* AdvSIMD scalar three same
 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 */
static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    TCGv_i64 tcg_rd;

    if (opcode >= 0x18) {
        /* Floating point: U, size[1] and opcode indicate operation */
        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
        switch (fpopcode) {
        case 0x1b: /* FMULX */
        case 0x1f: /* FRECPS */
        case 0x3f: /* FRSQRTS */
        case 0x5d: /* FACGE */
        case 0x7d: /* FACGT */
        case 0x1c: /* FCMEQ */
        case 0x5c: /* FCMGE */
        case 0x7c: /* FCMGT */
        case 0x7a: /* FABD */
            break;
        default:
            unallocated_encoding(s);
            return;
        }

        if (!fp_access_check(s)) {
            return;
        }

        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
        return;
    }

    switch (opcode) {
    case 0x1: /* SQADD, UQADD */
    case 0x5: /* SQSUB, UQSUB */
    case 0x9: /* SQSHL, UQSHL */
    case 0xb: /* SQRSHL, UQRSHL */
        break;
    case 0x8: /* SSHL, USHL */
    case 0xa: /* SRSHL, URSHL */
    case 0x6: /* CMGT, CMHI */
    case 0x7: /* CMGE, CMHS */
    case 0x11: /* CMTST, CMEQ */
    case 0x10: /* ADD, SUB (vector) */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH (vector) */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_rd = tcg_temp_new_i64();

    if (size == 3) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);

        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
        tcg_temp_free_i64(tcg_rn);
        tcg_temp_free_i64(tcg_rm);
    } else {
        /* Do a single operation on the lowest element in the vector.
         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
         * no side effects for all these operations.
         * OPTME: special-purpose helpers would avoid doing some
         * unnecessary work in the helper for the 8 and 16 bit cases.
         */
        NeonGenTwoOpEnvFn *genenvfn;
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rm = tcg_temp_new_i32();
        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);
        read_vec_element_i32(s, tcg_rm, rm, 0, size);

        switch (opcode) {
        case 0x1: /* SQADD, UQADD */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x5: /* SQSUB, UQSUB */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x9: /* SQSHL, UQSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0xb: /* SQRSHL, UQRSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x16: /* SQDMULH, SQRDMULH */
        {
            static NeonGenTwoOpEnvFn * const fns[2][2] = {
                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
            };
            assert(size == 1 || size == 2);
            genenvfn = fns[size - 1][u];
            break;
        }
        default:
            g_assert_not_reached();
        }

        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
        tcg_temp_free_i32(tcg_rd32);
        tcg_temp_free_i32(tcg_rn);
        tcg_temp_free_i32(tcg_rm);
    }

    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_rd);
}

/* AdvSIMD scalar three same FP16
 *  31 30  29 28       24 23 22 21 20  16 15 14 13    11 10  9  5 4  0
 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
 * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
 * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
 * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
 * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
 */
static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
                                                  uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 3);
    int rm = extract32(insn, 16, 5);
    bool u = extract32(insn, 29, 1);
    bool a = extract32(insn, 23, 1);
    int fpopcode = opcode | (a << 3) | (u << 4);
    TCGv_ptr fpst;
    TCGv_i32 tcg_op1;
    TCGv_i32 tcg_op2;
    TCGv_i32 tcg_res;

    switch (fpopcode) {
    case 0x03: /* FMULX */
    case 0x04: /* FCMEQ (reg) */
    case 0x07: /* FRECPS */
    case 0x0f: /* FRSQRTS */
    case 0x14: /* FCMGE (reg) */
    case 0x15: /* FACGE */
    case 0x1a: /* FABD */
    case 0x1c: /* FCMGT (reg) */
    case 0x1d: /* FACGT */
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!dc_isar_feature(aa64_fp16, s)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    fpst = fpstatus_ptr(FPST_FPCR_F16);

    tcg_op1 = read_fp_hreg(s, rn);
    tcg_op2 = read_fp_hreg(s, rm);
    tcg_res = tcg_temp_new_i32();

    switch (fpopcode) {
    case 0x03: /* FMULX */
        gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x04: /* FCMEQ (reg) */
        gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x07: /* FRECPS */
        gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x0f: /* FRSQRTS */
        gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x14: /* FCMGE (reg) */
        gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x15: /* FACGE */
        gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1a: /* FABD */
        gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
        tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
        break;
    case 0x1c: /* FCMGT (reg) */
        gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    case 0x1d: /* FACGT */
        gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    write_fp_sreg(s, rd, tcg_res);

    tcg_temp_free_i32(tcg_res);
    tcg_temp_free_i32(tcg_op1);
    tcg_temp_free_i32(tcg_op2);
    tcg_temp_free_ptr(fpst);
}
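/*
 * Illustrative sketch (not used by the translator): the FP16 FABD case
 * above computes the subtraction and then clears bit 15, which is the
 * sign bit of an IEEE binary16 value, to obtain the absolute value.
 * A plain C model operating on the raw half-precision bits:
 */
#if 0
static uint16_t example_fabs_f16(uint16_t bits)
{
    return bits & 0x7fff;   /* clear the binary16 sign bit */
}
#endif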
/* AdvSIMD scalar three same extra
 *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
 * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
 * +-----+---+-----------+------+---+------+---+--------+---+----+----+
 */
static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
                                                   uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    TCGv_i32 ele1, ele2, ele3;
    TCGv_i64 res;
    bool feature;

    switch (u * 16 + opcode) {
    case 0x10: /* SQRDMLAH (vector) */
    case 0x11: /* SQRDMLSH (vector) */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        feature = dc_isar_feature(aa64_rdm, s);
        break;
    default:
        unallocated_encoding(s);
        return;
    }
    if (!feature) {
        unallocated_encoding(s);
        return;
    }
    if (!fp_access_check(s)) {
        return;
    }

    /* Do a single operation on the lowest element in the vector.
     * We use the standard Neon helpers and rely on 0 OP 0 == 0
     * with no side effects for all these operations.
     * OPTME: special-purpose helpers would avoid doing some
     * unnecessary work in the helper for the 16 bit cases.
     */
    ele1 = tcg_temp_new_i32();
    ele2 = tcg_temp_new_i32();
    ele3 = tcg_temp_new_i32();

    read_vec_element_i32(s, ele1, rn, 0, size);
    read_vec_element_i32(s, ele2, rm, 0, size);
    read_vec_element_i32(s, ele3, rd, 0, size);

    switch (opcode) {
    case 0x0: /* SQRDMLAH */
        if (size == 1) {
            gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
        } else {
            gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
        }
        break;
    case 0x1: /* SQRDMLSH */
        if (size == 1) {
            gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
        } else {
            gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
        }
        break;
    default:
        g_assert_not_reached();
    }
    tcg_temp_free_i32(ele1);
    tcg_temp_free_i32(ele2);

    res = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(res, ele3);
    tcg_temp_free_i32(ele3);

    write_fp_dreg(s, rd, res);
    tcg_temp_free_i64(res);
}

static void handle_2misc_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
                            TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
{
    /* Handle 64->64 opcodes which are shared between the scalar and
     * vector 2-reg-misc groups. We cover every integer opcode where size == 3
     * is valid in either group and also the double-precision fp ops.
     * The caller need only provide tcg_rmode and tcg_fpstatus if the op
     * requires them.
     */
    TCGCond cond;

    switch (opcode) {
    case 0x4: /* CLS, CLZ */
        if (u) {
            tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
        } else {
            tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
        }
        break;
    case 0x5: /* NOT */
        /* This opcode is shared with CNT and RBIT but we have earlier
         * enforced that size == 3 if and only if this is the NOT insn.
         */
        tcg_gen_not_i64(tcg_rd, tcg_rn);
        break;
    case 0x7: /* SQABS, SQNEG */
        if (u) {
            gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
        } else {
            gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
        }
        break;
    case 0xa: /* CMLT */
        /* 64 bit integer comparison against zero, result is
         * test ? (2^64 - 1) : 0. We implement via setcond(test)
         * and then negating.
         */
        cond = TCG_COND_LT;
    do_cmop:
        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x8: /* CMGT, CMGE */
        cond = u ? TCG_COND_GE : TCG_COND_GT;
        goto do_cmop;
    case 0x9: /* CMEQ, CMLE */
        cond = u ? TCG_COND_LE : TCG_COND_EQ;
        goto do_cmop;
    case 0xb: /* ABS, NEG */
        if (u) {
            tcg_gen_neg_i64(tcg_rd, tcg_rn);
        } else {
            tcg_gen_abs_i64(tcg_rd, tcg_rn);
        }
        break;
    case 0x2f: /* FABS */
        gen_helper_vfp_absd(tcg_rd, tcg_rn);
        break;
    case 0x6f: /* FNEG */
        gen_helper_vfp_negd(tcg_rd, tcg_rn);
        break;
    case 0x7f: /* FSQRT */
        gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
        break;
    case 0x1a: /* FCVTNS */
    case 0x1b: /* FCVTMS */
    case 0x1c: /* FCVTAS */
    case 0x3a: /* FCVTPS */
    case 0x3b: /* FCVTZS */
        gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
        break;
    case 0x5a: /* FCVTNU */
    case 0x5b: /* FCVTMU */
    case 0x5c: /* FCVTAU */
    case 0x7a: /* FCVTPU */
    case 0x7b: /* FCVTZU */
        gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_constant_i32(0), tcg_fpstatus);
        break;
    case 0x18: /* FRINTN */
    case 0x19: /* FRINTM */
    case 0x38: /* FRINTP */
    case 0x39: /* FRINTZ */
    case 0x58: /* FRINTA */
    case 0x79: /* FRINTI */
        gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    case 0x59: /* FRINTX */
        gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    case 0x1e: /* FRINT32Z */
    case 0x5e: /* FRINT32X */
        gen_helper_frint32_d(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    case 0x1f: /* FRINT64Z */
    case 0x5f: /* FRINT64X */
        gen_helper_frint64_d(tcg_rd, tcg_rn, tcg_fpstatus);
        break;
    default:
        g_assert_not_reached();
    }
}

static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
                                   bool is_scalar, bool is_u, bool is_q,
                                   int size, int rn, int rd)
{
    bool is_double = (size == MO_64);
    TCGv_ptr fpst;

    if (!fp_access_check(s)) {
        return;
    }

    fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16 : FPST_FPCR);

    if (is_double) {
        TCGv_i64 tcg_op = tcg_temp_new_i64();
        TCGv_i64 tcg_zero = tcg_constant_i64(0);
        TCGv_i64 tcg_res = tcg_temp_new_i64();
        NeonGenTwoDoubleOpFn *genfn;
        bool swap = false;
        int pass;

        switch (opcode) {
        case 0x2e: /* FCMLT (zero) */
            swap = true;
            /* fall through */
        case 0x2c: /* FCMGT (zero) */
            genfn = gen_helper_neon_cgt_f64;
            break;
        case 0x2d: /* FCMEQ (zero) */
            genfn = gen_helper_neon_ceq_f64;
            break;
        case 0x6d: /* FCMLE (zero) */
            swap = true;
            /* fall through */
        case 0x6c: /* FCMGE (zero) */
            genfn = gen_helper_neon_cge_f64;
            break;
        default:
            g_assert_not_reached();
        }

        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
            read_vec_element(s, tcg_op, rn, pass, MO_64);
            if (swap) {
                genfn(tcg_res, tcg_zero, tcg_op, fpst);
            } else {
                genfn(tcg_res, tcg_op, tcg_zero, fpst);
            }
            write_vec_element(s, tcg_res, rd, pass, MO_64);
        }
        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_op);

        clear_vec_high(s, !is_scalar, rd);
    } else {
        TCGv_i32 tcg_op = tcg_temp_new_i32();
        TCGv_i32 tcg_zero = tcg_constant_i32(0);
        TCGv_i32 tcg_res = tcg_temp_new_i32();
        NeonGenTwoSingleOpFn *genfn;
        bool swap = false;
        int pass, maxpasses;

        if (size == MO_16) {
            switch (opcode) {
            case 0x2e: /* FCMLT (zero) */
                swap = true;
                /* fall through */
            case 0x2c: /* FCMGT (zero) */
                genfn = gen_helper_advsimd_cgt_f16;
                break;
            case 0x2d: /* FCMEQ (zero) */
                genfn = gen_helper_advsimd_ceq_f16;
                break;
            case 0x6d: /* FCMLE (zero) */
                swap = true;
                /* fall through */
            case 0x6c: /* FCMGE (zero) */
                genfn = gen_helper_advsimd_cge_f16;
                break;
            default:
                g_assert_not_reached();
            }
        } else {
            switch (opcode) {
            case 0x2e: /* FCMLT (zero) */
                swap = true;
                /* fall through */
            case 0x2c: /* FCMGT (zero) */
                genfn = gen_helper_neon_cgt_f32;
                break;
            case 0x2d: /* FCMEQ (zero) */
                genfn = gen_helper_neon_ceq_f32;
                break;
            case 0x6d: /* FCMLE (zero) */
                swap = true;
                /* fall through */
            case 0x6c: /* FCMGE (zero) */
                genfn = gen_helper_neon_cge_f32;
                break;
            default:
                g_assert_not_reached();
            }
        }

        if (is_scalar) {
            maxpasses = 1;
        } else {
            int vector_size = 8 << is_q;
            maxpasses = vector_size >> size;
        }

        for (pass = 0; pass < maxpasses; pass++) {
            read_vec_element_i32(s, tcg_op, rn, pass, size);
            if (swap) {
                genfn(tcg_res, tcg_zero, tcg_op, fpst);
            } else {
                genfn(tcg_res, tcg_op, tcg_zero, fpst);
            }
            if (is_scalar) {
                write_fp_sreg(s, rd, tcg_res);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, size);
            }
        }
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_op);
        if (!is_scalar) {
            clear_vec_high(s, is_q, rd);
        }
    }

    tcg_temp_free_ptr(fpst);
}

static void handle_2misc_reciprocal(DisasContext *s, int opcode,
                                    bool is_scalar, bool is_u, bool is_q,
                                    int size, int rn, int rd)
{
    bool is_double = (size == 3);
    TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);

    if (is_double) {
        TCGv_i64 tcg_op = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();
        int pass;
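        /*
         * FRECPE and FRSQRTE produce the architected reciprocal and
         * reciprocal-square-root estimates; FRECPX operates purely on
         * the exponent. All of them depend on FPCR state, hence the
         * helper calls below.
         */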
        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
            read_vec_element(s, tcg_op, rn, pass, MO_64);
            switch (opcode) {
            case 0x3d: /* FRECPE */
                gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
                break;
            case 0x3f: /* FRECPX */
                gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
                break;
            case 0x7d: /* FRSQRTE */
                gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
                break;
            default:
                g_assert_not_reached();
            }
            write_vec_element(s, tcg_res, rd, pass, MO_64);
        }
        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_op);
        clear_vec_high(s, !is_scalar, rd);
    } else {
        TCGv_i32 tcg_op = tcg_temp_new_i32();
        TCGv_i32 tcg_res = tcg_temp_new_i32();
        int pass, maxpasses;

        if (is_scalar) {
            maxpasses = 1;
        } else {
            maxpasses = is_q ? 4 : 2;
        }

        for (pass = 0; pass < maxpasses; pass++) {
            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);

            switch (opcode) {
            case 0x3c: /* URECPE */
                gen_helper_recpe_u32(tcg_res, tcg_op);
                break;
            case 0x3d: /* FRECPE */
                gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
                break;
            case 0x3f: /* FRECPX */
                gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
                break;
            case 0x7d: /* FRSQRTE */
                gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            if (is_scalar) {
                write_fp_sreg(s, rd, tcg_res);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }
        }
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_op);
        if (!is_scalar) {
            clear_vec_high(s, is_q, rd);
        }
    }
    tcg_temp_free_ptr(fpst);
}

static void handle_2misc_narrow(DisasContext *s, bool scalar,
                                int opcode, bool u, bool is_q,
                                int size, int rn, int rd)
{
    /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
     * in the source becomes a size element in the destination).
     */
    int pass;
    TCGv_i32 tcg_res[2];
    int destelt = is_q ? 2 : 0;
    int passes = scalar ? 1 : 2;
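    /*
     * For the "2" variants (XTN2, SQXTN2, FCVTN2, ...) is_q is set and
     * destelt == 2: the narrowed results land in the high 64 bits of Vd
     * while the low half is preserved. Otherwise the results go to the
     * low half and clear_vec_high() below zeroes the rest.
     */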
    if (scalar) {
        tcg_res[1] = tcg_constant_i32(0);
    }

    for (pass = 0; pass < passes; pass++) {
        TCGv_i64 tcg_op = tcg_temp_new_i64();
        NeonGenNarrowFn *genfn = NULL;
        NeonGenNarrowEnvFn *genenvfn = NULL;

        if (scalar) {
            read_vec_element(s, tcg_op, rn, pass, size + 1);
        } else {
            read_vec_element(s, tcg_op, rn, pass, MO_64);
        }
        tcg_res[pass] = tcg_temp_new_i32();

        switch (opcode) {
        case 0x12: /* XTN, SQXTUN */
        {
            static NeonGenNarrowFn * const xtnfns[3] = {
                gen_helper_neon_narrow_u8,
                gen_helper_neon_narrow_u16,
                tcg_gen_extrl_i64_i32,
            };
            static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
                gen_helper_neon_unarrow_sat8,
                gen_helper_neon_unarrow_sat16,
                gen_helper_neon_unarrow_sat32,
            };
            if (u) {
                genenvfn = sqxtunfns[size];
            } else {
                genfn = xtnfns[size];
            }
            break;
        }
        case 0x14: /* SQXTN, UQXTN */
        {
            static NeonGenNarrowEnvFn * const fns[3][2] = {
                { gen_helper_neon_narrow_sat_s8,
                  gen_helper_neon_narrow_sat_u8 },
                { gen_helper_neon_narrow_sat_s16,
                  gen_helper_neon_narrow_sat_u16 },
                { gen_helper_neon_narrow_sat_s32,
                  gen_helper_neon_narrow_sat_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x16: /* FCVTN, FCVTN2 */
            /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
            if (size == 2) {
                gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
            } else {
                TCGv_i32 tcg_lo = tcg_temp_new_i32();
                TCGv_i32 tcg_hi = tcg_temp_new_i32();
                TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
                TCGv_i32 ahp = get_ahp_flag();

                tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
                gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
                gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
                tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
                tcg_temp_free_i32(tcg_lo);
                tcg_temp_free_i32(tcg_hi);
                tcg_temp_free_ptr(fpst);
                tcg_temp_free_i32(ahp);
            }
            break;
        case 0x36: /* BFCVTN, BFCVTN2 */
        {
            TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR);
            gen_helper_bfcvt_pair(tcg_res[pass], tcg_op, fpst);
            tcg_temp_free_ptr(fpst);
        }
        break;
        case 0x56: /* FCVTXN, FCVTXN2 */
            /* 64 bit to 32 bit float conversion
             * with von Neumann rounding (round to odd)
             */
            assert(size == 2);
            gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
            break;
        default:
            g_assert_not_reached();
        }

        if (genfn) {
            genfn(tcg_res[pass], tcg_op);
        } else if (genenvfn) {
            genenvfn(tcg_res[pass], cpu_env, tcg_op);
        }

        tcg_temp_free_i64(tcg_op);
    }

    for (pass = 0; pass < 2; pass++) {
        write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
        tcg_temp_free_i32(tcg_res[pass]);
    }
    clear_vec_high(s, is_q, rd);
}

/* Remaining saturating accumulating ops */
static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
                                bool is_q, int size, int rn, int rd)
{
    bool is_double = (size == 3);

    if (is_double) {
        TCGv_i64 tcg_rn = tcg_temp_new_i64();
        TCGv_i64 tcg_rd = tcg_temp_new_i64();
        int pass;
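        /*
         * USQADD accumulates a signed value into an unsigned destination
         * (and SUQADD the converse), saturating to the destination's
         * range; the mixed signedness means dedicated helpers are needed
         * even for 64-bit elements.
         */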
        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
            read_vec_element(s, tcg_rn, rn, pass, MO_64);
            read_vec_element(s, tcg_rd, rd, pass, MO_64);

            if (is_u) { /* USQADD */
                gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
            } else { /* SUQADD */
                gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
            }
            write_vec_element(s, tcg_rd, rd, pass, MO_64);
        }
        tcg_temp_free_i64(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
        clear_vec_high(s, !is_scalar, rd);
    } else {
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rd = tcg_temp_new_i32();
        int pass, maxpasses;

        if (is_scalar) {
            maxpasses = 1;
        } else {
            maxpasses = is_q ? 4 : 2;
        }

        for (pass = 0; pass < maxpasses; pass++) {
            if (is_scalar) {
                read_vec_element_i32(s, tcg_rn, rn, pass, size);
                read_vec_element_i32(s, tcg_rd, rd, pass, size);
            } else {
                read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
                read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
            }

            if (is_u) { /* USQADD */
                switch (size) {
                case 0:
                    gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                case 1:
                    gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                case 2:
                    gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                default:
                    g_assert_not_reached();
                }
            } else { /* SUQADD */
                switch (size) {
                case 0:
                    gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                case 1:
                    gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                case 2:
                    gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
                    break;
                default:
                    g_assert_not_reached();
                }
            }
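            /*
             * For the scalar form, zero the whole low doubleword of Vd
             * first: the 32-bit write below then leaves bits [63:32]
             * clear regardless of the element size.
             */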
            if (is_scalar) {
                write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64);
            }
            write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
        }
        tcg_temp_free_i32(tcg_rd);
        tcg_temp_free_i32(tcg_rn);
        clear_vec_high(s, is_q, rd);
    }
}

/* AdvSIMD scalar two reg misc
 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 */
static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 12, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    bool is_fcvt = false;
    int rmode;
    TCGv_i32 tcg_rmode;
    TCGv_ptr tcg_fpstatus;

    switch (opcode) {
    case 0x3: /* USQADD / SUQADD */
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_satacc(s, true, u, false, size, rn, rd);
        return;
    case 0x7: /* SQABS / SQNEG */
        break;
    case 0xa: /* CMLT */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x8: /* CMGT, CMGE */
    case 0x9: /* CMEQ, CMLE */
    case 0xb: /* ABS, NEG */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x12: /* SQXTUN */
        if (!u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x14: /* SQXTN, UQXTN */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
        return;
    case 0xc ... 0xf:
    case 0x16 ... 0x1d:
    case 0x1f:
        /* Floating point: U, size[1] and opcode indicate operation;
         * size[0] indicates single or double precision.
         */
        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
        size = extract32(size, 0, 1) ? 3 : 2;
        switch (opcode) {
        case 0x2c: /* FCMGT (zero) */
        case 0x2d: /* FCMEQ (zero) */
        case 0x2e: /* FCMLT (zero) */
        case 0x6c: /* FCMGE (zero) */
        case 0x6d: /* FCMLE (zero) */
            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1d: /* SCVTF */
        case 0x5d: /* UCVTF */
        {
            bool is_signed = (opcode == 0x1d);
            if (!fp_access_check(s)) {
                return;
            }
            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
            return;
        }
        case 0x3d: /* FRECPE */
        case 0x3f: /* FRECPX */
        case 0x7d: /* FRSQRTE */
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
            is_fcvt = true;
            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
            break;
        case 0x1c: /* FCVTAS */
        case 0x5c: /* FCVTAU */
            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
            is_fcvt = true;
            rmode = FPROUNDING_TIEAWAY;
            break;
        case 0x56: /* FCVTXN, FCVTXN2 */
            if (size == 2) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
            return;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (is_fcvt) {
        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
        tcg_fpstatus = fpstatus_ptr(FPST_FPCR);
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
    } else {
        tcg_rmode = NULL;
        tcg_fpstatus = NULL;
    }
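    /*
     * gen_helper_set_rmode installs the requested rounding mode and
     * returns the previous one in tcg_rmode; the identical call after
     * the operation below puts the original mode back.
     */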
    if (size == 3) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rd = tcg_temp_new_i64();

        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
        write_fp_dreg(s, rd, tcg_rd);
        tcg_temp_free_i64(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
    } else {
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rd = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);

        switch (opcode) {
        case 0x7: /* SQABS, SQNEG */
        {
            NeonGenOneOpEnvFn *genfn;
            static NeonGenOneOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
            };
            genfn = fns[size][u];
            genfn(tcg_rd, cpu_env, tcg_rn);
            break;
        }
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x1c: /* FCVTAS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_constant_i32(0),
                                 tcg_fpstatus);
            break;
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x5c: /* FCVTAU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_constant_i32(0),
                                 tcg_fpstatus);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_sreg(s, rd, tcg_rd);
        tcg_temp_free_i32(tcg_rd);
        tcg_temp_free_i32(tcg_rn);
    }

    if (is_fcvt) {
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
        tcg_temp_free_i32(tcg_rmode);
        tcg_temp_free_ptr(tcg_fpstatus);
    }
}

/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = 2 * (8 << size) - immhb;
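    /*
     * The position of the leading 1 in immh gives the element size, and
     * immh:immb encodes (2 * esize) - shift, so the shift count is
     * 2 * esize - immhb. For example immh = 0b0001, immb = 0b110 gives
     * byte elements with immhb = 14, i.e. a right shift by 16 - 14 = 2.
     */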
    GVecGen2iFn *gvec_fn;

    if (extract32(immh, 3, 1) && !is_q) {
        unallocated_encoding(s);
        return;
    }
    tcg_debug_assert(size <= 3);

    if (!fp_access_check(s)) {
        return;
    }

    switch (opcode) {
    case 0x02: /* SSRA / USRA (accumulate) */
        gvec_fn = is_u ? gen_gvec_usra : gen_gvec_ssra;
        break;

    case 0x08: /* SRI */
        gvec_fn = gen_gvec_sri;
        break;

    case 0x00: /* SSHR / USHR */
        if (is_u) {
            if (shift == 8 << size) {
                /* Shift count the same size as element size produces zero. */
                tcg_gen_gvec_dup_imm(size, vec_full_reg_offset(s, rd),
                                     is_q ? 16 : 8, vec_full_reg_size(s), 0);
                return;
            }
            gvec_fn = tcg_gen_gvec_shri;
        } else {
            /* Shift count the same size as element size produces all sign. */
            if (shift == 8 << size) {
                shift -= 1;
            }
            gvec_fn = tcg_gen_gvec_sari;
        }
        break;

    case 0x04: /* SRSHR / URSHR (rounding) */
        gvec_fn = is_u ? gen_gvec_urshr : gen_gvec_srshr;
        break;

    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        gvec_fn = is_u ? gen_gvec_ursra : gen_gvec_srsra;
        break;

    default:
        g_assert_not_reached();
    }

    gen_gvec_fn2i(s, is_q, rd, rn, shift, gvec_fn, size);
}

/* SHL/SLI - Vector shift left */
static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = immhb - (8 << size);

    /* Range of size is limited by decode: immh is a non-zero 4 bit field */
    assert(size >= 0 && size <= 3);

    if (extract32(immh, 3, 1) && !is_q) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (insert) {
        gen_gvec_fn2i(s, is_q, rd, rn, shift, gen_gvec_sli, size);
    } else {
        gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
    }
}

/* USHLL/SHLL - Vector shift left with widening */
static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
                                  int immh, int immb, int opcode, int rn, int rd)
{
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    int shift = immhb - (8 << size);
    int dsize = 64;
    int esize = 8 << size;
    int elements = dsize / esize;
    TCGv_i64 tcg_rn = tcg_temp_new_i64();
    TCGv_i64 tcg_rd = tcg_temp_new_i64();
    int i;

    if (size >= 3) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* For the LL variants the store is larger than the load,
     * so if rd == rn we would overwrite parts of our input.
     * So load everything right now and use shifts in the main loop.
     */
    read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);

    for (i = 0; i < elements; i++) {
        tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
        ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
        tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
        write_vec_element(s, tcg_rd, rd, i, size + 1);
    }
}

/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    int immhb = immh << 3 | immb;
    int size = 32 - clz32(immh) - 1;
    int dsize = 64;
    int esize = 8 << size;
    int elements = dsize / esize;
    int shift = (2 * esize) - immhb;
    bool round = extract32(opcode, 0, 1);
    TCGv_i64 tcg_rn, tcg_rd, tcg_final;
    TCGv_i64 tcg_round;
    int i;

    if (extract32(immh, 3, 1)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    tcg_rn = tcg_temp_new_i64();
    tcg_rd = tcg_temp_new_i64();
    tcg_final = tcg_temp_new_i64();
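    /*
     * SHRN writes only one 64-bit half of Vd (the high half for the
     * "2" form), so start from the current contents and deposit the
     * narrowed elements into it.
     */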
    read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);

    if (round) {
        tcg_round = tcg_constant_i64(1ULL << (shift - 1));
    } else {
        tcg_round = NULL;
    }

    for (i = 0; i < elements; i++) {
        read_vec_element(s, tcg_rn, rn, i, size + 1);
        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                false, true, size + 1, shift);

        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
    }

    if (!is_q) {
        write_vec_element(s, tcg_final, rd, 0, MO_64);
    } else {
        write_vec_element(s, tcg_final, rd, 1, MO_64);
    }
    tcg_temp_free_i64(tcg_rn);
    tcg_temp_free_i64(tcg_rd);
    tcg_temp_free_i64(tcg_final);

    clear_vec_high(s, is_q, rd);
}


/* AdvSIMD shift by immediate
 *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
 * +---+---+---+-------------+------+------+--------+---+------+------+
 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
 * +---+---+---+-------------+------+------+--------+---+------+------+
 */
static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int immb = extract32(insn, 16, 3);
    int immh = extract32(insn, 19, 4);
    bool is_u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);

    /* data_proc_simd[] has sent immh == 0 to disas_simd_mod_imm. */
    assert(immh != 0);

    switch (opcode) {
    case 0x08: /* SRI */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x00: /* SSHR / USHR */
    case 0x02: /* SSRA / USRA (accumulate) */
    case 0x04: /* SRSHR / URSHR (rounding) */
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x0a: /* SHL / SLI */
        handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x10: /* SHRN */
    case 0x11: /* RSHRN / SQRSHRUN */
        if (is_u) {
            handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
                                   opcode, rn, rd);
        } else {
            handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
        }
        break;
    case 0x12: /* SQSHRN / UQSHRN */
    case 0x13: /* SQRSHRN / UQRSHRN */
        handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
                               opcode, rn, rd);
        break;
    case 0x14: /* SSHLL / USHLL */
        handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
        break;
    case 0x1c: /* SCVTF / UCVTF */
        handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
                                     opcode, rn, rd);
        break;
    case 0xc: /* SQSHLU */
        if (!is_u) {
            unallocated_encoding(s);
            return;
        }
        handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
        break;
    case 0xe: /* SQSHL, UQSHL */
        handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
        break;
    case 0x1f: /* FCVTZS / FCVTZU */
        handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
        return;
    default:
        unallocated_encoding(s);
        return;
    }
}

/* Generate code to do a "long" addition or subtraction, i.e. one done in
 * TCGv_i64 on vector lanes twice the width specified by size.
 */
static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
                          TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
{
    static NeonGenTwo64OpFn * const fns[3][2] = {
        { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
        { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
        { tcg_gen_add_i64, tcg_gen_sub_i64 },
    };
    NeonGenTwo64OpFn *genfn;
    assert(size < 3);

    genfn = fns[size][is_sub];
    genfn(tcg_res, tcg_op1, tcg_op2);
}

static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
                                int opcode, int rd, int rn, int rm)
{
    /* 3-reg-different widening insns: 64 x 64 -> 128 */
    TCGv_i64 tcg_res[2];
    int pass, accop;

    tcg_res[0] = tcg_temp_new_i64();
    tcg_res[1] = tcg_temp_new_i64();

    /* Does this op do an adding accumulate, a subtracting accumulate,
     * or no accumulate at all?
     */
    switch (opcode) {
    case 5:
    case 8:
    case 9:
        accop = 1;
        break;
    case 10:
    case 11:
        accop = -1;
        break;
    default:
        accop = 0;
        break;
    }

    if (accop != 0) {
        read_vec_element(s, tcg_res[0], rd, 0, MO_64);
        read_vec_element(s, tcg_res[1], rd, 1, MO_64);
    }

    /* size == 2 means two 32x32->64 operations; this is worth special
     * casing because we can generally handle it inline.
     */
    if (size == 2) {
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_passres;
            MemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);

            int elt = pass + is_q * 2;

            read_vec_element(s, tcg_op1, rn, elt, memop);
            read_vec_element(s, tcg_op2, rm, elt, memop);

            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
                tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
                tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
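            /*
             * Absolute difference: compute both op1 - op2 and op2 - op1
             * and let movcond pick the non-negative one, which avoids a
             * branch in the generated code.
             */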
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
            {
                TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
                TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();

                tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
                tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
                tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
                                    tcg_passres,
                                    tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
                tcg_temp_free_i64(tcg_tmp1);
                tcg_temp_free_i64(tcg_tmp2);
                break;
            }
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                break;
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
                gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            default:
                g_assert_not_reached();
            }

            if (opcode == 9 || opcode == 11) {
                /* saturating accumulate ops */
                if (accop < 0) {
                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
                }
                gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
                                                  tcg_res[pass], tcg_passres);
            } else if (accop > 0) {
                tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            } else if (accop < 0) {
                tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
            }

            if (accop != 0) {
                tcg_temp_free_i64(tcg_passres);
            }

            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        /* size 0 or 1, generally helper functions */
        for (pass = 0; pass < 2; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i64 tcg_passres;
            int elt = pass + is_q * 2;

            read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);

            if (accop == 0) {
                tcg_passres = tcg_res[pass];
            } else {
                tcg_passres = tcg_temp_new_i64();
            }

            switch (opcode) {
            case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
            case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
            {
                TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
                static NeonGenWidenFn * const widenfns[2][2] = {
                    { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
                    { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
                };
                NeonGenWidenFn *widenfn = widenfns[size][is_u];

                widenfn(tcg_op2_64, tcg_op2);
                widenfn(tcg_passres, tcg_op1);
                gen_neon_addl(size, (opcode == 2), tcg_passres,
                              tcg_passres, tcg_op2_64);
                tcg_temp_free_i64(tcg_op2_64);
                break;
            }
            case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
            case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
            case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
            case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
            case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
                if (size == 0) {
                    if (is_u) {
                        gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
                    }
                } else {
                    if (is_u) {
                        gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
                    } else {
                        gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                    }
                }
                break;
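            /*
             * SQDMULL doubles the product: implemented below by
             * saturating-adding the product to itself, which also sets
             * QC on overflow.
             */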
            case 9: /* SQDMLAL, SQDMLAL2 */
            case 11: /* SQDMLSL, SQDMLSL2 */
            case 13: /* SQDMULL, SQDMULL2 */
                assert(size == 1);
                gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
                gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
                                                  tcg_passres, tcg_passres);
                break;
            default:
                g_assert_not_reached();
            }
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);

            if (accop != 0) {
                if (opcode == 9 || opcode == 11) {
                    /* saturating accumulate ops */
                    if (accop < 0) {
                        gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
                    }
                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                } else {
                    gen_neon_addl(size, (accop < 0), tcg_res[pass],
                                  tcg_res[pass], tcg_passres);
                }
                tcg_temp_free_i64(tcg_passres);
            }
        }
    }

    write_vec_element(s, tcg_res[0], rd, 0, MO_64);
    write_vec_element(s, tcg_res[1], rd, 1, MO_64);
    tcg_temp_free_i64(tcg_res[0]);
    tcg_temp_free_i64(tcg_res[1]);
}

static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
                            int opcode, int rd, int rn, int rm)
{
    TCGv_i64 tcg_res[2];
    int part = is_q ? 2 : 0;
    int pass;

    for (pass = 0; pass < 2; pass++) {
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
        static NeonGenWidenFn * const widenfns[3][2] = {
            { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
            { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
            { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
        };
        NeonGenWidenFn *widenfn = widenfns[size][is_u];

        read_vec_element(s, tcg_op1, rn, pass, MO_64);
        read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
        widenfn(tcg_op2_wide, tcg_op2);
        tcg_temp_free_i32(tcg_op2);
        tcg_res[pass] = tcg_temp_new_i64();
        gen_neon_addl(size, (opcode == 3),
                      tcg_res[pass], tcg_op1, tcg_op2_wide);
        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2_wide);
    }

    for (pass = 0; pass < 2; pass++) {
        write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
        tcg_temp_free_i64(tcg_res[pass]);
    }
}
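/*
 * Round-then-narrow for 32-bit lanes: adding 1 << 31 before taking the
 * high 32 bits implements the rounding for RADDHN/RSUBHN. Note that it
 * modifies its input temporary in place.
 */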
static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
{
    tcg_gen_addi_i64(in, in, 1U << 31);
    tcg_gen_extrh_i64_i32(res, in);
}

static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
                                 int opcode, int rd, int rn, int rm)
{
    TCGv_i32 tcg_res[2];
    int part = is_q ? 2 : 0;
    int pass;

    for (pass = 0; pass < 2; pass++) {
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_wideres = tcg_temp_new_i64();
        static NeonGenNarrowFn * const narrowfns[3][2] = {
            { gen_helper_neon_narrow_high_u8,
              gen_helper_neon_narrow_round_high_u8 },
            { gen_helper_neon_narrow_high_u16,
              gen_helper_neon_narrow_round_high_u16 },
            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
        };
        NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];

        read_vec_element(s, tcg_op1, rn, pass, MO_64);
        read_vec_element(s, tcg_op2, rm, pass, MO_64);

        gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);

        tcg_res[pass] = tcg_temp_new_i32();
        gennarrow(tcg_res[pass], tcg_wideres);
        tcg_temp_free_i64(tcg_wideres);
    }

    for (pass = 0; pass < 2; pass++) {
        write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
        tcg_temp_free_i32(tcg_res[pass]);
    }
    clear_vec_high(s, is_q, rd);
}

/* AdvSIMD three different
 *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
 */
static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
{
    /* Instructions in this group fall into three basic classes
     * (in each case with the operation working on each element in
     * the input vectors):
     * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
     *     128 bit input)
     * (2) wide 64 x 128 -> 128
     * (3) narrowing 128 x 128 -> 64
     * Here we do initial decode, catch unallocated cases and
     * dispatch to separate functions for each class.
     */
    int is_q = extract32(insn, 30, 1);
    int is_u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    switch (opcode) {
    case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
    case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
        /* 64 x 128 -> 128 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
    case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
        /* 128 x 128 -> 64 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    case 14: /* PMULL, PMULL2 */
        if (is_u) {
            unallocated_encoding(s);
            return;
        }
        switch (size) {
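        /*
         * Only sizes 0 and 3 are allocated: PMULL.P8 (8x8->16 carry-less
         * multiply) and PMULL.P64 (64x64->128, gated on the PMULL
         * extension and commonly used for GHASH).
         */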
        case 0: /* PMULL.P8 */
            if (!fp_access_check(s)) {
                return;
            }
            /* The Q field specifies lo/hi half input for this insn. */
            gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
                             gen_helper_neon_pmull_h);
            break;

        case 3: /* PMULL.P64 */
            if (!dc_isar_feature(aa64_pmull, s)) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            /* The Q field specifies lo/hi half input for this insn. */
            gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
                             gen_helper_gvec_pmull_q);
            break;

        default:
            unallocated_encoding(s);
            break;
        }
        return;
    case 9: /* SQDMLAL, SQDMLAL2 */
    case 11: /* SQDMLSL, SQDMLSL2 */
    case 13: /* SQDMULL, SQDMULL2 */
        if (is_u || size == 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
    case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
    case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
    case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
    case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
    case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
    case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
        /* 64 x 64 -> 128 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
        break;
    default:
        /* opcode 15 not allocated */
        unallocated_encoding(s);
        break;
    }
}

/* Logic op (opcode == 3) subgroup of C3.6.16. */
static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool is_u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);

    if (!fp_access_check(s)) {
        return;
    }

    switch (size + 4 * is_u) {
    case 0: /* AND */
        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
        return;
    case 1: /* BIC */
        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
        return;
    case 2: /* ORR */
        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
        return;
    case 3: /* ORN */
        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
        return;
    case 4: /* EOR */
        gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
        return;

    case 5: /* BSL bitwise select */
        gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0);
        return;
    case 6: /* BIT, bitwise insert if true */
        gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0);
        return;
    case 7: /* BIF, bitwise insert if false */
        gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0);
        return;

    default:
        g_assert_not_reached();
    }
}

/* Pairwise op subgroup of C3.6.16.
 *
 * This is called directly or via handle_3same_float for float pairwise
 * operations where the opcode and size are calculated differently.
 */
static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
                                   int size, int rn, int rm, int rd)
{
    TCGv_ptr fpst;
    int pass;

    /* Floating point operations need fpst */
    if (opcode >= 0x58) {
        fpst = fpstatus_ptr(FPST_FPCR);
    } else {
        fpst = NULL;
    }

    if (!fp_access_check(s)) {
        return;
    }

    /* These operations work on the concatenated rm:rn, with each pair of
     * adjacent elements being operated on to produce an element in the result.
     */
    if (size == 3) {
        TCGv_i64 tcg_res[2];

        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            int passreg = (pass == 0) ? rn : rm;

            read_vec_element(s, tcg_op1, passreg, 0, MO_64);
            read_vec_element(s, tcg_op2, passreg, 1, MO_64);
            tcg_res[pass] = tcg_temp_new_i64();

            switch (opcode) {
            case 0x17: /* ADDP */
                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
                break;
            case 0x58: /* FMAXNMP */
                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5a: /* FADDP */
                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5e: /* FMAXP */
                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x78: /* FMINNMP */
                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x7e: /* FMINP */
                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }

        for (pass = 0; pass < 2; pass++) {
            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
            tcg_temp_free_i64(tcg_res[pass]);
        }
    } else {
        int maxpass = is_q ? 4 : 2;
        TCGv_i32 tcg_res[4];
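        /*
         * Pass layout for the 32-bit Q case: passes 0/1 take element
         * pairs (0,1) and (2,3) of Rn, passes 2/3 the same pairs of Rm,
         * so the result is { pairs of Rn, pairs of Rm }.
         */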
        for (pass = 0; pass < maxpass; pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            NeonGenTwoOpFn *genfn = NULL;
            int passreg = pass < (maxpass / 2) ? rn : rm;
            int passelt = (is_q && (pass & 1)) ? 2 : 0;

            read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
            read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
            tcg_res[pass] = tcg_temp_new_i32();

            switch (opcode) {
            case 0x17: /* ADDP */
            {
                static NeonGenTwoOpFn * const fns[3] = {
                    gen_helper_neon_padd_u8,
                    gen_helper_neon_padd_u16,
                    tcg_gen_add_i32,
                };
                genfn = fns[size];
                break;
            }
            case 0x14: /* SMAXP, UMAXP */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
                    { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
                    { tcg_gen_smax_i32, tcg_gen_umax_i32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x15: /* SMINP, UMINP */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
                    { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
                    { tcg_gen_smin_i32, tcg_gen_umin_i32 },
                };
                genfn = fns[size][u];
                break;
            }
            /* The FP operations are all on single floats (32 bit) */
            case 0x58: /* FMAXNMP */
                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5a: /* FADDP */
                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x5e: /* FMAXP */
                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x78: /* FMINNMP */
                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            case 0x7e: /* FMINP */
                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            /* FP ops called directly, otherwise call now */
            if (genfn) {
                genfn(tcg_res[pass], tcg_op1, tcg_op2);
            }

            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }

        for (pass = 0; pass < maxpass; pass++) {
            write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
            tcg_temp_free_i32(tcg_res[pass]);
        }
        clear_vec_high(s, is_q, rd);
    }

    if (fpst) {
        tcg_temp_free_ptr(fpst);
    }
}

/* Floating point op subgroup of C3.6.16. */
static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
{
    /* For floating point ops, the U, size[1] and opcode bits
     * together indicate the operation. size[0] indicates single
     * or double.
     */
    int fpopcode = extract32(insn, 11, 5)
        | (extract32(insn, 23, 1) << 5)
        | (extract32(insn, 29, 1) << 6);
    int is_q = extract32(insn, 30, 1);
    int size = extract32(insn, 22, 1);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    int datasize = is_q ? 128 : 64;
    int esize = 32 << size;
    int elements = datasize / esize;

    if (size == 1 && !is_q) {
        unallocated_encoding(s);
        return;
    }

    switch (fpopcode) {
    case 0x58: /* FMAXNMP */
    case 0x5a: /* FADDP */
    case 0x5e: /* FMAXP */
    case 0x78: /* FMINNMP */
    case 0x7e: /* FMINP */
        if (size && !is_q) {
            unallocated_encoding(s);
            return;
        }
        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
                               rn, rm, rd);
        return;
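    /*
     * Worked example of the fpopcode packing above: FADDP has U == 1,
     * size<1> == 0 and opcode == 0x1a, giving 0x1a | (1 << 6) == 0x5a.
     */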
    case 0x1b: /* FMULX */
    case 0x1f: /* FRECPS */
    case 0x3f: /* FRSQRTS */
    case 0x5d: /* FACGE */
    case 0x7d: /* FACGT */
    case 0x19: /* FMLA */
    case 0x39: /* FMLS */
    case 0x18: /* FMAXNM */
    case 0x1a: /* FADD */
    case 0x1c: /* FCMEQ */
    case 0x1e: /* FMAX */
    case 0x38: /* FMINNM */
    case 0x3a: /* FSUB */
    case 0x3e: /* FMIN */
    case 0x5b: /* FMUL */
    case 0x5c: /* FCMGE */
    case 0x5f: /* FDIV */
    case 0x7a: /* FABD */
    case 0x7c: /* FCMGT */
        if (!fp_access_check(s)) {
            return;
        }
        handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
        return;

    case 0x1d: /* FMLAL */
    case 0x3d: /* FMLSL */
    case 0x59: /* FMLAL2 */
    case 0x79: /* FMLSL2 */
        if (size & 1 || !dc_isar_feature(aa64_fhm, s)) {
            unallocated_encoding(s);
            return;
        }
        if (fp_access_check(s)) {
            int is_s = extract32(insn, 23, 1);
            int is_2 = extract32(insn, 29, 1);
            int data = (is_2 << 1) | is_s;
            tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
                               vec_full_reg_offset(s, rn),
                               vec_full_reg_offset(s, rm), cpu_env,
                               is_q ? 16 : 8, vec_full_reg_size(s),
                               data, gen_helper_gvec_fmlal_a64);
        }
        return;

    default:
        unallocated_encoding(s);
        return;
    }
}

/* Integer op subgroup of C3.6.16. */
static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
{
    int is_q = extract32(insn, 30, 1);
    int u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int pass;
    TCGCond cond;

    switch (opcode) {
    case 0x13: /* MUL, PMUL */
        if (u && size != 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x0: /* SHADD, UHADD */
    case 0x2: /* SRHADD, URHADD */
    case 0x4: /* SHSUB, UHSUB */
    case 0xc: /* SMAX, UMAX */
    case 0xd: /* SMIN, UMIN */
    case 0xe: /* SABD, UABD */
    case 0xf: /* SABA, UABA */
    case 0x12: /* MLA, MLS */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH */
        if (size == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        break;
    }

    if (!fp_access_check(s)) {
        return;
    }

    switch (opcode) {
    case 0x01: /* SQADD, UQADD */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size);
        }
        return;
    case 0x05: /* SQSUB, UQSUB */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size);
        }
        return;
    case 0x08: /* SSHL, USHL */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size);
        }
        return;
    case 0x0c: /* SMAX, UMAX */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
        }
        return;
    case 0x0d: /* SMIN, UMIN */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
        }
        return;
    case 0xe: /* SABD, UABD */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size);
        }
        return;
    case 0xf: /* SABA, UABA */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size);
        }
        return;
    case 0x10: /* ADD, SUB */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
        }
        return;
    case 0x13: /* MUL, PMUL */
        if (!u) { /* MUL */
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
        } else {  /* PMUL */
            gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
        }
        return;
    case 0x12: /* MLA, MLS */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size);
        }
        return;
    case 0x16: /* SQDMULH, SQRDMULH */
        {
            static gen_helper_gvec_3_ptr * const fns[2][2] = {
                { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h },
                { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s },
            };
            gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]);
        }
        return;
    case 0x11:
        if (!u) { /* CMTST */
            gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size);
            return;
        }
        /* else CMEQ */
        cond = TCG_COND_EQ;
        goto do_gvec_cmp;
    case 0x06: /* CMGT, CMHI */
        cond = u ? TCG_COND_GTU : TCG_COND_GT;
        goto do_gvec_cmp;
    case 0x07: /* CMGE, CMHS */
        cond = u ? TCG_COND_GEU : TCG_COND_GE;
    do_gvec_cmp:
        tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn),
                         vec_full_reg_offset(s, rm),
                         is_q ? 16 : 8, vec_full_reg_size(s));
        return;
    }
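    /*
     * The opcodes above all have full-vector (gvec) expansions; the
     * remaining ones (the halving adds/subs and the saturating and
     * rounding shifts) are done one element at a time, with 64-bit
     * elements (size == 3, which implies is_q) going through
     * handle_3same_64().
     */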
    if (size == 3) {
        assert(is_q);
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();
            NeonGenTwoOpFn *genfn = NULL;
            NeonGenTwoOpEnvFn *genenvfn = NULL;

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            switch (opcode) {
            case 0x0: /* SHADD, UHADD */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x2: /* SRHADD, URHADD */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x4: /* SHSUB, UHSUB */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x9: /* SQSHL, UQSHL */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0xa: /* SRSHL, URSHL */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0xb: /* SQRSHL, UQRSHL */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            default:
                g_assert_not_reached();
            }

            if (genenvfn) {
                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
            } else {
                genfn(tcg_res, tcg_op1, tcg_op2);
            }

            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }
    clear_vec_high(s, is_q, rd);
}

/* AdvSIMD three same
 *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
 * +---+---+---+-----------+------+---+------+--------+---+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+------+--------+---+------+------+
 */
static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
{
    int opcode = extract32(insn, 11, 5);

    switch (opcode) {
    case 0x3: /* logic ops */
        disas_simd_3same_logic(s, insn);
        break;
    case 0x17: /* ADDP */
    case 0x14: /* SMAXP, UMAXP */
    case 0x15: /* SMINP, UMINP */
    {
        /* Pairwise operations */
        int is_q = extract32(insn, 30, 1);
        int u = extract32(insn, 29, 1);
        int size = extract32(insn, 22, 2);
        int rm = extract32(insn, 16, 5);
        int rn = extract32(insn, 5, 5);
        int rd = extract32(insn, 0, 5);
        if (opcode == 0x17) {
            if (u || (size == 3 && !is_q)) {
                unallocated_encoding(s);
                return;
            }
        } else {
            if (size == 3) {
                unallocated_encoding(s);
                return;
            }
        }
        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
        break;
    }
    case 0x18 ... 0x31:
        /* floating point ops, sz[1] and U are part of opcode */
        disas_simd_3same_float(s, insn);
        break;
    default:
        disas_simd_3same_int(s, insn);
        break;
    }
}

/*
 * Advanced SIMD three same (ARMv8.2 FP16 variants)
 *
 *  31  30  29  28       24 23 22 21 20  16 15 14 13    11 10 9    5 4    0
 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
 * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
 *
 * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
 * (register), FACGE, FABD, FCMGT (register) and FACGT.
 */
static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
{
    int opcode = extract32(insn, 11, 3);
    int u = extract32(insn, 29, 1);
    int a = extract32(insn, 23, 1);
    int is_q = extract32(insn, 30, 1);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    /*
     * For these floating point ops, the U, a and opcode bits
     * together indicate the operation.
     */
    int fpopcode = opcode | (a << 3) | (u << 4);
    int datasize = is_q ? 128 : 64;
    int elements = datasize / 16;
    bool pairwise;
    TCGv_ptr fpst;
    int pass;

    switch (fpopcode) {
    case 0x0: /* FMAXNM */
    case 0x1: /* FMLA */
    case 0x2: /* FADD */
    case 0x3: /* FMULX */
    case 0x4: /* FCMEQ */
    case 0x6: /* FMAX */
    case 0x7: /* FRECPS */
    case 0x8: /* FMINNM */
    case 0x9: /* FMLS */
    case 0xa: /* FSUB */
    case 0xe: /* FMIN */
    case 0xf: /* FRSQRTS */
    case 0x13: /* FMUL */
    case 0x14: /* FCMGE */
    case 0x15: /* FACGE */
    case 0x17: /* FDIV */
    case 0x1a: /* FABD */
    case 0x1c: /* FCMGT */
    case 0x1d: /* FACGT */
        pairwise = false;
        break;
    case 0x10: /* FMAXNMP */
    case 0x12: /* FADDP */
    case 0x16: /* FMAXP */
    case 0x18: /* FMINNMP */
    case 0x1e: /* FMINP */
        pairwise = true;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!dc_isar_feature(aa64_fp16, s)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    fpst = fpstatus_ptr(FPST_FPCR_F16);

    if (pairwise) {
        int maxpass = is_q ? 8 : 4;
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i32 tcg_res[8];
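        /*
         * As with the 32-bit pairwise ops, the first half of the passes
         * reads pairs from Rn and the second half from Rm; the
         * (pass << 1) & (maxpass - 1) computation wraps the element
         * index back to 0 when the source register switches.
         */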
rn : rm; 11983 int passelt = (pass << 1) & (maxpass - 1); 11984 11985 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16); 11986 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16); 11987 tcg_res[pass] = tcg_temp_new_i32(); 11988 11989 switch (fpopcode) { 11990 case 0x10: /* FMAXNMP */ 11991 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2, 11992 fpst); 11993 break; 11994 case 0x12: /* FADDP */ 11995 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11996 break; 11997 case 0x16: /* FMAXP */ 11998 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 11999 break; 12000 case 0x18: /* FMINNMP */ 12001 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2, 12002 fpst); 12003 break; 12004 case 0x1e: /* FMINP */ 12005 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst); 12006 break; 12007 default: 12008 g_assert_not_reached(); 12009 } 12010 } 12011 12012 for (pass = 0; pass < maxpass; pass++) { 12013 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16); 12014 tcg_temp_free_i32(tcg_res[pass]); 12015 } 12016 12017 tcg_temp_free_i32(tcg_op1); 12018 tcg_temp_free_i32(tcg_op2); 12019 12020 } else { 12021 for (pass = 0; pass < elements; pass++) { 12022 TCGv_i32 tcg_op1 = tcg_temp_new_i32(); 12023 TCGv_i32 tcg_op2 = tcg_temp_new_i32(); 12024 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12025 12026 read_vec_element_i32(s, tcg_op1, rn, pass, MO_16); 12027 read_vec_element_i32(s, tcg_op2, rm, pass, MO_16); 12028 12029 switch (fpopcode) { 12030 case 0x0: /* FMAXNM */ 12031 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); 12032 break; 12033 case 0x1: /* FMLA */ 12034 read_vec_element_i32(s, tcg_res, rd, pass, MO_16); 12035 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, 12036 fpst); 12037 break; 12038 case 0x2: /* FADD */ 12039 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); 12040 break; 12041 case 0x3: /* FMULX */ 12042 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); 12043 break; 12044 case 0x4: /* FCMEQ */ 12045 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12046 break; 12047 case 0x6: /* FMAX */ 12048 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); 12049 break; 12050 case 0x7: /* FRECPS */ 12051 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12052 break; 12053 case 0x8: /* FMINNM */ 12054 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); 12055 break; 12056 case 0x9: /* FMLS */ 12057 /* As usual for ARM, separate negation for fused multiply-add */ 12058 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); 12059 read_vec_element_i32(s, tcg_res, rd, pass, MO_16); 12060 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, 12061 fpst); 12062 break; 12063 case 0xa: /* FSUB */ 12064 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); 12065 break; 12066 case 0xe: /* FMIN */ 12067 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); 12068 break; 12069 case 0xf: /* FRSQRTS */ 12070 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12071 break; 12072 case 0x13: /* FMUL */ 12073 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); 12074 break; 12075 case 0x14: /* FCMGE */ 12076 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12077 break; 12078 case 0x15: /* FACGE */ 12079 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12080 break; 12081 case 0x17: /* FDIV */ 12082 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); 12083 break; 12084 case 0x1a: /* FABD */ 12085 gen_helper_advsimd_subh(tcg_res, tcg_op1, 
tcg_op2, fpst); 12086 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff); 12087 break; 12088 case 0x1c: /* FCMGT */ 12089 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12090 break; 12091 case 0x1d: /* FACGT */ 12092 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); 12093 break; 12094 default: 12095 g_assert_not_reached(); 12096 } 12097 12098 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 12099 tcg_temp_free_i32(tcg_res); 12100 tcg_temp_free_i32(tcg_op1); 12101 tcg_temp_free_i32(tcg_op2); 12102 } 12103 } 12104 12105 tcg_temp_free_ptr(fpst); 12106 12107 clear_vec_high(s, is_q, rd); 12108 } 12109 12110 /* AdvSIMD three same extra 12111 * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 12112 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ 12113 * | 0 | Q | U | 0 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd | 12114 * +---+---+---+-----------+------+---+------+---+--------+---+----+----+ 12115 */ 12116 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn) 12117 { 12118 int rd = extract32(insn, 0, 5); 12119 int rn = extract32(insn, 5, 5); 12120 int opcode = extract32(insn, 11, 4); 12121 int rm = extract32(insn, 16, 5); 12122 int size = extract32(insn, 22, 2); 12123 bool u = extract32(insn, 29, 1); 12124 bool is_q = extract32(insn, 30, 1); 12125 bool feature; 12126 int rot; 12127 12128 switch (u * 16 + opcode) { 12129 case 0x10: /* SQRDMLAH (vector) */ 12130 case 0x11: /* SQRDMLSH (vector) */ 12131 if (size != 1 && size != 2) { 12132 unallocated_encoding(s); 12133 return; 12134 } 12135 feature = dc_isar_feature(aa64_rdm, s); 12136 break; 12137 case 0x02: /* SDOT (vector) */ 12138 case 0x12: /* UDOT (vector) */ 12139 if (size != MO_32) { 12140 unallocated_encoding(s); 12141 return; 12142 } 12143 feature = dc_isar_feature(aa64_dp, s); 12144 break; 12145 case 0x03: /* USDOT */ 12146 if (size != MO_32) { 12147 unallocated_encoding(s); 12148 return; 12149 } 12150 feature = dc_isar_feature(aa64_i8mm, s); 12151 break; 12152 case 0x04: /* SMMLA */ 12153 case 0x14: /* UMMLA */ 12154 case 0x05: /* USMMLA */ 12155 if (!is_q || size != MO_32) { 12156 unallocated_encoding(s); 12157 return; 12158 } 12159 feature = dc_isar_feature(aa64_i8mm, s); 12160 break; 12161 case 0x18: /* FCMLA, #0 */ 12162 case 0x19: /* FCMLA, #90 */ 12163 case 0x1a: /* FCMLA, #180 */ 12164 case 0x1b: /* FCMLA, #270 */ 12165 case 0x1c: /* FCADD, #90 */ 12166 case 0x1e: /* FCADD, #270 */ 12167 if (size == 0 12168 || (size == 1 && !dc_isar_feature(aa64_fp16, s)) 12169 || (size == 3 && !is_q)) { 12170 unallocated_encoding(s); 12171 return; 12172 } 12173 feature = dc_isar_feature(aa64_fcma, s); 12174 break; 12175 case 0x1d: /* BFMMLA */ 12176 if (size != MO_16 || !is_q) { 12177 unallocated_encoding(s); 12178 return; 12179 } 12180 feature = dc_isar_feature(aa64_bf16, s); 12181 break; 12182 case 0x1f: 12183 switch (size) { 12184 case 1: /* BFDOT */ 12185 case 3: /* BFMLAL{B,T} */ 12186 feature = dc_isar_feature(aa64_bf16, s); 12187 break; 12188 default: 12189 unallocated_encoding(s); 12190 return; 12191 } 12192 break; 12193 default: 12194 unallocated_encoding(s); 12195 return; 12196 } 12197 if (!feature) { 12198 unallocated_encoding(s); 12199 return; 12200 } 12201 if (!fp_access_check(s)) { 12202 return; 12203 } 12204 12205 switch (opcode) { 12206 case 0x0: /* SQRDMLAH (vector) */ 12207 gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmlah_qc, size); 12208 return; 12209 12210 case 0x1: /* SQRDMLSH (vector) */ 12211 gen_gvec_fn3(s, is_q, rd, rn, rm, 
gen_gvec_sqrdmlsh_qc, size); 12212 return; 12213 12214 case 0x2: /* SDOT / UDOT */ 12215 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, 12216 u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b); 12217 return; 12218 12219 case 0x3: /* USDOT */ 12220 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_usdot_b); 12221 return; 12222 12223 case 0x04: /* SMMLA, UMMLA */ 12224 gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, 12225 u ? gen_helper_gvec_ummla_b 12226 : gen_helper_gvec_smmla_b); 12227 return; 12228 case 0x05: /* USMMLA */ 12229 gen_gvec_op4_ool(s, 1, rd, rn, rm, rd, 0, gen_helper_gvec_usmmla_b); 12230 return; 12231 12232 case 0x8: /* FCMLA, #0 */ 12233 case 0x9: /* FCMLA, #90 */ 12234 case 0xa: /* FCMLA, #180 */ 12235 case 0xb: /* FCMLA, #270 */ 12236 rot = extract32(opcode, 0, 2); 12237 switch (size) { 12238 case 1: 12239 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, true, rot, 12240 gen_helper_gvec_fcmlah); 12241 break; 12242 case 2: 12243 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot, 12244 gen_helper_gvec_fcmlas); 12245 break; 12246 case 3: 12247 gen_gvec_op4_fpst(s, is_q, rd, rn, rm, rd, false, rot, 12248 gen_helper_gvec_fcmlad); 12249 break; 12250 default: 12251 g_assert_not_reached(); 12252 } 12253 return; 12254 12255 case 0xc: /* FCADD, #90 */ 12256 case 0xe: /* FCADD, #270 */ 12257 rot = extract32(opcode, 1, 1); 12258 switch (size) { 12259 case 1: 12260 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, 12261 gen_helper_gvec_fcaddh); 12262 break; 12263 case 2: 12264 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, 12265 gen_helper_gvec_fcadds); 12266 break; 12267 case 3: 12268 gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot, 12269 gen_helper_gvec_fcaddd); 12270 break; 12271 default: 12272 g_assert_not_reached(); 12273 } 12274 return; 12275 12276 case 0xd: /* BFMMLA */ 12277 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfmmla); 12278 return; 12279 case 0xf: 12280 switch (size) { 12281 case 1: /* BFDOT */ 12282 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, 0, gen_helper_gvec_bfdot); 12283 break; 12284 case 3: /* BFMLAL{B,T} */ 12285 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, false, is_q, 12286 gen_helper_gvec_bfmlal); 12287 break; 12288 default: 12289 g_assert_not_reached(); 12290 } 12291 return; 12292 12293 default: 12294 g_assert_not_reached(); 12295 } 12296 } 12297 12298 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, 12299 int size, int rn, int rd) 12300 { 12301 /* Handle 2-reg-misc ops which are widening (so each size element 12302 * in the source becomes a 2*size element in the destination). 12303 * The only instruction like this is FCVTL. 12304 */ 12305 int pass; 12306 12307 if (size == 3) { 12308 /* 32 -> 64 bit fp conversion */ 12309 TCGv_i64 tcg_res[2]; 12310 int srcelt = is_q ? 2 : 0; 12311 12312 for (pass = 0; pass < 2; pass++) { 12313 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12314 tcg_res[pass] = tcg_temp_new_i64(); 12315 12316 read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); 12317 gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env); 12318 tcg_temp_free_i32(tcg_op); 12319 } 12320 for (pass = 0; pass < 2; pass++) { 12321 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 12322 tcg_temp_free_i64(tcg_res[pass]); 12323 } 12324 } else { 12325 /* 16 -> 32 bit fp conversion */ 12326 int srcelt = is_q ?
4 : 0; 12327 TCGv_i32 tcg_res[4]; 12328 TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); 12329 TCGv_i32 ahp = get_ahp_flag(); 12330 12331 for (pass = 0; pass < 4; pass++) { 12332 tcg_res[pass] = tcg_temp_new_i32(); 12333 12334 read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); 12335 gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], 12336 fpst, ahp); 12337 } 12338 for (pass = 0; pass < 4; pass++) { 12339 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); 12340 tcg_temp_free_i32(tcg_res[pass]); 12341 } 12342 12343 tcg_temp_free_ptr(fpst); 12344 tcg_temp_free_i32(ahp); 12345 } 12346 } 12347 12348 static void handle_rev(DisasContext *s, int opcode, bool u, 12349 bool is_q, int size, int rn, int rd) 12350 { 12351 int op = (opcode << 1) | u; 12352 int opsz = op + size; 12353 int grp_size = 3 - opsz; 12354 int dsize = is_q ? 128 : 64; 12355 int i; 12356 12357 if (opsz >= 3) { 12358 unallocated_encoding(s); 12359 return; 12360 } 12361 12362 if (!fp_access_check(s)) { 12363 return; 12364 } 12365 12366 if (size == 0) { 12367 /* Special case bytes, use bswap op on each group of elements */ 12368 int groups = dsize / (8 << grp_size); 12369 12370 for (i = 0; i < groups; i++) { 12371 TCGv_i64 tcg_tmp = tcg_temp_new_i64(); 12372 12373 read_vec_element(s, tcg_tmp, rn, i, grp_size); 12374 switch (grp_size) { 12375 case MO_16: 12376 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 12377 break; 12378 case MO_32: 12379 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp, TCG_BSWAP_IZ); 12380 break; 12381 case MO_64: 12382 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp); 12383 break; 12384 default: 12385 g_assert_not_reached(); 12386 } 12387 write_vec_element(s, tcg_tmp, rd, i, grp_size); 12388 tcg_temp_free_i64(tcg_tmp); 12389 } 12390 clear_vec_high(s, is_q, rd); 12391 } else { 12392 int revmask = (1 << grp_size) - 1; 12393 int esize = 8 << size; 12394 int elements = dsize / esize; 12395 TCGv_i64 tcg_rn = tcg_temp_new_i64(); 12396 TCGv_i64 tcg_rd = tcg_const_i64(0); 12397 TCGv_i64 tcg_rd_hi = tcg_const_i64(0); 12398 12399 for (i = 0; i < elements; i++) { 12400 int e_rev = (i & 0xf) ^ revmask; 12401 int off = e_rev * esize; 12402 read_vec_element(s, tcg_rn, rn, i, size); 12403 if (off >= 64) { 12404 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi, 12405 tcg_rn, off - 64, esize); 12406 } else { 12407 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize); 12408 } 12409 } 12410 write_vec_element(s, tcg_rd, rd, 0, MO_64); 12411 write_vec_element(s, tcg_rd_hi, rd, 1, MO_64); 12412 12413 tcg_temp_free_i64(tcg_rd_hi); 12414 tcg_temp_free_i64(tcg_rd); 12415 tcg_temp_free_i64(tcg_rn); 12416 } 12417 } 12418 12419 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, 12420 bool is_q, int size, int rn, int rd) 12421 { 12422 /* Implement the pairwise operations from 2-misc: 12423 * SADDLP, UADDLP, SADALP, UADALP. 12424 * These all add pairs of elements in the input to produce a 12425 * double-width result element in the output (possibly accumulating). 12426 */ 12427 bool accum = (opcode == 0x6); 12428 int maxpass = is_q ? 2 : 1; 12429 int pass; 12430 TCGv_i64 tcg_res[2]; 12431 12432 if (size == 2) { 12433 /* 32 + 32 -> 64 op */ 12434 MemOp memop = size + (u ? 
0 : MO_SIGN); 12435 12436 for (pass = 0; pass < maxpass; pass++) { 12437 TCGv_i64 tcg_op1 = tcg_temp_new_i64(); 12438 TCGv_i64 tcg_op2 = tcg_temp_new_i64(); 12439 12440 tcg_res[pass] = tcg_temp_new_i64(); 12441 12442 read_vec_element(s, tcg_op1, rn, pass * 2, memop); 12443 read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop); 12444 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); 12445 if (accum) { 12446 read_vec_element(s, tcg_op1, rd, pass, MO_64); 12447 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1); 12448 } 12449 12450 tcg_temp_free_i64(tcg_op1); 12451 tcg_temp_free_i64(tcg_op2); 12452 } 12453 } else { 12454 for (pass = 0; pass < maxpass; pass++) { 12455 TCGv_i64 tcg_op = tcg_temp_new_i64(); 12456 NeonGenOne64OpFn *genfn; 12457 static NeonGenOne64OpFn * const fns[2][2] = { 12458 { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, 12459 { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, 12460 }; 12461 12462 genfn = fns[size][u]; 12463 12464 tcg_res[pass] = tcg_temp_new_i64(); 12465 12466 read_vec_element(s, tcg_op, rn, pass, MO_64); 12467 genfn(tcg_res[pass], tcg_op); 12468 12469 if (accum) { 12470 read_vec_element(s, tcg_op, rd, pass, MO_64); 12471 if (size == 0) { 12472 gen_helper_neon_addl_u16(tcg_res[pass], 12473 tcg_res[pass], tcg_op); 12474 } else { 12475 gen_helper_neon_addl_u32(tcg_res[pass], 12476 tcg_res[pass], tcg_op); 12477 } 12478 } 12479 tcg_temp_free_i64(tcg_op); 12480 } 12481 } 12482 if (!is_q) { 12483 tcg_res[1] = tcg_constant_i64(0); 12484 } 12485 for (pass = 0; pass < 2; pass++) { 12486 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 12487 tcg_temp_free_i64(tcg_res[pass]); 12488 } 12489 } 12490 12491 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) 12492 { 12493 /* Implement SHLL and SHLL2 */ 12494 int pass; 12495 int part = is_q ? 
2 : 0; 12496 TCGv_i64 tcg_res[2]; 12497 12498 for (pass = 0; pass < 2; pass++) { 12499 static NeonGenWidenFn * const widenfns[3] = { 12500 gen_helper_neon_widen_u8, 12501 gen_helper_neon_widen_u16, 12502 tcg_gen_extu_i32_i64, 12503 }; 12504 NeonGenWidenFn *widenfn = widenfns[size]; 12505 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12506 12507 read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32); 12508 tcg_res[pass] = tcg_temp_new_i64(); 12509 widenfn(tcg_res[pass], tcg_op); 12510 tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size); 12511 12512 tcg_temp_free_i32(tcg_op); 12513 } 12514 12515 for (pass = 0; pass < 2; pass++) { 12516 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 12517 tcg_temp_free_i64(tcg_res[pass]); 12518 } 12519 } 12520 12521 /* AdvSIMD two reg misc 12522 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 12523 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 12524 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd | 12525 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ 12526 */ 12527 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) 12528 { 12529 int size = extract32(insn, 22, 2); 12530 int opcode = extract32(insn, 12, 5); 12531 bool u = extract32(insn, 29, 1); 12532 bool is_q = extract32(insn, 30, 1); 12533 int rn = extract32(insn, 5, 5); 12534 int rd = extract32(insn, 0, 5); 12535 bool need_fpstatus = false; 12536 bool need_rmode = false; 12537 int rmode = -1; 12538 TCGv_i32 tcg_rmode; 12539 TCGv_ptr tcg_fpstatus; 12540 12541 switch (opcode) { 12542 case 0x0: /* REV64, REV32 */ 12543 case 0x1: /* REV16 */ 12544 handle_rev(s, opcode, u, is_q, size, rn, rd); 12545 return; 12546 case 0x5: /* CNT, NOT, RBIT */ 12547 if (u && size == 0) { 12548 /* NOT */ 12549 break; 12550 } else if (u && size == 1) { 12551 /* RBIT */ 12552 break; 12553 } else if (!u && size == 0) { 12554 /* CNT */ 12555 break; 12556 } 12557 unallocated_encoding(s); 12558 return; 12559 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */ 12560 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */ 12561 if (size == 3) { 12562 unallocated_encoding(s); 12563 return; 12564 } 12565 if (!fp_access_check(s)) { 12566 return; 12567 } 12568 12569 handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd); 12570 return; 12571 case 0x4: /* CLS, CLZ */ 12572 if (size == 3) { 12573 unallocated_encoding(s); 12574 return; 12575 } 12576 break; 12577 case 0x2: /* SADDLP, UADDLP */ 12578 case 0x6: /* SADALP, UADALP */ 12579 if (size == 3) { 12580 unallocated_encoding(s); 12581 return; 12582 } 12583 if (!fp_access_check(s)) { 12584 return; 12585 } 12586 handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd); 12587 return; 12588 case 0x13: /* SHLL, SHLL2 */ 12589 if (u == 0 || size == 3) { 12590 unallocated_encoding(s); 12591 return; 12592 } 12593 if (!fp_access_check(s)) { 12594 return; 12595 } 12596 handle_shll(s, is_q, size, rn, rd); 12597 return; 12598 case 0xa: /* CMLT */ 12599 if (u == 1) { 12600 unallocated_encoding(s); 12601 return; 12602 } 12603 /* fall through */ 12604 case 0x8: /* CMGT, CMGE */ 12605 case 0x9: /* CMEQ, CMLE */ 12606 case 0xb: /* ABS, NEG */ 12607 if (size == 3 && !is_q) { 12608 unallocated_encoding(s); 12609 return; 12610 } 12611 break; 12612 case 0x3: /* SUQADD, USQADD */ 12613 if (size == 3 && !is_q) { 12614 unallocated_encoding(s); 12615 return; 12616 } 12617 if (!fp_access_check(s)) { 12618 return; 12619 } 12620 handle_2misc_satacc(s, false, u, is_q, size, rn, rd); 12621 return; 12622 case 0x7: /* SQABS, SQNEG */ 
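        /*
         * SQABS/SQNEG saturate (the most negative value has no positive
         * counterpart at the same element width), so unlike ABS/NEG they
         * cannot use a plain gvec expansion: the per-element code below
         * calls helpers that take cpu_env so they can set the QC flag.
         */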
12623 if (size == 3 && !is_q) { 12624 unallocated_encoding(s); 12625 return; 12626 } 12627 break; 12628 case 0xc ... 0xf: 12629 case 0x16 ... 0x1f: 12630 { 12631 /* Floating point: U, size[1] and opcode indicate operation; 12632 * size[0] indicates single or double precision. 12633 */ 12634 int is_double = extract32(size, 0, 1); 12635 opcode |= (extract32(size, 1, 1) << 5) | (u << 6); 12636 size = is_double ? 3 : 2; 12637 switch (opcode) { 12638 case 0x2f: /* FABS */ 12639 case 0x6f: /* FNEG */ 12640 if (size == 3 && !is_q) { 12641 unallocated_encoding(s); 12642 return; 12643 } 12644 break; 12645 case 0x1d: /* SCVTF */ 12646 case 0x5d: /* UCVTF */ 12647 { 12648 bool is_signed = (opcode == 0x1d) ? true : false; 12649 int elements = is_double ? 2 : is_q ? 4 : 2; 12650 if (is_double && !is_q) { 12651 unallocated_encoding(s); 12652 return; 12653 } 12654 if (!fp_access_check(s)) { 12655 return; 12656 } 12657 handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size); 12658 return; 12659 } 12660 case 0x2c: /* FCMGT (zero) */ 12661 case 0x2d: /* FCMEQ (zero) */ 12662 case 0x2e: /* FCMLT (zero) */ 12663 case 0x6c: /* FCMGE (zero) */ 12664 case 0x6d: /* FCMLE (zero) */ 12665 if (size == 3 && !is_q) { 12666 unallocated_encoding(s); 12667 return; 12668 } 12669 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd); 12670 return; 12671 case 0x7f: /* FSQRT */ 12672 if (size == 3 && !is_q) { 12673 unallocated_encoding(s); 12674 return; 12675 } 12676 break; 12677 case 0x1a: /* FCVTNS */ 12678 case 0x1b: /* FCVTMS */ 12679 case 0x3a: /* FCVTPS */ 12680 case 0x3b: /* FCVTZS */ 12681 case 0x5a: /* FCVTNU */ 12682 case 0x5b: /* FCVTMU */ 12683 case 0x7a: /* FCVTPU */ 12684 case 0x7b: /* FCVTZU */ 12685 need_fpstatus = true; 12686 need_rmode = true; 12687 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 12688 if (size == 3 && !is_q) { 12689 unallocated_encoding(s); 12690 return; 12691 } 12692 break; 12693 case 0x5c: /* FCVTAU */ 12694 case 0x1c: /* FCVTAS */ 12695 need_fpstatus = true; 12696 need_rmode = true; 12697 rmode = FPROUNDING_TIEAWAY; 12698 if (size == 3 && !is_q) { 12699 unallocated_encoding(s); 12700 return; 12701 } 12702 break; 12703 case 0x3c: /* URECPE */ 12704 if (size == 3) { 12705 unallocated_encoding(s); 12706 return; 12707 } 12708 /* fall through */ 12709 case 0x3d: /* FRECPE */ 12710 case 0x7d: /* FRSQRTE */ 12711 if (size == 3 && !is_q) { 12712 unallocated_encoding(s); 12713 return; 12714 } 12715 if (!fp_access_check(s)) { 12716 return; 12717 } 12718 handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd); 12719 return; 12720 case 0x56: /* FCVTXN, FCVTXN2 */ 12721 if (size == 2) { 12722 unallocated_encoding(s); 12723 return; 12724 } 12725 /* fall through */ 12726 case 0x16: /* FCVTN, FCVTN2 */ 12727 /* handle_2misc_narrow does a 2*size -> size operation, but these 12728 * instructions encode the source size rather than dest size. 
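         * Passing size - 1 below compensates: FCVTN with sz=1 arrives
         * here with size == 3 and is performed as a 64 -> 32 bit (double
         * to single) narrow, while sz=0 becomes the 32 -> 16 bit (single
         * to half) case.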
12729 */ 12730 if (!fp_access_check(s)) { 12731 return; 12732 } 12733 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 12734 return; 12735 case 0x36: /* BFCVTN, BFCVTN2 */ 12736 if (!dc_isar_feature(aa64_bf16, s) || size != 2) { 12737 unallocated_encoding(s); 12738 return; 12739 } 12740 if (!fp_access_check(s)) { 12741 return; 12742 } 12743 handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); 12744 return; 12745 case 0x17: /* FCVTL, FCVTL2 */ 12746 if (!fp_access_check(s)) { 12747 return; 12748 } 12749 handle_2misc_widening(s, opcode, is_q, size, rn, rd); 12750 return; 12751 case 0x18: /* FRINTN */ 12752 case 0x19: /* FRINTM */ 12753 case 0x38: /* FRINTP */ 12754 case 0x39: /* FRINTZ */ 12755 need_rmode = true; 12756 rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); 12757 /* fall through */ 12758 case 0x59: /* FRINTX */ 12759 case 0x79: /* FRINTI */ 12760 need_fpstatus = true; 12761 if (size == 3 && !is_q) { 12762 unallocated_encoding(s); 12763 return; 12764 } 12765 break; 12766 case 0x58: /* FRINTA */ 12767 need_rmode = true; 12768 rmode = FPROUNDING_TIEAWAY; 12769 need_fpstatus = true; 12770 if (size == 3 && !is_q) { 12771 unallocated_encoding(s); 12772 return; 12773 } 12774 break; 12775 case 0x7c: /* URSQRTE */ 12776 if (size == 3) { 12777 unallocated_encoding(s); 12778 return; 12779 } 12780 break; 12781 case 0x1e: /* FRINT32Z */ 12782 case 0x1f: /* FRINT64Z */ 12783 need_rmode = true; 12784 rmode = FPROUNDING_ZERO; 12785 /* fall through */ 12786 case 0x5e: /* FRINT32X */ 12787 case 0x5f: /* FRINT64X */ 12788 need_fpstatus = true; 12789 if ((size == 3 && !is_q) || !dc_isar_feature(aa64_frint, s)) { 12790 unallocated_encoding(s); 12791 return; 12792 } 12793 break; 12794 default: 12795 unallocated_encoding(s); 12796 return; 12797 } 12798 break; 12799 } 12800 default: 12801 unallocated_encoding(s); 12802 return; 12803 } 12804 12805 if (!fp_access_check(s)) { 12806 return; 12807 } 12808 12809 if (need_fpstatus || need_rmode) { 12810 tcg_fpstatus = fpstatus_ptr(FPST_FPCR); 12811 } else { 12812 tcg_fpstatus = NULL; 12813 } 12814 if (need_rmode) { 12815 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); 12816 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 12817 } else { 12818 tcg_rmode = NULL; 12819 } 12820 12821 switch (opcode) { 12822 case 0x5: 12823 if (u && size == 0) { /* NOT */ 12824 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0); 12825 return; 12826 } 12827 break; 12828 case 0x8: /* CMGT, CMGE */ 12829 if (u) { 12830 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cge0, size); 12831 } else { 12832 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cgt0, size); 12833 } 12834 return; 12835 case 0x9: /* CMEQ, CMLE */ 12836 if (u) { 12837 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_cle0, size); 12838 } else { 12839 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_ceq0, size); 12840 } 12841 return; 12842 case 0xa: /* CMLT */ 12843 gen_gvec_fn2(s, is_q, rd, rn, gen_gvec_clt0, size); 12844 return; 12845 case 0xb: 12846 if (u) { /* ABS, NEG */ 12847 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size); 12848 } else { 12849 gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size); 12850 } 12851 return; 12852 } 12853 12854 if (size == 3) { 12855 /* All 64-bit element operations can be shared with scalar 2misc */ 12856 int pass; 12857 12858 /* Coverity claims (size == 3 && !is_q) has been eliminated 12859 * from all paths leading to here. 
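         * That matches the decode above: every case which permits
         * size == 3 rejects it as unallocated unless is_q is also set,
         * so the assert below documents the invariant and guards any
         * future decode changes.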
12860 */ 12861 tcg_debug_assert(is_q); 12862 for (pass = 0; pass < 2; pass++) { 12863 TCGv_i64 tcg_op = tcg_temp_new_i64(); 12864 TCGv_i64 tcg_res = tcg_temp_new_i64(); 12865 12866 read_vec_element(s, tcg_op, rn, pass, MO_64); 12867 12868 handle_2misc_64(s, opcode, u, tcg_res, tcg_op, 12869 tcg_rmode, tcg_fpstatus); 12870 12871 write_vec_element(s, tcg_res, rd, pass, MO_64); 12872 12873 tcg_temp_free_i64(tcg_res); 12874 tcg_temp_free_i64(tcg_op); 12875 } 12876 } else { 12877 int pass; 12878 12879 for (pass = 0; pass < (is_q ? 4 : 2); pass++) { 12880 TCGv_i32 tcg_op = tcg_temp_new_i32(); 12881 TCGv_i32 tcg_res = tcg_temp_new_i32(); 12882 12883 read_vec_element_i32(s, tcg_op, rn, pass, MO_32); 12884 12885 if (size == 2) { 12886 /* Special cases for 32 bit elements */ 12887 switch (opcode) { 12888 case 0x4: /* CLS */ 12889 if (u) { 12890 tcg_gen_clzi_i32(tcg_res, tcg_op, 32); 12891 } else { 12892 tcg_gen_clrsb_i32(tcg_res, tcg_op); 12893 } 12894 break; 12895 case 0x7: /* SQABS, SQNEG */ 12896 if (u) { 12897 gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op); 12898 } else { 12899 gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op); 12900 } 12901 break; 12902 case 0x2f: /* FABS */ 12903 gen_helper_vfp_abss(tcg_res, tcg_op); 12904 break; 12905 case 0x6f: /* FNEG */ 12906 gen_helper_vfp_negs(tcg_res, tcg_op); 12907 break; 12908 case 0x7f: /* FSQRT */ 12909 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); 12910 break; 12911 case 0x1a: /* FCVTNS */ 12912 case 0x1b: /* FCVTMS */ 12913 case 0x1c: /* FCVTAS */ 12914 case 0x3a: /* FCVTPS */ 12915 case 0x3b: /* FCVTZS */ 12916 gen_helper_vfp_tosls(tcg_res, tcg_op, 12917 tcg_constant_i32(0), tcg_fpstatus); 12918 break; 12919 case 0x5a: /* FCVTNU */ 12920 case 0x5b: /* FCVTMU */ 12921 case 0x5c: /* FCVTAU */ 12922 case 0x7a: /* FCVTPU */ 12923 case 0x7b: /* FCVTZU */ 12924 gen_helper_vfp_touls(tcg_res, tcg_op, 12925 tcg_constant_i32(0), tcg_fpstatus); 12926 break; 12927 case 0x18: /* FRINTN */ 12928 case 0x19: /* FRINTM */ 12929 case 0x38: /* FRINTP */ 12930 case 0x39: /* FRINTZ */ 12931 case 0x58: /* FRINTA */ 12932 case 0x79: /* FRINTI */ 12933 gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus); 12934 break; 12935 case 0x59: /* FRINTX */ 12936 gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus); 12937 break; 12938 case 0x7c: /* URSQRTE */ 12939 gen_helper_rsqrte_u32(tcg_res, tcg_op); 12940 break; 12941 case 0x1e: /* FRINT32Z */ 12942 case 0x5e: /* FRINT32X */ 12943 gen_helper_frint32_s(tcg_res, tcg_op, tcg_fpstatus); 12944 break; 12945 case 0x1f: /* FRINT64Z */ 12946 case 0x5f: /* FRINT64X */ 12947 gen_helper_frint64_s(tcg_res, tcg_op, tcg_fpstatus); 12948 break; 12949 default: 12950 g_assert_not_reached(); 12951 } 12952 } else { 12953 /* Use helpers for 8 and 16 bit elements */ 12954 switch (opcode) { 12955 case 0x5: /* CNT, RBIT */ 12956 /* For these two insns size is part of the opcode specifier 12957 * (handled earlier); they always operate on byte elements. 
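                 * CNT is a per-byte population count and RBIT a per-byte
                 * bit reversal; the size field distinguished the two
                 * encodings rather than selecting an element width, which
                 * is why both use the _u8 helpers.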
12958 */ 12959 if (u) { 12960 gen_helper_neon_rbit_u8(tcg_res, tcg_op); 12961 } else { 12962 gen_helper_neon_cnt_u8(tcg_res, tcg_op); 12963 } 12964 break; 12965 case 0x7: /* SQABS, SQNEG */ 12966 { 12967 NeonGenOneOpEnvFn *genfn; 12968 static NeonGenOneOpEnvFn * const fns[2][2] = { 12969 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, 12970 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, 12971 }; 12972 genfn = fns[size][u]; 12973 genfn(tcg_res, cpu_env, tcg_op); 12974 break; 12975 } 12976 case 0x4: /* CLS, CLZ */ 12977 if (u) { 12978 if (size == 0) { 12979 gen_helper_neon_clz_u8(tcg_res, tcg_op); 12980 } else { 12981 gen_helper_neon_clz_u16(tcg_res, tcg_op); 12982 } 12983 } else { 12984 if (size == 0) { 12985 gen_helper_neon_cls_s8(tcg_res, tcg_op); 12986 } else { 12987 gen_helper_neon_cls_s16(tcg_res, tcg_op); 12988 } 12989 } 12990 break; 12991 default: 12992 g_assert_not_reached(); 12993 } 12994 } 12995 12996 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 12997 12998 tcg_temp_free_i32(tcg_res); 12999 tcg_temp_free_i32(tcg_op); 13000 } 13001 } 13002 clear_vec_high(s, is_q, rd); 13003 13004 if (need_rmode) { 13005 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 13006 tcg_temp_free_i32(tcg_rmode); 13007 } 13008 if (need_fpstatus) { 13009 tcg_temp_free_ptr(tcg_fpstatus); 13010 } 13011 } 13012 13013 /* AdvSIMD [scalar] two register miscellaneous (FP16) 13014 * 13015 * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0 13016 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 13017 * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd | 13018 * +---+---+---+---+---------+---+-------------+--------+-----+------+------+ 13019 * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00 13020 * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800 13021 * 13022 * This actually covers two groups where scalar access is governed by 13023 * bit 28. A bunch of the instructions (float to integral) only exist 13024 * in the vector form and are un-allocated for the scalar decode. Also 13025 * in the scalar decode Q is always 1. 13026 */ 13027 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) 13028 { 13029 int fpop, opcode, a, u; 13030 int rn, rd; 13031 bool is_q; 13032 bool is_scalar; 13033 bool only_in_vector = false; 13034 13035 int pass; 13036 TCGv_i32 tcg_rmode = NULL; 13037 TCGv_ptr tcg_fpstatus = NULL; 13038 bool need_rmode = false; 13039 bool need_fpst = true; 13040 int rmode; 13041 13042 if (!dc_isar_feature(aa64_fp16, s)) { 13043 unallocated_encoding(s); 13044 return; 13045 } 13046 13047 rd = extract32(insn, 0, 5); 13048 rn = extract32(insn, 5, 5); 13049 13050 a = extract32(insn, 23, 1); 13051 u = extract32(insn, 29, 1); 13052 is_scalar = extract32(insn, 28, 1); 13053 is_q = extract32(insn, 30, 1); 13054 13055 opcode = extract32(insn, 12, 5); 13056 fpop = deposit32(opcode, 5, 1, a); 13057 fpop = deposit32(fpop, 6, 1, u); 13058 13059 switch (fpop) { 13060 case 0x1d: /* SCVTF */ 13061 case 0x5d: /* UCVTF */ 13062 { 13063 int elements; 13064 13065 if (is_scalar) { 13066 elements = 1; 13067 } else { 13068 elements = (is_q ? 
8 : 4); 13069 } 13070 13071 if (!fp_access_check(s)) { 13072 return; 13073 } 13074 handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16); 13075 return; 13076 } 13077 break; 13078 case 0x2c: /* FCMGT (zero) */ 13079 case 0x2d: /* FCMEQ (zero) */ 13080 case 0x2e: /* FCMLT (zero) */ 13081 case 0x6c: /* FCMGE (zero) */ 13082 case 0x6d: /* FCMLE (zero) */ 13083 handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd); 13084 return; 13085 case 0x3d: /* FRECPE */ 13086 case 0x3f: /* FRECPX */ 13087 break; 13088 case 0x18: /* FRINTN */ 13089 need_rmode = true; 13090 only_in_vector = true; 13091 rmode = FPROUNDING_TIEEVEN; 13092 break; 13093 case 0x19: /* FRINTM */ 13094 need_rmode = true; 13095 only_in_vector = true; 13096 rmode = FPROUNDING_NEGINF; 13097 break; 13098 case 0x38: /* FRINTP */ 13099 need_rmode = true; 13100 only_in_vector = true; 13101 rmode = FPROUNDING_POSINF; 13102 break; 13103 case 0x39: /* FRINTZ */ 13104 need_rmode = true; 13105 only_in_vector = true; 13106 rmode = FPROUNDING_ZERO; 13107 break; 13108 case 0x58: /* FRINTA */ 13109 need_rmode = true; 13110 only_in_vector = true; 13111 rmode = FPROUNDING_TIEAWAY; 13112 break; 13113 case 0x59: /* FRINTX */ 13114 case 0x79: /* FRINTI */ 13115 only_in_vector = true; 13116 /* current rounding mode */ 13117 break; 13118 case 0x1a: /* FCVTNS */ 13119 need_rmode = true; 13120 rmode = FPROUNDING_TIEEVEN; 13121 break; 13122 case 0x1b: /* FCVTMS */ 13123 need_rmode = true; 13124 rmode = FPROUNDING_NEGINF; 13125 break; 13126 case 0x1c: /* FCVTAS */ 13127 need_rmode = true; 13128 rmode = FPROUNDING_TIEAWAY; 13129 break; 13130 case 0x3a: /* FCVTPS */ 13131 need_rmode = true; 13132 rmode = FPROUNDING_POSINF; 13133 break; 13134 case 0x3b: /* FCVTZS */ 13135 need_rmode = true; 13136 rmode = FPROUNDING_ZERO; 13137 break; 13138 case 0x5a: /* FCVTNU */ 13139 need_rmode = true; 13140 rmode = FPROUNDING_TIEEVEN; 13141 break; 13142 case 0x5b: /* FCVTMU */ 13143 need_rmode = true; 13144 rmode = FPROUNDING_NEGINF; 13145 break; 13146 case 0x5c: /* FCVTAU */ 13147 need_rmode = true; 13148 rmode = FPROUNDING_TIEAWAY; 13149 break; 13150 case 0x7a: /* FCVTPU */ 13151 need_rmode = true; 13152 rmode = FPROUNDING_POSINF; 13153 break; 13154 case 0x7b: /* FCVTZU */ 13155 need_rmode = true; 13156 rmode = FPROUNDING_ZERO; 13157 break; 13158 case 0x2f: /* FABS */ 13159 case 0x6f: /* FNEG */ 13160 need_fpst = false; 13161 break; 13162 case 0x7d: /* FRSQRTE */ 13163 case 0x7f: /* FSQRT (vector) */ 13164 break; 13165 default: 13166 unallocated_encoding(s); 13167 return; 13168 } 13169 13170 13171 /* Check additional constraints for the scalar encoding */ 13172 if (is_scalar) { 13173 if (!is_q) { 13174 unallocated_encoding(s); 13175 return; 13176 } 13177 /* FRINTxx is only in the vector form */ 13178 if (only_in_vector) { 13179 unallocated_encoding(s); 13180 return; 13181 } 13182 } 13183 13184 if (!fp_access_check(s)) { 13185 return; 13186 } 13187 13188 if (need_rmode || need_fpst) { 13189 tcg_fpstatus = fpstatus_ptr(FPST_FPCR_F16); 13190 } 13191 13192 if (need_rmode) { 13193 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); 13194 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 13195 } 13196 13197 if (is_scalar) { 13198 TCGv_i32 tcg_op = read_fp_hreg(s, rn); 13199 TCGv_i32 tcg_res = tcg_temp_new_i32(); 13200 13201 switch (fpop) { 13202 case 0x1a: /* FCVTNS */ 13203 case 0x1b: /* FCVTMS */ 13204 case 0x1c: /* FCVTAS */ 13205 case 0x3a: /* FCVTPS */ 13206 case 0x3b: /* FCVTZS */ 13207 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 
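            /*
             * All five signed FCVT* variants funnel into this one helper;
             * what distinguishes FCVTNS/MS/AS/PS/ZS is purely the rounding
             * mode installed into tcg_fpstatus above.
             */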
13208 break; 13209 case 0x3d: /* FRECPE */ 13210 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 13211 break; 13212 case 0x3f: /* FRECPX */ 13213 gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus); 13214 break; 13215 case 0x5a: /* FCVTNU */ 13216 case 0x5b: /* FCVTMU */ 13217 case 0x5c: /* FCVTAU */ 13218 case 0x7a: /* FCVTPU */ 13219 case 0x7b: /* FCVTZU */ 13220 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 13221 break; 13222 case 0x6f: /* FNEG */ 13223 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 13224 break; 13225 case 0x7d: /* FRSQRTE */ 13226 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 13227 break; 13228 default: 13229 g_assert_not_reached(); 13230 } 13231 13232 /* limit any sign extension going on */ 13233 tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff); 13234 write_fp_sreg(s, rd, tcg_res); 13235 13236 tcg_temp_free_i32(tcg_res); 13237 tcg_temp_free_i32(tcg_op); 13238 } else { 13239 for (pass = 0; pass < (is_q ? 8 : 4); pass++) { 13240 TCGv_i32 tcg_op = tcg_temp_new_i32(); 13241 TCGv_i32 tcg_res = tcg_temp_new_i32(); 13242 13243 read_vec_element_i32(s, tcg_op, rn, pass, MO_16); 13244 13245 switch (fpop) { 13246 case 0x1a: /* FCVTNS */ 13247 case 0x1b: /* FCVTMS */ 13248 case 0x1c: /* FCVTAS */ 13249 case 0x3a: /* FCVTPS */ 13250 case 0x3b: /* FCVTZS */ 13251 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus); 13252 break; 13253 case 0x3d: /* FRECPE */ 13254 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus); 13255 break; 13256 case 0x5a: /* FCVTNU */ 13257 case 0x5b: /* FCVTMU */ 13258 case 0x5c: /* FCVTAU */ 13259 case 0x7a: /* FCVTPU */ 13260 case 0x7b: /* FCVTZU */ 13261 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); 13262 break; 13263 case 0x18: /* FRINTN */ 13264 case 0x19: /* FRINTM */ 13265 case 0x38: /* FRINTP */ 13266 case 0x39: /* FRINTZ */ 13267 case 0x58: /* FRINTA */ 13268 case 0x79: /* FRINTI */ 13269 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus); 13270 break; 13271 case 0x59: /* FRINTX */ 13272 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus); 13273 break; 13274 case 0x2f: /* FABS */ 13275 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); 13276 break; 13277 case 0x6f: /* FNEG */ 13278 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); 13279 break; 13280 case 0x7d: /* FRSQRTE */ 13281 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); 13282 break; 13283 case 0x7f: /* FSQRT */ 13284 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus); 13285 break; 13286 default: 13287 g_assert_not_reached(); 13288 } 13289 13290 write_vec_element_i32(s, tcg_res, rd, pass, MO_16); 13291 13292 tcg_temp_free_i32(tcg_res); 13293 tcg_temp_free_i32(tcg_op); 13294 } 13295 13296 clear_vec_high(s, is_q, rd); 13297 } 13298 13299 if (tcg_rmode) { 13300 gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); 13301 tcg_temp_free_i32(tcg_rmode); 13302 } 13303 13304 if (tcg_fpstatus) { 13305 tcg_temp_free_ptr(tcg_fpstatus); 13306 } 13307 } 13308 13309 /* AdvSIMD scalar x indexed element 13310 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 13311 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ 13312 * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd | 13313 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+ 13314 * AdvSIMD vector x indexed element 13315 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0 13316 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ 13317 * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd 
| 13318 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+ 13319 */ 13320 static void disas_simd_indexed(DisasContext *s, uint32_t insn) 13321 { 13322 /* This encoding has two kinds of instruction: 13323 * normal, where we perform elt x idxelt => elt for each 13324 * element in the vector 13325 * long, where we perform elt x idxelt and generate a result of 13326 * double the width of the input element 13327 * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs). 13328 */ 13329 bool is_scalar = extract32(insn, 28, 1); 13330 bool is_q = extract32(insn, 30, 1); 13331 bool u = extract32(insn, 29, 1); 13332 int size = extract32(insn, 22, 2); 13333 int l = extract32(insn, 21, 1); 13334 int m = extract32(insn, 20, 1); 13335 /* Note that the Rm field here is only 4 bits, not 5 as it usually is */ 13336 int rm = extract32(insn, 16, 4); 13337 int opcode = extract32(insn, 12, 4); 13338 int h = extract32(insn, 11, 1); 13339 int rn = extract32(insn, 5, 5); 13340 int rd = extract32(insn, 0, 5); 13341 bool is_long = false; 13342 int is_fp = 0; 13343 bool is_fp16 = false; 13344 int index; 13345 TCGv_ptr fpst; 13346 13347 switch (16 * u + opcode) { 13348 case 0x08: /* MUL */ 13349 case 0x10: /* MLA */ 13350 case 0x14: /* MLS */ 13351 if (is_scalar) { 13352 unallocated_encoding(s); 13353 return; 13354 } 13355 break; 13356 case 0x02: /* SMLAL, SMLAL2 */ 13357 case 0x12: /* UMLAL, UMLAL2 */ 13358 case 0x06: /* SMLSL, SMLSL2 */ 13359 case 0x16: /* UMLSL, UMLSL2 */ 13360 case 0x0a: /* SMULL, SMULL2 */ 13361 case 0x1a: /* UMULL, UMULL2 */ 13362 if (is_scalar) { 13363 unallocated_encoding(s); 13364 return; 13365 } 13366 is_long = true; 13367 break; 13368 case 0x03: /* SQDMLAL, SQDMLAL2 */ 13369 case 0x07: /* SQDMLSL, SQDMLSL2 */ 13370 case 0x0b: /* SQDMULL, SQDMULL2 */ 13371 is_long = true; 13372 break; 13373 case 0x0c: /* SQDMULH */ 13374 case 0x0d: /* SQRDMULH */ 13375 break; 13376 case 0x01: /* FMLA */ 13377 case 0x05: /* FMLS */ 13378 case 0x09: /* FMUL */ 13379 case 0x19: /* FMULX */ 13380 is_fp = 1; 13381 break; 13382 case 0x1d: /* SQRDMLAH */ 13383 case 0x1f: /* SQRDMLSH */ 13384 if (!dc_isar_feature(aa64_rdm, s)) { 13385 unallocated_encoding(s); 13386 return; 13387 } 13388 break; 13389 case 0x0e: /* SDOT */ 13390 case 0x1e: /* UDOT */ 13391 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_dp, s)) { 13392 unallocated_encoding(s); 13393 return; 13394 } 13395 break; 13396 case 0x0f: 13397 switch (size) { 13398 case 0: /* SUDOT */ 13399 case 2: /* USDOT */ 13400 if (is_scalar || !dc_isar_feature(aa64_i8mm, s)) { 13401 unallocated_encoding(s); 13402 return; 13403 } 13404 size = MO_32; 13405 break; 13406 case 1: /* BFDOT */ 13407 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { 13408 unallocated_encoding(s); 13409 return; 13410 } 13411 size = MO_32; 13412 break; 13413 case 3: /* BFMLAL{B,T} */ 13414 if (is_scalar || !dc_isar_feature(aa64_bf16, s)) { 13415 unallocated_encoding(s); 13416 return; 13417 } 13418 /* can't set is_fp without other incorrect size checks */ 13419 size = MO_16; 13420 break; 13421 default: 13422 unallocated_encoding(s); 13423 return; 13424 } 13425 break; 13426 case 0x11: /* FCMLA #0 */ 13427 case 0x13: /* FCMLA #90 */ 13428 case 0x15: /* FCMLA #180 */ 13429 case 0x17: /* FCMLA #270 */ 13430 if (is_scalar || !dc_isar_feature(aa64_fcma, s)) { 13431 unallocated_encoding(s); 13432 return; 13433 } 13434 is_fp = 2; 13435 break; 13436 case 0x00: /* FMLAL */ 13437 case 0x04: /* FMLSL */ 13438 case 0x18: /* FMLAL2 */ 13439 case 0x1c: /* FMLSL2 */ 
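        /*
         * FMLAL/FMLSL (FEAT_FHM) multiply half-precision elements and
         * accumulate into single-precision ones, so although they are FP
         * ops we leave is_fp clear; size is forced to MO_16 below only so
         * that the index decode picks the 16-bit element layout.
         */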
13440 if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) { 13441 unallocated_encoding(s); 13442 return; 13443 } 13444 size = MO_16; 13445 /* is_fp, but we pass cpu_env not fp_status. */ 13446 break; 13447 default: 13448 unallocated_encoding(s); 13449 return; 13450 } 13451 13452 switch (is_fp) { 13453 case 1: /* normal fp */ 13454 /* convert insn encoded size to MemOp size */ 13455 switch (size) { 13456 case 0: /* half-precision */ 13457 size = MO_16; 13458 is_fp16 = true; 13459 break; 13460 case MO_32: /* single precision */ 13461 case MO_64: /* double precision */ 13462 break; 13463 default: 13464 unallocated_encoding(s); 13465 return; 13466 } 13467 break; 13468 13469 case 2: /* complex fp */ 13470 /* Each indexable element is a complex pair. */ 13471 size += 1; 13472 switch (size) { 13473 case MO_32: 13474 if (h && !is_q) { 13475 unallocated_encoding(s); 13476 return; 13477 } 13478 is_fp16 = true; 13479 break; 13480 case MO_64: 13481 break; 13482 default: 13483 unallocated_encoding(s); 13484 return; 13485 } 13486 break; 13487 13488 default: /* integer */ 13489 switch (size) { 13490 case MO_8: 13491 case MO_64: 13492 unallocated_encoding(s); 13493 return; 13494 } 13495 break; 13496 } 13497 if (is_fp16 && !dc_isar_feature(aa64_fp16, s)) { 13498 unallocated_encoding(s); 13499 return; 13500 } 13501 13502 /* Given MemOp size, adjust register and indexing. */ 13503 switch (size) { 13504 case MO_16: 13505 index = h << 2 | l << 1 | m; 13506 break; 13507 case MO_32: 13508 index = h << 1 | l; 13509 rm |= m << 4; 13510 break; 13511 case MO_64: 13512 if (l || !is_q) { 13513 unallocated_encoding(s); 13514 return; 13515 } 13516 index = h; 13517 rm |= m << 4; 13518 break; 13519 default: 13520 g_assert_not_reached(); 13521 } 13522 13523 if (!fp_access_check(s)) { 13524 return; 13525 } 13526 13527 if (is_fp) { 13528 fpst = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16 : FPST_FPCR); 13529 } else { 13530 fpst = NULL; 13531 } 13532 13533 switch (16 * u + opcode) { 13534 case 0x0e: /* SDOT */ 13535 case 0x1e: /* UDOT */ 13536 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13537 u ? gen_helper_gvec_udot_idx_b 13538 : gen_helper_gvec_sdot_idx_b); 13539 return; 13540 case 0x0f: 13541 switch (extract32(insn, 22, 2)) { 13542 case 0: /* SUDOT */ 13543 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13544 gen_helper_gvec_sudot_idx_b); 13545 return; 13546 case 1: /* BFDOT */ 13547 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13548 gen_helper_gvec_bfdot_idx); 13549 return; 13550 case 2: /* USDOT */ 13551 gen_gvec_op4_ool(s, is_q, rd, rn, rm, rd, index, 13552 gen_helper_gvec_usdot_idx_b); 13553 return; 13554 case 3: /* BFMLAL{B,T} */ 13555 gen_gvec_op4_fpst(s, 1, rd, rn, rm, rd, 0, (index << 1) | is_q, 13556 gen_helper_gvec_bfmlal_idx); 13557 return; 13558 } 13559 g_assert_not_reached(); 13560 case 0x11: /* FCMLA #0 */ 13561 case 0x13: /* FCMLA #90 */ 13562 case 0x15: /* FCMLA #180 */ 13563 case 0x17: /* FCMLA #270 */ 13564 { 13565 int rot = extract32(insn, 13, 2); 13566 int data = (index << 2) | rot; 13567 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, rd), 13568 vec_full_reg_offset(s, rn), 13569 vec_full_reg_offset(s, rm), 13570 vec_full_reg_offset(s, rd), fpst, 13571 is_q ? 16 : 8, vec_full_reg_size(s), data, 13572 size == MO_64 13573 ? 
gen_helper_gvec_fcmlas_idx 13574 : gen_helper_gvec_fcmlah_idx); 13575 tcg_temp_free_ptr(fpst); 13576 } 13577 return; 13578 13579 case 0x00: /* FMLAL */ 13580 case 0x04: /* FMLSL */ 13581 case 0x18: /* FMLAL2 */ 13582 case 0x1c: /* FMLSL2 */ 13583 { 13584 int is_s = extract32(opcode, 2, 1); 13585 int is_2 = u; 13586 int data = (index << 2) | (is_2 << 1) | is_s; 13587 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), 13588 vec_full_reg_offset(s, rn), 13589 vec_full_reg_offset(s, rm), cpu_env, 13590 is_q ? 16 : 8, vec_full_reg_size(s), 13591 data, gen_helper_gvec_fmlal_idx_a64); 13592 } 13593 return; 13594 13595 case 0x08: /* MUL */ 13596 if (!is_long && !is_scalar) { 13597 static gen_helper_gvec_3 * const fns[3] = { 13598 gen_helper_gvec_mul_idx_h, 13599 gen_helper_gvec_mul_idx_s, 13600 gen_helper_gvec_mul_idx_d, 13601 }; 13602 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), 13603 vec_full_reg_offset(s, rn), 13604 vec_full_reg_offset(s, rm), 13605 is_q ? 16 : 8, vec_full_reg_size(s), 13606 index, fns[size - 1]); 13607 return; 13608 } 13609 break; 13610 13611 case 0x10: /* MLA */ 13612 if (!is_long && !is_scalar) { 13613 static gen_helper_gvec_4 * const fns[3] = { 13614 gen_helper_gvec_mla_idx_h, 13615 gen_helper_gvec_mla_idx_s, 13616 gen_helper_gvec_mla_idx_d, 13617 }; 13618 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 13619 vec_full_reg_offset(s, rn), 13620 vec_full_reg_offset(s, rm), 13621 vec_full_reg_offset(s, rd), 13622 is_q ? 16 : 8, vec_full_reg_size(s), 13623 index, fns[size - 1]); 13624 return; 13625 } 13626 break; 13627 13628 case 0x14: /* MLS */ 13629 if (!is_long && !is_scalar) { 13630 static gen_helper_gvec_4 * const fns[3] = { 13631 gen_helper_gvec_mls_idx_h, 13632 gen_helper_gvec_mls_idx_s, 13633 gen_helper_gvec_mls_idx_d, 13634 }; 13635 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), 13636 vec_full_reg_offset(s, rn), 13637 vec_full_reg_offset(s, rm), 13638 vec_full_reg_offset(s, rd), 13639 is_q ? 16 : 8, vec_full_reg_size(s), 13640 index, fns[size - 1]); 13641 return; 13642 } 13643 break; 13644 } 13645 13646 if (size == 3) { 13647 TCGv_i64 tcg_idx = tcg_temp_new_i64(); 13648 int pass; 13649 13650 assert(is_fp && is_q && !is_long); 13651 13652 read_vec_element(s, tcg_idx, rm, index, MO_64); 13653 13654 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 13655 TCGv_i64 tcg_op = tcg_temp_new_i64(); 13656 TCGv_i64 tcg_res = tcg_temp_new_i64(); 13657 13658 read_vec_element(s, tcg_op, rn, pass, MO_64); 13659 13660 switch (16 * u + opcode) { 13661 case 0x05: /* FMLS */ 13662 /* As usual for ARM, separate negation for fused multiply-add */ 13663 gen_helper_vfp_negd(tcg_op, tcg_op); 13664 /* fall through */ 13665 case 0x01: /* FMLA */ 13666 read_vec_element(s, tcg_res, rd, pass, MO_64); 13667 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst); 13668 break; 13669 case 0x09: /* FMUL */ 13670 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst); 13671 break; 13672 case 0x19: /* FMULX */ 13673 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst); 13674 break; 13675 default: 13676 g_assert_not_reached(); 13677 } 13678 13679 write_vec_element(s, tcg_res, rd, pass, MO_64); 13680 tcg_temp_free_i64(tcg_op); 13681 tcg_temp_free_i64(tcg_res); 13682 } 13683 13684 tcg_temp_free_i64(tcg_idx); 13685 clear_vec_high(s, !is_scalar, rd); 13686 } else if (!is_long) { 13687 /* 32 bit floating point, or 16 or 32 bit integer. 13688 * For the 16 bit scalar case we use the usual Neon helpers and 13689 * rely on the fact that 0 op 0 == 0 with no side effects. 
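     * (The scalar reads below zero-extend the 16-bit element, index and
     * accumulator into 32-bit temps, so the two-lane integer helpers
     * compute 0 op 0 in the unused upper halfword: the result there is
     * zero and cannot saturate, so QC is never spuriously set.)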
13690 */ 13691 TCGv_i32 tcg_idx = tcg_temp_new_i32(); 13692 int pass, maxpasses; 13693 13694 if (is_scalar) { 13695 maxpasses = 1; 13696 } else { 13697 maxpasses = is_q ? 4 : 2; 13698 } 13699 13700 read_vec_element_i32(s, tcg_idx, rm, index, size); 13701 13702 if (size == 1 && !is_scalar) { 13703 /* The simplest way to handle the 16x16 indexed ops is to duplicate 13704 * the index into both halves of the 32 bit tcg_idx and then use 13705 * the usual Neon helpers. 13706 */ 13707 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); 13708 } 13709 13710 for (pass = 0; pass < maxpasses; pass++) { 13711 TCGv_i32 tcg_op = tcg_temp_new_i32(); 13712 TCGv_i32 tcg_res = tcg_temp_new_i32(); 13713 13714 read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32); 13715 13716 switch (16 * u + opcode) { 13717 case 0x08: /* MUL */ 13718 case 0x10: /* MLA */ 13719 case 0x14: /* MLS */ 13720 { 13721 static NeonGenTwoOpFn * const fns[2][2] = { 13722 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 }, 13723 { tcg_gen_add_i32, tcg_gen_sub_i32 }, 13724 }; 13725 NeonGenTwoOpFn *genfn; 13726 bool is_sub = opcode == 0x4; 13727 13728 if (size == 1) { 13729 gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx); 13730 } else { 13731 tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx); 13732 } 13733 if (opcode == 0x8) { 13734 break; 13735 } 13736 read_vec_element_i32(s, tcg_op, rd, pass, MO_32); 13737 genfn = fns[size - 1][is_sub]; 13738 genfn(tcg_res, tcg_op, tcg_res); 13739 break; 13740 } 13741 case 0x05: /* FMLS */ 13742 case 0x01: /* FMLA */ 13743 read_vec_element_i32(s, tcg_res, rd, pass, 13744 is_scalar ? size : MO_32); 13745 switch (size) { 13746 case 1: 13747 if (opcode == 0x5) { 13748 /* As usual for ARM, separate negation for fused 13749 * multiply-add */ 13750 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000); 13751 } 13752 if (is_scalar) { 13753 gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx, 13754 tcg_res, fpst); 13755 } else { 13756 gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx, 13757 tcg_res, fpst); 13758 } 13759 break; 13760 case 2: 13761 if (opcode == 0x5) { 13762 /* As usual for ARM, separate negation for 13763 * fused multiply-add */ 13764 tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000); 13765 } 13766 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, 13767 tcg_res, fpst); 13768 break; 13769 default: 13770 g_assert_not_reached(); 13771 } 13772 break; 13773 case 0x09: /* FMUL */ 13774 switch (size) { 13775 case 1: 13776 if (is_scalar) { 13777 gen_helper_advsimd_mulh(tcg_res, tcg_op, 13778 tcg_idx, fpst); 13779 } else { 13780 gen_helper_advsimd_mul2h(tcg_res, tcg_op, 13781 tcg_idx, fpst); 13782 } 13783 break; 13784 case 2: 13785 gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst); 13786 break; 13787 default: 13788 g_assert_not_reached(); 13789 } 13790 break; 13791 case 0x19: /* FMULX */ 13792 switch (size) { 13793 case 1: 13794 if (is_scalar) { 13795 gen_helper_advsimd_mulxh(tcg_res, tcg_op, 13796 tcg_idx, fpst); 13797 } else { 13798 gen_helper_advsimd_mulx2h(tcg_res, tcg_op, 13799 tcg_idx, fpst); 13800 } 13801 break; 13802 case 2: 13803 gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst); 13804 break; 13805 default: 13806 g_assert_not_reached(); 13807 } 13808 break; 13809 case 0x0c: /* SQDMULH */ 13810 if (size == 1) { 13811 gen_helper_neon_qdmulh_s16(tcg_res, cpu_env, 13812 tcg_op, tcg_idx); 13813 } else { 13814 gen_helper_neon_qdmulh_s32(tcg_res, cpu_env, 13815 tcg_op, tcg_idx); 13816 } 13817 break; 13818 case 0x0d: /* SQRDMULH */ 13819 if (size == 1) { 13820 gen_helper_neon_qrdmulh_s16(tcg_res, 
cpu_env, 13821 tcg_op, tcg_idx); 13822 } else { 13823 gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env, 13824 tcg_op, tcg_idx); 13825 } 13826 break; 13827 case 0x1d: /* SQRDMLAH */ 13828 read_vec_element_i32(s, tcg_res, rd, pass, 13829 is_scalar ? size : MO_32); 13830 if (size == 1) { 13831 gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env, 13832 tcg_op, tcg_idx, tcg_res); 13833 } else { 13834 gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env, 13835 tcg_op, tcg_idx, tcg_res); 13836 } 13837 break; 13838 case 0x1f: /* SQRDMLSH */ 13839 read_vec_element_i32(s, tcg_res, rd, pass, 13840 is_scalar ? size : MO_32); 13841 if (size == 1) { 13842 gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env, 13843 tcg_op, tcg_idx, tcg_res); 13844 } else { 13845 gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env, 13846 tcg_op, tcg_idx, tcg_res); 13847 } 13848 break; 13849 default: 13850 g_assert_not_reached(); 13851 } 13852 13853 if (is_scalar) { 13854 write_fp_sreg(s, rd, tcg_res); 13855 } else { 13856 write_vec_element_i32(s, tcg_res, rd, pass, MO_32); 13857 } 13858 13859 tcg_temp_free_i32(tcg_op); 13860 tcg_temp_free_i32(tcg_res); 13861 } 13862 13863 tcg_temp_free_i32(tcg_idx); 13864 clear_vec_high(s, is_q, rd); 13865 } else { 13866 /* long ops: 16x16->32 or 32x32->64 */ 13867 TCGv_i64 tcg_res[2]; 13868 int pass; 13869 bool satop = extract32(opcode, 0, 1); 13870 MemOp memop = MO_32; 13871 13872 if (satop || !u) { 13873 memop |= MO_SIGN; 13874 } 13875 13876 if (size == 2) { 13877 TCGv_i64 tcg_idx = tcg_temp_new_i64(); 13878 13879 read_vec_element(s, tcg_idx, rm, index, memop); 13880 13881 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 13882 TCGv_i64 tcg_op = tcg_temp_new_i64(); 13883 TCGv_i64 tcg_passres; 13884 int passelt; 13885 13886 if (is_scalar) { 13887 passelt = 0; 13888 } else { 13889 passelt = pass + (is_q * 2); 13890 } 13891 13892 read_vec_element(s, tcg_op, rn, passelt, memop); 13893 13894 tcg_res[pass] = tcg_temp_new_i64(); 13895 13896 if (opcode == 0xa || opcode == 0xb) { 13897 /* Non-accumulating ops */ 13898 tcg_passres = tcg_res[pass]; 13899 } else { 13900 tcg_passres = tcg_temp_new_i64(); 13901 } 13902 13903 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx); 13904 tcg_temp_free_i64(tcg_op); 13905 13906 if (satop) { 13907 /* saturating, doubling */ 13908 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env, 13909 tcg_passres, tcg_passres); 13910 } 13911 13912 if (opcode == 0xa || opcode == 0xb) { 13913 continue; 13914 } 13915 13916 /* Accumulating op: handle accumulate step */ 13917 read_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13918 13919 switch (opcode) { 13920 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 13921 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 13922 break; 13923 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 13924 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres); 13925 break; 13926 case 0x7: /* SQDMLSL, SQDMLSL2 */ 13927 tcg_gen_neg_i64(tcg_passres, tcg_passres); 13928 /* fall through */ 13929 case 0x3: /* SQDMLAL, SQDMLAL2 */ 13930 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env, 13931 tcg_res[pass], 13932 tcg_passres); 13933 break; 13934 default: 13935 g_assert_not_reached(); 13936 } 13937 tcg_temp_free_i64(tcg_passres); 13938 } 13939 tcg_temp_free_i64(tcg_idx); 13940 13941 clear_vec_high(s, !is_scalar, rd); 13942 } else { 13943 TCGv_i32 tcg_idx = tcg_temp_new_i32(); 13944 13945 assert(size == 1); 13946 read_vec_element_i32(s, tcg_idx, rm, index, size); 13947 13948 if (!is_scalar) { 13949 /* The simplest way to handle the 16x16 indexed ops is to 13950 * 
duplicate the index into both halves of the 32 bit tcg_idx 13951 * and then use the usual Neon helpers. 13952 */ 13953 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16); 13954 } 13955 13956 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { 13957 TCGv_i32 tcg_op = tcg_temp_new_i32(); 13958 TCGv_i64 tcg_passres; 13959 13960 if (is_scalar) { 13961 read_vec_element_i32(s, tcg_op, rn, pass, size); 13962 } else { 13963 read_vec_element_i32(s, tcg_op, rn, 13964 pass + (is_q * 2), MO_32); 13965 } 13966 13967 tcg_res[pass] = tcg_temp_new_i64(); 13968 13969 if (opcode == 0xa || opcode == 0xb) { 13970 /* Non-accumulating ops */ 13971 tcg_passres = tcg_res[pass]; 13972 } else { 13973 tcg_passres = tcg_temp_new_i64(); 13974 } 13975 13976 if (memop & MO_SIGN) { 13977 gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx); 13978 } else { 13979 gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx); 13980 } 13981 if (satop) { 13982 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, 13983 tcg_passres, tcg_passres); 13984 } 13985 tcg_temp_free_i32(tcg_op); 13986 13987 if (opcode == 0xa || opcode == 0xb) { 13988 continue; 13989 } 13990 13991 /* Accumulating op: handle accumulate step */ 13992 read_vec_element(s, tcg_res[pass], rd, pass, MO_64); 13993 13994 switch (opcode) { 13995 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */ 13996 gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass], 13997 tcg_passres); 13998 break; 13999 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */ 14000 gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass], 14001 tcg_passres); 14002 break; 14003 case 0x7: /* SQDMLSL, SQDMLSL2 */ 14004 gen_helper_neon_negl_u32(tcg_passres, tcg_passres); 14005 /* fall through */ 14006 case 0x3: /* SQDMLAL, SQDMLAL2 */ 14007 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env, 14008 tcg_res[pass], 14009 tcg_passres); 14010 break; 14011 default: 14012 g_assert_not_reached(); 14013 } 14014 tcg_temp_free_i64(tcg_passres); 14015 } 14016 tcg_temp_free_i32(tcg_idx); 14017 14018 if (is_scalar) { 14019 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]); 14020 } 14021 } 14022 14023 if (is_scalar) { 14024 tcg_res[1] = tcg_constant_i64(0); 14025 } 14026 14027 for (pass = 0; pass < 2; pass++) { 14028 write_vec_element(s, tcg_res[pass], rd, pass, MO_64); 14029 tcg_temp_free_i64(tcg_res[pass]); 14030 } 14031 } 14032 14033 if (fpst) { 14034 tcg_temp_free_ptr(fpst); 14035 } 14036 } 14037 14038 /* Crypto AES 14039 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 14040 * +-----------------+------+-----------+--------+-----+------+------+ 14041 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | 14042 * +-----------------+------+-----------+--------+-----+------+------+ 14043 */ 14044 static void disas_crypto_aes(DisasContext *s, uint32_t insn) 14045 { 14046 int size = extract32(insn, 22, 2); 14047 int opcode = extract32(insn, 12, 5); 14048 int rn = extract32(insn, 5, 5); 14049 int rd = extract32(insn, 0, 5); 14050 int decrypt; 14051 gen_helper_gvec_2 *genfn2 = NULL; 14052 gen_helper_gvec_3 *genfn3 = NULL; 14053 14054 if (!dc_isar_feature(aa64_aes, s) || size != 0) { 14055 unallocated_encoding(s); 14056 return; 14057 } 14058 14059 switch (opcode) { 14060 case 0x4: /* AESE */ 14061 decrypt = 0; 14062 genfn3 = gen_helper_crypto_aese; 14063 break; 14064 case 0x6: /* AESMC */ 14065 decrypt = 0; 14066 genfn2 = gen_helper_crypto_aesmc; 14067 break; 14068 case 0x5: /* AESD */ 14069 decrypt = 1; 14070 genfn3 = gen_helper_crypto_aese; 14071 break; 14072 case 0x7: /* AESIMC */ 14073 decrypt = 1; 14074 genfn2 = 
    if (genfn2) {
        gen_gvec_op2_ool(s, true, rd, rn, decrypt, genfn2);
    } else {
        gen_gvec_op3_ool(s, true, rd, rd, rn, decrypt, genfn3);
    }
}

/* Crypto three-reg SHA
 *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
 * +-----------------+------+---+------+---+--------+-----+------+------+
 * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
 * +-----------------+------+---+------+---+--------+-----+------+------+
 */
static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
{
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 3);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    gen_helper_gvec_3 *genfn;
    bool feature;

    if (size != 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0: /* SHA1C */
        genfn = gen_helper_crypto_sha1c;
        feature = dc_isar_feature(aa64_sha1, s);
        break;
    case 1: /* SHA1P */
        genfn = gen_helper_crypto_sha1p;
        feature = dc_isar_feature(aa64_sha1, s);
        break;
    case 2: /* SHA1M */
        genfn = gen_helper_crypto_sha1m;
        feature = dc_isar_feature(aa64_sha1, s);
        break;
    case 3: /* SHA1SU0 */
        genfn = gen_helper_crypto_sha1su0;
        feature = dc_isar_feature(aa64_sha1, s);
        break;
    case 4: /* SHA256H */
        genfn = gen_helper_crypto_sha256h;
        feature = dc_isar_feature(aa64_sha256, s);
        break;
    case 5: /* SHA256H2 */
        genfn = gen_helper_crypto_sha256h2;
        feature = dc_isar_feature(aa64_sha256, s);
        break;
    case 6: /* SHA256SU1 */
        genfn = gen_helper_crypto_sha256su1;
        feature = dc_isar_feature(aa64_sha256, s);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }
    gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn);
}

/* Crypto two-reg SHA
 *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----------------+------+-----------+--------+-----+------+------+
 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----------------+------+-----------+--------+-----+------+------+
 */
static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
{
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    gen_helper_gvec_2 *genfn;
    bool feature;

    if (size != 0) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0: /* SHA1H */
        feature = dc_isar_feature(aa64_sha1, s);
        genfn = gen_helper_crypto_sha1h;
        break;
    case 1: /* SHA1SU1 */
        feature = dc_isar_feature(aa64_sha1, s);
        genfn = gen_helper_crypto_sha1su1;
        break;
    case 2: /* SHA256SU0 */
        feature = dc_isar_feature(aa64_sha256, s);
        genfn = gen_helper_crypto_sha256su0;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }
    gen_gvec_op2_ool(s, true, rd, rn, 0, genfn);
}

static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)
{
    tcg_gen_rotli_i64(d, m, 1);
    tcg_gen_xor_i64(d, d, n);
}

static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m)
{
    tcg_gen_rotli_vec(vece, d, m, 1);
    tcg_gen_xor_vec(vece, d, d, n);
}
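/*
 * RAX1 computes d = n ^ rol64(m, 1).  The GVecGen3 descriptor below
 * offers the expander three strategies: a 64-bit integer expansion
 * (fni8), a host vector expansion usable when rotli_vec is supported
 * (fniv, with opt_opc listing the ops it needs), and an out-of-line
 * helper as the final fallback (fno).
 */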
void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 };
    static const GVecGen3 op = {
        .fni8 = gen_rax1_i64,
        .fniv = gen_rax1_vec,
        .opt_opc = vecop_list,
        .fno = gen_helper_crypto_rax1,
        .vece = MO_64,
    };
    tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op);
}

/* Crypto three-reg SHA512
 *  31                   21 20  16 15 14 13 12  11 10 9    5 4    0
 * +-----------------------+------+---+---+-----+--------+------+------+
 * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
 * +-----------------------+------+---+---+-----+--------+------+------+
 */
static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
{
    int opcode = extract32(insn, 10, 2);
    int o = extract32(insn, 14, 1);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool feature;
    gen_helper_gvec_3 *oolfn = NULL;
    GVecGen3Fn *gvecfn = NULL;

    if (o == 0) {
        switch (opcode) {
        case 0: /* SHA512H */
            feature = dc_isar_feature(aa64_sha512, s);
            oolfn = gen_helper_crypto_sha512h;
            break;
        case 1: /* SHA512H2 */
            feature = dc_isar_feature(aa64_sha512, s);
            oolfn = gen_helper_crypto_sha512h2;
            break;
        case 2: /* SHA512SU1 */
            feature = dc_isar_feature(aa64_sha512, s);
            oolfn = gen_helper_crypto_sha512su1;
            break;
        case 3: /* RAX1 */
            feature = dc_isar_feature(aa64_sha3, s);
            gvecfn = gen_gvec_rax1;
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        switch (opcode) {
        case 0: /* SM3PARTW1 */
            feature = dc_isar_feature(aa64_sm3, s);
            oolfn = gen_helper_crypto_sm3partw1;
            break;
        case 1: /* SM3PARTW2 */
            feature = dc_isar_feature(aa64_sm3, s);
            oolfn = gen_helper_crypto_sm3partw2;
            break;
        case 2: /* SM4EKEY */
            feature = dc_isar_feature(aa64_sm4, s);
            oolfn = gen_helper_crypto_sm4ekey;
            break;
        default:
            unallocated_encoding(s);
            return;
        }
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (oolfn) {
        gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn);
    } else {
        gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64);
    }
}

/* Crypto two-reg SHA512
 *  31                                     12 11  10 9    5 4    0
 * +-----------------------------------------+--------+------+------+
 * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
 * +-----------------------------------------+--------+------+------+
 */
static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
{
    int opcode = extract32(insn, 10, 2);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool feature;

    switch (opcode) {
    case 0: /* SHA512SU0 */
        feature = dc_isar_feature(aa64_sha512, s);
        break;
    case 1: /* SM4E */
        feature = dc_isar_feature(aa64_sm4, s);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    switch (opcode) {
    case 0: /* SHA512SU0 */
        gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0);
        break;
    case 1: /* SM4E */
        gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Crypto four-register
 *  31               23 22 21 20  16 15  14  10 9    5 4    0
 * +-------------------+-----+------+---+------+------+------+
 * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
 * +-------------------+-----+------+---+------+------+------+
 */
static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
{
    int op0 = extract32(insn, 21, 2);
    int rm = extract32(insn, 16, 5);
    int ra = extract32(insn, 10, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool feature;

    switch (op0) {
    case 0: /* EOR3 */
    case 1: /* BCAX */
        feature = dc_isar_feature(aa64_sha3, s);
        break;
    case 2: /* SM3SS1 */
        feature = dc_isar_feature(aa64_sm3, s);
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!feature) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (op0 < 2) {
        TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
        int pass;

        tcg_op1 = tcg_temp_new_i64();
        tcg_op2 = tcg_temp_new_i64();
        tcg_op3 = tcg_temp_new_i64();
        tcg_res[0] = tcg_temp_new_i64();
        tcg_res[1] = tcg_temp_new_i64();

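        /*
         * EOR3 is Vd = Vn ^ Vm ^ Va and BCAX is Vd = Vn ^ (Vm & ~Va),
         * each computed in two 64-bit passes below.
         */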
        for (pass = 0; pass < 2; pass++) {
            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);
            read_vec_element(s, tcg_op3, ra, pass, MO_64);

            if (op0 == 0) {
                /* EOR3 */
                tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
            } else {
                /* BCAX */
                tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
            }
            tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
        }
        write_vec_element(s, tcg_res[0], rd, 0, MO_64);
        write_vec_element(s, tcg_res[1], rd, 1, MO_64);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_op3);
        tcg_temp_free_i64(tcg_res[0]);
        tcg_temp_free_i64(tcg_res[1]);
    } else {
        TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;

        tcg_op1 = tcg_temp_new_i32();
        tcg_op2 = tcg_temp_new_i32();
        tcg_op3 = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        tcg_zero = tcg_constant_i32(0);

        read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
        read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
        read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);

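        /*
         * SM3SS1 produces only 32-bit element 3 of the result:
         *   Vd[3] = ROL(ROL(Vn[3], 12) + Vm[3] + Va[3], 7)
         * with the other elements zeroed.  The left-rotates by 12 and 7
         * are expressed as right-rotates by 20 and 25.
         */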
        tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
        tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
        tcg_gen_rotri_i32(tcg_res, tcg_res, 25);

        write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
        write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
        write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
        write_vec_element_i32(s, tcg_res, rd, 3, MO_32);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i32(tcg_op3);
        tcg_temp_free_i32(tcg_res);
    }
}

/* Crypto XAR
 *  31                   21 20  16 15    10 9    5 4    0
 * +-----------------------+------+--------+------+------+
 * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
 * +-----------------------+------+--------+------+------+
 */
static void disas_crypto_xar(DisasContext *s, uint32_t insn)
{
    int rm = extract32(insn, 16, 5);
    int imm6 = extract32(insn, 10, 6);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    if (!dc_isar_feature(aa64_sha3, s)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

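    /* XAR: each 64-bit element of Vd is (Vn ^ Vm) rotated right by imm6. */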
    gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd),
                 vec_full_reg_offset(s, rn),
                 vec_full_reg_offset(s, rm), imm6, 16,
                 vec_full_reg_size(s));
}

/* Crypto three-reg imm2
 *  31                   21 20  16 15  14 13 12  11 10 9    5 4    0
 * +-----------------------+------+-----+------+--------+------+------+
 * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
 * +-----------------------+------+-----+------+--------+------+------+
 */
static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b,
        gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b,
    };
    int opcode = extract32(insn, 10, 2);
    int imm2 = extract32(insn, 12, 2);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    if (!dc_isar_feature(aa64_sm3, s)) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]);
}

/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 */
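/*
 * An insn matches an entry if (insn & mask) == pattern; the first
 * matching entry wins, so more specific patterns must precede the
 * overlapping general ones.
 */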
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
    { 0xce000000, 0xff808000, disas_crypto_four_reg },
    { 0xce800000, 0xffe00000, disas_crypto_xar },
    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
    { 0x00000000, 0x00000000, NULL }
};

static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
{
    /* Note that this is called with all non-FP cases from
     * table C3-6 so it must UNDEF for entries not specifically
     * allocated to instructions in that table.
     */
    AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
    if (fn) {
        fn(s, insn);
    } else {
        unallocated_encoding(s);
    }
}

/* C3.6 Data processing - SIMD and floating point */
static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
{
    if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
        disas_data_proc_fp(s, insn);
    } else {
        /* SIMD, including crypto */
        disas_data_proc_simd(s, insn);
    }
}

/*
 * Include the generated SME FA64 decoder.
 */

#include "decode-sme-fa64.c.inc"

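/*
 * The generated FA64 decoder classifies insns for streaming SVE mode:
 * trans_OK accepts an insn as streaming-compatible, while trans_FAIL
 * marks it as non-streaming so that the FP/SVE access checks can raise
 * the SME streaming-mode trap for it.
 */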
static bool trans_OK(DisasContext *s, arg_OK *a)
{
    return true;
}

static bool trans_FAIL(DisasContext *s, arg_OK *a)
{
    s->is_nonstreaming = true;
    return true;
}

/**
 * is_guarded_page:
 * @env: The cpu environment
 * @s: The DisasContext
 *
 * Return true if the page is guarded.
 */
static bool is_guarded_page(CPUARMState *env, DisasContext *s)
{
    uint64_t addr = s->base.pc_first;
#ifdef CONFIG_USER_ONLY
    return page_get_flags(addr) & PAGE_BTI;
#else
    CPUTLBEntryFull *full;
    void *host;
    int mmu_idx = arm_to_core_mmu_idx(s->mmu_idx);
    int flags;

    /*
     * We test this immediately after reading an insn, which means
     * that the TLB entry must be present and valid, and thus this
     * access will never raise an exception.
     */
    flags = probe_access_full(env, addr, 0, MMU_INST_FETCH, mmu_idx,
                              false, &host, &full, 0);
    assert(!(flags & TLB_INVALID_MASK));

    return full->guarded;
#endif
}

/**
 * btype_destination_ok:
 * @insn: The instruction at the branch destination
 * @bt: SCTLR_ELx.BT
 * @btype: PSTATE.BTYPE, which is known to be non-zero
 *
 * On a guarded page, there are a limited number of insns
 * that may be present at the branch target:
 *  - branch target identifiers,
 *  - paciasp, pacibsp,
 *  - BRK insn,
 *  - HLT insn.
 * Anything else causes a Branch Target Exception.
 *
 * Return true if the branch is compatible, false to raise BTITRAP.
 */
static bool btype_destination_ok(uint32_t insn, bool bt, int btype)
{
    if ((insn & 0xfffff01fu) == 0xd503201fu) {
        /* HINT space */
        switch (extract32(insn, 5, 7)) {
        case 0b011001: /* PACIASP */
        case 0b011011: /* PACIBSP */
            /*
             * If SCTLR_ELx.BT, then PACI*SP are not compatible
             * with btype == 3.  Otherwise all btype are ok.
             */
            return !bt || btype != 3;
        case 0b100000: /* BTI */
            /* Not compatible with any btype. */
            return false;
        case 0b100010: /* BTI c */
            /* Not compatible with btype == 3 */
            return btype != 3;
        case 0b100100: /* BTI j */
            /* Not compatible with btype == 2 */
            return btype != 2;
        case 0b100110: /* BTI jc */
            /* Compatible with any btype. */
            return true;
        }
    } else {
        switch (insn & 0xffe0001fu) {
        case 0xd4200000u: /* BRK */
        case 0xd4400000u: /* HLT */
            /* Give priority to the breakpoint exception. */
            return true;
        }
    }
    return false;
}

static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
                                          CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    ARMCPU *arm_cpu = env_archcpu(env);
    CPUARMTBFlags tb_flags = arm_tbflags_from_tb(dc->base.tb);
    int bound, core_mmu_idx;

    dc->isar = &arm_cpu->isar;
    dc->condjmp = 0;
    dc->pc_save = dc->base.pc_first;
    dc->aarch64 = true;
    dc->thumb = false;
    dc->sctlr_b = 0;
    dc->be_data = EX_TBFLAG_ANY(tb_flags, BE_DATA) ? MO_BE : MO_LE;
    dc->condexec_mask = 0;
    dc->condexec_cond = 0;
    core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX);
    dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
    dc->tbii = EX_TBFLAG_A64(tb_flags, TBII);
    dc->tbid = EX_TBFLAG_A64(tb_flags, TBID);
    dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL);
    dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM);
    dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL);
    dc->fgt_active = EX_TBFLAG_ANY(tb_flags, FGT_ACTIVE);
    dc->fgt_svc = EX_TBFLAG_ANY(tb_flags, FGT_SVC);
    dc->fgt_eret = EX_TBFLAG_A64(tb_flags, FGT_ERET);
    dc->sve_excp_el = EX_TBFLAG_A64(tb_flags, SVEEXC_EL);
    dc->sme_excp_el = EX_TBFLAG_A64(tb_flags, SMEEXC_EL);
    dc->vl = (EX_TBFLAG_A64(tb_flags, VL) + 1) * 16;
    dc->svl = (EX_TBFLAG_A64(tb_flags, SVL) + 1) * 16;
    dc->pauth_active = EX_TBFLAG_A64(tb_flags, PAUTH_ACTIVE);
    dc->bt = EX_TBFLAG_A64(tb_flags, BT);
    dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE);
    dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV);
    dc->ata = EX_TBFLAG_A64(tb_flags, ATA);
    dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE);
    dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE);
    dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
    dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
    dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
    dc->vec_len = 0;
    dc->vec_stride = 0;
    dc->cp_regs = arm_cpu->cp_regs;
    dc->features = env->features;
    dc->dcz_blocksize = arm_cpu->dcz_blocksize;

#ifdef CONFIG_USER_ONLY
    /* In sve_probe_page, we assume TBI is enabled. */
    tcg_debug_assert(dc->tbid & 1);
#endif

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = EX_TBFLAG_ANY(tb_flags, SS_ACTIVE);
    dc->pstate_ss = EX_TBFLAG_ANY(tb_flags, PSTATE__SS);
    dc->is_ldex = false;

    /* Bound the number of insns to execute to those left on the page. */
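    /*
     * -(pc | TARGET_PAGE_MASK) is the number of bytes from pc to the
     * end of its page; dividing by 4 converts that to whole A64 insns.
     */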
    bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;

    /* If architectural single step active, limit to 1. */
    if (dc->ss_active) {
        bound = 1;
    }
    dc->base.max_insns = MIN(dc->base.max_insns, bound);
}

static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}

static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_arg = dc->base.pc_next;

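    /*
     * With CF_PCREL the translated code must not depend on the absolute
     * PC, so record only the offset of pc within its guest page; the
     * page base is recovered from the runtime PC when unwinding.
     */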
This allows us to handle this now 14858 * instead of waiting until the insn is otherwise decoded. 14859 */ 14860 if (s->btype != 0 14861 && s->guarded_page 14862 && !btype_destination_ok(insn, s->bt, s->btype)) { 14863 gen_exception_insn(s, 0, EXCP_UDEF, syn_btitrap(s->btype)); 14864 return; 14865 } 14866 } else { 14867 /* Not the first insn: btype must be 0. */ 14868 tcg_debug_assert(s->btype == 0); 14869 } 14870 } 14871 14872 s->is_nonstreaming = false; 14873 if (s->sme_trap_nonstreaming) { 14874 disas_sme_fa64(s, insn); 14875 } 14876 14877 switch (extract32(insn, 25, 4)) { 14878 case 0x0: 14879 if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) { 14880 unallocated_encoding(s); 14881 } 14882 break; 14883 case 0x1: case 0x3: /* UNALLOCATED */ 14884 unallocated_encoding(s); 14885 break; 14886 case 0x2: 14887 if (!disas_sve(s, insn)) { 14888 unallocated_encoding(s); 14889 } 14890 break; 14891 case 0x8: case 0x9: /* Data processing - immediate */ 14892 disas_data_proc_imm(s, insn); 14893 break; 14894 case 0xa: case 0xb: /* Branch, exception generation and system insns */ 14895 disas_b_exc_sys(s, insn); 14896 break; 14897 case 0x4: 14898 case 0x6: 14899 case 0xc: 14900 case 0xe: /* Loads and stores */ 14901 disas_ldst(s, insn); 14902 break; 14903 case 0x5: 14904 case 0xd: /* Data processing - register */ 14905 disas_data_proc_reg(s, insn); 14906 break; 14907 case 0x7: 14908 case 0xf: /* Data processing - SIMD and floating point */ 14909 disas_data_proc_simd_fp(s, insn); 14910 break; 14911 default: 14912 assert(FALSE); /* all 15 cases should be handled above */ 14913 break; 14914 } 14915 14916 /* 14917 * After execution of most insns, btype is reset to 0. 14918 * Note that we set btype == -1 when the insn sets btype. 14919 */ 14920 if (s->btype > 0 && s->base.is_jmp != DISAS_NORETURN) { 14921 reset_btype(s); 14922 } 14923 } 14924 14925 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) 14926 { 14927 DisasContext *dc = container_of(dcbase, DisasContext, base); 14928 14929 if (unlikely(dc->ss_active)) { 14930 /* Note that this means single stepping WFI doesn't halt the CPU. 14931 * For conditional branch insns this is harmless unreachable code as 14932 * gen_goto_tb() has already handled emitting the debug exception 14933 * (and thus a tb-jump is not possible when singlestepping). 14934 */ 14935 switch (dc->base.is_jmp) { 14936 default: 14937 gen_a64_update_pc(dc, 4); 14938 /* fall through */ 14939 case DISAS_EXIT: 14940 case DISAS_JUMP: 14941 gen_step_complete_exception(dc); 14942 break; 14943 case DISAS_NORETURN: 14944 break; 14945 } 14946 } else { 14947 switch (dc->base.is_jmp) { 14948 case DISAS_NEXT: 14949 case DISAS_TOO_MANY: 14950 gen_goto_tb(dc, 1, 4); 14951 break; 14952 default: 14953 case DISAS_UPDATE_EXIT: 14954 gen_a64_update_pc(dc, 4); 14955 /* fall through */ 14956 case DISAS_EXIT: 14957 tcg_gen_exit_tb(NULL, 0); 14958 break; 14959 case DISAS_UPDATE_NOCHAIN: 14960 gen_a64_update_pc(dc, 4); 14961 /* fall through */ 14962 case DISAS_JUMP: 14963 tcg_gen_lookup_and_goto_ptr(); 14964 break; 14965 case DISAS_NORETURN: 14966 case DISAS_SWI: 14967 break; 14968 case DISAS_WFE: 14969 gen_a64_update_pc(dc, 4); 14970 gen_helper_wfe(cpu_env); 14971 break; 14972 case DISAS_YIELD: 14973 gen_a64_update_pc(dc, 4); 14974 gen_helper_yield(cpu_env); 14975 break; 14976 case DISAS_WFI: 14977 /* 14978 * This is a special case because we don't want to just halt 14979 * the CPU if trying to debug across a WFI. 
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, 4);
            break;
        default:
        case DISAS_UPDATE_EXIT:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_EXIT:
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_UPDATE_NOCHAIN:
            gen_a64_update_pc(dc, 4);
            /* fall through */
        case DISAS_JUMP:
            tcg_gen_lookup_and_goto_ptr();
            break;
        case DISAS_NORETURN:
        case DISAS_SWI:
            break;
        case DISAS_WFE:
            gen_a64_update_pc(dc, 4);
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_a64_update_pc(dc, 4);
            gen_helper_yield(cpu_env);
            break;
        case DISAS_WFI:
            /*
             * This is a special case because we don't want to just halt
             * the CPU if trying to debug across a WFI.
             */
            gen_a64_update_pc(dc, 4);
            gen_helper_wfi(cpu_env, tcg_constant_i32(4));
            /*
             * The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
    }
}

static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
                                 CPUState *cpu, FILE *logfile)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
    target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
}

const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
    .disas_log          = aarch64_tr_disas_log,
};