1 /* 2 * ARM translation: AArch32 VFP instructions 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * Copyright (c) 2005-2007 CodeSourcery 6 * Copyright (c) 2007 OpenedHand, Ltd. 7 * Copyright (c) 2019 Linaro, Ltd. 8 * 9 * This library is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * This library is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 21 */ 22 23 #include "qemu/osdep.h" 24 #include "translate.h" 25 #include "translate-a32.h" 26 27 /* Include the generated VFP decoder */ 28 #include "decode-vfp.c.inc" 29 #include "decode-vfp-uncond.c.inc" 30 31 static inline void vfp_load_reg64(TCGv_i64 var, int reg) 32 { 33 tcg_gen_ld_i64(var, tcg_env, vfp_reg_offset(true, reg)); 34 } 35 36 static inline void vfp_store_reg64(TCGv_i64 var, int reg) 37 { 38 tcg_gen_st_i64(var, tcg_env, vfp_reg_offset(true, reg)); 39 } 40 41 static inline void vfp_load_reg32(TCGv_i32 var, int reg) 42 { 43 tcg_gen_ld_i32(var, tcg_env, vfp_reg_offset(false, reg)); 44 } 45 46 static inline void vfp_store_reg32(TCGv_i32 var, int reg) 47 { 48 tcg_gen_st_i32(var, tcg_env, vfp_reg_offset(false, reg)); 49 } 50 51 static inline void vfp_load_reg16(TCGv_i32 var, int reg) 52 { 53 tcg_gen_ld16u_i32(var, tcg_env, 54 vfp_reg_offset(false, reg) + HOST_BIG_ENDIAN * 2); 55 } 56 57 /* 58 * The imm8 encodes the sign bit, enough bits to represent an exponent in 59 * the range 01....1xx to 10....0xx, and the most significant 4 bits of 60 * the mantissa; see VFPExpandImm() in the v8 ARM ARM. 61 */ 62 uint64_t vfp_expand_imm(int size, uint8_t imm8) 63 { 64 uint64_t imm; 65 66 switch (size) { 67 case MO_64: 68 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) | 69 (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) | 70 extract32(imm8, 0, 6); 71 imm <<= 48; 72 break; 73 case MO_32: 74 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) | 75 (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) | 76 (extract32(imm8, 0, 6) << 3); 77 imm <<= 16; 78 break; 79 case MO_16: 80 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) | 81 (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) | 82 (extract32(imm8, 0, 6) << 6); 83 break; 84 default: 85 g_assert_not_reached(); 86 } 87 return imm; 88 } 89 90 /* 91 * Return the offset of a 16-bit half of the specified VFP single-precision 92 * register. If top is true, returns the top 16 bits; otherwise the bottom 93 * 16 bits. 94 */ 95 static inline long vfp_f16_offset(unsigned reg, bool top) 96 { 97 long offs = vfp_reg_offset(false, reg); 98 #if HOST_BIG_ENDIAN 99 if (!top) { 100 offs += 2; 101 } 102 #else 103 if (top) { 104 offs += 2; 105 } 106 #endif 107 return offs; 108 } 109 110 /* 111 * Generate code for M-profile lazy FP state preservation if needed; 112 * this corresponds to the pseudocode PreserveFPState() function. 
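 * (When FPCCR.LSPACT is set, exception entry reserved stack space for the
 * FP context but deferred the actual save; PreserveFPState() performs that
 * deferred save to the address recorded in FPCAR and then clears LSPACT.)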
113 */ 114 static void gen_preserve_fp_state(DisasContext *s, bool skip_context_update) 115 { 116 if (s->v7m_lspact) { 117 /* 118 * Lazy state saving affects external memory and also the NVIC, 119 * so we must mark it as an IO operation for icount (and cause 120 * this to be the last insn in the TB). 121 */ 122 if (translator_io_start(&s->base)) { 123 s->base.is_jmp = DISAS_UPDATE_EXIT; 124 } 125 gen_helper_v7m_preserve_fp_state(tcg_env); 126 /* 127 * If the preserve_fp_state helper doesn't throw an exception 128 * then it will clear LSPACT; we don't need to repeat this for 129 * any further FP insns in this TB. 130 */ 131 s->v7m_lspact = false; 132 /* 133 * The helper might have zeroed VPR, so we do not know the 134 * correct value for the MVE_NO_PRED TB flag any more. 135 * If we're about to create a new fp context then that 136 * will precisely determine the MVE_NO_PRED value (see 137 * gen_update_fp_context()). Otherwise, we must: 138 * - set s->mve_no_pred to false, so this instruction 139 * is generated to use helper functions 140 * - end the TB now, without chaining to the next TB 141 */ 142 if (skip_context_update || !s->v7m_new_fp_ctxt_needed) { 143 s->mve_no_pred = false; 144 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 145 } 146 } 147 } 148 149 /* 150 * Generate code for M-profile FP context handling: update the 151 * ownership of the FP context, and create a new context if 152 * necessary. This corresponds to the parts of the pseudocode 153 * ExecuteFPCheck() after the initial PreserveFPState() call. 154 */ 155 static void gen_update_fp_context(DisasContext *s) 156 { 157 /* Update ownership of FP context: set FPCCR.S to match current state */ 158 if (s->v8m_fpccr_s_wrong) { 159 TCGv_i32 tmp; 160 161 tmp = load_cpu_field(v7m.fpccr[M_REG_S]); 162 if (s->v8m_secure) { 163 tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK); 164 } else { 165 tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK); 166 } 167 store_cpu_field(tmp, v7m.fpccr[M_REG_S]); 168 /* Don't need to do this for any further FP insns in this TB */ 169 s->v8m_fpccr_s_wrong = false; 170 } 171 172 if (s->v7m_new_fp_ctxt_needed) { 173 /* 174 * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA, 175 * the FPSCR, and VPR. 176 */ 177 TCGv_i32 control, fpscr; 178 uint32_t bits = R_V7M_CONTROL_FPCA_MASK; 179 180 fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]); 181 gen_helper_vfp_set_fpscr(tcg_env, fpscr); 182 if (dc_isar_feature(aa32_mve, s)) { 183 store_cpu_field(tcg_constant_i32(0), v7m.vpr); 184 } 185 /* 186 * We just updated the FPSCR and VPR. Some of this state is cached 187 * in the MVE_NO_PRED TB flag. We want to avoid having to end the 188 * TB here, which means we need the new value of the MVE_NO_PRED 189 * flag to be exactly known here and the same for all executions. 190 * Luckily FPDSCR.LTPSIZE is always constant 4 and the VPR is 191 * always set to 0, so the new MVE_NO_PRED flag is always 1 192 * if and only if we have MVE. 193 * 194 * (The other FPSCR state cached in TB flags is VECLEN and VECSTRIDE, 195 * but those do not exist for M-profile, so are not relevant here.) 
196 */ 197 s->mve_no_pred = dc_isar_feature(aa32_mve, s); 198 199 if (s->v8m_secure) { 200 bits |= R_V7M_CONTROL_SFPA_MASK; 201 } 202 control = load_cpu_field(v7m.control[M_REG_S]); 203 tcg_gen_ori_i32(control, control, bits); 204 store_cpu_field(control, v7m.control[M_REG_S]); 205 /* Don't need to do this for any further FP insns in this TB */ 206 s->v7m_new_fp_ctxt_needed = false; 207 } 208 } 209 210 /* 211 * Check that VFP access is enabled, A-profile specific version. 212 * 213 * If VFP is enabled, return true. If not, emit code to generate an 214 * appropriate exception and return false. 215 * The ignore_vfp_enabled argument specifies that we should ignore 216 * whether VFP is enabled via FPEXC.EN: this should be true for FMXR/FMRX 217 * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns. 218 */ 219 static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled) 220 { 221 if (s->fp_excp_el) { 222 /* 223 * The full syndrome is only used for HSR when HCPTR traps: 224 * For v8, when TA==0, coproc is RES0. 225 * For v7, any use of a Floating-point instruction or access 226 * to a Floating-point Extension register that is trapped to 227 * Hyp mode because of a trap configured in the HCPTR sets 228 * this field to 0xA. 229 */ 230 int coproc = arm_dc_feature(s, ARM_FEATURE_V8) ? 0 : 0xa; 231 uint32_t syn = syn_fp_access_trap(1, 0xe, false, coproc); 232 233 gen_exception_insn_el(s, 0, EXCP_UDEF, syn, s->fp_excp_el); 234 return false; 235 } 236 237 /* 238 * Note that rebuild_hflags_a32 has already accounted for being in EL0 239 * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not 240 * appear to be any insns which touch VFP which are allowed. 241 */ 242 if (s->sme_trap_nonstreaming) { 243 gen_exception_insn(s, 0, EXCP_UDEF, 244 syn_smetrap(SME_ET_Streaming, 245 curr_insn_len(s) == 2)); 246 return false; 247 } 248 249 if (!s->vfp_enabled && !ignore_vfp_enabled) { 250 assert(!arm_dc_feature(s, ARM_FEATURE_M)); 251 unallocated_encoding(s); 252 return false; 253 } 254 return true; 255 } 256 257 /* 258 * Check that VFP access is enabled, M-profile specific version. 259 * 260 * If VFP is enabled, do the necessary M-profile lazy-FP handling and then 261 * return true. If not, emit code to generate an appropriate exception and 262 * return false. 263 * skip_context_update is true to skip the "update FP context" part of this. 264 */ 265 bool vfp_access_check_m(DisasContext *s, bool skip_context_update) 266 { 267 if (s->fp_excp_el) { 268 /* 269 * M-profile mostly catches the "FPU disabled" case early, in 270 * disas_m_nocp(), but a few insns (eg LCTP, WLSTP, DLSTP) 271 * which do coprocessor-checks are outside the large ranges of 272 * the encoding space handled by the patterns in m-nocp.decode, 273 * and for them we may need to raise NOCP here. 274 */ 275 gen_exception_insn_el(s, 0, EXCP_NOCP, 276 syn_uncategorized(), s->fp_excp_el); 277 return false; 278 } 279 280 /* Handle M-profile lazy FP state mechanics */ 281 282 /* Trigger lazy-state preservation if necessary */ 283 gen_preserve_fp_state(s, skip_context_update); 284 285 if (!skip_context_update) { 286 /* Update ownership of FP context and create new FP context if needed */ 287 gen_update_fp_context(s); 288 } 289 290 return true; 291 } 292 293 /* 294 * The most usual kind of VFP access check, for everything except 295 * FMXR/FMRX to the always-available special registers. 
296 */ 297 bool vfp_access_check(DisasContext *s) 298 { 299 if (arm_dc_feature(s, ARM_FEATURE_M)) { 300 return vfp_access_check_m(s, false); 301 } else { 302 return vfp_access_check_a(s, false); 303 } 304 } 305 306 static bool trans_VSEL(DisasContext *s, arg_VSEL *a) 307 { 308 uint32_t rd, rn, rm; 309 int sz = a->sz; 310 311 if (!dc_isar_feature(aa32_vsel, s)) { 312 return false; 313 } 314 315 if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) { 316 return false; 317 } 318 319 if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) { 320 return false; 321 } 322 323 /* UNDEF accesses to D16-D31 if they don't exist */ 324 if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && 325 ((a->vm | a->vn | a->vd) & 0x10)) { 326 return false; 327 } 328 329 rd = a->vd; 330 rn = a->vn; 331 rm = a->vm; 332 333 if (!vfp_access_check(s)) { 334 return true; 335 } 336 337 if (sz == 3) { 338 TCGv_i64 frn, frm, dest; 339 TCGv_i64 tmp, zero, zf, nf, vf; 340 341 zero = tcg_constant_i64(0); 342 343 frn = tcg_temp_new_i64(); 344 frm = tcg_temp_new_i64(); 345 dest = tcg_temp_new_i64(); 346 347 zf = tcg_temp_new_i64(); 348 nf = tcg_temp_new_i64(); 349 vf = tcg_temp_new_i64(); 350 351 tcg_gen_extu_i32_i64(zf, cpu_ZF); 352 tcg_gen_ext_i32_i64(nf, cpu_NF); 353 tcg_gen_ext_i32_i64(vf, cpu_VF); 354 355 vfp_load_reg64(frn, rn); 356 vfp_load_reg64(frm, rm); 357 switch (a->cc) { 358 case 0: /* eq: Z */ 359 tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero, frn, frm); 360 break; 361 case 1: /* vs: V */ 362 tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero, frn, frm); 363 break; 364 case 2: /* ge: N == V -> N ^ V == 0 */ 365 tmp = tcg_temp_new_i64(); 366 tcg_gen_xor_i64(tmp, vf, nf); 367 tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, frn, frm); 368 break; 369 case 3: /* gt: !Z && N == V */ 370 tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero, frn, frm); 371 tmp = tcg_temp_new_i64(); 372 tcg_gen_xor_i64(tmp, vf, nf); 373 tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, dest, frm); 374 break; 375 } 376 vfp_store_reg64(dest, rd); 377 } else { 378 TCGv_i32 frn, frm, dest; 379 TCGv_i32 tmp, zero; 380 381 zero = tcg_constant_i32(0); 382 383 frn = tcg_temp_new_i32(); 384 frm = tcg_temp_new_i32(); 385 dest = tcg_temp_new_i32(); 386 vfp_load_reg32(frn, rn); 387 vfp_load_reg32(frm, rm); 388 switch (a->cc) { 389 case 0: /* eq: Z */ 390 tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero, frn, frm); 391 break; 392 case 1: /* vs: V */ 393 tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero, frn, frm); 394 break; 395 case 2: /* ge: N == V -> N ^ V == 0 */ 396 tmp = tcg_temp_new_i32(); 397 tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); 398 tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, frn, frm); 399 break; 400 case 3: /* gt: !Z && N == V */ 401 tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero, frn, frm); 402 tmp = tcg_temp_new_i32(); 403 tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); 404 tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, dest, frm); 405 break; 406 } 407 /* For fp16 the top half is always zeroes */ 408 if (sz == 1) { 409 tcg_gen_andi_i32(dest, dest, 0xffff); 410 } 411 vfp_store_reg32(dest, rd); 412 } 413 414 return true; 415 } 416 417 /* 418 * Table for converting the most common AArch32 encoding of 419 * rounding mode to arm_fprounding order (which matches the 420 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM(). 
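 * The index is the instruction's 2-bit rounding-mode field: 0b00 selects
 * round to nearest with ties away, 0b01 ties to even, 0b10 towards
 * plus infinity and 0b11 towards minus infinity.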
421 */ 422 static const uint8_t fp_decode_rm[] = { 423 FPROUNDING_TIEAWAY, 424 FPROUNDING_TIEEVEN, 425 FPROUNDING_POSINF, 426 FPROUNDING_NEGINF, 427 }; 428 429 static bool trans_VRINT(DisasContext *s, arg_VRINT *a) 430 { 431 uint32_t rd, rm; 432 int sz = a->sz; 433 TCGv_ptr fpst; 434 TCGv_i32 tcg_rmode; 435 int rounding = fp_decode_rm[a->rm]; 436 437 if (!dc_isar_feature(aa32_vrint, s)) { 438 return false; 439 } 440 441 if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) { 442 return false; 443 } 444 445 if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) { 446 return false; 447 } 448 449 /* UNDEF accesses to D16-D31 if they don't exist */ 450 if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && 451 ((a->vm | a->vd) & 0x10)) { 452 return false; 453 } 454 455 rd = a->vd; 456 rm = a->vm; 457 458 if (!vfp_access_check(s)) { 459 return true; 460 } 461 462 if (sz == 1) { 463 fpst = fpstatus_ptr(FPST_FPCR_F16); 464 } else { 465 fpst = fpstatus_ptr(FPST_FPCR); 466 } 467 468 tcg_rmode = gen_set_rmode(rounding, fpst); 469 470 if (sz == 3) { 471 TCGv_i64 tcg_op; 472 TCGv_i64 tcg_res; 473 tcg_op = tcg_temp_new_i64(); 474 tcg_res = tcg_temp_new_i64(); 475 vfp_load_reg64(tcg_op, rm); 476 gen_helper_rintd(tcg_res, tcg_op, fpst); 477 vfp_store_reg64(tcg_res, rd); 478 } else { 479 TCGv_i32 tcg_op; 480 TCGv_i32 tcg_res; 481 tcg_op = tcg_temp_new_i32(); 482 tcg_res = tcg_temp_new_i32(); 483 vfp_load_reg32(tcg_op, rm); 484 if (sz == 1) { 485 gen_helper_rinth(tcg_res, tcg_op, fpst); 486 } else { 487 gen_helper_rints(tcg_res, tcg_op, fpst); 488 } 489 vfp_store_reg32(tcg_res, rd); 490 } 491 492 gen_restore_rmode(tcg_rmode, fpst); 493 return true; 494 } 495 496 static bool trans_VCVT(DisasContext *s, arg_VCVT *a) 497 { 498 uint32_t rd, rm; 499 int sz = a->sz; 500 TCGv_ptr fpst; 501 TCGv_i32 tcg_rmode, tcg_shift; 502 int rounding = fp_decode_rm[a->rm]; 503 bool is_signed = a->op; 504 505 if (!dc_isar_feature(aa32_vcvt_dr, s)) { 506 return false; 507 } 508 509 if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) { 510 return false; 511 } 512 513 if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) { 514 return false; 515 } 516 517 /* UNDEF accesses to D16-D31 if they don't exist */ 518 if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { 519 return false; 520 } 521 522 rd = a->vd; 523 rm = a->vm; 524 525 if (!vfp_access_check(s)) { 526 return true; 527 } 528 529 if (sz == 1) { 530 fpst = fpstatus_ptr(FPST_FPCR_F16); 531 } else { 532 fpst = fpstatus_ptr(FPST_FPCR); 533 } 534 535 tcg_shift = tcg_constant_i32(0); 536 tcg_rmode = gen_set_rmode(rounding, fpst); 537 538 if (sz == 3) { 539 TCGv_i64 tcg_double, tcg_res; 540 TCGv_i32 tcg_tmp; 541 tcg_double = tcg_temp_new_i64(); 542 tcg_res = tcg_temp_new_i64(); 543 tcg_tmp = tcg_temp_new_i32(); 544 vfp_load_reg64(tcg_double, rm); 545 if (is_signed) { 546 gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst); 547 } else { 548 gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst); 549 } 550 tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res); 551 vfp_store_reg32(tcg_tmp, rd); 552 } else { 553 TCGv_i32 tcg_single, tcg_res; 554 tcg_single = tcg_temp_new_i32(); 555 tcg_res = tcg_temp_new_i32(); 556 vfp_load_reg32(tcg_single, rm); 557 if (sz == 1) { 558 if (is_signed) { 559 gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst); 560 } else { 561 gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst); 562 } 563 } else { 564 if (is_signed) { 565 gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst); 566 } else { 567 gen_helper_vfp_touls(tcg_res, 
tcg_single, tcg_shift, fpst); 568 } 569 } 570 vfp_store_reg32(tcg_res, rd); 571 } 572 573 gen_restore_rmode(tcg_rmode, fpst); 574 return true; 575 } 576 577 bool mve_skip_vmov(DisasContext *s, int vn, int index, int size) 578 { 579 /* 580 * In a CPU with MVE, the VMOV (vector lane to general-purpose register) 581 * and VMOV (general-purpose register to vector lane) insns are not 582 * predicated, but they are subject to beatwise execution if they are 583 * not in an IT block. 584 * 585 * Since our implementation always executes all 4 beats in one tick, 586 * this means only that if PSR.ECI says we should not be executing 587 * the beat corresponding to the lane of the vector register being 588 * accessed then we should skip performing the move, and that we need 589 * to do the usual check for bad ECI state and advance of ECI state. 590 * 591 * Note that if PSR.ECI is non-zero then we cannot be in an IT block. 592 * 593 * Return true if this VMOV scalar <-> gpreg should be skipped because 594 * the MVE PSR.ECI state says we skip the beat where the store happens. 595 */ 596 597 /* Calculate the byte offset into Qn which we're going to access */ 598 int ofs = (index << size) + ((vn & 1) * 8); 599 600 if (!dc_isar_feature(aa32_mve, s)) { 601 return false; 602 } 603 604 switch (s->eci) { 605 case ECI_NONE: 606 return false; 607 case ECI_A0: 608 return ofs < 4; 609 case ECI_A0A1: 610 return ofs < 8; 611 case ECI_A0A1A2: 612 case ECI_A0A1A2B0: 613 return ofs < 12; 614 default: 615 g_assert_not_reached(); 616 } 617 } 618 619 static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a) 620 { 621 /* VMOV scalar to general purpose register */ 622 TCGv_i32 tmp; 623 624 /* 625 * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has 626 * all sizes, whether the CPU has fp or not. 627 */ 628 if (!dc_isar_feature(aa32_mve, s)) { 629 if (a->size == MO_32 630 ? !dc_isar_feature(aa32_fpsp_v2, s) 631 : !arm_dc_feature(s, ARM_FEATURE_NEON)) { 632 return false; 633 } 634 } 635 636 /* UNDEF accesses to D16-D31 if they don't exist */ 637 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) { 638 return false; 639 } 640 641 if (dc_isar_feature(aa32_mve, s)) { 642 if (!mve_eci_check(s)) { 643 return true; 644 } 645 } 646 647 if (!vfp_access_check(s)) { 648 return true; 649 } 650 651 if (!mve_skip_vmov(s, a->vn, a->index, a->size)) { 652 tmp = tcg_temp_new_i32(); 653 read_neon_element32(tmp, a->vn, a->index, 654 a->size | (a->u ? 0 : MO_SIGN)); 655 store_reg(s, a->rt, tmp); 656 } 657 658 if (dc_isar_feature(aa32_mve, s)) { 659 mve_update_and_store_eci(s); 660 } 661 return true; 662 } 663 664 static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a) 665 { 666 /* VMOV general purpose register to scalar */ 667 TCGv_i32 tmp; 668 669 /* 670 * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has 671 * all sizes, whether the CPU has fp or not. 672 */ 673 if (!dc_isar_feature(aa32_mve, s)) { 674 if (a->size == MO_32 675 ? 
!dc_isar_feature(aa32_fpsp_v2, s) 676 : !arm_dc_feature(s, ARM_FEATURE_NEON)) { 677 return false; 678 } 679 } 680 681 /* UNDEF accesses to D16-D31 if they don't exist */ 682 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) { 683 return false; 684 } 685 686 if (dc_isar_feature(aa32_mve, s)) { 687 if (!mve_eci_check(s)) { 688 return true; 689 } 690 } 691 692 if (!vfp_access_check(s)) { 693 return true; 694 } 695 696 if (!mve_skip_vmov(s, a->vn, a->index, a->size)) { 697 tmp = load_reg(s, a->rt); 698 write_neon_element32(tmp, a->vn, a->index, a->size); 699 } 700 701 if (dc_isar_feature(aa32_mve, s)) { 702 mve_update_and_store_eci(s); 703 } 704 return true; 705 } 706 707 static bool trans_VDUP(DisasContext *s, arg_VDUP *a) 708 { 709 /* VDUP (general purpose register) */ 710 TCGv_i32 tmp; 711 int size, vec_size; 712 713 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 714 return false; 715 } 716 717 /* UNDEF accesses to D16-D31 if they don't exist */ 718 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) { 719 return false; 720 } 721 722 if (a->b && a->e) { 723 return false; 724 } 725 726 if (a->q && (a->vn & 1)) { 727 return false; 728 } 729 730 vec_size = a->q ? 16 : 8; 731 if (a->b) { 732 size = 0; 733 } else if (a->e) { 734 size = 1; 735 } else { 736 size = 2; 737 } 738 739 if (!vfp_access_check(s)) { 740 return true; 741 } 742 743 tmp = load_reg(s, a->rt); 744 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn), 745 vec_size, vec_size, tmp); 746 return true; 747 } 748 749 static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a) 750 { 751 TCGv_i32 tmp; 752 bool ignore_vfp_enabled = false; 753 754 if (arm_dc_feature(s, ARM_FEATURE_M)) { 755 /* M profile version was already handled in m-nocp.decode */ 756 return false; 757 } 758 759 if (!dc_isar_feature(aa32_fpsp_v2, s)) { 760 return false; 761 } 762 763 switch (a->reg) { 764 case ARM_VFP_FPSID: 765 /* 766 * VFPv2 allows access to FPSID from userspace; VFPv3 restricts 767 * all ID registers to privileged access only. 768 */ 769 if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) { 770 return false; 771 } 772 ignore_vfp_enabled = true; 773 break; 774 case ARM_VFP_MVFR0: 775 case ARM_VFP_MVFR1: 776 if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) { 777 return false; 778 } 779 ignore_vfp_enabled = true; 780 break; 781 case ARM_VFP_MVFR2: 782 if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) { 783 return false; 784 } 785 ignore_vfp_enabled = true; 786 break; 787 case ARM_VFP_FPSCR: 788 break; 789 case ARM_VFP_FPEXC: 790 if (IS_USER(s)) { 791 return false; 792 } 793 ignore_vfp_enabled = true; 794 break; 795 case ARM_VFP_FPINST: 796 case ARM_VFP_FPINST2: 797 /* Not present in VFPv3 */ 798 if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) { 799 return false; 800 } 801 break; 802 default: 803 return false; 804 } 805 806 /* 807 * Call vfp_access_check_a() directly, because we need to tell 808 * it to ignore FPEXC.EN for some register accesses. 
809 */ 810 if (!vfp_access_check_a(s, ignore_vfp_enabled)) { 811 return true; 812 } 813 814 if (a->l) { 815 /* VMRS, move VFP special register to gp register */ 816 switch (a->reg) { 817 case ARM_VFP_MVFR0: 818 case ARM_VFP_MVFR1: 819 case ARM_VFP_MVFR2: 820 case ARM_VFP_FPSID: 821 if (s->current_el == 1) { 822 gen_set_condexec(s); 823 gen_update_pc(s, 0); 824 gen_helper_check_hcr_el2_trap(tcg_env, 825 tcg_constant_i32(a->rt), 826 tcg_constant_i32(a->reg)); 827 } 828 /* fall through */ 829 case ARM_VFP_FPEXC: 830 case ARM_VFP_FPINST: 831 case ARM_VFP_FPINST2: 832 tmp = load_cpu_field(vfp.xregs[a->reg]); 833 break; 834 case ARM_VFP_FPSCR: 835 if (a->rt == 15) { 836 tmp = load_cpu_field_low32(vfp.fpsr); 837 tcg_gen_andi_i32(tmp, tmp, FPSR_NZCV_MASK); 838 } else { 839 tmp = tcg_temp_new_i32(); 840 gen_helper_vfp_get_fpscr(tmp, tcg_env); 841 } 842 break; 843 default: 844 g_assert_not_reached(); 845 } 846 847 if (a->rt == 15) { 848 /* Set the 4 flag bits in the CPSR. */ 849 gen_set_nzcv(tmp); 850 } else { 851 store_reg(s, a->rt, tmp); 852 } 853 } else { 854 /* VMSR, move gp register to VFP special register */ 855 switch (a->reg) { 856 case ARM_VFP_FPSID: 857 case ARM_VFP_MVFR0: 858 case ARM_VFP_MVFR1: 859 case ARM_VFP_MVFR2: 860 /* Writes are ignored. */ 861 break; 862 case ARM_VFP_FPSCR: 863 tmp = load_reg(s, a->rt); 864 gen_helper_vfp_set_fpscr(tcg_env, tmp); 865 gen_lookup_tb(s); 866 break; 867 case ARM_VFP_FPEXC: 868 /* 869 * TODO: VFP subarchitecture support. 870 * For now, keep the EN bit only 871 */ 872 tmp = load_reg(s, a->rt); 873 tcg_gen_andi_i32(tmp, tmp, 1 << 30); 874 store_cpu_field(tmp, vfp.xregs[a->reg]); 875 gen_lookup_tb(s); 876 break; 877 case ARM_VFP_FPINST: 878 case ARM_VFP_FPINST2: 879 tmp = load_reg(s, a->rt); 880 store_cpu_field(tmp, vfp.xregs[a->reg]); 881 break; 882 default: 883 g_assert_not_reached(); 884 } 885 } 886 887 return true; 888 } 889 890 891 static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a) 892 { 893 TCGv_i32 tmp; 894 895 if (!dc_isar_feature(aa32_fp16_arith, s)) { 896 return false; 897 } 898 899 if (a->rt == 15) { 900 /* UNPREDICTABLE; we choose to UNDEF */ 901 return false; 902 } 903 904 if (!vfp_access_check(s)) { 905 return true; 906 } 907 908 if (a->l) { 909 /* VFP to general purpose register */ 910 tmp = tcg_temp_new_i32(); 911 vfp_load_reg16(tmp, a->vn); 912 store_reg(s, a->rt, tmp); 913 } else { 914 /* general purpose register to VFP */ 915 tmp = load_reg(s, a->rt); 916 tcg_gen_andi_i32(tmp, tmp, 0xffff); 917 vfp_store_reg32(tmp, a->vn); 918 } 919 920 return true; 921 } 922 923 static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a) 924 { 925 TCGv_i32 tmp; 926 927 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 928 return false; 929 } 930 931 if (!vfp_access_check(s)) { 932 return true; 933 } 934 935 if (a->l) { 936 /* VFP to general purpose register */ 937 tmp = tcg_temp_new_i32(); 938 vfp_load_reg32(tmp, a->vn); 939 if (a->rt == 15) { 940 /* Set the 4 flag bits in the CPSR. 
 */
            gen_set_nzcv(tmp);
        } else {
            store_reg(s, a->rt, tmp);
        }
    } else {
        /* general purpose register to VFP */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vn);
    }

    return true;
}

static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a)
{
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    /*
     * VMOV between two general-purpose registers and two single precision
     * floating point registers
     */
    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm + 1);
    }

    return true;
}

static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a)
{
    TCGv_i32 tmp;

    /*
     * VMOV between two general-purpose registers and one double precision
     * floating point register.  Note that this does not require support
     * for double precision arithmetic.
     */
    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (a->op) {
        /* fpreg to gpreg */
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2);
        store_reg(s, a->rt, tmp);
        tmp = tcg_temp_new_i32();
        vfp_load_reg32(tmp, a->vm * 2 + 1);
        store_reg(s, a->rt2, tmp);
    } else {
        /* gpreg to fpreg */
        tmp = load_reg(s, a->rt);
        vfp_store_reg32(tmp, a->vm * 2);
        tmp = load_reg(s, a->rt2);
        vfp_store_reg32(tmp, a->vm * 2 + 1);
    }

    return true;
}

static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;

    /* The half-precision form requires FP16 (or MVE), not just FP */
    if (!dc_isar_feature(aa32_fp16_arith, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */
    offset = a->imm << 1;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE.  */
    addr = add_reg_for_lit(s, a->rn, offset);
    tmp = tcg_temp_new_i32();
    if (a->l) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
        vfp_store_reg32(tmp, a->vd);
    } else {
        vfp_load_reg32(tmp, a->vd);
        gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN);
    }
    return true;
}

static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a)
{
    uint32_t offset;
    TCGv_i32 addr, tmp;

    if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << 2;
    if (!a->u) {
        offset = -offset;
    }

    /* For thumb, use of PC is UNPREDICTABLE.
*/ 1081 addr = add_reg_for_lit(s, a->rn, offset); 1082 tmp = tcg_temp_new_i32(); 1083 if (a->l) { 1084 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); 1085 vfp_store_reg32(tmp, a->vd); 1086 } else { 1087 vfp_load_reg32(tmp, a->vd); 1088 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); 1089 } 1090 return true; 1091 } 1092 1093 static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a) 1094 { 1095 uint32_t offset; 1096 TCGv_i32 addr; 1097 TCGv_i64 tmp; 1098 1099 /* Note that this does not require support for double arithmetic. */ 1100 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 1101 return false; 1102 } 1103 1104 /* UNDEF accesses to D16-D31 if they don't exist */ 1105 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 1106 return false; 1107 } 1108 1109 if (!vfp_access_check(s)) { 1110 return true; 1111 } 1112 1113 offset = a->imm << 2; 1114 if (!a->u) { 1115 offset = -offset; 1116 } 1117 1118 /* For thumb, use of PC is UNPREDICTABLE. */ 1119 addr = add_reg_for_lit(s, a->rn, offset); 1120 tmp = tcg_temp_new_i64(); 1121 if (a->l) { 1122 gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4); 1123 vfp_store_reg64(tmp, a->vd); 1124 } else { 1125 vfp_load_reg64(tmp, a->vd); 1126 gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4); 1127 } 1128 return true; 1129 } 1130 1131 static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a) 1132 { 1133 uint32_t offset; 1134 TCGv_i32 addr, tmp; 1135 int i, n; 1136 1137 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 1138 return false; 1139 } 1140 1141 n = a->imm; 1142 1143 if (n == 0 || (a->vd + n) > 32) { 1144 /* 1145 * UNPREDICTABLE cases for bad immediates: we choose to 1146 * UNDEF to avoid generating huge numbers of TCG ops 1147 */ 1148 return false; 1149 } 1150 if (a->rn == 15 && a->w) { 1151 /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */ 1152 return false; 1153 } 1154 1155 s->eci_handled = true; 1156 1157 if (!vfp_access_check(s)) { 1158 return true; 1159 } 1160 1161 /* For thumb, use of PC is UNPREDICTABLE. */ 1162 addr = add_reg_for_lit(s, a->rn, 0); 1163 if (a->p) { 1164 /* pre-decrement */ 1165 tcg_gen_addi_i32(addr, addr, -(a->imm << 2)); 1166 } 1167 1168 if (s->v8m_stackcheck && a->rn == 13 && a->w) { 1169 /* 1170 * Here 'addr' is the lowest address we will store to, 1171 * and is either the old SP (if post-increment) or 1172 * the new SP (if pre-decrement). For post-increment 1173 * where the old value is below the limit and the new 1174 * value is above, it is UNKNOWN whether the limit check 1175 * triggers; we choose to trigger. 
1176 */ 1177 gen_helper_v8m_stackcheck(tcg_env, addr); 1178 } 1179 1180 offset = 4; 1181 tmp = tcg_temp_new_i32(); 1182 for (i = 0; i < n; i++) { 1183 if (a->l) { 1184 /* load */ 1185 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); 1186 vfp_store_reg32(tmp, a->vd + i); 1187 } else { 1188 /* store */ 1189 vfp_load_reg32(tmp, a->vd + i); 1190 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); 1191 } 1192 tcg_gen_addi_i32(addr, addr, offset); 1193 } 1194 if (a->w) { 1195 /* writeback */ 1196 if (a->p) { 1197 offset = -offset * n; 1198 tcg_gen_addi_i32(addr, addr, offset); 1199 } 1200 store_reg(s, a->rn, addr); 1201 } 1202 1203 clear_eci_state(s); 1204 return true; 1205 } 1206 1207 static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a) 1208 { 1209 uint32_t offset; 1210 TCGv_i32 addr; 1211 TCGv_i64 tmp; 1212 int i, n; 1213 1214 /* Note that this does not require support for double arithmetic. */ 1215 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 1216 return false; 1217 } 1218 1219 n = a->imm >> 1; 1220 1221 if (n == 0 || (a->vd + n) > 32 || n > 16) { 1222 /* 1223 * UNPREDICTABLE cases for bad immediates: we choose to 1224 * UNDEF to avoid generating huge numbers of TCG ops 1225 */ 1226 return false; 1227 } 1228 if (a->rn == 15 && a->w) { 1229 /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */ 1230 return false; 1231 } 1232 1233 /* UNDEF accesses to D16-D31 if they don't exist */ 1234 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) { 1235 return false; 1236 } 1237 1238 s->eci_handled = true; 1239 1240 if (!vfp_access_check(s)) { 1241 return true; 1242 } 1243 1244 /* For thumb, use of PC is UNPREDICTABLE. */ 1245 addr = add_reg_for_lit(s, a->rn, 0); 1246 if (a->p) { 1247 /* pre-decrement */ 1248 tcg_gen_addi_i32(addr, addr, -(a->imm << 2)); 1249 } 1250 1251 if (s->v8m_stackcheck && a->rn == 13 && a->w) { 1252 /* 1253 * Here 'addr' is the lowest address we will store to, 1254 * and is either the old SP (if post-increment) or 1255 * the new SP (if pre-decrement). For post-increment 1256 * where the old value is below the limit and the new 1257 * value is above, it is UNKNOWN whether the limit check 1258 * triggers; we choose to trigger. 1259 */ 1260 gen_helper_v8m_stackcheck(tcg_env, addr); 1261 } 1262 1263 offset = 8; 1264 tmp = tcg_temp_new_i64(); 1265 for (i = 0; i < n; i++) { 1266 if (a->l) { 1267 /* load */ 1268 gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4); 1269 vfp_store_reg64(tmp, a->vd + i); 1270 } else { 1271 /* store */ 1272 vfp_load_reg64(tmp, a->vd + i); 1273 gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4); 1274 } 1275 tcg_gen_addi_i32(addr, addr, offset); 1276 } 1277 if (a->w) { 1278 /* writeback */ 1279 if (a->p) { 1280 offset = -offset * n; 1281 } else if (a->imm & 1) { 1282 offset = 4; 1283 } else { 1284 offset = 0; 1285 } 1286 1287 if (offset != 0) { 1288 tcg_gen_addi_i32(addr, addr, offset); 1289 } 1290 store_reg(s, a->rn, addr); 1291 } 1292 1293 clear_eci_state(s); 1294 return true; 1295 } 1296 1297 /* 1298 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp(). 1299 * The callback should emit code to write a value to vd. If 1300 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd 1301 * will contain the old value of the relevant VFP register; 1302 * otherwise it must be written to only. 
1303 */ 1304 typedef void VFPGen3OpSPFn(TCGv_i32 vd, 1305 TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst); 1306 typedef void VFPGen3OpDPFn(TCGv_i64 vd, 1307 TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst); 1308 1309 /* 1310 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp(). 1311 * The callback should emit code to write a value to vd (which 1312 * should be written to only). 1313 */ 1314 typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm); 1315 typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm); 1316 1317 /* 1318 * Return true if the specified S reg is in a scalar bank 1319 * (ie if it is s0..s7) 1320 */ 1321 static inline bool vfp_sreg_is_scalar(int reg) 1322 { 1323 return (reg & 0x18) == 0; 1324 } 1325 1326 /* 1327 * Return true if the specified D reg is in a scalar bank 1328 * (ie if it is d0..d3 or d16..d19) 1329 */ 1330 static inline bool vfp_dreg_is_scalar(int reg) 1331 { 1332 return (reg & 0xc) == 0; 1333 } 1334 1335 /* 1336 * Advance the S reg number forwards by delta within its bank 1337 * (ie increment the low 3 bits but leave the rest the same) 1338 */ 1339 static inline int vfp_advance_sreg(int reg, int delta) 1340 { 1341 return ((reg + delta) & 0x7) | (reg & ~0x7); 1342 } 1343 1344 /* 1345 * Advance the D reg number forwards by delta within its bank 1346 * (ie increment the low 2 bits but leave the rest the same) 1347 */ 1348 static inline int vfp_advance_dreg(int reg, int delta) 1349 { 1350 return ((reg + delta) & 0x3) | (reg & ~0x3); 1351 } 1352 1353 /* 1354 * Perform a 3-operand VFP data processing instruction. fn is the 1355 * callback to do the actual operation; this function deals with the 1356 * code to handle looping around for VFP vector processing. 1357 */ 1358 static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn, 1359 int vd, int vn, int vm, bool reads_vd) 1360 { 1361 uint32_t delta_m = 0; 1362 uint32_t delta_d = 0; 1363 int veclen = s->vec_len; 1364 TCGv_i32 f0, f1, fd; 1365 TCGv_ptr fpst; 1366 1367 if (!dc_isar_feature(aa32_fpsp_v2, s)) { 1368 return false; 1369 } 1370 1371 if (!dc_isar_feature(aa32_fpshvec, s) && 1372 (veclen != 0 || s->vec_stride != 0)) { 1373 return false; 1374 } 1375 1376 if (!vfp_access_check(s)) { 1377 return true; 1378 } 1379 1380 if (veclen > 0) { 1381 /* Figure out what type of vector operation this is. */ 1382 if (vfp_sreg_is_scalar(vd)) { 1383 /* scalar */ 1384 veclen = 0; 1385 } else { 1386 delta_d = s->vec_stride + 1; 1387 1388 if (vfp_sreg_is_scalar(vm)) { 1389 /* mixed scalar/vector */ 1390 delta_m = 0; 1391 } else { 1392 /* vector */ 1393 delta_m = delta_d; 1394 } 1395 } 1396 } 1397 1398 f0 = tcg_temp_new_i32(); 1399 f1 = tcg_temp_new_i32(); 1400 fd = tcg_temp_new_i32(); 1401 fpst = fpstatus_ptr(FPST_FPCR); 1402 1403 vfp_load_reg32(f0, vn); 1404 vfp_load_reg32(f1, vm); 1405 1406 for (;;) { 1407 if (reads_vd) { 1408 vfp_load_reg32(fd, vd); 1409 } 1410 fn(fd, f0, f1, fpst); 1411 vfp_store_reg32(fd, vd); 1412 1413 if (veclen == 0) { 1414 break; 1415 } 1416 1417 /* Set up the operands for the next iteration */ 1418 veclen--; 1419 vd = vfp_advance_sreg(vd, delta_d); 1420 vn = vfp_advance_sreg(vn, delta_d); 1421 vfp_load_reg32(f0, vn); 1422 if (delta_m) { 1423 vm = vfp_advance_sreg(vm, delta_m); 1424 vfp_load_reg32(f1, vm); 1425 } 1426 } 1427 return true; 1428 } 1429 1430 static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn, 1431 int vd, int vn, int vm, bool reads_vd) 1432 { 1433 /* 1434 * Do a half-precision operation. 
Functionally this is 1435 * the same as do_vfp_3op_sp(), except: 1436 * - it uses the FPST_FPCR_F16 1437 * - it doesn't need the VFP vector handling (fp16 is a 1438 * v8 feature, and in v8 VFP vectors don't exist) 1439 * - it does the aa32_fp16_arith feature test 1440 */ 1441 TCGv_i32 f0, f1, fd; 1442 TCGv_ptr fpst; 1443 1444 if (!dc_isar_feature(aa32_fp16_arith, s)) { 1445 return false; 1446 } 1447 1448 if (s->vec_len != 0 || s->vec_stride != 0) { 1449 return false; 1450 } 1451 1452 if (!vfp_access_check(s)) { 1453 return true; 1454 } 1455 1456 f0 = tcg_temp_new_i32(); 1457 f1 = tcg_temp_new_i32(); 1458 fd = tcg_temp_new_i32(); 1459 fpst = fpstatus_ptr(FPST_FPCR_F16); 1460 1461 vfp_load_reg16(f0, vn); 1462 vfp_load_reg16(f1, vm); 1463 1464 if (reads_vd) { 1465 vfp_load_reg16(fd, vd); 1466 } 1467 fn(fd, f0, f1, fpst); 1468 vfp_store_reg32(fd, vd); 1469 return true; 1470 } 1471 1472 static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn, 1473 int vd, int vn, int vm, bool reads_vd) 1474 { 1475 uint32_t delta_m = 0; 1476 uint32_t delta_d = 0; 1477 int veclen = s->vec_len; 1478 TCGv_i64 f0, f1, fd; 1479 TCGv_ptr fpst; 1480 1481 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 1482 return false; 1483 } 1484 1485 /* UNDEF accesses to D16-D31 if they don't exist */ 1486 if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) { 1487 return false; 1488 } 1489 1490 if (!dc_isar_feature(aa32_fpshvec, s) && 1491 (veclen != 0 || s->vec_stride != 0)) { 1492 return false; 1493 } 1494 1495 if (!vfp_access_check(s)) { 1496 return true; 1497 } 1498 1499 if (veclen > 0) { 1500 /* Figure out what type of vector operation this is. */ 1501 if (vfp_dreg_is_scalar(vd)) { 1502 /* scalar */ 1503 veclen = 0; 1504 } else { 1505 delta_d = (s->vec_stride >> 1) + 1; 1506 1507 if (vfp_dreg_is_scalar(vm)) { 1508 /* mixed scalar/vector */ 1509 delta_m = 0; 1510 } else { 1511 /* vector */ 1512 delta_m = delta_d; 1513 } 1514 } 1515 } 1516 1517 f0 = tcg_temp_new_i64(); 1518 f1 = tcg_temp_new_i64(); 1519 fd = tcg_temp_new_i64(); 1520 fpst = fpstatus_ptr(FPST_FPCR); 1521 1522 vfp_load_reg64(f0, vn); 1523 vfp_load_reg64(f1, vm); 1524 1525 for (;;) { 1526 if (reads_vd) { 1527 vfp_load_reg64(fd, vd); 1528 } 1529 fn(fd, f0, f1, fpst); 1530 vfp_store_reg64(fd, vd); 1531 1532 if (veclen == 0) { 1533 break; 1534 } 1535 /* Set up the operands for the next iteration */ 1536 veclen--; 1537 vd = vfp_advance_dreg(vd, delta_d); 1538 vn = vfp_advance_dreg(vn, delta_d); 1539 vfp_load_reg64(f0, vn); 1540 if (delta_m) { 1541 vm = vfp_advance_dreg(vm, delta_m); 1542 vfp_load_reg64(f1, vm); 1543 } 1544 } 1545 return true; 1546 } 1547 1548 static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) 1549 { 1550 uint32_t delta_m = 0; 1551 uint32_t delta_d = 0; 1552 int veclen = s->vec_len; 1553 TCGv_i32 f0, fd; 1554 1555 /* Note that the caller must check the aa32_fpsp_v2 feature. */ 1556 1557 if (!dc_isar_feature(aa32_fpshvec, s) && 1558 (veclen != 0 || s->vec_stride != 0)) { 1559 return false; 1560 } 1561 1562 if (!vfp_access_check(s)) { 1563 return true; 1564 } 1565 1566 if (veclen > 0) { 1567 /* Figure out what type of vector operation this is. 
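         * (This is the legacy VFP "short vector" mode: with a non-zero
         * FPSCR.LEN, an operation whose destination is outside the scalar
         * bank s0..s7 is repeated over a vector of registers within the
         * bank, using the stride configured in FPSCR.STRIDE.)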
*/ 1568 if (vfp_sreg_is_scalar(vd)) { 1569 /* scalar */ 1570 veclen = 0; 1571 } else { 1572 delta_d = s->vec_stride + 1; 1573 1574 if (vfp_sreg_is_scalar(vm)) { 1575 /* mixed scalar/vector */ 1576 delta_m = 0; 1577 } else { 1578 /* vector */ 1579 delta_m = delta_d; 1580 } 1581 } 1582 } 1583 1584 f0 = tcg_temp_new_i32(); 1585 fd = tcg_temp_new_i32(); 1586 1587 vfp_load_reg32(f0, vm); 1588 1589 for (;;) { 1590 fn(fd, f0); 1591 vfp_store_reg32(fd, vd); 1592 1593 if (veclen == 0) { 1594 break; 1595 } 1596 1597 if (delta_m == 0) { 1598 /* single source one-many */ 1599 while (veclen--) { 1600 vd = vfp_advance_sreg(vd, delta_d); 1601 vfp_store_reg32(fd, vd); 1602 } 1603 break; 1604 } 1605 1606 /* Set up the operands for the next iteration */ 1607 veclen--; 1608 vd = vfp_advance_sreg(vd, delta_d); 1609 vm = vfp_advance_sreg(vm, delta_m); 1610 vfp_load_reg32(f0, vm); 1611 } 1612 return true; 1613 } 1614 1615 static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) 1616 { 1617 /* 1618 * Do a half-precision operation. Functionally this is 1619 * the same as do_vfp_2op_sp(), except: 1620 * - it doesn't need the VFP vector handling (fp16 is a 1621 * v8 feature, and in v8 VFP vectors don't exist) 1622 * - it does the aa32_fp16_arith feature test 1623 */ 1624 TCGv_i32 f0; 1625 1626 /* Note that the caller must check the aa32_fp16_arith feature */ 1627 1628 if (!dc_isar_feature(aa32_fp16_arith, s)) { 1629 return false; 1630 } 1631 1632 if (s->vec_len != 0 || s->vec_stride != 0) { 1633 return false; 1634 } 1635 1636 if (!vfp_access_check(s)) { 1637 return true; 1638 } 1639 1640 f0 = tcg_temp_new_i32(); 1641 vfp_load_reg16(f0, vm); 1642 fn(f0, f0); 1643 vfp_store_reg32(f0, vd); 1644 1645 return true; 1646 } 1647 1648 static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm) 1649 { 1650 uint32_t delta_m = 0; 1651 uint32_t delta_d = 0; 1652 int veclen = s->vec_len; 1653 TCGv_i64 f0, fd; 1654 1655 /* Note that the caller must check the aa32_fpdp_v2 feature. */ 1656 1657 /* UNDEF accesses to D16-D31 if they don't exist */ 1658 if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) { 1659 return false; 1660 } 1661 1662 if (!dc_isar_feature(aa32_fpshvec, s) && 1663 (veclen != 0 || s->vec_stride != 0)) { 1664 return false; 1665 } 1666 1667 if (!vfp_access_check(s)) { 1668 return true; 1669 } 1670 1671 if (veclen > 0) { 1672 /* Figure out what type of vector operation this is. 
         */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();

    vfp_load_reg64(f0, vm);

    for (;;) {
        fn(fd, f0);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_dreg(vd, delta_d);
                vfp_store_reg64(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vm = vfp_advance_dreg(vm, delta_m);
        vfp_load_reg64(f0, vm);
    }
    return true;
}

static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
}

static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
}

static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
}

static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_vfp_negh(tmp, tmp);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
}

static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_vfp_negs(tmp, tmp);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
}

static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
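     * (When both addends are NaNs, which NaN propagates to the result
     * depends on the operand order of the addition.)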
1803 */ 1804 TCGv_i64 tmp = tcg_temp_new_i64(); 1805 1806 gen_helper_vfp_muld(tmp, vn, vm, fpst); 1807 gen_vfp_negd(tmp, tmp); 1808 gen_helper_vfp_addd(vd, vd, tmp, fpst); 1809 } 1810 1811 static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a) 1812 { 1813 return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true); 1814 } 1815 1816 static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) 1817 { 1818 /* 1819 * VNMLS: -fd + (fn * fm) 1820 * Note that it isn't valid to replace (-A + B) with (B - A) or similar 1821 * plausible looking simplifications because this will give wrong results 1822 * for NaNs. 1823 */ 1824 TCGv_i32 tmp = tcg_temp_new_i32(); 1825 1826 gen_helper_vfp_mulh(tmp, vn, vm, fpst); 1827 gen_vfp_negh(vd, vd); 1828 gen_helper_vfp_addh(vd, vd, tmp, fpst); 1829 } 1830 1831 static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a) 1832 { 1833 return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true); 1834 } 1835 1836 static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) 1837 { 1838 /* 1839 * VNMLS: -fd + (fn * fm) 1840 * Note that it isn't valid to replace (-A + B) with (B - A) or similar 1841 * plausible looking simplifications because this will give wrong results 1842 * for NaNs. 1843 */ 1844 TCGv_i32 tmp = tcg_temp_new_i32(); 1845 1846 gen_helper_vfp_muls(tmp, vn, vm, fpst); 1847 gen_vfp_negs(vd, vd); 1848 gen_helper_vfp_adds(vd, vd, tmp, fpst); 1849 } 1850 1851 static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a) 1852 { 1853 return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true); 1854 } 1855 1856 static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst) 1857 { 1858 /* 1859 * VNMLS: -fd + (fn * fm) 1860 * Note that it isn't valid to replace (-A + B) with (B - A) or similar 1861 * plausible looking simplifications because this will give wrong results 1862 * for NaNs. 
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
}

static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_vfp_negh(tmp, tmp);
    gen_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
}

static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_vfp_negs(tmp, tmp);
    gen_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
}

static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_vfp_negd(tmp, tmp);
    gen_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
}

static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
}

static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_mulh(vd, vn, vm, fpst);
    gen_vfp_negh(vd, vd);
}

static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muls(vd, vn, vm, fpst);
    gen_vfp_negs(vd, vd);
}

static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muld(vd, vn, vm, fpst);
    gen_vfp_negd(vd, vd);
}

static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_addh, a->vd, a->vn, a->vm, false);
}

static bool
trans_VADD_sp(DisasContext *s, arg_VADD_sp *a) 1981 { 1982 return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false); 1983 } 1984 1985 static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a) 1986 { 1987 return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false); 1988 } 1989 1990 static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a) 1991 { 1992 return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false); 1993 } 1994 1995 static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a) 1996 { 1997 return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false); 1998 } 1999 2000 static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a) 2001 { 2002 return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false); 2003 } 2004 2005 static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a) 2006 { 2007 return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false); 2008 } 2009 2010 static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a) 2011 { 2012 return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false); 2013 } 2014 2015 static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a) 2016 { 2017 return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false); 2018 } 2019 2020 static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a) 2021 { 2022 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2023 return false; 2024 } 2025 return do_vfp_3op_hp(s, gen_helper_vfp_minnumh, 2026 a->vd, a->vn, a->vm, false); 2027 } 2028 2029 static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a) 2030 { 2031 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2032 return false; 2033 } 2034 return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh, 2035 a->vd, a->vn, a->vm, false); 2036 } 2037 2038 static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a) 2039 { 2040 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2041 return false; 2042 } 2043 return do_vfp_3op_sp(s, gen_helper_vfp_minnums, 2044 a->vd, a->vn, a->vm, false); 2045 } 2046 2047 static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a) 2048 { 2049 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2050 return false; 2051 } 2052 return do_vfp_3op_sp(s, gen_helper_vfp_maxnums, 2053 a->vd, a->vn, a->vm, false); 2054 } 2055 2056 static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a) 2057 { 2058 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2059 return false; 2060 } 2061 return do_vfp_3op_dp(s, gen_helper_vfp_minnumd, 2062 a->vd, a->vn, a->vm, false); 2063 } 2064 2065 static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a) 2066 { 2067 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2068 return false; 2069 } 2070 return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd, 2071 a->vd, a->vn, a->vm, false); 2072 } 2073 2074 static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) 2075 { 2076 /* 2077 * VFNMA : fd = muladd(-fd, fn, fm) 2078 * VFNMS : fd = muladd(-fd, -fn, fm) 2079 * VFMA : fd = muladd( fd, fn, fm) 2080 * VFMS : fd = muladd( fd, -fn, fm) 2081 * 2082 * These are fused multiply-add, and must be done as one floating 2083 * point operation with no rounding between the multiplication and 2084 * addition steps. NB that doing the negations here as separate 2085 * steps is correct : an input NaN should come out with its sign 2086 * bit flipped if it is a negated-input. 2087 */ 2088 TCGv_ptr fpst; 2089 TCGv_i32 vn, vm, vd; 2090 2091 /* 2092 * Present in VFPv4 only, and only with the FP16 extension. 
2093 * Note that we can't rely on the SIMDFMAC check alone, because 2094 * in a Neon-no-VFP core that ID register field will be non-zero. 2095 */ 2096 if (!dc_isar_feature(aa32_fp16_arith, s) || 2097 !dc_isar_feature(aa32_simdfmac, s) || 2098 !dc_isar_feature(aa32_fpsp_v2, s)) { 2099 return false; 2100 } 2101 2102 if (s->vec_len != 0 || s->vec_stride != 0) { 2103 return false; 2104 } 2105 2106 if (!vfp_access_check(s)) { 2107 return true; 2108 } 2109 2110 vn = tcg_temp_new_i32(); 2111 vm = tcg_temp_new_i32(); 2112 vd = tcg_temp_new_i32(); 2113 2114 vfp_load_reg16(vn, a->vn); 2115 vfp_load_reg16(vm, a->vm); 2116 if (neg_n) { 2117 /* VFNMS, VFMS */ 2118 gen_vfp_negh(vn, vn); 2119 } 2120 vfp_load_reg16(vd, a->vd); 2121 if (neg_d) { 2122 /* VFNMA, VFNMS */ 2123 gen_vfp_negh(vd, vd); 2124 } 2125 fpst = fpstatus_ptr(FPST_FPCR_F16); 2126 gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst); 2127 vfp_store_reg32(vd, a->vd); 2128 return true; 2129 } 2130 2131 static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) 2132 { 2133 /* 2134 * VFNMA : fd = muladd(-fd, fn, fm) 2135 * VFNMS : fd = muladd(-fd, -fn, fm) 2136 * VFMA : fd = muladd( fd, fn, fm) 2137 * VFMS : fd = muladd( fd, -fn, fm) 2138 * 2139 * These are fused multiply-add, and must be done as one floating 2140 * point operation with no rounding between the multiplication and 2141 * addition steps. NB that doing the negations here as separate 2142 * steps is correct : an input NaN should come out with its sign 2143 * bit flipped if it is a negated-input. 2144 */ 2145 TCGv_ptr fpst; 2146 TCGv_i32 vn, vm, vd; 2147 2148 /* 2149 * Present in VFPv4 only. 2150 * Note that we can't rely on the SIMDFMAC check alone, because 2151 * in a Neon-no-VFP core that ID register field will be non-zero. 2152 */ 2153 if (!dc_isar_feature(aa32_simdfmac, s) || 2154 !dc_isar_feature(aa32_fpsp_v2, s)) { 2155 return false; 2156 } 2157 /* 2158 * In v7A, UNPREDICTABLE with non-zero vector length/stride; from 2159 * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A. 2160 */ 2161 if (s->vec_len != 0 || s->vec_stride != 0) { 2162 return false; 2163 } 2164 2165 if (!vfp_access_check(s)) { 2166 return true; 2167 } 2168 2169 vn = tcg_temp_new_i32(); 2170 vm = tcg_temp_new_i32(); 2171 vd = tcg_temp_new_i32(); 2172 2173 vfp_load_reg32(vn, a->vn); 2174 vfp_load_reg32(vm, a->vm); 2175 if (neg_n) { 2176 /* VFNMS, VFMS */ 2177 gen_vfp_negs(vn, vn); 2178 } 2179 vfp_load_reg32(vd, a->vd); 2180 if (neg_d) { 2181 /* VFNMA, VFNMS */ 2182 gen_vfp_negs(vd, vd); 2183 } 2184 fpst = fpstatus_ptr(FPST_FPCR); 2185 gen_helper_vfp_muladds(vd, vn, vm, vd, fpst); 2186 vfp_store_reg32(vd, a->vd); 2187 return true; 2188 } 2189 2190 static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) 2191 { 2192 /* 2193 * VFNMA : fd = muladd(-fd, fn, fm) 2194 * VFNMS : fd = muladd(-fd, -fn, fm) 2195 * VFMA : fd = muladd( fd, fn, fm) 2196 * VFMS : fd = muladd( fd, -fn, fm) 2197 * 2198 * These are fused multiply-add, and must be done as one floating 2199 * point operation with no rounding between the multiplication and 2200 * addition steps. NB that doing the negations here as separate 2201 * steps is correct : an input NaN should come out with its sign 2202 * bit flipped if it is a negated-input. 2203 */ 2204 TCGv_ptr fpst; 2205 TCGv_i64 vn, vm, vd; 2206 2207 /* 2208 * Present in VFPv4 only. 2209 * Note that we can't rely on the SIMDFMAC check alone, because 2210 * in a Neon-no-VFP core that ID register field will be non-zero. 
2211 */ 2212 if (!dc_isar_feature(aa32_simdfmac, s) || 2213 !dc_isar_feature(aa32_fpdp_v2, s)) { 2214 return false; 2215 } 2216 /* 2217 * In v7A, UNPREDICTABLE with non-zero vector length/stride; from 2218 * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A. 2219 */ 2220 if (s->vec_len != 0 || s->vec_stride != 0) { 2221 return false; 2222 } 2223 2224 /* UNDEF accesses to D16-D31 if they don't exist. */ 2225 if (!dc_isar_feature(aa32_simd_r32, s) && 2226 ((a->vd | a->vn | a->vm) & 0x10)) { 2227 return false; 2228 } 2229 2230 if (!vfp_access_check(s)) { 2231 return true; 2232 } 2233 2234 vn = tcg_temp_new_i64(); 2235 vm = tcg_temp_new_i64(); 2236 vd = tcg_temp_new_i64(); 2237 2238 vfp_load_reg64(vn, a->vn); 2239 vfp_load_reg64(vm, a->vm); 2240 if (neg_n) { 2241 /* VFNMS, VFMS */ 2242 gen_vfp_negd(vn, vn); 2243 } 2244 vfp_load_reg64(vd, a->vd); 2245 if (neg_d) { 2246 /* VFNMA, VFNMS */ 2247 gen_vfp_negd(vd, vd); 2248 } 2249 fpst = fpstatus_ptr(FPST_FPCR); 2250 gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst); 2251 vfp_store_reg64(vd, a->vd); 2252 return true; 2253 } 2254 2255 #define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD) \ 2256 static bool trans_##INSN##_##PREC(DisasContext *s, \ 2257 arg_##INSN##_##PREC *a) \ 2258 { \ 2259 return do_vfm_##PREC(s, a, NEGN, NEGD); \ 2260 } 2261 2262 #define MAKE_VFM_TRANS_FNS(PREC) \ 2263 MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \ 2264 MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \ 2265 MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \ 2266 MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true) 2267 2268 MAKE_VFM_TRANS_FNS(hp) 2269 MAKE_VFM_TRANS_FNS(sp) 2270 MAKE_VFM_TRANS_FNS(dp) 2271 2272 static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a) 2273 { 2274 if (!dc_isar_feature(aa32_fp16_arith, s)) { 2275 return false; 2276 } 2277 2278 if (s->vec_len != 0 || s->vec_stride != 0) { 2279 return false; 2280 } 2281 2282 if (!vfp_access_check(s)) { 2283 return true; 2284 } 2285 2286 vfp_store_reg32(tcg_constant_i32(vfp_expand_imm(MO_16, a->imm)), a->vd); 2287 return true; 2288 } 2289 2290 static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a) 2291 { 2292 uint32_t delta_d = 0; 2293 int veclen = s->vec_len; 2294 TCGv_i32 fd; 2295 uint32_t vd; 2296 2297 vd = a->vd; 2298 2299 if (!dc_isar_feature(aa32_fpsp_v3, s)) { 2300 return false; 2301 } 2302 2303 if (!dc_isar_feature(aa32_fpshvec, s) && 2304 (veclen != 0 || s->vec_stride != 0)) { 2305 return false; 2306 } 2307 2308 if (!vfp_access_check(s)) { 2309 return true; 2310 } 2311 2312 if (veclen > 0) { 2313 /* Figure out what type of vector operation this is. */ 2314 if (vfp_sreg_is_scalar(vd)) { 2315 /* scalar */ 2316 veclen = 0; 2317 } else { 2318 delta_d = s->vec_stride + 1; 2319 } 2320 } 2321 2322 fd = tcg_constant_i32(vfp_expand_imm(MO_32, a->imm)); 2323 2324 for (;;) { 2325 vfp_store_reg32(fd, vd); 2326 2327 if (veclen == 0) { 2328 break; 2329 } 2330 2331 /* Set up the operands for the next iteration */ 2332 veclen--; 2333 vd = vfp_advance_sreg(vd, delta_d); 2334 } 2335 2336 return true; 2337 } 2338 2339 static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a) 2340 { 2341 uint32_t delta_d = 0; 2342 int veclen = s->vec_len; 2343 TCGv_i64 fd; 2344 uint32_t vd; 2345 2346 vd = a->vd; 2347 2348 if (!dc_isar_feature(aa32_fpdp_v3, s)) { 2349 return false; 2350 } 2351 2352 /* UNDEF accesses to D16-D31 if they don't exist. 

static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a)
{
    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vfp_store_reg32(tcg_constant_i32(vfp_expand_imm(MO_16, a->imm)), a->vd);
    return true;
}

static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i32 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_sreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = s->vec_stride + 1;
        }
    }

    fd = tcg_constant_i32(vfp_expand_imm(MO_32, a->imm));

    for (;;) {
        vfp_store_reg32(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_sreg(vd, delta_d);
    }

    return true;
}

static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
{
    uint32_t delta_d = 0;
    int veclen = s->vec_len;
    TCGv_i64 fd;
    uint32_t vd;

    vd = a->vd;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (veclen != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (veclen > 0) {
        /* Figure out what type of vector operation this is. */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;
        }
    }

    fd = tcg_constant_i64(vfp_expand_imm(MO_64, a->imm));

    for (;;) {
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
    }

    return true;
}
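
/*
 * Worked example: vfp_expand_imm(MO_32, 0x70) is 0x3f800000 (1.0f)
 * and vfp_expand_imm(MO_32, 0x00) is 0x40000000 (2.0f); the imm8
 * encoding covers +/- (16..31)/16 * 2^(-3..4). For the sp and dp
 * forms the constant is then replicated across the short vector
 * described by FPSCR.LEN/STRIDE, advancing Vd by delta_d per
 * element.
 */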

#define DO_VFP_2OP(INSN, PREC, FN, CHECK)                       \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        if (!dc_isar_feature(CHECK, s)) {                       \
            return false;                                       \
        }                                                       \
        return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
    }

#define DO_VFP_VMOV(INSN, PREC, FN)                             \
    static bool trans_##INSN##_##PREC(DisasContext *s,          \
                                      arg_##INSN##_##PREC *a)   \
    {                                                           \
        if (!dc_isar_feature(aa32_fp##PREC##_v2, s) &&          \
            !dc_isar_feature(aa32_mve, s)) {                    \
            return false;                                       \
        }                                                       \
        return do_vfp_2op_##PREC(s, FN, a->vd, a->vm);          \
    }

DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32)
DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64)

DO_VFP_2OP(VABS, hp, gen_vfp_absh, aa32_fp16_arith)
DO_VFP_2OP(VABS, sp, gen_vfp_abss, aa32_fpsp_v2)
DO_VFP_2OP(VABS, dp, gen_vfp_absd, aa32_fpdp_v2)

DO_VFP_2OP(VNEG, hp, gen_vfp_negh, aa32_fp16_arith)
DO_VFP_2OP(VNEG, sp, gen_vfp_negs, aa32_fpsp_v2)
DO_VFP_2OP(VNEG, dp, gen_vfp_negd, aa32_fpdp_v2)

static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrth(vd, vm, tcg_env);
}

static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
{
    gen_helper_vfp_sqrts(vd, vm, tcg_env);
}

static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
{
    gen_helper_vfp_sqrtd(vd, vm, tcg_env);
}

DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)
DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp, aa32_fpsp_v2)
DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp, aa32_fpdp_v2)
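
/*
 * For reference, DO_VFP_2OP(VABS, sp, gen_vfp_abss, aa32_fpsp_v2)
 * expands to:
 *
 *     static bool trans_VABS_sp(DisasContext *s, arg_VABS_sp *a)
 *     {
 *         if (!dc_isar_feature(aa32_fpsp_v2, s)) {
 *             return false;
 *         }
 *         return do_vfp_2op_sp(s, gen_vfp_abss, a->vd, a->vm);
 *     }
 *
 * DO_VFP_VMOV differs only in also accepting MVE implementations,
 * presumably because MVE provides the VMOV register forms even
 * without the scalar FP feature bits.
 */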

static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg16(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg16(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpeh(vd, vm, tcg_env);
    } else {
        gen_helper_vfp_cmph(vd, vm, tcg_env);
    }
    return true;
}

static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a)
{
    TCGv_i32 vd, vm;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i32();

    vfp_load_reg32(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i32(vm, 0);
    } else {
        vfp_load_reg32(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmpes(vd, vm, tcg_env);
    } else {
        gen_helper_vfp_cmps(vd, vm, tcg_env);
    }
    return true;
}

static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a)
{
    TCGv_i64 vd, vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* Vm/M bits must be zero for the Z variant */
    if (a->z && a->vm != 0) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i64();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vd, a->vd);
    if (a->z) {
        tcg_gen_movi_i64(vm, 0);
    } else {
        vfp_load_reg64(vm, a->vm);
    }

    if (a->e) {
        gen_helper_vfp_cmped(vd, vm, tcg_env);
    } else {
        gen_helper_vfp_cmpd(vd, vm, tcg_env);
    }
    return true;
}
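
/*
 * Note: the vfp_cmp* helpers leave the comparison result in the
 * FPSCR.NZCV flags in CPU state, to be transferred to the APSR by a
 * later "VMRS APSR_nzcv, FPSCR". The 'e' forms (VCMPE) also raise
 * Invalid Operation for quiet NaN operands, whereas plain VCMP only
 * does so for signaling NaNs; the 'z' forms compare against +0.0.
 */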

static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, tcg_env, vfp_f16_offset(a->vm, a->t));
    gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode);
    vfp_store_reg32(tmp, a->vd);
    return true;
}

static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    /* The T bit tells us if we want the low or high 16 bits of Vm */
    tcg_gen_ld16u_i32(tmp, tcg_env, vfp_f16_offset(a->vm, a->t));
    vd = tcg_temp_new_i64();
    gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode);
    vfp_store_reg64(vd, a->vd);
    return true;
}

static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_bf16, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    tmp = tcg_temp_new_i32();

    vfp_load_reg32(tmp, a->vm);
    gen_helper_bfcvt(tmp, tmp, fpst);
    tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t));
    return true;
}

static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();

    vfp_load_reg32(tmp, a->vm);
    gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode);
    tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t));
    return true;
}

static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
{
    TCGv_ptr fpst;
    TCGv_i32 ahp_mode;
    TCGv_i32 tmp;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    ahp_mode = get_ahp_flag();
    tmp = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();

    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode);
    tcg_gen_st16_i32(tmp, tcg_env, vfp_f16_offset(a->vd, a->t));
    return true;
}
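
/*
 * Note: get_ahp_flag() reflects FPSCR.AHP, which selects between the
 * IEEE half-precision format and the Arm alternative half-precision
 * format (no infinities or NaNs) for the f16 conversions above. The
 * bfloat16 conversion takes no AHP argument because BF16 has only a
 * single format.
 */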

static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg16(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    return true;
}

static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    return true;
}

static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    return true;
}

static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg16(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
    gen_helper_rinth(tmp, tmp, fpst);
    gen_restore_rmode(tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    return true;
}

static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
    gen_helper_rints(tmp, tmp, fpst);
    gen_restore_rmode(tcg_rmode, fpst);
    vfp_store_reg32(tmp, a->vd);
    return true;
}

static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;
    TCGv_i32 tcg_rmode;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
    gen_helper_rintd(tmp, tmp, fpst);
    gen_restore_rmode(tcg_rmode, fpst);
    vfp_store_reg64(tmp, a->vd);
    return true;
}

static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg16(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    gen_helper_rinth_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    return true;
}

static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
{
    TCGv_ptr fpst;
    TCGv_i32 tmp;

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    vfp_load_reg32(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rints_exact(tmp, tmp, fpst);
    vfp_store_reg32(tmp, a->vd);
    return true;
}

static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
{
    TCGv_ptr fpst;
    TCGv_i64 tmp;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_vrint, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i64();
    vfp_load_reg64(tmp, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    gen_helper_rintd_exact(tmp, tmp, fpst);
    vfp_store_reg64(tmp, a->vd);
    return true;
}
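
/*
 * Note on the three round-to-integral flavours above: VRINTR rounds
 * using the rounding mode currently in the FPSCR; VRINTZ forces
 * round-towards-zero by temporarily swapping the rounding mode in the
 * fp status (gen_set_rmode/gen_restore_rmode); VRINTX ("exact") uses
 * the FPSCR mode but additionally raises Inexact when the result
 * differs from the input value.
 */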

static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
{
    TCGv_i64 vd;
    TCGv_i32 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    gen_helper_vfp_fcvtds(vd, vm, tcg_env);
    vfp_store_reg64(vd, a->vd);
    return true;
}

static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
{
    TCGv_i64 vm;
    TCGv_i32 vd;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vd = tcg_temp_new_i32();
    vm = tcg_temp_new_i64();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vfp_fcvtsd(vd, vm, tcg_env);
    vfp_store_reg32(vd, a->vd);
    return true;
}

static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR_F16);
    if (a->s) {
        /* i32 -> f16 */
        gen_helper_vfp_sitoh(vm, vm, fpst);
    } else {
        /* u32 -> f16 */
        gen_helper_vfp_uitoh(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    return true;
}

static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f32 */
        gen_helper_vfp_sitos(vm, vm, fpst);
    } else {
        /* u32 -> f32 */
        gen_helper_vfp_uitos(vm, vm, fpst);
    }
    vfp_store_reg32(vm, a->vd);
    return true;
}

static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
{
    TCGv_i32 vm;
    TCGv_i64 vd;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i32();
    vd = tcg_temp_new_i64();
    vfp_load_reg32(vm, a->vm);
    fpst = fpstatus_ptr(FPST_FPCR);
    if (a->s) {
        /* i32 -> f64 */
        gen_helper_vfp_sitod(vd, vm, fpst);
    } else {
        /* u32 -> f64 */
        gen_helper_vfp_uitod(vd, vm, fpst);
    }
    vfp_store_reg64(vd, a->vd);
    return true;
}
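
/*
 * Note: in the two size conversions above the suffix names the source
 * precision, so trans_VCVT_sp widens a single to a double
 * (vfp_fcvtds) and trans_VCVT_dp narrows a double to a single
 * (vfp_fcvtsd). For the integer-to-float conversions, a->s selects a
 * signed rather than unsigned source and rounding follows the FPSCR
 * rounding mode.
 */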

static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    if (!dc_isar_feature(aa32_jscvt, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);
    gen_helper_vjcvt(vd, vm, tcg_env);
    vfp_store_reg32(vd, a->vd);
    return true;
}

static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    shift = tcg_constant_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    return true;
}

static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
{
    TCGv_i32 vd, shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpsp_v3, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i32();
    vfp_load_reg32(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_constant_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg32(vd, a->vd);
    return true;
}
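
/*
 * Note on the fixed-point conversions: a->opc packs the op:U:sx bits,
 * so bit 2 selects the direction (0: fixed-point to float, rounded to
 * nearest; 1: float to fixed-point, rounded towards zero), bit 1
 * selects unsigned, and bit 0 selects a 32-bit rather than 16-bit
 * fixed-point value, giving frac_bits = (sx ? 32 : 16) - imm. Vd is
 * both source and destination, which is why it is loaded above even
 * though the insn has no Vm operand.
 */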

static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
{
    TCGv_i64 vd;
    TCGv_i32 shift;
    TCGv_ptr fpst;
    int frac_bits;

    if (!dc_isar_feature(aa32_fpdp_v3, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm);

    vd = tcg_temp_new_i64();
    vfp_load_reg64(vd, a->vd);

    fpst = fpstatus_ptr(FPST_FPCR);
    shift = tcg_constant_i32(frac_bits);

    /* Switch on op:U:sx bits */
    switch (a->opc) {
    case 0:
        gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 1:
        gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 2:
        gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 3:
        gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst);
        break;
    case 4:
        gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 5:
        gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst);
        break;
    case 6:
        gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst);
        break;
    case 7:
        gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst);
        break;
    default:
        g_assert_not_reached();
    }

    vfp_store_reg64(vd, a->vd);
    return true;
}

static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR_F16);
    vm = tcg_temp_new_i32();
    vfp_load_reg16(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosih(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizh(vm, vm, fpst);
        } else {
            gen_helper_vfp_touih(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    return true;
}

static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
{
    TCGv_i32 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpsp_v2, s)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i32();
    vfp_load_reg32(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_tosis(vm, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizs(vm, vm, fpst);
        } else {
            gen_helper_vfp_touis(vm, vm, fpst);
        }
    }
    vfp_store_reg32(vm, a->vd);
    return true;
}

static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
{
    TCGv_i32 vd;
    TCGv_i64 vm;
    TCGv_ptr fpst;

    if (!dc_isar_feature(aa32_fpdp_v2, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    fpst = fpstatus_ptr(FPST_FPCR);
    vm = tcg_temp_new_i64();
    vd = tcg_temp_new_i32();
    vfp_load_reg64(vm, a->vm);

    if (a->s) {
        if (a->rz) {
            gen_helper_vfp_tosizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_tosid(vd, vm, fpst);
        }
    } else {
        if (a->rz) {
            gen_helper_vfp_touizd(vd, vm, fpst);
        } else {
            gen_helper_vfp_touid(vd, vm, fpst);
        }
    }
    vfp_store_reg32(vd, a->vd);
    return true;
}
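
/*
 * Note: for the float-to-integer conversions above, a->rz selects the
 * round-towards-zero helpers (the plain VCVT encoding) while the
 * VCVTR form uses the rounding mode from the FPSCR; a->s selects a
 * signed rather than unsigned result. The result is always written to
 * a single-precision register, even in the _dp case.
 */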

static bool trans_VINS(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rd, rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Insert low half of Vm into high half of Vd */
    rm = tcg_temp_new_i32();
    rd = tcg_temp_new_i32();
    vfp_load_reg16(rm, a->vm);
    vfp_load_reg16(rd, a->vd);
    tcg_gen_deposit_i32(rd, rd, rm, 16, 16);
    vfp_store_reg32(rd, a->vd);
    return true;
}

static bool trans_VMOVX(DisasContext *s, arg_VINS *a)
{
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_fp16_arith, s)) {
        return false;
    }

    if (s->vec_len != 0 || s->vec_stride != 0) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* Set Vd to high half of Vm */
    rm = tcg_temp_new_i32();
    vfp_load_reg32(rm, a->vm);
    tcg_gen_shri_i32(rm, rm, 16);
    vfp_store_reg32(rm, a->vd);
    return true;
}
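
/*
 * Note: for VINS, tcg_gen_deposit_i32(rd, rd, rm, 16, 16) writes bits
 * [15:0] of Vm into bits [31:16] of Vd while leaving Vd[15:0]
 * unchanged; for VMOVX the shift right by 16 means Vd[15:0] receives
 * Vm[31:16] and Vd[31:16] is zeroed.
 */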