1 /* 2 * ARM translation: AArch32 VFP instructions 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * Copyright (c) 2005-2007 CodeSourcery 6 * Copyright (c) 2007 OpenedHand, Ltd. 7 * Copyright (c) 2019 Linaro, Ltd. 8 * 9 * This library is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 2.1 of the License, or (at your option) any later version. 13 * 14 * This library is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 21 */ 22 23 #include "qemu/osdep.h" 24 #include "tcg/tcg-op.h" 25 #include "tcg/tcg-op-gvec.h" 26 #include "exec/exec-all.h" 27 #include "exec/gen-icount.h" 28 #include "translate.h" 29 #include "translate-a32.h" 30 31 /* Include the generated VFP decoder */ 32 #include "decode-vfp.c.inc" 33 #include "decode-vfp-uncond.c.inc" 34 35 static inline void vfp_load_reg64(TCGv_i64 var, int reg) 36 { 37 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(true, reg)); 38 } 39 40 static inline void vfp_store_reg64(TCGv_i64 var, int reg) 41 { 42 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(true, reg)); 43 } 44 45 static inline void vfp_load_reg32(TCGv_i32 var, int reg) 46 { 47 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg)); 48 } 49 50 static inline void vfp_store_reg32(TCGv_i32 var, int reg) 51 { 52 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg)); 53 } 54 55 /* 56 * The imm8 encodes the sign bit, enough bits to represent an exponent in 57 * the range 01....1xx to 10....0xx, and the most significant 4 bits of 58 * the mantissa; see VFPExpandImm() in the v8 ARM ARM. 59 */ 60 uint64_t vfp_expand_imm(int size, uint8_t imm8) 61 { 62 uint64_t imm; 63 64 switch (size) { 65 case MO_64: 66 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) | 67 (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) | 68 extract32(imm8, 0, 6); 69 imm <<= 48; 70 break; 71 case MO_32: 72 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) | 73 (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) | 74 (extract32(imm8, 0, 6) << 3); 75 imm <<= 16; 76 break; 77 case MO_16: 78 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) | 79 (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) | 80 (extract32(imm8, 0, 6) << 6); 81 break; 82 default: 83 g_assert_not_reached(); 84 } 85 return imm; 86 } 87 88 /* 89 * Return the offset of a 16-bit half of the specified VFP single-precision 90 * register. If top is true, returns the top 16 bits; otherwise the bottom 91 * 16 bits. 92 */ 93 static inline long vfp_f16_offset(unsigned reg, bool top) 94 { 95 long offs = vfp_reg_offset(false, reg); 96 #if HOST_BIG_ENDIAN 97 if (!top) { 98 offs += 2; 99 } 100 #else 101 if (top) { 102 offs += 2; 103 } 104 #endif 105 return offs; 106 } 107 108 /* 109 * Generate code for M-profile lazy FP state preservation if needed; 110 * this corresponds to the pseudocode PreserveFPState() function. 111 */ 112 static void gen_preserve_fp_state(DisasContext *s, bool skip_context_update) 113 { 114 if (s->v7m_lspact) { 115 /* 116 * Lazy state saving affects external memory and also the NVIC, 117 * so we must mark it as an IO operation for icount (and cause 118 * this to be the last insn in the TB). 
         */
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            s->base.is_jmp = DISAS_UPDATE_EXIT;
            gen_io_start();
        }
        gen_helper_v7m_preserve_fp_state(cpu_env);
        /*
         * If the preserve_fp_state helper doesn't throw an exception
         * then it will clear LSPACT; we don't need to repeat this for
         * any further FP insns in this TB.
         */
        s->v7m_lspact = false;
        /*
         * The helper might have zeroed VPR, so we do not know the
         * correct value for the MVE_NO_PRED TB flag any more.
         * If we're about to create a new fp context then that
         * will precisely determine the MVE_NO_PRED value (see
         * gen_update_fp_context()). Otherwise, we must:
         *  - set s->mve_no_pred to false, so this instruction
         *    is generated to use helper functions
         *  - end the TB now, without chaining to the next TB
         */
        if (skip_context_update || !s->v7m_new_fp_ctxt_needed) {
            s->mve_no_pred = false;
            s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
        }
    }
}

/*
 * Generate code for M-profile FP context handling: update the
 * ownership of the FP context, and create a new context if
 * necessary. This corresponds to the parts of the pseudocode
 * ExecuteFPCheck() after the initial PreserveFPState() call.
 */
static void gen_update_fp_context(DisasContext *s)
{
    /* Update ownership of FP context: set FPCCR.S to match current state */
    if (s->v8m_fpccr_s_wrong) {
        TCGv_i32 tmp;

        tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
        if (s->v8m_secure) {
            tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
        } else {
            tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
        }
        store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
        /* Don't need to do this for any further FP insns in this TB */
        s->v8m_fpccr_s_wrong = false;
    }

    if (s->v7m_new_fp_ctxt_needed) {
        /*
         * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA,
         * the FPSCR, and VPR.
         */
        TCGv_i32 control, fpscr;
        uint32_t bits = R_V7M_CONTROL_FPCA_MASK;

        fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
        gen_helper_vfp_set_fpscr(cpu_env, fpscr);
        if (dc_isar_feature(aa32_mve, s)) {
            store_cpu_field(tcg_constant_i32(0), v7m.vpr);
        }
        /*
         * We just updated the FPSCR and VPR. Some of this state is cached
         * in the MVE_NO_PRED TB flag. We want to avoid having to end the
         * TB here, which means we need the new value of the MVE_NO_PRED
         * flag to be exactly known here and the same for all executions.
         * Luckily FPDSCR.LTPSIZE is always constant 4 and the VPR is
         * always set to 0, so the new MVE_NO_PRED flag is always 1
         * if and only if we have MVE.
         *
         * (The other FPSCR state cached in TB flags is VECLEN and VECSTRIDE,
         * but those do not exist for M-profile, so are not relevant here.)
         */
        s->mve_no_pred = dc_isar_feature(aa32_mve, s);

        if (s->v8m_secure) {
            bits |= R_V7M_CONTROL_SFPA_MASK;
        }
        control = load_cpu_field(v7m.control[M_REG_S]);
        tcg_gen_ori_i32(control, control, bits);
        store_cpu_field(control, v7m.control[M_REG_S]);
        /* Don't need to do this for any further FP insns in this TB */
        s->v7m_new_fp_ctxt_needed = false;
    }
}

/*
 * Check that VFP access is enabled, A-profile specific version.
 *
 * If VFP is enabled, return true. If not, emit code to generate an
 * appropriate exception and return false.
214 * The ignore_vfp_enabled argument specifies that we should ignore 215 * whether VFP is enabled via FPEXC.EN: this should be true for FMXR/FMRX 216 * accesses to FPSID, FPEXC, MVFR0, MVFR1, MVFR2, and false for all other insns. 217 */ 218 static bool vfp_access_check_a(DisasContext *s, bool ignore_vfp_enabled) 219 { 220 if (s->fp_excp_el) { 221 /* 222 * The full syndrome is only used for HSR when HCPTR traps: 223 * For v8, when TA==0, coproc is RES0. 224 * For v7, any use of a Floating-point instruction or access 225 * to a Floating-point Extension register that is trapped to 226 * Hyp mode because of a trap configured in the HCPTR sets 227 * this field to 0xA. 228 */ 229 int coproc = arm_dc_feature(s, ARM_FEATURE_V8) ? 0 : 0xa; 230 uint32_t syn = syn_fp_access_trap(1, 0xe, false, coproc); 231 232 gen_exception_insn_el(s, 0, EXCP_UDEF, syn, s->fp_excp_el); 233 return false; 234 } 235 236 /* 237 * Note that rebuild_hflags_a32 has already accounted for being in EL0 238 * and the higher EL in A64 mode, etc. Unlike A64 mode, there do not 239 * appear to be any insns which touch VFP which are allowed. 240 */ 241 if (s->sme_trap_nonstreaming) { 242 gen_exception_insn(s, 0, EXCP_UDEF, 243 syn_smetrap(SME_ET_Streaming, 244 curr_insn_len(s) == 2)); 245 return false; 246 } 247 248 if (!s->vfp_enabled && !ignore_vfp_enabled) { 249 assert(!arm_dc_feature(s, ARM_FEATURE_M)); 250 unallocated_encoding(s); 251 return false; 252 } 253 return true; 254 } 255 256 /* 257 * Check that VFP access is enabled, M-profile specific version. 258 * 259 * If VFP is enabled, do the necessary M-profile lazy-FP handling and then 260 * return true. If not, emit code to generate an appropriate exception and 261 * return false. 262 * skip_context_update is true to skip the "update FP context" part of this. 263 */ 264 bool vfp_access_check_m(DisasContext *s, bool skip_context_update) 265 { 266 if (s->fp_excp_el) { 267 /* 268 * M-profile mostly catches the "FPU disabled" case early, in 269 * disas_m_nocp(), but a few insns (eg LCTP, WLSTP, DLSTP) 270 * which do coprocessor-checks are outside the large ranges of 271 * the encoding space handled by the patterns in m-nocp.decode, 272 * and for them we may need to raise NOCP here. 273 */ 274 gen_exception_insn_el(s, 0, EXCP_NOCP, 275 syn_uncategorized(), s->fp_excp_el); 276 return false; 277 } 278 279 /* Handle M-profile lazy FP state mechanics */ 280 281 /* Trigger lazy-state preservation if necessary */ 282 gen_preserve_fp_state(s, skip_context_update); 283 284 if (!skip_context_update) { 285 /* Update ownership of FP context and create new FP context if needed */ 286 gen_update_fp_context(s); 287 } 288 289 return true; 290 } 291 292 /* 293 * The most usual kind of VFP access check, for everything except 294 * FMXR/FMRX to the always-available special registers. 
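 *
 * Trans functions call this after their ISA feature checks; when it
 * returns false the caller must still return true, because the access
 * check has already emitted the code that raises the exception.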
295 */ 296 bool vfp_access_check(DisasContext *s) 297 { 298 if (arm_dc_feature(s, ARM_FEATURE_M)) { 299 return vfp_access_check_m(s, false); 300 } else { 301 return vfp_access_check_a(s, false); 302 } 303 } 304 305 static bool trans_VSEL(DisasContext *s, arg_VSEL *a) 306 { 307 uint32_t rd, rn, rm; 308 int sz = a->sz; 309 310 if (!dc_isar_feature(aa32_vsel, s)) { 311 return false; 312 } 313 314 if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) { 315 return false; 316 } 317 318 if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) { 319 return false; 320 } 321 322 /* UNDEF accesses to D16-D31 if they don't exist */ 323 if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && 324 ((a->vm | a->vn | a->vd) & 0x10)) { 325 return false; 326 } 327 328 rd = a->vd; 329 rn = a->vn; 330 rm = a->vm; 331 332 if (!vfp_access_check(s)) { 333 return true; 334 } 335 336 if (sz == 3) { 337 TCGv_i64 frn, frm, dest; 338 TCGv_i64 tmp, zero, zf, nf, vf; 339 340 zero = tcg_constant_i64(0); 341 342 frn = tcg_temp_new_i64(); 343 frm = tcg_temp_new_i64(); 344 dest = tcg_temp_new_i64(); 345 346 zf = tcg_temp_new_i64(); 347 nf = tcg_temp_new_i64(); 348 vf = tcg_temp_new_i64(); 349 350 tcg_gen_extu_i32_i64(zf, cpu_ZF); 351 tcg_gen_ext_i32_i64(nf, cpu_NF); 352 tcg_gen_ext_i32_i64(vf, cpu_VF); 353 354 vfp_load_reg64(frn, rn); 355 vfp_load_reg64(frm, rm); 356 switch (a->cc) { 357 case 0: /* eq: Z */ 358 tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero, frn, frm); 359 break; 360 case 1: /* vs: V */ 361 tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero, frn, frm); 362 break; 363 case 2: /* ge: N == V -> N ^ V == 0 */ 364 tmp = tcg_temp_new_i64(); 365 tcg_gen_xor_i64(tmp, vf, nf); 366 tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, frn, frm); 367 break; 368 case 3: /* gt: !Z && N == V */ 369 tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero, frn, frm); 370 tmp = tcg_temp_new_i64(); 371 tcg_gen_xor_i64(tmp, vf, nf); 372 tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero, dest, frm); 373 break; 374 } 375 vfp_store_reg64(dest, rd); 376 } else { 377 TCGv_i32 frn, frm, dest; 378 TCGv_i32 tmp, zero; 379 380 zero = tcg_constant_i32(0); 381 382 frn = tcg_temp_new_i32(); 383 frm = tcg_temp_new_i32(); 384 dest = tcg_temp_new_i32(); 385 vfp_load_reg32(frn, rn); 386 vfp_load_reg32(frm, rm); 387 switch (a->cc) { 388 case 0: /* eq: Z */ 389 tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero, frn, frm); 390 break; 391 case 1: /* vs: V */ 392 tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero, frn, frm); 393 break; 394 case 2: /* ge: N == V -> N ^ V == 0 */ 395 tmp = tcg_temp_new_i32(); 396 tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); 397 tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, frn, frm); 398 break; 399 case 3: /* gt: !Z && N == V */ 400 tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero, frn, frm); 401 tmp = tcg_temp_new_i32(); 402 tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF); 403 tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero, dest, frm); 404 break; 405 } 406 /* For fp16 the top half is always zeroes */ 407 if (sz == 1) { 408 tcg_gen_andi_i32(dest, dest, 0xffff); 409 } 410 vfp_store_reg32(dest, rd); 411 } 412 413 return true; 414 } 415 416 /* 417 * Table for converting the most common AArch32 encoding of 418 * rounding mode to arm_fprounding order (which matches the 419 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM(). 
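 *
 * That is, instruction RM field 0b00 -> FPROUNDING_TIEAWAY (the 'A'
 * forms such as VRINTA/VCVTA), 0b01 -> FPROUNDING_TIEEVEN ('N' forms),
 * 0b10 -> FPROUNDING_POSINF ('P' forms) and 0b11 -> FPROUNDING_NEGINF
 * ('M' forms), matching the table entries below in order.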
420 */ 421 static const uint8_t fp_decode_rm[] = { 422 FPROUNDING_TIEAWAY, 423 FPROUNDING_TIEEVEN, 424 FPROUNDING_POSINF, 425 FPROUNDING_NEGINF, 426 }; 427 428 static bool trans_VRINT(DisasContext *s, arg_VRINT *a) 429 { 430 uint32_t rd, rm; 431 int sz = a->sz; 432 TCGv_ptr fpst; 433 TCGv_i32 tcg_rmode; 434 int rounding = fp_decode_rm[a->rm]; 435 436 if (!dc_isar_feature(aa32_vrint, s)) { 437 return false; 438 } 439 440 if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) { 441 return false; 442 } 443 444 if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) { 445 return false; 446 } 447 448 /* UNDEF accesses to D16-D31 if they don't exist */ 449 if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && 450 ((a->vm | a->vd) & 0x10)) { 451 return false; 452 } 453 454 rd = a->vd; 455 rm = a->vm; 456 457 if (!vfp_access_check(s)) { 458 return true; 459 } 460 461 if (sz == 1) { 462 fpst = fpstatus_ptr(FPST_FPCR_F16); 463 } else { 464 fpst = fpstatus_ptr(FPST_FPCR); 465 } 466 467 tcg_rmode = gen_set_rmode(rounding, fpst); 468 469 if (sz == 3) { 470 TCGv_i64 tcg_op; 471 TCGv_i64 tcg_res; 472 tcg_op = tcg_temp_new_i64(); 473 tcg_res = tcg_temp_new_i64(); 474 vfp_load_reg64(tcg_op, rm); 475 gen_helper_rintd(tcg_res, tcg_op, fpst); 476 vfp_store_reg64(tcg_res, rd); 477 } else { 478 TCGv_i32 tcg_op; 479 TCGv_i32 tcg_res; 480 tcg_op = tcg_temp_new_i32(); 481 tcg_res = tcg_temp_new_i32(); 482 vfp_load_reg32(tcg_op, rm); 483 if (sz == 1) { 484 gen_helper_rinth(tcg_res, tcg_op, fpst); 485 } else { 486 gen_helper_rints(tcg_res, tcg_op, fpst); 487 } 488 vfp_store_reg32(tcg_res, rd); 489 } 490 491 gen_restore_rmode(tcg_rmode, fpst); 492 return true; 493 } 494 495 static bool trans_VCVT(DisasContext *s, arg_VCVT *a) 496 { 497 uint32_t rd, rm; 498 int sz = a->sz; 499 TCGv_ptr fpst; 500 TCGv_i32 tcg_rmode, tcg_shift; 501 int rounding = fp_decode_rm[a->rm]; 502 bool is_signed = a->op; 503 504 if (!dc_isar_feature(aa32_vcvt_dr, s)) { 505 return false; 506 } 507 508 if (sz == 3 && !dc_isar_feature(aa32_fpdp_v2, s)) { 509 return false; 510 } 511 512 if (sz == 1 && !dc_isar_feature(aa32_fp16_arith, s)) { 513 return false; 514 } 515 516 /* UNDEF accesses to D16-D31 if they don't exist */ 517 if (sz == 3 && !dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { 518 return false; 519 } 520 521 rd = a->vd; 522 rm = a->vm; 523 524 if (!vfp_access_check(s)) { 525 return true; 526 } 527 528 if (sz == 1) { 529 fpst = fpstatus_ptr(FPST_FPCR_F16); 530 } else { 531 fpst = fpstatus_ptr(FPST_FPCR); 532 } 533 534 tcg_shift = tcg_constant_i32(0); 535 tcg_rmode = gen_set_rmode(rounding, fpst); 536 537 if (sz == 3) { 538 TCGv_i64 tcg_double, tcg_res; 539 TCGv_i32 tcg_tmp; 540 tcg_double = tcg_temp_new_i64(); 541 tcg_res = tcg_temp_new_i64(); 542 tcg_tmp = tcg_temp_new_i32(); 543 vfp_load_reg64(tcg_double, rm); 544 if (is_signed) { 545 gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst); 546 } else { 547 gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst); 548 } 549 tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res); 550 vfp_store_reg32(tcg_tmp, rd); 551 } else { 552 TCGv_i32 tcg_single, tcg_res; 553 tcg_single = tcg_temp_new_i32(); 554 tcg_res = tcg_temp_new_i32(); 555 vfp_load_reg32(tcg_single, rm); 556 if (sz == 1) { 557 if (is_signed) { 558 gen_helper_vfp_toslh(tcg_res, tcg_single, tcg_shift, fpst); 559 } else { 560 gen_helper_vfp_toulh(tcg_res, tcg_single, tcg_shift, fpst); 561 } 562 } else { 563 if (is_signed) { 564 gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst); 565 } else { 566 gen_helper_vfp_touls(tcg_res, 
tcg_single, tcg_shift, fpst); 567 } 568 } 569 vfp_store_reg32(tcg_res, rd); 570 } 571 572 gen_restore_rmode(tcg_rmode, fpst); 573 return true; 574 } 575 576 bool mve_skip_vmov(DisasContext *s, int vn, int index, int size) 577 { 578 /* 579 * In a CPU with MVE, the VMOV (vector lane to general-purpose register) 580 * and VMOV (general-purpose register to vector lane) insns are not 581 * predicated, but they are subject to beatwise execution if they are 582 * not in an IT block. 583 * 584 * Since our implementation always executes all 4 beats in one tick, 585 * this means only that if PSR.ECI says we should not be executing 586 * the beat corresponding to the lane of the vector register being 587 * accessed then we should skip performing the move, and that we need 588 * to do the usual check for bad ECI state and advance of ECI state. 589 * 590 * Note that if PSR.ECI is non-zero then we cannot be in an IT block. 591 * 592 * Return true if this VMOV scalar <-> gpreg should be skipped because 593 * the MVE PSR.ECI state says we skip the beat where the store happens. 594 */ 595 596 /* Calculate the byte offset into Qn which we're going to access */ 597 int ofs = (index << size) + ((vn & 1) * 8); 598 599 if (!dc_isar_feature(aa32_mve, s)) { 600 return false; 601 } 602 603 switch (s->eci) { 604 case ECI_NONE: 605 return false; 606 case ECI_A0: 607 return ofs < 4; 608 case ECI_A0A1: 609 return ofs < 8; 610 case ECI_A0A1A2: 611 case ECI_A0A1A2B0: 612 return ofs < 12; 613 default: 614 g_assert_not_reached(); 615 } 616 } 617 618 static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a) 619 { 620 /* VMOV scalar to general purpose register */ 621 TCGv_i32 tmp; 622 623 /* 624 * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has 625 * all sizes, whether the CPU has fp or not. 626 */ 627 if (!dc_isar_feature(aa32_mve, s)) { 628 if (a->size == MO_32 629 ? !dc_isar_feature(aa32_fpsp_v2, s) 630 : !arm_dc_feature(s, ARM_FEATURE_NEON)) { 631 return false; 632 } 633 } 634 635 /* UNDEF accesses to D16-D31 if they don't exist */ 636 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) { 637 return false; 638 } 639 640 if (dc_isar_feature(aa32_mve, s)) { 641 if (!mve_eci_check(s)) { 642 return true; 643 } 644 } 645 646 if (!vfp_access_check(s)) { 647 return true; 648 } 649 650 if (!mve_skip_vmov(s, a->vn, a->index, a->size)) { 651 tmp = tcg_temp_new_i32(); 652 read_neon_element32(tmp, a->vn, a->index, 653 a->size | (a->u ? 0 : MO_SIGN)); 654 store_reg(s, a->rt, tmp); 655 } 656 657 if (dc_isar_feature(aa32_mve, s)) { 658 mve_update_and_store_eci(s); 659 } 660 return true; 661 } 662 663 static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a) 664 { 665 /* VMOV general purpose register to scalar */ 666 TCGv_i32 tmp; 667 668 /* 669 * SIZE == MO_32 is a VFP instruction; otherwise NEON. MVE has 670 * all sizes, whether the CPU has fp or not. 671 */ 672 if (!dc_isar_feature(aa32_mve, s)) { 673 if (a->size == MO_32 674 ? 
!dc_isar_feature(aa32_fpsp_v2, s) 675 : !arm_dc_feature(s, ARM_FEATURE_NEON)) { 676 return false; 677 } 678 } 679 680 /* UNDEF accesses to D16-D31 if they don't exist */ 681 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) { 682 return false; 683 } 684 685 if (dc_isar_feature(aa32_mve, s)) { 686 if (!mve_eci_check(s)) { 687 return true; 688 } 689 } 690 691 if (!vfp_access_check(s)) { 692 return true; 693 } 694 695 if (!mve_skip_vmov(s, a->vn, a->index, a->size)) { 696 tmp = load_reg(s, a->rt); 697 write_neon_element32(tmp, a->vn, a->index, a->size); 698 } 699 700 if (dc_isar_feature(aa32_mve, s)) { 701 mve_update_and_store_eci(s); 702 } 703 return true; 704 } 705 706 static bool trans_VDUP(DisasContext *s, arg_VDUP *a) 707 { 708 /* VDUP (general purpose register) */ 709 TCGv_i32 tmp; 710 int size, vec_size; 711 712 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { 713 return false; 714 } 715 716 /* UNDEF accesses to D16-D31 if they don't exist */ 717 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vn & 0x10)) { 718 return false; 719 } 720 721 if (a->b && a->e) { 722 return false; 723 } 724 725 if (a->q && (a->vn & 1)) { 726 return false; 727 } 728 729 vec_size = a->q ? 16 : 8; 730 if (a->b) { 731 size = 0; 732 } else if (a->e) { 733 size = 1; 734 } else { 735 size = 2; 736 } 737 738 if (!vfp_access_check(s)) { 739 return true; 740 } 741 742 tmp = load_reg(s, a->rt); 743 tcg_gen_gvec_dup_i32(size, neon_full_reg_offset(a->vn), 744 vec_size, vec_size, tmp); 745 return true; 746 } 747 748 static bool trans_VMSR_VMRS(DisasContext *s, arg_VMSR_VMRS *a) 749 { 750 TCGv_i32 tmp; 751 bool ignore_vfp_enabled = false; 752 753 if (arm_dc_feature(s, ARM_FEATURE_M)) { 754 /* M profile version was already handled in m-nocp.decode */ 755 return false; 756 } 757 758 if (!dc_isar_feature(aa32_fpsp_v2, s)) { 759 return false; 760 } 761 762 switch (a->reg) { 763 case ARM_VFP_FPSID: 764 /* 765 * VFPv2 allows access to FPSID from userspace; VFPv3 restricts 766 * all ID registers to privileged access only. 767 */ 768 if (IS_USER(s) && dc_isar_feature(aa32_fpsp_v3, s)) { 769 return false; 770 } 771 ignore_vfp_enabled = true; 772 break; 773 case ARM_VFP_MVFR0: 774 case ARM_VFP_MVFR1: 775 if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_MVFR)) { 776 return false; 777 } 778 ignore_vfp_enabled = true; 779 break; 780 case ARM_VFP_MVFR2: 781 if (IS_USER(s) || !arm_dc_feature(s, ARM_FEATURE_V8)) { 782 return false; 783 } 784 ignore_vfp_enabled = true; 785 break; 786 case ARM_VFP_FPSCR: 787 break; 788 case ARM_VFP_FPEXC: 789 if (IS_USER(s)) { 790 return false; 791 } 792 ignore_vfp_enabled = true; 793 break; 794 case ARM_VFP_FPINST: 795 case ARM_VFP_FPINST2: 796 /* Not present in VFPv3 */ 797 if (IS_USER(s) || dc_isar_feature(aa32_fpsp_v3, s)) { 798 return false; 799 } 800 break; 801 default: 802 return false; 803 } 804 805 /* 806 * Call vfp_access_check_a() directly, because we need to tell 807 * it to ignore FPEXC.EN for some register accesses. 
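     * (FPSID, MVFR0/1/2 and FPEXC are the cases which set
     * ignore_vfp_enabled above: they are readable via VMRS even when
     * FPEXC.EN is 0.)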
808 */ 809 if (!vfp_access_check_a(s, ignore_vfp_enabled)) { 810 return true; 811 } 812 813 if (a->l) { 814 /* VMRS, move VFP special register to gp register */ 815 switch (a->reg) { 816 case ARM_VFP_MVFR0: 817 case ARM_VFP_MVFR1: 818 case ARM_VFP_MVFR2: 819 case ARM_VFP_FPSID: 820 if (s->current_el == 1) { 821 gen_set_condexec(s); 822 gen_update_pc(s, 0); 823 gen_helper_check_hcr_el2_trap(cpu_env, 824 tcg_constant_i32(a->rt), 825 tcg_constant_i32(a->reg)); 826 } 827 /* fall through */ 828 case ARM_VFP_FPEXC: 829 case ARM_VFP_FPINST: 830 case ARM_VFP_FPINST2: 831 tmp = load_cpu_field(vfp.xregs[a->reg]); 832 break; 833 case ARM_VFP_FPSCR: 834 if (a->rt == 15) { 835 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]); 836 tcg_gen_andi_i32(tmp, tmp, FPCR_NZCV_MASK); 837 } else { 838 tmp = tcg_temp_new_i32(); 839 gen_helper_vfp_get_fpscr(tmp, cpu_env); 840 } 841 break; 842 default: 843 g_assert_not_reached(); 844 } 845 846 if (a->rt == 15) { 847 /* Set the 4 flag bits in the CPSR. */ 848 gen_set_nzcv(tmp); 849 } else { 850 store_reg(s, a->rt, tmp); 851 } 852 } else { 853 /* VMSR, move gp register to VFP special register */ 854 switch (a->reg) { 855 case ARM_VFP_FPSID: 856 case ARM_VFP_MVFR0: 857 case ARM_VFP_MVFR1: 858 case ARM_VFP_MVFR2: 859 /* Writes are ignored. */ 860 break; 861 case ARM_VFP_FPSCR: 862 tmp = load_reg(s, a->rt); 863 gen_helper_vfp_set_fpscr(cpu_env, tmp); 864 gen_lookup_tb(s); 865 break; 866 case ARM_VFP_FPEXC: 867 /* 868 * TODO: VFP subarchitecture support. 869 * For now, keep the EN bit only 870 */ 871 tmp = load_reg(s, a->rt); 872 tcg_gen_andi_i32(tmp, tmp, 1 << 30); 873 store_cpu_field(tmp, vfp.xregs[a->reg]); 874 gen_lookup_tb(s); 875 break; 876 case ARM_VFP_FPINST: 877 case ARM_VFP_FPINST2: 878 tmp = load_reg(s, a->rt); 879 store_cpu_field(tmp, vfp.xregs[a->reg]); 880 break; 881 default: 882 g_assert_not_reached(); 883 } 884 } 885 886 return true; 887 } 888 889 890 static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a) 891 { 892 TCGv_i32 tmp; 893 894 if (!dc_isar_feature(aa32_fp16_arith, s)) { 895 return false; 896 } 897 898 if (a->rt == 15) { 899 /* UNPREDICTABLE; we choose to UNDEF */ 900 return false; 901 } 902 903 if (!vfp_access_check(s)) { 904 return true; 905 } 906 907 if (a->l) { 908 /* VFP to general purpose register */ 909 tmp = tcg_temp_new_i32(); 910 vfp_load_reg32(tmp, a->vn); 911 tcg_gen_andi_i32(tmp, tmp, 0xffff); 912 store_reg(s, a->rt, tmp); 913 } else { 914 /* general purpose register to VFP */ 915 tmp = load_reg(s, a->rt); 916 tcg_gen_andi_i32(tmp, tmp, 0xffff); 917 vfp_store_reg32(tmp, a->vn); 918 } 919 920 return true; 921 } 922 923 static bool trans_VMOV_single(DisasContext *s, arg_VMOV_single *a) 924 { 925 TCGv_i32 tmp; 926 927 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 928 return false; 929 } 930 931 if (!vfp_access_check(s)) { 932 return true; 933 } 934 935 if (a->l) { 936 /* VFP to general purpose register */ 937 tmp = tcg_temp_new_i32(); 938 vfp_load_reg32(tmp, a->vn); 939 if (a->rt == 15) { 940 /* Set the 4 flag bits in the CPSR. 
*/ 941 gen_set_nzcv(tmp); 942 } else { 943 store_reg(s, a->rt, tmp); 944 } 945 } else { 946 /* general purpose register to VFP */ 947 tmp = load_reg(s, a->rt); 948 vfp_store_reg32(tmp, a->vn); 949 } 950 951 return true; 952 } 953 954 static bool trans_VMOV_64_sp(DisasContext *s, arg_VMOV_64_sp *a) 955 { 956 TCGv_i32 tmp; 957 958 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 959 return false; 960 } 961 962 /* 963 * VMOV between two general-purpose registers and two single precision 964 * floating point registers 965 */ 966 if (!vfp_access_check(s)) { 967 return true; 968 } 969 970 if (a->op) { 971 /* fpreg to gpreg */ 972 tmp = tcg_temp_new_i32(); 973 vfp_load_reg32(tmp, a->vm); 974 store_reg(s, a->rt, tmp); 975 tmp = tcg_temp_new_i32(); 976 vfp_load_reg32(tmp, a->vm + 1); 977 store_reg(s, a->rt2, tmp); 978 } else { 979 /* gpreg to fpreg */ 980 tmp = load_reg(s, a->rt); 981 vfp_store_reg32(tmp, a->vm); 982 tmp = load_reg(s, a->rt2); 983 vfp_store_reg32(tmp, a->vm + 1); 984 } 985 986 return true; 987 } 988 989 static bool trans_VMOV_64_dp(DisasContext *s, arg_VMOV_64_dp *a) 990 { 991 TCGv_i32 tmp; 992 993 /* 994 * VMOV between two general-purpose registers and one double precision 995 * floating point register. Note that this does not require support 996 * for double precision arithmetic. 997 */ 998 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 999 return false; 1000 } 1001 1002 /* UNDEF accesses to D16-D31 if they don't exist */ 1003 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { 1004 return false; 1005 } 1006 1007 if (!vfp_access_check(s)) { 1008 return true; 1009 } 1010 1011 if (a->op) { 1012 /* fpreg to gpreg */ 1013 tmp = tcg_temp_new_i32(); 1014 vfp_load_reg32(tmp, a->vm * 2); 1015 store_reg(s, a->rt, tmp); 1016 tmp = tcg_temp_new_i32(); 1017 vfp_load_reg32(tmp, a->vm * 2 + 1); 1018 store_reg(s, a->rt2, tmp); 1019 } else { 1020 /* gpreg to fpreg */ 1021 tmp = load_reg(s, a->rt); 1022 vfp_store_reg32(tmp, a->vm * 2); 1023 tmp = load_reg(s, a->rt2); 1024 vfp_store_reg32(tmp, a->vm * 2 + 1); 1025 } 1026 1027 return true; 1028 } 1029 1030 static bool trans_VLDR_VSTR_hp(DisasContext *s, arg_VLDR_VSTR_sp *a) 1031 { 1032 uint32_t offset; 1033 TCGv_i32 addr, tmp; 1034 1035 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 1036 return false; 1037 } 1038 1039 if (!vfp_access_check(s)) { 1040 return true; 1041 } 1042 1043 /* imm8 field is offset/2 for fp16, unlike fp32 and fp64 */ 1044 offset = a->imm << 1; 1045 if (!a->u) { 1046 offset = -offset; 1047 } 1048 1049 /* For thumb, use of PC is UNPREDICTABLE. */ 1050 addr = add_reg_for_lit(s, a->rn, offset); 1051 tmp = tcg_temp_new_i32(); 1052 if (a->l) { 1053 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN); 1054 vfp_store_reg32(tmp, a->vd); 1055 } else { 1056 vfp_load_reg32(tmp, a->vd); 1057 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UW | MO_ALIGN); 1058 } 1059 return true; 1060 } 1061 1062 static bool trans_VLDR_VSTR_sp(DisasContext *s, arg_VLDR_VSTR_sp *a) 1063 { 1064 uint32_t offset; 1065 TCGv_i32 addr, tmp; 1066 1067 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 1068 return false; 1069 } 1070 1071 if (!vfp_access_check(s)) { 1072 return true; 1073 } 1074 1075 offset = a->imm << 2; 1076 if (!a->u) { 1077 offset = -offset; 1078 } 1079 1080 /* For thumb, use of PC is UNPREDICTABLE. 
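     * For A32, PC as the base is the literal-pool case; add_reg_for_lit()
     * is expected to supply the word-aligned PC there.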
*/ 1081 addr = add_reg_for_lit(s, a->rn, offset); 1082 tmp = tcg_temp_new_i32(); 1083 if (a->l) { 1084 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); 1085 vfp_store_reg32(tmp, a->vd); 1086 } else { 1087 vfp_load_reg32(tmp, a->vd); 1088 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); 1089 } 1090 return true; 1091 } 1092 1093 static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_dp *a) 1094 { 1095 uint32_t offset; 1096 TCGv_i32 addr; 1097 TCGv_i64 tmp; 1098 1099 /* Note that this does not require support for double arithmetic. */ 1100 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 1101 return false; 1102 } 1103 1104 /* UNDEF accesses to D16-D31 if they don't exist */ 1105 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 1106 return false; 1107 } 1108 1109 if (!vfp_access_check(s)) { 1110 return true; 1111 } 1112 1113 offset = a->imm << 2; 1114 if (!a->u) { 1115 offset = -offset; 1116 } 1117 1118 /* For thumb, use of PC is UNPREDICTABLE. */ 1119 addr = add_reg_for_lit(s, a->rn, offset); 1120 tmp = tcg_temp_new_i64(); 1121 if (a->l) { 1122 gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4); 1123 vfp_store_reg64(tmp, a->vd); 1124 } else { 1125 vfp_load_reg64(tmp, a->vd); 1126 gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4); 1127 } 1128 return true; 1129 } 1130 1131 static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a) 1132 { 1133 uint32_t offset; 1134 TCGv_i32 addr, tmp; 1135 int i, n; 1136 1137 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 1138 return false; 1139 } 1140 1141 n = a->imm; 1142 1143 if (n == 0 || (a->vd + n) > 32) { 1144 /* 1145 * UNPREDICTABLE cases for bad immediates: we choose to 1146 * UNDEF to avoid generating huge numbers of TCG ops 1147 */ 1148 return false; 1149 } 1150 if (a->rn == 15 && a->w) { 1151 /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */ 1152 return false; 1153 } 1154 1155 s->eci_handled = true; 1156 1157 if (!vfp_access_check(s)) { 1158 return true; 1159 } 1160 1161 /* For thumb, use of PC is UNPREDICTABLE. */ 1162 addr = add_reg_for_lit(s, a->rn, 0); 1163 if (a->p) { 1164 /* pre-decrement */ 1165 tcg_gen_addi_i32(addr, addr, -(a->imm << 2)); 1166 } 1167 1168 if (s->v8m_stackcheck && a->rn == 13 && a->w) { 1169 /* 1170 * Here 'addr' is the lowest address we will store to, 1171 * and is either the old SP (if post-increment) or 1172 * the new SP (if pre-decrement). For post-increment 1173 * where the old value is below the limit and the new 1174 * value is above, it is UNKNOWN whether the limit check 1175 * triggers; we choose to trigger. 
1176 */ 1177 gen_helper_v8m_stackcheck(cpu_env, addr); 1178 } 1179 1180 offset = 4; 1181 tmp = tcg_temp_new_i32(); 1182 for (i = 0; i < n; i++) { 1183 if (a->l) { 1184 /* load */ 1185 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); 1186 vfp_store_reg32(tmp, a->vd + i); 1187 } else { 1188 /* store */ 1189 vfp_load_reg32(tmp, a->vd + i); 1190 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), MO_UL | MO_ALIGN); 1191 } 1192 tcg_gen_addi_i32(addr, addr, offset); 1193 } 1194 if (a->w) { 1195 /* writeback */ 1196 if (a->p) { 1197 offset = -offset * n; 1198 tcg_gen_addi_i32(addr, addr, offset); 1199 } 1200 store_reg(s, a->rn, addr); 1201 } 1202 1203 clear_eci_state(s); 1204 return true; 1205 } 1206 1207 static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a) 1208 { 1209 uint32_t offset; 1210 TCGv_i32 addr; 1211 TCGv_i64 tmp; 1212 int i, n; 1213 1214 /* Note that this does not require support for double arithmetic. */ 1215 if (!dc_isar_feature(aa32_fpsp_v2, s) && !dc_isar_feature(aa32_mve, s)) { 1216 return false; 1217 } 1218 1219 n = a->imm >> 1; 1220 1221 if (n == 0 || (a->vd + n) > 32 || n > 16) { 1222 /* 1223 * UNPREDICTABLE cases for bad immediates: we choose to 1224 * UNDEF to avoid generating huge numbers of TCG ops 1225 */ 1226 return false; 1227 } 1228 if (a->rn == 15 && a->w) { 1229 /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */ 1230 return false; 1231 } 1232 1233 /* UNDEF accesses to D16-D31 if they don't exist */ 1234 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd + n) > 16) { 1235 return false; 1236 } 1237 1238 s->eci_handled = true; 1239 1240 if (!vfp_access_check(s)) { 1241 return true; 1242 } 1243 1244 /* For thumb, use of PC is UNPREDICTABLE. */ 1245 addr = add_reg_for_lit(s, a->rn, 0); 1246 if (a->p) { 1247 /* pre-decrement */ 1248 tcg_gen_addi_i32(addr, addr, -(a->imm << 2)); 1249 } 1250 1251 if (s->v8m_stackcheck && a->rn == 13 && a->w) { 1252 /* 1253 * Here 'addr' is the lowest address we will store to, 1254 * and is either the old SP (if post-increment) or 1255 * the new SP (if pre-decrement). For post-increment 1256 * where the old value is below the limit and the new 1257 * value is above, it is UNKNOWN whether the limit check 1258 * triggers; we choose to trigger. 1259 */ 1260 gen_helper_v8m_stackcheck(cpu_env, addr); 1261 } 1262 1263 offset = 8; 1264 tmp = tcg_temp_new_i64(); 1265 for (i = 0; i < n; i++) { 1266 if (a->l) { 1267 /* load */ 1268 gen_aa32_ld_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4); 1269 vfp_store_reg64(tmp, a->vd + i); 1270 } else { 1271 /* store */ 1272 vfp_load_reg64(tmp, a->vd + i); 1273 gen_aa32_st_i64(s, tmp, addr, get_mem_index(s), MO_UQ | MO_ALIGN_4); 1274 } 1275 tcg_gen_addi_i32(addr, addr, offset); 1276 } 1277 if (a->w) { 1278 /* writeback */ 1279 if (a->p) { 1280 offset = -offset * n; 1281 } else if (a->imm & 1) { 1282 offset = 4; 1283 } else { 1284 offset = 0; 1285 } 1286 1287 if (offset != 0) { 1288 tcg_gen_addi_i32(addr, addr, offset); 1289 } 1290 store_reg(s, a->rn, addr); 1291 } 1292 1293 clear_eci_state(s); 1294 return true; 1295 } 1296 1297 /* 1298 * Types for callbacks for do_vfp_3op_sp() and do_vfp_3op_dp(). 1299 * The callback should emit code to write a value to vd. If 1300 * do_vfp_3op_{sp,dp}() was passed reads_vd then the TCGv vd 1301 * will contain the old value of the relevant VFP register; 1302 * otherwise it must be written to only. 
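 *
 * For example, gen_VMLA_sp() below is a VFPGen3OpSPFn: VMLA accumulates
 * into vd, so it is passed to do_vfp_3op_sp() with reads_vd == true,
 * whereas plain VMUL uses gen_helper_vfp_muls with reads_vd == false and
 * treats vd as write-only.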
1303 */ 1304 typedef void VFPGen3OpSPFn(TCGv_i32 vd, 1305 TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst); 1306 typedef void VFPGen3OpDPFn(TCGv_i64 vd, 1307 TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst); 1308 1309 /* 1310 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp(). 1311 * The callback should emit code to write a value to vd (which 1312 * should be written to only). 1313 */ 1314 typedef void VFPGen2OpSPFn(TCGv_i32 vd, TCGv_i32 vm); 1315 typedef void VFPGen2OpDPFn(TCGv_i64 vd, TCGv_i64 vm); 1316 1317 /* 1318 * Return true if the specified S reg is in a scalar bank 1319 * (ie if it is s0..s7) 1320 */ 1321 static inline bool vfp_sreg_is_scalar(int reg) 1322 { 1323 return (reg & 0x18) == 0; 1324 } 1325 1326 /* 1327 * Return true if the specified D reg is in a scalar bank 1328 * (ie if it is d0..d3 or d16..d19) 1329 */ 1330 static inline bool vfp_dreg_is_scalar(int reg) 1331 { 1332 return (reg & 0xc) == 0; 1333 } 1334 1335 /* 1336 * Advance the S reg number forwards by delta within its bank 1337 * (ie increment the low 3 bits but leave the rest the same) 1338 */ 1339 static inline int vfp_advance_sreg(int reg, int delta) 1340 { 1341 return ((reg + delta) & 0x7) | (reg & ~0x7); 1342 } 1343 1344 /* 1345 * Advance the D reg number forwards by delta within its bank 1346 * (ie increment the low 2 bits but leave the rest the same) 1347 */ 1348 static inline int vfp_advance_dreg(int reg, int delta) 1349 { 1350 return ((reg + delta) & 0x3) | (reg & ~0x3); 1351 } 1352 1353 /* 1354 * Perform a 3-operand VFP data processing instruction. fn is the 1355 * callback to do the actual operation; this function deals with the 1356 * code to handle looping around for VFP vector processing. 1357 */ 1358 static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn, 1359 int vd, int vn, int vm, bool reads_vd) 1360 { 1361 uint32_t delta_m = 0; 1362 uint32_t delta_d = 0; 1363 int veclen = s->vec_len; 1364 TCGv_i32 f0, f1, fd; 1365 TCGv_ptr fpst; 1366 1367 if (!dc_isar_feature(aa32_fpsp_v2, s)) { 1368 return false; 1369 } 1370 1371 if (!dc_isar_feature(aa32_fpshvec, s) && 1372 (veclen != 0 || s->vec_stride != 0)) { 1373 return false; 1374 } 1375 1376 if (!vfp_access_check(s)) { 1377 return true; 1378 } 1379 1380 if (veclen > 0) { 1381 /* Figure out what type of vector operation this is. */ 1382 if (vfp_sreg_is_scalar(vd)) { 1383 /* scalar */ 1384 veclen = 0; 1385 } else { 1386 delta_d = s->vec_stride + 1; 1387 1388 if (vfp_sreg_is_scalar(vm)) { 1389 /* mixed scalar/vector */ 1390 delta_m = 0; 1391 } else { 1392 /* vector */ 1393 delta_m = delta_d; 1394 } 1395 } 1396 } 1397 1398 f0 = tcg_temp_new_i32(); 1399 f1 = tcg_temp_new_i32(); 1400 fd = tcg_temp_new_i32(); 1401 fpst = fpstatus_ptr(FPST_FPCR); 1402 1403 vfp_load_reg32(f0, vn); 1404 vfp_load_reg32(f1, vm); 1405 1406 for (;;) { 1407 if (reads_vd) { 1408 vfp_load_reg32(fd, vd); 1409 } 1410 fn(fd, f0, f1, fpst); 1411 vfp_store_reg32(fd, vd); 1412 1413 if (veclen == 0) { 1414 break; 1415 } 1416 1417 /* Set up the operands for the next iteration */ 1418 veclen--; 1419 vd = vfp_advance_sreg(vd, delta_d); 1420 vn = vfp_advance_sreg(vn, delta_d); 1421 vfp_load_reg32(f0, vn); 1422 if (delta_m) { 1423 vm = vfp_advance_sreg(vm, delta_m); 1424 vfp_load_reg32(f1, vm); 1425 } 1426 } 1427 return true; 1428 } 1429 1430 static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn, 1431 int vd, int vn, int vm, bool reads_vd) 1432 { 1433 /* 1434 * Do a half-precision operation. 
Functionally this is 1435 * the same as do_vfp_3op_sp(), except: 1436 * - it uses the FPST_FPCR_F16 1437 * - it doesn't need the VFP vector handling (fp16 is a 1438 * v8 feature, and in v8 VFP vectors don't exist) 1439 * - it does the aa32_fp16_arith feature test 1440 */ 1441 TCGv_i32 f0, f1, fd; 1442 TCGv_ptr fpst; 1443 1444 if (!dc_isar_feature(aa32_fp16_arith, s)) { 1445 return false; 1446 } 1447 1448 if (s->vec_len != 0 || s->vec_stride != 0) { 1449 return false; 1450 } 1451 1452 if (!vfp_access_check(s)) { 1453 return true; 1454 } 1455 1456 f0 = tcg_temp_new_i32(); 1457 f1 = tcg_temp_new_i32(); 1458 fd = tcg_temp_new_i32(); 1459 fpst = fpstatus_ptr(FPST_FPCR_F16); 1460 1461 vfp_load_reg32(f0, vn); 1462 vfp_load_reg32(f1, vm); 1463 1464 if (reads_vd) { 1465 vfp_load_reg32(fd, vd); 1466 } 1467 fn(fd, f0, f1, fpst); 1468 vfp_store_reg32(fd, vd); 1469 return true; 1470 } 1471 1472 static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn, 1473 int vd, int vn, int vm, bool reads_vd) 1474 { 1475 uint32_t delta_m = 0; 1476 uint32_t delta_d = 0; 1477 int veclen = s->vec_len; 1478 TCGv_i64 f0, f1, fd; 1479 TCGv_ptr fpst; 1480 1481 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 1482 return false; 1483 } 1484 1485 /* UNDEF accesses to D16-D31 if they don't exist */ 1486 if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vn | vm) & 0x10)) { 1487 return false; 1488 } 1489 1490 if (!dc_isar_feature(aa32_fpshvec, s) && 1491 (veclen != 0 || s->vec_stride != 0)) { 1492 return false; 1493 } 1494 1495 if (!vfp_access_check(s)) { 1496 return true; 1497 } 1498 1499 if (veclen > 0) { 1500 /* Figure out what type of vector operation this is. */ 1501 if (vfp_dreg_is_scalar(vd)) { 1502 /* scalar */ 1503 veclen = 0; 1504 } else { 1505 delta_d = (s->vec_stride >> 1) + 1; 1506 1507 if (vfp_dreg_is_scalar(vm)) { 1508 /* mixed scalar/vector */ 1509 delta_m = 0; 1510 } else { 1511 /* vector */ 1512 delta_m = delta_d; 1513 } 1514 } 1515 } 1516 1517 f0 = tcg_temp_new_i64(); 1518 f1 = tcg_temp_new_i64(); 1519 fd = tcg_temp_new_i64(); 1520 fpst = fpstatus_ptr(FPST_FPCR); 1521 1522 vfp_load_reg64(f0, vn); 1523 vfp_load_reg64(f1, vm); 1524 1525 for (;;) { 1526 if (reads_vd) { 1527 vfp_load_reg64(fd, vd); 1528 } 1529 fn(fd, f0, f1, fpst); 1530 vfp_store_reg64(fd, vd); 1531 1532 if (veclen == 0) { 1533 break; 1534 } 1535 /* Set up the operands for the next iteration */ 1536 veclen--; 1537 vd = vfp_advance_dreg(vd, delta_d); 1538 vn = vfp_advance_dreg(vn, delta_d); 1539 vfp_load_reg64(f0, vn); 1540 if (delta_m) { 1541 vm = vfp_advance_dreg(vm, delta_m); 1542 vfp_load_reg64(f1, vm); 1543 } 1544 } 1545 return true; 1546 } 1547 1548 static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) 1549 { 1550 uint32_t delta_m = 0; 1551 uint32_t delta_d = 0; 1552 int veclen = s->vec_len; 1553 TCGv_i32 f0, fd; 1554 1555 /* Note that the caller must check the aa32_fpsp_v2 feature. */ 1556 1557 if (!dc_isar_feature(aa32_fpshvec, s) && 1558 (veclen != 0 || s->vec_stride != 0)) { 1559 return false; 1560 } 1561 1562 if (!vfp_access_check(s)) { 1563 return true; 1564 } 1565 1566 if (veclen > 0) { 1567 /* Figure out what type of vector operation this is. 
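         * For example, with a non-zero FPSCR.LEN: a destination in s0..s7
         * makes this a plain scalar operation; otherwise vd advances
         * through its bank by vec_stride + 1 each iteration, and vm either
         * stays fixed (the "one source to many" case, when vm is in
         * s0..s7) or advances in lockstep with vd.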
*/ 1568 if (vfp_sreg_is_scalar(vd)) { 1569 /* scalar */ 1570 veclen = 0; 1571 } else { 1572 delta_d = s->vec_stride + 1; 1573 1574 if (vfp_sreg_is_scalar(vm)) { 1575 /* mixed scalar/vector */ 1576 delta_m = 0; 1577 } else { 1578 /* vector */ 1579 delta_m = delta_d; 1580 } 1581 } 1582 } 1583 1584 f0 = tcg_temp_new_i32(); 1585 fd = tcg_temp_new_i32(); 1586 1587 vfp_load_reg32(f0, vm); 1588 1589 for (;;) { 1590 fn(fd, f0); 1591 vfp_store_reg32(fd, vd); 1592 1593 if (veclen == 0) { 1594 break; 1595 } 1596 1597 if (delta_m == 0) { 1598 /* single source one-many */ 1599 while (veclen--) { 1600 vd = vfp_advance_sreg(vd, delta_d); 1601 vfp_store_reg32(fd, vd); 1602 } 1603 break; 1604 } 1605 1606 /* Set up the operands for the next iteration */ 1607 veclen--; 1608 vd = vfp_advance_sreg(vd, delta_d); 1609 vm = vfp_advance_sreg(vm, delta_m); 1610 vfp_load_reg32(f0, vm); 1611 } 1612 return true; 1613 } 1614 1615 static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) 1616 { 1617 /* 1618 * Do a half-precision operation. Functionally this is 1619 * the same as do_vfp_2op_sp(), except: 1620 * - it doesn't need the VFP vector handling (fp16 is a 1621 * v8 feature, and in v8 VFP vectors don't exist) 1622 * - it does the aa32_fp16_arith feature test 1623 */ 1624 TCGv_i32 f0; 1625 1626 /* Note that the caller must check the aa32_fp16_arith feature */ 1627 1628 if (!dc_isar_feature(aa32_fp16_arith, s)) { 1629 return false; 1630 } 1631 1632 if (s->vec_len != 0 || s->vec_stride != 0) { 1633 return false; 1634 } 1635 1636 if (!vfp_access_check(s)) { 1637 return true; 1638 } 1639 1640 f0 = tcg_temp_new_i32(); 1641 vfp_load_reg32(f0, vm); 1642 fn(f0, f0); 1643 vfp_store_reg32(f0, vd); 1644 1645 return true; 1646 } 1647 1648 static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm) 1649 { 1650 uint32_t delta_m = 0; 1651 uint32_t delta_d = 0; 1652 int veclen = s->vec_len; 1653 TCGv_i64 f0, fd; 1654 1655 /* Note that the caller must check the aa32_fpdp_v2 feature. */ 1656 1657 /* UNDEF accesses to D16-D31 if they don't exist */ 1658 if (!dc_isar_feature(aa32_simd_r32, s) && ((vd | vm) & 0x10)) { 1659 return false; 1660 } 1661 1662 if (!dc_isar_feature(aa32_fpshvec, s) && 1663 (veclen != 0 || s->vec_stride != 0)) { 1664 return false; 1665 } 1666 1667 if (!vfp_access_check(s)) { 1668 return true; 1669 } 1670 1671 if (veclen > 0) { 1672 /* Figure out what type of vector operation this is. 
         */
        if (vfp_dreg_is_scalar(vd)) {
            /* scalar */
            veclen = 0;
        } else {
            delta_d = (s->vec_stride >> 1) + 1;

            if (vfp_dreg_is_scalar(vm)) {
                /* mixed scalar/vector */
                delta_m = 0;
            } else {
                /* vector */
                delta_m = delta_d;
            }
        }
    }

    f0 = tcg_temp_new_i64();
    fd = tcg_temp_new_i64();

    vfp_load_reg64(f0, vm);

    for (;;) {
        fn(fd, f0);
        vfp_store_reg64(fd, vd);

        if (veclen == 0) {
            break;
        }

        if (delta_m == 0) {
            /* single source one-many */
            while (veclen--) {
                vd = vfp_advance_dreg(vd, delta_d);
                vfp_store_reg64(fd, vd);
            }
            break;
        }

        /* Set up the operands for the next iteration */
        veclen--;
        vd = vfp_advance_dreg(vd, delta_d);
        vm = vfp_advance_dreg(vm, delta_m);
        vfp_load_reg64(f0, vm);
    }
    return true;
}

static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
}

static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
}

static bool trans_VMLA_sp(DisasContext *s, arg_VMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* Note that order of inputs to the add matters for NaNs */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
}

static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
}

static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
}

static bool trans_VMLS_sp(DisasContext *s, arg_VMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VMLS: vd = vd + -(vn * vm)
     * Note that order of inputs to the add matters for NaNs.
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
}

static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
}

static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
     */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
}

static bool trans_VNMLS_sp(DisasContext *s, arg_VNMLS_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLS_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /*
     * VNMLS: -fd + (fn * fm)
     * Note that it isn't valid to replace (-A + B) with (B - A) or similar
     * plausible looking simplifications because this will give wrong results
     * for NaNs.
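     * For instance, when fd and the product are both quiet NaNs, the Arm
     * NaN-propagation rules return the first operand of the addition, so
     * (-A + B) and (B - A) can produce different results.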
     */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
}

static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_mulh(tmp, vn, vm, fpst);
    gen_helper_vfp_negh(tmp, tmp);
    gen_helper_vfp_negh(vd, vd);
    gen_helper_vfp_addh(vd, vd, tmp, fpst);
}

static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i32 tmp = tcg_temp_new_i32();

    gen_helper_vfp_muls(tmp, vn, vm, fpst);
    gen_helper_vfp_negs(tmp, tmp);
    gen_helper_vfp_negs(vd, vd);
    gen_helper_vfp_adds(vd, vd, tmp, fpst);
}

static bool trans_VNMLA_sp(DisasContext *s, arg_VNMLA_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMLA_sp, a->vd, a->vn, a->vm, true);
}

static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMLA: -fd + -(fn * fm) */
    TCGv_i64 tmp = tcg_temp_new_i64();

    gen_helper_vfp_muld(tmp, vn, vm, fpst);
    gen_helper_vfp_negd(tmp, tmp);
    gen_helper_vfp_negd(vd, vd);
    gen_helper_vfp_addd(vd, vd, tmp, fpst);
}

static bool trans_VNMLA_dp(DisasContext *s, arg_VNMLA_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMLA_dp, a->vd, a->vn, a->vm, true);
}

static bool trans_VMUL_hp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_mulh, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_sp(DisasContext *s, arg_VMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_helper_vfp_muls, a->vd, a->vn, a->vm, false);
}

static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_mulh(vd, vn, vm, fpst);
    gen_helper_vfp_negh(vd, vd);
}

static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muls(vd, vn, vm, fpst);
    gen_helper_vfp_negs(vd, vd);
}

static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a)
{
    return do_vfp_3op_sp(s, gen_VNMUL_sp, a->vd, a->vn, a->vm, false);
}

static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst)
{
    /* VNMUL: -(fn * fm) */
    gen_helper_vfp_muld(vd, vn, vm, fpst);
    gen_helper_vfp_negd(vd, vd);
}

static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp *a)
{
    return do_vfp_3op_dp(s, gen_VNMUL_dp, a->vd, a->vn, a->vm, false);
}

static bool trans_VADD_hp(DisasContext *s, arg_VADD_sp *a)
{
    return do_vfp_3op_hp(s, gen_helper_vfp_addh,
a->vd, a->vn, a->vm, false); 1978 } 1979 1980 static bool trans_VADD_sp(DisasContext *s, arg_VADD_sp *a) 1981 { 1982 return do_vfp_3op_sp(s, gen_helper_vfp_adds, a->vd, a->vn, a->vm, false); 1983 } 1984 1985 static bool trans_VADD_dp(DisasContext *s, arg_VADD_dp *a) 1986 { 1987 return do_vfp_3op_dp(s, gen_helper_vfp_addd, a->vd, a->vn, a->vm, false); 1988 } 1989 1990 static bool trans_VSUB_hp(DisasContext *s, arg_VSUB_sp *a) 1991 { 1992 return do_vfp_3op_hp(s, gen_helper_vfp_subh, a->vd, a->vn, a->vm, false); 1993 } 1994 1995 static bool trans_VSUB_sp(DisasContext *s, arg_VSUB_sp *a) 1996 { 1997 return do_vfp_3op_sp(s, gen_helper_vfp_subs, a->vd, a->vn, a->vm, false); 1998 } 1999 2000 static bool trans_VSUB_dp(DisasContext *s, arg_VSUB_dp *a) 2001 { 2002 return do_vfp_3op_dp(s, gen_helper_vfp_subd, a->vd, a->vn, a->vm, false); 2003 } 2004 2005 static bool trans_VDIV_hp(DisasContext *s, arg_VDIV_sp *a) 2006 { 2007 return do_vfp_3op_hp(s, gen_helper_vfp_divh, a->vd, a->vn, a->vm, false); 2008 } 2009 2010 static bool trans_VDIV_sp(DisasContext *s, arg_VDIV_sp *a) 2011 { 2012 return do_vfp_3op_sp(s, gen_helper_vfp_divs, a->vd, a->vn, a->vm, false); 2013 } 2014 2015 static bool trans_VDIV_dp(DisasContext *s, arg_VDIV_dp *a) 2016 { 2017 return do_vfp_3op_dp(s, gen_helper_vfp_divd, a->vd, a->vn, a->vm, false); 2018 } 2019 2020 static bool trans_VMINNM_hp(DisasContext *s, arg_VMINNM_sp *a) 2021 { 2022 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2023 return false; 2024 } 2025 return do_vfp_3op_hp(s, gen_helper_vfp_minnumh, 2026 a->vd, a->vn, a->vm, false); 2027 } 2028 2029 static bool trans_VMAXNM_hp(DisasContext *s, arg_VMAXNM_sp *a) 2030 { 2031 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2032 return false; 2033 } 2034 return do_vfp_3op_hp(s, gen_helper_vfp_maxnumh, 2035 a->vd, a->vn, a->vm, false); 2036 } 2037 2038 static bool trans_VMINNM_sp(DisasContext *s, arg_VMINNM_sp *a) 2039 { 2040 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2041 return false; 2042 } 2043 return do_vfp_3op_sp(s, gen_helper_vfp_minnums, 2044 a->vd, a->vn, a->vm, false); 2045 } 2046 2047 static bool trans_VMAXNM_sp(DisasContext *s, arg_VMAXNM_sp *a) 2048 { 2049 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2050 return false; 2051 } 2052 return do_vfp_3op_sp(s, gen_helper_vfp_maxnums, 2053 a->vd, a->vn, a->vm, false); 2054 } 2055 2056 static bool trans_VMINNM_dp(DisasContext *s, arg_VMINNM_dp *a) 2057 { 2058 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2059 return false; 2060 } 2061 return do_vfp_3op_dp(s, gen_helper_vfp_minnumd, 2062 a->vd, a->vn, a->vm, false); 2063 } 2064 2065 static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a) 2066 { 2067 if (!dc_isar_feature(aa32_vminmaxnm, s)) { 2068 return false; 2069 } 2070 return do_vfp_3op_dp(s, gen_helper_vfp_maxnumd, 2071 a->vd, a->vn, a->vm, false); 2072 } 2073 2074 static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) 2075 { 2076 /* 2077 * VFNMA : fd = muladd(-fd, fn, fm) 2078 * VFNMS : fd = muladd(-fd, -fn, fm) 2079 * VFMA : fd = muladd( fd, fn, fm) 2080 * VFMS : fd = muladd( fd, -fn, fm) 2081 * 2082 * These are fused multiply-add, and must be done as one floating 2083 * point operation with no rounding between the multiplication and 2084 * addition steps. NB that doing the negations here as separate 2085 * steps is correct : an input NaN should come out with its sign 2086 * bit flipped if it is a negated-input. 2087 */ 2088 TCGv_ptr fpst; 2089 TCGv_i32 vn, vm, vd; 2090 2091 /* 2092 * Present in VFPv4 only, and only with the FP16 extension. 
2093 * Note that we can't rely on the SIMDFMAC check alone, because 2094 * in a Neon-no-VFP core that ID register field will be non-zero. 2095 */ 2096 if (!dc_isar_feature(aa32_fp16_arith, s) || 2097 !dc_isar_feature(aa32_simdfmac, s) || 2098 !dc_isar_feature(aa32_fpsp_v2, s)) { 2099 return false; 2100 } 2101 2102 if (s->vec_len != 0 || s->vec_stride != 0) { 2103 return false; 2104 } 2105 2106 if (!vfp_access_check(s)) { 2107 return true; 2108 } 2109 2110 vn = tcg_temp_new_i32(); 2111 vm = tcg_temp_new_i32(); 2112 vd = tcg_temp_new_i32(); 2113 2114 vfp_load_reg32(vn, a->vn); 2115 vfp_load_reg32(vm, a->vm); 2116 if (neg_n) { 2117 /* VFNMS, VFMS */ 2118 gen_helper_vfp_negh(vn, vn); 2119 } 2120 vfp_load_reg32(vd, a->vd); 2121 if (neg_d) { 2122 /* VFNMA, VFNMS */ 2123 gen_helper_vfp_negh(vd, vd); 2124 } 2125 fpst = fpstatus_ptr(FPST_FPCR_F16); 2126 gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst); 2127 vfp_store_reg32(vd, a->vd); 2128 return true; 2129 } 2130 2131 static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) 2132 { 2133 /* 2134 * VFNMA : fd = muladd(-fd, fn, fm) 2135 * VFNMS : fd = muladd(-fd, -fn, fm) 2136 * VFMA : fd = muladd( fd, fn, fm) 2137 * VFMS : fd = muladd( fd, -fn, fm) 2138 * 2139 * These are fused multiply-add, and must be done as one floating 2140 * point operation with no rounding between the multiplication and 2141 * addition steps. NB that doing the negations here as separate 2142 * steps is correct : an input NaN should come out with its sign 2143 * bit flipped if it is a negated-input. 2144 */ 2145 TCGv_ptr fpst; 2146 TCGv_i32 vn, vm, vd; 2147 2148 /* 2149 * Present in VFPv4 only. 2150 * Note that we can't rely on the SIMDFMAC check alone, because 2151 * in a Neon-no-VFP core that ID register field will be non-zero. 2152 */ 2153 if (!dc_isar_feature(aa32_simdfmac, s) || 2154 !dc_isar_feature(aa32_fpsp_v2, s)) { 2155 return false; 2156 } 2157 /* 2158 * In v7A, UNPREDICTABLE with non-zero vector length/stride; from 2159 * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A. 2160 */ 2161 if (s->vec_len != 0 || s->vec_stride != 0) { 2162 return false; 2163 } 2164 2165 if (!vfp_access_check(s)) { 2166 return true; 2167 } 2168 2169 vn = tcg_temp_new_i32(); 2170 vm = tcg_temp_new_i32(); 2171 vd = tcg_temp_new_i32(); 2172 2173 vfp_load_reg32(vn, a->vn); 2174 vfp_load_reg32(vm, a->vm); 2175 if (neg_n) { 2176 /* VFNMS, VFMS */ 2177 gen_helper_vfp_negs(vn, vn); 2178 } 2179 vfp_load_reg32(vd, a->vd); 2180 if (neg_d) { 2181 /* VFNMA, VFNMS */ 2182 gen_helper_vfp_negs(vd, vd); 2183 } 2184 fpst = fpstatus_ptr(FPST_FPCR); 2185 gen_helper_vfp_muladds(vd, vn, vm, vd, fpst); 2186 vfp_store_reg32(vd, a->vd); 2187 return true; 2188 } 2189 2190 static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) 2191 { 2192 /* 2193 * VFNMA : fd = muladd(-fd, fn, fm) 2194 * VFNMS : fd = muladd(-fd, -fn, fm) 2195 * VFMA : fd = muladd( fd, fn, fm) 2196 * VFMS : fd = muladd( fd, -fn, fm) 2197 * 2198 * These are fused multiply-add, and must be done as one floating 2199 * point operation with no rounding between the multiplication and 2200 * addition steps. NB that doing the negations here as separate 2201 * steps is correct : an input NaN should come out with its sign 2202 * bit flipped if it is a negated-input. 2203 */ 2204 TCGv_ptr fpst; 2205 TCGv_i64 vn, vm, vd; 2206 2207 /* 2208 * Present in VFPv4 only. 2209 * Note that we can't rely on the SIMDFMAC check alone, because 2210 * in a Neon-no-VFP core that ID register field will be non-zero. 
2211 */ 2212 if (!dc_isar_feature(aa32_simdfmac, s) || 2213 !dc_isar_feature(aa32_fpdp_v2, s)) { 2214 return false; 2215 } 2216 /* 2217 * In v7A, UNPREDICTABLE with non-zero vector length/stride; from 2218 * v8A, must UNDEF. We choose to UNDEF for both v7A and v8A. 2219 */ 2220 if (s->vec_len != 0 || s->vec_stride != 0) { 2221 return false; 2222 } 2223 2224 /* UNDEF accesses to D16-D31 if they don't exist. */ 2225 if (!dc_isar_feature(aa32_simd_r32, s) && 2226 ((a->vd | a->vn | a->vm) & 0x10)) { 2227 return false; 2228 } 2229 2230 if (!vfp_access_check(s)) { 2231 return true; 2232 } 2233 2234 vn = tcg_temp_new_i64(); 2235 vm = tcg_temp_new_i64(); 2236 vd = tcg_temp_new_i64(); 2237 2238 vfp_load_reg64(vn, a->vn); 2239 vfp_load_reg64(vm, a->vm); 2240 if (neg_n) { 2241 /* VFNMS, VFMS */ 2242 gen_helper_vfp_negd(vn, vn); 2243 } 2244 vfp_load_reg64(vd, a->vd); 2245 if (neg_d) { 2246 /* VFNMA, VFNMS */ 2247 gen_helper_vfp_negd(vd, vd); 2248 } 2249 fpst = fpstatus_ptr(FPST_FPCR); 2250 gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst); 2251 vfp_store_reg64(vd, a->vd); 2252 return true; 2253 } 2254 2255 #define MAKE_ONE_VFM_TRANS_FN(INSN, PREC, NEGN, NEGD) \ 2256 static bool trans_##INSN##_##PREC(DisasContext *s, \ 2257 arg_##INSN##_##PREC *a) \ 2258 { \ 2259 return do_vfm_##PREC(s, a, NEGN, NEGD); \ 2260 } 2261 2262 #define MAKE_VFM_TRANS_FNS(PREC) \ 2263 MAKE_ONE_VFM_TRANS_FN(VFMA, PREC, false, false) \ 2264 MAKE_ONE_VFM_TRANS_FN(VFMS, PREC, true, false) \ 2265 MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \ 2266 MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true) 2267 2268 MAKE_VFM_TRANS_FNS(hp) 2269 MAKE_VFM_TRANS_FNS(sp) 2270 MAKE_VFM_TRANS_FNS(dp) 2271 2272 static bool trans_VMOV_imm_hp(DisasContext *s, arg_VMOV_imm_sp *a) 2273 { 2274 if (!dc_isar_feature(aa32_fp16_arith, s)) { 2275 return false; 2276 } 2277 2278 if (s->vec_len != 0 || s->vec_stride != 0) { 2279 return false; 2280 } 2281 2282 if (!vfp_access_check(s)) { 2283 return true; 2284 } 2285 2286 vfp_store_reg32(tcg_constant_i32(vfp_expand_imm(MO_16, a->imm)), a->vd); 2287 return true; 2288 } 2289 2290 static bool trans_VMOV_imm_sp(DisasContext *s, arg_VMOV_imm_sp *a) 2291 { 2292 uint32_t delta_d = 0; 2293 int veclen = s->vec_len; 2294 TCGv_i32 fd; 2295 uint32_t vd; 2296 2297 vd = a->vd; 2298 2299 if (!dc_isar_feature(aa32_fpsp_v3, s)) { 2300 return false; 2301 } 2302 2303 if (!dc_isar_feature(aa32_fpshvec, s) && 2304 (veclen != 0 || s->vec_stride != 0)) { 2305 return false; 2306 } 2307 2308 if (!vfp_access_check(s)) { 2309 return true; 2310 } 2311 2312 if (veclen > 0) { 2313 /* Figure out what type of vector operation this is. */ 2314 if (vfp_sreg_is_scalar(vd)) { 2315 /* scalar */ 2316 veclen = 0; 2317 } else { 2318 delta_d = s->vec_stride + 1; 2319 } 2320 } 2321 2322 fd = tcg_constant_i32(vfp_expand_imm(MO_32, a->imm)); 2323 2324 for (;;) { 2325 vfp_store_reg32(fd, vd); 2326 2327 if (veclen == 0) { 2328 break; 2329 } 2330 2331 /* Set up the operands for the next iteration */ 2332 veclen--; 2333 vd = vfp_advance_sreg(vd, delta_d); 2334 } 2335 2336 return true; 2337 } 2338 2339 static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a) 2340 { 2341 uint32_t delta_d = 0; 2342 int veclen = s->vec_len; 2343 TCGv_i64 fd; 2344 uint32_t vd; 2345 2346 vd = a->vd; 2347 2348 if (!dc_isar_feature(aa32_fpdp_v3, s)) { 2349 return false; 2350 } 2351 2352 /* UNDEF accesses to D16-D31 if they don't exist. 
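     * (Here, as in the other double-precision translators in this file,
     * (vd & 0x10) is set exactly when the encoding names one of D16..D31;
     * aa32_simd_r32 reports whether the CPU implements all 32 D registers,
     * so the check makes those encodings UNDEF on 16-register FPUs.)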
*/ 2353 if (!dc_isar_feature(aa32_simd_r32, s) && (vd & 0x10)) { 2354 return false; 2355 } 2356 2357 if (!dc_isar_feature(aa32_fpshvec, s) && 2358 (veclen != 0 || s->vec_stride != 0)) { 2359 return false; 2360 } 2361 2362 if (!vfp_access_check(s)) { 2363 return true; 2364 } 2365 2366 if (veclen > 0) { 2367 /* Figure out what type of vector operation this is. */ 2368 if (vfp_dreg_is_scalar(vd)) { 2369 /* scalar */ 2370 veclen = 0; 2371 } else { 2372 delta_d = (s->vec_stride >> 1) + 1; 2373 } 2374 } 2375 2376 fd = tcg_constant_i64(vfp_expand_imm(MO_64, a->imm)); 2377 2378 for (;;) { 2379 vfp_store_reg64(fd, vd); 2380 2381 if (veclen == 0) { 2382 break; 2383 } 2384 2385 /* Set up the operands for the next iteration */ 2386 veclen--; 2387 vd = vfp_advance_dreg(vd, delta_d); 2388 } 2389 2390 return true; 2391 } 2392 2393 #define DO_VFP_2OP(INSN, PREC, FN, CHECK) \ 2394 static bool trans_##INSN##_##PREC(DisasContext *s, \ 2395 arg_##INSN##_##PREC *a) \ 2396 { \ 2397 if (!dc_isar_feature(CHECK, s)) { \ 2398 return false; \ 2399 } \ 2400 return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \ 2401 } 2402 2403 #define DO_VFP_VMOV(INSN, PREC, FN) \ 2404 static bool trans_##INSN##_##PREC(DisasContext *s, \ 2405 arg_##INSN##_##PREC *a) \ 2406 { \ 2407 if (!dc_isar_feature(aa32_fp##PREC##_v2, s) && \ 2408 !dc_isar_feature(aa32_mve, s)) { \ 2409 return false; \ 2410 } \ 2411 return do_vfp_2op_##PREC(s, FN, a->vd, a->vm); \ 2412 } 2413 2414 DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32) 2415 DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64) 2416 2417 DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith) 2418 DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2) 2419 DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2) 2420 2421 DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith) 2422 DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2) 2423 DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2) 2424 2425 static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm) 2426 { 2427 gen_helper_vfp_sqrth(vd, vm, cpu_env); 2428 } 2429 2430 static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm) 2431 { 2432 gen_helper_vfp_sqrts(vd, vm, cpu_env); 2433 } 2434 2435 static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm) 2436 { 2437 gen_helper_vfp_sqrtd(vd, vm, cpu_env); 2438 } 2439 2440 DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith) 2441 DO_VFP_2OP(VSQRT, sp, gen_VSQRT_sp, aa32_fpsp_v2) 2442 DO_VFP_2OP(VSQRT, dp, gen_VSQRT_dp, aa32_fpdp_v2) 2443 2444 static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a) 2445 { 2446 TCGv_i32 vd, vm; 2447 2448 if (!dc_isar_feature(aa32_fp16_arith, s)) { 2449 return false; 2450 } 2451 2452 /* Vm/M bits must be zero for the Z variant */ 2453 if (a->z && a->vm != 0) { 2454 return false; 2455 } 2456 2457 if (!vfp_access_check(s)) { 2458 return true; 2459 } 2460 2461 vd = tcg_temp_new_i32(); 2462 vm = tcg_temp_new_i32(); 2463 2464 vfp_load_reg32(vd, a->vd); 2465 if (a->z) { 2466 tcg_gen_movi_i32(vm, 0); 2467 } else { 2468 vfp_load_reg32(vm, a->vm); 2469 } 2470 2471 if (a->e) { 2472 gen_helper_vfp_cmpeh(vd, vm, cpu_env); 2473 } else { 2474 gen_helper_vfp_cmph(vd, vm, cpu_env); 2475 } 2476 return true; 2477 } 2478 2479 static bool trans_VCMP_sp(DisasContext *s, arg_VCMP_sp *a) 2480 { 2481 TCGv_i32 vd, vm; 2482 2483 if (!dc_isar_feature(aa32_fpsp_v2, s)) { 2484 return false; 2485 } 2486 2487 /* Vm/M bits must be zero for the Z variant */ 2488 if (a->z && a->vm != 0) { 2489 return false; 2490 } 2491 2492 if (!vfp_access_check(s)) { 2493 return true; 2494 } 2495 2496 vd = tcg_temp_new_i32(); 2497 vm = 
tcg_temp_new_i32(); 2498 2499 vfp_load_reg32(vd, a->vd); 2500 if (a->z) { 2501 tcg_gen_movi_i32(vm, 0); 2502 } else { 2503 vfp_load_reg32(vm, a->vm); 2504 } 2505 2506 if (a->e) { 2507 gen_helper_vfp_cmpes(vd, vm, cpu_env); 2508 } else { 2509 gen_helper_vfp_cmps(vd, vm, cpu_env); 2510 } 2511 return true; 2512 } 2513 2514 static bool trans_VCMP_dp(DisasContext *s, arg_VCMP_dp *a) 2515 { 2516 TCGv_i64 vd, vm; 2517 2518 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 2519 return false; 2520 } 2521 2522 /* Vm/M bits must be zero for the Z variant */ 2523 if (a->z && a->vm != 0) { 2524 return false; 2525 } 2526 2527 /* UNDEF accesses to D16-D31 if they don't exist. */ 2528 if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { 2529 return false; 2530 } 2531 2532 if (!vfp_access_check(s)) { 2533 return true; 2534 } 2535 2536 vd = tcg_temp_new_i64(); 2537 vm = tcg_temp_new_i64(); 2538 2539 vfp_load_reg64(vd, a->vd); 2540 if (a->z) { 2541 tcg_gen_movi_i64(vm, 0); 2542 } else { 2543 vfp_load_reg64(vm, a->vm); 2544 } 2545 2546 if (a->e) { 2547 gen_helper_vfp_cmped(vd, vm, cpu_env); 2548 } else { 2549 gen_helper_vfp_cmpd(vd, vm, cpu_env); 2550 } 2551 return true; 2552 } 2553 2554 static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a) 2555 { 2556 TCGv_ptr fpst; 2557 TCGv_i32 ahp_mode; 2558 TCGv_i32 tmp; 2559 2560 if (!dc_isar_feature(aa32_fp16_spconv, s)) { 2561 return false; 2562 } 2563 2564 if (!vfp_access_check(s)) { 2565 return true; 2566 } 2567 2568 fpst = fpstatus_ptr(FPST_FPCR); 2569 ahp_mode = get_ahp_flag(); 2570 tmp = tcg_temp_new_i32(); 2571 /* The T bit tells us if we want the low or high 16 bits of Vm */ 2572 tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t)); 2573 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp_mode); 2574 vfp_store_reg32(tmp, a->vd); 2575 return true; 2576 } 2577 2578 static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a) 2579 { 2580 TCGv_ptr fpst; 2581 TCGv_i32 ahp_mode; 2582 TCGv_i32 tmp; 2583 TCGv_i64 vd; 2584 2585 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 2586 return false; 2587 } 2588 2589 if (!dc_isar_feature(aa32_fp16_dpconv, s)) { 2590 return false; 2591 } 2592 2593 /* UNDEF accesses to D16-D31 if they don't exist. 
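     */
    /*
     * Note added for clarity: get_ahp_flag() below reads FPSCR.AHP; when
     * that bit is set the 16-bit source is interpreted in the Arm
     * alternative half-precision format instead of IEEE half precision.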
*/ 2594 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 2595 return false; 2596 } 2597 2598 if (!vfp_access_check(s)) { 2599 return true; 2600 } 2601 2602 fpst = fpstatus_ptr(FPST_FPCR); 2603 ahp_mode = get_ahp_flag(); 2604 tmp = tcg_temp_new_i32(); 2605 /* The T bit tells us if we want the low or high 16 bits of Vm */ 2606 tcg_gen_ld16u_i32(tmp, cpu_env, vfp_f16_offset(a->vm, a->t)); 2607 vd = tcg_temp_new_i64(); 2608 gen_helper_vfp_fcvt_f16_to_f64(vd, tmp, fpst, ahp_mode); 2609 vfp_store_reg64(vd, a->vd); 2610 return true; 2611 } 2612 2613 static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a) 2614 { 2615 TCGv_ptr fpst; 2616 TCGv_i32 tmp; 2617 2618 if (!dc_isar_feature(aa32_bf16, s)) { 2619 return false; 2620 } 2621 2622 if (!vfp_access_check(s)) { 2623 return true; 2624 } 2625 2626 fpst = fpstatus_ptr(FPST_FPCR); 2627 tmp = tcg_temp_new_i32(); 2628 2629 vfp_load_reg32(tmp, a->vm); 2630 gen_helper_bfcvt(tmp, tmp, fpst); 2631 tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t)); 2632 return true; 2633 } 2634 2635 static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a) 2636 { 2637 TCGv_ptr fpst; 2638 TCGv_i32 ahp_mode; 2639 TCGv_i32 tmp; 2640 2641 if (!dc_isar_feature(aa32_fp16_spconv, s)) { 2642 return false; 2643 } 2644 2645 if (!vfp_access_check(s)) { 2646 return true; 2647 } 2648 2649 fpst = fpstatus_ptr(FPST_FPCR); 2650 ahp_mode = get_ahp_flag(); 2651 tmp = tcg_temp_new_i32(); 2652 2653 vfp_load_reg32(tmp, a->vm); 2654 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp_mode); 2655 tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t)); 2656 return true; 2657 } 2658 2659 static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a) 2660 { 2661 TCGv_ptr fpst; 2662 TCGv_i32 ahp_mode; 2663 TCGv_i32 tmp; 2664 TCGv_i64 vm; 2665 2666 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 2667 return false; 2668 } 2669 2670 if (!dc_isar_feature(aa32_fp16_dpconv, s)) { 2671 return false; 2672 } 2673 2674 /* UNDEF accesses to D16-D31 if they don't exist. 
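     */
    /*
     * Note added for clarity: the result is written back below with
     * tcg_gen_st16_i32() at vfp_f16_offset(), so only the selected half
     * of Sd is updated and the other 16 bits are left unchanged, as the
     * architecture requires for VCVTB/VCVTT.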
*/ 2675 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { 2676 return false; 2677 } 2678 2679 if (!vfp_access_check(s)) { 2680 return true; 2681 } 2682 2683 fpst = fpstatus_ptr(FPST_FPCR); 2684 ahp_mode = get_ahp_flag(); 2685 tmp = tcg_temp_new_i32(); 2686 vm = tcg_temp_new_i64(); 2687 2688 vfp_load_reg64(vm, a->vm); 2689 gen_helper_vfp_fcvt_f64_to_f16(tmp, vm, fpst, ahp_mode); 2690 tcg_gen_st16_i32(tmp, cpu_env, vfp_f16_offset(a->vd, a->t)); 2691 return true; 2692 } 2693 2694 static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a) 2695 { 2696 TCGv_ptr fpst; 2697 TCGv_i32 tmp; 2698 2699 if (!dc_isar_feature(aa32_fp16_arith, s)) { 2700 return false; 2701 } 2702 2703 if (!vfp_access_check(s)) { 2704 return true; 2705 } 2706 2707 tmp = tcg_temp_new_i32(); 2708 vfp_load_reg32(tmp, a->vm); 2709 fpst = fpstatus_ptr(FPST_FPCR_F16); 2710 gen_helper_rinth(tmp, tmp, fpst); 2711 vfp_store_reg32(tmp, a->vd); 2712 return true; 2713 } 2714 2715 static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a) 2716 { 2717 TCGv_ptr fpst; 2718 TCGv_i32 tmp; 2719 2720 if (!dc_isar_feature(aa32_vrint, s)) { 2721 return false; 2722 } 2723 2724 if (!vfp_access_check(s)) { 2725 return true; 2726 } 2727 2728 tmp = tcg_temp_new_i32(); 2729 vfp_load_reg32(tmp, a->vm); 2730 fpst = fpstatus_ptr(FPST_FPCR); 2731 gen_helper_rints(tmp, tmp, fpst); 2732 vfp_store_reg32(tmp, a->vd); 2733 return true; 2734 } 2735 2736 static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a) 2737 { 2738 TCGv_ptr fpst; 2739 TCGv_i64 tmp; 2740 2741 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 2742 return false; 2743 } 2744 2745 if (!dc_isar_feature(aa32_vrint, s)) { 2746 return false; 2747 } 2748 2749 /* UNDEF accesses to D16-D31 if they don't exist. */ 2750 if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { 2751 return false; 2752 } 2753 2754 if (!vfp_access_check(s)) { 2755 return true; 2756 } 2757 2758 tmp = tcg_temp_new_i64(); 2759 vfp_load_reg64(tmp, a->vm); 2760 fpst = fpstatus_ptr(FPST_FPCR); 2761 gen_helper_rintd(tmp, tmp, fpst); 2762 vfp_store_reg64(tmp, a->vd); 2763 return true; 2764 } 2765 2766 static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a) 2767 { 2768 TCGv_ptr fpst; 2769 TCGv_i32 tmp; 2770 TCGv_i32 tcg_rmode; 2771 2772 if (!dc_isar_feature(aa32_fp16_arith, s)) { 2773 return false; 2774 } 2775 2776 if (!vfp_access_check(s)) { 2777 return true; 2778 } 2779 2780 tmp = tcg_temp_new_i32(); 2781 vfp_load_reg32(tmp, a->vm); 2782 fpst = fpstatus_ptr(FPST_FPCR_F16); 2783 tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); 2784 gen_helper_rinth(tmp, tmp, fpst); 2785 gen_restore_rmode(tcg_rmode, fpst); 2786 vfp_store_reg32(tmp, a->vd); 2787 return true; 2788 } 2789 2790 static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a) 2791 { 2792 TCGv_ptr fpst; 2793 TCGv_i32 tmp; 2794 TCGv_i32 tcg_rmode; 2795 2796 if (!dc_isar_feature(aa32_vrint, s)) { 2797 return false; 2798 } 2799 2800 if (!vfp_access_check(s)) { 2801 return true; 2802 } 2803 2804 tmp = tcg_temp_new_i32(); 2805 vfp_load_reg32(tmp, a->vm); 2806 fpst = fpstatus_ptr(FPST_FPCR); 2807 tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); 2808 gen_helper_rints(tmp, tmp, fpst); 2809 gen_restore_rmode(tcg_rmode, fpst); 2810 vfp_store_reg32(tmp, a->vd); 2811 return true; 2812 } 2813 2814 static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a) 2815 { 2816 TCGv_ptr fpst; 2817 TCGv_i64 tmp; 2818 TCGv_i32 tcg_rmode; 2819 2820 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 2821 return false; 2822 } 2823 2824 if (!dc_isar_feature(aa32_vrint, 
s)) { 2825 return false; 2826 } 2827 2828 /* UNDEF accesses to D16-D31 if they don't exist. */ 2829 if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { 2830 return false; 2831 } 2832 2833 if (!vfp_access_check(s)) { 2834 return true; 2835 } 2836 2837 tmp = tcg_temp_new_i64(); 2838 vfp_load_reg64(tmp, a->vm); 2839 fpst = fpstatus_ptr(FPST_FPCR); 2840 tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); 2841 gen_helper_rintd(tmp, tmp, fpst); 2842 gen_restore_rmode(tcg_rmode, fpst); 2843 vfp_store_reg64(tmp, a->vd); 2844 return true; 2845 } 2846 2847 static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a) 2848 { 2849 TCGv_ptr fpst; 2850 TCGv_i32 tmp; 2851 2852 if (!dc_isar_feature(aa32_fp16_arith, s)) { 2853 return false; 2854 } 2855 2856 if (!vfp_access_check(s)) { 2857 return true; 2858 } 2859 2860 tmp = tcg_temp_new_i32(); 2861 vfp_load_reg32(tmp, a->vm); 2862 fpst = fpstatus_ptr(FPST_FPCR_F16); 2863 gen_helper_rinth_exact(tmp, tmp, fpst); 2864 vfp_store_reg32(tmp, a->vd); 2865 return true; 2866 } 2867 2868 static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a) 2869 { 2870 TCGv_ptr fpst; 2871 TCGv_i32 tmp; 2872 2873 if (!dc_isar_feature(aa32_vrint, s)) { 2874 return false; 2875 } 2876 2877 if (!vfp_access_check(s)) { 2878 return true; 2879 } 2880 2881 tmp = tcg_temp_new_i32(); 2882 vfp_load_reg32(tmp, a->vm); 2883 fpst = fpstatus_ptr(FPST_FPCR); 2884 gen_helper_rints_exact(tmp, tmp, fpst); 2885 vfp_store_reg32(tmp, a->vd); 2886 return true; 2887 } 2888 2889 static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a) 2890 { 2891 TCGv_ptr fpst; 2892 TCGv_i64 tmp; 2893 2894 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 2895 return false; 2896 } 2897 2898 if (!dc_isar_feature(aa32_vrint, s)) { 2899 return false; 2900 } 2901 2902 /* UNDEF accesses to D16-D31 if they don't exist. */ 2903 if (!dc_isar_feature(aa32_simd_r32, s) && ((a->vd | a->vm) & 0x10)) { 2904 return false; 2905 } 2906 2907 if (!vfp_access_check(s)) { 2908 return true; 2909 } 2910 2911 tmp = tcg_temp_new_i64(); 2912 vfp_load_reg64(tmp, a->vm); 2913 fpst = fpstatus_ptr(FPST_FPCR); 2914 gen_helper_rintd_exact(tmp, tmp, fpst); 2915 vfp_store_reg64(tmp, a->vd); 2916 return true; 2917 } 2918 2919 static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a) 2920 { 2921 TCGv_i64 vd; 2922 TCGv_i32 vm; 2923 2924 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 2925 return false; 2926 } 2927 2928 /* UNDEF accesses to D16-D31 if they don't exist. */ 2929 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 2930 return false; 2931 } 2932 2933 if (!vfp_access_check(s)) { 2934 return true; 2935 } 2936 2937 vm = tcg_temp_new_i32(); 2938 vd = tcg_temp_new_i64(); 2939 vfp_load_reg32(vm, a->vm); 2940 gen_helper_vfp_fcvtds(vd, vm, cpu_env); 2941 vfp_store_reg64(vd, a->vd); 2942 return true; 2943 } 2944 2945 static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a) 2946 { 2947 TCGv_i64 vm; 2948 TCGv_i32 vd; 2949 2950 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 2951 return false; 2952 } 2953 2954 /* UNDEF accesses to D16-D31 if they don't exist. 
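     * (Only vm needs checking here: this conversion narrows to single
     * precision, so the destination is an S register and cannot name
     * D16..D31.)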
*/ 2955 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { 2956 return false; 2957 } 2958 2959 if (!vfp_access_check(s)) { 2960 return true; 2961 } 2962 2963 vd = tcg_temp_new_i32(); 2964 vm = tcg_temp_new_i64(); 2965 vfp_load_reg64(vm, a->vm); 2966 gen_helper_vfp_fcvtsd(vd, vm, cpu_env); 2967 vfp_store_reg32(vd, a->vd); 2968 return true; 2969 } 2970 2971 static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a) 2972 { 2973 TCGv_i32 vm; 2974 TCGv_ptr fpst; 2975 2976 if (!dc_isar_feature(aa32_fp16_arith, s)) { 2977 return false; 2978 } 2979 2980 if (!vfp_access_check(s)) { 2981 return true; 2982 } 2983 2984 vm = tcg_temp_new_i32(); 2985 vfp_load_reg32(vm, a->vm); 2986 fpst = fpstatus_ptr(FPST_FPCR_F16); 2987 if (a->s) { 2988 /* i32 -> f16 */ 2989 gen_helper_vfp_sitoh(vm, vm, fpst); 2990 } else { 2991 /* u32 -> f16 */ 2992 gen_helper_vfp_uitoh(vm, vm, fpst); 2993 } 2994 vfp_store_reg32(vm, a->vd); 2995 return true; 2996 } 2997 2998 static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a) 2999 { 3000 TCGv_i32 vm; 3001 TCGv_ptr fpst; 3002 3003 if (!dc_isar_feature(aa32_fpsp_v2, s)) { 3004 return false; 3005 } 3006 3007 if (!vfp_access_check(s)) { 3008 return true; 3009 } 3010 3011 vm = tcg_temp_new_i32(); 3012 vfp_load_reg32(vm, a->vm); 3013 fpst = fpstatus_ptr(FPST_FPCR); 3014 if (a->s) { 3015 /* i32 -> f32 */ 3016 gen_helper_vfp_sitos(vm, vm, fpst); 3017 } else { 3018 /* u32 -> f32 */ 3019 gen_helper_vfp_uitos(vm, vm, fpst); 3020 } 3021 vfp_store_reg32(vm, a->vd); 3022 return true; 3023 } 3024 3025 static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a) 3026 { 3027 TCGv_i32 vm; 3028 TCGv_i64 vd; 3029 TCGv_ptr fpst; 3030 3031 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 3032 return false; 3033 } 3034 3035 /* UNDEF accesses to D16-D31 if they don't exist. */ 3036 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 3037 return false; 3038 } 3039 3040 if (!vfp_access_check(s)) { 3041 return true; 3042 } 3043 3044 vm = tcg_temp_new_i32(); 3045 vd = tcg_temp_new_i64(); 3046 vfp_load_reg32(vm, a->vm); 3047 fpst = fpstatus_ptr(FPST_FPCR); 3048 if (a->s) { 3049 /* i32 -> f64 */ 3050 gen_helper_vfp_sitod(vd, vm, fpst); 3051 } else { 3052 /* u32 -> f64 */ 3053 gen_helper_vfp_uitod(vd, vm, fpst); 3054 } 3055 vfp_store_reg64(vd, a->vd); 3056 return true; 3057 } 3058 3059 static bool trans_VJCVT(DisasContext *s, arg_VJCVT *a) 3060 { 3061 TCGv_i32 vd; 3062 TCGv_i64 vm; 3063 3064 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 3065 return false; 3066 } 3067 3068 if (!dc_isar_feature(aa32_jscvt, s)) { 3069 return false; 3070 } 3071 3072 /* UNDEF accesses to D16-D31 if they don't exist. */ 3073 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { 3074 return false; 3075 } 3076 3077 if (!vfp_access_check(s)) { 3078 return true; 3079 } 3080 3081 vm = tcg_temp_new_i64(); 3082 vd = tcg_temp_new_i32(); 3083 vfp_load_reg64(vm, a->vm); 3084 gen_helper_vjcvt(vd, vm, cpu_env); 3085 vfp_store_reg32(vd, a->vd); 3086 return true; 3087 } 3088 3089 static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a) 3090 { 3091 TCGv_i32 vd, shift; 3092 TCGv_ptr fpst; 3093 int frac_bits; 3094 3095 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3096 return false; 3097 } 3098 3099 if (!vfp_access_check(s)) { 3100 return true; 3101 } 3102 3103 frac_bits = (a->opc & 1) ? 
(32 - a->imm) : (16 - a->imm); 3104 3105 vd = tcg_temp_new_i32(); 3106 vfp_load_reg32(vd, a->vd); 3107 3108 fpst = fpstatus_ptr(FPST_FPCR_F16); 3109 shift = tcg_constant_i32(frac_bits); 3110 3111 /* Switch on op:U:sx bits */ 3112 switch (a->opc) { 3113 case 0: 3114 gen_helper_vfp_shtoh_round_to_nearest(vd, vd, shift, fpst); 3115 break; 3116 case 1: 3117 gen_helper_vfp_sltoh_round_to_nearest(vd, vd, shift, fpst); 3118 break; 3119 case 2: 3120 gen_helper_vfp_uhtoh_round_to_nearest(vd, vd, shift, fpst); 3121 break; 3122 case 3: 3123 gen_helper_vfp_ultoh_round_to_nearest(vd, vd, shift, fpst); 3124 break; 3125 case 4: 3126 gen_helper_vfp_toshh_round_to_zero(vd, vd, shift, fpst); 3127 break; 3128 case 5: 3129 gen_helper_vfp_toslh_round_to_zero(vd, vd, shift, fpst); 3130 break; 3131 case 6: 3132 gen_helper_vfp_touhh_round_to_zero(vd, vd, shift, fpst); 3133 break; 3134 case 7: 3135 gen_helper_vfp_toulh_round_to_zero(vd, vd, shift, fpst); 3136 break; 3137 default: 3138 g_assert_not_reached(); 3139 } 3140 3141 vfp_store_reg32(vd, a->vd); 3142 return true; 3143 } 3144 3145 static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a) 3146 { 3147 TCGv_i32 vd, shift; 3148 TCGv_ptr fpst; 3149 int frac_bits; 3150 3151 if (!dc_isar_feature(aa32_fpsp_v3, s)) { 3152 return false; 3153 } 3154 3155 if (!vfp_access_check(s)) { 3156 return true; 3157 } 3158 3159 frac_bits = (a->opc & 1) ? (32 - a->imm) : (16 - a->imm); 3160 3161 vd = tcg_temp_new_i32(); 3162 vfp_load_reg32(vd, a->vd); 3163 3164 fpst = fpstatus_ptr(FPST_FPCR); 3165 shift = tcg_constant_i32(frac_bits); 3166 3167 /* Switch on op:U:sx bits */ 3168 switch (a->opc) { 3169 case 0: 3170 gen_helper_vfp_shtos_round_to_nearest(vd, vd, shift, fpst); 3171 break; 3172 case 1: 3173 gen_helper_vfp_sltos_round_to_nearest(vd, vd, shift, fpst); 3174 break; 3175 case 2: 3176 gen_helper_vfp_uhtos_round_to_nearest(vd, vd, shift, fpst); 3177 break; 3178 case 3: 3179 gen_helper_vfp_ultos_round_to_nearest(vd, vd, shift, fpst); 3180 break; 3181 case 4: 3182 gen_helper_vfp_toshs_round_to_zero(vd, vd, shift, fpst); 3183 break; 3184 case 5: 3185 gen_helper_vfp_tosls_round_to_zero(vd, vd, shift, fpst); 3186 break; 3187 case 6: 3188 gen_helper_vfp_touhs_round_to_zero(vd, vd, shift, fpst); 3189 break; 3190 case 7: 3191 gen_helper_vfp_touls_round_to_zero(vd, vd, shift, fpst); 3192 break; 3193 default: 3194 g_assert_not_reached(); 3195 } 3196 3197 vfp_store_reg32(vd, a->vd); 3198 return true; 3199 } 3200 3201 static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a) 3202 { 3203 TCGv_i64 vd; 3204 TCGv_i32 shift; 3205 TCGv_ptr fpst; 3206 int frac_bits; 3207 3208 if (!dc_isar_feature(aa32_fpdp_v3, s)) { 3209 return false; 3210 } 3211 3212 /* UNDEF accesses to D16-D31 if they don't exist. */ 3213 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) { 3214 return false; 3215 } 3216 3217 if (!vfp_access_check(s)) { 3218 return true; 3219 } 3220 3221 frac_bits = (a->opc & 1) ? 
(32 - a->imm) : (16 - a->imm); 3222 3223 vd = tcg_temp_new_i64(); 3224 vfp_load_reg64(vd, a->vd); 3225 3226 fpst = fpstatus_ptr(FPST_FPCR); 3227 shift = tcg_constant_i32(frac_bits); 3228 3229 /* Switch on op:U:sx bits */ 3230 switch (a->opc) { 3231 case 0: 3232 gen_helper_vfp_shtod_round_to_nearest(vd, vd, shift, fpst); 3233 break; 3234 case 1: 3235 gen_helper_vfp_sltod_round_to_nearest(vd, vd, shift, fpst); 3236 break; 3237 case 2: 3238 gen_helper_vfp_uhtod_round_to_nearest(vd, vd, shift, fpst); 3239 break; 3240 case 3: 3241 gen_helper_vfp_ultod_round_to_nearest(vd, vd, shift, fpst); 3242 break; 3243 case 4: 3244 gen_helper_vfp_toshd_round_to_zero(vd, vd, shift, fpst); 3245 break; 3246 case 5: 3247 gen_helper_vfp_tosld_round_to_zero(vd, vd, shift, fpst); 3248 break; 3249 case 6: 3250 gen_helper_vfp_touhd_round_to_zero(vd, vd, shift, fpst); 3251 break; 3252 case 7: 3253 gen_helper_vfp_tould_round_to_zero(vd, vd, shift, fpst); 3254 break; 3255 default: 3256 g_assert_not_reached(); 3257 } 3258 3259 vfp_store_reg64(vd, a->vd); 3260 return true; 3261 } 3262 3263 static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a) 3264 { 3265 TCGv_i32 vm; 3266 TCGv_ptr fpst; 3267 3268 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3269 return false; 3270 } 3271 3272 if (!vfp_access_check(s)) { 3273 return true; 3274 } 3275 3276 fpst = fpstatus_ptr(FPST_FPCR_F16); 3277 vm = tcg_temp_new_i32(); 3278 vfp_load_reg32(vm, a->vm); 3279 3280 if (a->s) { 3281 if (a->rz) { 3282 gen_helper_vfp_tosizh(vm, vm, fpst); 3283 } else { 3284 gen_helper_vfp_tosih(vm, vm, fpst); 3285 } 3286 } else { 3287 if (a->rz) { 3288 gen_helper_vfp_touizh(vm, vm, fpst); 3289 } else { 3290 gen_helper_vfp_touih(vm, vm, fpst); 3291 } 3292 } 3293 vfp_store_reg32(vm, a->vd); 3294 return true; 3295 } 3296 3297 static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a) 3298 { 3299 TCGv_i32 vm; 3300 TCGv_ptr fpst; 3301 3302 if (!dc_isar_feature(aa32_fpsp_v2, s)) { 3303 return false; 3304 } 3305 3306 if (!vfp_access_check(s)) { 3307 return true; 3308 } 3309 3310 fpst = fpstatus_ptr(FPST_FPCR); 3311 vm = tcg_temp_new_i32(); 3312 vfp_load_reg32(vm, a->vm); 3313 3314 if (a->s) { 3315 if (a->rz) { 3316 gen_helper_vfp_tosizs(vm, vm, fpst); 3317 } else { 3318 gen_helper_vfp_tosis(vm, vm, fpst); 3319 } 3320 } else { 3321 if (a->rz) { 3322 gen_helper_vfp_touizs(vm, vm, fpst); 3323 } else { 3324 gen_helper_vfp_touis(vm, vm, fpst); 3325 } 3326 } 3327 vfp_store_reg32(vm, a->vd); 3328 return true; 3329 } 3330 3331 static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a) 3332 { 3333 TCGv_i32 vd; 3334 TCGv_i64 vm; 3335 TCGv_ptr fpst; 3336 3337 if (!dc_isar_feature(aa32_fpdp_v2, s)) { 3338 return false; 3339 } 3340 3341 /* UNDEF accesses to D16-D31 if they don't exist. 
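     */
    /*
     * Note added for clarity: a->rz distinguishes VCVT, which always
     * rounds toward zero (the *_tosizd/_touizd helpers below), from
     * VCVTR, which rounds using the current FPSCR rounding mode.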
*/ 3342 if (!dc_isar_feature(aa32_simd_r32, s) && (a->vm & 0x10)) { 3343 return false; 3344 } 3345 3346 if (!vfp_access_check(s)) { 3347 return true; 3348 } 3349 3350 fpst = fpstatus_ptr(FPST_FPCR); 3351 vm = tcg_temp_new_i64(); 3352 vd = tcg_temp_new_i32(); 3353 vfp_load_reg64(vm, a->vm); 3354 3355 if (a->s) { 3356 if (a->rz) { 3357 gen_helper_vfp_tosizd(vd, vm, fpst); 3358 } else { 3359 gen_helper_vfp_tosid(vd, vm, fpst); 3360 } 3361 } else { 3362 if (a->rz) { 3363 gen_helper_vfp_touizd(vd, vm, fpst); 3364 } else { 3365 gen_helper_vfp_touid(vd, vm, fpst); 3366 } 3367 } 3368 vfp_store_reg32(vd, a->vd); 3369 return true; 3370 } 3371 3372 static bool trans_VINS(DisasContext *s, arg_VINS *a) 3373 { 3374 TCGv_i32 rd, rm; 3375 3376 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3377 return false; 3378 } 3379 3380 if (s->vec_len != 0 || s->vec_stride != 0) { 3381 return false; 3382 } 3383 3384 if (!vfp_access_check(s)) { 3385 return true; 3386 } 3387 3388 /* Insert low half of Vm into high half of Vd */ 3389 rm = tcg_temp_new_i32(); 3390 rd = tcg_temp_new_i32(); 3391 vfp_load_reg32(rm, a->vm); 3392 vfp_load_reg32(rd, a->vd); 3393 tcg_gen_deposit_i32(rd, rd, rm, 16, 16); 3394 vfp_store_reg32(rd, a->vd); 3395 return true; 3396 } 3397 3398 static bool trans_VMOVX(DisasContext *s, arg_VINS *a) 3399 { 3400 TCGv_i32 rm; 3401 3402 if (!dc_isar_feature(aa32_fp16_arith, s)) { 3403 return false; 3404 } 3405 3406 if (s->vec_len != 0 || s->vec_stride != 0) { 3407 return false; 3408 } 3409 3410 if (!vfp_access_check(s)) { 3411 return true; 3412 } 3413 3414 /* Set Vd to high half of Vm */ 3415 rm = tcg_temp_new_i32(); 3416 vfp_load_reg32(rm, a->vm); 3417 tcg_gen_shri_i32(rm, rm, 16); 3418 vfp_store_reg32(rm, a->vd); 3419 return true; 3420 } 3421
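
/*
 * Editor's illustrative sketch (not part of the original file): the
 * do_vfm_* functions above negate selected inputs with separate helper
 * calls and then emit one fused multiply-add.  The equivalent reasoning
 * in plain C99, using fma() from <math.h>, looks like this; the function
 * name emulate_vfms is purely illustrative:
 *
 *     #include <math.h>
 *
 *     // VFMS: vd = vd + (-vn) * vm, computed with a single rounding.
 *     // Negating vn only flips its sign bit, so it is exact and adds
 *     // no extra rounding step before the fused operation.
 *     static double emulate_vfms(double vd, double vn, double vm)
 *     {
 *         return fma(-vn, vm, vd);
 *     }
 *
 * For example emulate_vfms(10.0, 3.0, 4.0) evaluates fma(-3.0, 4.0, 10.0)
 * and returns -2.0, matching vd - vn * vm.
 *
 * A similar worked example for the fixed-point conversions above: in
 * trans_VCVT_fix_sp(), a 16-bit fixed-point type (sx clear in a->opc)
 * with a->imm == 12 gives frac_bits = 16 - 12 = 4, so the helper is
 * passed a shift constant of 4, i.e. the value has 4 fraction bits.
 */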