/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_no_predication(DisasContext *s)
{
    /*
     * Return true if we are executing the entire MVE instruction
     * with no predication or partial-execution, and so we can safely
     * use an inline TCG vector implementation.
     */
    return s->eci == 0 && s->mve_no_pred;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
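     * (The ECI field records which beats of the insn have already been
     * executed by an earlier, interrupted attempt: e.g. ECI_A0A1 means
     * beats A0 and A1 are done and only the remaining beats should be
     * performed, while ECI_A0A1A2B0 also covers the first beat of the
     * following insn.)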
92 */ 93 s->eci_handled = true; 94 switch (s->eci) { 95 case ECI_NONE: 96 case ECI_A0: 97 case ECI_A0A1: 98 case ECI_A0A1A2: 99 case ECI_A0A1A2B0: 100 return true; 101 default: 102 /* Reserved value: INVSTATE UsageFault */ 103 gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized()); 104 return false; 105 } 106 } 107 108 void mve_update_eci(DisasContext *s) 109 { 110 /* 111 * The helper function will always update the CPUState field, 112 * so we only need to update the DisasContext field. 113 */ 114 if (s->eci) { 115 s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE; 116 } 117 } 118 119 void mve_update_and_store_eci(DisasContext *s) 120 { 121 /* 122 * For insns which don't call a helper function that will call 123 * mve_advance_vpt(), this version updates s->eci and also stores 124 * it out to the CPUState field. 125 */ 126 if (s->eci) { 127 mve_update_eci(s); 128 store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits); 129 } 130 } 131 132 static bool mve_skip_first_beat(DisasContext *s) 133 { 134 /* Return true if PSR.ECI says we must skip the first beat of this insn */ 135 switch (s->eci) { 136 case ECI_NONE: 137 return false; 138 case ECI_A0: 139 case ECI_A0A1: 140 case ECI_A0A1A2: 141 case ECI_A0A1A2B0: 142 return true; 143 default: 144 g_assert_not_reached(); 145 } 146 } 147 148 static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn, 149 unsigned msize) 150 { 151 TCGv_i32 addr; 152 uint32_t offset; 153 TCGv_ptr qreg; 154 155 if (!dc_isar_feature(aa32_mve, s) || 156 !mve_check_qreg_bank(s, a->qd) || 157 !fn) { 158 return false; 159 } 160 161 /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */ 162 if (a->rn == 15 || (a->rn == 13 && a->w)) { 163 return false; 164 } 165 166 if (!mve_eci_check(s) || !vfp_access_check(s)) { 167 return true; 168 } 169 170 offset = a->imm << msize; 171 if (!a->a) { 172 offset = -offset; 173 } 174 addr = load_reg(s, a->rn); 175 if (a->p) { 176 tcg_gen_addi_i32(addr, addr, offset); 177 } 178 179 qreg = mve_qreg_ptr(a->qd); 180 fn(cpu_env, qreg, addr); 181 tcg_temp_free_ptr(qreg); 182 183 /* 184 * Writeback always happens after the last beat of the insn, 185 * regardless of predication 186 */ 187 if (a->w) { 188 if (!a->p) { 189 tcg_gen_addi_i32(addr, addr, offset); 190 } 191 store_reg(s, a->rn, addr); 192 } else { 193 tcg_temp_free_i32(addr); 194 } 195 mve_update_eci(s); 196 return true; 197 } 198 199 static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a) 200 { 201 static MVEGenLdStFn * const ldstfns[4][2] = { 202 { gen_helper_mve_vstrb, gen_helper_mve_vldrb }, 203 { gen_helper_mve_vstrh, gen_helper_mve_vldrh }, 204 { gen_helper_mve_vstrw, gen_helper_mve_vldrw }, 205 { NULL, NULL } 206 }; 207 return do_ldst(s, a, ldstfns[a->size][a->l], a->size); 208 } 209 210 #define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE) \ 211 static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a) \ 212 { \ 213 static MVEGenLdStFn * const ldstfns[2][2] = { \ 214 { gen_helper_mve_##ST, gen_helper_mve_##SLD }, \ 215 { NULL, gen_helper_mve_##ULD }, \ 216 }; \ 217 return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE); \ 218 } 219 220 DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8) 221 DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8) 222 DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16) 223 224 static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn) 225 { 226 TCGv_i32 addr; 227 TCGv_ptr qd, qm; 228 229 if (!dc_isar_feature(aa32_mve, s) || 230 !mve_check_qreg_bank(s, a->qd | a->qm) || 
231 !fn || a->rn == 15) { 232 /* Rn case is UNPREDICTABLE */ 233 return false; 234 } 235 236 if (!mve_eci_check(s) || !vfp_access_check(s)) { 237 return true; 238 } 239 240 addr = load_reg(s, a->rn); 241 242 qd = mve_qreg_ptr(a->qd); 243 qm = mve_qreg_ptr(a->qm); 244 fn(cpu_env, qd, qm, addr); 245 tcg_temp_free_ptr(qd); 246 tcg_temp_free_ptr(qm); 247 tcg_temp_free_i32(addr); 248 mve_update_eci(s); 249 return true; 250 } 251 252 /* 253 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads 254 * signextended to halfword elements in register". _os_ indicates that 255 * the offsets in Qm should be scaled by the element size. 256 */ 257 /* This macro is just to make the arrays more compact in these functions */ 258 #define F(N) gen_helper_mve_##N 259 260 /* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */ 261 static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a) 262 { 263 static MVEGenLdStSGFn * const fns[2][4][4] = { { 264 { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL }, 265 { NULL, NULL, F(vldrh_sg_sw), NULL }, 266 { NULL, NULL, NULL, NULL }, 267 { NULL, NULL, NULL, NULL } 268 }, { 269 { NULL, NULL, NULL, NULL }, 270 { NULL, NULL, F(vldrh_sg_os_sw), NULL }, 271 { NULL, NULL, NULL, NULL }, 272 { NULL, NULL, NULL, NULL } 273 } 274 }; 275 if (a->qd == a->qm) { 276 return false; /* UNPREDICTABLE */ 277 } 278 return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]); 279 } 280 281 static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a) 282 { 283 static MVEGenLdStSGFn * const fns[2][4][4] = { { 284 { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL }, 285 { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL }, 286 { NULL, NULL, F(vldrw_sg_uw), NULL }, 287 { NULL, NULL, NULL, F(vldrd_sg_ud) } 288 }, { 289 { NULL, NULL, NULL, NULL }, 290 { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL }, 291 { NULL, NULL, F(vldrw_sg_os_uw), NULL }, 292 { NULL, NULL, NULL, F(vldrd_sg_os_ud) } 293 } 294 }; 295 if (a->qd == a->qm) { 296 return false; /* UNPREDICTABLE */ 297 } 298 return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]); 299 } 300 301 static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a) 302 { 303 static MVEGenLdStSGFn * const fns[2][4][4] = { { 304 { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL }, 305 { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL }, 306 { NULL, NULL, F(vstrw_sg_uw), NULL }, 307 { NULL, NULL, NULL, F(vstrd_sg_ud) } 308 }, { 309 { NULL, NULL, NULL, NULL }, 310 { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL }, 311 { NULL, NULL, F(vstrw_sg_os_uw), NULL }, 312 { NULL, NULL, NULL, F(vstrd_sg_os_ud) } 313 } 314 }; 315 return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]); 316 } 317 318 #undef F 319 320 static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a, 321 MVEGenLdStSGFn *fn, unsigned msize) 322 { 323 uint32_t offset; 324 TCGv_ptr qd, qm; 325 326 if (!dc_isar_feature(aa32_mve, s) || 327 !mve_check_qreg_bank(s, a->qd | a->qm) || 328 !fn) { 329 return false; 330 } 331 332 if (!mve_eci_check(s) || !vfp_access_check(s)) { 333 return true; 334 } 335 336 offset = a->imm << msize; 337 if (!a->a) { 338 offset = -offset; 339 } 340 341 qd = mve_qreg_ptr(a->qd); 342 qm = mve_qreg_ptr(a->qm); 343 fn(cpu_env, qd, qm, tcg_constant_i32(offset)); 344 tcg_temp_free_ptr(qd); 345 tcg_temp_free_ptr(qm); 346 mve_update_eci(s); 347 return true; 348 } 349 350 static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a) 351 { 352 static MVEGenLdStSGFn * const fns[] = { 353 gen_helper_mve_vldrw_sg_uw, 354 gen_helper_mve_vldrw_sg_wb_uw, 355 }; 
356 if (a->qd == a->qm) { 357 return false; /* UNPREDICTABLE */ 358 } 359 return do_ldst_sg_imm(s, a, fns[a->w], MO_32); 360 } 361 362 static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a) 363 { 364 static MVEGenLdStSGFn * const fns[] = { 365 gen_helper_mve_vldrd_sg_ud, 366 gen_helper_mve_vldrd_sg_wb_ud, 367 }; 368 if (a->qd == a->qm) { 369 return false; /* UNPREDICTABLE */ 370 } 371 return do_ldst_sg_imm(s, a, fns[a->w], MO_64); 372 } 373 374 static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a) 375 { 376 static MVEGenLdStSGFn * const fns[] = { 377 gen_helper_mve_vstrw_sg_uw, 378 gen_helper_mve_vstrw_sg_wb_uw, 379 }; 380 return do_ldst_sg_imm(s, a, fns[a->w], MO_32); 381 } 382 383 static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a) 384 { 385 static MVEGenLdStSGFn * const fns[] = { 386 gen_helper_mve_vstrd_sg_ud, 387 gen_helper_mve_vstrd_sg_wb_ud, 388 }; 389 return do_ldst_sg_imm(s, a, fns[a->w], MO_64); 390 } 391 392 static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn, 393 int addrinc) 394 { 395 TCGv_i32 rn; 396 397 if (!dc_isar_feature(aa32_mve, s) || 398 !mve_check_qreg_bank(s, a->qd) || 399 !fn || (a->rn == 13 && a->w) || a->rn == 15) { 400 /* Variously UNPREDICTABLE or UNDEF or related-encoding */ 401 return false; 402 } 403 if (!mve_eci_check(s) || !vfp_access_check(s)) { 404 return true; 405 } 406 407 rn = load_reg(s, a->rn); 408 /* 409 * We pass the index of Qd, not a pointer, because the helper must 410 * access multiple Q registers starting at Qd and working up. 411 */ 412 fn(cpu_env, tcg_constant_i32(a->qd), rn); 413 414 if (a->w) { 415 tcg_gen_addi_i32(rn, rn, addrinc); 416 store_reg(s, a->rn, rn); 417 } else { 418 tcg_temp_free_i32(rn); 419 } 420 mve_update_and_store_eci(s); 421 return true; 422 } 423 424 /* This macro is just to make the arrays more compact in these functions */ 425 #define F(N) gen_helper_mve_##N 426 427 static bool trans_VLD2(DisasContext *s, arg_vldst_il *a) 428 { 429 static MVEGenLdStIlFn * const fns[4][4] = { 430 { F(vld20b), F(vld20h), F(vld20w), NULL, }, 431 { F(vld21b), F(vld21h), F(vld21w), NULL, }, 432 { NULL, NULL, NULL, NULL }, 433 { NULL, NULL, NULL, NULL }, 434 }; 435 if (a->qd > 6) { 436 return false; 437 } 438 return do_vldst_il(s, a, fns[a->pat][a->size], 32); 439 } 440 441 static bool trans_VLD4(DisasContext *s, arg_vldst_il *a) 442 { 443 static MVEGenLdStIlFn * const fns[4][4] = { 444 { F(vld40b), F(vld40h), F(vld40w), NULL, }, 445 { F(vld41b), F(vld41h), F(vld41w), NULL, }, 446 { F(vld42b), F(vld42h), F(vld42w), NULL, }, 447 { F(vld43b), F(vld43h), F(vld43w), NULL, }, 448 }; 449 if (a->qd > 4) { 450 return false; 451 } 452 return do_vldst_il(s, a, fns[a->pat][a->size], 64); 453 } 454 455 static bool trans_VST2(DisasContext *s, arg_vldst_il *a) 456 { 457 static MVEGenLdStIlFn * const fns[4][4] = { 458 { F(vst20b), F(vst20h), F(vst20w), NULL, }, 459 { F(vst21b), F(vst21h), F(vst21w), NULL, }, 460 { NULL, NULL, NULL, NULL }, 461 { NULL, NULL, NULL, NULL }, 462 }; 463 if (a->qd > 6) { 464 return false; 465 } 466 return do_vldst_il(s, a, fns[a->pat][a->size], 32); 467 } 468 469 static bool trans_VST4(DisasContext *s, arg_vldst_il *a) 470 { 471 static MVEGenLdStIlFn * const fns[4][4] = { 472 { F(vst40b), F(vst40h), F(vst40w), NULL, }, 473 { F(vst41b), F(vst41h), F(vst41w), NULL, }, 474 { F(vst42b), F(vst42h), F(vst42w), NULL, }, 475 { F(vst43b), F(vst43h), F(vst43w), NULL, }, 476 }; 477 if (a->qd > 4) { 478 return false; 479 } 480 return do_vldst_il(s, a, 
fns[a->pat][a->size], 64); 481 } 482 483 #undef F 484 485 static bool trans_VDUP(DisasContext *s, arg_VDUP *a) 486 { 487 TCGv_ptr qd; 488 TCGv_i32 rt; 489 490 if (!dc_isar_feature(aa32_mve, s) || 491 !mve_check_qreg_bank(s, a->qd)) { 492 return false; 493 } 494 if (a->rt == 13 || a->rt == 15) { 495 /* UNPREDICTABLE; we choose to UNDEF */ 496 return false; 497 } 498 if (!mve_eci_check(s) || !vfp_access_check(s)) { 499 return true; 500 } 501 502 rt = load_reg(s, a->rt); 503 if (mve_no_predication(s)) { 504 tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt); 505 } else { 506 qd = mve_qreg_ptr(a->qd); 507 tcg_gen_dup_i32(a->size, rt, rt); 508 gen_helper_mve_vdup(cpu_env, qd, rt); 509 tcg_temp_free_ptr(qd); 510 } 511 tcg_temp_free_i32(rt); 512 mve_update_eci(s); 513 return true; 514 } 515 516 static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn, 517 GVecGen2Fn vecfn) 518 { 519 TCGv_ptr qd, qm; 520 521 if (!dc_isar_feature(aa32_mve, s) || 522 !mve_check_qreg_bank(s, a->qd | a->qm) || 523 !fn) { 524 return false; 525 } 526 527 if (!mve_eci_check(s) || !vfp_access_check(s)) { 528 return true; 529 } 530 531 if (vecfn && mve_no_predication(s)) { 532 vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16); 533 } else { 534 qd = mve_qreg_ptr(a->qd); 535 qm = mve_qreg_ptr(a->qm); 536 fn(cpu_env, qd, qm); 537 tcg_temp_free_ptr(qd); 538 tcg_temp_free_ptr(qm); 539 } 540 mve_update_eci(s); 541 return true; 542 } 543 544 static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn) 545 { 546 return do_1op_vec(s, a, fn, NULL); 547 } 548 549 #define DO_1OP_VEC(INSN, FN, VECFN) \ 550 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 551 { \ 552 static MVEGenOneOpFn * const fns[] = { \ 553 gen_helper_mve_##FN##b, \ 554 gen_helper_mve_##FN##h, \ 555 gen_helper_mve_##FN##w, \ 556 NULL, \ 557 }; \ 558 return do_1op_vec(s, a, fns[a->size], VECFN); \ 559 } 560 561 #define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL) 562 563 DO_1OP(VCLZ, vclz) 564 DO_1OP(VCLS, vcls) 565 DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs) 566 DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg) 567 DO_1OP(VQABS, vqabs) 568 DO_1OP(VQNEG, vqneg) 569 DO_1OP(VMAXA, vmaxa) 570 DO_1OP(VMINA, vmina) 571 572 /* 573 * For simple float/int conversions we use the fixed-point 574 * conversion helpers with a zero shift count 575 */ 576 #define DO_VCVT(INSN, HFN, SFN) \ 577 static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \ 578 { \ 579 gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0)); \ 580 } \ 581 static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \ 582 { \ 583 gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0)); \ 584 } \ 585 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 586 { \ 587 static MVEGenOneOpFn * const fns[] = { \ 588 NULL, \ 589 gen_##INSN##h, \ 590 gen_##INSN##s, \ 591 NULL, \ 592 }; \ 593 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 594 return false; \ 595 } \ 596 return do_1op(s, a, fns[a->size]); \ 597 } 598 599 DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf) 600 DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf) 601 DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs) 602 DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu) 603 604 static bool do_vcvt_rmode(DisasContext *s, arg_1op *a, 605 enum arm_fprounding rmode, bool u) 606 { 607 /* 608 * Handle VCVT fp to int with specified rounding mode. 609 * This is a 1op fn but we must pass the rounding mode as 610 * an immediate to the helper. 
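     * (For instance VCVTA passes FPROUNDING_TIEAWAY and VCVTM passes
     * FPROUNDING_NEGINF; arm_rmode_to_sf() converts these into the
     * softfloat float_round_* values that the helpers use.)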
611 */ 612 TCGv_ptr qd, qm; 613 static MVEGenVCVTRmodeFn * const fns[4][2] = { 614 { NULL, NULL }, 615 { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh }, 616 { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us }, 617 { NULL, NULL }, 618 }; 619 MVEGenVCVTRmodeFn *fn = fns[a->size][u]; 620 621 if (!dc_isar_feature(aa32_mve_fp, s) || 622 !mve_check_qreg_bank(s, a->qd | a->qm) || 623 !fn) { 624 return false; 625 } 626 627 if (!mve_eci_check(s) || !vfp_access_check(s)) { 628 return true; 629 } 630 631 qd = mve_qreg_ptr(a->qd); 632 qm = mve_qreg_ptr(a->qm); 633 fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode))); 634 tcg_temp_free_ptr(qd); 635 tcg_temp_free_ptr(qm); 636 mve_update_eci(s); 637 return true; 638 } 639 640 #define DO_VCVT_RMODE(INSN, RMODE, U) \ 641 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 642 { \ 643 return do_vcvt_rmode(s, a, RMODE, U); \ 644 } \ 645 646 DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false) 647 DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true) 648 DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false) 649 DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true) 650 DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false) 651 DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true) 652 DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false) 653 DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true) 654 655 #define DO_VCVT_SH(INSN, FN) \ 656 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 657 { \ 658 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 659 return false; \ 660 } \ 661 return do_1op(s, a, gen_helper_mve_##FN); \ 662 } \ 663 664 DO_VCVT_SH(VCVTB_SH, vcvtb_sh) 665 DO_VCVT_SH(VCVTT_SH, vcvtt_sh) 666 DO_VCVT_SH(VCVTB_HS, vcvtb_hs) 667 DO_VCVT_SH(VCVTT_HS, vcvtt_hs) 668 669 #define DO_VRINT(INSN, RMODE) \ 670 static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \ 671 { \ 672 gen_helper_mve_vrint_rm_h(env, qd, qm, \ 673 tcg_constant_i32(arm_rmode_to_sf(RMODE))); \ 674 } \ 675 static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \ 676 { \ 677 gen_helper_mve_vrint_rm_s(env, qd, qm, \ 678 tcg_constant_i32(arm_rmode_to_sf(RMODE))); \ 679 } \ 680 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 681 { \ 682 static MVEGenOneOpFn * const fns[] = { \ 683 NULL, \ 684 gen_##INSN##h, \ 685 gen_##INSN##s, \ 686 NULL, \ 687 }; \ 688 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 689 return false; \ 690 } \ 691 return do_1op(s, a, fns[a->size]); \ 692 } 693 694 DO_VRINT(VRINTN, FPROUNDING_TIEEVEN) 695 DO_VRINT(VRINTA, FPROUNDING_TIEAWAY) 696 DO_VRINT(VRINTZ, FPROUNDING_ZERO) 697 DO_VRINT(VRINTM, FPROUNDING_NEGINF) 698 DO_VRINT(VRINTP, FPROUNDING_POSINF) 699 700 static bool trans_VRINTX(DisasContext *s, arg_1op *a) 701 { 702 static MVEGenOneOpFn * const fns[] = { 703 NULL, 704 gen_helper_mve_vrintx_h, 705 gen_helper_mve_vrintx_s, 706 NULL, 707 }; 708 if (!dc_isar_feature(aa32_mve_fp, s)) { 709 return false; 710 } 711 return do_1op(s, a, fns[a->size]); 712 } 713 714 /* Narrowing moves: only size 0 and 1 are valid */ 715 #define DO_VMOVN(INSN, FN) \ 716 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 717 { \ 718 static MVEGenOneOpFn * const fns[] = { \ 719 gen_helper_mve_##FN##b, \ 720 gen_helper_mve_##FN##h, \ 721 NULL, \ 722 NULL, \ 723 }; \ 724 return do_1op(s, a, fns[a->size]); \ 725 } 726 727 DO_VMOVN(VMOVNB, vmovnb) 728 DO_VMOVN(VMOVNT, vmovnt) 729 DO_VMOVN(VQMOVUNB, vqmovunb) 730 DO_VMOVN(VQMOVUNT, vqmovunt) 731 DO_VMOVN(VQMOVN_BS, vqmovnbs) 732 DO_VMOVN(VQMOVN_TS, vqmovnts) 733 DO_VMOVN(VQMOVN_BU, vqmovnbu) 734 DO_VMOVN(VQMOVN_TU, vqmovntu) 735 736 static 
bool trans_VREV16(DisasContext *s, arg_1op *a) 737 { 738 static MVEGenOneOpFn * const fns[] = { 739 gen_helper_mve_vrev16b, 740 NULL, 741 NULL, 742 NULL, 743 }; 744 return do_1op(s, a, fns[a->size]); 745 } 746 747 static bool trans_VREV32(DisasContext *s, arg_1op *a) 748 { 749 static MVEGenOneOpFn * const fns[] = { 750 gen_helper_mve_vrev32b, 751 gen_helper_mve_vrev32h, 752 NULL, 753 NULL, 754 }; 755 return do_1op(s, a, fns[a->size]); 756 } 757 758 static bool trans_VREV64(DisasContext *s, arg_1op *a) 759 { 760 static MVEGenOneOpFn * const fns[] = { 761 gen_helper_mve_vrev64b, 762 gen_helper_mve_vrev64h, 763 gen_helper_mve_vrev64w, 764 NULL, 765 }; 766 return do_1op(s, a, fns[a->size]); 767 } 768 769 static bool trans_VMVN(DisasContext *s, arg_1op *a) 770 { 771 return do_1op_vec(s, a, gen_helper_mve_vmvn, tcg_gen_gvec_not); 772 } 773 774 static bool trans_VABS_fp(DisasContext *s, arg_1op *a) 775 { 776 static MVEGenOneOpFn * const fns[] = { 777 NULL, 778 gen_helper_mve_vfabsh, 779 gen_helper_mve_vfabss, 780 NULL, 781 }; 782 if (!dc_isar_feature(aa32_mve_fp, s)) { 783 return false; 784 } 785 return do_1op(s, a, fns[a->size]); 786 } 787 788 static bool trans_VNEG_fp(DisasContext *s, arg_1op *a) 789 { 790 static MVEGenOneOpFn * const fns[] = { 791 NULL, 792 gen_helper_mve_vfnegh, 793 gen_helper_mve_vfnegs, 794 NULL, 795 }; 796 if (!dc_isar_feature(aa32_mve_fp, s)) { 797 return false; 798 } 799 return do_1op(s, a, fns[a->size]); 800 } 801 802 static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn, 803 GVecGen3Fn *vecfn) 804 { 805 TCGv_ptr qd, qn, qm; 806 807 if (!dc_isar_feature(aa32_mve, s) || 808 !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) || 809 !fn) { 810 return false; 811 } 812 if (!mve_eci_check(s) || !vfp_access_check(s)) { 813 return true; 814 } 815 816 if (vecfn && mve_no_predication(s)) { 817 vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn), 818 mve_qreg_offset(a->qm), 16, 16); 819 } else { 820 qd = mve_qreg_ptr(a->qd); 821 qn = mve_qreg_ptr(a->qn); 822 qm = mve_qreg_ptr(a->qm); 823 fn(cpu_env, qd, qn, qm); 824 tcg_temp_free_ptr(qd); 825 tcg_temp_free_ptr(qn); 826 tcg_temp_free_ptr(qm); 827 } 828 mve_update_eci(s); 829 return true; 830 } 831 832 static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn) 833 { 834 return do_2op_vec(s, a, fn, NULL); 835 } 836 837 #define DO_LOGIC(INSN, HELPER, VECFN) \ 838 static bool trans_##INSN(DisasContext *s, arg_2op *a) \ 839 { \ 840 return do_2op_vec(s, a, HELPER, VECFN); \ 841 } 842 843 DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and) 844 DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc) 845 DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or) 846 DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc) 847 DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor) 848 849 static bool trans_VPSEL(DisasContext *s, arg_2op *a) 850 { 851 /* This insn updates predication bits */ 852 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 853 return do_2op(s, a, gen_helper_mve_vpsel); 854 } 855 856 #define DO_2OP_VEC(INSN, FN, VECFN) \ 857 static bool trans_##INSN(DisasContext *s, arg_2op *a) \ 858 { \ 859 static MVEGenTwoOpFn * const fns[] = { \ 860 gen_helper_mve_##FN##b, \ 861 gen_helper_mve_##FN##h, \ 862 gen_helper_mve_##FN##w, \ 863 NULL, \ 864 }; \ 865 return do_2op_vec(s, a, fns[a->size], VECFN); \ 866 } 867 868 #define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL) 869 870 DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add) 871 DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub) 872 DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul) 873 DO_2OP(VMULH_S, 
vmulhs) 874 DO_2OP(VMULH_U, vmulhu) 875 DO_2OP(VRMULH_S, vrmulhs) 876 DO_2OP(VRMULH_U, vrmulhu) 877 DO_2OP_VEC(VMAX_S, vmaxs, tcg_gen_gvec_smax) 878 DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax) 879 DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin) 880 DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin) 881 DO_2OP(VABD_S, vabds) 882 DO_2OP(VABD_U, vabdu) 883 DO_2OP(VHADD_S, vhadds) 884 DO_2OP(VHADD_U, vhaddu) 885 DO_2OP(VHSUB_S, vhsubs) 886 DO_2OP(VHSUB_U, vhsubu) 887 DO_2OP(VMULL_BS, vmullbs) 888 DO_2OP(VMULL_BU, vmullbu) 889 DO_2OP(VMULL_TS, vmullts) 890 DO_2OP(VMULL_TU, vmulltu) 891 DO_2OP(VQDMULH, vqdmulh) 892 DO_2OP(VQRDMULH, vqrdmulh) 893 DO_2OP(VQADD_S, vqadds) 894 DO_2OP(VQADD_U, vqaddu) 895 DO_2OP(VQSUB_S, vqsubs) 896 DO_2OP(VQSUB_U, vqsubu) 897 DO_2OP(VSHL_S, vshls) 898 DO_2OP(VSHL_U, vshlu) 899 DO_2OP(VRSHL_S, vrshls) 900 DO_2OP(VRSHL_U, vrshlu) 901 DO_2OP(VQSHL_S, vqshls) 902 DO_2OP(VQSHL_U, vqshlu) 903 DO_2OP(VQRSHL_S, vqrshls) 904 DO_2OP(VQRSHL_U, vqrshlu) 905 DO_2OP(VQDMLADH, vqdmladh) 906 DO_2OP(VQDMLADHX, vqdmladhx) 907 DO_2OP(VQRDMLADH, vqrdmladh) 908 DO_2OP(VQRDMLADHX, vqrdmladhx) 909 DO_2OP(VQDMLSDH, vqdmlsdh) 910 DO_2OP(VQDMLSDHX, vqdmlsdhx) 911 DO_2OP(VQRDMLSDH, vqrdmlsdh) 912 DO_2OP(VQRDMLSDHX, vqrdmlsdhx) 913 DO_2OP(VRHADD_S, vrhadds) 914 DO_2OP(VRHADD_U, vrhaddu) 915 /* 916 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose 917 * so we can reuse the DO_2OP macro. (Our implementation calculates the 918 * "expected" results in this case.) Similarly for VHCADD. 919 */ 920 DO_2OP(VCADD90, vcadd90) 921 DO_2OP(VCADD270, vcadd270) 922 DO_2OP(VHCADD90, vhcadd90) 923 DO_2OP(VHCADD270, vhcadd270) 924 925 static bool trans_VQDMULLB(DisasContext *s, arg_2op *a) 926 { 927 static MVEGenTwoOpFn * const fns[] = { 928 NULL, 929 gen_helper_mve_vqdmullbh, 930 gen_helper_mve_vqdmullbw, 931 NULL, 932 }; 933 if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) { 934 /* UNPREDICTABLE; we choose to undef */ 935 return false; 936 } 937 return do_2op(s, a, fns[a->size]); 938 } 939 940 static bool trans_VQDMULLT(DisasContext *s, arg_2op *a) 941 { 942 static MVEGenTwoOpFn * const fns[] = { 943 NULL, 944 gen_helper_mve_vqdmullth, 945 gen_helper_mve_vqdmulltw, 946 NULL, 947 }; 948 if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) { 949 /* UNPREDICTABLE; we choose to undef */ 950 return false; 951 } 952 return do_2op(s, a, fns[a->size]); 953 } 954 955 static bool trans_VMULLP_B(DisasContext *s, arg_2op *a) 956 { 957 /* 958 * Note that a->size indicates the output size, ie VMULL.P8 959 * is the 8x8->16 operation and a->size is MO_16; VMULL.P16 960 * is the 16x16->32 operation and a->size is MO_32. 961 */ 962 static MVEGenTwoOpFn * const fns[] = { 963 NULL, 964 gen_helper_mve_vmullpbh, 965 gen_helper_mve_vmullpbw, 966 NULL, 967 }; 968 return do_2op(s, a, fns[a->size]); 969 } 970 971 static bool trans_VMULLP_T(DisasContext *s, arg_2op *a) 972 { 973 /* a->size is as for trans_VMULLP_B */ 974 static MVEGenTwoOpFn * const fns[] = { 975 NULL, 976 gen_helper_mve_vmullpth, 977 gen_helper_mve_vmullptw, 978 NULL, 979 }; 980 return do_2op(s, a, fns[a->size]); 981 } 982 983 /* 984 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry 985 * of the 32-bit elements in each lane of the input vectors, where the 986 * carry-out of each add is the carry-in of the next. The initial carry 987 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C 988 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C. 
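 * In effect the four 32-bit elements behave like a single 128-bit
 * addition or subtraction, with the final carry-out landing in FPSCR.C.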
989 * These insns are subject to beat-wise execution. Partial execution 990 * of an I=1 (initial carry input fixed) insn which does not 991 * execute the first beat must start with the current FPSCR.NZCV 992 * value, not the fixed constant input. 993 */ 994 static bool trans_VADC(DisasContext *s, arg_2op *a) 995 { 996 return do_2op(s, a, gen_helper_mve_vadc); 997 } 998 999 static bool trans_VADCI(DisasContext *s, arg_2op *a) 1000 { 1001 if (mve_skip_first_beat(s)) { 1002 return trans_VADC(s, a); 1003 } 1004 return do_2op(s, a, gen_helper_mve_vadci); 1005 } 1006 1007 static bool trans_VSBC(DisasContext *s, arg_2op *a) 1008 { 1009 return do_2op(s, a, gen_helper_mve_vsbc); 1010 } 1011 1012 static bool trans_VSBCI(DisasContext *s, arg_2op *a) 1013 { 1014 if (mve_skip_first_beat(s)) { 1015 return trans_VSBC(s, a); 1016 } 1017 return do_2op(s, a, gen_helper_mve_vsbci); 1018 } 1019 1020 #define DO_2OP_FP(INSN, FN) \ 1021 static bool trans_##INSN(DisasContext *s, arg_2op *a) \ 1022 { \ 1023 static MVEGenTwoOpFn * const fns[] = { \ 1024 NULL, \ 1025 gen_helper_mve_##FN##h, \ 1026 gen_helper_mve_##FN##s, \ 1027 NULL, \ 1028 }; \ 1029 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 1030 return false; \ 1031 } \ 1032 return do_2op(s, a, fns[a->size]); \ 1033 } 1034 1035 DO_2OP_FP(VADD_fp, vfadd) 1036 DO_2OP_FP(VSUB_fp, vfsub) 1037 DO_2OP_FP(VMUL_fp, vfmul) 1038 DO_2OP_FP(VABD_fp, vfabd) 1039 DO_2OP_FP(VMAXNM, vmaxnm) 1040 DO_2OP_FP(VMINNM, vminnm) 1041 DO_2OP_FP(VCADD90_fp, vfcadd90) 1042 DO_2OP_FP(VCADD270_fp, vfcadd270) 1043 DO_2OP_FP(VFMA, vfma) 1044 DO_2OP_FP(VFMS, vfms) 1045 DO_2OP_FP(VCMUL0, vcmul0) 1046 DO_2OP_FP(VCMUL90, vcmul90) 1047 DO_2OP_FP(VCMUL180, vcmul180) 1048 DO_2OP_FP(VCMUL270, vcmul270) 1049 DO_2OP_FP(VCMLA0, vcmla0) 1050 DO_2OP_FP(VCMLA90, vcmla90) 1051 DO_2OP_FP(VCMLA180, vcmla180) 1052 DO_2OP_FP(VCMLA270, vcmla270) 1053 DO_2OP_FP(VMAXNMA, vmaxnma) 1054 DO_2OP_FP(VMINNMA, vminnma) 1055 1056 static bool do_2op_scalar(DisasContext *s, arg_2scalar *a, 1057 MVEGenTwoOpScalarFn fn) 1058 { 1059 TCGv_ptr qd, qn; 1060 TCGv_i32 rm; 1061 1062 if (!dc_isar_feature(aa32_mve, s) || 1063 !mve_check_qreg_bank(s, a->qd | a->qn) || 1064 !fn) { 1065 return false; 1066 } 1067 if (a->rm == 13 || a->rm == 15) { 1068 /* UNPREDICTABLE */ 1069 return false; 1070 } 1071 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1072 return true; 1073 } 1074 1075 qd = mve_qreg_ptr(a->qd); 1076 qn = mve_qreg_ptr(a->qn); 1077 rm = load_reg(s, a->rm); 1078 fn(cpu_env, qd, qn, rm); 1079 tcg_temp_free_i32(rm); 1080 tcg_temp_free_ptr(qd); 1081 tcg_temp_free_ptr(qn); 1082 mve_update_eci(s); 1083 return true; 1084 } 1085 1086 #define DO_2OP_SCALAR(INSN, FN) \ 1087 static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \ 1088 { \ 1089 static MVEGenTwoOpScalarFn * const fns[] = { \ 1090 gen_helper_mve_##FN##b, \ 1091 gen_helper_mve_##FN##h, \ 1092 gen_helper_mve_##FN##w, \ 1093 NULL, \ 1094 }; \ 1095 return do_2op_scalar(s, a, fns[a->size]); \ 1096 } 1097 1098 DO_2OP_SCALAR(VADD_scalar, vadd_scalar) 1099 DO_2OP_SCALAR(VSUB_scalar, vsub_scalar) 1100 DO_2OP_SCALAR(VMUL_scalar, vmul_scalar) 1101 DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar) 1102 DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar) 1103 DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar) 1104 DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar) 1105 DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar) 1106 DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar) 1107 DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar) 1108 DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar) 1109 DO_2OP_SCALAR(VQDMULH_scalar, 
vqdmulh_scalar) 1110 DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar) 1111 DO_2OP_SCALAR(VBRSR, vbrsr) 1112 DO_2OP_SCALAR(VMLA, vmla) 1113 DO_2OP_SCALAR(VMLAS, vmlas) 1114 DO_2OP_SCALAR(VQDMLAH, vqdmlah) 1115 DO_2OP_SCALAR(VQRDMLAH, vqrdmlah) 1116 DO_2OP_SCALAR(VQDMLASH, vqdmlash) 1117 DO_2OP_SCALAR(VQRDMLASH, vqrdmlash) 1118 1119 static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a) 1120 { 1121 static MVEGenTwoOpScalarFn * const fns[] = { 1122 NULL, 1123 gen_helper_mve_vqdmullb_scalarh, 1124 gen_helper_mve_vqdmullb_scalarw, 1125 NULL, 1126 }; 1127 if (a->qd == a->qn && a->size == MO_32) { 1128 /* UNPREDICTABLE; we choose to undef */ 1129 return false; 1130 } 1131 return do_2op_scalar(s, a, fns[a->size]); 1132 } 1133 1134 static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a) 1135 { 1136 static MVEGenTwoOpScalarFn * const fns[] = { 1137 NULL, 1138 gen_helper_mve_vqdmullt_scalarh, 1139 gen_helper_mve_vqdmullt_scalarw, 1140 NULL, 1141 }; 1142 if (a->qd == a->qn && a->size == MO_32) { 1143 /* UNPREDICTABLE; we choose to undef */ 1144 return false; 1145 } 1146 return do_2op_scalar(s, a, fns[a->size]); 1147 } 1148 1149 1150 #define DO_2OP_FP_SCALAR(INSN, FN) \ 1151 static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \ 1152 { \ 1153 static MVEGenTwoOpScalarFn * const fns[] = { \ 1154 NULL, \ 1155 gen_helper_mve_##FN##h, \ 1156 gen_helper_mve_##FN##s, \ 1157 NULL, \ 1158 }; \ 1159 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 1160 return false; \ 1161 } \ 1162 return do_2op_scalar(s, a, fns[a->size]); \ 1163 } 1164 1165 DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar) 1166 DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar) 1167 DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar) 1168 DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar) 1169 DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar) 1170 1171 static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a, 1172 MVEGenLongDualAccOpFn *fn) 1173 { 1174 TCGv_ptr qn, qm; 1175 TCGv_i64 rda; 1176 TCGv_i32 rdalo, rdahi; 1177 1178 if (!dc_isar_feature(aa32_mve, s) || 1179 !mve_check_qreg_bank(s, a->qn | a->qm) || 1180 !fn) { 1181 return false; 1182 } 1183 /* 1184 * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related 1185 * encoding; rdalo always has bit 0 clear so cannot be 13 or 15. 1186 */ 1187 if (a->rdahi == 13 || a->rdahi == 15) { 1188 return false; 1189 } 1190 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1191 return true; 1192 } 1193 1194 qn = mve_qreg_ptr(a->qn); 1195 qm = mve_qreg_ptr(a->qm); 1196 1197 /* 1198 * This insn is subject to beat-wise execution. Partial execution 1199 * of an A=0 (no-accumulate) insn which does not execute the first 1200 * beat must start with the current rda value, not 0. 
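     * (This happens when e.g. an interrupt is taken between beats: the
     * partial sum is already live in RdaHi:RdaLo, so resuming must
     * accumulate onto it rather than restart from zero.)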
1201 */ 1202 if (a->a || mve_skip_first_beat(s)) { 1203 rda = tcg_temp_new_i64(); 1204 rdalo = load_reg(s, a->rdalo); 1205 rdahi = load_reg(s, a->rdahi); 1206 tcg_gen_concat_i32_i64(rda, rdalo, rdahi); 1207 tcg_temp_free_i32(rdalo); 1208 tcg_temp_free_i32(rdahi); 1209 } else { 1210 rda = tcg_const_i64(0); 1211 } 1212 1213 fn(rda, cpu_env, qn, qm, rda); 1214 tcg_temp_free_ptr(qn); 1215 tcg_temp_free_ptr(qm); 1216 1217 rdalo = tcg_temp_new_i32(); 1218 rdahi = tcg_temp_new_i32(); 1219 tcg_gen_extrl_i64_i32(rdalo, rda); 1220 tcg_gen_extrh_i64_i32(rdahi, rda); 1221 store_reg(s, a->rdalo, rdalo); 1222 store_reg(s, a->rdahi, rdahi); 1223 tcg_temp_free_i64(rda); 1224 mve_update_eci(s); 1225 return true; 1226 } 1227 1228 static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a) 1229 { 1230 static MVEGenLongDualAccOpFn * const fns[4][2] = { 1231 { NULL, NULL }, 1232 { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh }, 1233 { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw }, 1234 { NULL, NULL }, 1235 }; 1236 return do_long_dual_acc(s, a, fns[a->size][a->x]); 1237 } 1238 1239 static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a) 1240 { 1241 static MVEGenLongDualAccOpFn * const fns[4][2] = { 1242 { NULL, NULL }, 1243 { gen_helper_mve_vmlaldavuh, NULL }, 1244 { gen_helper_mve_vmlaldavuw, NULL }, 1245 { NULL, NULL }, 1246 }; 1247 return do_long_dual_acc(s, a, fns[a->size][a->x]); 1248 } 1249 1250 static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a) 1251 { 1252 static MVEGenLongDualAccOpFn * const fns[4][2] = { 1253 { NULL, NULL }, 1254 { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh }, 1255 { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw }, 1256 { NULL, NULL }, 1257 }; 1258 return do_long_dual_acc(s, a, fns[a->size][a->x]); 1259 } 1260 1261 static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a) 1262 { 1263 static MVEGenLongDualAccOpFn * const fns[] = { 1264 gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw, 1265 }; 1266 return do_long_dual_acc(s, a, fns[a->x]); 1267 } 1268 1269 static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a) 1270 { 1271 static MVEGenLongDualAccOpFn * const fns[] = { 1272 gen_helper_mve_vrmlaldavhuw, NULL, 1273 }; 1274 return do_long_dual_acc(s, a, fns[a->x]); 1275 } 1276 1277 static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a) 1278 { 1279 static MVEGenLongDualAccOpFn * const fns[] = { 1280 gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw, 1281 }; 1282 return do_long_dual_acc(s, a, fns[a->x]); 1283 } 1284 1285 static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn) 1286 { 1287 TCGv_ptr qn, qm; 1288 TCGv_i32 rda; 1289 1290 if (!dc_isar_feature(aa32_mve, s) || 1291 !mve_check_qreg_bank(s, a->qn) || 1292 !fn) { 1293 return false; 1294 } 1295 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1296 return true; 1297 } 1298 1299 qn = mve_qreg_ptr(a->qn); 1300 qm = mve_qreg_ptr(a->qm); 1301 1302 /* 1303 * This insn is subject to beat-wise execution. Partial execution 1304 * of an A=0 (no-accumulate) insn which does not execute the first 1305 * beat must start with the current rda value, not 0. 
1306 */ 1307 if (a->a || mve_skip_first_beat(s)) { 1308 rda = load_reg(s, a->rda); 1309 } else { 1310 rda = tcg_const_i32(0); 1311 } 1312 1313 fn(rda, cpu_env, qn, qm, rda); 1314 store_reg(s, a->rda, rda); 1315 tcg_temp_free_ptr(qn); 1316 tcg_temp_free_ptr(qm); 1317 1318 mve_update_eci(s); 1319 return true; 1320 } 1321 1322 #define DO_DUAL_ACC(INSN, FN) \ 1323 static bool trans_##INSN(DisasContext *s, arg_vmladav *a) \ 1324 { \ 1325 static MVEGenDualAccOpFn * const fns[4][2] = { \ 1326 { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb }, \ 1327 { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh }, \ 1328 { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw }, \ 1329 { NULL, NULL }, \ 1330 }; \ 1331 return do_dual_acc(s, a, fns[a->size][a->x]); \ 1332 } 1333 1334 DO_DUAL_ACC(VMLADAV_S, vmladavs) 1335 DO_DUAL_ACC(VMLSDAV, vmlsdav) 1336 1337 static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a) 1338 { 1339 static MVEGenDualAccOpFn * const fns[4][2] = { 1340 { gen_helper_mve_vmladavub, NULL }, 1341 { gen_helper_mve_vmladavuh, NULL }, 1342 { gen_helper_mve_vmladavuw, NULL }, 1343 { NULL, NULL }, 1344 }; 1345 return do_dual_acc(s, a, fns[a->size][a->x]); 1346 } 1347 1348 static void gen_vpst(DisasContext *s, uint32_t mask) 1349 { 1350 /* 1351 * Set the VPR mask fields. We take advantage of MASK01 and MASK23 1352 * being adjacent fields in the register. 1353 * 1354 * Updating the masks is not predicated, but it is subject to beat-wise 1355 * execution, and the mask is updated on the odd-numbered beats. 1356 * So if PSR.ECI says we should skip beat 1, we mustn't update the 1357 * 01 mask field. 1358 */ 1359 TCGv_i32 vpr = load_cpu_field(v7m.vpr); 1360 switch (s->eci) { 1361 case ECI_NONE: 1362 case ECI_A0: 1363 /* Update both 01 and 23 fields */ 1364 tcg_gen_deposit_i32(vpr, vpr, 1365 tcg_constant_i32(mask | (mask << 4)), 1366 R_V7M_VPR_MASK01_SHIFT, 1367 R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH); 1368 break; 1369 case ECI_A0A1: 1370 case ECI_A0A1A2: 1371 case ECI_A0A1A2B0: 1372 /* Update only the 23 mask field */ 1373 tcg_gen_deposit_i32(vpr, vpr, 1374 tcg_constant_i32(mask), 1375 R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH); 1376 break; 1377 default: 1378 g_assert_not_reached(); 1379 } 1380 store_cpu_field(vpr, v7m.vpr); 1381 } 1382 1383 static bool trans_VPST(DisasContext *s, arg_VPST *a) 1384 { 1385 /* mask == 0 is a "related encoding" */ 1386 if (!dc_isar_feature(aa32_mve, s) || !a->mask) { 1387 return false; 1388 } 1389 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1390 return true; 1391 } 1392 gen_vpst(s, a->mask); 1393 mve_update_and_store_eci(s); 1394 return true; 1395 } 1396 1397 static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a) 1398 { 1399 /* 1400 * Invert the predicate in VPR.P0. We have call out to 1401 * a helper because this insn itself is beatwise and can 1402 * be predicated. 
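     * (The helper takes both the ECI state and the predicate mask into
     * account, so only the P0 bits belonging to beats that actually
     * execute are inverted.)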
1403 */ 1404 if (!dc_isar_feature(aa32_mve, s)) { 1405 return false; 1406 } 1407 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1408 return true; 1409 } 1410 1411 gen_helper_mve_vpnot(cpu_env); 1412 /* This insn updates predication bits */ 1413 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 1414 mve_update_eci(s); 1415 return true; 1416 } 1417 1418 static bool trans_VADDV(DisasContext *s, arg_VADDV *a) 1419 { 1420 /* VADDV: vector add across vector */ 1421 static MVEGenVADDVFn * const fns[4][2] = { 1422 { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub }, 1423 { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh }, 1424 { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw }, 1425 { NULL, NULL } 1426 }; 1427 TCGv_ptr qm; 1428 TCGv_i32 rda; 1429 1430 if (!dc_isar_feature(aa32_mve, s) || 1431 a->size == 3) { 1432 return false; 1433 } 1434 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1435 return true; 1436 } 1437 1438 /* 1439 * This insn is subject to beat-wise execution. Partial execution 1440 * of an A=0 (no-accumulate) insn which does not execute the first 1441 * beat must start with the current value of Rda, not zero. 1442 */ 1443 if (a->a || mve_skip_first_beat(s)) { 1444 /* Accumulate input from Rda */ 1445 rda = load_reg(s, a->rda); 1446 } else { 1447 /* Accumulate starting at zero */ 1448 rda = tcg_const_i32(0); 1449 } 1450 1451 qm = mve_qreg_ptr(a->qm); 1452 fns[a->size][a->u](rda, cpu_env, qm, rda); 1453 store_reg(s, a->rda, rda); 1454 tcg_temp_free_ptr(qm); 1455 1456 mve_update_eci(s); 1457 return true; 1458 } 1459 1460 static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a) 1461 { 1462 /* 1463 * Vector Add Long Across Vector: accumulate the 32-bit 1464 * elements of the vector into a 64-bit result stored in 1465 * a pair of general-purpose registers. 1466 * No need to check Qm's bank: it is only 3 bits in decode. 1467 */ 1468 TCGv_ptr qm; 1469 TCGv_i64 rda; 1470 TCGv_i32 rdalo, rdahi; 1471 1472 if (!dc_isar_feature(aa32_mve, s)) { 1473 return false; 1474 } 1475 /* 1476 * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related 1477 * encoding; rdalo always has bit 0 clear so cannot be 13 or 15. 1478 */ 1479 if (a->rdahi == 13 || a->rdahi == 15) { 1480 return false; 1481 } 1482 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1483 return true; 1484 } 1485 1486 /* 1487 * This insn is subject to beat-wise execution. Partial execution 1488 * of an A=0 (no-accumulate) insn which does not execute the first 1489 * beat must start with the current value of RdaHi:RdaLo, not zero. 
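     * (The 64-bit accumulator is split across the register pair with
     * RdaLo holding bits [31:0] and RdaHi bits [63:32], matching the
     * tcg_gen_extrl/extrh split below.)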
1490 */ 1491 if (a->a || mve_skip_first_beat(s)) { 1492 /* Accumulate input from RdaHi:RdaLo */ 1493 rda = tcg_temp_new_i64(); 1494 rdalo = load_reg(s, a->rdalo); 1495 rdahi = load_reg(s, a->rdahi); 1496 tcg_gen_concat_i32_i64(rda, rdalo, rdahi); 1497 tcg_temp_free_i32(rdalo); 1498 tcg_temp_free_i32(rdahi); 1499 } else { 1500 /* Accumulate starting at zero */ 1501 rda = tcg_const_i64(0); 1502 } 1503 1504 qm = mve_qreg_ptr(a->qm); 1505 if (a->u) { 1506 gen_helper_mve_vaddlv_u(rda, cpu_env, qm, rda); 1507 } else { 1508 gen_helper_mve_vaddlv_s(rda, cpu_env, qm, rda); 1509 } 1510 tcg_temp_free_ptr(qm); 1511 1512 rdalo = tcg_temp_new_i32(); 1513 rdahi = tcg_temp_new_i32(); 1514 tcg_gen_extrl_i64_i32(rdalo, rda); 1515 tcg_gen_extrh_i64_i32(rdahi, rda); 1516 store_reg(s, a->rdalo, rdalo); 1517 store_reg(s, a->rdahi, rdahi); 1518 tcg_temp_free_i64(rda); 1519 mve_update_eci(s); 1520 return true; 1521 } 1522 1523 static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn, 1524 GVecGen2iFn *vecfn) 1525 { 1526 TCGv_ptr qd; 1527 uint64_t imm; 1528 1529 if (!dc_isar_feature(aa32_mve, s) || 1530 !mve_check_qreg_bank(s, a->qd) || 1531 !fn) { 1532 return false; 1533 } 1534 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1535 return true; 1536 } 1537 1538 imm = asimd_imm_const(a->imm, a->cmode, a->op); 1539 1540 if (vecfn && mve_no_predication(s)) { 1541 vecfn(MO_64, mve_qreg_offset(a->qd), mve_qreg_offset(a->qd), 1542 imm, 16, 16); 1543 } else { 1544 qd = mve_qreg_ptr(a->qd); 1545 fn(cpu_env, qd, tcg_constant_i64(imm)); 1546 tcg_temp_free_ptr(qd); 1547 } 1548 mve_update_eci(s); 1549 return true; 1550 } 1551 1552 static void gen_gvec_vmovi(unsigned vece, uint32_t dofs, uint32_t aofs, 1553 int64_t c, uint32_t oprsz, uint32_t maxsz) 1554 { 1555 tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, c); 1556 } 1557 1558 static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a) 1559 { 1560 /* Handle decode of cmode/op here between VORR/VBIC/VMOV */ 1561 MVEGenOneOpImmFn *fn; 1562 GVecGen2iFn *vecfn; 1563 1564 if ((a->cmode & 1) && a->cmode < 12) { 1565 if (a->op) { 1566 /* 1567 * For op=1, the immediate will be inverted by asimd_imm_const(), 1568 * so the VBIC becomes a logical AND operation. 1569 */ 1570 fn = gen_helper_mve_vandi; 1571 vecfn = tcg_gen_gvec_andi; 1572 } else { 1573 fn = gen_helper_mve_vorri; 1574 vecfn = tcg_gen_gvec_ori; 1575 } 1576 } else { 1577 /* There is one unallocated cmode/op combination in this space */ 1578 if (a->cmode == 15 && a->op == 1) { 1579 return false; 1580 } 1581 /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */ 1582 fn = gen_helper_mve_vmovi; 1583 vecfn = gen_gvec_vmovi; 1584 } 1585 return do_1imm(s, a, fn, vecfn); 1586 } 1587 1588 static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn, 1589 bool negateshift, GVecGen2iFn vecfn) 1590 { 1591 TCGv_ptr qd, qm; 1592 int shift = a->shift; 1593 1594 if (!dc_isar_feature(aa32_mve, s) || 1595 !mve_check_qreg_bank(s, a->qd | a->qm) || 1596 !fn) { 1597 return false; 1598 } 1599 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1600 return true; 1601 } 1602 1603 /* 1604 * When we handle a right shift insn using a left-shift helper 1605 * which permits a negative shift count to indicate a right-shift, 1606 * we must negate the shift count. 
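     * (For example VSHRI with an encoded shift of 3 ends up calling the
     * vshli helper with a shift count of -3.)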
1607 */ 1608 if (negateshift) { 1609 shift = -shift; 1610 } 1611 1612 if (vecfn && mve_no_predication(s)) { 1613 vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 1614 shift, 16, 16); 1615 } else { 1616 qd = mve_qreg_ptr(a->qd); 1617 qm = mve_qreg_ptr(a->qm); 1618 fn(cpu_env, qd, qm, tcg_constant_i32(shift)); 1619 tcg_temp_free_ptr(qd); 1620 tcg_temp_free_ptr(qm); 1621 } 1622 mve_update_eci(s); 1623 return true; 1624 } 1625 1626 static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn, 1627 bool negateshift) 1628 { 1629 return do_2shift_vec(s, a, fn, negateshift, NULL); 1630 } 1631 1632 #define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN) \ 1633 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ 1634 { \ 1635 static MVEGenTwoOpShiftFn * const fns[] = { \ 1636 gen_helper_mve_##FN##b, \ 1637 gen_helper_mve_##FN##h, \ 1638 gen_helper_mve_##FN##w, \ 1639 NULL, \ 1640 }; \ 1641 return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN); \ 1642 } 1643 1644 #define DO_2SHIFT(INSN, FN, NEGATESHIFT) \ 1645 DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL) 1646 1647 static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs, 1648 int64_t shift, uint32_t oprsz, uint32_t maxsz) 1649 { 1650 /* 1651 * We get here with a negated shift count, and we must handle 1652 * shifts by the element size, which tcg_gen_gvec_sari() does not do. 1653 */ 1654 shift = -shift; 1655 if (shift == (8 << vece)) { 1656 shift--; 1657 } 1658 tcg_gen_gvec_sari(vece, dofs, aofs, shift, oprsz, maxsz); 1659 } 1660 1661 static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs, 1662 int64_t shift, uint32_t oprsz, uint32_t maxsz) 1663 { 1664 /* 1665 * We get here with a negated shift count, and we must handle 1666 * shifts by the element size, which tcg_gen_gvec_shri() does not do. 
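     * (A logical right shift by the full element size always produces
     * zero, which is why that case is implemented as a dup of 0 below.)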
1667 */ 1668 shift = -shift; 1669 if (shift == (8 << vece)) { 1670 tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0); 1671 } else { 1672 tcg_gen_gvec_shri(vece, dofs, aofs, shift, oprsz, maxsz); 1673 } 1674 } 1675 1676 DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli) 1677 DO_2SHIFT(VQSHLI_S, vqshli_s, false) 1678 DO_2SHIFT(VQSHLI_U, vqshli_u, false) 1679 DO_2SHIFT(VQSHLUI, vqshlui_s, false) 1680 /* These right shifts use a left-shift helper with negated shift count */ 1681 DO_2SHIFT_VEC(VSHRI_S, vshli_s, true, do_gvec_shri_s) 1682 DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u) 1683 DO_2SHIFT(VRSHRI_S, vrshli_s, true) 1684 DO_2SHIFT(VRSHRI_U, vrshli_u, true) 1685 1686 DO_2SHIFT_VEC(VSRI, vsri, false, gen_gvec_sri) 1687 DO_2SHIFT_VEC(VSLI, vsli, false, gen_gvec_sli) 1688 1689 #define DO_2SHIFT_FP(INSN, FN) \ 1690 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ 1691 { \ 1692 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 1693 return false; \ 1694 } \ 1695 return do_2shift(s, a, gen_helper_mve_##FN, false); \ 1696 } 1697 1698 DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh) 1699 DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh) 1700 DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs) 1701 DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu) 1702 DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf) 1703 DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf) 1704 DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs) 1705 DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu) 1706 1707 static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a, 1708 MVEGenTwoOpShiftFn *fn) 1709 { 1710 TCGv_ptr qda; 1711 TCGv_i32 rm; 1712 1713 if (!dc_isar_feature(aa32_mve, s) || 1714 !mve_check_qreg_bank(s, a->qda) || 1715 a->rm == 13 || a->rm == 15 || !fn) { 1716 /* Rm cases are UNPREDICTABLE */ 1717 return false; 1718 } 1719 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1720 return true; 1721 } 1722 1723 qda = mve_qreg_ptr(a->qda); 1724 rm = load_reg(s, a->rm); 1725 fn(cpu_env, qda, qda, rm); 1726 tcg_temp_free_ptr(qda); 1727 tcg_temp_free_i32(rm); 1728 mve_update_eci(s); 1729 return true; 1730 } 1731 1732 #define DO_2SHIFT_SCALAR(INSN, FN) \ 1733 static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \ 1734 { \ 1735 static MVEGenTwoOpShiftFn * const fns[] = { \ 1736 gen_helper_mve_##FN##b, \ 1737 gen_helper_mve_##FN##h, \ 1738 gen_helper_mve_##FN##w, \ 1739 NULL, \ 1740 }; \ 1741 return do_2shift_scalar(s, a, fns[a->size]); \ 1742 } 1743 1744 DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s) 1745 DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u) 1746 DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s) 1747 DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u) 1748 DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s) 1749 DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u) 1750 DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s) 1751 DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u) 1752 1753 #define DO_VSHLL(INSN, FN) \ 1754 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ 1755 { \ 1756 static MVEGenTwoOpShiftFn * const fns[] = { \ 1757 gen_helper_mve_##FN##b, \ 1758 gen_helper_mve_##FN##h, \ 1759 }; \ 1760 return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN); \ 1761 } 1762 1763 /* 1764 * For the VSHLL vector helpers, the vece is the size of the input 1765 * (ie MO_8 or MO_16); the helpers want to work in the output size. 1766 * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.) 1767 */ 1768 static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs, 1769 int64_t shift, uint32_t oprsz, uint32_t maxsz) 1770 { 1771 unsigned ovece = vece + 1; 1772 unsigned ibits = vece == MO_8 ? 
        8 : 16;
    tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz);
    tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
}

static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    tcg_gen_gvec_andi(ovece, dofs, aofs,
                      ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz);
    tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz);
}

static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
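     * (The bits shifted out of the top of the vector are returned in Rdm,
     * and the low bits of the incoming Rdm value are shifted in at the
     * bottom.)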
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    tcg_temp_free_ptr(qd);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is UNPREDICTABLE
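         * (Rm supplies the wrap point for the element values: e.g. with
         * Rn = 0, imm = 1 and Rm = 4, a VIWDUP.8 produces the repeating
         * sequence 0, 1, 2, 3, 0, 1, 2, ... across the vector elements.)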
1925 */ 1926 return false; 1927 } 1928 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1929 return true; 1930 } 1931 1932 qd = mve_qreg_ptr(a->qd); 1933 rn = load_reg(s, a->rn); 1934 rm = load_reg(s, a->rm); 1935 fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm)); 1936 store_reg(s, a->rn, rn); 1937 tcg_temp_free_ptr(qd); 1938 tcg_temp_free_i32(rm); 1939 mve_update_eci(s); 1940 return true; 1941 } 1942 1943 static bool trans_VIDUP(DisasContext *s, arg_vidup *a) 1944 { 1945 static MVEGenVIDUPFn * const fns[] = { 1946 gen_helper_mve_vidupb, 1947 gen_helper_mve_viduph, 1948 gen_helper_mve_vidupw, 1949 NULL, 1950 }; 1951 return do_vidup(s, a, fns[a->size]); 1952 } 1953 1954 static bool trans_VDDUP(DisasContext *s, arg_vidup *a) 1955 { 1956 static MVEGenVIDUPFn * const fns[] = { 1957 gen_helper_mve_vidupb, 1958 gen_helper_mve_viduph, 1959 gen_helper_mve_vidupw, 1960 NULL, 1961 }; 1962 /* VDDUP is just like VIDUP but with a negative immediate */ 1963 a->imm = -a->imm; 1964 return do_vidup(s, a, fns[a->size]); 1965 } 1966 1967 static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a) 1968 { 1969 static MVEGenVIWDUPFn * const fns[] = { 1970 gen_helper_mve_viwdupb, 1971 gen_helper_mve_viwduph, 1972 gen_helper_mve_viwdupw, 1973 NULL, 1974 }; 1975 return do_viwdup(s, a, fns[a->size]); 1976 } 1977 1978 static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a) 1979 { 1980 static MVEGenVIWDUPFn * const fns[] = { 1981 gen_helper_mve_vdwdupb, 1982 gen_helper_mve_vdwduph, 1983 gen_helper_mve_vdwdupw, 1984 NULL, 1985 }; 1986 return do_viwdup(s, a, fns[a->size]); 1987 } 1988 1989 static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn) 1990 { 1991 TCGv_ptr qn, qm; 1992 1993 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) || 1994 !fn) { 1995 return false; 1996 } 1997 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1998 return true; 1999 } 2000 2001 qn = mve_qreg_ptr(a->qn); 2002 qm = mve_qreg_ptr(a->qm); 2003 fn(cpu_env, qn, qm); 2004 tcg_temp_free_ptr(qn); 2005 tcg_temp_free_ptr(qm); 2006 if (a->mask) { 2007 /* VPT */ 2008 gen_vpst(s, a->mask); 2009 } 2010 /* This insn updates predication bits */ 2011 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2012 mve_update_eci(s); 2013 return true; 2014 } 2015 2016 static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a, 2017 MVEGenScalarCmpFn *fn) 2018 { 2019 TCGv_ptr qn; 2020 TCGv_i32 rm; 2021 2022 if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) { 2023 return false; 2024 } 2025 if (!mve_eci_check(s) || !vfp_access_check(s)) { 2026 return true; 2027 } 2028 2029 qn = mve_qreg_ptr(a->qn); 2030 if (a->rm == 15) { 2031 /* Encoding Rm=0b1111 means "constant zero" */ 2032 rm = tcg_constant_i32(0); 2033 } else { 2034 rm = load_reg(s, a->rm); 2035 } 2036 fn(cpu_env, qn, rm); 2037 tcg_temp_free_ptr(qn); 2038 tcg_temp_free_i32(rm); 2039 if (a->mask) { 2040 /* VPT */ 2041 gen_vpst(s, a->mask); 2042 } 2043 /* This insn updates predication bits */ 2044 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 2045 mve_update_eci(s); 2046 return true; 2047 } 2048 2049 #define DO_VCMP(INSN, FN) \ 2050 static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \ 2051 { \ 2052 static MVEGenCmpFn * const fns[] = { \ 2053 gen_helper_mve_##FN##b, \ 2054 gen_helper_mve_##FN##h, \ 2055 gen_helper_mve_##FN##w, \ 2056 NULL, \ 2057 }; \ 2058 return do_vcmp(s, a, fns[a->size]); \ 2059 } \ 2060 static bool trans_##INSN##_scalar(DisasContext *s, \ 2061 arg_vcmp_scalar *a) \ 2062 { \ 2063 static MVEGenScalarCmpFn * const fns[] = { \ 2064 gen_helper_mve_##FN##_scalarb, \ 2065 
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

#define DO_VCMP_FP(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            NULL,                                               \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalars,                      \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
DO_VCMP_FP(VCMPNE_fp, vfcmpne)
DO_VCMP_FP(VCMPGE_fp, vfcmpge)
DO_VCMP_FP(VCMPLT_fp, vfcmplt)
DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
DO_VCMP_FP(VCMPLE_fp, vfcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

#define DO_VMAXV_FP(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV_FP(VMAXNMV, vmaxnmv)
DO_VMAXV_FP(VMINNMV, vminnmv)
DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
DO_VMAXV_FP(VMINNMAV, vminnmav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    tcg_temp_free_ptr(qm);
    tcg_temp_free_ptr(qn);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)

static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two 32-bit vector lanes to two general-purpose registers.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
        a->rt == a->rt2) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd, a->idx, MO_32);
        store_reg(s, a->rt, tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd + 1, a->idx, MO_32);
        store_reg(s, a->rt2, tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two general-purpose registers to two 32-bit vector lanes.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
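     *
     * For example (informal illustration): if PSR.ECI says the first two
     * beats of this insn have already been executed, then a move that
     * targets a lane in the low 64 bits of the Qreg is skipped here;
     * mve_skip_vmov() makes that per-lane decision.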
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg idx to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, vd, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt2);
        write_neon_element32(tmp, vd + 1, a->idx, MO_32);
        tcg_temp_free_i32(tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}
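
/*
 * Informal illustration of the lane pairing used by trans_VMOV_to_2gp()
 * and trans_VMOV_from_2gp() above: with vd = a->qd * 2, the two lanes
 * accessed are Dreg vd lane idx and Dreg vd + 1 lane idx, i.e. 32-bit
 * words idx and idx + 2 of the Qreg. So idx == 0 moves Q[qd] words 0
 * and 2, and idx == 1 moves words 1 and 3. (This corresponds to the
 * assembler form pairing Rt/Rt2 with Qd[idx] and Qd[idx+2]; see the
 * Armv8.1-M architecture reference for the authoritative description.)
 */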