/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "exec/exec-all.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_no_predication(DisasContext *s)
{
    /*
     * Return true if we are executing the entire MVE instruction
     * with no predication or partial-execution, and so we can safely
     * use an inline TCG vector implementation.
     */
    return s->eci == 0 && s->mve_no_pred;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}
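
/*
 * Rough background for the ECI handling below (a summary, not an
 * exhaustive description of the architecture): MVE insns execute as
 * four "beats" which may be interrupted part-way through. PSR.ECI
 * records which beats have already completed (the A* values refer to
 * beats of the current insn, B0 to the first beat of the following
 * insn), so that when execution resumes only the remaining beats are
 * performed.
 */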

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
     */
    s->eci_handled = true;
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        /* Reserved value: INVSTATE UsageFault */
        gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized());
        return false;
    }
}

void mve_update_eci(DisasContext *s)
{
    /*
     * The helper function will always update the CPUState field,
     * so we only need to update the DisasContext field.
     */
    if (s->eci) {
        s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE;
    }
}

void mve_update_and_store_eci(DisasContext *s)
{
    /*
     * For insns which don't call a helper function that will call
     * mve_advance_vpt(), this version updates s->eci and also stores
     * it out to the CPUState field.
     */
    if (s->eci) {
        mve_update_eci(s);
        store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits);
    }
}

static bool mve_skip_first_beat(DisasContext *s)
{
    /* Return true if PSR.ECI says we must skip the first beat of this insn */
    switch (s->eci) {
    case ECI_NONE:
        return false;
    case ECI_A0:
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        return true;
    default:
        g_assert_not_reached();
    }
}

static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn,
                    unsigned msize)
{
    TCGv_i32 addr;
    uint32_t offset;
    TCGv_ptr qreg;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }

    /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
    if (a->rn == 15 || (a->rn == 13 && a->w)) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }
    addr = load_reg(s, a->rn);
    if (a->p) {
        tcg_gen_addi_i32(addr, addr, offset);
    }

    qreg = mve_qreg_ptr(a->qd);
    fn(cpu_env, qreg, addr);

    /*
     * Writeback always happens after the last beat of the insn,
     * regardless of predication
     */
    if (a->w) {
        if (!a->p) {
            tcg_gen_addi_i32(addr, addr, offset);
        }
        store_reg(s, a->rn, addr);
    }
    mve_update_eci(s);
    return true;
}

static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a)
{
    static MVEGenLdStFn * const ldstfns[4][2] = {
        { gen_helper_mve_vstrb, gen_helper_mve_vldrb },
        { gen_helper_mve_vstrh, gen_helper_mve_vldrh },
        { gen_helper_mve_vstrw, gen_helper_mve_vldrw },
        { NULL, NULL }
    };
    return do_ldst(s, a, ldstfns[a->size][a->l], a->size);
}

#define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE)           \
    static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a)   \
    {                                                           \
        static MVEGenLdStFn * const ldstfns[2][2] = {           \
            { gen_helper_mve_##ST, gen_helper_mve_##SLD },      \
            { NULL, gen_helper_mve_##ULD },                     \
        };                                                      \
        return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE);       \
    }

DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8)
DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16)
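
/*
 * For reference, roughly what one instance of the macro above expands
 * to (here VLDSTB_H, i.e. byte memory accesses with halfword elements):
 *
 *   static bool trans_VLDSTB_H(DisasContext *s, arg_VLDR_VSTR *a)
 *   {
 *       static MVEGenLdStFn * const ldstfns[2][2] = {
 *           { gen_helper_mve_vstrb_h, gen_helper_mve_vldrb_sh },
 *           { NULL, gen_helper_mve_vldrb_uh },
 *       };
 *       return do_ldst(s, a, ldstfns[a->u][a->l], MO_8);
 *   }
 */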

static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn)
{
    TCGv_i32 addr;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn || a->rn == 15) {
        /* Rn case is UNPREDICTABLE */
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    addr = load_reg(s, a->rn);

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, addr);
    mve_update_eci(s);
    return true;
}

/*
 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads
 * signextended to halfword elements in register". _os_ indicates that
 * the offsets in Qm should be scaled by the element size.
 */
/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N

/* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */
static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL },
            { NULL, NULL, F(vldrh_sg_sw), NULL },
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, NULL, NULL }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, F(vldrh_sg_os_sw), NULL },
            { NULL, NULL, NULL, NULL },
            { NULL, NULL, NULL, NULL }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL },
            { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL },
            { NULL, NULL, F(vldrw_sg_uw), NULL },
            { NULL, NULL, NULL, F(vldrd_sg_ud) }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL },
            { NULL, NULL, F(vldrw_sg_os_uw), NULL },
            { NULL, NULL, NULL, F(vldrd_sg_os_ud) }
        }
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a)
{
    static MVEGenLdStSGFn * const fns[2][4][4] = { {
            { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL },
            { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL },
            { NULL, NULL, F(vstrw_sg_uw), NULL },
            { NULL, NULL, NULL, F(vstrd_sg_ud) }
        }, {
            { NULL, NULL, NULL, NULL },
            { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL },
            { NULL, NULL, F(vstrw_sg_os_uw), NULL },
            { NULL, NULL, NULL, F(vstrd_sg_os_ud) }
        }
    };
    return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]);
}

#undef F

static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a,
                           MVEGenLdStSGFn *fn, unsigned msize)
{
    uint32_t offset;
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    offset = a->imm << msize;
    if (!a->a) {
        offset = -offset;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(offset));
    mve_update_eci(s);
    return true;
}

static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrw_sg_uw,
        gen_helper_mve_vldrw_sg_wb_uw,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vldrd_sg_ud,
        gen_helper_mve_vldrd_sg_wb_ud,
    };
    if (a->qd == a->qm) {
        return false; /* UNPREDICTABLE */
    }
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrw_sg_uw,
        gen_helper_mve_vstrw_sg_wb_uw,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_32);
}

static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
{
    static MVEGenLdStSGFn * const fns[] = {
        gen_helper_mve_vstrd_sg_ud,
        gen_helper_mve_vstrd_sg_wb_ud,
    };
    return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}

static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
                        int addrinc)
{
    TCGv_i32 rn;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn || (a->rn == 13 && a->w) || a->rn == 15) {
        /* Variously UNPREDICTABLE or UNDEF or related-encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rn = load_reg(s, a->rn);
    /*
     * We pass the index of Qd, not a pointer, because the helper must
     * access multiple Q registers starting at Qd and working up.
     */
    fn(cpu_env, tcg_constant_i32(a->qd), rn);

    if (a->w) {
        tcg_gen_addi_i32(rn, rn, addrinc);
        store_reg(s, a->rn, rn);
    }
    mve_update_and_store_eci(s);
    return true;
}

/* This macro is just to make the arrays more compact in these functions */
#define F(N) gen_helper_mve_##N
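
/*
 * Note: the interleaving loads and stores access a group of Q
 * registers starting at Qd (Qd, Qd+1 for VLD2/VST2 and Qd..Qd+3 for
 * VLD4/VST4), so the trans functions below additionally limit Qd so
 * that the whole group stays within Q0..Q7.
 */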

static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld20b), F(vld20h), F(vld20w), NULL, },
        { F(vld21b), F(vld21h), F(vld21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vld40b), F(vld40h), F(vld40w), NULL, },
        { F(vld41b), F(vld41h), F(vld41w), NULL, },
        { F(vld42b), F(vld42h), F(vld42w), NULL, },
        { F(vld43b), F(vld43h), F(vld43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst20b), F(vst20h), F(vst20w), NULL, },
        { F(vst21b), F(vst21h), F(vst21w), NULL, },
        { NULL, NULL, NULL, NULL },
        { NULL, NULL, NULL, NULL },
    };
    if (a->qd > 6) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
}

static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
{
    static MVEGenLdStIlFn * const fns[4][4] = {
        { F(vst40b), F(vst40h), F(vst40w), NULL, },
        { F(vst41b), F(vst41h), F(vst41w), NULL, },
        { F(vst42b), F(vst42h), F(vst42w), NULL, },
        { F(vst43b), F(vst43h), F(vst43w), NULL, },
    };
    if (a->qd > 4) {
        return false;
    }
    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
}

#undef F

static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
    TCGv_ptr qd;
    TCGv_i32 rt;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rt == 13 || a->rt == 15) {
        /* UNPREDICTABLE; we choose to UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    rt = load_reg(s, a->rt);
    if (mve_no_predication(s)) {
        tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt);
    } else {
        qd = mve_qreg_ptr(a->qd);
        tcg_gen_dup_i32(a->size, rt, rt);
        gen_helper_mve_vdup(cpu_env, qd, rt);
    }
    mve_update_eci(s);
    return true;
}

static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn,
                       GVecGen2Fn vecfn)
{
    TCGv_ptr qd, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn)
{
    return do_1op_vec(s, a, fn, NULL);
}

#define DO_1OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_1op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL)

DO_1OP(VCLZ, vclz)
DO_1OP(VCLS, vcls)
DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs)
DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg)
DO_1OP(VQABS, vqabs)
DO_1OP(VQNEG, vqneg)
DO_1OP(VMAXA, vmaxa)
DO_1OP(VMINA, vmina)

/*
 * For simple float/int conversions we use the fixed-point
 * conversion helpers with a zero shift count
 */
#define DO_VCVT(INSN, HFN, SFN)                                         \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0));         \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf)
DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf)
DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs)
DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu)

static bool do_vcvt_rmode(DisasContext *s, arg_1op *a,
                          ARMFPRounding rmode, bool u)
{
    /*
     * Handle VCVT fp to int with specified rounding mode.
     * This is a 1op fn but we must pass the rounding mode as
     * an immediate to the helper.
     */
    TCGv_ptr qd, qm;
    static MVEGenVCVTRmodeFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh },
        { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us },
        { NULL, NULL },
    };
    MVEGenVCVTRmodeFn *fn = fns[a->size][u];

    if (!dc_isar_feature(aa32_mve_fp, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }

    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode)));
    mve_update_eci(s);
    return true;
}

#define DO_VCVT_RMODE(INSN, RMODE, U)                           \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        return do_vcvt_rmode(s, a, RMODE, U);                   \
    }                                                           \

DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false)
DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true)
DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false)
DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true)
DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false)
DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)

#define DO_VCVT_SH(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_1op(s, a, gen_helper_mve_##FN);               \
    }                                                           \

DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
DO_VCVT_SH(VCVTT_HS, vcvtt_hs)

#define DO_VRINT(INSN, RMODE)                                           \
    static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_h(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm)   \
    {                                                                   \
        gen_helper_mve_vrint_rm_s(env, qd, qm,                          \
                                  tcg_constant_i32(arm_rmode_to_sf(RMODE))); \
    }                                                                   \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)               \
    {                                                                   \
        static MVEGenOneOpFn * const fns[] = {                          \
            NULL,                                                       \
            gen_##INSN##h,                                              \
            gen_##INSN##s,                                              \
            NULL,                                                       \
        };                                                              \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                         \
            return false;                                               \
        }                                                               \
        return do_1op(s, a, fns[a->size]);                              \
    }

DO_VRINT(VRINTN, FPROUNDING_TIEEVEN)
DO_VRINT(VRINTA, FPROUNDING_TIEAWAY)
DO_VRINT(VRINTZ, FPROUNDING_ZERO)
DO_VRINT(VRINTM, FPROUNDING_NEGINF)
DO_VRINT(VRINTP, FPROUNDING_POSINF)

static bool trans_VRINTX(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vrintx_h,
        gen_helper_mve_vrintx_s,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_1op *a)       \
    {                                                           \
        static MVEGenOneOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            NULL,                                               \
            NULL,                                               \
        };                                                      \
        return do_1op(s, a, fns[a->size]);                      \
    }

DO_VMOVN(VMOVNB, vmovnb)
DO_VMOVN(VMOVNT, vmovnt)
DO_VMOVN(VQMOVUNB, vqmovunb)
DO_VMOVN(VQMOVUNT, vqmovunt)
DO_VMOVN(VQMOVN_BS, vqmovnbs)
DO_VMOVN(VQMOVN_TS, vqmovnts)
DO_VMOVN(VQMOVN_BU, vqmovnbu)
DO_VMOVN(VQMOVN_TU, vqmovntu)

static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev16b,
        NULL,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV32(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev32b,
        gen_helper_mve_vrev32h,
        NULL,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VREV64(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        gen_helper_mve_vrev64b,
        gen_helper_mve_vrev64h,
        gen_helper_mve_vrev64w,
        NULL,
    };
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VMVN(DisasContext *s, arg_1op *a)
{
    return do_1op_vec(s, a, gen_helper_mve_vmvn, tcg_gen_gvec_not);
}

static bool trans_VABS_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfabsh,
        gen_helper_mve_vfabss,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool trans_VNEG_fp(DisasContext *s, arg_1op *a)
{
    static MVEGenOneOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vfnegh,
        gen_helper_mve_vfnegs,
        NULL,
    };
    if (!dc_isar_feature(aa32_mve_fp, s)) {
        return false;
    }
    return do_1op(s, a, fns[a->size]);
}

static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn,
                       GVecGen3Fn *vecfn)
{
    TCGv_ptr qd, qn, qm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn),
              mve_qreg_offset(a->qm), 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qn = mve_qreg_ptr(a->qn);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qn, qm);
    }
    mve_update_eci(s);
    return true;
}

static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn)
{
    return do_2op_vec(s, a, fn, NULL);
}

#define DO_LOGIC(INSN, HELPER, VECFN)                           \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        return do_2op_vec(s, a, HELPER, VECFN);                 \
    }

DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and)
DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc)
DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or)
DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc)
DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor)

static bool trans_VPSEL(DisasContext *s, arg_2op *a)
{
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    return do_2op(s, a, gen_helper_mve_vpsel);
}

#define DO_2OP_VEC(INSN, FN, VECFN)                             \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_vec(s, a, fns[a->size], VECFN);           \
    }

#define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL)

DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add)
DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub)
DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul)
DO_2OP(VMULH_S, vmulhs)
DO_2OP(VMULH_U, vmulhu)
DO_2OP(VRMULH_S, vrmulhs)
DO_2OP(VRMULH_U, vrmulhu)
DO_2OP_VEC(VMAX_S, vmaxs, tcg_gen_gvec_smax)
DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax)
DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin)
DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin)
DO_2OP(VABD_S, vabds)
DO_2OP(VABD_U, vabdu)
DO_2OP(VHADD_S, vhadds)
DO_2OP(VHADD_U, vhaddu)
DO_2OP(VHSUB_S, vhsubs)
DO_2OP(VHSUB_U, vhsubu)
DO_2OP(VMULL_BS, vmullbs)
DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
DO_2OP(VQDMULH, vqdmulh)
DO_2OP(VQRDMULH, vqrdmulh)
DO_2OP(VQADD_S, vqadds)
DO_2OP(VQADD_U, vqaddu)
DO_2OP(VQSUB_S, vqsubs)
DO_2OP(VQSUB_U, vqsubu)
DO_2OP(VSHL_S, vshls)
DO_2OP(VSHL_U, vshlu)
DO_2OP(VRSHL_S, vrshls)
DO_2OP(VRSHL_U, vrshlu)
DO_2OP(VQSHL_S, vqshls)
DO_2OP(VQSHL_U, vqshlu)
DO_2OP(VQRSHL_S, vqrshls)
DO_2OP(VQRSHL_U, vqrshlu)
DO_2OP(VQDMLADH, vqdmladh)
DO_2OP(VQDMLADHX, vqdmladhx)
DO_2OP(VQRDMLADH, vqrdmladh)
DO_2OP(VQRDMLADHX, vqrdmladhx)
DO_2OP(VQDMLSDH, vqdmlsdh)
DO_2OP(VQDMLSDHX, vqdmlsdhx)
DO_2OP(VQRDMLSDH, vqrdmlsdh)
DO_2OP(VQRDMLSDHX, vqrdmlsdhx)
DO_2OP(VRHADD_S, vrhadds)
DO_2OP(VRHADD_U, vrhaddu)
/*
 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
 * so we can reuse the DO_2OP macro. (Our implementation calculates the
 * "expected" results in this case.) Similarly for VHCADD.
 */
DO_2OP(VCADD90, vcadd90)
DO_2OP(VCADD270, vcadd270)
DO_2OP(VHCADD90, vhcadd90)
DO_2OP(VHCADD270, vhcadd270)

static bool trans_VQDMULLB(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullbh,
        gen_helper_mve_vqdmullbw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VQDMULLT(DisasContext *s, arg_2op *a)
{
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullth,
        gen_helper_mve_vqdmulltw,
        NULL,
    };
    if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_B(DisasContext *s, arg_2op *a)
{
    /*
     * Note that a->size indicates the output size, ie VMULL.P8
     * is the 8x8->16 operation and a->size is MO_16; VMULL.P16
     * is the 16x16->32 operation and a->size is MO_32.
     */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpbh,
        gen_helper_mve_vmullpbw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

static bool trans_VMULLP_T(DisasContext *s, arg_2op *a)
{
    /* a->size is as for trans_VMULLP_B */
    static MVEGenTwoOpFn * const fns[] = {
        NULL,
        gen_helper_mve_vmullpth,
        gen_helper_mve_vmullptw,
        NULL,
    };
    return do_2op(s, a, fns[a->size]);
}

/*
 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry
 * of the 32-bit elements in each lane of the input vectors, where the
 * carry-out of each add is the carry-in of the next. The initial carry
 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C
 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C.
 * These insns are subject to beat-wise execution. Partial execution
 * of an I=1 (initial carry input fixed) insn which does not
 * execute the first beat must start with the current FPSCR.NZCV
 * value, not the fixed constant input.
 */
static bool trans_VADC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vadc);
}

static bool trans_VADCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VADC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vadci);
}

static bool trans_VSBC(DisasContext *s, arg_2op *a)
{
    return do_2op(s, a, gen_helper_mve_vsbc);
}

static bool trans_VSBCI(DisasContext *s, arg_2op *a)
{
    if (mve_skip_first_beat(s)) {
        return trans_VSBC(s, a);
    }
    return do_2op(s, a, gen_helper_mve_vsbci);
}

#define DO_2OP_FP(INSN, FN)                                     \
    static bool trans_##INSN(DisasContext *s, arg_2op *a)       \
    {                                                           \
        static MVEGenTwoOpFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op(s, a, fns[a->size]);                      \
    }

DO_2OP_FP(VADD_fp, vfadd)
DO_2OP_FP(VSUB_fp, vfsub)
DO_2OP_FP(VMUL_fp, vfmul)
DO_2OP_FP(VABD_fp, vfabd)
DO_2OP_FP(VMAXNM, vmaxnm)
DO_2OP_FP(VMINNM, vminnm)
DO_2OP_FP(VCADD90_fp, vfcadd90)
DO_2OP_FP(VCADD270_fp, vfcadd270)
DO_2OP_FP(VFMA, vfma)
DO_2OP_FP(VFMS, vfms)
DO_2OP_FP(VCMUL0, vcmul0)
DO_2OP_FP(VCMUL90, vcmul90)
DO_2OP_FP(VCMUL180, vcmul180)
DO_2OP_FP(VCMUL270, vcmul270)
DO_2OP_FP(VCMLA0, vcmla0)
DO_2OP_FP(VCMLA90, vcmla90)
DO_2OP_FP(VCMLA180, vcmla180)
DO_2OP_FP(VCMLA270, vcmla270)
DO_2OP_FP(VMAXNMA, vmaxnma)
DO_2OP_FP(VMINNMA, vminnma)
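
/*
 * "Scalar" forms of the 2-operand insns: the second operand comes
 * from a general-purpose register and is effectively duplicated
 * across every element of the vector operation.
 */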

static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
                          MVEGenTwoOpScalarFn fn)
{
    TCGv_ptr qd, qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qn) ||
        !fn) {
        return false;
    }
    if (a->rm == 13 || a->rm == 15) {
        /* UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    qn = mve_qreg_ptr(a->qn);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qd, qn, rm);
    mve_update_eci(s);
    return true;
}

#define DO_2OP_SCALAR(INSN, FN)                                 \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_SCALAR(VADD_scalar, vadd_scalar)
DO_2OP_SCALAR(VSUB_scalar, vsub_scalar)
DO_2OP_SCALAR(VMUL_scalar, vmul_scalar)
DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar)
DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar)
DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar)
DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar)
DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar)
DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar)
DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar)
DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar)
DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar)
DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar)
DO_2OP_SCALAR(VBRSR, vbrsr)
DO_2OP_SCALAR(VMLA, vmla)
DO_2OP_SCALAR(VMLAS, vmlas)
DO_2OP_SCALAR(VQDMLAH, vqdmlah)
DO_2OP_SCALAR(VQRDMLAH, vqrdmlah)
DO_2OP_SCALAR(VQDMLASH, vqdmlash)
DO_2OP_SCALAR(VQRDMLASH, vqrdmlash)

static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullb_scalarh,
        gen_helper_mve_vqdmullb_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}

static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a)
{
    static MVEGenTwoOpScalarFn * const fns[] = {
        NULL,
        gen_helper_mve_vqdmullt_scalarh,
        gen_helper_mve_vqdmullt_scalarw,
        NULL,
    };
    if (a->qd == a->qn && a->size == MO_32) {
        /* UNPREDICTABLE; we choose to undef */
        return false;
    }
    return do_2op_scalar(s, a, fns[a->size]);
}


#define DO_2OP_FP_SCALAR(INSN, FN)                              \
    static bool trans_##INSN(DisasContext *s, arg_2scalar *a)   \
    {                                                           \
        static MVEGenTwoOpScalarFn * const fns[] = {            \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2op_scalar(s, a, fns[a->size]);               \
    }

DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar)
DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar)
DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar)
DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar)
DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar)
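
/*
 * Long dual-accumulate operations: the 64-bit accumulator is passed
 * in and returned via the RdaHi:RdaLo general-purpose register pair
 * (RdaLo is always an even-numbered register, hence the comment below
 * about its bit 0 being clear).
 */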

static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
                             MVEGenLongDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i64 rda_i, rda_o;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn | a->qm) ||
        !fn) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    rda_o = tcg_temp_new_i64();
    if (a->a || mve_skip_first_beat(s)) {
        rda_i = rda_o;
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda_i, rdalo, rdahi);
    } else {
        rda_i = tcg_constant_i64(0);
    }

    fn(rda_o, cpu_env, qn, qm, rda_i);

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda_o);
    tcg_gen_extrh_i64_i32(rdahi, rda_o);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    mve_update_eci(s);
    return true;
}

static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh },
        { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlaldavuh, NULL },
        { gen_helper_mve_vmlaldavuw, NULL },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh },
        { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw },
        { NULL, NULL },
    };
    return do_long_dual_acc(s, a, fns[a->size][a->x]);
}

static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlaldavhuw, NULL,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a)
{
    static MVEGenLongDualAccOpFn * const fns[] = {
        gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw,
    };
    return do_long_dual_acc(s, a, fns[a->x]);
}

static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn)
{
    TCGv_ptr qn, qm;
    TCGv_i32 rda_i, rda_o;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qn) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current rda value, not 0.
     */
    if (a->a || mve_skip_first_beat(s)) {
        rda_o = rda_i = load_reg(s, a->rda);
    } else {
        rda_i = tcg_constant_i32(0);
        rda_o = tcg_temp_new_i32();
    }

    fn(rda_o, cpu_env, qn, qm, rda_i);
    store_reg(s, a->rda, rda_o);

    mve_update_eci(s);
    return true;
}

#define DO_DUAL_ACC(INSN, FN)                                           \
    static bool trans_##INSN(DisasContext *s, arg_vmladav *a)           \
    {                                                                   \
        static MVEGenDualAccOpFn * const fns[4][2] = {                  \
            { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb },        \
            { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh },        \
            { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw },        \
            { NULL, NULL },                                             \
        };                                                              \
        return do_dual_acc(s, a, fns[a->size][a->x]);                   \
    }

DO_DUAL_ACC(VMLADAV_S, vmladavs)
DO_DUAL_ACC(VMLSDAV, vmlsdav)

static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a)
{
    static MVEGenDualAccOpFn * const fns[4][2] = {
        { gen_helper_mve_vmladavub, NULL },
        { gen_helper_mve_vmladavuh, NULL },
        { gen_helper_mve_vmladavuw, NULL },
        { NULL, NULL },
    };
    return do_dual_acc(s, a, fns[a->size][a->x]);
}

static void gen_vpst(DisasContext *s, uint32_t mask)
{
    /*
     * Set the VPR mask fields. We take advantage of MASK01 and MASK23
     * being adjacent fields in the register.
     *
     * Updating the masks is not predicated, but it is subject to beat-wise
     * execution, and the mask is updated on the odd-numbered beats.
     * So if PSR.ECI says we should skip beat 1, we mustn't update the
     * 01 mask field.
     */
    TCGv_i32 vpr = load_cpu_field(v7m.vpr);
    switch (s->eci) {
    case ECI_NONE:
    case ECI_A0:
        /* Update both 01 and 23 fields */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask | (mask << 4)),
                            R_V7M_VPR_MASK01_SHIFT,
                            R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH);
        break;
    case ECI_A0A1:
    case ECI_A0A1A2:
    case ECI_A0A1A2B0:
        /* Update only the 23 mask field */
        tcg_gen_deposit_i32(vpr, vpr,
                            tcg_constant_i32(mask),
                            R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH);
        break;
    default:
        g_assert_not_reached();
    }
    store_cpu_field(vpr, v7m.vpr);
}

static bool trans_VPST(DisasContext *s, arg_VPST *a)
{
    /* mask == 0 is a "related encoding" */
    if (!dc_isar_feature(aa32_mve, s) || !a->mask) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
    gen_vpst(s, a->mask);
    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a)
{
    /*
     * Invert the predicate in VPR.P0. We have to call out to
     * a helper because this insn itself is beatwise and can
     * be predicated.
     */
    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    gen_helper_mve_vpnot(cpu_env);
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

static bool trans_VADDV(DisasContext *s, arg_VADDV *a)
{
    /* VADDV: vector add across vector */
    static MVEGenVADDVFn * const fns[4][2] = {
        { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub },
        { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh },
        { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw },
        { NULL, NULL }
    };
    TCGv_ptr qm;
    TCGv_i32 rda_i, rda_o;

    if (!dc_isar_feature(aa32_mve, s) ||
        a->size == 3) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of Rda, not zero.
     */
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from Rda */
        rda_o = rda_i = load_reg(s, a->rda);
    } else {
        /* Accumulate starting at zero */
        rda_i = tcg_constant_i32(0);
        rda_o = tcg_temp_new_i32();
    }

    qm = mve_qreg_ptr(a->qm);
    fns[a->size][a->u](rda_o, cpu_env, qm, rda_i);
    store_reg(s, a->rda, rda_o);

    mve_update_eci(s);
    return true;
}

static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a)
{
    /*
     * Vector Add Long Across Vector: accumulate the 32-bit
     * elements of the vector into a 64-bit result stored in
     * a pair of general-purpose registers.
     * No need to check Qm's bank: it is only 3 bits in decode.
     */
    TCGv_ptr qm;
    TCGv_i64 rda_i, rda_o;
    TCGv_i32 rdalo, rdahi;

    if (!dc_isar_feature(aa32_mve, s)) {
        return false;
    }
    /*
     * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related
     * encoding; rdalo always has bit 0 clear so cannot be 13 or 15.
     */
    if (a->rdahi == 13 || a->rdahi == 15) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * This insn is subject to beat-wise execution. Partial execution
     * of an A=0 (no-accumulate) insn which does not execute the first
     * beat must start with the current value of RdaHi:RdaLo, not zero.
     */
    rda_o = tcg_temp_new_i64();
    if (a->a || mve_skip_first_beat(s)) {
        /* Accumulate input from RdaHi:RdaLo */
        rda_i = rda_o;
        rdalo = load_reg(s, a->rdalo);
        rdahi = load_reg(s, a->rdahi);
        tcg_gen_concat_i32_i64(rda_i, rdalo, rdahi);
    } else {
        /* Accumulate starting at zero */
        rda_i = tcg_constant_i64(0);
    }

    qm = mve_qreg_ptr(a->qm);
    if (a->u) {
        gen_helper_mve_vaddlv_u(rda_o, cpu_env, qm, rda_i);
    } else {
        gen_helper_mve_vaddlv_s(rda_o, cpu_env, qm, rda_i);
    }

    rdalo = tcg_temp_new_i32();
    rdahi = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(rdalo, rda_o);
    tcg_gen_extrh_i64_i32(rdahi, rda_o);
    store_reg(s, a->rdalo, rdalo);
    store_reg(s, a->rdahi, rdahi);
    mve_update_eci(s);
    return true;
}

static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn,
                    GVecGen2iFn *vecfn)
{
    TCGv_ptr qd;
    uint64_t imm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    imm = asimd_imm_const(a->imm, a->cmode, a->op);

    if (vecfn && mve_no_predication(s)) {
        vecfn(MO_64, mve_qreg_offset(a->qd), mve_qreg_offset(a->qd),
              imm, 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        fn(cpu_env, qd, tcg_constant_i64(imm));
    }
    mve_update_eci(s);
    return true;
}

static void gen_gvec_vmovi(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, c);
}

static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a)
{
    /* Handle decode of cmode/op here between VORR/VBIC/VMOV */
    MVEGenOneOpImmFn *fn;
    GVecGen2iFn *vecfn;

    if ((a->cmode & 1) && a->cmode < 12) {
        if (a->op) {
            /*
             * For op=1, the immediate will be inverted by asimd_imm_const(),
             * so the VBIC becomes a logical AND operation.
             */
            fn = gen_helper_mve_vandi;
            vecfn = tcg_gen_gvec_andi;
        } else {
            fn = gen_helper_mve_vorri;
            vecfn = tcg_gen_gvec_ori;
        }
    } else {
        /* There is one unallocated cmode/op combination in this space */
        if (a->cmode == 15 && a->op == 1) {
            return false;
        }
        /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */
        fn = gen_helper_mve_vmovi;
        vecfn = gen_gvec_vmovi;
    }
    return do_1imm(s, a, fn, vecfn);
}

static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                          bool negateshift, GVecGen2iFn vecfn)
{
    TCGv_ptr qd, qm;
    int shift = a->shift;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qd | a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /*
     * When we handle a right shift insn using a left-shift helper
     * which permits a negative shift count to indicate a right-shift,
     * we must negate the shift count.
     */
    if (negateshift) {
        shift = -shift;
    }

    if (vecfn && mve_no_predication(s)) {
        vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm),
              shift, 16, 16);
    } else {
        qd = mve_qreg_ptr(a->qd);
        qm = mve_qreg_ptr(a->qm);
        fn(cpu_env, qd, qm, tcg_constant_i32(shift));
    }
    mve_update_eci(s);
    return true;
}

static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn,
                      bool negateshift)
{
    return do_2shift_vec(s, a, fn, negateshift, NULL);
}

#define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN)                     \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN);   \
    }

#define DO_2SHIFT(INSN, FN, NEGATESHIFT)        \
    DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL)

static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    /*
     * We get here with a negated shift count, and we must handle
     * shifts by the element size, which tcg_gen_gvec_sari() does not do.
     */
    shift = -shift;
    if (shift == (8 << vece)) {
        shift--;
    }
    tcg_gen_gvec_sari(vece, dofs, aofs, shift, oprsz, maxsz);
}

static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs,
                           int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    /*
     * We get here with a negated shift count, and we must handle
     * shifts by the element size, which tcg_gen_gvec_shri() does not do.
     */
    shift = -shift;
    if (shift == (8 << vece)) {
        tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0);
    } else {
        tcg_gen_gvec_shri(vece, dofs, aofs, shift, oprsz, maxsz);
    }
}
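
/*
 * Why the element-size special cases above are what they are: an
 * arithmetic right shift by the full element size shifts in copies of
 * the sign bit only, which gives the same result as shifting by
 * (element size - 1), hence the "shift--" in do_gvec_shri_s; a logical
 * right shift by the full element size always produces zero, hence the
 * dup_imm(0) in do_gvec_shri_u.
 */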

DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli)
DO_2SHIFT(VQSHLI_S, vqshli_s, false)
DO_2SHIFT(VQSHLI_U, vqshli_u, false)
DO_2SHIFT(VQSHLUI, vqshlui_s, false)
/* These right shifts use a left-shift helper with negated shift count */
DO_2SHIFT_VEC(VSHRI_S, vshli_s, true, do_gvec_shri_s)
DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u)
DO_2SHIFT(VRSHRI_S, vrshli_s, true)
DO_2SHIFT(VRSHRI_U, vrshli_u, true)

DO_2SHIFT_VEC(VSRI, vsri, false, gen_gvec_sri)
DO_2SHIFT_VEC(VSLI, vsli, false, gen_gvec_sli)

#define DO_2SHIFT_FP(INSN, FN)                                  \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_2shift(s, a, gen_helper_mve_##FN, false);     \
    }

DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh)
DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh)
DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs)
DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu)
DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf)
DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf)
DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs)
DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu)

static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a,
                             MVEGenTwoOpShiftFn *fn)
{
    TCGv_ptr qda;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qda) ||
        a->rm == 13 || a->rm == 15 || !fn) {
        /* Rm cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qda = mve_qreg_ptr(a->qda);
    rm = load_reg(s, a->rm);
    fn(cpu_env, qda, qda, rm);
    mve_update_eci(s);
    return true;
}

#define DO_2SHIFT_SCALAR(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a)        \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
            gen_helper_mve_##FN##w,                                     \
            NULL,                                                       \
        };                                                              \
        return do_2shift_scalar(s, a, fns[a->size]);                    \
    }

DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s)
DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u)
DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s)
DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u)
DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s)
DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u)
DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s)
DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u)

#define DO_VSHLL(INSN, FN)                                              \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)            \
    {                                                                   \
        static MVEGenTwoOpShiftFn * const fns[] = {                     \
            gen_helper_mve_##FN##b,                                     \
            gen_helper_mve_##FN##h,                                     \
        };                                                              \
        return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN);  \
    }

/*
 * For the VSHLL vector helpers, the vece is the size of the input
 * (ie MO_8 or MO_16); the helpers want to work in the output size.
 * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.)
 */
static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz);
    tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
}
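
/*
 * Illustrative example of the trick used above: for VSHLL.S8 with a
 * shift of 3, each 16-bit output lane first gets its input byte
 * shifted left by 8 (into the top half of the lane); the arithmetic
 * shift right by 8 - 3 = 5 then sign-extends the value and leaves it
 * shifted left by the requested 3 bits.
 */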

static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    tcg_gen_gvec_andi(ovece, dofs, aofs,
                      ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz);
    tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz);
}

static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    unsigned ovece = vece + 1;
    unsigned ibits = vece == MO_8 ? 8 : 16;
    if (shift == 0) {
        tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz);
    } else {
        tcg_gen_gvec_andi(ovece, dofs, aofs,
                          ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)    \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    mve_update_eci(s);
    return true;
}
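
/*
 * Illustrative example for the increment/decrement-and-duplicate
 * group below: VIDUP.U8 Qd, Rn, #2 with Rn = 10 fills the sixteen
 * byte elements of Qd with 10, 12, ..., 40 and writes the next value
 * in the sequence (42) back to Rn. VDDUP counts downwards instead,
 * and VIWDUP/VDWDUP additionally wrap the count at the value in Rm.
 */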

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP)
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 is UNPREDICTABLE;
         * Rm == 15 is UNPREDICTABLE.
         */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    rm = load_reg(s, a->rm);
    fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    mve_update_eci(s);
    return true;
}

static bool trans_VIDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VDDUP(DisasContext *s, arg_vidup *a)
{
    static MVEGenVIDUPFn * const fns[] = {
        gen_helper_mve_vidupb,
        gen_helper_mve_viduph,
        gen_helper_mve_vidupw,
        NULL,
    };
    /* VDDUP is just like VIDUP but with a negative immediate */
    a->imm = -a->imm;
    return do_vidup(s, a, fns[a->size]);
}

static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_viwdupb,
        gen_helper_mve_viwduph,
        gen_helper_mve_viwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a)
{
    static MVEGenVIWDUPFn * const fns[] = {
        gen_helper_mve_vdwdupb,
        gen_helper_mve_vdwduph,
        gen_helper_mve_vdwdupw,
        NULL,
    };
    return do_viwdup(s, a, fns[a->size]);
}

static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn)
{
    TCGv_ptr qn, qm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    qm = mve_qreg_ptr(a->qm);
    fn(cpu_env, qn, qm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a,
                           MVEGenScalarCmpFn *fn)
{
    TCGv_ptr qn;
    TCGv_i32 rm;

    if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) {
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qn = mve_qreg_ptr(a->qn);
    if (a->rm == 15) {
        /* Encoding Rm=0b1111 means "constant zero" */
        rm = tcg_constant_i32(0);
    } else {
        rm = load_reg(s, a->rm);
    }
    fn(cpu_env, qn, rm);
    if (a->mask) {
        /* VPT */
        gen_vpst(s, a->mask);
    }
    /* This insn updates predication bits */
    s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
    mve_update_eci(s);
    return true;
}

#define DO_VCMP(INSN, FN)                                       \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            gen_helper_mve_##FN##_scalarb,                      \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalarw,                      \
            NULL,                                               \
        };                                                      \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }
DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

#define DO_VCMP_FP(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            NULL,                                               \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalars,                      \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
DO_VCMP_FP(VCMPNE_fp, vfcmpne)
DO_VCMP_FP(VCMPGE_fp, vfcmpge)
DO_VCMP_FP(VCMPLT_fp, vfcmplt)
DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
DO_VCMP_FP(VCMPLE_fp, vfcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

#define DO_VMAXV_FP(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV_FP(VMAXNMV, vmaxnmv)
DO_VMAXV_FP(VMINNMV, vminnmv)
DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
DO_VMAXV_FP(VMINNMAV, vminnmav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }
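
    /*
     * All checks have passed: emit the VABAV helper call. As with the
     * other MVE helpers in this file, the helper itself applies the
     * current predication and advances the VPT state, so only the
     * translation-time ECI bookkeeping (mve_update_eci) remains here.
     */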

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)

static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two 32-bit vector lanes to two general-purpose registers.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
        a->rt == a->rt2) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd, a->idx, MO_32);
        store_reg(s, a->rt, tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd + 1, a->idx, MO_32);
        store_reg(s, a->rt2, tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two general-purpose registers to two 32-bit vector lanes.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, vd, a->idx, MO_32);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt2);
        write_neon_element32(tmp, vd + 1, a->idx, MO_32);
    }

    mve_update_and_store_eci(s);
    return true;
}
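
/*
 * Illustrative sketch (not part of the build) of how the beat-skipping
 * above plays out, assuming mve_skip_vmov() (defined earlier in this
 * file) suppresses an access whose lane lies entirely within the beats
 * that PSR.ECI reports as already executed:
 *
 *   - The two-GPR VMOV forms move one lane from the low half of Qd
 *     (beat 0 or 1, via D register vd) and one from the high half
 *     (beat 2 or 3, via vd + 1).
 *   - If the insn is resumed with, say, PSR.ECI = ECI_A0A1 (beats 0
 *     and 1 already done), only the high-half lane is still moved.
 *
 * This is why each element access is guarded by its own
 * mve_skip_vmov() call rather than skipping the whole insn, and why
 * the functions finish with mve_update_and_store_eci() instead of
 * relying on a helper to advance the ECI state.
 */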