/*
 * ARM translation: M-profile MVE instructions
 *
 * Copyright (c) 2021 Linaro, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "translate.h"
#include "translate-a32.h"

static inline int vidup_imm(DisasContext *s, int x)
{
    return 1 << x;
}

/* Include the generated decoder */
#include "decode-mve.c.inc"

typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenTwoOpShiftFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLongDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVADDVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpImmFn(TCGv_ptr, TCGv_ptr, TCGv_i64);
typedef void MVEGenVIDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenVIWDUPFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
typedef void MVEGenCmpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenScalarCmpFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVABAVFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenDualAccOpFn(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenVCVTRmodeFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
static inline long mve_qreg_offset(unsigned reg)
{
    return offsetof(CPUARMState, vfp.zregs[reg].d[0]);
}

static TCGv_ptr mve_qreg_ptr(unsigned reg)
{
    TCGv_ptr ret = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ret, cpu_env, mve_qreg_offset(reg));
    return ret;
}

static bool mve_no_predication(DisasContext *s)
{
    /*
     * Return true if we are executing the entire MVE instruction
     * with no predication or partial-execution, and so we can safely
     * use an inline TCG vector implementation.
     */
    return s->eci == 0 && s->mve_no_pred;
}

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
    /*
     * Check whether Qregs are in range. For v8.1M only Q0..Q7
     * are supported, see VFPSmallRegisterBank().
     */
    return qmask < 8;
}

bool mve_eci_check(DisasContext *s)
{
    /*
     * This is a beatwise insn: check that ECI is valid (not a
     * reserved value) and note that we are handling it.
     * Return true if OK, false if we generated an exception.
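     *
     * ECI records which beats of a beatwise insn had already been
     * completed when it was interrupted, so that on resumption those
     * beats can be skipped rather than re-executed.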
88 */ 89 s->eci_handled = true; 90 switch (s->eci) { 91 case ECI_NONE: 92 case ECI_A0: 93 case ECI_A0A1: 94 case ECI_A0A1A2: 95 case ECI_A0A1A2B0: 96 return true; 97 default: 98 /* Reserved value: INVSTATE UsageFault */ 99 gen_exception_insn(s, 0, EXCP_INVSTATE, syn_uncategorized()); 100 return false; 101 } 102 } 103 104 void mve_update_eci(DisasContext *s) 105 { 106 /* 107 * The helper function will always update the CPUState field, 108 * so we only need to update the DisasContext field. 109 */ 110 if (s->eci) { 111 s->eci = (s->eci == ECI_A0A1A2B0) ? ECI_A0 : ECI_NONE; 112 } 113 } 114 115 void mve_update_and_store_eci(DisasContext *s) 116 { 117 /* 118 * For insns which don't call a helper function that will call 119 * mve_advance_vpt(), this version updates s->eci and also stores 120 * it out to the CPUState field. 121 */ 122 if (s->eci) { 123 mve_update_eci(s); 124 store_cpu_field(tcg_constant_i32(s->eci << 4), condexec_bits); 125 } 126 } 127 128 static bool mve_skip_first_beat(DisasContext *s) 129 { 130 /* Return true if PSR.ECI says we must skip the first beat of this insn */ 131 switch (s->eci) { 132 case ECI_NONE: 133 return false; 134 case ECI_A0: 135 case ECI_A0A1: 136 case ECI_A0A1A2: 137 case ECI_A0A1A2B0: 138 return true; 139 default: 140 g_assert_not_reached(); 141 } 142 } 143 144 static bool do_ldst(DisasContext *s, arg_VLDR_VSTR *a, MVEGenLdStFn *fn, 145 unsigned msize) 146 { 147 TCGv_i32 addr; 148 uint32_t offset; 149 TCGv_ptr qreg; 150 151 if (!dc_isar_feature(aa32_mve, s) || 152 !mve_check_qreg_bank(s, a->qd) || 153 !fn) { 154 return false; 155 } 156 157 /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */ 158 if (a->rn == 15 || (a->rn == 13 && a->w)) { 159 return false; 160 } 161 162 if (!mve_eci_check(s) || !vfp_access_check(s)) { 163 return true; 164 } 165 166 offset = a->imm << msize; 167 if (!a->a) { 168 offset = -offset; 169 } 170 addr = load_reg(s, a->rn); 171 if (a->p) { 172 tcg_gen_addi_i32(addr, addr, offset); 173 } 174 175 qreg = mve_qreg_ptr(a->qd); 176 fn(cpu_env, qreg, addr); 177 178 /* 179 * Writeback always happens after the last beat of the insn, 180 * regardless of predication 181 */ 182 if (a->w) { 183 if (!a->p) { 184 tcg_gen_addi_i32(addr, addr, offset); 185 } 186 store_reg(s, a->rn, addr); 187 } 188 mve_update_eci(s); 189 return true; 190 } 191 192 static bool trans_VLDR_VSTR(DisasContext *s, arg_VLDR_VSTR *a) 193 { 194 static MVEGenLdStFn * const ldstfns[4][2] = { 195 { gen_helper_mve_vstrb, gen_helper_mve_vldrb }, 196 { gen_helper_mve_vstrh, gen_helper_mve_vldrh }, 197 { gen_helper_mve_vstrw, gen_helper_mve_vldrw }, 198 { NULL, NULL } 199 }; 200 return do_ldst(s, a, ldstfns[a->size][a->l], a->size); 201 } 202 203 #define DO_VLDST_WIDE_NARROW(OP, SLD, ULD, ST, MSIZE) \ 204 static bool trans_##OP(DisasContext *s, arg_VLDR_VSTR *a) \ 205 { \ 206 static MVEGenLdStFn * const ldstfns[2][2] = { \ 207 { gen_helper_mve_##ST, gen_helper_mve_##SLD }, \ 208 { NULL, gen_helper_mve_##ULD }, \ 209 }; \ 210 return do_ldst(s, a, ldstfns[a->u][a->l], MSIZE); \ 211 } 212 213 DO_VLDST_WIDE_NARROW(VLDSTB_H, vldrb_sh, vldrb_uh, vstrb_h, MO_8) 214 DO_VLDST_WIDE_NARROW(VLDSTB_W, vldrb_sw, vldrb_uw, vstrb_w, MO_8) 215 DO_VLDST_WIDE_NARROW(VLDSTH_W, vldrh_sw, vldrh_uw, vstrh_w, MO_16) 216 217 static bool do_ldst_sg(DisasContext *s, arg_vldst_sg *a, MVEGenLdStSGFn fn) 218 { 219 TCGv_i32 addr; 220 TCGv_ptr qd, qm; 221 222 if (!dc_isar_feature(aa32_mve, s) || 223 !mve_check_qreg_bank(s, a->qd | a->qm) || 224 !fn || a->rn == 15) { 225 /* Rn case is UNPREDICTABLE */ 226 return 
false; 227 } 228 229 if (!mve_eci_check(s) || !vfp_access_check(s)) { 230 return true; 231 } 232 233 addr = load_reg(s, a->rn); 234 235 qd = mve_qreg_ptr(a->qd); 236 qm = mve_qreg_ptr(a->qm); 237 fn(cpu_env, qd, qm, addr); 238 mve_update_eci(s); 239 return true; 240 } 241 242 /* 243 * The naming scheme here is "vldrb_sg_sh == in-memory byte loads 244 * signextended to halfword elements in register". _os_ indicates that 245 * the offsets in Qm should be scaled by the element size. 246 */ 247 /* This macro is just to make the arrays more compact in these functions */ 248 #define F(N) gen_helper_mve_##N 249 250 /* VLDRB/VSTRB (ie msize 1) with OS=1 is UNPREDICTABLE; we UNDEF */ 251 static bool trans_VLDR_S_sg(DisasContext *s, arg_vldst_sg *a) 252 { 253 static MVEGenLdStSGFn * const fns[2][4][4] = { { 254 { NULL, F(vldrb_sg_sh), F(vldrb_sg_sw), NULL }, 255 { NULL, NULL, F(vldrh_sg_sw), NULL }, 256 { NULL, NULL, NULL, NULL }, 257 { NULL, NULL, NULL, NULL } 258 }, { 259 { NULL, NULL, NULL, NULL }, 260 { NULL, NULL, F(vldrh_sg_os_sw), NULL }, 261 { NULL, NULL, NULL, NULL }, 262 { NULL, NULL, NULL, NULL } 263 } 264 }; 265 if (a->qd == a->qm) { 266 return false; /* UNPREDICTABLE */ 267 } 268 return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]); 269 } 270 271 static bool trans_VLDR_U_sg(DisasContext *s, arg_vldst_sg *a) 272 { 273 static MVEGenLdStSGFn * const fns[2][4][4] = { { 274 { F(vldrb_sg_ub), F(vldrb_sg_uh), F(vldrb_sg_uw), NULL }, 275 { NULL, F(vldrh_sg_uh), F(vldrh_sg_uw), NULL }, 276 { NULL, NULL, F(vldrw_sg_uw), NULL }, 277 { NULL, NULL, NULL, F(vldrd_sg_ud) } 278 }, { 279 { NULL, NULL, NULL, NULL }, 280 { NULL, F(vldrh_sg_os_uh), F(vldrh_sg_os_uw), NULL }, 281 { NULL, NULL, F(vldrw_sg_os_uw), NULL }, 282 { NULL, NULL, NULL, F(vldrd_sg_os_ud) } 283 } 284 }; 285 if (a->qd == a->qm) { 286 return false; /* UNPREDICTABLE */ 287 } 288 return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]); 289 } 290 291 static bool trans_VSTR_sg(DisasContext *s, arg_vldst_sg *a) 292 { 293 static MVEGenLdStSGFn * const fns[2][4][4] = { { 294 { F(vstrb_sg_ub), F(vstrb_sg_uh), F(vstrb_sg_uw), NULL }, 295 { NULL, F(vstrh_sg_uh), F(vstrh_sg_uw), NULL }, 296 { NULL, NULL, F(vstrw_sg_uw), NULL }, 297 { NULL, NULL, NULL, F(vstrd_sg_ud) } 298 }, { 299 { NULL, NULL, NULL, NULL }, 300 { NULL, F(vstrh_sg_os_uh), F(vstrh_sg_os_uw), NULL }, 301 { NULL, NULL, F(vstrw_sg_os_uw), NULL }, 302 { NULL, NULL, NULL, F(vstrd_sg_os_ud) } 303 } 304 }; 305 return do_ldst_sg(s, a, fns[a->os][a->msize][a->size]); 306 } 307 308 #undef F 309 310 static bool do_ldst_sg_imm(DisasContext *s, arg_vldst_sg_imm *a, 311 MVEGenLdStSGFn *fn, unsigned msize) 312 { 313 uint32_t offset; 314 TCGv_ptr qd, qm; 315 316 if (!dc_isar_feature(aa32_mve, s) || 317 !mve_check_qreg_bank(s, a->qd | a->qm) || 318 !fn) { 319 return false; 320 } 321 322 if (!mve_eci_check(s) || !vfp_access_check(s)) { 323 return true; 324 } 325 326 offset = a->imm << msize; 327 if (!a->a) { 328 offset = -offset; 329 } 330 331 qd = mve_qreg_ptr(a->qd); 332 qm = mve_qreg_ptr(a->qm); 333 fn(cpu_env, qd, qm, tcg_constant_i32(offset)); 334 mve_update_eci(s); 335 return true; 336 } 337 338 static bool trans_VLDRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a) 339 { 340 static MVEGenLdStSGFn * const fns[] = { 341 gen_helper_mve_vldrw_sg_uw, 342 gen_helper_mve_vldrw_sg_wb_uw, 343 }; 344 if (a->qd == a->qm) { 345 return false; /* UNPREDICTABLE */ 346 } 347 return do_ldst_sg_imm(s, a, fns[a->w], MO_32); 348 } 349 350 static bool trans_VLDRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a) 351 { 
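    /*
     * As with trans_VLDRW_sg_imm above, fns[] is indexed by the W bit:
     * entry 1 is the base-register-writeback ("_wb_") form of the helper.
     */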
352 static MVEGenLdStSGFn * const fns[] = { 353 gen_helper_mve_vldrd_sg_ud, 354 gen_helper_mve_vldrd_sg_wb_ud, 355 }; 356 if (a->qd == a->qm) { 357 return false; /* UNPREDICTABLE */ 358 } 359 return do_ldst_sg_imm(s, a, fns[a->w], MO_64); 360 } 361 362 static bool trans_VSTRW_sg_imm(DisasContext *s, arg_vldst_sg_imm *a) 363 { 364 static MVEGenLdStSGFn * const fns[] = { 365 gen_helper_mve_vstrw_sg_uw, 366 gen_helper_mve_vstrw_sg_wb_uw, 367 }; 368 return do_ldst_sg_imm(s, a, fns[a->w], MO_32); 369 } 370 371 static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a) 372 { 373 static MVEGenLdStSGFn * const fns[] = { 374 gen_helper_mve_vstrd_sg_ud, 375 gen_helper_mve_vstrd_sg_wb_ud, 376 }; 377 return do_ldst_sg_imm(s, a, fns[a->w], MO_64); 378 } 379 380 static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn, 381 int addrinc) 382 { 383 TCGv_i32 rn; 384 385 if (!dc_isar_feature(aa32_mve, s) || 386 !mve_check_qreg_bank(s, a->qd) || 387 !fn || (a->rn == 13 && a->w) || a->rn == 15) { 388 /* Variously UNPREDICTABLE or UNDEF or related-encoding */ 389 return false; 390 } 391 if (!mve_eci_check(s) || !vfp_access_check(s)) { 392 return true; 393 } 394 395 rn = load_reg(s, a->rn); 396 /* 397 * We pass the index of Qd, not a pointer, because the helper must 398 * access multiple Q registers starting at Qd and working up. 399 */ 400 fn(cpu_env, tcg_constant_i32(a->qd), rn); 401 402 if (a->w) { 403 tcg_gen_addi_i32(rn, rn, addrinc); 404 store_reg(s, a->rn, rn); 405 } 406 mve_update_and_store_eci(s); 407 return true; 408 } 409 410 /* This macro is just to make the arrays more compact in these functions */ 411 #define F(N) gen_helper_mve_##N 412 413 static bool trans_VLD2(DisasContext *s, arg_vldst_il *a) 414 { 415 static MVEGenLdStIlFn * const fns[4][4] = { 416 { F(vld20b), F(vld20h), F(vld20w), NULL, }, 417 { F(vld21b), F(vld21h), F(vld21w), NULL, }, 418 { NULL, NULL, NULL, NULL }, 419 { NULL, NULL, NULL, NULL }, 420 }; 421 if (a->qd > 6) { 422 return false; 423 } 424 return do_vldst_il(s, a, fns[a->pat][a->size], 32); 425 } 426 427 static bool trans_VLD4(DisasContext *s, arg_vldst_il *a) 428 { 429 static MVEGenLdStIlFn * const fns[4][4] = { 430 { F(vld40b), F(vld40h), F(vld40w), NULL, }, 431 { F(vld41b), F(vld41h), F(vld41w), NULL, }, 432 { F(vld42b), F(vld42h), F(vld42w), NULL, }, 433 { F(vld43b), F(vld43h), F(vld43w), NULL, }, 434 }; 435 if (a->qd > 4) { 436 return false; 437 } 438 return do_vldst_il(s, a, fns[a->pat][a->size], 64); 439 } 440 441 static bool trans_VST2(DisasContext *s, arg_vldst_il *a) 442 { 443 static MVEGenLdStIlFn * const fns[4][4] = { 444 { F(vst20b), F(vst20h), F(vst20w), NULL, }, 445 { F(vst21b), F(vst21h), F(vst21w), NULL, }, 446 { NULL, NULL, NULL, NULL }, 447 { NULL, NULL, NULL, NULL }, 448 }; 449 if (a->qd > 6) { 450 return false; 451 } 452 return do_vldst_il(s, a, fns[a->pat][a->size], 32); 453 } 454 455 static bool trans_VST4(DisasContext *s, arg_vldst_il *a) 456 { 457 static MVEGenLdStIlFn * const fns[4][4] = { 458 { F(vst40b), F(vst40h), F(vst40w), NULL, }, 459 { F(vst41b), F(vst41h), F(vst41w), NULL, }, 460 { F(vst42b), F(vst42h), F(vst42w), NULL, }, 461 { F(vst43b), F(vst43h), F(vst43w), NULL, }, 462 }; 463 if (a->qd > 4) { 464 return false; 465 } 466 return do_vldst_il(s, a, fns[a->pat][a->size], 64); 467 } 468 469 #undef F 470 471 static bool trans_VDUP(DisasContext *s, arg_VDUP *a) 472 { 473 TCGv_ptr qd; 474 TCGv_i32 rt; 475 476 if (!dc_isar_feature(aa32_mve, s) || 477 !mve_check_qreg_bank(s, a->qd)) { 478 return false; 479 } 480 if 
(a->rt == 13 || a->rt == 15) { 481 /* UNPREDICTABLE; we choose to UNDEF */ 482 return false; 483 } 484 if (!mve_eci_check(s) || !vfp_access_check(s)) { 485 return true; 486 } 487 488 rt = load_reg(s, a->rt); 489 if (mve_no_predication(s)) { 490 tcg_gen_gvec_dup_i32(a->size, mve_qreg_offset(a->qd), 16, 16, rt); 491 } else { 492 qd = mve_qreg_ptr(a->qd); 493 tcg_gen_dup_i32(a->size, rt, rt); 494 gen_helper_mve_vdup(cpu_env, qd, rt); 495 } 496 mve_update_eci(s); 497 return true; 498 } 499 500 static bool do_1op_vec(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn, 501 GVecGen2Fn vecfn) 502 { 503 TCGv_ptr qd, qm; 504 505 if (!dc_isar_feature(aa32_mve, s) || 506 !mve_check_qreg_bank(s, a->qd | a->qm) || 507 !fn) { 508 return false; 509 } 510 511 if (!mve_eci_check(s) || !vfp_access_check(s)) { 512 return true; 513 } 514 515 if (vecfn && mve_no_predication(s)) { 516 vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 16, 16); 517 } else { 518 qd = mve_qreg_ptr(a->qd); 519 qm = mve_qreg_ptr(a->qm); 520 fn(cpu_env, qd, qm); 521 } 522 mve_update_eci(s); 523 return true; 524 } 525 526 static bool do_1op(DisasContext *s, arg_1op *a, MVEGenOneOpFn fn) 527 { 528 return do_1op_vec(s, a, fn, NULL); 529 } 530 531 #define DO_1OP_VEC(INSN, FN, VECFN) \ 532 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 533 { \ 534 static MVEGenOneOpFn * const fns[] = { \ 535 gen_helper_mve_##FN##b, \ 536 gen_helper_mve_##FN##h, \ 537 gen_helper_mve_##FN##w, \ 538 NULL, \ 539 }; \ 540 return do_1op_vec(s, a, fns[a->size], VECFN); \ 541 } 542 543 #define DO_1OP(INSN, FN) DO_1OP_VEC(INSN, FN, NULL) 544 545 DO_1OP(VCLZ, vclz) 546 DO_1OP(VCLS, vcls) 547 DO_1OP_VEC(VABS, vabs, tcg_gen_gvec_abs) 548 DO_1OP_VEC(VNEG, vneg, tcg_gen_gvec_neg) 549 DO_1OP(VQABS, vqabs) 550 DO_1OP(VQNEG, vqneg) 551 DO_1OP(VMAXA, vmaxa) 552 DO_1OP(VMINA, vmina) 553 554 /* 555 * For simple float/int conversions we use the fixed-point 556 * conversion helpers with a zero shift count 557 */ 558 #define DO_VCVT(INSN, HFN, SFN) \ 559 static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \ 560 { \ 561 gen_helper_mve_##HFN(env, qd, qm, tcg_constant_i32(0)); \ 562 } \ 563 static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \ 564 { \ 565 gen_helper_mve_##SFN(env, qd, qm, tcg_constant_i32(0)); \ 566 } \ 567 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 568 { \ 569 static MVEGenOneOpFn * const fns[] = { \ 570 NULL, \ 571 gen_##INSN##h, \ 572 gen_##INSN##s, \ 573 NULL, \ 574 }; \ 575 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 576 return false; \ 577 } \ 578 return do_1op(s, a, fns[a->size]); \ 579 } 580 581 DO_VCVT(VCVT_SF, vcvt_sh, vcvt_sf) 582 DO_VCVT(VCVT_UF, vcvt_uh, vcvt_uf) 583 DO_VCVT(VCVT_FS, vcvt_hs, vcvt_fs) 584 DO_VCVT(VCVT_FU, vcvt_hu, vcvt_fu) 585 586 static bool do_vcvt_rmode(DisasContext *s, arg_1op *a, 587 ARMFPRounding rmode, bool u) 588 { 589 /* 590 * Handle VCVT fp to int with specified rounding mode. 591 * This is a 1op fn but we must pass the rounding mode as 592 * an immediate to the helper. 
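     * arm_rmode_to_sf() translates the ARMFPRounding value into the
     * softfloat rounding-mode encoding that the helper expects.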
593 */ 594 TCGv_ptr qd, qm; 595 static MVEGenVCVTRmodeFn * const fns[4][2] = { 596 { NULL, NULL }, 597 { gen_helper_mve_vcvt_rm_sh, gen_helper_mve_vcvt_rm_uh }, 598 { gen_helper_mve_vcvt_rm_ss, gen_helper_mve_vcvt_rm_us }, 599 { NULL, NULL }, 600 }; 601 MVEGenVCVTRmodeFn *fn = fns[a->size][u]; 602 603 if (!dc_isar_feature(aa32_mve_fp, s) || 604 !mve_check_qreg_bank(s, a->qd | a->qm) || 605 !fn) { 606 return false; 607 } 608 609 if (!mve_eci_check(s) || !vfp_access_check(s)) { 610 return true; 611 } 612 613 qd = mve_qreg_ptr(a->qd); 614 qm = mve_qreg_ptr(a->qm); 615 fn(cpu_env, qd, qm, tcg_constant_i32(arm_rmode_to_sf(rmode))); 616 mve_update_eci(s); 617 return true; 618 } 619 620 #define DO_VCVT_RMODE(INSN, RMODE, U) \ 621 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 622 { \ 623 return do_vcvt_rmode(s, a, RMODE, U); \ 624 } \ 625 626 DO_VCVT_RMODE(VCVTAS, FPROUNDING_TIEAWAY, false) 627 DO_VCVT_RMODE(VCVTAU, FPROUNDING_TIEAWAY, true) 628 DO_VCVT_RMODE(VCVTNS, FPROUNDING_TIEEVEN, false) 629 DO_VCVT_RMODE(VCVTNU, FPROUNDING_TIEEVEN, true) 630 DO_VCVT_RMODE(VCVTPS, FPROUNDING_POSINF, false) 631 DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true) 632 DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false) 633 DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true) 634 635 #define DO_VCVT_SH(INSN, FN) \ 636 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 637 { \ 638 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 639 return false; \ 640 } \ 641 return do_1op(s, a, gen_helper_mve_##FN); \ 642 } \ 643 644 DO_VCVT_SH(VCVTB_SH, vcvtb_sh) 645 DO_VCVT_SH(VCVTT_SH, vcvtt_sh) 646 DO_VCVT_SH(VCVTB_HS, vcvtb_hs) 647 DO_VCVT_SH(VCVTT_HS, vcvtt_hs) 648 649 #define DO_VRINT(INSN, RMODE) \ 650 static void gen_##INSN##h(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \ 651 { \ 652 gen_helper_mve_vrint_rm_h(env, qd, qm, \ 653 tcg_constant_i32(arm_rmode_to_sf(RMODE))); \ 654 } \ 655 static void gen_##INSN##s(TCGv_ptr env, TCGv_ptr qd, TCGv_ptr qm) \ 656 { \ 657 gen_helper_mve_vrint_rm_s(env, qd, qm, \ 658 tcg_constant_i32(arm_rmode_to_sf(RMODE))); \ 659 } \ 660 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 661 { \ 662 static MVEGenOneOpFn * const fns[] = { \ 663 NULL, \ 664 gen_##INSN##h, \ 665 gen_##INSN##s, \ 666 NULL, \ 667 }; \ 668 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 669 return false; \ 670 } \ 671 return do_1op(s, a, fns[a->size]); \ 672 } 673 674 DO_VRINT(VRINTN, FPROUNDING_TIEEVEN) 675 DO_VRINT(VRINTA, FPROUNDING_TIEAWAY) 676 DO_VRINT(VRINTZ, FPROUNDING_ZERO) 677 DO_VRINT(VRINTM, FPROUNDING_NEGINF) 678 DO_VRINT(VRINTP, FPROUNDING_POSINF) 679 680 static bool trans_VRINTX(DisasContext *s, arg_1op *a) 681 { 682 static MVEGenOneOpFn * const fns[] = { 683 NULL, 684 gen_helper_mve_vrintx_h, 685 gen_helper_mve_vrintx_s, 686 NULL, 687 }; 688 if (!dc_isar_feature(aa32_mve_fp, s)) { 689 return false; 690 } 691 return do_1op(s, a, fns[a->size]); 692 } 693 694 /* Narrowing moves: only size 0 and 1 are valid */ 695 #define DO_VMOVN(INSN, FN) \ 696 static bool trans_##INSN(DisasContext *s, arg_1op *a) \ 697 { \ 698 static MVEGenOneOpFn * const fns[] = { \ 699 gen_helper_mve_##FN##b, \ 700 gen_helper_mve_##FN##h, \ 701 NULL, \ 702 NULL, \ 703 }; \ 704 return do_1op(s, a, fns[a->size]); \ 705 } 706 707 DO_VMOVN(VMOVNB, vmovnb) 708 DO_VMOVN(VMOVNT, vmovnt) 709 DO_VMOVN(VQMOVUNB, vqmovunb) 710 DO_VMOVN(VQMOVUNT, vqmovunt) 711 DO_VMOVN(VQMOVN_BS, vqmovnbs) 712 DO_VMOVN(VQMOVN_TS, vqmovnts) 713 DO_VMOVN(VQMOVN_BU, vqmovnbu) 714 DO_VMOVN(VQMOVN_TU, vqmovntu) 715 716 static bool trans_VREV16(DisasContext *s, arg_1op *a) 717 { 
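    /* VREV16 only exists for byte elements; other sizes UNDEF via the !fn check */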
718 static MVEGenOneOpFn * const fns[] = { 719 gen_helper_mve_vrev16b, 720 NULL, 721 NULL, 722 NULL, 723 }; 724 return do_1op(s, a, fns[a->size]); 725 } 726 727 static bool trans_VREV32(DisasContext *s, arg_1op *a) 728 { 729 static MVEGenOneOpFn * const fns[] = { 730 gen_helper_mve_vrev32b, 731 gen_helper_mve_vrev32h, 732 NULL, 733 NULL, 734 }; 735 return do_1op(s, a, fns[a->size]); 736 } 737 738 static bool trans_VREV64(DisasContext *s, arg_1op *a) 739 { 740 static MVEGenOneOpFn * const fns[] = { 741 gen_helper_mve_vrev64b, 742 gen_helper_mve_vrev64h, 743 gen_helper_mve_vrev64w, 744 NULL, 745 }; 746 return do_1op(s, a, fns[a->size]); 747 } 748 749 static bool trans_VMVN(DisasContext *s, arg_1op *a) 750 { 751 return do_1op_vec(s, a, gen_helper_mve_vmvn, tcg_gen_gvec_not); 752 } 753 754 static bool trans_VABS_fp(DisasContext *s, arg_1op *a) 755 { 756 static MVEGenOneOpFn * const fns[] = { 757 NULL, 758 gen_helper_mve_vfabsh, 759 gen_helper_mve_vfabss, 760 NULL, 761 }; 762 if (!dc_isar_feature(aa32_mve_fp, s)) { 763 return false; 764 } 765 return do_1op(s, a, fns[a->size]); 766 } 767 768 static bool trans_VNEG_fp(DisasContext *s, arg_1op *a) 769 { 770 static MVEGenOneOpFn * const fns[] = { 771 NULL, 772 gen_helper_mve_vfnegh, 773 gen_helper_mve_vfnegs, 774 NULL, 775 }; 776 if (!dc_isar_feature(aa32_mve_fp, s)) { 777 return false; 778 } 779 return do_1op(s, a, fns[a->size]); 780 } 781 782 static bool do_2op_vec(DisasContext *s, arg_2op *a, MVEGenTwoOpFn fn, 783 GVecGen3Fn *vecfn) 784 { 785 TCGv_ptr qd, qn, qm; 786 787 if (!dc_isar_feature(aa32_mve, s) || 788 !mve_check_qreg_bank(s, a->qd | a->qn | a->qm) || 789 !fn) { 790 return false; 791 } 792 if (!mve_eci_check(s) || !vfp_access_check(s)) { 793 return true; 794 } 795 796 if (vecfn && mve_no_predication(s)) { 797 vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qn), 798 mve_qreg_offset(a->qm), 16, 16); 799 } else { 800 qd = mve_qreg_ptr(a->qd); 801 qn = mve_qreg_ptr(a->qn); 802 qm = mve_qreg_ptr(a->qm); 803 fn(cpu_env, qd, qn, qm); 804 } 805 mve_update_eci(s); 806 return true; 807 } 808 809 static bool do_2op(DisasContext *s, arg_2op *a, MVEGenTwoOpFn *fn) 810 { 811 return do_2op_vec(s, a, fn, NULL); 812 } 813 814 #define DO_LOGIC(INSN, HELPER, VECFN) \ 815 static bool trans_##INSN(DisasContext *s, arg_2op *a) \ 816 { \ 817 return do_2op_vec(s, a, HELPER, VECFN); \ 818 } 819 820 DO_LOGIC(VAND, gen_helper_mve_vand, tcg_gen_gvec_and) 821 DO_LOGIC(VBIC, gen_helper_mve_vbic, tcg_gen_gvec_andc) 822 DO_LOGIC(VORR, gen_helper_mve_vorr, tcg_gen_gvec_or) 823 DO_LOGIC(VORN, gen_helper_mve_vorn, tcg_gen_gvec_orc) 824 DO_LOGIC(VEOR, gen_helper_mve_veor, tcg_gen_gvec_xor) 825 826 static bool trans_VPSEL(DisasContext *s, arg_2op *a) 827 { 828 /* This insn updates predication bits */ 829 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 830 return do_2op(s, a, gen_helper_mve_vpsel); 831 } 832 833 #define DO_2OP_VEC(INSN, FN, VECFN) \ 834 static bool trans_##INSN(DisasContext *s, arg_2op *a) \ 835 { \ 836 static MVEGenTwoOpFn * const fns[] = { \ 837 gen_helper_mve_##FN##b, \ 838 gen_helper_mve_##FN##h, \ 839 gen_helper_mve_##FN##w, \ 840 NULL, \ 841 }; \ 842 return do_2op_vec(s, a, fns[a->size], VECFN); \ 843 } 844 845 #define DO_2OP(INSN, FN) DO_2OP_VEC(INSN, FN, NULL) 846 847 DO_2OP_VEC(VADD, vadd, tcg_gen_gvec_add) 848 DO_2OP_VEC(VSUB, vsub, tcg_gen_gvec_sub) 849 DO_2OP_VEC(VMUL, vmul, tcg_gen_gvec_mul) 850 DO_2OP(VMULH_S, vmulhs) 851 DO_2OP(VMULH_U, vmulhu) 852 DO_2OP(VRMULH_S, vrmulhs) 853 DO_2OP(VRMULH_U, vrmulhu) 854 DO_2OP_VEC(VMAX_S, vmaxs, 
tcg_gen_gvec_smax) 855 DO_2OP_VEC(VMAX_U, vmaxu, tcg_gen_gvec_umax) 856 DO_2OP_VEC(VMIN_S, vmins, tcg_gen_gvec_smin) 857 DO_2OP_VEC(VMIN_U, vminu, tcg_gen_gvec_umin) 858 DO_2OP(VABD_S, vabds) 859 DO_2OP(VABD_U, vabdu) 860 DO_2OP(VHADD_S, vhadds) 861 DO_2OP(VHADD_U, vhaddu) 862 DO_2OP(VHSUB_S, vhsubs) 863 DO_2OP(VHSUB_U, vhsubu) 864 DO_2OP(VMULL_BS, vmullbs) 865 DO_2OP(VMULL_BU, vmullbu) 866 DO_2OP(VMULL_TS, vmullts) 867 DO_2OP(VMULL_TU, vmulltu) 868 DO_2OP(VQDMULH, vqdmulh) 869 DO_2OP(VQRDMULH, vqrdmulh) 870 DO_2OP(VQADD_S, vqadds) 871 DO_2OP(VQADD_U, vqaddu) 872 DO_2OP(VQSUB_S, vqsubs) 873 DO_2OP(VQSUB_U, vqsubu) 874 DO_2OP(VSHL_S, vshls) 875 DO_2OP(VSHL_U, vshlu) 876 DO_2OP(VRSHL_S, vrshls) 877 DO_2OP(VRSHL_U, vrshlu) 878 DO_2OP(VQSHL_S, vqshls) 879 DO_2OP(VQSHL_U, vqshlu) 880 DO_2OP(VQRSHL_S, vqrshls) 881 DO_2OP(VQRSHL_U, vqrshlu) 882 DO_2OP(VQDMLADH, vqdmladh) 883 DO_2OP(VQDMLADHX, vqdmladhx) 884 DO_2OP(VQRDMLADH, vqrdmladh) 885 DO_2OP(VQRDMLADHX, vqrdmladhx) 886 DO_2OP(VQDMLSDH, vqdmlsdh) 887 DO_2OP(VQDMLSDHX, vqdmlsdhx) 888 DO_2OP(VQRDMLSDH, vqrdmlsdh) 889 DO_2OP(VQRDMLSDHX, vqrdmlsdhx) 890 DO_2OP(VRHADD_S, vrhadds) 891 DO_2OP(VRHADD_U, vrhaddu) 892 /* 893 * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose 894 * so we can reuse the DO_2OP macro. (Our implementation calculates the 895 * "expected" results in this case.) Similarly for VHCADD. 896 */ 897 DO_2OP(VCADD90, vcadd90) 898 DO_2OP(VCADD270, vcadd270) 899 DO_2OP(VHCADD90, vhcadd90) 900 DO_2OP(VHCADD270, vhcadd270) 901 902 static bool trans_VQDMULLB(DisasContext *s, arg_2op *a) 903 { 904 static MVEGenTwoOpFn * const fns[] = { 905 NULL, 906 gen_helper_mve_vqdmullbh, 907 gen_helper_mve_vqdmullbw, 908 NULL, 909 }; 910 if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) { 911 /* UNPREDICTABLE; we choose to undef */ 912 return false; 913 } 914 return do_2op(s, a, fns[a->size]); 915 } 916 917 static bool trans_VQDMULLT(DisasContext *s, arg_2op *a) 918 { 919 static MVEGenTwoOpFn * const fns[] = { 920 NULL, 921 gen_helper_mve_vqdmullth, 922 gen_helper_mve_vqdmulltw, 923 NULL, 924 }; 925 if (a->size == MO_32 && (a->qd == a->qm || a->qd == a->qn)) { 926 /* UNPREDICTABLE; we choose to undef */ 927 return false; 928 } 929 return do_2op(s, a, fns[a->size]); 930 } 931 932 static bool trans_VMULLP_B(DisasContext *s, arg_2op *a) 933 { 934 /* 935 * Note that a->size indicates the output size, ie VMULL.P8 936 * is the 8x8->16 operation and a->size is MO_16; VMULL.P16 937 * is the 16x16->32 operation and a->size is MO_32. 938 */ 939 static MVEGenTwoOpFn * const fns[] = { 940 NULL, 941 gen_helper_mve_vmullpbh, 942 gen_helper_mve_vmullpbw, 943 NULL, 944 }; 945 return do_2op(s, a, fns[a->size]); 946 } 947 948 static bool trans_VMULLP_T(DisasContext *s, arg_2op *a) 949 { 950 /* a->size is as for trans_VMULLP_B */ 951 static MVEGenTwoOpFn * const fns[] = { 952 NULL, 953 gen_helper_mve_vmullpth, 954 gen_helper_mve_vmullptw, 955 NULL, 956 }; 957 return do_2op(s, a, fns[a->size]); 958 } 959 960 /* 961 * VADC and VSBC: these perform an add-with-carry or subtract-with-carry 962 * of the 32-bit elements in each lane of the input vectors, where the 963 * carry-out of each add is the carry-in of the next. The initial carry 964 * input is either fixed (0 for VADCI, 1 for VSBCI) or is from FPSCR.C 965 * (for VADC and VSBC); the carry out at the end is written back to FPSCR.C. 966 * These insns are subject to beat-wise execution. 
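     * (Each beat handles one 32-bit element of the vector.)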
Partial execution 967 * of an I=1 (initial carry input fixed) insn which does not 968 * execute the first beat must start with the current FPSCR.NZCV 969 * value, not the fixed constant input. 970 */ 971 static bool trans_VADC(DisasContext *s, arg_2op *a) 972 { 973 return do_2op(s, a, gen_helper_mve_vadc); 974 } 975 976 static bool trans_VADCI(DisasContext *s, arg_2op *a) 977 { 978 if (mve_skip_first_beat(s)) { 979 return trans_VADC(s, a); 980 } 981 return do_2op(s, a, gen_helper_mve_vadci); 982 } 983 984 static bool trans_VSBC(DisasContext *s, arg_2op *a) 985 { 986 return do_2op(s, a, gen_helper_mve_vsbc); 987 } 988 989 static bool trans_VSBCI(DisasContext *s, arg_2op *a) 990 { 991 if (mve_skip_first_beat(s)) { 992 return trans_VSBC(s, a); 993 } 994 return do_2op(s, a, gen_helper_mve_vsbci); 995 } 996 997 #define DO_2OP_FP(INSN, FN) \ 998 static bool trans_##INSN(DisasContext *s, arg_2op *a) \ 999 { \ 1000 static MVEGenTwoOpFn * const fns[] = { \ 1001 NULL, \ 1002 gen_helper_mve_##FN##h, \ 1003 gen_helper_mve_##FN##s, \ 1004 NULL, \ 1005 }; \ 1006 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 1007 return false; \ 1008 } \ 1009 return do_2op(s, a, fns[a->size]); \ 1010 } 1011 1012 DO_2OP_FP(VADD_fp, vfadd) 1013 DO_2OP_FP(VSUB_fp, vfsub) 1014 DO_2OP_FP(VMUL_fp, vfmul) 1015 DO_2OP_FP(VABD_fp, vfabd) 1016 DO_2OP_FP(VMAXNM, vmaxnm) 1017 DO_2OP_FP(VMINNM, vminnm) 1018 DO_2OP_FP(VCADD90_fp, vfcadd90) 1019 DO_2OP_FP(VCADD270_fp, vfcadd270) 1020 DO_2OP_FP(VFMA, vfma) 1021 DO_2OP_FP(VFMS, vfms) 1022 DO_2OP_FP(VCMUL0, vcmul0) 1023 DO_2OP_FP(VCMUL90, vcmul90) 1024 DO_2OP_FP(VCMUL180, vcmul180) 1025 DO_2OP_FP(VCMUL270, vcmul270) 1026 DO_2OP_FP(VCMLA0, vcmla0) 1027 DO_2OP_FP(VCMLA90, vcmla90) 1028 DO_2OP_FP(VCMLA180, vcmla180) 1029 DO_2OP_FP(VCMLA270, vcmla270) 1030 DO_2OP_FP(VMAXNMA, vmaxnma) 1031 DO_2OP_FP(VMINNMA, vminnma) 1032 1033 static bool do_2op_scalar(DisasContext *s, arg_2scalar *a, 1034 MVEGenTwoOpScalarFn fn) 1035 { 1036 TCGv_ptr qd, qn; 1037 TCGv_i32 rm; 1038 1039 if (!dc_isar_feature(aa32_mve, s) || 1040 !mve_check_qreg_bank(s, a->qd | a->qn) || 1041 !fn) { 1042 return false; 1043 } 1044 if (a->rm == 13 || a->rm == 15) { 1045 /* UNPREDICTABLE */ 1046 return false; 1047 } 1048 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1049 return true; 1050 } 1051 1052 qd = mve_qreg_ptr(a->qd); 1053 qn = mve_qreg_ptr(a->qn); 1054 rm = load_reg(s, a->rm); 1055 fn(cpu_env, qd, qn, rm); 1056 mve_update_eci(s); 1057 return true; 1058 } 1059 1060 #define DO_2OP_SCALAR(INSN, FN) \ 1061 static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \ 1062 { \ 1063 static MVEGenTwoOpScalarFn * const fns[] = { \ 1064 gen_helper_mve_##FN##b, \ 1065 gen_helper_mve_##FN##h, \ 1066 gen_helper_mve_##FN##w, \ 1067 NULL, \ 1068 }; \ 1069 return do_2op_scalar(s, a, fns[a->size]); \ 1070 } 1071 1072 DO_2OP_SCALAR(VADD_scalar, vadd_scalar) 1073 DO_2OP_SCALAR(VSUB_scalar, vsub_scalar) 1074 DO_2OP_SCALAR(VMUL_scalar, vmul_scalar) 1075 DO_2OP_SCALAR(VHADD_S_scalar, vhadds_scalar) 1076 DO_2OP_SCALAR(VHADD_U_scalar, vhaddu_scalar) 1077 DO_2OP_SCALAR(VHSUB_S_scalar, vhsubs_scalar) 1078 DO_2OP_SCALAR(VHSUB_U_scalar, vhsubu_scalar) 1079 DO_2OP_SCALAR(VQADD_S_scalar, vqadds_scalar) 1080 DO_2OP_SCALAR(VQADD_U_scalar, vqaddu_scalar) 1081 DO_2OP_SCALAR(VQSUB_S_scalar, vqsubs_scalar) 1082 DO_2OP_SCALAR(VQSUB_U_scalar, vqsubu_scalar) 1083 DO_2OP_SCALAR(VQDMULH_scalar, vqdmulh_scalar) 1084 DO_2OP_SCALAR(VQRDMULH_scalar, vqrdmulh_scalar) 1085 DO_2OP_SCALAR(VBRSR, vbrsr) 1086 DO_2OP_SCALAR(VMLA, vmla) 1087 DO_2OP_SCALAR(VMLAS, vmlas) 1088 
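/*
 * The VQDMLAH/VQRDMLAH and VQDMLASH/VQRDMLASH forms below are the
 * saturating (and rounding) doubling multiply-accumulate scalar ops.
 */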
DO_2OP_SCALAR(VQDMLAH, vqdmlah) 1089 DO_2OP_SCALAR(VQRDMLAH, vqrdmlah) 1090 DO_2OP_SCALAR(VQDMLASH, vqdmlash) 1091 DO_2OP_SCALAR(VQRDMLASH, vqrdmlash) 1092 1093 static bool trans_VQDMULLB_scalar(DisasContext *s, arg_2scalar *a) 1094 { 1095 static MVEGenTwoOpScalarFn * const fns[] = { 1096 NULL, 1097 gen_helper_mve_vqdmullb_scalarh, 1098 gen_helper_mve_vqdmullb_scalarw, 1099 NULL, 1100 }; 1101 if (a->qd == a->qn && a->size == MO_32) { 1102 /* UNPREDICTABLE; we choose to undef */ 1103 return false; 1104 } 1105 return do_2op_scalar(s, a, fns[a->size]); 1106 } 1107 1108 static bool trans_VQDMULLT_scalar(DisasContext *s, arg_2scalar *a) 1109 { 1110 static MVEGenTwoOpScalarFn * const fns[] = { 1111 NULL, 1112 gen_helper_mve_vqdmullt_scalarh, 1113 gen_helper_mve_vqdmullt_scalarw, 1114 NULL, 1115 }; 1116 if (a->qd == a->qn && a->size == MO_32) { 1117 /* UNPREDICTABLE; we choose to undef */ 1118 return false; 1119 } 1120 return do_2op_scalar(s, a, fns[a->size]); 1121 } 1122 1123 1124 #define DO_2OP_FP_SCALAR(INSN, FN) \ 1125 static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \ 1126 { \ 1127 static MVEGenTwoOpScalarFn * const fns[] = { \ 1128 NULL, \ 1129 gen_helper_mve_##FN##h, \ 1130 gen_helper_mve_##FN##s, \ 1131 NULL, \ 1132 }; \ 1133 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 1134 return false; \ 1135 } \ 1136 return do_2op_scalar(s, a, fns[a->size]); \ 1137 } 1138 1139 DO_2OP_FP_SCALAR(VADD_fp_scalar, vfadd_scalar) 1140 DO_2OP_FP_SCALAR(VSUB_fp_scalar, vfsub_scalar) 1141 DO_2OP_FP_SCALAR(VMUL_fp_scalar, vfmul_scalar) 1142 DO_2OP_FP_SCALAR(VFMA_scalar, vfma_scalar) 1143 DO_2OP_FP_SCALAR(VFMAS_scalar, vfmas_scalar) 1144 1145 static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a, 1146 MVEGenLongDualAccOpFn *fn) 1147 { 1148 TCGv_ptr qn, qm; 1149 TCGv_i64 rda_i, rda_o; 1150 TCGv_i32 rdalo, rdahi; 1151 1152 if (!dc_isar_feature(aa32_mve, s) || 1153 !mve_check_qreg_bank(s, a->qn | a->qm) || 1154 !fn) { 1155 return false; 1156 } 1157 /* 1158 * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related 1159 * encoding; rdalo always has bit 0 clear so cannot be 13 or 15. 1160 */ 1161 if (a->rdahi == 13 || a->rdahi == 15) { 1162 return false; 1163 } 1164 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1165 return true; 1166 } 1167 1168 qn = mve_qreg_ptr(a->qn); 1169 qm = mve_qreg_ptr(a->qm); 1170 1171 /* 1172 * This insn is subject to beat-wise execution. Partial execution 1173 * of an A=0 (no-accumulate) insn which does not execute the first 1174 * beat must start with the current rda value, not 0. 
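     * That is why the A=0 case below still loads RdaLo/RdaHi when ECI
     * says the first beat has already been executed.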
1175 */ 1176 rda_o = tcg_temp_new_i64(); 1177 if (a->a || mve_skip_first_beat(s)) { 1178 rda_i = rda_o; 1179 rdalo = load_reg(s, a->rdalo); 1180 rdahi = load_reg(s, a->rdahi); 1181 tcg_gen_concat_i32_i64(rda_i, rdalo, rdahi); 1182 } else { 1183 rda_i = tcg_constant_i64(0); 1184 } 1185 1186 fn(rda_o, cpu_env, qn, qm, rda_i); 1187 1188 rdalo = tcg_temp_new_i32(); 1189 rdahi = tcg_temp_new_i32(); 1190 tcg_gen_extrl_i64_i32(rdalo, rda_o); 1191 tcg_gen_extrh_i64_i32(rdahi, rda_o); 1192 store_reg(s, a->rdalo, rdalo); 1193 store_reg(s, a->rdahi, rdahi); 1194 mve_update_eci(s); 1195 return true; 1196 } 1197 1198 static bool trans_VMLALDAV_S(DisasContext *s, arg_vmlaldav *a) 1199 { 1200 static MVEGenLongDualAccOpFn * const fns[4][2] = { 1201 { NULL, NULL }, 1202 { gen_helper_mve_vmlaldavsh, gen_helper_mve_vmlaldavxsh }, 1203 { gen_helper_mve_vmlaldavsw, gen_helper_mve_vmlaldavxsw }, 1204 { NULL, NULL }, 1205 }; 1206 return do_long_dual_acc(s, a, fns[a->size][a->x]); 1207 } 1208 1209 static bool trans_VMLALDAV_U(DisasContext *s, arg_vmlaldav *a) 1210 { 1211 static MVEGenLongDualAccOpFn * const fns[4][2] = { 1212 { NULL, NULL }, 1213 { gen_helper_mve_vmlaldavuh, NULL }, 1214 { gen_helper_mve_vmlaldavuw, NULL }, 1215 { NULL, NULL }, 1216 }; 1217 return do_long_dual_acc(s, a, fns[a->size][a->x]); 1218 } 1219 1220 static bool trans_VMLSLDAV(DisasContext *s, arg_vmlaldav *a) 1221 { 1222 static MVEGenLongDualAccOpFn * const fns[4][2] = { 1223 { NULL, NULL }, 1224 { gen_helper_mve_vmlsldavsh, gen_helper_mve_vmlsldavxsh }, 1225 { gen_helper_mve_vmlsldavsw, gen_helper_mve_vmlsldavxsw }, 1226 { NULL, NULL }, 1227 }; 1228 return do_long_dual_acc(s, a, fns[a->size][a->x]); 1229 } 1230 1231 static bool trans_VRMLALDAVH_S(DisasContext *s, arg_vmlaldav *a) 1232 { 1233 static MVEGenLongDualAccOpFn * const fns[] = { 1234 gen_helper_mve_vrmlaldavhsw, gen_helper_mve_vrmlaldavhxsw, 1235 }; 1236 return do_long_dual_acc(s, a, fns[a->x]); 1237 } 1238 1239 static bool trans_VRMLALDAVH_U(DisasContext *s, arg_vmlaldav *a) 1240 { 1241 static MVEGenLongDualAccOpFn * const fns[] = { 1242 gen_helper_mve_vrmlaldavhuw, NULL, 1243 }; 1244 return do_long_dual_acc(s, a, fns[a->x]); 1245 } 1246 1247 static bool trans_VRMLSLDAVH(DisasContext *s, arg_vmlaldav *a) 1248 { 1249 static MVEGenLongDualAccOpFn * const fns[] = { 1250 gen_helper_mve_vrmlsldavhsw, gen_helper_mve_vrmlsldavhxsw, 1251 }; 1252 return do_long_dual_acc(s, a, fns[a->x]); 1253 } 1254 1255 static bool do_dual_acc(DisasContext *s, arg_vmladav *a, MVEGenDualAccOpFn *fn) 1256 { 1257 TCGv_ptr qn, qm; 1258 TCGv_i32 rda_i, rda_o; 1259 1260 if (!dc_isar_feature(aa32_mve, s) || 1261 !mve_check_qreg_bank(s, a->qn) || 1262 !fn) { 1263 return false; 1264 } 1265 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1266 return true; 1267 } 1268 1269 qn = mve_qreg_ptr(a->qn); 1270 qm = mve_qreg_ptr(a->qm); 1271 1272 /* 1273 * This insn is subject to beat-wise execution. Partial execution 1274 * of an A=0 (no-accumulate) insn which does not execute the first 1275 * beat must start with the current rda value, not 0. 
1276 */ 1277 if (a->a || mve_skip_first_beat(s)) { 1278 rda_o = rda_i = load_reg(s, a->rda); 1279 } else { 1280 rda_i = tcg_constant_i32(0); 1281 rda_o = tcg_temp_new_i32(); 1282 } 1283 1284 fn(rda_o, cpu_env, qn, qm, rda_i); 1285 store_reg(s, a->rda, rda_o); 1286 1287 mve_update_eci(s); 1288 return true; 1289 } 1290 1291 #define DO_DUAL_ACC(INSN, FN) \ 1292 static bool trans_##INSN(DisasContext *s, arg_vmladav *a) \ 1293 { \ 1294 static MVEGenDualAccOpFn * const fns[4][2] = { \ 1295 { gen_helper_mve_##FN##b, gen_helper_mve_##FN##xb }, \ 1296 { gen_helper_mve_##FN##h, gen_helper_mve_##FN##xh }, \ 1297 { gen_helper_mve_##FN##w, gen_helper_mve_##FN##xw }, \ 1298 { NULL, NULL }, \ 1299 }; \ 1300 return do_dual_acc(s, a, fns[a->size][a->x]); \ 1301 } 1302 1303 DO_DUAL_ACC(VMLADAV_S, vmladavs) 1304 DO_DUAL_ACC(VMLSDAV, vmlsdav) 1305 1306 static bool trans_VMLADAV_U(DisasContext *s, arg_vmladav *a) 1307 { 1308 static MVEGenDualAccOpFn * const fns[4][2] = { 1309 { gen_helper_mve_vmladavub, NULL }, 1310 { gen_helper_mve_vmladavuh, NULL }, 1311 { gen_helper_mve_vmladavuw, NULL }, 1312 { NULL, NULL }, 1313 }; 1314 return do_dual_acc(s, a, fns[a->size][a->x]); 1315 } 1316 1317 static void gen_vpst(DisasContext *s, uint32_t mask) 1318 { 1319 /* 1320 * Set the VPR mask fields. We take advantage of MASK01 and MASK23 1321 * being adjacent fields in the register. 1322 * 1323 * Updating the masks is not predicated, but it is subject to beat-wise 1324 * execution, and the mask is updated on the odd-numbered beats. 1325 * So if PSR.ECI says we should skip beat 1, we mustn't update the 1326 * 01 mask field. 1327 */ 1328 TCGv_i32 vpr = load_cpu_field(v7m.vpr); 1329 switch (s->eci) { 1330 case ECI_NONE: 1331 case ECI_A0: 1332 /* Update both 01 and 23 fields */ 1333 tcg_gen_deposit_i32(vpr, vpr, 1334 tcg_constant_i32(mask | (mask << 4)), 1335 R_V7M_VPR_MASK01_SHIFT, 1336 R_V7M_VPR_MASK01_LENGTH + R_V7M_VPR_MASK23_LENGTH); 1337 break; 1338 case ECI_A0A1: 1339 case ECI_A0A1A2: 1340 case ECI_A0A1A2B0: 1341 /* Update only the 23 mask field */ 1342 tcg_gen_deposit_i32(vpr, vpr, 1343 tcg_constant_i32(mask), 1344 R_V7M_VPR_MASK23_SHIFT, R_V7M_VPR_MASK23_LENGTH); 1345 break; 1346 default: 1347 g_assert_not_reached(); 1348 } 1349 store_cpu_field(vpr, v7m.vpr); 1350 } 1351 1352 static bool trans_VPST(DisasContext *s, arg_VPST *a) 1353 { 1354 /* mask == 0 is a "related encoding" */ 1355 if (!dc_isar_feature(aa32_mve, s) || !a->mask) { 1356 return false; 1357 } 1358 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1359 return true; 1360 } 1361 gen_vpst(s, a->mask); 1362 mve_update_and_store_eci(s); 1363 return true; 1364 } 1365 1366 static bool trans_VPNOT(DisasContext *s, arg_VPNOT *a) 1367 { 1368 /* 1369 * Invert the predicate in VPR.P0. We have call out to 1370 * a helper because this insn itself is beatwise and can 1371 * be predicated. 
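     * Since the helper changes the predication state, the code below
     * also ends the TB (DISAS_UPDATE_NOCHAIN) so that subsequent insns
     * are translated with the updated state.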
1372 */ 1373 if (!dc_isar_feature(aa32_mve, s)) { 1374 return false; 1375 } 1376 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1377 return true; 1378 } 1379 1380 gen_helper_mve_vpnot(cpu_env); 1381 /* This insn updates predication bits */ 1382 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 1383 mve_update_eci(s); 1384 return true; 1385 } 1386 1387 static bool trans_VADDV(DisasContext *s, arg_VADDV *a) 1388 { 1389 /* VADDV: vector add across vector */ 1390 static MVEGenVADDVFn * const fns[4][2] = { 1391 { gen_helper_mve_vaddvsb, gen_helper_mve_vaddvub }, 1392 { gen_helper_mve_vaddvsh, gen_helper_mve_vaddvuh }, 1393 { gen_helper_mve_vaddvsw, gen_helper_mve_vaddvuw }, 1394 { NULL, NULL } 1395 }; 1396 TCGv_ptr qm; 1397 TCGv_i32 rda_i, rda_o; 1398 1399 if (!dc_isar_feature(aa32_mve, s) || 1400 a->size == 3) { 1401 return false; 1402 } 1403 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1404 return true; 1405 } 1406 1407 /* 1408 * This insn is subject to beat-wise execution. Partial execution 1409 * of an A=0 (no-accumulate) insn which does not execute the first 1410 * beat must start with the current value of Rda, not zero. 1411 */ 1412 if (a->a || mve_skip_first_beat(s)) { 1413 /* Accumulate input from Rda */ 1414 rda_o = rda_i = load_reg(s, a->rda); 1415 } else { 1416 /* Accumulate starting at zero */ 1417 rda_i = tcg_constant_i32(0); 1418 rda_o = tcg_temp_new_i32(); 1419 } 1420 1421 qm = mve_qreg_ptr(a->qm); 1422 fns[a->size][a->u](rda_o, cpu_env, qm, rda_i); 1423 store_reg(s, a->rda, rda_o); 1424 1425 mve_update_eci(s); 1426 return true; 1427 } 1428 1429 static bool trans_VADDLV(DisasContext *s, arg_VADDLV *a) 1430 { 1431 /* 1432 * Vector Add Long Across Vector: accumulate the 32-bit 1433 * elements of the vector into a 64-bit result stored in 1434 * a pair of general-purpose registers. 1435 * No need to check Qm's bank: it is only 3 bits in decode. 1436 */ 1437 TCGv_ptr qm; 1438 TCGv_i64 rda_i, rda_o; 1439 TCGv_i32 rdalo, rdahi; 1440 1441 if (!dc_isar_feature(aa32_mve, s)) { 1442 return false; 1443 } 1444 /* 1445 * rdahi == 13 is UNPREDICTABLE; rdahi == 15 is a related 1446 * encoding; rdalo always has bit 0 clear so cannot be 13 or 15. 1447 */ 1448 if (a->rdahi == 13 || a->rdahi == 15) { 1449 return false; 1450 } 1451 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1452 return true; 1453 } 1454 1455 /* 1456 * This insn is subject to beat-wise execution. Partial execution 1457 * of an A=0 (no-accumulate) insn which does not execute the first 1458 * beat must start with the current value of RdaHi:RdaLo, not zero. 
1459 */ 1460 rda_o = tcg_temp_new_i64(); 1461 if (a->a || mve_skip_first_beat(s)) { 1462 /* Accumulate input from RdaHi:RdaLo */ 1463 rda_i = rda_o; 1464 rdalo = load_reg(s, a->rdalo); 1465 rdahi = load_reg(s, a->rdahi); 1466 tcg_gen_concat_i32_i64(rda_i, rdalo, rdahi); 1467 } else { 1468 /* Accumulate starting at zero */ 1469 rda_i = tcg_constant_i64(0); 1470 } 1471 1472 qm = mve_qreg_ptr(a->qm); 1473 if (a->u) { 1474 gen_helper_mve_vaddlv_u(rda_o, cpu_env, qm, rda_i); 1475 } else { 1476 gen_helper_mve_vaddlv_s(rda_o, cpu_env, qm, rda_i); 1477 } 1478 1479 rdalo = tcg_temp_new_i32(); 1480 rdahi = tcg_temp_new_i32(); 1481 tcg_gen_extrl_i64_i32(rdalo, rda_o); 1482 tcg_gen_extrh_i64_i32(rdahi, rda_o); 1483 store_reg(s, a->rdalo, rdalo); 1484 store_reg(s, a->rdahi, rdahi); 1485 mve_update_eci(s); 1486 return true; 1487 } 1488 1489 static bool do_1imm(DisasContext *s, arg_1imm *a, MVEGenOneOpImmFn *fn, 1490 GVecGen2iFn *vecfn) 1491 { 1492 TCGv_ptr qd; 1493 uint64_t imm; 1494 1495 if (!dc_isar_feature(aa32_mve, s) || 1496 !mve_check_qreg_bank(s, a->qd) || 1497 !fn) { 1498 return false; 1499 } 1500 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1501 return true; 1502 } 1503 1504 imm = asimd_imm_const(a->imm, a->cmode, a->op); 1505 1506 if (vecfn && mve_no_predication(s)) { 1507 vecfn(MO_64, mve_qreg_offset(a->qd), mve_qreg_offset(a->qd), 1508 imm, 16, 16); 1509 } else { 1510 qd = mve_qreg_ptr(a->qd); 1511 fn(cpu_env, qd, tcg_constant_i64(imm)); 1512 } 1513 mve_update_eci(s); 1514 return true; 1515 } 1516 1517 static void gen_gvec_vmovi(unsigned vece, uint32_t dofs, uint32_t aofs, 1518 int64_t c, uint32_t oprsz, uint32_t maxsz) 1519 { 1520 tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, c); 1521 } 1522 1523 static bool trans_Vimm_1r(DisasContext *s, arg_1imm *a) 1524 { 1525 /* Handle decode of cmode/op here between VORR/VBIC/VMOV */ 1526 MVEGenOneOpImmFn *fn; 1527 GVecGen2iFn *vecfn; 1528 1529 if ((a->cmode & 1) && a->cmode < 12) { 1530 if (a->op) { 1531 /* 1532 * For op=1, the immediate will be inverted by asimd_imm_const(), 1533 * so the VBIC becomes a logical AND operation. 1534 */ 1535 fn = gen_helper_mve_vandi; 1536 vecfn = tcg_gen_gvec_andi; 1537 } else { 1538 fn = gen_helper_mve_vorri; 1539 vecfn = tcg_gen_gvec_ori; 1540 } 1541 } else { 1542 /* There is one unallocated cmode/op combination in this space */ 1543 if (a->cmode == 15 && a->op == 1) { 1544 return false; 1545 } 1546 /* asimd_imm_const() sorts out VMVNI vs VMOVI for us */ 1547 fn = gen_helper_mve_vmovi; 1548 vecfn = gen_gvec_vmovi; 1549 } 1550 return do_1imm(s, a, fn, vecfn); 1551 } 1552 1553 static bool do_2shift_vec(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn, 1554 bool negateshift, GVecGen2iFn vecfn) 1555 { 1556 TCGv_ptr qd, qm; 1557 int shift = a->shift; 1558 1559 if (!dc_isar_feature(aa32_mve, s) || 1560 !mve_check_qreg_bank(s, a->qd | a->qm) || 1561 !fn) { 1562 return false; 1563 } 1564 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1565 return true; 1566 } 1567 1568 /* 1569 * When we handle a right shift insn using a left-shift helper 1570 * which permits a negative shift count to indicate a right-shift, 1571 * we must negate the shift count. 
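     * (The left-shift helpers interpret a negative count as a right
     * shift by that amount; see the VSHRI/VRSHRI uses below.)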
1572 */ 1573 if (negateshift) { 1574 shift = -shift; 1575 } 1576 1577 if (vecfn && mve_no_predication(s)) { 1578 vecfn(a->size, mve_qreg_offset(a->qd), mve_qreg_offset(a->qm), 1579 shift, 16, 16); 1580 } else { 1581 qd = mve_qreg_ptr(a->qd); 1582 qm = mve_qreg_ptr(a->qm); 1583 fn(cpu_env, qd, qm, tcg_constant_i32(shift)); 1584 } 1585 mve_update_eci(s); 1586 return true; 1587 } 1588 1589 static bool do_2shift(DisasContext *s, arg_2shift *a, MVEGenTwoOpShiftFn fn, 1590 bool negateshift) 1591 { 1592 return do_2shift_vec(s, a, fn, negateshift, NULL); 1593 } 1594 1595 #define DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, VECFN) \ 1596 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ 1597 { \ 1598 static MVEGenTwoOpShiftFn * const fns[] = { \ 1599 gen_helper_mve_##FN##b, \ 1600 gen_helper_mve_##FN##h, \ 1601 gen_helper_mve_##FN##w, \ 1602 NULL, \ 1603 }; \ 1604 return do_2shift_vec(s, a, fns[a->size], NEGATESHIFT, VECFN); \ 1605 } 1606 1607 #define DO_2SHIFT(INSN, FN, NEGATESHIFT) \ 1608 DO_2SHIFT_VEC(INSN, FN, NEGATESHIFT, NULL) 1609 1610 static void do_gvec_shri_s(unsigned vece, uint32_t dofs, uint32_t aofs, 1611 int64_t shift, uint32_t oprsz, uint32_t maxsz) 1612 { 1613 /* 1614 * We get here with a negated shift count, and we must handle 1615 * shifts by the element size, which tcg_gen_gvec_sari() does not do. 1616 */ 1617 shift = -shift; 1618 if (shift == (8 << vece)) { 1619 shift--; 1620 } 1621 tcg_gen_gvec_sari(vece, dofs, aofs, shift, oprsz, maxsz); 1622 } 1623 1624 static void do_gvec_shri_u(unsigned vece, uint32_t dofs, uint32_t aofs, 1625 int64_t shift, uint32_t oprsz, uint32_t maxsz) 1626 { 1627 /* 1628 * We get here with a negated shift count, and we must handle 1629 * shifts by the element size, which tcg_gen_gvec_shri() does not do. 1630 */ 1631 shift = -shift; 1632 if (shift == (8 << vece)) { 1633 tcg_gen_gvec_dup_imm(vece, dofs, oprsz, maxsz, 0); 1634 } else { 1635 tcg_gen_gvec_shri(vece, dofs, aofs, shift, oprsz, maxsz); 1636 } 1637 } 1638 1639 DO_2SHIFT_VEC(VSHLI, vshli_u, false, tcg_gen_gvec_shli) 1640 DO_2SHIFT(VQSHLI_S, vqshli_s, false) 1641 DO_2SHIFT(VQSHLI_U, vqshli_u, false) 1642 DO_2SHIFT(VQSHLUI, vqshlui_s, false) 1643 /* These right shifts use a left-shift helper with negated shift count */ 1644 DO_2SHIFT_VEC(VSHRI_S, vshli_s, true, do_gvec_shri_s) 1645 DO_2SHIFT_VEC(VSHRI_U, vshli_u, true, do_gvec_shri_u) 1646 DO_2SHIFT(VRSHRI_S, vrshli_s, true) 1647 DO_2SHIFT(VRSHRI_U, vrshli_u, true) 1648 1649 DO_2SHIFT_VEC(VSRI, vsri, false, gen_gvec_sri) 1650 DO_2SHIFT_VEC(VSLI, vsli, false, gen_gvec_sli) 1651 1652 #define DO_2SHIFT_FP(INSN, FN) \ 1653 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ 1654 { \ 1655 if (!dc_isar_feature(aa32_mve_fp, s)) { \ 1656 return false; \ 1657 } \ 1658 return do_2shift(s, a, gen_helper_mve_##FN, false); \ 1659 } 1660 1661 DO_2SHIFT_FP(VCVT_SH_fixed, vcvt_sh) 1662 DO_2SHIFT_FP(VCVT_UH_fixed, vcvt_uh) 1663 DO_2SHIFT_FP(VCVT_HS_fixed, vcvt_hs) 1664 DO_2SHIFT_FP(VCVT_HU_fixed, vcvt_hu) 1665 DO_2SHIFT_FP(VCVT_SF_fixed, vcvt_sf) 1666 DO_2SHIFT_FP(VCVT_UF_fixed, vcvt_uf) 1667 DO_2SHIFT_FP(VCVT_FS_fixed, vcvt_fs) 1668 DO_2SHIFT_FP(VCVT_FU_fixed, vcvt_fu) 1669 1670 static bool do_2shift_scalar(DisasContext *s, arg_shl_scalar *a, 1671 MVEGenTwoOpShiftFn *fn) 1672 { 1673 TCGv_ptr qda; 1674 TCGv_i32 rm; 1675 1676 if (!dc_isar_feature(aa32_mve, s) || 1677 !mve_check_qreg_bank(s, a->qda) || 1678 a->rm == 13 || a->rm == 15 || !fn) { 1679 /* Rm cases are UNPREDICTABLE */ 1680 return false; 1681 } 1682 if (!mve_eci_check(s) || !vfp_access_check(s)) 
{ 1683 return true; 1684 } 1685 1686 qda = mve_qreg_ptr(a->qda); 1687 rm = load_reg(s, a->rm); 1688 fn(cpu_env, qda, qda, rm); 1689 mve_update_eci(s); 1690 return true; 1691 } 1692 1693 #define DO_2SHIFT_SCALAR(INSN, FN) \ 1694 static bool trans_##INSN(DisasContext *s, arg_shl_scalar *a) \ 1695 { \ 1696 static MVEGenTwoOpShiftFn * const fns[] = { \ 1697 gen_helper_mve_##FN##b, \ 1698 gen_helper_mve_##FN##h, \ 1699 gen_helper_mve_##FN##w, \ 1700 NULL, \ 1701 }; \ 1702 return do_2shift_scalar(s, a, fns[a->size]); \ 1703 } 1704 1705 DO_2SHIFT_SCALAR(VSHL_S_scalar, vshli_s) 1706 DO_2SHIFT_SCALAR(VSHL_U_scalar, vshli_u) 1707 DO_2SHIFT_SCALAR(VRSHL_S_scalar, vrshli_s) 1708 DO_2SHIFT_SCALAR(VRSHL_U_scalar, vrshli_u) 1709 DO_2SHIFT_SCALAR(VQSHL_S_scalar, vqshli_s) 1710 DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u) 1711 DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s) 1712 DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u) 1713 1714 #define DO_VSHLL(INSN, FN) \ 1715 static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ 1716 { \ 1717 static MVEGenTwoOpShiftFn * const fns[] = { \ 1718 gen_helper_mve_##FN##b, \ 1719 gen_helper_mve_##FN##h, \ 1720 }; \ 1721 return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN); \ 1722 } 1723 1724 /* 1725 * For the VSHLL vector helpers, the vece is the size of the input 1726 * (ie MO_8 or MO_16); the helpers want to work in the output size. 1727 * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.) 1728 */ 1729 static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs, 1730 int64_t shift, uint32_t oprsz, uint32_t maxsz) 1731 { 1732 unsigned ovece = vece + 1; 1733 unsigned ibits = vece == MO_8 ? 8 : 16; 1734 tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz); 1735 tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz); 1736 } 1737 1738 static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs, 1739 int64_t shift, uint32_t oprsz, uint32_t maxsz) 1740 { 1741 unsigned ovece = vece + 1; 1742 tcg_gen_gvec_andi(ovece, dofs, aofs, 1743 ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz); 1744 tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz); 1745 } 1746 1747 static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs, 1748 int64_t shift, uint32_t oprsz, uint32_t maxsz) 1749 { 1750 unsigned ovece = vece + 1; 1751 unsigned ibits = vece == MO_8 ? 8 : 16; 1752 if (shift == 0) { 1753 tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz); 1754 } else { 1755 tcg_gen_gvec_andi(ovece, dofs, aofs, 1756 ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz); 1757 tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz); 1758 } 1759 } 1760 1761 static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs, 1762 int64_t shift, uint32_t oprsz, uint32_t maxsz) 1763 { 1764 unsigned ovece = vece + 1; 1765 unsigned ibits = vece == MO_8 ? 8 : 16; 1766 if (shift == 0) { 1767 tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz); 1768 } else { 1769 tcg_gen_gvec_andi(ovece, dofs, aofs, 1770 ovece == MO_16 ? 
                          0xff00 : 0xffff0000, oprsz, maxsz);
        tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz);
    }
}

DO_VSHLL(VSHLL_BS, vshllbs)
DO_VSHLL(VSHLL_BU, vshllbu)
DO_VSHLL(VSHLL_TS, vshllts)
DO_VSHLL(VSHLL_TU, vshlltu)

#define DO_2SHIFT_N(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_2shift *a)   \
    {                                                           \
        static MVEGenTwoOpShiftFn * const fns[] = {             \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
        };                                                      \
        return do_2shift(s, a, fns[a->size], false);            \
    }

DO_2SHIFT_N(VSHRNB, vshrnb)
DO_2SHIFT_N(VSHRNT, vshrnt)
DO_2SHIFT_N(VRSHRNB, vrshrnb)
DO_2SHIFT_N(VRSHRNT, vrshrnt)
DO_2SHIFT_N(VQSHRNB_S, vqshrnb_s)
DO_2SHIFT_N(VQSHRNT_S, vqshrnt_s)
DO_2SHIFT_N(VQSHRNB_U, vqshrnb_u)
DO_2SHIFT_N(VQSHRNT_U, vqshrnt_u)
DO_2SHIFT_N(VQSHRUNB, vqshrunb)
DO_2SHIFT_N(VQSHRUNT, vqshrunt)
DO_2SHIFT_N(VQRSHRNB_S, vqrshrnb_s)
DO_2SHIFT_N(VQRSHRNT_S, vqrshrnt_s)
DO_2SHIFT_N(VQRSHRNB_U, vqrshrnb_u)
DO_2SHIFT_N(VQRSHRNT_U, vqrshrnt_u)
DO_2SHIFT_N(VQRSHRUNB, vqrshrunb)
DO_2SHIFT_N(VQRSHRUNT, vqrshrunt)

static bool trans_VSHLC(DisasContext *s, arg_VSHLC *a)
{
    /*
     * Whole Vector Left Shift with Carry. The carry is taken
     * from a general purpose register and written back there.
     * An imm of 0 means "shift by 32".
     */
    TCGv_ptr qd;
    TCGv_i32 rdm;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->rdm == 13 || a->rdm == 15) {
        /* CONSTRAINED UNPREDICTABLE: we UNDEF */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rdm = load_reg(s, a->rdm);
    gen_helper_mve_vshlc(rdm, cpu_env, qd, rdm, tcg_constant_i32(a->imm));
    store_reg(s, a->rdm, rdm);
    mve_update_eci(s);
    return true;
}

static bool do_vidup(DisasContext *s, arg_vidup *a, MVEGenVIDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn;

    /*
     * Vector increment/decrement with wrap and duplicate (VIDUP, VDDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (a->size == MO_64) {
        /* size 0b11 is another encoding */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qd = mve_qreg_ptr(a->qd);
    rn = load_reg(s, a->rn);
    fn(rn, cpu_env, qd, rn, tcg_constant_i32(a->imm));
    store_reg(s, a->rn, rn);
    mve_update_eci(s);
    return true;
}

static bool do_viwdup(DisasContext *s, arg_viwdup *a, MVEGenVIWDUPFn *fn)
{
    TCGv_ptr qd;
    TCGv_i32 rn, rm;

    /*
     * Vector increment/decrement with wrap and duplicate (VIWDUP, VDWDUP).
     * This fills the vector with elements of successively increasing
     * or decreasing values, starting from Rn. Rm specifies a point where
     * the count wraps back around to 0. The updated offset is written back
     * to Rn.
     */
    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd)) {
        return false;
    }
    if (!fn || a->rm == 13 || a->rm == 15) {
        /*
         * size 0b11 is another encoding; Rm == 13 and Rm == 15
         * are UNPREDICTABLE
1884 */ 1885 return false; 1886 } 1887 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1888 return true; 1889 } 1890 1891 qd = mve_qreg_ptr(a->qd); 1892 rn = load_reg(s, a->rn); 1893 rm = load_reg(s, a->rm); 1894 fn(rn, cpu_env, qd, rn, rm, tcg_constant_i32(a->imm)); 1895 store_reg(s, a->rn, rn); 1896 mve_update_eci(s); 1897 return true; 1898 } 1899 1900 static bool trans_VIDUP(DisasContext *s, arg_vidup *a) 1901 { 1902 static MVEGenVIDUPFn * const fns[] = { 1903 gen_helper_mve_vidupb, 1904 gen_helper_mve_viduph, 1905 gen_helper_mve_vidupw, 1906 NULL, 1907 }; 1908 return do_vidup(s, a, fns[a->size]); 1909 } 1910 1911 static bool trans_VDDUP(DisasContext *s, arg_vidup *a) 1912 { 1913 static MVEGenVIDUPFn * const fns[] = { 1914 gen_helper_mve_vidupb, 1915 gen_helper_mve_viduph, 1916 gen_helper_mve_vidupw, 1917 NULL, 1918 }; 1919 /* VDDUP is just like VIDUP but with a negative immediate */ 1920 a->imm = -a->imm; 1921 return do_vidup(s, a, fns[a->size]); 1922 } 1923 1924 static bool trans_VIWDUP(DisasContext *s, arg_viwdup *a) 1925 { 1926 static MVEGenVIWDUPFn * const fns[] = { 1927 gen_helper_mve_viwdupb, 1928 gen_helper_mve_viwduph, 1929 gen_helper_mve_viwdupw, 1930 NULL, 1931 }; 1932 return do_viwdup(s, a, fns[a->size]); 1933 } 1934 1935 static bool trans_VDWDUP(DisasContext *s, arg_viwdup *a) 1936 { 1937 static MVEGenVIWDUPFn * const fns[] = { 1938 gen_helper_mve_vdwdupb, 1939 gen_helper_mve_vdwduph, 1940 gen_helper_mve_vdwdupw, 1941 NULL, 1942 }; 1943 return do_viwdup(s, a, fns[a->size]); 1944 } 1945 1946 static bool do_vcmp(DisasContext *s, arg_vcmp *a, MVEGenCmpFn *fn) 1947 { 1948 TCGv_ptr qn, qm; 1949 1950 if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) || 1951 !fn) { 1952 return false; 1953 } 1954 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1955 return true; 1956 } 1957 1958 qn = mve_qreg_ptr(a->qn); 1959 qm = mve_qreg_ptr(a->qm); 1960 fn(cpu_env, qn, qm); 1961 if (a->mask) { 1962 /* VPT */ 1963 gen_vpst(s, a->mask); 1964 } 1965 /* This insn updates predication bits */ 1966 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 1967 mve_update_eci(s); 1968 return true; 1969 } 1970 1971 static bool do_vcmp_scalar(DisasContext *s, arg_vcmp_scalar *a, 1972 MVEGenScalarCmpFn *fn) 1973 { 1974 TCGv_ptr qn; 1975 TCGv_i32 rm; 1976 1977 if (!dc_isar_feature(aa32_mve, s) || !fn || a->rm == 13) { 1978 return false; 1979 } 1980 if (!mve_eci_check(s) || !vfp_access_check(s)) { 1981 return true; 1982 } 1983 1984 qn = mve_qreg_ptr(a->qn); 1985 if (a->rm == 15) { 1986 /* Encoding Rm=0b1111 means "constant zero" */ 1987 rm = tcg_constant_i32(0); 1988 } else { 1989 rm = load_reg(s, a->rm); 1990 } 1991 fn(cpu_env, qn, rm); 1992 if (a->mask) { 1993 /* VPT */ 1994 gen_vpst(s, a->mask); 1995 } 1996 /* This insn updates predication bits */ 1997 s->base.is_jmp = DISAS_UPDATE_NOCHAIN; 1998 mve_update_eci(s); 1999 return true; 2000 } 2001 2002 #define DO_VCMP(INSN, FN) \ 2003 static bool trans_##INSN(DisasContext *s, arg_vcmp *a) \ 2004 { \ 2005 static MVEGenCmpFn * const fns[] = { \ 2006 gen_helper_mve_##FN##b, \ 2007 gen_helper_mve_##FN##h, \ 2008 gen_helper_mve_##FN##w, \ 2009 NULL, \ 2010 }; \ 2011 return do_vcmp(s, a, fns[a->size]); \ 2012 } \ 2013 static bool trans_##INSN##_scalar(DisasContext *s, \ 2014 arg_vcmp_scalar *a) \ 2015 { \ 2016 static MVEGenScalarCmpFn * const fns[] = { \ 2017 gen_helper_mve_##FN##_scalarb, \ 2018 gen_helper_mve_##FN##_scalarh, \ 2019 gen_helper_mve_##FN##_scalarw, \ 2020 NULL, \ 2021 }; \ 2022 return do_vcmp_scalar(s, a, fns[a->size]); \ 2023 } 2024 2025 
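/*
 * Each DO_VCMP expansion provides both the vector/vector compare and
 * the corresponding vector/scalar compare (trans_FOO and trans_FOO_scalar).
 */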
DO_VCMP(VCMPEQ, vcmpeq)
DO_VCMP(VCMPNE, vcmpne)
DO_VCMP(VCMPCS, vcmpcs)
DO_VCMP(VCMPHI, vcmphi)
DO_VCMP(VCMPGE, vcmpge)
DO_VCMP(VCMPLT, vcmplt)
DO_VCMP(VCMPGT, vcmpgt)
DO_VCMP(VCMPLE, vcmple)

#define DO_VCMP_FP(INSN, FN)                                    \
    static bool trans_##INSN(DisasContext *s, arg_vcmp *a)      \
    {                                                           \
        static MVEGenCmpFn * const fns[] = {                    \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp(s, a, fns[a->size]);                     \
    }                                                           \
    static bool trans_##INSN##_scalar(DisasContext *s,          \
                                      arg_vcmp_scalar *a)       \
    {                                                           \
        static MVEGenScalarCmpFn * const fns[] = {              \
            NULL,                                               \
            gen_helper_mve_##FN##_scalarh,                      \
            gen_helper_mve_##FN##_scalars,                      \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vcmp_scalar(s, a, fns[a->size]);              \
    }

DO_VCMP_FP(VCMPEQ_fp, vfcmpeq)
DO_VCMP_FP(VCMPNE_fp, vfcmpne)
DO_VCMP_FP(VCMPGE_fp, vfcmpge)
DO_VCMP_FP(VCMPLT_fp, vfcmplt)
DO_VCMP_FP(VCMPGT_fp, vfcmpgt)
DO_VCMP_FP(VCMPLE_fp, vfcmple)

static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn)
{
    /*
     * MIN/MAX operations across a vector: compute the min or
     * max of the initial value in a general purpose register
     * and all the elements in the vector, and store it back
     * into the general purpose register.
     */
    TCGv_ptr qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qm, rda);
    store_reg(s, a->rda, rda);
    mve_update_eci(s);
    return true;
}

#define DO_VMAXV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV(VMAXV_S, vmaxvs)
DO_VMAXV(VMAXV_U, vmaxvu)
DO_VMAXV(VMAXAV, vmaxav)
DO_VMAXV(VMINV_S, vminvs)
DO_VMAXV(VMINV_U, vminvu)
DO_VMAXV(VMINAV, vminav)

#define DO_VMAXV_FP(INSN, FN)                                   \
    static bool trans_##INSN(DisasContext *s, arg_vmaxv *a)     \
    {                                                           \
        static MVEGenVADDVFn * const fns[] = {                  \
            NULL,                                               \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##s,                             \
            NULL,                                               \
        };                                                      \
        if (!dc_isar_feature(aa32_mve_fp, s)) {                 \
            return false;                                       \
        }                                                       \
        return do_vmaxv(s, a, fns[a->size]);                    \
    }

DO_VMAXV_FP(VMAXNMV, vmaxnmv)
DO_VMAXV_FP(VMINNMV, vminnmv)
DO_VMAXV_FP(VMAXNMAV, vmaxnmav)
DO_VMAXV_FP(VMINNMAV, vminnmav)

static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
{
    /* Absolute difference accumulated across vector */
    TCGv_ptr qn, qm;
    TCGv_i32 rda;

    if (!dc_isar_feature(aa32_mve, s) ||
        !mve_check_qreg_bank(s, a->qm | a->qn) ||
        !fn || a->rda == 13 || a->rda == 15) {
        /* Rda cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    qm = mve_qreg_ptr(a->qm);
    qn = mve_qreg_ptr(a->qn);
    rda = load_reg(s, a->rda);
    fn(rda, cpu_env, qn, qm, rda);
    store_reg(s, a->rda, rda);
    mve_update_eci(s);
    return true;
}

#define DO_VABAV(INSN, FN)                                      \
    static bool trans_##INSN(DisasContext *s, arg_vabav *a)     \
    {                                                           \
        static MVEGenVABAVFn * const fns[] = {                  \
            gen_helper_mve_##FN##b,                             \
            gen_helper_mve_##FN##h,                             \
            gen_helper_mve_##FN##w,                             \
            NULL,                                               \
        };                                                      \
        return do_vabav(s, a, fns[a->size]);                    \
    }

DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)

static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two 32-bit vector lanes to two general-purpose registers.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
        a->rt == a->rt2) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg index to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd, a->idx, MO_32);
        store_reg(s, a->rt, tmp);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = tcg_temp_new_i32();
        read_neon_element32(tmp, vd + 1, a->idx, MO_32);
        store_reg(s, a->rt2, tmp);
    }

    mve_update_and_store_eci(s);
    return true;
}

static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
{
    /*
     * VMOV two general-purpose registers to two 32-bit vector lanes.
     * This insn is not predicated but it is subject to beat-wise
     * execution if it is not in an IT block. For us this means
     * only that if PSR.ECI says we should not be executing the beat
     * corresponding to the lane of the vector register being accessed
     * then we should skip performing the move, and that we need to do
     * the usual check for bad ECI state and advance of ECI state.
     * (If PSR.ECI is non-zero then we cannot be in an IT block.)
     */
    TCGv_i32 tmp;
    int vd;

    if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
        a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
        /* Rt/Rt2 cases are UNPREDICTABLE */
        return false;
    }
    if (!mve_eci_check(s) || !vfp_access_check(s)) {
        return true;
    }

    /* Convert Qreg idx to Dreg for read_neon_element32() etc */
    vd = a->qd * 2;

    if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt);
        write_neon_element32(tmp, vd, a->idx, MO_32);
    }
    if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
        tmp = load_reg(s, a->rt2);
        write_neon_element32(tmp, vd + 1, a->idx, MO_32);
    }

    mve_update_and_store_eci(s);
    return true;
}
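
/*
 * Note on the lane mapping used by the two VMOV helpers above (a restatement
 * of the vd = a->qd * 2 conversion, not an additional code path): Dreg vd
 * holds 32-bit lanes 0 and 1 of Q<qd> and Dreg vd + 1 holds lanes 2 and 3,
 * so with a->idx being 0 or 1, Rt pairs with Q<qd>[idx] and Rt2 with
 * Q<qd>[idx + 2].
 */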