/*
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include "tcg/tcg-op-gvec.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"

static inline bool is_overlapped(const int8_t astart, int8_t asize,
                                 const int8_t bstart, int8_t bsize)
{
    const int8_t aend = astart + asize;
    const int8_t bend = bstart + bsize;

    return MAX(aend, bend) - MIN(astart, bstart) < asize + bsize;
}

static bool require_rvv(DisasContext *s)
{
    return s->mstatus_vs != EXT_STATUS_DISABLED;
}

static bool require_rvf(DisasContext *s)
{
    if (s->mstatus_fs == EXT_STATUS_DISABLED) {
        return false;
    }

    switch (s->sew) {
    case MO_16:
        return s->cfg_ptr->ext_zvfh;
    case MO_32:
        return s->cfg_ptr->ext_zve32f;
    case MO_64:
        return s->cfg_ptr->ext_zve64d;
    default:
        return false;
    }
}

static bool require_rvfmin(DisasContext *s)
{
    if (s->mstatus_fs == EXT_STATUS_DISABLED) {
        return false;
    }

    switch (s->sew) {
    case MO_16:
        return s->cfg_ptr->ext_zvfhmin;
    case MO_32:
        return s->cfg_ptr->ext_zve32f;
    default:
        return false;
    }
}

static bool require_scale_rvf(DisasContext *s)
{
    if (s->mstatus_fs == EXT_STATUS_DISABLED) {
        return false;
    }

    switch (s->sew) {
    case MO_8:
        return s->cfg_ptr->ext_zvfh;
    case MO_16:
        return s->cfg_ptr->ext_zve32f;
    case MO_32:
        return s->cfg_ptr->ext_zve64d;
    default:
        return false;
    }
}

static bool require_scale_rvfmin(DisasContext *s)
{
    if (s->mstatus_fs == EXT_STATUS_DISABLED) {
        return false;
    }

    switch (s->sew) {
    case MO_16:
        return s->cfg_ptr->ext_zve32f;
    case MO_32:
        return s->cfg_ptr->ext_zve64d;
    default:
        return false;
    }
}
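/*
 * A note on the "scale" checks above (an illustrative reading, not spec
 * text): the require_scale_* variants are used for widening/narrowing FP
 * checks whose wide operands have EEW = 2 * SEW, so the extension lookup
 * is shifted down by one SEW step.  For example, with SEW = 16 the
 * double-width result is 32 bits, so require_scale_rvf() checks Zve32f,
 * the same extension require_rvf() would check at SEW = 32.
 */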
/*
 * Source and destination vector register groups cannot overlap the source
 * mask register:
 *
 * A vector register cannot be used to provide source operands with more than
 * one EEW for a single instruction. A mask register source is considered to
 * have EEW=1 for this constraint. An encoding that would result in the same
 * vector register being read with two or more different EEWs, including when
 * the vector register appears at different positions within two or more vector
 * register groups, is reserved.
 * (Section 5.2)
 *
 * A destination vector register group can overlap a source vector
 * register group only if one of the following holds:
 *   1. The destination EEW equals the source EEW.
 *   2. The destination EEW is smaller than the source EEW and the overlap
 *      is in the lowest-numbered part of the source register group.
 *   3. The destination EEW is greater than the source EEW, the source EMUL
 *      is at least 1, and the overlap is in the highest-numbered part of
 *      the destination register group.
 * For the purpose of determining register group overlap constraints, mask
 * elements have EEW=1.
 * (Section 5.2)
 */
static bool require_vm(int vm, int v)
{
    return (vm != 0 || v != 0);
}

static bool require_nf(int vd, int nf, int lmul)
{
    int size = nf << MAX(lmul, 0);
    return size <= 8 && vd + size <= 32;
}

/*
 * Vector registers must be aligned to the passed-in LMUL (EMUL).
 * If LMUL < 0, i.e. fractional LMUL, any vector register is allowed.
 */
static bool require_align(const int8_t val, const int8_t lmul)
{
    return lmul <= 0 || extract32(val, 0, lmul) == 0;
}
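/*
 * A worked example of the alignment rule (illustrative, not spec text):
 * with LMUL = 4 (lmul == 2) a register group spans four registers, so only
 * v0, v4, v8, ... are legal group bases; extract32(val, 0, 2) == 0 checks
 * exactly that.  With a fractional LMUL (lmul < 0) a group fits in a single
 * register, so any register number is accepted.
 */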
/*
 * A destination vector register group can overlap a source vector
 * register group only if one of the following holds:
 *   1. The destination EEW equals the source EEW.
 *   2. The destination EEW is smaller than the source EEW and the overlap
 *      is in the lowest-numbered part of the source register group.
 *   3. The destination EEW is greater than the source EEW, the source EMUL
 *      is at least 1, and the overlap is in the highest-numbered part of
 *      the destination register group.
 * (Section 5.2)
 *
 * This function returns true if one of the following holds:
 *   * Destination vector register group does not overlap a source vector
 *     register group.
 *   * Rule 3 met.
 * For rule 1, overlap is allowed so this function doesn't need to be called.
 * For rule 2, (vd == vs). Caller has to check whether: (vd != vs) before
 * calling this function.
 */
static bool require_noover(const int8_t dst, const int8_t dst_lmul,
                           const int8_t src, const int8_t src_lmul)
{
    int8_t dst_size = dst_lmul <= 0 ? 1 : 1 << dst_lmul;
    int8_t src_size = src_lmul <= 0 ? 1 : 1 << src_lmul;

    /* Destination EEW is greater than the source EEW, check rule 3. */
    if (dst_size > src_size) {
        if (dst < src &&
            src_lmul >= 0 &&
            is_overlapped(dst, dst_size, src, src_size) &&
            !is_overlapped(dst, dst_size, src + src_size, src_size)) {
            return true;
        }
    }

    return !is_overlapped(dst, dst_size, src, src_size);
}

static bool do_vsetvl(DisasContext *s, int rd, int rs1, TCGv s2)
{
    TCGv s1, dst;

    if (!require_rvv(s) || !s->cfg_ptr->ext_zve32x) {
        return false;
    }

    dst = dest_gpr(s, rd);

    if (rd == 0 && rs1 == 0) {
        s1 = tcg_temp_new();
        tcg_gen_mov_tl(s1, cpu_vl);
    } else if (rs1 == 0) {
        /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */
        s1 = tcg_constant_tl(RV_VLEN_MAX);
    } else {
        s1 = get_gpr(s, rs1, EXT_ZERO);
    }

    gen_helper_vsetvl(dst, tcg_env, s1, s2);
    gen_set_gpr(s, rd, dst);
    finalize_rvv_inst(s);

    gen_update_pc(s, s->cur_insn_len);
    lookup_and_goto_ptr(s);
    s->base.is_jmp = DISAS_NORETURN;
    return true;
}

static bool do_vsetivli(DisasContext *s, int rd, TCGv s1, TCGv s2)
{
    TCGv dst;

    if (!require_rvv(s) || !s->cfg_ptr->ext_zve32x) {
        return false;
    }

    dst = dest_gpr(s, rd);

    gen_helper_vsetvl(dst, tcg_env, s1, s2);
    gen_set_gpr(s, rd, dst);
    finalize_rvv_inst(s);
    gen_update_pc(s, s->cur_insn_len);
    lookup_and_goto_ptr(s);
    s->base.is_jmp = DISAS_NORETURN;

    return true;
}

static bool trans_vsetvl(DisasContext *s, arg_vsetvl *a)
{
    TCGv s2 = get_gpr(s, a->rs2, EXT_ZERO);
    return do_vsetvl(s, a->rd, a->rs1, s2);
}

static bool trans_vsetvli(DisasContext *s, arg_vsetvli *a)
{
    TCGv s2 = tcg_constant_tl(a->zimm);
    return do_vsetvl(s, a->rd, a->rs1, s2);
}

static bool trans_vsetivli(DisasContext *s, arg_vsetivli *a)
{
    TCGv s1 = tcg_constant_tl(a->rs1);
    TCGv s2 = tcg_constant_tl(a->zimm);
    return do_vsetivli(s, a->rd, s1, s2);
}

/* vector register offset from env */
static uint32_t vreg_ofs(DisasContext *s, int reg)
{
    return offsetof(CPURISCVState, vreg) + reg * s->cfg_ptr->vlenb;
}

/* check functions */

/*
 * Vector unit-stride, strided, unit-stride segment, strided segment
 * store check function.
 *
 * Rules to be checked here:
 *   1. EMUL must be within the range: 1/8 <= EMUL <= 8. (Section 7.3)
 *   2. Destination vector register number is a multiple of EMUL.
 *      (Section 3.4.2, 7.3)
 *   3. The EMUL setting must be such that EMUL * NFIELDS ≤ 8. (Section 7.8)
 *   4. Vector register numbers accessed by the segment load or store
 *      cannot increment past 31. (Section 7.8)
 */
static bool vext_check_store(DisasContext *s, int vd, int nf, uint8_t eew)
{
    int8_t emul = eew - s->sew + s->lmul;
    return (emul >= -3 && emul <= 3) &&
           require_align(vd, emul) &&
           require_nf(vd, nf, emul);
}
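/*
 * Illustrative numbers for the EMUL range check (not spec text): emul here
 * is log2(EMUL), derived from EMUL = (EEW / SEW) * LMUL.  E.g. vle8.v with
 * SEW = 64 and LMUL = 1 gives emul = 0 - 3 + 0 = -3, i.e. EMUL = 1/8, which
 * is still legal; EEW = 64 with SEW = 8 and LMUL = 8 would give emul = 6
 * and is rejected.
 */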
301 * 302 * Rules to be checked here: 303 * 1. EMUL must within the range: 1/8 <= EMUL <= 8. (Section 7.3) 304 * 2. Index vector register number is multiples of EMUL. 305 * (Section 3.4.2, 7.3) 306 * 3. Destination vector register number is multiples of LMUL. 307 * (Section 3.4.2, 7.3) 308 * 4. The EMUL setting must be such that EMUL * NFIELDS ≤ 8. (Section 7.8) 309 * 5. Vector register numbers accessed by the segment load or store 310 * cannot increment past 31. (Section 7.8) 311 */ 312static bool vext_check_st_index(DisasContext *s, int vd, int vs2, int nf, 313 uint8_t eew) 314{ 315 int8_t emul = eew - s->sew + s->lmul; 316 bool ret = (emul >= -3 && emul <= 3) && 317 require_align(vs2, emul) && 318 require_align(vd, s->lmul) && 319 require_nf(vd, nf, s->lmul); 320 321 /* 322 * V extension supports all vector load and store instructions, 323 * except V extension does not support EEW=64 for index values 324 * when XLEN=32. (Section 18.3) 325 */ 326 if (get_xl(s) == MXL_RV32) { 327 ret &= (eew != MO_64); 328 } 329 330 return ret; 331} 332 333/* 334 * Vector indexed, indexed segment load check function. 335 * 336 * Rules to be checked here: 337 * 1. All rules applies to store instructions are applies 338 * to load instructions. 339 * 2. Destination vector register group for a masked vector 340 * instruction cannot overlap the source mask register (v0). 341 * (Section 5.3) 342 * 3. Destination vector register cannot overlap a source vector 343 * register (vs2) group. 344 * (Section 5.2) 345 * 4. Destination vector register groups cannot overlap 346 * the source vector register (vs2) group for 347 * indexed segment load instructions. (Section 7.8.3) 348 */ 349static bool vext_check_ld_index(DisasContext *s, int vd, int vs2, 350 int nf, int vm, uint8_t eew) 351{ 352 int8_t seg_vd; 353 int8_t emul = eew - s->sew + s->lmul; 354 bool ret = vext_check_st_index(s, vd, vs2, nf, eew) && 355 require_vm(vm, vd); 356 357 /* Each segment register group has to follow overlap rules. */ 358 for (int i = 0; i < nf; ++i) { 359 seg_vd = vd + (1 << MAX(s->lmul, 0)) * i; 360 361 if (eew > s->sew) { 362 if (seg_vd != vs2) { 363 ret &= require_noover(seg_vd, s->lmul, vs2, emul); 364 } 365 } else if (eew < s->sew) { 366 ret &= require_noover(seg_vd, s->lmul, vs2, emul); 367 } 368 369 /* 370 * Destination vector register groups cannot overlap 371 * the source vector register (vs2) group for 372 * indexed segment load instructions. 373 */ 374 if (nf > 1) { 375 ret &= !is_overlapped(seg_vd, 1 << MAX(s->lmul, 0), 376 vs2, 1 << MAX(emul, 0)); 377 } 378 } 379 return ret; 380} 381 382/* 383 * Check whether a vector register is used to provide source operands with 384 * more than one EEW for the vector instruction. 
/*
 * Check whether a vector register is used to provide source operands with
 * more than one EEW for the vector instruction.
 * Returns true if the instruction has a valid encoding.
 * Returns false if the encoding violates the mismatched input EEWs
 * constraint.
 */
static bool vext_check_input_eew(DisasContext *s, int vs1, uint8_t eew_vs1,
                                 int vs2, uint8_t eew_vs2, int vm)
{
    bool is_valid = true;
    int8_t emul_vs1 = eew_vs1 - s->sew + s->lmul;
    int8_t emul_vs2 = eew_vs2 - s->sew + s->lmul;

    /* When vm is 0, vs1 & vs2(EEW!=1) group can't overlap v0 (EEW=1) */
    if ((vs1 != -1 && !require_vm(vm, vs1)) ||
        (vs2 != -1 && !require_vm(vm, vs2))) {
        is_valid = false;
    }

    /* When eew_vs1 != eew_vs2, check whether vs1 and vs2 are overlapped */
    if ((vs1 != -1 && vs2 != -1) && (eew_vs1 != eew_vs2) &&
        is_overlapped(vs1, 1 << MAX(emul_vs1, 0),
                      vs2, 1 << MAX(emul_vs2, 0))) {
        is_valid = false;
    }

    return is_valid;
}
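/*
 * Example of the mixed-EEW constraint this check enforces (illustrative):
 * for vwadd.wv, vs2 is read at EEW = 2*SEW while vs1 is read at EEW = SEW,
 * so an encoding where vs1 also falls inside the vs2 register group would
 * read one register with two different EEWs and is reserved; callers pass
 * -1 for vs1 or vs2 to skip that operand.
 */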
static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm)
{
    return require_vm(vm, vd) &&
           require_align(vd, s->lmul) &&
           require_align(vs, s->lmul) &&
           vext_check_input_eew(s, vs, s->sew, -1, s->sew, vm);
}

/*
 * Check function for vector instruction with format:
 * single-width result and single-width sources (SEW = SEW op SEW)
 *
 * Rules to be checked here:
 *   1. Destination vector register group for a masked vector
 *      instruction cannot overlap the source mask register (v0).
 *      (Section 5.3)
 *   2. Destination vector register number is a multiple of LMUL.
 *      (Section 3.4.2)
 *   3. Source (vs2, vs1) vector register numbers are multiples of LMUL.
 *      (Section 3.4.2)
 */
static bool vext_check_sss(DisasContext *s, int vd, int vs1, int vs2, int vm)
{
    return vext_check_ss(s, vd, vs2, vm) &&
           vext_check_input_eew(s, vs1, s->sew, vs2, s->sew, vm) &&
           require_align(vs1, s->lmul);
}

static bool vext_check_ms(DisasContext *s, int vd, int vs)
{
    bool ret = require_align(vs, s->lmul);
    if (vd != vs) {
        ret &= require_noover(vd, 0, vs, s->lmul);
    }
    return ret;
}

/*
 * Check function for maskable vector instruction with format:
 * single-width result and single-width sources (SEW = SEW op SEW)
 *
 * Rules to be checked here:
 *   1. Source (vs2, vs1) vector register numbers are multiples of LMUL.
 *      (Section 3.4.2)
 *   2. Destination vector register cannot overlap a source vector
 *      register (vs2, vs1) group.
 *      (Section 5.2)
 *   3. The destination vector register group for a masked vector
 *      instruction cannot overlap the source mask register (v0),
 *      unless the destination vector register is being written
 *      with a mask value (e.g., comparisons) or the scalar result
 *      of a reduction. (Section 5.3)
 */
static bool vext_check_mss(DisasContext *s, int vd, int vs1, int vs2)
{
    bool ret = vext_check_ms(s, vd, vs2) &&
               require_align(vs1, s->lmul);
    if (vd != vs1) {
        ret &= require_noover(vd, 0, vs1, s->lmul);
    }
    return ret;
}

/*
 * Common check function for vector widening instructions
 * of double-width result (2*SEW).
 *
 * Rules to be checked here:
 *   1. The largest vector register group used by an instruction
 *      cannot be greater than 8 vector registers (Section 5.2):
 *      => LMUL < 8.
 *      => SEW < 64.
 *   2. Double-width SEW cannot be greater than ELEN.
 *   3. Destination vector register number is a multiple of 2 * LMUL.
 *      (Section 3.4.2)
 *   4. Destination vector register group for a masked vector
 *      instruction cannot overlap the source mask register (v0).
 *      (Section 5.3)
 */
static bool vext_wide_check_common(DisasContext *s, int vd, int vm)
{
    return (s->lmul <= 2) &&
           (s->sew < MO_64) &&
           ((s->sew + 1) <= (s->cfg_ptr->elen >> 4)) &&
           require_align(vd, s->lmul + 1) &&
           require_vm(vm, vd);
}

/*
 * Common check function for vector narrowing instructions
 * of single-width result (SEW) and double-width source (2*SEW).
 *
 * Rules to be checked here:
 *   1. The largest vector register group used by an instruction
 *      cannot be greater than 8 vector registers (Section 5.2):
 *      => LMUL < 8.
 *      => SEW < 64.
 *   2. Double-width SEW cannot be greater than ELEN.
 *   3. Source vector register number is a multiple of 2 * LMUL.
 *      (Section 3.4.2)
 *   4. Destination vector register number is a multiple of LMUL.
 *      (Section 3.4.2)
 *   5. Destination vector register group for a masked vector
 *      instruction cannot overlap the source mask register (v0).
 *      (Section 5.3)
 */
static bool vext_narrow_check_common(DisasContext *s, int vd, int vs2,
                                     int vm)
{
    return (s->lmul <= 2) &&
           (s->sew < MO_64) &&
           ((s->sew + 1) <= (s->cfg_ptr->elen >> 4)) &&
           require_align(vs2, s->lmul + 1) &&
           require_align(vd, s->lmul) &&
           require_vm(vm, vd);
}

static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm)
{
    return vext_wide_check_common(s, vd, vm) &&
           vext_check_input_eew(s, vs, s->sew, -1, 0, vm) &&
           require_align(vs, s->lmul) &&
           require_noover(vd, s->lmul + 1, vs, s->lmul);
}

static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm)
{
    return vext_wide_check_common(s, vd, vm) &&
           vext_check_input_eew(s, vs, s->sew + 1, -1, 0, vm) &&
           require_align(vs, s->lmul + 1);
}

/*
 * Check function for vector instruction with format:
 * double-width result and single-width sources (2*SEW = SEW op SEW)
 *
 * Rules to be checked here:
 *   1. All rules defined in the common widening rules apply.
 *   2. Source (vs2, vs1) vector register numbers are multiples of LMUL.
 *      (Section 3.4.2)
 *   3. Destination vector register cannot overlap a source vector
 *      register (vs2, vs1) group.
 *      (Section 5.2)
 */
static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm)
{
    return vext_check_ds(s, vd, vs2, vm) &&
           vext_check_input_eew(s, vs1, s->sew, vs2, s->sew, vm) &&
           require_align(vs1, s->lmul) &&
           require_noover(vd, s->lmul + 1, vs1, s->lmul);
}
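/*
 * A concrete reading of the widening checks (illustrative): vwadd.vv with
 * SEW = 16 and LMUL = 2 writes EEW = 32 elements with EMUL = 4, so vd must
 * be aligned to 4 registers while vs1/vs2 stay aligned to 2, and
 * require_noover(vd, lmul + 1, vs, lmul) enforces the Section 5.2 overlap
 * rules between the wide destination and the narrow sources.
 */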
/*
 * Check function for vector instruction with format:
 * double-width result and double-width source1 and single-width
 * source2 (2*SEW = 2*SEW op SEW)
 *
 * Rules to be checked here:
 *   1. All rules defined in the common widening rules apply.
 *   2. Source 1 (vs2) vector register number is a multiple of 2 * LMUL.
 *      (Section 3.4.2)
 *   3. Source 2 (vs1) vector register number is a multiple of LMUL.
 *      (Section 3.4.2)
 *   4. Destination vector register cannot overlap a source vector
 *      register (vs1) group.
 *      (Section 5.2)
 */
static bool vext_check_dds(DisasContext *s, int vd, int vs1, int vs2, int vm)
{
    return vext_check_ds(s, vd, vs1, vm) &&
           vext_check_input_eew(s, vs1, s->sew, vs2, s->sew + 1, vm) &&
           require_align(vs2, s->lmul + 1);
}

static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm)
{
    bool ret = vext_narrow_check_common(s, vd, vs, vm) &&
               vext_check_input_eew(s, vs, s->sew + 1, -1, 0, vm);
    if (vd != vs) {
        ret &= require_noover(vd, s->lmul, vs, s->lmul + 1);
    }
    return ret;
}

/*
 * Check function for vector instruction with format:
 * single-width result and double-width source 1 and single-width
 * source 2 (SEW = 2*SEW op SEW)
 *
 * Rules to be checked here:
 *   1. All rules defined in the common narrowing rules apply.
 *   2. Destination vector register cannot overlap a source vector
 *      register (vs2) group.
 *      (Section 5.2)
 *   3. Source 2 (vs1) vector register number is a multiple of LMUL.
 *      (Section 3.4.2)
 */
static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm)
{
    return vext_check_sd(s, vd, vs2, vm) &&
           vext_check_input_eew(s, vs1, s->sew, vs2, s->sew + 1, vm) &&
           require_align(vs1, s->lmul);
}

/*
 * Check function for vector reduction instructions.
 *
 * Rules to be checked here:
 *   1. Source 1 (vs2) vector register number is a multiple of LMUL.
 *      (Section 3.4.2)
 */
static bool vext_check_reduction(DisasContext *s, int vs2)
{
    return require_align(vs2, s->lmul) && s->vstart_eq_zero;
}

/*
 * Check function for vector slide instructions.
 *
 * Rules to be checked here:
 *   1. Source 1 (vs2) vector register number is a multiple of LMUL.
 *      (Section 3.4.2)
 *   2. Destination vector register number is a multiple of LMUL.
 *      (Section 3.4.2)
 *   3. Destination vector register group for a masked vector
 *      instruction cannot overlap the source mask register (v0).
 *      (Section 5.3)
 *   4. The destination vector register group for vslideup, vslide1up,
 *      vfslide1up, cannot overlap the source vector register (vs2) group.
 *      (Section 5.2, 16.3.1, 16.3.3)
 */
static bool vext_check_slide(DisasContext *s, int vd, int vs2,
                             int vm, bool is_over)
{
    bool ret = require_align(vs2, s->lmul) &&
               require_align(vd, s->lmul) &&
               require_vm(vm, vd) &&
               vext_check_input_eew(s, -1, 0, vs2, s->sew, vm);

    if (is_over) {
        ret &= (vd != vs2);
    }
    return ret;
}

/*
 * cpu_get_tb_cpu_state() sets VILL if RVV is not present, so RVV is also
 * checked by this function.
 */
static bool vext_check_isa_ill(DisasContext *s)
{
    return !s->vill;
}

/* common translation macro */
#define GEN_VEXT_TRANS(NAME, EEW, ARGTYPE, OP, CHECK)          \
static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE * a)   \
{                                                              \
    if (CHECK(s, a, EEW)) {                                    \
        return OP(s, a, EEW);                                  \
    }                                                          \
    return false;                                              \
}

static uint8_t vext_get_emul(DisasContext *s, uint8_t eew)
{
    int8_t emul = eew - s->sew + s->lmul;
    return emul < 0 ? 0 : emul;
}
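/*
 * Worked example for vext_get_emul() (illustrative): emul is log2(EMUL),
 * computed from EMUL = (EEW / SEW) * LMUL, hence eew - sew + lmul in log2
 * terms.  E.g. EEW = 8 (MO_8), SEW = 32 (MO_32), LMUL = 1 gives
 * emul = 0 - 2 + 0 = -2, i.e. EMUL = 1/4; negative (fractional) values are
 * clamped to 0 because such a group still occupies one whole register as
 * far as the helpers are concerned.
 */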
/*
 *** unit stride load and store
 */
typedef void gen_helper_ldst_us(TCGv_ptr, TCGv_ptr, TCGv,
                                TCGv_env, TCGv_i32);

static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
                          gen_helper_ldst_us *fn, DisasContext *s,
                          bool is_store)
{
    TCGv_ptr dest, mask;
    TCGv base;
    TCGv_i32 desc;

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    base = get_gpr(s, rs1, EXT_NONE);

    /*
     * simd_desc() supports at most 2048 bytes, while the maximum vector
     * group length in this implementation is 4096 bytes, so the descriptor
     * is split into two parts:
     *
     * The first part is vlen in bytes (vlenb), encoded in maxsz of simd_desc.
     * The second part is lmul, encoded in data of simd_desc.
     */
    desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
                                      s->cfg_ptr->vlenb, data));

    tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0));

    /*
     * According to the specification
     *
     * Additionally, if the Ztso extension is implemented, then vector memory
     * instructions in the V extension and Zve family of extensions follow
     * RVTSO at the instruction level.  The Ztso extension does not
     * strengthen the ordering of intra-instruction element accesses.
     *
     * as a result neither ordered nor unordered accesses from the V
     * instructions need ordering within the loop but we do still need barriers
     * around the loop.
     */
    if (is_store && s->ztso) {
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
    }

    mark_vs_dirty(s);

    fn(dest, mask, base, tcg_env, desc);

    if (!is_store && s->ztso) {
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
    }

    finalize_rvv_inst(s);
    return true;
}
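/*
 * Illustrative numbers for the descriptor split described above: with
 * VLEN = 4096 bits, vlenb is 512 bytes and an LMUL = 8 register group is
 * 4096 bytes, which exceeds what simd_desc() can encode directly; encoding
 * vlenb in maxsz and carrying LMUL inside the VDATA word lets the helpers
 * reconstruct the real group size.
 */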
static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_us *fn;
    static gen_helper_ldst_us * const fns[2][4] = {
        /* masked unit stride load */
        { gen_helper_vle8_v_mask, gen_helper_vle16_v_mask,
          gen_helper_vle32_v_mask, gen_helper_vle64_v_mask },
        /* unmasked unit stride load */
        { gen_helper_vle8_v, gen_helper_vle16_v,
          gen_helper_vle32_v, gen_helper_vle64_v }
    };

    fn = fns[a->vm][eew];
    if (fn == NULL) {
        return false;
    }

    /*
     * Vector load/store instructions have the EEW encoded
     * directly in the instructions. The maximum vector size is
     * calculated with EMUL rather than LMUL.
     */
    uint8_t emul = vext_get_emul(s, eew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    data = FIELD_DP32(data, VDATA, VTA, s->vta);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);
    return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
}

static bool ld_us_check(DisasContext *s, arg_r2nfvm* a, uint8_t eew)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_load(s, a->rd, a->nf, a->vm, eew);
}

GEN_VEXT_TRANS(vle8_v,  MO_8,  r2nfvm, ld_us_op, ld_us_check)
GEN_VEXT_TRANS(vle16_v, MO_16, r2nfvm, ld_us_op, ld_us_check)
GEN_VEXT_TRANS(vle32_v, MO_32, r2nfvm, ld_us_op, ld_us_check)
GEN_VEXT_TRANS(vle64_v, MO_64, r2nfvm, ld_us_op, ld_us_check)

static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_us *fn;
    static gen_helper_ldst_us * const fns[2][4] = {
        /* masked unit stride store */
        { gen_helper_vse8_v_mask, gen_helper_vse16_v_mask,
          gen_helper_vse32_v_mask, gen_helper_vse64_v_mask },
        /* unmasked unit stride store */
        { gen_helper_vse8_v, gen_helper_vse16_v,
          gen_helper_vse32_v, gen_helper_vse64_v }
    };

    fn = fns[a->vm][eew];
    if (fn == NULL) {
        return false;
    }

    uint8_t emul = vext_get_emul(s, eew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
}

static bool st_us_check(DisasContext *s, arg_r2nfvm* a, uint8_t eew)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_store(s, a->rd, a->nf, eew);
}

GEN_VEXT_TRANS(vse8_v,  MO_8,  r2nfvm, st_us_op, st_us_check)
GEN_VEXT_TRANS(vse16_v, MO_16, r2nfvm, st_us_op, st_us_check)
GEN_VEXT_TRANS(vse32_v, MO_32, r2nfvm, st_us_op, st_us_check)
GEN_VEXT_TRANS(vse64_v, MO_64, r2nfvm, st_us_op, st_us_check)

/*
 *** unit stride mask load and store
 */
static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_us *fn = gen_helper_vlm_v;

    /* EMUL = 1, NFIELDS = 1 */
    data = FIELD_DP32(data, VDATA, LMUL, 0);
    data = FIELD_DP32(data, VDATA, NF, 1);
    /* Mask destination registers are always tail-agnostic */
    data = FIELD_DP32(data, VDATA, VTA, s->cfg_vta_all_1s);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);
    data = FIELD_DP32(data, VDATA, VM, 1);
    return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
}

static bool ld_us_mask_check(DisasContext *s, arg_vlm_v *a, uint8_t eew)
{
    /* EMUL = 1, NFIELDS = 1 */
    return require_rvv(s) && vext_check_isa_ill(s);
}

static bool st_us_mask_op(DisasContext *s, arg_vsm_v *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_us *fn = gen_helper_vsm_v;

    /* EMUL = 1, NFIELDS = 1 */
    data = FIELD_DP32(data, VDATA, LMUL, 0);
    data = FIELD_DP32(data, VDATA, NF, 1);
    data = FIELD_DP32(data, VDATA, VM, 1);
    return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
}

static bool st_us_mask_check(DisasContext *s, arg_vsm_v *a, uint8_t eew)
{
    /* EMUL = 1, NFIELDS = 1 */
    return require_rvv(s) && vext_check_isa_ill(s);
}

GEN_VEXT_TRANS(vlm_v, MO_8, vlm_v, ld_us_mask_op, ld_us_mask_check)
GEN_VEXT_TRANS(vsm_v, MO_8, vsm_v, st_us_mask_op, st_us_mask_check)
/*
 * MAXSZ returns the maximum vector size that can be operated on, in bytes.
 * It is used by the GVEC IR when the vl_eq_vlmax flag is set, to accelerate
 * vector operations.
 */
static inline uint32_t MAXSZ(DisasContext *s)
{
    int max_sz = s->cfg_ptr->vlenb << 3;
    return max_sz >> (3 - s->lmul);
}
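/*
 * Illustrative numbers: MAXSZ is vlenb scaled by LMUL, i.e. the register
 * group size in bytes.  With VLEN = 256 bits (vlenb = 32) and LMUL = 2
 * (lmul == 1) this is (32 << 3) >> (3 - 1) = 64 bytes; with LMUL = 1/2
 * (lmul == -1) it is 16 bytes.
 */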
/* Return floor(log2(a)); a must be non-zero. */
static inline uint32_t get_log2(uint32_t a)
{
    uint32_t i = 0;
    for (; a > 0;) {
        a >>= 1;
        i++;
    }
    return i - 1;
}

typedef void gen_tl_ldst(TCGv, TCGv_ptr, tcg_target_long);

/*
 * Simulate the strided load/store main loop:
 *
 *  for (i = env->vstart; i < env->vl; env->vstart = ++i) {
 *      k = 0;
 *      while (k < nf) {
 *          if (!vm && !vext_elem_mask(v0, i)) {
 *              vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
 *                                (i + k * max_elems + 1) * esz);
 *              k++;
 *              continue;
 *          }
 *          target_ulong addr = base + stride * i + (k << log2_esz);
 *          ldst(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
 *          k++;
 *      }
 *  }
 */
static void gen_ldst_stride_main_loop(DisasContext *s, TCGv dest, uint32_t rs1,
                                      uint32_t rs2, uint32_t vm, uint32_t nf,
                                      gen_tl_ldst *ld_fn, gen_tl_ldst *st_fn,
                                      bool is_load)
{
    TCGv addr = tcg_temp_new();
    TCGv base = get_gpr(s, rs1, EXT_NONE);
    TCGv stride = get_gpr(s, rs2, EXT_NONE);

    TCGv i = tcg_temp_new();
    TCGv i_esz = tcg_temp_new();
    TCGv k = tcg_temp_new();
    TCGv k_esz = tcg_temp_new();
    TCGv k_max = tcg_temp_new();
    TCGv mask = tcg_temp_new();
    TCGv mask_offs = tcg_temp_new();
    TCGv mask_offs_64 = tcg_temp_new();
    TCGv mask_elem = tcg_temp_new();
    TCGv mask_offs_rem = tcg_temp_new();
    TCGv vreg = tcg_temp_new();
    TCGv dest_offs = tcg_temp_new();
    TCGv stride_offs = tcg_temp_new();

    uint32_t max_elems = MAXSZ(s) >> s->sew;

    TCGLabel *start = gen_new_label();
    TCGLabel *end = gen_new_label();
    TCGLabel *start_k = gen_new_label();
    TCGLabel *inc_k = gen_new_label();
    TCGLabel *end_k = gen_new_label();

    MemOp atomicity = MO_ATOM_NONE;
    if (s->sew == 0) {
        atomicity = MO_ATOM_NONE;
    } else {
        atomicity = MO_ATOM_IFALIGN_PAIR;
    }

    mark_vs_dirty(s);

    tcg_gen_addi_tl(mask, (TCGv)tcg_env, vreg_ofs(s, 0));

    /* Start of outer loop. */
    tcg_gen_mov_tl(i, cpu_vstart);
    gen_set_label(start);
    tcg_gen_brcond_tl(TCG_COND_GE, i, cpu_vl, end);
    tcg_gen_shli_tl(i_esz, i, s->sew);
    /* Start of inner loop. */
    tcg_gen_movi_tl(k, 0);
    gen_set_label(start_k);
    tcg_gen_brcond_tl(TCG_COND_GE, k, tcg_constant_tl(nf), end_k);
    /*
     * If we are in the mask-agnostic regime and the operation is masked, we
     * set the inactive elements to 1.
     */
    if (!vm && s->vma) {
        TCGLabel *active_element = gen_new_label();
        /* (i + k * max_elems) * esz */
        tcg_gen_shli_tl(mask_offs, k, get_log2(max_elems << s->sew));
        tcg_gen_add_tl(mask_offs, mask_offs, i_esz);

        /*
         * Check whether bit i of the mask is 0 or 1.
         *
         * static inline int vext_elem_mask(void *v0, int index)
         * {
         *     int idx = index / 64;
         *     int pos = index % 64;
         *     return (((uint64_t *)v0)[idx] >> pos) & 1;
         * }
         */
        tcg_gen_shri_tl(mask_offs_64, mask_offs, 3);
        tcg_gen_add_tl(mask_offs_64, mask_offs_64, mask);
        tcg_gen_ld_i64((TCGv_i64)mask_elem, (TCGv_ptr)mask_offs_64, 0);
        tcg_gen_rem_tl(mask_offs_rem, mask_offs, tcg_constant_tl(8));
        tcg_gen_shr_tl(mask_elem, mask_elem, mask_offs_rem);
        tcg_gen_andi_tl(mask_elem, mask_elem, 1);
        tcg_gen_brcond_tl(TCG_COND_NE, mask_elem, tcg_constant_tl(0),
                          active_element);
        /*
         * Set masked-off elements in the destination vector register to 1s.
         * Store instructions simply skip this bit as memory ops access memory
         * only for active elements.
         */
        if (is_load) {
            tcg_gen_shli_tl(mask_offs, mask_offs, s->sew);
            tcg_gen_add_tl(mask_offs, mask_offs, dest);
            st_fn(tcg_constant_tl(-1), (TCGv_ptr)mask_offs, 0);
        }
        tcg_gen_br(inc_k);
        gen_set_label(active_element);
    }
    /*
     * The element is active, calculate the address with stride:
     * target_ulong addr = base + stride * i + (k << log2_esz);
     */
    tcg_gen_mul_tl(stride_offs, stride, i);
    tcg_gen_shli_tl(k_esz, k, s->sew);
    tcg_gen_add_tl(stride_offs, stride_offs, k_esz);
    tcg_gen_add_tl(addr, base, stride_offs);
    /* Calculate the offset in the dst/src vector register. */
    tcg_gen_shli_tl(k_max, k, get_log2(max_elems));
    tcg_gen_add_tl(dest_offs, i, k_max);
    tcg_gen_shli_tl(dest_offs, dest_offs, s->sew);
    tcg_gen_add_tl(dest_offs, dest_offs, dest);
    if (is_load) {
        tcg_gen_qemu_ld_tl(vreg, addr, s->mem_idx, MO_LE | s->sew | atomicity);
        st_fn((TCGv)vreg, (TCGv_ptr)dest_offs, 0);
    } else {
        ld_fn((TCGv)vreg, (TCGv_ptr)dest_offs, 0);
        tcg_gen_qemu_st_tl(vreg, addr, s->mem_idx, MO_LE | s->sew | atomicity);
    }
    /*
     * We don't execute the load/store above if the element was inactive.
     * We jump instead directly to incrementing k and continuing the loop.
     */
    if (!vm && s->vma) {
        gen_set_label(inc_k);
    }
    tcg_gen_addi_tl(k, k, 1);
    tcg_gen_br(start_k);
    /* End of the inner loop. */
    gen_set_label(end_k);

    tcg_gen_addi_tl(i, i, 1);
    tcg_gen_mov_tl(cpu_vstart, i);
    tcg_gen_br(start);

    /* End of the outer loop. */
    gen_set_label(end);

    return;
}
/*
 * Set the tail bytes of the strided loads/stores to 1:
 *
 *  for (k = 0; k < nf; ++k) {
 *      cnt = (k * max_elems + vl) * esz;
 *      tot = (k * max_elems + max_elems) * esz;
 *      for (i = cnt; i < tot; i += esz) {
 *          store_1s(-1, vd[vl+i]);
 *      }
 *  }
 */
static void gen_ldst_stride_tail_loop(DisasContext *s, TCGv dest, uint32_t nf,
                                      gen_tl_ldst *st_fn)
{
    TCGv i = tcg_temp_new();
    TCGv k = tcg_temp_new();
    TCGv tail_cnt = tcg_temp_new();
    TCGv tail_tot = tcg_temp_new();
    TCGv tail_addr = tcg_temp_new();

    TCGLabel *start = gen_new_label();
    TCGLabel *end = gen_new_label();
    TCGLabel *start_i = gen_new_label();
    TCGLabel *end_i = gen_new_label();

    uint32_t max_elems_b = MAXSZ(s);
    uint32_t esz = 1 << s->sew;

    /* Start of the outer loop. */
    tcg_gen_movi_tl(k, 0);
    tcg_gen_shli_tl(tail_cnt, cpu_vl, s->sew);
    tcg_gen_movi_tl(tail_tot, max_elems_b);
    tcg_gen_add_tl(tail_addr, dest, tail_cnt);
    gen_set_label(start);
    tcg_gen_brcond_tl(TCG_COND_GE, k, tcg_constant_tl(nf), end);
    /* Start of the inner loop. */
    tcg_gen_mov_tl(i, tail_cnt);
    gen_set_label(start_i);
    tcg_gen_brcond_tl(TCG_COND_GE, i, tail_tot, end_i);
    /* store_1s(-1, vd[vl+i]); */
    st_fn(tcg_constant_tl(-1), (TCGv_ptr)tail_addr, 0);
    tcg_gen_addi_tl(tail_addr, tail_addr, esz);
    tcg_gen_addi_tl(i, i, esz);
    tcg_gen_br(start_i);
    /* End of the inner loop. */
    gen_set_label(end_i);
    /* Update the counts */
    tcg_gen_addi_tl(tail_cnt, tail_cnt, max_elems_b);
    tcg_gen_addi_tl(tail_tot, tail_cnt, max_elems_b);
    tcg_gen_addi_tl(k, k, 1);
    tcg_gen_br(start);
    /* End of the outer loop. */
    gen_set_label(end);

    return;
}

static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2,
                              uint32_t data, DisasContext *s, bool is_load)
{
    if (!s->vstart_eq_zero) {
        return false;
    }

    TCGv dest = tcg_temp_new();

    uint32_t nf = FIELD_EX32(data, VDATA, NF);
    uint32_t vm = FIELD_EX32(data, VDATA, VM);

    /* Destination register and mask register */
    tcg_gen_addi_tl(dest, (TCGv)tcg_env, vreg_ofs(s, vd));

    /*
     * Select the appropriate load/store to retrieve data from the vector
     * register given a specific sew.
     */
    static gen_tl_ldst * const ld_fns[4] = {
        tcg_gen_ld8u_tl, tcg_gen_ld16u_tl,
        tcg_gen_ld32u_tl, tcg_gen_ld_tl
    };

    static gen_tl_ldst * const st_fns[4] = {
        tcg_gen_st8_tl, tcg_gen_st16_tl,
        tcg_gen_st32_tl, tcg_gen_st_tl
    };

    gen_tl_ldst *ld_fn = ld_fns[s->sew];
    gen_tl_ldst *st_fn = st_fns[s->sew];

    if (ld_fn == NULL || st_fn == NULL) {
        return false;
    }

    mark_vs_dirty(s);

    gen_ldst_stride_main_loop(s, dest, rs1, rs2, vm, nf, ld_fn, st_fn, is_load);

    tcg_gen_movi_tl(cpu_vstart, 0);

    /*
     * Set the tail bytes to 1 if tail-agnostic:
     */
    if (s->vta != 0 && is_load) {
        gen_ldst_stride_tail_loop(s, dest, nf, st_fn);
    }

    finalize_rvv_inst(s);
    return true;
}
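/*
 * Worked example for the tail loop above (illustrative): with vl = 3,
 * SEW = 32 and a 32-byte register group (max_elems = 8), tail_cnt starts
 * at 3 << 2 = 12, so bytes [12, 32) of the first segment register group are
 * filled with all 1s, and the counters then advance by 32 bytes per field.
 */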
static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
{
    uint32_t data = 0;

    uint8_t emul = vext_get_emul(s, eew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    data = FIELD_DP32(data, VDATA, VTA, s->vta);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);
    return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, s, true);
}

static bool ld_stride_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_load(s, a->rd, a->nf, a->vm, eew);
}

GEN_VEXT_TRANS(vlse8_v,  MO_8,  rnfvm, ld_stride_op, ld_stride_check)
GEN_VEXT_TRANS(vlse16_v, MO_16, rnfvm, ld_stride_op, ld_stride_check)
GEN_VEXT_TRANS(vlse32_v, MO_32, rnfvm, ld_stride_op, ld_stride_check)
GEN_VEXT_TRANS(vlse64_v, MO_64, rnfvm, ld_stride_op, ld_stride_check)

static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
{
    uint32_t data = 0;

    uint8_t emul = vext_get_emul(s, eew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);

    return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, s, false);
}

static bool st_stride_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_store(s, a->rd, a->nf, eew);
}

GEN_VEXT_TRANS(vsse8_v,  MO_8,  rnfvm, st_stride_op, st_stride_check)
GEN_VEXT_TRANS(vsse16_v, MO_16, rnfvm, st_stride_op, st_stride_check)
GEN_VEXT_TRANS(vsse32_v, MO_32, rnfvm, st_stride_op, st_stride_check)
GEN_VEXT_TRANS(vsse64_v, MO_64, rnfvm, st_stride_op, st_stride_check)

/*
 *** index load and store
 */
typedef void gen_helper_ldst_index(TCGv_ptr, TCGv_ptr, TCGv,
                                   TCGv_ptr, TCGv_env, TCGv_i32);

static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
                             uint32_t data, gen_helper_ldst_index *fn,
                             DisasContext *s)
{
    TCGv_ptr dest, mask, index;
    TCGv base;
    TCGv_i32 desc;

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    index = tcg_temp_new_ptr();
    base = get_gpr(s, rs1, EXT_NONE);
    desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
                                      s->cfg_ptr->vlenb, data));

    tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(index, tcg_env, vreg_ofs(s, vs2));
    tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0));

    mark_vs_dirty(s);

    fn(dest, mask, base, index, tcg_env, desc);

    finalize_rvv_inst(s);
    return true;
}

static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_index *fn;
    static gen_helper_ldst_index * const fns[4][4] = {
        /*
         * offset vector register group EEW = 8,
         * data vector register group EEW = SEW
         */
        { gen_helper_vlxei8_8_v, gen_helper_vlxei8_16_v,
          gen_helper_vlxei8_32_v, gen_helper_vlxei8_64_v },
        /*
         * offset vector register group EEW = 16,
         * data vector register group EEW = SEW
         */
        { gen_helper_vlxei16_8_v, gen_helper_vlxei16_16_v,
          gen_helper_vlxei16_32_v, gen_helper_vlxei16_64_v },
        /*
         * offset vector register group EEW = 32,
         * data vector register group EEW = SEW
         */
        { gen_helper_vlxei32_8_v, gen_helper_vlxei32_16_v,
          gen_helper_vlxei32_32_v, gen_helper_vlxei32_64_v },
        /*
         * offset vector register group EEW = 64,
         * data vector register group EEW = SEW
         */
        { gen_helper_vlxei64_8_v, gen_helper_vlxei64_16_v,
          gen_helper_vlxei64_32_v, gen_helper_vlxei64_64_v }
    };

    fn = fns[eew][s->sew];

    uint8_t emul = vext_get_emul(s, s->sew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    data = FIELD_DP32(data, VDATA, VTA, s->vta);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);
    return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
}

static bool ld_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_ld_index(s, a->rd, a->rs2, a->nf, a->vm, eew) &&
           vext_check_input_eew(s, -1, 0, a->rs2, eew, a->vm);
}
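/*
 * Example of how the helper table is indexed (illustrative): an indexed
 * load with EEW = 16 offsets and SEW = 32 data selects fns[MO_16][MO_32],
 * i.e. gen_helper_vlxei16_32_v, which reads 16-bit offsets from vs2 and
 * 32-bit data elements for vd; the LMUL field in VDATA carries the data
 * EMUL, which here equals LMUL.
 */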
GEN_VEXT_TRANS(vlxei8_v,  MO_8,  rnfvm, ld_index_op, ld_index_check)
GEN_VEXT_TRANS(vlxei16_v, MO_16, rnfvm, ld_index_op, ld_index_check)
GEN_VEXT_TRANS(vlxei32_v, MO_32, rnfvm, ld_index_op, ld_index_check)
GEN_VEXT_TRANS(vlxei64_v, MO_64, rnfvm, ld_index_op, ld_index_check)

static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_index *fn;
    static gen_helper_ldst_index * const fns[4][4] = {
        /*
         * offset vector register group EEW = 8,
         * data vector register group EEW = SEW
         */
        { gen_helper_vsxei8_8_v, gen_helper_vsxei8_16_v,
          gen_helper_vsxei8_32_v, gen_helper_vsxei8_64_v },
        /*
         * offset vector register group EEW = 16,
         * data vector register group EEW = SEW
         */
        { gen_helper_vsxei16_8_v, gen_helper_vsxei16_16_v,
          gen_helper_vsxei16_32_v, gen_helper_vsxei16_64_v },
        /*
         * offset vector register group EEW = 32,
         * data vector register group EEW = SEW
         */
        { gen_helper_vsxei32_8_v, gen_helper_vsxei32_16_v,
          gen_helper_vsxei32_32_v, gen_helper_vsxei32_64_v },
        /*
         * offset vector register group EEW = 64,
         * data vector register group EEW = SEW
         */
        { gen_helper_vsxei64_8_v, gen_helper_vsxei64_16_v,
          gen_helper_vsxei64_32_v, gen_helper_vsxei64_64_v }
    };

    fn = fns[eew][s->sew];

    uint8_t emul = vext_get_emul(s, s->sew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
}

static bool st_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_st_index(s, a->rd, a->rs2, a->nf, eew) &&
           vext_check_input_eew(s, a->rd, s->sew, a->rs2, eew, a->vm);
}

GEN_VEXT_TRANS(vsxei8_v,  MO_8,  rnfvm, st_index_op, st_index_check)
GEN_VEXT_TRANS(vsxei16_v, MO_16, rnfvm, st_index_op, st_index_check)
GEN_VEXT_TRANS(vsxei32_v, MO_32, rnfvm, st_index_op, st_index_check)
GEN_VEXT_TRANS(vsxei64_v, MO_64, rnfvm, st_index_op, st_index_check)

/*
 *** unit stride fault-only-first load
 */
static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data,
                       gen_helper_ldst_us *fn, DisasContext *s)
{
    TCGv_ptr dest, mask;
    TCGv base;
    TCGv_i32 desc;

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    base = get_gpr(s, rs1, EXT_NONE);
    desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
                                      s->cfg_ptr->vlenb, data));

    tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0));

    fn(dest, mask, base, tcg_env, desc);

    finalize_rvv_inst(s);
    return true;
}

static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_us *fn;
    static gen_helper_ldst_us * const fns[4] = {
        gen_helper_vle8ff_v, gen_helper_vle16ff_v,
        gen_helper_vle32ff_v, gen_helper_vle64ff_v
    };

    fn = fns[eew];
    if (fn == NULL) {
        return false;
    }

    uint8_t emul = vext_get_emul(s, eew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    data = FIELD_DP32(data, VDATA, VTA, s->vta);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);
    return ldff_trans(a->rd, a->rs1, data, fn, s);
}
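/*
 * Note on fault-only-first semantics (Section 7.7, summarized): an
 * exception on any element other than element 0 does not trap but instead
 * shrinks vl to the number of elements loaded successfully; that vl update
 * is performed inside the gen_helper_vleNff_v helpers invoked above.
 */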
GEN_VEXT_TRANS(vle8ff_v,  MO_8,  r2nfvm, ldff_op, ld_us_check)
GEN_VEXT_TRANS(vle16ff_v, MO_16, r2nfvm, ldff_op, ld_us_check)
GEN_VEXT_TRANS(vle32ff_v, MO_32, r2nfvm, ldff_op, ld_us_check)
GEN_VEXT_TRANS(vle64ff_v, MO_64, r2nfvm, ldff_op, ld_us_check)

/*
 * load and store whole register instructions
 */
typedef void gen_helper_ldst_whole(TCGv_ptr, TCGv, TCGv_env, TCGv_i32);

static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
                             uint32_t log2_esz, gen_helper_ldst_whole *fn,
                             DisasContext *s, bool is_load)
{
    mark_vs_dirty(s);

    /*
     * Load/store multiple bytes per iteration.
     * When possible do this atomically.
     * Update vstart with the number of processed elements.
     * Use the helper function if either:
     * - vstart is not 0.
     * - the target has 32 bit registers and we are loading/storing 64 bit long
     *   elements. This is to ensure that we process every element with a single
     *   memory instruction.
     */

    bool use_helper_fn = !(s->vstart_eq_zero) ||
                         (TCG_TARGET_REG_BITS == 32 && log2_esz == 3);

    if (!use_helper_fn) {
        TCGv addr = tcg_temp_new();
        uint32_t size = s->cfg_ptr->vlenb * nf;
        TCGv_i64 t8 = tcg_temp_new_i64();
        TCGv_i32 t4 = tcg_temp_new_i32();
        MemOp atomicity = MO_ATOM_NONE;
        if (log2_esz == 0) {
            atomicity = MO_ATOM_NONE;
        } else {
            atomicity = MO_ATOM_IFALIGN_PAIR;
        }
        if (TCG_TARGET_REG_BITS == 64) {
            for (int i = 0; i < size; i += 8) {
                addr = get_address(s, rs1, i);
                if (is_load) {
                    tcg_gen_qemu_ld_i64(t8, addr, s->mem_idx,
                                        MO_LE | MO_64 | atomicity);
                    tcg_gen_st_i64(t8, tcg_env, vreg_ofs(s, vd) + i);
                } else {
                    tcg_gen_ld_i64(t8, tcg_env, vreg_ofs(s, vd) + i);
                    tcg_gen_qemu_st_i64(t8, addr, s->mem_idx,
                                        MO_LE | MO_64 | atomicity);
                }
                if (i == size - 8) {
                    tcg_gen_movi_tl(cpu_vstart, 0);
                } else {
                    tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 8 >> log2_esz);
                }
            }
        } else {
            for (int i = 0; i < size; i += 4) {
                addr = get_address(s, rs1, i);
                if (is_load) {
                    tcg_gen_qemu_ld_i32(t4, addr, s->mem_idx,
                                        MO_LE | MO_32 | atomicity);
                    tcg_gen_st_i32(t4, tcg_env, vreg_ofs(s, vd) + i);
                } else {
                    tcg_gen_ld_i32(t4, tcg_env, vreg_ofs(s, vd) + i);
                    tcg_gen_qemu_st_i32(t4, addr, s->mem_idx,
                                        MO_LE | MO_32 | atomicity);
                }
                if (i == size - 4) {
                    tcg_gen_movi_tl(cpu_vstart, 0);
                } else {
                    tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 4 >> log2_esz);
                }
            }
        }
    } else {
        TCGv_ptr dest;
        TCGv base;
        TCGv_i32 desc;
        uint32_t data = FIELD_DP32(0, VDATA, NF, nf);
        data = FIELD_DP32(data, VDATA, VM, 1);
        dest = tcg_temp_new_ptr();
        desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
                                          s->cfg_ptr->vlenb, data));
        base = get_gpr(s, rs1, EXT_NONE);
        tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
        fn(dest, base, tcg_env, desc);
    }

    finalize_rvv_inst(s);
    return true;
}
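/*
 * Illustrative numbers for the inline path above: vl2re32.v with
 * vlenb = 16 copies size = nf * vlenb = 32 bytes in four 8-byte chunks on
 * a 64-bit host, advancing vstart by 8 >> log2_esz = 2 elements after each
 * chunk and resetting it to 0 once the last chunk has been transferred.
 */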
/*
 * load and store whole register instructions ignore vtype and vl setting.
 * Thus, we don't need to check the vill bit. (Section 7.9)
 */
#define GEN_LDST_WHOLE_TRANS(NAME, ETYPE, ARG_NF, IS_LOAD)                  \
static bool trans_##NAME(DisasContext *s, arg_##NAME * a)                   \
{                                                                            \
    if (require_rvv(s) &&                                                    \
        QEMU_IS_ALIGNED(a->rd, ARG_NF)) {                                    \
        return ldst_whole_trans(a->rd, a->rs1, ARG_NF, ctzl(sizeof(ETYPE)),  \
                                gen_helper_##NAME, s, IS_LOAD);              \
    }                                                                        \
    return false;                                                            \
}

GEN_LDST_WHOLE_TRANS(vl1re8_v,  int8_t,  1, true)
GEN_LDST_WHOLE_TRANS(vl1re16_v, int16_t, 1, true)
GEN_LDST_WHOLE_TRANS(vl1re32_v, int32_t, 1, true)
GEN_LDST_WHOLE_TRANS(vl1re64_v, int64_t, 1, true)
GEN_LDST_WHOLE_TRANS(vl2re8_v,  int8_t,  2, true)
GEN_LDST_WHOLE_TRANS(vl2re16_v, int16_t, 2, true)
GEN_LDST_WHOLE_TRANS(vl2re32_v, int32_t, 2, true)
GEN_LDST_WHOLE_TRANS(vl2re64_v, int64_t, 2, true)
GEN_LDST_WHOLE_TRANS(vl4re8_v,  int8_t,  4, true)
GEN_LDST_WHOLE_TRANS(vl4re16_v, int16_t, 4, true)
GEN_LDST_WHOLE_TRANS(vl4re32_v, int32_t, 4, true)
GEN_LDST_WHOLE_TRANS(vl4re64_v, int64_t, 4, true)
GEN_LDST_WHOLE_TRANS(vl8re8_v,  int8_t,  8, true)
GEN_LDST_WHOLE_TRANS(vl8re16_v, int16_t, 8, true)
GEN_LDST_WHOLE_TRANS(vl8re32_v, int32_t, 8, true)
GEN_LDST_WHOLE_TRANS(vl8re64_v, int64_t, 8, true)

/*
 * The vector whole register store instructions are encoded similarly to
 * unmasked unit-stride stores of elements with EEW=8.
 */
GEN_LDST_WHOLE_TRANS(vs1r_v, int8_t, 1, false)
GEN_LDST_WHOLE_TRANS(vs2r_v, int8_t, 2, false)
GEN_LDST_WHOLE_TRANS(vs4r_v, int8_t, 4, false)
GEN_LDST_WHOLE_TRANS(vs8r_v, int8_t, 8, false)

/*
 *** Vector Integer Arithmetic Instructions
 */

static bool opivv_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm);
}

typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
                        uint32_t, uint32_t, uint32_t);

static inline bool
do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
              gen_helper_gvec_4_ptr *fn)
{
    if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
        gvec_fn(s->sew, vreg_ofs(s, a->rd),
                vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
                MAXSZ(s), MAXSZ(s));
    } else {
        uint32_t data = 0;

        data = FIELD_DP32(data, VDATA, VM, a->vm);
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
        data = FIELD_DP32(data, VDATA, VTA, s->vta);
        data = FIELD_DP32(data, VDATA, VMA, s->vma);
        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                           vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
                           tcg_env, s->cfg_ptr->vlenb,
                           s->cfg_ptr->vlenb, data, fn);
    }
    finalize_rvv_inst(s);
    return true;
}

/* OPIVV with GVEC IR */
#define GEN_OPIVV_GVEC_TRANS(NAME, SUF)                             \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)              \
{                                                                   \
    static gen_helper_gvec_4_ptr * const fns[4] = {                 \
        gen_helper_##NAME##_b, gen_helper_##NAME##_h,               \
        gen_helper_##NAME##_w, gen_helper_##NAME##_d,               \
    };                                                              \
    if (!opivv_check(s, a)) {                                       \
        return false;                                               \
    }                                                               \
    return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]);    \
}

GEN_OPIVV_GVEC_TRANS(vadd_vv, add)
GEN_OPIVV_GVEC_TRANS(vsub_vv, sub)
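/*
 * Sketch of what the macro above expands to (illustrative):
 * GEN_OPIVV_GVEC_TRANS(vadd_vv, add) defines trans_vadd_vv(), which emits
 * tcg_gen_gvec_add() over MAXSZ(s) bytes when the unmasked vl == VLMAX fast
 * path applies, and otherwise falls back to the per-SEW out-of-line helpers
 * gen_helper_vadd_vv_{b,h,w,d} via tcg_gen_gvec_4_ptr().
 */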
typedef void gen_helper_opivx(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr,
                              TCGv_env, TCGv_i32);

static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
                        gen_helper_opivx *fn, DisasContext *s)
{
    TCGv_ptr dest, src2, mask;
    TCGv src1;
    TCGv_i32 desc;
    uint32_t data = 0;

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    src2 = tcg_temp_new_ptr();
    src1 = get_gpr(s, rs1, EXT_SIGN);

    data = FIELD_DP32(data, VDATA, VM, vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
    data = FIELD_DP32(data, VDATA, VTA, s->vta);
    data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);
    desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
                                      s->cfg_ptr->vlenb, data));

    tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(src2, tcg_env, vreg_ofs(s, vs2));
    tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0));

    fn(dest, mask, src1, src2, tcg_env, desc);

    finalize_rvv_inst(s);
    return true;
}

static bool opivx_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_ss(s, a->rd, a->rs2, a->vm);
}

typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, TCGv_i64,
                         uint32_t, uint32_t);

static inline bool
do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn,
              gen_helper_opivx *fn)
{
    if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
        TCGv_i64 src1 = tcg_temp_new_i64();

        tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN));
        gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
                src1, MAXSZ(s), MAXSZ(s));

        finalize_rvv_inst(s);
        return true;
    }
    return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
}

/* OPIVX with GVEC IR */
#define GEN_OPIVX_GVEC_TRANS(NAME, SUF)                             \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)              \
{                                                                   \
    static gen_helper_opivx * const fns[4] = {                      \
        gen_helper_##NAME##_b, gen_helper_##NAME##_h,               \
        gen_helper_##NAME##_w, gen_helper_##NAME##_d,               \
    };                                                              \
    if (!opivx_check(s, a)) {                                       \
        return false;                                               \
    }                                                               \
    return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]);    \
}

GEN_OPIVX_GVEC_TRANS(vadd_vx, adds)
GEN_OPIVX_GVEC_TRANS(vsub_vx, subs)
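/*
 * The reverse-subtract forms below compute vd[i] = x[rs1] - vs2[i] (and the
 * .vi variant uses an immediate), so the gen_rsub_* callbacks simply emit a
 * subtraction with the operands swapped; for example gen_rsub_i64() issues
 * tcg_gen_sub_i64(ret, arg2, arg1) rather than (arg1, arg2).
 */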
static void gen_vec_rsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_vec_sub8_i64(d, b, a);
}

static void gen_vec_rsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_vec_sub16_i64(d, b, a);
}

static void gen_rsub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    tcg_gen_sub_i32(ret, arg2, arg1);
}

static void gen_rsub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    tcg_gen_sub_i64(ret, arg2, arg1);
}

static void gen_rsub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_sub_vec(vece, r, b, a);
}

static void tcg_gen_gvec_rsubs(unsigned vece, uint32_t dofs, uint32_t aofs,
                               TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s rsub_op[4] = {
        { .fni8 = gen_vec_rsub8_i64,
          .fniv = gen_rsub_vec,
          .fno = gen_helper_vec_rsubs8,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_vec_rsub16_i64,
          .fniv = gen_rsub_vec,
          .fno = gen_helper_vec_rsubs16,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_rsub_i32,
          .fniv = gen_rsub_vec,
          .fno = gen_helper_vec_rsubs32,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_rsub_i64,
          .fniv = gen_rsub_vec,
          .fno = gen_helper_vec_rsubs64,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]);
}

GEN_OPIVX_GVEC_TRANS(vrsub_vx, rsubs)

typedef enum {
    IMM_ZX,         /* Zero-extended */
    IMM_SX,         /* Sign-extended */
    IMM_TRUNC_SEW,  /* Truncate to log2(SEW) bits */
    IMM_TRUNC_2SEW, /* Truncate to log2(2*SEW) bits */
} imm_mode_t;

static int64_t extract_imm(DisasContext *s, uint32_t imm, imm_mode_t imm_mode)
{
    switch (imm_mode) {
    case IMM_ZX:
        return extract64(imm, 0, 5);
    case IMM_SX:
        return sextract64(imm, 0, 5);
    case IMM_TRUNC_SEW:
        return extract64(imm, 0, s->sew + 3);
    case IMM_TRUNC_2SEW:
        return extract64(imm, 0, s->sew + 4);
    default:
        g_assert_not_reached();
    }
}

static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
                        gen_helper_opivx *fn, DisasContext *s,
                        imm_mode_t imm_mode)
{
    TCGv_ptr dest, src2, mask;
    TCGv src1;
    TCGv_i32 desc;
    uint32_t data = 0;

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    src2 = tcg_temp_new_ptr();
    src1 = tcg_constant_tl(extract_imm(s, imm, imm_mode));

    data = FIELD_DP32(data, VDATA, VM, vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
    data = FIELD_DP32(data, VDATA, VTA, s->vta);
    data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);
    desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
                                      s->cfg_ptr->vlenb, data));

    tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(src2, tcg_env, vreg_ofs(s, vs2));
    tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0));

    fn(dest, mask, src1, src2, tcg_env, desc);

    finalize_rvv_inst(s);
    return true;
}

typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
                         uint32_t, uint32_t);

static inline bool
do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn,
              gen_helper_opivx *fn, imm_mode_t imm_mode)
{
    if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
        gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
                extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s));
        finalize_rvv_inst(s);
        return true;
    }
    return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, imm_mode);
}

/* OPIVI with GVEC IR */
#define GEN_OPIVI_GVEC_TRANS(NAME, IMM_MODE, OPIVX, SUF)            \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)              \
{                                                                   \
    static gen_helper_opivx * const fns[4] = {                      \
        gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h,             \
        gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d,             \
    };                                                              \
    if (!opivx_check(s, a)) {                                       \
        return false;                                               \
    }                                                               \
    return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF,                  \
                         fns[s->sew], IMM_MODE);                    \
}

GEN_OPIVI_GVEC_TRANS(vadd_vi, IMM_SX, vadd_vx, addi)

static void tcg_gen_gvec_rsubi(unsigned vece, uint32_t dofs, uint32_t aofs,
                               int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    TCGv_i64 tmp = tcg_constant_i64(c);
    tcg_gen_gvec_rsubs(vece, dofs, aofs, tmp, oprsz, maxsz);
}

GEN_OPIVI_GVEC_TRANS(vrsub_vi, IMM_SX, vrsub_vx, rsubi)
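/*
 * How the imm_mode_t values are used (illustrative): vadd.vi sign-extends
 * its 5-bit immediate (IMM_SX, as above), some forms zero-extend it
 * (IMM_ZX), and the shift-by-immediate instructions truncate it to
 * log2(SEW) bits (IMM_TRUNC_SEW), so e.g. with SEW = 8 only the low 3 bits
 * of the shift amount are kept.
 */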
opivv_widen_check(DisasContext *s, arg_rmrr *a) 1820{ 1821 return require_rvv(s) && 1822 vext_check_isa_ill(s) && 1823 vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm); 1824} 1825 1826/* OPIVV with overwrite and WIDEN */ 1827static bool opivv_overwrite_widen_check(DisasContext *s, arg_rmrr *a) 1828{ 1829 return require_rvv(s) && 1830 vext_check_isa_ill(s) && 1831 vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) && 1832 vext_check_input_eew(s, a->rd, s->sew + 1, a->rs1, s->sew, a->vm) && 1833 vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); 1834} 1835 1836static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, 1837 gen_helper_gvec_4_ptr *fn, 1838 bool (*checkfn)(DisasContext *, arg_rmrr *)) 1839{ 1840 if (checkfn(s, a)) { 1841 uint32_t data = 0; 1842 1843 data = FIELD_DP32(data, VDATA, VM, a->vm); 1844 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 1845 data = FIELD_DP32(data, VDATA, VTA, s->vta); 1846 data = FIELD_DP32(data, VDATA, VMA, s->vma); 1847 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), 1848 vreg_ofs(s, a->rs1), 1849 vreg_ofs(s, a->rs2), 1850 tcg_env, s->cfg_ptr->vlenb, 1851 s->cfg_ptr->vlenb, 1852 data, fn); 1853 finalize_rvv_inst(s); 1854 return true; 1855 } 1856 return false; 1857} 1858 1859#define GEN_OPIVV_WIDEN_TRANS(NAME, CHECK) \ 1860static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 1861{ \ 1862 static gen_helper_gvec_4_ptr * const fns[3] = { \ 1863 gen_helper_##NAME##_b, \ 1864 gen_helper_##NAME##_h, \ 1865 gen_helper_##NAME##_w \ 1866 }; \ 1867 return do_opivv_widen(s, a, fns[s->sew], CHECK); \ 1868} 1869 1870GEN_OPIVV_WIDEN_TRANS(vwaddu_vv, opivv_widen_check) 1871GEN_OPIVV_WIDEN_TRANS(vwadd_vv, opivv_widen_check) 1872GEN_OPIVV_WIDEN_TRANS(vwsubu_vv, opivv_widen_check) 1873GEN_OPIVV_WIDEN_TRANS(vwsub_vv, opivv_widen_check) 1874 1875/* OPIVX with WIDEN */ 1876static bool opivx_widen_check(DisasContext *s, arg_rmrr *a) 1877{ 1878 return require_rvv(s) && 1879 vext_check_isa_ill(s) && 1880 vext_check_ds(s, a->rd, a->rs2, a->vm); 1881} 1882 1883static bool opivx_overwrite_widen_check(DisasContext *s, arg_rmrr *a) 1884{ 1885 return require_rvv(s) && 1886 vext_check_isa_ill(s) && 1887 vext_check_ds(s, a->rd, a->rs2, a->vm) && 1888 vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); 1889} 1890 1891#define GEN_OPIVX_WIDEN_TRANS(NAME, CHECK) \ 1892static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 1893{ \ 1894 if (CHECK(s, a)) { \ 1895 static gen_helper_opivx * const fns[3] = { \ 1896 gen_helper_##NAME##_b, \ 1897 gen_helper_##NAME##_h, \ 1898 gen_helper_##NAME##_w \ 1899 }; \ 1900 return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s); \ 1901 } \ 1902 return false; \ 1903} 1904 1905GEN_OPIVX_WIDEN_TRANS(vwaddu_vx, opivx_widen_check) 1906GEN_OPIVX_WIDEN_TRANS(vwadd_vx, opivx_widen_check) 1907GEN_OPIVX_WIDEN_TRANS(vwsubu_vx, opivx_widen_check) 1908GEN_OPIVX_WIDEN_TRANS(vwsub_vx, opivx_widen_check) 1909 1910/* WIDEN OPIVV with WIDEN */ 1911static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a) 1912{ 1913 return require_rvv(s) && 1914 vext_check_isa_ill(s) && 1915 vext_check_dds(s, a->rd, a->rs1, a->rs2, a->vm); 1916} 1917 1918static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, 1919 gen_helper_gvec_4_ptr *fn) 1920{ 1921 if (opiwv_widen_check(s, a)) { 1922 uint32_t data = 0; 1923 1924 data = FIELD_DP32(data, VDATA, VM, a->vm); 1925 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 1926 data = FIELD_DP32(data, VDATA, VTA, s->vta); 1927 data = FIELD_DP32(data, VDATA, VMA, s->vma); 1928 tcg_gen_gvec_4_ptr(vreg_ofs(s, 
a->rd), vreg_ofs(s, 0), 1929 vreg_ofs(s, a->rs1), 1930 vreg_ofs(s, a->rs2), 1931 tcg_env, s->cfg_ptr->vlenb, 1932 s->cfg_ptr->vlenb, data, fn); 1933 finalize_rvv_inst(s); 1934 return true; 1935 } 1936 return false; 1937} 1938 1939#define GEN_OPIWV_WIDEN_TRANS(NAME) \ 1940static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 1941{ \ 1942 static gen_helper_gvec_4_ptr * const fns[3] = { \ 1943 gen_helper_##NAME##_b, \ 1944 gen_helper_##NAME##_h, \ 1945 gen_helper_##NAME##_w \ 1946 }; \ 1947 return do_opiwv_widen(s, a, fns[s->sew]); \ 1948} 1949 1950GEN_OPIWV_WIDEN_TRANS(vwaddu_wv) 1951GEN_OPIWV_WIDEN_TRANS(vwadd_wv) 1952GEN_OPIWV_WIDEN_TRANS(vwsubu_wv) 1953GEN_OPIWV_WIDEN_TRANS(vwsub_wv) 1954 1955/* WIDEN OPIVX with WIDEN */ 1956static bool opiwx_widen_check(DisasContext *s, arg_rmrr *a) 1957{ 1958 return require_rvv(s) && 1959 vext_check_isa_ill(s) && 1960 vext_check_dd(s, a->rd, a->rs2, a->vm); 1961} 1962 1963static bool do_opiwx_widen(DisasContext *s, arg_rmrr *a, 1964 gen_helper_opivx *fn) 1965{ 1966 if (opiwx_widen_check(s, a)) { 1967 return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); 1968 } 1969 return false; 1970} 1971 1972#define GEN_OPIWX_WIDEN_TRANS(NAME) \ 1973static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 1974{ \ 1975 static gen_helper_opivx * const fns[3] = { \ 1976 gen_helper_##NAME##_b, \ 1977 gen_helper_##NAME##_h, \ 1978 gen_helper_##NAME##_w \ 1979 }; \ 1980 return do_opiwx_widen(s, a, fns[s->sew]); \ 1981} 1982 1983GEN_OPIWX_WIDEN_TRANS(vwaddu_wx) 1984GEN_OPIWX_WIDEN_TRANS(vwadd_wx) 1985GEN_OPIWX_WIDEN_TRANS(vwsubu_wx) 1986GEN_OPIWX_WIDEN_TRANS(vwsub_wx) 1987 1988static bool opivv_trans(uint32_t vd, uint32_t vs1, uint32_t vs2, uint32_t vm, 1989 gen_helper_gvec_4_ptr *fn, DisasContext *s) 1990{ 1991 uint32_t data = 0; 1992 1993 data = FIELD_DP32(data, VDATA, VM, vm); 1994 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 1995 data = FIELD_DP32(data, VDATA, VTA, s->vta); 1996 data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); 1997 data = FIELD_DP32(data, VDATA, VMA, s->vma); 1998 tcg_gen_gvec_4_ptr(vreg_ofs(s, vd), vreg_ofs(s, 0), vreg_ofs(s, vs1), 1999 vreg_ofs(s, vs2), tcg_env, s->cfg_ptr->vlenb, 2000 s->cfg_ptr->vlenb, data, fn); 2001 finalize_rvv_inst(s); 2002 return true; 2003} 2004 2005/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 2006/* OPIVV without GVEC IR */ 2007#define GEN_OPIVV_TRANS(NAME, CHECK) \ 2008static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2009{ \ 2010 if (CHECK(s, a)) { \ 2011 static gen_helper_gvec_4_ptr * const fns[4] = { \ 2012 gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ 2013 gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ 2014 }; \ 2015 return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ 2016 } \ 2017 return false; \ 2018} 2019 2020/* 2021 * For vadc and vsbc, an illegal instruction exception is raised if the 2022 * destination vector register is v0 and LMUL > 1. (Section 11.4) 2023 */ 2024static bool opivv_vadc_check(DisasContext *s, arg_rmrr *a) 2025{ 2026 return require_rvv(s) && 2027 vext_check_isa_ill(s) && 2028 (a->rd != 0) && 2029 vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm); 2030} 2031 2032GEN_OPIVV_TRANS(vadc_vvm, opivv_vadc_check) 2033GEN_OPIVV_TRANS(vsbc_vvm, opivv_vadc_check) 2034 2035/* 2036 * For vmadc and vmsbc, an illegal instruction exception is raised if the 2037 * destination vector register overlaps a source vector register group. 
2038 */ 2039static bool opivv_vmadc_check(DisasContext *s, arg_rmrr *a) 2040{ 2041 return require_rvv(s) && 2042 vext_check_isa_ill(s) && 2043 vext_check_mss(s, a->rd, a->rs1, a->rs2); 2044} 2045 2046GEN_OPIVV_TRANS(vmadc_vvm, opivv_vmadc_check) 2047GEN_OPIVV_TRANS(vmsbc_vvm, opivv_vmadc_check) 2048 2049static bool opivx_vadc_check(DisasContext *s, arg_rmrr *a) 2050{ 2051 return require_rvv(s) && 2052 vext_check_isa_ill(s) && 2053 (a->rd != 0) && 2054 vext_check_ss(s, a->rd, a->rs2, a->vm); 2055} 2056 2057/* OPIVX without GVEC IR */ 2058#define GEN_OPIVX_TRANS(NAME, CHECK) \ 2059static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2060{ \ 2061 if (CHECK(s, a)) { \ 2062 static gen_helper_opivx * const fns[4] = { \ 2063 gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ 2064 gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ 2065 }; \ 2066 \ 2067 return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ 2068 } \ 2069 return false; \ 2070} 2071 2072GEN_OPIVX_TRANS(vadc_vxm, opivx_vadc_check) 2073GEN_OPIVX_TRANS(vsbc_vxm, opivx_vadc_check) 2074 2075static bool opivx_vmadc_check(DisasContext *s, arg_rmrr *a) 2076{ 2077 return require_rvv(s) && 2078 vext_check_isa_ill(s) && 2079 vext_check_ms(s, a->rd, a->rs2); 2080} 2081 2082GEN_OPIVX_TRANS(vmadc_vxm, opivx_vmadc_check) 2083GEN_OPIVX_TRANS(vmsbc_vxm, opivx_vmadc_check) 2084 2085/* OPIVI without GVEC IR */ 2086#define GEN_OPIVI_TRANS(NAME, IMM_MODE, OPIVX, CHECK) \ 2087static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2088{ \ 2089 if (CHECK(s, a)) { \ 2090 static gen_helper_opivx * const fns[4] = { \ 2091 gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ 2092 gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ 2093 }; \ 2094 return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ 2095 fns[s->sew], s, IMM_MODE); \ 2096 } \ 2097 return false; \ 2098} 2099 2100GEN_OPIVI_TRANS(vadc_vim, IMM_SX, vadc_vxm, opivx_vadc_check) 2101GEN_OPIVI_TRANS(vmadc_vim, IMM_SX, vmadc_vxm, opivx_vmadc_check) 2102 2103/* Vector Bitwise Logical Instructions */ 2104GEN_OPIVV_GVEC_TRANS(vand_vv, and) 2105GEN_OPIVV_GVEC_TRANS(vor_vv, or) 2106GEN_OPIVV_GVEC_TRANS(vxor_vv, xor) 2107GEN_OPIVX_GVEC_TRANS(vand_vx, ands) 2108GEN_OPIVX_GVEC_TRANS(vor_vx, ors) 2109GEN_OPIVX_GVEC_TRANS(vxor_vx, xors) 2110GEN_OPIVI_GVEC_TRANS(vand_vi, IMM_SX, vand_vx, andi) 2111GEN_OPIVI_GVEC_TRANS(vor_vi, IMM_SX, vor_vx, ori) 2112GEN_OPIVI_GVEC_TRANS(vxor_vi, IMM_SX, vxor_vx, xori) 2113 2114/* Vector Single-Width Bit Shift Instructions */ 2115GEN_OPIVV_GVEC_TRANS(vsll_vv, shlv) 2116GEN_OPIVV_GVEC_TRANS(vsrl_vv, shrv) 2117GEN_OPIVV_GVEC_TRANS(vsra_vv, sarv) 2118 2119typedef void GVecGen2sFn32(unsigned, uint32_t, uint32_t, TCGv_i32, 2120 uint32_t, uint32_t); 2121 2122static inline bool 2123do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn, 2124 gen_helper_opivx *fn) 2125{ 2126 if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { 2127 TCGv_i32 src1 = tcg_temp_new_i32(); 2128 2129 tcg_gen_trunc_tl_i32(src1, get_gpr(s, a->rs1, EXT_NONE)); 2130 tcg_gen_extract_i32(src1, src1, 0, s->sew + 3); 2131 gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), 2132 src1, MAXSZ(s), MAXSZ(s)); 2133 2134 finalize_rvv_inst(s); 2135 return true; 2136 } 2137 return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); 2138} 2139 2140#define GEN_OPIVX_GVEC_SHIFT_TRANS(NAME, SUF) \ 2141static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2142{ \ 2143 static gen_helper_opivx * const fns[4] = { \ 2144 gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ 2145 gen_helper_##NAME##_w, 
gen_helper_##NAME##_d, \ 2146 }; \ 2147 if (!opivx_check(s, a)) { \ 2148 return false; \ 2149 } \ 2150 return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ 2151} 2152 2153GEN_OPIVX_GVEC_SHIFT_TRANS(vsll_vx, shls) 2154GEN_OPIVX_GVEC_SHIFT_TRANS(vsrl_vx, shrs) 2155GEN_OPIVX_GVEC_SHIFT_TRANS(vsra_vx, sars) 2156 2157GEN_OPIVI_GVEC_TRANS(vsll_vi, IMM_TRUNC_SEW, vsll_vx, shli) 2158GEN_OPIVI_GVEC_TRANS(vsrl_vi, IMM_TRUNC_SEW, vsrl_vx, shri) 2159GEN_OPIVI_GVEC_TRANS(vsra_vi, IMM_TRUNC_SEW, vsra_vx, sari) 2160 2161/* Vector Narrowing Integer Right Shift Instructions */ 2162static bool opiwv_narrow_check(DisasContext *s, arg_rmrr *a) 2163{ 2164 return require_rvv(s) && 2165 vext_check_isa_ill(s) && 2166 vext_check_sds(s, a->rd, a->rs1, a->rs2, a->vm); 2167} 2168 2169/* OPIVV with NARROW */ 2170#define GEN_OPIWV_NARROW_TRANS(NAME) \ 2171static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2172{ \ 2173 if (opiwv_narrow_check(s, a)) { \ 2174 uint32_t data = 0; \ 2175 static gen_helper_gvec_4_ptr * const fns[3] = { \ 2176 gen_helper_##NAME##_b, \ 2177 gen_helper_##NAME##_h, \ 2178 gen_helper_##NAME##_w, \ 2179 }; \ 2180 \ 2181 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 2182 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 2183 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 2184 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 2185 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 2186 vreg_ofs(s, a->rs1), \ 2187 vreg_ofs(s, a->rs2), tcg_env, \ 2188 s->cfg_ptr->vlenb, \ 2189 s->cfg_ptr->vlenb, data, \ 2190 fns[s->sew]); \ 2191 finalize_rvv_inst(s); \ 2192 return true; \ 2193 } \ 2194 return false; \ 2195} 2196GEN_OPIWV_NARROW_TRANS(vnsra_wv) 2197GEN_OPIWV_NARROW_TRANS(vnsrl_wv) 2198 2199static bool opiwx_narrow_check(DisasContext *s, arg_rmrr *a) 2200{ 2201 return require_rvv(s) && 2202 vext_check_isa_ill(s) && 2203 vext_check_sd(s, a->rd, a->rs2, a->vm); 2204} 2205 2206/* OPIVX with NARROW */ 2207#define GEN_OPIWX_NARROW_TRANS(NAME) \ 2208static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2209{ \ 2210 if (opiwx_narrow_check(s, a)) { \ 2211 static gen_helper_opivx * const fns[3] = { \ 2212 gen_helper_##NAME##_b, \ 2213 gen_helper_##NAME##_h, \ 2214 gen_helper_##NAME##_w, \ 2215 }; \ 2216 return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ 2217 } \ 2218 return false; \ 2219} 2220 2221GEN_OPIWX_NARROW_TRANS(vnsra_wx) 2222GEN_OPIWX_NARROW_TRANS(vnsrl_wx) 2223 2224/* OPIWI with NARROW */ 2225#define GEN_OPIWI_NARROW_TRANS(NAME, IMM_MODE, OPIVX) \ 2226static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2227{ \ 2228 if (opiwx_narrow_check(s, a)) { \ 2229 static gen_helper_opivx * const fns[3] = { \ 2230 gen_helper_##OPIVX##_b, \ 2231 gen_helper_##OPIVX##_h, \ 2232 gen_helper_##OPIVX##_w, \ 2233 }; \ 2234 return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ 2235 fns[s->sew], s, IMM_MODE); \ 2236 } \ 2237 return false; \ 2238} 2239 2240GEN_OPIWI_NARROW_TRANS(vnsra_wi, IMM_ZX, vnsra_wx) 2241GEN_OPIWI_NARROW_TRANS(vnsrl_wi, IMM_ZX, vnsrl_wx) 2242 2243/* Vector Integer Comparison Instructions */ 2244/* 2245 * For all comparison instructions, an illegal instruction exception is raised 2246 * if the destination vector register overlaps a source vector register group 2247 * and LMUL > 1. 
 */
static bool opivv_cmp_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_mss(s, a->rd, a->rs1, a->rs2);
}

GEN_OPIVV_TRANS(vmseq_vv, opivv_cmp_check)
GEN_OPIVV_TRANS(vmsne_vv, opivv_cmp_check)
GEN_OPIVV_TRANS(vmsltu_vv, opivv_cmp_check)
GEN_OPIVV_TRANS(vmslt_vv, opivv_cmp_check)
GEN_OPIVV_TRANS(vmsleu_vv, opivv_cmp_check)
GEN_OPIVV_TRANS(vmsle_vv, opivv_cmp_check)

static bool opivx_cmp_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_ms(s, a->rd, a->rs2);
}

GEN_OPIVX_TRANS(vmseq_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmsne_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmsltu_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmslt_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmsleu_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmsle_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmsgtu_vx, opivx_cmp_check)
GEN_OPIVX_TRANS(vmsgt_vx, opivx_cmp_check)

GEN_OPIVI_TRANS(vmseq_vi, IMM_SX, vmseq_vx, opivx_cmp_check)
GEN_OPIVI_TRANS(vmsne_vi, IMM_SX, vmsne_vx, opivx_cmp_check)
GEN_OPIVI_TRANS(vmsleu_vi, IMM_SX, vmsleu_vx, opivx_cmp_check)
GEN_OPIVI_TRANS(vmsle_vi, IMM_SX, vmsle_vx, opivx_cmp_check)
GEN_OPIVI_TRANS(vmsgtu_vi, IMM_SX, vmsgtu_vx, opivx_cmp_check)
GEN_OPIVI_TRANS(vmsgt_vi, IMM_SX, vmsgt_vx, opivx_cmp_check)

/* Vector Integer Min/Max Instructions */
GEN_OPIVV_GVEC_TRANS(vminu_vv, umin)
GEN_OPIVV_GVEC_TRANS(vmin_vv, smin)
GEN_OPIVV_GVEC_TRANS(vmaxu_vv, umax)
GEN_OPIVV_GVEC_TRANS(vmax_vv, smax)
GEN_OPIVX_TRANS(vminu_vx, opivx_check)
GEN_OPIVX_TRANS(vmin_vx, opivx_check)
GEN_OPIVX_TRANS(vmaxu_vx, opivx_check)
GEN_OPIVX_TRANS(vmax_vx, opivx_check)

/* Vector Single-Width Integer Multiply Instructions */

static bool vmulh_vv_check(DisasContext *s, arg_rmrr *a)
{
    /*
     * All Zve* extensions support all vector integer instructions,
     * except that the vmulh integer multiply variants
     * that return the high word of the product
     * (vmulh.vv, vmulh.vx, vmulhu.vv, vmulhu.vx, vmulhsu.vv, vmulhsu.vx)
     * are not included for EEW=64 in Zve64*. (Section 18.2)
     */
    return opivv_check(s, a) &&
           (!has_ext(s, RVV) ? s->sew != MO_64 : true);
}

static bool vmulh_vx_check(DisasContext *s, arg_rmrr *a)
{
    /*
     * All Zve* extensions support all vector integer instructions,
     * except that the vmulh integer multiply variants
     * that return the high word of the product
     * (vmulh.vv, vmulh.vx, vmulhu.vv, vmulhu.vx, vmulhsu.vv, vmulhsu.vx)
     * are not included for EEW=64 in Zve64*. (Section 18.2)
     */
    return opivx_check(s, a) &&
           (!has_ext(s, RVV) ?
s->sew != MO_64 : true); 2322} 2323 2324GEN_OPIVV_GVEC_TRANS(vmul_vv, mul) 2325GEN_OPIVV_TRANS(vmulh_vv, vmulh_vv_check) 2326GEN_OPIVV_TRANS(vmulhu_vv, vmulh_vv_check) 2327GEN_OPIVV_TRANS(vmulhsu_vv, vmulh_vv_check) 2328GEN_OPIVX_GVEC_TRANS(vmul_vx, muls) 2329GEN_OPIVX_TRANS(vmulh_vx, vmulh_vx_check) 2330GEN_OPIVX_TRANS(vmulhu_vx, vmulh_vx_check) 2331GEN_OPIVX_TRANS(vmulhsu_vx, vmulh_vx_check) 2332 2333/* Vector Integer Divide Instructions */ 2334GEN_OPIVV_TRANS(vdivu_vv, opivv_check) 2335GEN_OPIVV_TRANS(vdiv_vv, opivv_check) 2336GEN_OPIVV_TRANS(vremu_vv, opivv_check) 2337GEN_OPIVV_TRANS(vrem_vv, opivv_check) 2338GEN_OPIVX_TRANS(vdivu_vx, opivx_check) 2339GEN_OPIVX_TRANS(vdiv_vx, opivx_check) 2340GEN_OPIVX_TRANS(vremu_vx, opivx_check) 2341GEN_OPIVX_TRANS(vrem_vx, opivx_check) 2342 2343/* Vector Widening Integer Multiply Instructions */ 2344GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check) 2345GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check) 2346GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check) 2347GEN_OPIVX_WIDEN_TRANS(vwmul_vx, opivx_widen_check) 2348GEN_OPIVX_WIDEN_TRANS(vwmulu_vx, opivx_widen_check) 2349GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx, opivx_widen_check) 2350 2351/* Vector Single-Width Integer Multiply-Add Instructions */ 2352GEN_OPIVV_TRANS(vmacc_vv, opivv_check) 2353GEN_OPIVV_TRANS(vnmsac_vv, opivv_check) 2354GEN_OPIVV_TRANS(vmadd_vv, opivv_check) 2355GEN_OPIVV_TRANS(vnmsub_vv, opivv_check) 2356GEN_OPIVX_TRANS(vmacc_vx, opivx_check) 2357GEN_OPIVX_TRANS(vnmsac_vx, opivx_check) 2358GEN_OPIVX_TRANS(vmadd_vx, opivx_check) 2359GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) 2360 2361/* Vector Widening Integer Multiply-Add Instructions */ 2362GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_overwrite_widen_check) 2363GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_overwrite_widen_check) 2364GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_overwrite_widen_check) 2365GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_overwrite_widen_check) 2366GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_overwrite_widen_check) 2367GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_overwrite_widen_check) 2368GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_overwrite_widen_check) 2369 2370/* Vector Integer Merge and Move Instructions */ 2371static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) 2372{ 2373 if (require_rvv(s) && 2374 vext_check_isa_ill(s) && 2375 /* vmv.v.v has rs2 = 0 and vm = 1 */ 2376 vext_check_sss(s, a->rd, a->rs1, 0, 1)) { 2377 if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { 2378 tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd), 2379 vreg_ofs(s, a->rs1), 2380 MAXSZ(s), MAXSZ(s)); 2381 } else { 2382 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); 2383 data = FIELD_DP32(data, VDATA, VTA, s->vta); 2384 static gen_helper_gvec_2_ptr * const fns[4] = { 2385 gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h, 2386 gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, 2387 }; 2388 2389 tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), 2390 tcg_env, s->cfg_ptr->vlenb, 2391 s->cfg_ptr->vlenb, data, 2392 fns[s->sew]); 2393 } 2394 finalize_rvv_inst(s); 2395 return true; 2396 } 2397 return false; 2398} 2399 2400typedef void gen_helper_vmv_vx(TCGv_ptr, TCGv_i64, TCGv_env, TCGv_i32); 2401static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) 2402{ 2403 if (require_rvv(s) && 2404 vext_check_isa_ill(s) && 2405 /* vmv.v.x has rs2 = 0 and vm = 1 */ 2406 vext_check_ss(s, a->rd, 0, 1)) { 2407 TCGv s1; 2408 2409 s1 = get_gpr(s, a->rs1, EXT_SIGN); 2410 2411 if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { 2412 if (get_xl(s) == MXL_RV32 && s->sew == MO_64) { 2413 
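                /*
                 * SEW is wider than XLEN here, so the 32-bit scalar is
                 * sign-extended to 64 bits and duplicated with the i64
                 * variant below, rather than splatting it directly at TL
                 * width with tcg_gen_gvec_dup_tl().
                 */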
TCGv_i64 s1_i64 = tcg_temp_new_i64(); 2414 tcg_gen_ext_tl_i64(s1_i64, s1); 2415 tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd), 2416 MAXSZ(s), MAXSZ(s), s1_i64); 2417 } else { 2418 tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd), 2419 MAXSZ(s), MAXSZ(s), s1); 2420 } 2421 } else { 2422 TCGv_i32 desc; 2423 TCGv_i64 s1_i64 = tcg_temp_new_i64(); 2424 TCGv_ptr dest = tcg_temp_new_ptr(); 2425 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); 2426 data = FIELD_DP32(data, VDATA, VTA, s->vta); 2427 static gen_helper_vmv_vx * const fns[4] = { 2428 gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, 2429 gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, 2430 }; 2431 2432 tcg_gen_ext_tl_i64(s1_i64, s1); 2433 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, 2434 s->cfg_ptr->vlenb, data)); 2435 tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, a->rd)); 2436 fns[s->sew](dest, s1_i64, tcg_env, desc); 2437 } 2438 2439 finalize_rvv_inst(s); 2440 return true; 2441 } 2442 return false; 2443} 2444 2445static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) 2446{ 2447 if (require_rvv(s) && 2448 vext_check_isa_ill(s) && 2449 /* vmv.v.i has rs2 = 0 and vm = 1 */ 2450 vext_check_ss(s, a->rd, 0, 1)) { 2451 int64_t simm = sextract64(a->rs1, 0, 5); 2452 if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { 2453 tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd), 2454 MAXSZ(s), MAXSZ(s), simm); 2455 } else { 2456 TCGv_i32 desc; 2457 TCGv_i64 s1; 2458 TCGv_ptr dest; 2459 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); 2460 data = FIELD_DP32(data, VDATA, VTA, s->vta); 2461 static gen_helper_vmv_vx * const fns[4] = { 2462 gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, 2463 gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, 2464 }; 2465 2466 s1 = tcg_constant_i64(simm); 2467 dest = tcg_temp_new_ptr(); 2468 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, 2469 s->cfg_ptr->vlenb, data)); 2470 tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, a->rd)); 2471 fns[s->sew](dest, s1, tcg_env, desc); 2472 } 2473 finalize_rvv_inst(s); 2474 return true; 2475 } 2476 return false; 2477} 2478 2479GEN_OPIVV_TRANS(vmerge_vvm, opivv_vadc_check) 2480GEN_OPIVX_TRANS(vmerge_vxm, opivx_vadc_check) 2481GEN_OPIVI_TRANS(vmerge_vim, IMM_SX, vmerge_vxm, opivx_vadc_check) 2482 2483/* 2484 *** Vector Fixed-Point Arithmetic Instructions 2485 */ 2486 2487/* Vector Single-Width Saturating Add and Subtract */ 2488GEN_OPIVV_TRANS(vsaddu_vv, opivv_check) 2489GEN_OPIVV_TRANS(vsadd_vv, opivv_check) 2490GEN_OPIVV_TRANS(vssubu_vv, opivv_check) 2491GEN_OPIVV_TRANS(vssub_vv, opivv_check) 2492GEN_OPIVX_TRANS(vsaddu_vx, opivx_check) 2493GEN_OPIVX_TRANS(vsadd_vx, opivx_check) 2494GEN_OPIVX_TRANS(vssubu_vx, opivx_check) 2495GEN_OPIVX_TRANS(vssub_vx, opivx_check) 2496GEN_OPIVI_TRANS(vsaddu_vi, IMM_SX, vsaddu_vx, opivx_check) 2497GEN_OPIVI_TRANS(vsadd_vi, IMM_SX, vsadd_vx, opivx_check) 2498 2499/* Vector Single-Width Averaging Add and Subtract */ 2500GEN_OPIVV_TRANS(vaadd_vv, opivv_check) 2501GEN_OPIVV_TRANS(vaaddu_vv, opivv_check) 2502GEN_OPIVV_TRANS(vasub_vv, opivv_check) 2503GEN_OPIVV_TRANS(vasubu_vv, opivv_check) 2504GEN_OPIVX_TRANS(vaadd_vx, opivx_check) 2505GEN_OPIVX_TRANS(vaaddu_vx, opivx_check) 2506GEN_OPIVX_TRANS(vasub_vx, opivx_check) 2507GEN_OPIVX_TRANS(vasubu_vx, opivx_check) 2508 2509/* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2510 2511static bool vsmul_vv_check(DisasContext *s, arg_rmrr *a) 2512{ 2513 /* 2514 * All Zve* extensions support all vector fixed-point arithmetic 2515 * instructions, except that vsmul.vv and vsmul.vx are not supported 
     * for EEW=64 in Zve64*. (Section 18.2)
     */
    return opivv_check(s, a) &&
           (!has_ext(s, RVV) ? s->sew != MO_64 : true);
}

static bool vsmul_vx_check(DisasContext *s, arg_rmrr *a)
{
    /*
     * All Zve* extensions support all vector fixed-point arithmetic
     * instructions, except that vsmul.vv and vsmul.vx are not supported
     * for EEW=64 in Zve64*. (Section 18.2)
     */
    return opivx_check(s, a) &&
           (!has_ext(s, RVV) ? s->sew != MO_64 : true);
}

GEN_OPIVV_TRANS(vsmul_vv, vsmul_vv_check)
GEN_OPIVX_TRANS(vsmul_vx, vsmul_vx_check)

/* Vector Single-Width Scaling Shift Instructions */
GEN_OPIVV_TRANS(vssrl_vv, opivv_check)
GEN_OPIVV_TRANS(vssra_vv, opivv_check)
GEN_OPIVX_TRANS(vssrl_vx, opivx_check)
GEN_OPIVX_TRANS(vssra_vx, opivx_check)
GEN_OPIVI_TRANS(vssrl_vi, IMM_TRUNC_SEW, vssrl_vx, opivx_check)
GEN_OPIVI_TRANS(vssra_vi, IMM_TRUNC_SEW, vssra_vx, opivx_check)

/* Vector Narrowing Fixed-Point Clip Instructions */
GEN_OPIWV_NARROW_TRANS(vnclipu_wv)
GEN_OPIWV_NARROW_TRANS(vnclip_wv)
GEN_OPIWX_NARROW_TRANS(vnclipu_wx)
GEN_OPIWX_NARROW_TRANS(vnclip_wx)
GEN_OPIWI_NARROW_TRANS(vnclipu_wi, IMM_ZX, vnclipu_wx)
GEN_OPIWI_NARROW_TRANS(vnclip_wi, IMM_ZX, vnclip_wx)

/*
 *** Vector Floating-Point Arithmetic Instructions
 */

/*
 * As RVF-only CPUs always have values NaN-boxed to 64 bits,
 * RVF and RVD can be treated equally.
 * We don't have to deal with the case of SEW > FLEN.
 *
 * If SEW < FLEN, check whether the input fp register is a valid
 * NaN-boxed value, in which case the least-significant SEW bits
 * of the f register are used, else the canonical NaN value is used.
 */
static void do_nanbox(DisasContext *s, TCGv_i64 out, TCGv_i64 in)
{
    switch (s->sew) {
    case 1:
        gen_check_nanbox_h(out, in);
        break;
    case 2:
        gen_check_nanbox_s(out, in);
        break;
    case 3:
        tcg_gen_mov_i64(out, in);
        break;
    default:
        g_assert_not_reached();
    }
}

/* Vector Single-Width Floating-Point Add/Subtract Instructions */

/*
 * If the current SEW does not correspond to a supported IEEE floating-point
 * type, an illegal instruction exception is raised.
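 * For example (illustrative case, not part of the quoted rule): a CPU with
 * Zve32f but without Zvfh has no supported 16-bit FP type, so vfadd.vv
 * executed with vsew=16 takes this exception.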
2587 */ 2588static bool opfvv_check(DisasContext *s, arg_rmrr *a) 2589{ 2590 return require_rvv(s) && 2591 require_rvf(s) && 2592 vext_check_isa_ill(s) && 2593 vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm); 2594} 2595 2596/* OPFVV without GVEC IR */ 2597#define GEN_OPFVV_TRANS(NAME, CHECK) \ 2598static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2599{ \ 2600 if (CHECK(s, a)) { \ 2601 uint32_t data = 0; \ 2602 static gen_helper_gvec_4_ptr * const fns[3] = { \ 2603 gen_helper_##NAME##_h, \ 2604 gen_helper_##NAME##_w, \ 2605 gen_helper_##NAME##_d, \ 2606 }; \ 2607 gen_set_rm(s, RISCV_FRM_DYN); \ 2608 \ 2609 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 2610 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 2611 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 2612 data = \ 2613 FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ 2614 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 2615 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 2616 vreg_ofs(s, a->rs1), \ 2617 vreg_ofs(s, a->rs2), tcg_env, \ 2618 s->cfg_ptr->vlenb, \ 2619 s->cfg_ptr->vlenb, data, \ 2620 fns[s->sew - 1]); \ 2621 finalize_rvv_inst(s); \ 2622 return true; \ 2623 } \ 2624 return false; \ 2625} 2626GEN_OPFVV_TRANS(vfadd_vv, opfvv_check) 2627GEN_OPFVV_TRANS(vfsub_vv, opfvv_check) 2628 2629typedef void gen_helper_opfvf(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr, 2630 TCGv_env, TCGv_i32); 2631 2632static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, 2633 uint32_t data, gen_helper_opfvf *fn, DisasContext *s) 2634{ 2635 TCGv_ptr dest, src2, mask; 2636 TCGv_i32 desc; 2637 TCGv_i64 t1; 2638 2639 dest = tcg_temp_new_ptr(); 2640 mask = tcg_temp_new_ptr(); 2641 src2 = tcg_temp_new_ptr(); 2642 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, 2643 s->cfg_ptr->vlenb, data)); 2644 2645 tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd)); 2646 tcg_gen_addi_ptr(src2, tcg_env, vreg_ofs(s, vs2)); 2647 tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0)); 2648 2649 /* NaN-box f[rs1] */ 2650 t1 = tcg_temp_new_i64(); 2651 do_nanbox(s, t1, cpu_fpr[rs1]); 2652 2653 fn(dest, mask, t1, src2, tcg_env, desc); 2654 2655 finalize_rvv_inst(s); 2656 return true; 2657} 2658 2659/* 2660 * If the current SEW does not correspond to a supported IEEE floating-point 2661 * type, an illegal instruction exception is raised 2662 */ 2663static bool opfvf_check(DisasContext *s, arg_rmrr *a) 2664{ 2665 return require_rvv(s) && 2666 require_rvf(s) && 2667 vext_check_isa_ill(s) && 2668 vext_check_ss(s, a->rd, a->rs2, a->vm); 2669} 2670 2671/* OPFVF without GVEC IR */ 2672#define GEN_OPFVF_TRANS(NAME, CHECK) \ 2673static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2674{ \ 2675 if (CHECK(s, a)) { \ 2676 uint32_t data = 0; \ 2677 static gen_helper_opfvf *const fns[3] = { \ 2678 gen_helper_##NAME##_h, \ 2679 gen_helper_##NAME##_w, \ 2680 gen_helper_##NAME##_d, \ 2681 }; \ 2682 gen_set_rm(s, RISCV_FRM_DYN); \ 2683 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 2684 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 2685 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 2686 data = FIELD_DP32(data, VDATA, VTA_ALL_1S, \ 2687 s->cfg_vta_all_1s); \ 2688 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 2689 return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ 2690 fns[s->sew - 1], s); \ 2691 } \ 2692 return false; \ 2693} 2694 2695GEN_OPFVF_TRANS(vfadd_vf, opfvf_check) 2696GEN_OPFVF_TRANS(vfsub_vf, opfvf_check) 2697GEN_OPFVF_TRANS(vfrsub_vf, opfvf_check) 2698 2699/* Vector Widening Floating-Point Add/Subtract Instructions */ 2700static bool 
opfvv_widen_check(DisasContext *s, arg_rmrr *a) 2701{ 2702 return require_rvv(s) && 2703 require_rvf(s) && 2704 require_scale_rvf(s) && 2705 vext_check_isa_ill(s) && 2706 vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm); 2707} 2708 2709static bool opfvv_overwrite_widen_check(DisasContext *s, arg_rmrr *a) 2710{ 2711 return require_rvv(s) && 2712 require_rvf(s) && 2713 require_scale_rvf(s) && 2714 vext_check_isa_ill(s) && 2715 vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) && 2716 vext_check_input_eew(s, a->rd, s->sew + 1, a->rs1, s->sew, a->vm) && 2717 vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); 2718} 2719 2720/* OPFVV with WIDEN */ 2721#define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK) \ 2722static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2723{ \ 2724 if (CHECK(s, a)) { \ 2725 uint32_t data = 0; \ 2726 static gen_helper_gvec_4_ptr * const fns[2] = { \ 2727 gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ 2728 }; \ 2729 gen_set_rm(s, RISCV_FRM_DYN); \ 2730 \ 2731 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 2732 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 2733 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 2734 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 2735 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 2736 vreg_ofs(s, a->rs1), \ 2737 vreg_ofs(s, a->rs2), tcg_env, \ 2738 s->cfg_ptr->vlenb, \ 2739 s->cfg_ptr->vlenb, data, \ 2740 fns[s->sew - 1]); \ 2741 finalize_rvv_inst(s); \ 2742 return true; \ 2743 } \ 2744 return false; \ 2745} 2746 2747GEN_OPFVV_WIDEN_TRANS(vfwadd_vv, opfvv_widen_check) 2748GEN_OPFVV_WIDEN_TRANS(vfwsub_vv, opfvv_widen_check) 2749 2750static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a) 2751{ 2752 return require_rvv(s) && 2753 require_rvf(s) && 2754 require_scale_rvf(s) && 2755 vext_check_isa_ill(s) && 2756 vext_check_ds(s, a->rd, a->rs2, a->vm); 2757} 2758 2759static bool opfvf_overwrite_widen_check(DisasContext *s, arg_rmrr *a) 2760{ 2761 return require_rvv(s) && 2762 require_rvf(s) && 2763 require_scale_rvf(s) && 2764 vext_check_isa_ill(s) && 2765 vext_check_ds(s, a->rd, a->rs2, a->vm) && 2766 vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); 2767} 2768 2769/* OPFVF with WIDEN */ 2770#define GEN_OPFVF_WIDEN_TRANS(NAME, CHECK) \ 2771static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2772{ \ 2773 if (CHECK(s, a)) { \ 2774 uint32_t data = 0; \ 2775 static gen_helper_opfvf *const fns[2] = { \ 2776 gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ 2777 }; \ 2778 gen_set_rm(s, RISCV_FRM_DYN); \ 2779 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 2780 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 2781 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 2782 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 2783 return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ 2784 fns[s->sew - 1], s); \ 2785 } \ 2786 return false; \ 2787} 2788 2789GEN_OPFVF_WIDEN_TRANS(vfwadd_vf, opfvf_widen_check) 2790GEN_OPFVF_WIDEN_TRANS(vfwsub_vf, opfvf_widen_check) 2791 2792static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a) 2793{ 2794 return require_rvv(s) && 2795 require_rvf(s) && 2796 require_scale_rvf(s) && 2797 vext_check_isa_ill(s) && 2798 vext_check_dds(s, a->rd, a->rs1, a->rs2, a->vm); 2799} 2800 2801/* WIDEN OPFVV with WIDEN */ 2802#define GEN_OPFWV_WIDEN_TRANS(NAME) \ 2803static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2804{ \ 2805 if (opfwv_widen_check(s, a)) { \ 2806 uint32_t data = 0; \ 2807 static gen_helper_gvec_4_ptr * const fns[2] = { \ 2808 gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ 2809 }; \ 2810 
gen_set_rm(s, RISCV_FRM_DYN); \ 2811 \ 2812 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 2813 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 2814 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 2815 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 2816 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 2817 vreg_ofs(s, a->rs1), \ 2818 vreg_ofs(s, a->rs2), tcg_env, \ 2819 s->cfg_ptr->vlenb, \ 2820 s->cfg_ptr->vlenb, data, \ 2821 fns[s->sew - 1]); \ 2822 finalize_rvv_inst(s); \ 2823 return true; \ 2824 } \ 2825 return false; \ 2826} 2827 2828GEN_OPFWV_WIDEN_TRANS(vfwadd_wv) 2829GEN_OPFWV_WIDEN_TRANS(vfwsub_wv) 2830 2831static bool opfwf_widen_check(DisasContext *s, arg_rmrr *a) 2832{ 2833 return require_rvv(s) && 2834 require_rvf(s) && 2835 require_scale_rvf(s) && 2836 vext_check_isa_ill(s) && 2837 vext_check_dd(s, a->rd, a->rs2, a->vm); 2838} 2839 2840/* WIDEN OPFVF with WIDEN */ 2841#define GEN_OPFWF_WIDEN_TRANS(NAME) \ 2842static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2843{ \ 2844 if (opfwf_widen_check(s, a)) { \ 2845 uint32_t data = 0; \ 2846 static gen_helper_opfvf *const fns[2] = { \ 2847 gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ 2848 }; \ 2849 gen_set_rm(s, RISCV_FRM_DYN); \ 2850 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 2851 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 2852 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 2853 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 2854 return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ 2855 fns[s->sew - 1], s); \ 2856 } \ 2857 return false; \ 2858} 2859 2860GEN_OPFWF_WIDEN_TRANS(vfwadd_wf) 2861GEN_OPFWF_WIDEN_TRANS(vfwsub_wf) 2862 2863/* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 2864GEN_OPFVV_TRANS(vfmul_vv, opfvv_check) 2865GEN_OPFVV_TRANS(vfdiv_vv, opfvv_check) 2866GEN_OPFVF_TRANS(vfmul_vf, opfvf_check) 2867GEN_OPFVF_TRANS(vfdiv_vf, opfvf_check) 2868GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check) 2869 2870/* Vector Widening Floating-Point Multiply */ 2871GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check) 2872GEN_OPFVF_WIDEN_TRANS(vfwmul_vf, opfvf_widen_check) 2873 2874/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 2875GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check) 2876GEN_OPFVV_TRANS(vfnmacc_vv, opfvv_check) 2877GEN_OPFVV_TRANS(vfmsac_vv, opfvv_check) 2878GEN_OPFVV_TRANS(vfnmsac_vv, opfvv_check) 2879GEN_OPFVV_TRANS(vfmadd_vv, opfvv_check) 2880GEN_OPFVV_TRANS(vfnmadd_vv, opfvv_check) 2881GEN_OPFVV_TRANS(vfmsub_vv, opfvv_check) 2882GEN_OPFVV_TRANS(vfnmsub_vv, opfvv_check) 2883GEN_OPFVF_TRANS(vfmacc_vf, opfvf_check) 2884GEN_OPFVF_TRANS(vfnmacc_vf, opfvf_check) 2885GEN_OPFVF_TRANS(vfmsac_vf, opfvf_check) 2886GEN_OPFVF_TRANS(vfnmsac_vf, opfvf_check) 2887GEN_OPFVF_TRANS(vfmadd_vf, opfvf_check) 2888GEN_OPFVF_TRANS(vfnmadd_vf, opfvf_check) 2889GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check) 2890GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check) 2891 2892/* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 2893GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_overwrite_widen_check) 2894GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_overwrite_widen_check) 2895GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_overwrite_widen_check) 2896GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_overwrite_widen_check) 2897GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf, opfvf_overwrite_widen_check) 2898GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf, opfvf_overwrite_widen_check) 2899GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf, opfvf_overwrite_widen_check) 2900GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf, opfvf_overwrite_widen_check) 2901 2902/* Vector Floating-Point Square-Root 
Instruction */

/*
 * If the current SEW does not correspond to a supported IEEE floating-point
 * type, an illegal instruction exception is raised.
 */
static bool opfv_check(DisasContext *s, arg_rmr *a)
{
    return require_rvv(s) &&
           require_rvf(s) &&
           vext_check_isa_ill(s) &&
           /* OPFV instructions ignore vs1 check */
           vext_check_ss(s, a->rd, a->rs2, a->vm);
}

static bool do_opfv(DisasContext *s, arg_rmr *a,
                    gen_helper_gvec_3_ptr *fn,
                    bool (*checkfn)(DisasContext *, arg_rmr *),
                    int rm)
{
    if (checkfn(s, a)) {
        uint32_t data = 0;
        gen_set_rm_chkfrm(s, rm);

        data = FIELD_DP32(data, VDATA, VM, a->vm);
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
        data = FIELD_DP32(data, VDATA, VTA, s->vta);
        data = FIELD_DP32(data, VDATA, VMA, s->vma);
        tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                           vreg_ofs(s, a->rs2), tcg_env,
                           s->cfg_ptr->vlenb,
                           s->cfg_ptr->vlenb, data, fn);
        finalize_rvv_inst(s);
        return true;
    }
    return false;
}

#define GEN_OPFV_TRANS(NAME, CHECK, FRM)                   \
static bool trans_##NAME(DisasContext *s, arg_rmr *a)      \
{                                                          \
    static gen_helper_gvec_3_ptr * const fns[3] = {        \
        gen_helper_##NAME##_h,                             \
        gen_helper_##NAME##_w,                             \
        gen_helper_##NAME##_d                              \
    };                                                     \
    return do_opfv(s, a, fns[s->sew - 1], CHECK, FRM);     \
}

GEN_OPFV_TRANS(vfsqrt_v, opfv_check, RISCV_FRM_DYN)
GEN_OPFV_TRANS(vfrsqrt7_v, opfv_check, RISCV_FRM_DYN)
GEN_OPFV_TRANS(vfrec7_v, opfv_check, RISCV_FRM_DYN)

/* Vector Floating-Point MIN/MAX Instructions */
GEN_OPFVV_TRANS(vfmin_vv, opfvv_check)
GEN_OPFVV_TRANS(vfmax_vv, opfvv_check)
GEN_OPFVF_TRANS(vfmin_vf, opfvf_check)
GEN_OPFVF_TRANS(vfmax_vf, opfvf_check)

/* Vector Floating-Point Sign-Injection Instructions */
GEN_OPFVV_TRANS(vfsgnj_vv, opfvv_check)
GEN_OPFVV_TRANS(vfsgnjn_vv, opfvv_check)
GEN_OPFVV_TRANS(vfsgnjx_vv, opfvv_check)
GEN_OPFVF_TRANS(vfsgnj_vf, opfvf_check)
GEN_OPFVF_TRANS(vfsgnjn_vf, opfvf_check)
GEN_OPFVF_TRANS(vfsgnjx_vf, opfvf_check)

/* Vector Floating-Point Compare Instructions */
static bool opfvv_cmp_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           require_rvf(s) &&
           vext_check_isa_ill(s) &&
           vext_check_mss(s, a->rd, a->rs1, a->rs2);
}

GEN_OPFVV_TRANS(vmfeq_vv, opfvv_cmp_check)
GEN_OPFVV_TRANS(vmfne_vv, opfvv_cmp_check)
GEN_OPFVV_TRANS(vmflt_vv, opfvv_cmp_check)
GEN_OPFVV_TRANS(vmfle_vv, opfvv_cmp_check)

static bool opfvf_cmp_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           require_rvf(s) &&
           vext_check_isa_ill(s) &&
           vext_check_ms(s, a->rd, a->rs2);
}

GEN_OPFVF_TRANS(vmfeq_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmfne_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmflt_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmfle_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmfgt_vf, opfvf_cmp_check)
GEN_OPFVF_TRANS(vmfge_vf, opfvf_cmp_check)

/* Vector Floating-Point Classify Instruction */
GEN_OPFV_TRANS(vfclass_v, opfv_check, RISCV_FRM_DYN)

/* Vector Floating-Point Merge Instruction */
GEN_OPFVF_TRANS(vfmerge_vfm, opfvf_check)

static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a)
{
    if (require_rvv(s) &&
        require_rvf(s) &&
        vext_check_isa_ill(s) &&
        require_align(a->rd, s->lmul)) {
        gen_set_rm(s, RISCV_FRM_DYN);
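        /*
         * Two paths below: when vl equals VLMAX (and tail-agnostic handling
         * with fractional LMUL does not force the helper), the NaN-boxed
         * scalar is splatted with a gvec dup; otherwise the vmv.v.x helpers
         * are reused so that vl and the tail policy are honoured.
         */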
3011 3012 TCGv_i64 t1; 3013 3014 if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { 3015 t1 = tcg_temp_new_i64(); 3016 /* NaN-box f[rs1] */ 3017 do_nanbox(s, t1, cpu_fpr[a->rs1]); 3018 3019 tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd), 3020 MAXSZ(s), MAXSZ(s), t1); 3021 } else { 3022 TCGv_ptr dest; 3023 TCGv_i32 desc; 3024 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); 3025 data = FIELD_DP32(data, VDATA, VTA, s->vta); 3026 data = FIELD_DP32(data, VDATA, VMA, s->vma); 3027 static gen_helper_vmv_vx * const fns[3] = { 3028 gen_helper_vmv_v_x_h, 3029 gen_helper_vmv_v_x_w, 3030 gen_helper_vmv_v_x_d, 3031 }; 3032 3033 t1 = tcg_temp_new_i64(); 3034 /* NaN-box f[rs1] */ 3035 do_nanbox(s, t1, cpu_fpr[a->rs1]); 3036 3037 dest = tcg_temp_new_ptr(); 3038 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, 3039 s->cfg_ptr->vlenb, data)); 3040 tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, a->rd)); 3041 3042 fns[s->sew - 1](dest, t1, tcg_env, desc); 3043 } 3044 finalize_rvv_inst(s); 3045 return true; 3046 } 3047 return false; 3048} 3049 3050/* Single-Width Floating-Point/Integer Type-Convert Instructions */ 3051#define GEN_OPFV_CVT_TRANS(NAME, HELPER, FRM) \ 3052static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ 3053{ \ 3054 static gen_helper_gvec_3_ptr * const fns[3] = { \ 3055 gen_helper_##HELPER##_h, \ 3056 gen_helper_##HELPER##_w, \ 3057 gen_helper_##HELPER##_d \ 3058 }; \ 3059 return do_opfv(s, a, fns[s->sew - 1], opfv_check, FRM); \ 3060} 3061 3062GEN_OPFV_CVT_TRANS(vfcvt_xu_f_v, vfcvt_xu_f_v, RISCV_FRM_DYN) 3063GEN_OPFV_CVT_TRANS(vfcvt_x_f_v, vfcvt_x_f_v, RISCV_FRM_DYN) 3064GEN_OPFV_CVT_TRANS(vfcvt_f_xu_v, vfcvt_f_xu_v, RISCV_FRM_DYN) 3065GEN_OPFV_CVT_TRANS(vfcvt_f_x_v, vfcvt_f_x_v, RISCV_FRM_DYN) 3066/* Reuse the helper functions from vfcvt.xu.f.v and vfcvt.x.f.v */ 3067GEN_OPFV_CVT_TRANS(vfcvt_rtz_xu_f_v, vfcvt_xu_f_v, RISCV_FRM_RTZ) 3068GEN_OPFV_CVT_TRANS(vfcvt_rtz_x_f_v, vfcvt_x_f_v, RISCV_FRM_RTZ) 3069 3070/* Widening Floating-Point/Integer Type-Convert Instructions */ 3071 3072/* 3073 * If the current SEW does not correspond to a supported IEEE floating-point 3074 * type, an illegal instruction exception is raised 3075 */ 3076static bool opfv_widen_check(DisasContext *s, arg_rmr *a) 3077{ 3078 return require_rvv(s) && 3079 vext_check_isa_ill(s) && 3080 vext_check_ds(s, a->rd, a->rs2, a->vm); 3081} 3082 3083static bool opxfv_widen_check(DisasContext *s, arg_rmr *a) 3084{ 3085 return opfv_widen_check(s, a) && 3086 require_rvf(s); 3087} 3088 3089static bool opffv_widen_check(DisasContext *s, arg_rmr *a) 3090{ 3091 return opfv_widen_check(s, a) && 3092 require_rvfmin(s) && 3093 require_scale_rvfmin(s); 3094} 3095 3096#define GEN_OPFV_WIDEN_TRANS(NAME, CHECK, HELPER, FRM) \ 3097static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ 3098{ \ 3099 if (CHECK(s, a)) { \ 3100 uint32_t data = 0; \ 3101 static gen_helper_gvec_3_ptr * const fns[2] = { \ 3102 gen_helper_##HELPER##_h, \ 3103 gen_helper_##HELPER##_w, \ 3104 }; \ 3105 gen_set_rm_chkfrm(s, FRM); \ 3106 \ 3107 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 3108 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 3109 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 3110 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 3111 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 3112 vreg_ofs(s, a->rs2), tcg_env, \ 3113 s->cfg_ptr->vlenb, \ 3114 s->cfg_ptr->vlenb, data, \ 3115 fns[s->sew - 1]); \ 3116 finalize_rvv_inst(s); \ 3117 return true; \ 3118 } \ 3119 return false; \ 3120} 3121 3122GEN_OPFV_WIDEN_TRANS(vfwcvt_xu_f_v, 
opxfv_widen_check, vfwcvt_xu_f_v, 3123 RISCV_FRM_DYN) 3124GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v, opxfv_widen_check, vfwcvt_x_f_v, 3125 RISCV_FRM_DYN) 3126GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v, opffv_widen_check, vfwcvt_f_f_v, 3127 RISCV_FRM_DYN) 3128/* Reuse the helper functions from vfwcvt.xu.f.v and vfwcvt.x.f.v */ 3129GEN_OPFV_WIDEN_TRANS(vfwcvt_rtz_xu_f_v, opxfv_widen_check, vfwcvt_xu_f_v, 3130 RISCV_FRM_RTZ) 3131GEN_OPFV_WIDEN_TRANS(vfwcvt_rtz_x_f_v, opxfv_widen_check, vfwcvt_x_f_v, 3132 RISCV_FRM_RTZ) 3133 3134static bool opfxv_widen_check(DisasContext *s, arg_rmr *a) 3135{ 3136 return require_rvv(s) && 3137 require_scale_rvf(s) && 3138 vext_check_isa_ill(s) && 3139 /* OPFV widening instructions ignore vs1 check */ 3140 vext_check_ds(s, a->rd, a->rs2, a->vm); 3141} 3142 3143#define GEN_OPFXV_WIDEN_TRANS(NAME) \ 3144static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ 3145{ \ 3146 if (opfxv_widen_check(s, a)) { \ 3147 uint32_t data = 0; \ 3148 static gen_helper_gvec_3_ptr * const fns[3] = { \ 3149 gen_helper_##NAME##_b, \ 3150 gen_helper_##NAME##_h, \ 3151 gen_helper_##NAME##_w, \ 3152 }; \ 3153 gen_set_rm(s, RISCV_FRM_DYN); \ 3154 \ 3155 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 3156 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 3157 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 3158 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 3159 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 3160 vreg_ofs(s, a->rs2), tcg_env, \ 3161 s->cfg_ptr->vlenb, \ 3162 s->cfg_ptr->vlenb, data, \ 3163 fns[s->sew]); \ 3164 finalize_rvv_inst(s); \ 3165 return true; \ 3166 } \ 3167 return false; \ 3168} 3169 3170GEN_OPFXV_WIDEN_TRANS(vfwcvt_f_xu_v) 3171GEN_OPFXV_WIDEN_TRANS(vfwcvt_f_x_v) 3172 3173/* Narrowing Floating-Point/Integer Type-Convert Instructions */ 3174 3175/* 3176 * If the current SEW does not correspond to a supported IEEE floating-point 3177 * type, an illegal instruction exception is raised 3178 */ 3179static bool opfv_narrow_check(DisasContext *s, arg_rmr *a) 3180{ 3181 return require_rvv(s) && 3182 vext_check_isa_ill(s) && 3183 /* OPFV narrowing instructions ignore vs1 check */ 3184 vext_check_sd(s, a->rd, a->rs2, a->vm); 3185} 3186 3187static bool opfxv_narrow_check(DisasContext *s, arg_rmr *a) 3188{ 3189 return opfv_narrow_check(s, a) && 3190 require_rvf(s) && 3191 (s->sew != MO_64); 3192} 3193 3194static bool opffv_narrow_check(DisasContext *s, arg_rmr *a) 3195{ 3196 return opfv_narrow_check(s, a) && 3197 require_rvfmin(s) && 3198 require_scale_rvfmin(s); 3199} 3200 3201static bool opffv_rod_narrow_check(DisasContext *s, arg_rmr *a) 3202{ 3203 return opfv_narrow_check(s, a) && 3204 require_rvf(s) && 3205 require_scale_rvf(s); 3206} 3207 3208#define GEN_OPFV_NARROW_TRANS(NAME, CHECK, HELPER, FRM) \ 3209static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ 3210{ \ 3211 if (CHECK(s, a)) { \ 3212 uint32_t data = 0; \ 3213 static gen_helper_gvec_3_ptr * const fns[2] = { \ 3214 gen_helper_##HELPER##_h, \ 3215 gen_helper_##HELPER##_w, \ 3216 }; \ 3217 gen_set_rm_chkfrm(s, FRM); \ 3218 \ 3219 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 3220 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 3221 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 3222 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 3223 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 3224 vreg_ofs(s, a->rs2), tcg_env, \ 3225 s->cfg_ptr->vlenb, \ 3226 s->cfg_ptr->vlenb, data, \ 3227 fns[s->sew - 1]); \ 3228 finalize_rvv_inst(s); \ 3229 return true; \ 3230 } \ 3231 return false; \ 3232} 3233 
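/*
 * The instantiations below share helpers where only the rounding mode
 * differs: gen_set_rm_chkfrm() installs the FRM argument before the common
 * helper runs, e.g. vfncvt.rod.f.f.w reuses the vfncvt.f.f.w helper with
 * RISCV_FRM_ROD.
 */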
3234GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_w, opfxv_narrow_check, vfncvt_f_xu_w, 3235 RISCV_FRM_DYN) 3236GEN_OPFV_NARROW_TRANS(vfncvt_f_x_w, opfxv_narrow_check, vfncvt_f_x_w, 3237 RISCV_FRM_DYN) 3238GEN_OPFV_NARROW_TRANS(vfncvt_f_f_w, opffv_narrow_check, vfncvt_f_f_w, 3239 RISCV_FRM_DYN) 3240/* Reuse the helper function from vfncvt.f.f.w */ 3241GEN_OPFV_NARROW_TRANS(vfncvt_rod_f_f_w, opffv_rod_narrow_check, vfncvt_f_f_w, 3242 RISCV_FRM_ROD) 3243 3244static bool opxfv_narrow_check(DisasContext *s, arg_rmr *a) 3245{ 3246 return require_rvv(s) && 3247 require_scale_rvf(s) && 3248 vext_check_isa_ill(s) && 3249 /* OPFV narrowing instructions ignore vs1 check */ 3250 vext_check_sd(s, a->rd, a->rs2, a->vm); 3251} 3252 3253#define GEN_OPXFV_NARROW_TRANS(NAME, HELPER, FRM) \ 3254static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ 3255{ \ 3256 if (opxfv_narrow_check(s, a)) { \ 3257 uint32_t data = 0; \ 3258 static gen_helper_gvec_3_ptr * const fns[3] = { \ 3259 gen_helper_##HELPER##_b, \ 3260 gen_helper_##HELPER##_h, \ 3261 gen_helper_##HELPER##_w, \ 3262 }; \ 3263 gen_set_rm_chkfrm(s, FRM); \ 3264 \ 3265 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 3266 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 3267 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 3268 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 3269 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 3270 vreg_ofs(s, a->rs2), tcg_env, \ 3271 s->cfg_ptr->vlenb, \ 3272 s->cfg_ptr->vlenb, data, \ 3273 fns[s->sew]); \ 3274 finalize_rvv_inst(s); \ 3275 return true; \ 3276 } \ 3277 return false; \ 3278} 3279 3280GEN_OPXFV_NARROW_TRANS(vfncvt_xu_f_w, vfncvt_xu_f_w, RISCV_FRM_DYN) 3281GEN_OPXFV_NARROW_TRANS(vfncvt_x_f_w, vfncvt_x_f_w, RISCV_FRM_DYN) 3282/* Reuse the helper functions from vfncvt.xu.f.w and vfncvt.x.f.w */ 3283GEN_OPXFV_NARROW_TRANS(vfncvt_rtz_xu_f_w, vfncvt_xu_f_w, RISCV_FRM_RTZ) 3284GEN_OPXFV_NARROW_TRANS(vfncvt_rtz_x_f_w, vfncvt_x_f_w, RISCV_FRM_RTZ) 3285 3286/* 3287 *** Vector Reduction Operations 3288 */ 3289/* Vector Single-Width Integer Reduction Instructions */ 3290static bool reduction_check(DisasContext *s, arg_rmrr *a) 3291{ 3292 return require_rvv(s) && 3293 vext_check_isa_ill(s) && 3294 vext_check_reduction(s, a->rs2); 3295} 3296 3297GEN_OPIVV_TRANS(vredsum_vs, reduction_check) 3298GEN_OPIVV_TRANS(vredmaxu_vs, reduction_check) 3299GEN_OPIVV_TRANS(vredmax_vs, reduction_check) 3300GEN_OPIVV_TRANS(vredminu_vs, reduction_check) 3301GEN_OPIVV_TRANS(vredmin_vs, reduction_check) 3302GEN_OPIVV_TRANS(vredand_vs, reduction_check) 3303GEN_OPIVV_TRANS(vredor_vs, reduction_check) 3304GEN_OPIVV_TRANS(vredxor_vs, reduction_check) 3305 3306/* Vector Widening Integer Reduction Instructions */ 3307static bool reduction_widen_check(DisasContext *s, arg_rmrr *a) 3308{ 3309 return reduction_check(s, a) && (s->sew < MO_64) && 3310 ((s->sew + 1) <= (s->cfg_ptr->elen >> 4)); 3311} 3312 3313GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_widen_check) 3314GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_widen_check) 3315 3316/* Vector Single-Width Floating-Point Reduction Instructions */ 3317static bool freduction_check(DisasContext *s, arg_rmrr *a) 3318{ 3319 return reduction_check(s, a) && 3320 require_rvf(s); 3321} 3322 3323GEN_OPFVV_TRANS(vfredusum_vs, freduction_check) 3324GEN_OPFVV_TRANS(vfredosum_vs, freduction_check) 3325GEN_OPFVV_TRANS(vfredmax_vs, freduction_check) 3326GEN_OPFVV_TRANS(vfredmin_vs, freduction_check) 3327 3328/* Vector Widening Floating-Point Reduction Instructions */ 3329static bool freduction_widen_check(DisasContext *s, 
arg_rmrr *a) 3330{ 3331 return reduction_widen_check(s, a) && 3332 require_rvf(s) && 3333 require_scale_rvf(s); 3334} 3335 3336GEN_OPFVV_WIDEN_TRANS(vfwredusum_vs, freduction_widen_check) 3337GEN_OPFVV_WIDEN_TRANS(vfwredosum_vs, freduction_widen_check) 3338 3339/* 3340 *** Vector Mask Operations 3341 */ 3342 3343/* Vector Mask-Register Logical Instructions */ 3344#define GEN_MM_TRANS(NAME) \ 3345static bool trans_##NAME(DisasContext *s, arg_r *a) \ 3346{ \ 3347 if (require_rvv(s) && \ 3348 vext_check_isa_ill(s)) { \ 3349 uint32_t data = 0; \ 3350 gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \ 3351 \ 3352 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 3353 data = \ 3354 FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ 3355 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 3356 vreg_ofs(s, a->rs1), \ 3357 vreg_ofs(s, a->rs2), tcg_env, \ 3358 s->cfg_ptr->vlenb, \ 3359 s->cfg_ptr->vlenb, data, fn); \ 3360 finalize_rvv_inst(s); \ 3361 return true; \ 3362 } \ 3363 return false; \ 3364} 3365 3366GEN_MM_TRANS(vmand_mm) 3367GEN_MM_TRANS(vmnand_mm) 3368GEN_MM_TRANS(vmandn_mm) 3369GEN_MM_TRANS(vmxor_mm) 3370GEN_MM_TRANS(vmor_mm) 3371GEN_MM_TRANS(vmnor_mm) 3372GEN_MM_TRANS(vmorn_mm) 3373GEN_MM_TRANS(vmxnor_mm) 3374 3375/* Vector count population in mask vcpop */ 3376static bool trans_vcpop_m(DisasContext *s, arg_rmr *a) 3377{ 3378 if (require_rvv(s) && 3379 vext_check_isa_ill(s) && 3380 s->vstart_eq_zero) { 3381 TCGv_ptr src2, mask; 3382 TCGv dst; 3383 TCGv_i32 desc; 3384 uint32_t data = 0; 3385 data = FIELD_DP32(data, VDATA, VM, a->vm); 3386 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 3387 3388 mask = tcg_temp_new_ptr(); 3389 src2 = tcg_temp_new_ptr(); 3390 dst = dest_gpr(s, a->rd); 3391 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, 3392 s->cfg_ptr->vlenb, data)); 3393 3394 tcg_gen_addi_ptr(src2, tcg_env, vreg_ofs(s, a->rs2)); 3395 tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0)); 3396 3397 gen_helper_vcpop_m(dst, mask, src2, tcg_env, desc); 3398 gen_set_gpr(s, a->rd, dst); 3399 return true; 3400 } 3401 return false; 3402} 3403 3404/* vmfirst find-first-set mask bit */ 3405static bool trans_vfirst_m(DisasContext *s, arg_rmr *a) 3406{ 3407 if (require_rvv(s) && 3408 vext_check_isa_ill(s) && 3409 s->vstart_eq_zero) { 3410 TCGv_ptr src2, mask; 3411 TCGv dst; 3412 TCGv_i32 desc; 3413 uint32_t data = 0; 3414 data = FIELD_DP32(data, VDATA, VM, a->vm); 3415 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 3416 3417 mask = tcg_temp_new_ptr(); 3418 src2 = tcg_temp_new_ptr(); 3419 dst = dest_gpr(s, a->rd); 3420 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, 3421 s->cfg_ptr->vlenb, data)); 3422 3423 tcg_gen_addi_ptr(src2, tcg_env, vreg_ofs(s, a->rs2)); 3424 tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0)); 3425 3426 gen_helper_vfirst_m(dst, mask, src2, tcg_env, desc); 3427 gen_set_gpr(s, a->rd, dst); 3428 return true; 3429 } 3430 return false; 3431} 3432 3433/* 3434 * vmsbf.m set-before-first mask bit 3435 * vmsif.m set-including-first mask bit 3436 * vmsof.m set-only-first mask bit 3437 */ 3438#define GEN_M_TRANS(NAME) \ 3439static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ 3440{ \ 3441 if (require_rvv(s) && \ 3442 vext_check_isa_ill(s) && \ 3443 require_vm(a->vm, a->rd) && \ 3444 (a->rd != a->rs2) && \ 3445 s->vstart_eq_zero) { \ 3446 uint32_t data = 0; \ 3447 gen_helper_gvec_3_ptr *fn = gen_helper_##NAME; \ 3448 \ 3449 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 3450 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 3451 data = \ 3452 FIELD_DP32(data, VDATA, VTA_ALL_1S, 
s->cfg_vta_all_1s);\ 3453 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 3454 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \ 3455 vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \ 3456 tcg_env, s->cfg_ptr->vlenb, \ 3457 s->cfg_ptr->vlenb, \ 3458 data, fn); \ 3459 finalize_rvv_inst(s); \ 3460 return true; \ 3461 } \ 3462 return false; \ 3463} 3464 3465GEN_M_TRANS(vmsbf_m) 3466GEN_M_TRANS(vmsif_m) 3467GEN_M_TRANS(vmsof_m) 3468 3469/* 3470 * Vector Iota Instruction 3471 * 3472 * 1. The destination register cannot overlap the source register. 3473 * 2. If masked, cannot overlap the mask register ('v0'). 3474 * 3. An illegal instruction exception is raised if vstart is non-zero. 3475 */ 3476static bool trans_viota_m(DisasContext *s, arg_viota_m *a) 3477{ 3478 if (require_rvv(s) && 3479 vext_check_isa_ill(s) && 3480 !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs2, 1) && 3481 require_vm(a->vm, a->rd) && 3482 require_align(a->rd, s->lmul) && 3483 s->vstart_eq_zero) { 3484 uint32_t data = 0; 3485 3486 data = FIELD_DP32(data, VDATA, VM, a->vm); 3487 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 3488 data = FIELD_DP32(data, VDATA, VTA, s->vta); 3489 data = FIELD_DP32(data, VDATA, VMA, s->vma); 3490 static gen_helper_gvec_3_ptr * const fns[4] = { 3491 gen_helper_viota_m_b, gen_helper_viota_m_h, 3492 gen_helper_viota_m_w, gen_helper_viota_m_d, 3493 }; 3494 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), 3495 vreg_ofs(s, a->rs2), tcg_env, 3496 s->cfg_ptr->vlenb, 3497 s->cfg_ptr->vlenb, data, fns[s->sew]); 3498 finalize_rvv_inst(s); 3499 return true; 3500 } 3501 return false; 3502} 3503 3504/* Vector Element Index Instruction */ 3505static bool trans_vid_v(DisasContext *s, arg_vid_v *a) 3506{ 3507 if (require_rvv(s) && 3508 vext_check_isa_ill(s) && 3509 require_align(a->rd, s->lmul) && 3510 require_vm(a->vm, a->rd)) { 3511 uint32_t data = 0; 3512 3513 data = FIELD_DP32(data, VDATA, VM, a->vm); 3514 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 3515 data = FIELD_DP32(data, VDATA, VTA, s->vta); 3516 data = FIELD_DP32(data, VDATA, VMA, s->vma); 3517 static gen_helper_gvec_2_ptr * const fns[4] = { 3518 gen_helper_vid_v_b, gen_helper_vid_v_h, 3519 gen_helper_vid_v_w, gen_helper_vid_v_d, 3520 }; 3521 tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), 3522 tcg_env, s->cfg_ptr->vlenb, 3523 s->cfg_ptr->vlenb, 3524 data, fns[s->sew]); 3525 finalize_rvv_inst(s); 3526 return true; 3527 } 3528 return false; 3529} 3530 3531/* 3532 *** Vector Permutation Instructions 3533 */ 3534 3535static void load_element(TCGv_i64 dest, TCGv_ptr base, 3536 int ofs, int sew, bool sign) 3537{ 3538 switch (sew) { 3539 case MO_8: 3540 if (!sign) { 3541 tcg_gen_ld8u_i64(dest, base, ofs); 3542 } else { 3543 tcg_gen_ld8s_i64(dest, base, ofs); 3544 } 3545 break; 3546 case MO_16: 3547 if (!sign) { 3548 tcg_gen_ld16u_i64(dest, base, ofs); 3549 } else { 3550 tcg_gen_ld16s_i64(dest, base, ofs); 3551 } 3552 break; 3553 case MO_32: 3554 if (!sign) { 3555 tcg_gen_ld32u_i64(dest, base, ofs); 3556 } else { 3557 tcg_gen_ld32s_i64(dest, base, ofs); 3558 } 3559 break; 3560 case MO_64: 3561 tcg_gen_ld_i64(dest, base, ofs); 3562 break; 3563 default: 3564 g_assert_not_reached(); 3565 } 3566} 3567 3568/* offset of the idx element with base register r */ 3569static uint32_t endian_ofs(DisasContext *s, int r, int idx) 3570{ 3571#if HOST_BIG_ENDIAN 3572 return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew); 3573#else 3574 return vreg_ofs(s, r) + (idx << s->sew); 3575#endif 3576} 3577 3578/* adjust the index according to the endian */ 3579static void 

/* adjust the index according to the endian */
static void endian_adjust(TCGv_i32 ofs, int sew)
{
#if HOST_BIG_ENDIAN
    tcg_gen_xori_i32(ofs, ofs, 7 >> sew);
#endif
}

/* Load idx >= VLMAX ? 0 : vreg[idx] */
static void vec_element_loadx(DisasContext *s, TCGv_i64 dest,
                              int vreg, TCGv idx, int vlmax)
{
    TCGv_i32 ofs = tcg_temp_new_i32();
    TCGv_ptr base = tcg_temp_new_ptr();
    TCGv_i64 t_idx = tcg_temp_new_i64();
    TCGv_i64 t_vlmax, t_zero;

    /*
     * Mask the index to the length so that we do
     * not produce an out-of-range load.
     */
    tcg_gen_trunc_tl_i32(ofs, idx);
    tcg_gen_andi_i32(ofs, ofs, vlmax - 1);

    /* Convert the index to an offset. */
    endian_adjust(ofs, s->sew);
    tcg_gen_shli_i32(ofs, ofs, s->sew);

    /* Convert the index to a pointer. */
    tcg_gen_ext_i32_ptr(base, ofs);
    tcg_gen_add_ptr(base, base, tcg_env);

    /* Perform the load. */
    load_element(dest, base,
                 vreg_ofs(s, vreg), s->sew, false);

    /* Flush out-of-range indexing to zero. */
    t_vlmax = tcg_constant_i64(vlmax);
    t_zero = tcg_constant_i64(0);
    tcg_gen_extu_tl_i64(t_idx, idx);

    tcg_gen_movcond_i64(TCG_COND_LTU, dest, t_idx,
                        t_vlmax, dest, t_zero);
}
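
/*
 * Note on the sequence above: VLMAX is always a power of two, so masking
 * the index with (vlmax - 1) keeps the generated load itself in range;
 * the original, unmasked index is then compared against vlmax and the
 * loaded value is replaced with zero when the index was out of range,
 * which implements the "idx >= VLMAX ? 0 : vreg[idx]" semantics stated
 * in the function comment.
 */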

static void vec_element_loadi(DisasContext *s, TCGv_i64 dest,
                              int vreg, int idx, bool sign)
{
    load_element(dest, tcg_env, endian_ofs(s, vreg, idx), s->sew, sign);
}

/* Integer Scalar Move Instruction */

static void store_element(TCGv_i64 val, TCGv_ptr base,
                          int ofs, int sew)
{
    switch (sew) {
    case MO_8:
        tcg_gen_st8_i64(val, base, ofs);
        break;
    case MO_16:
        tcg_gen_st16_i64(val, base, ofs);
        break;
    case MO_32:
        tcg_gen_st32_i64(val, base, ofs);
        break;
    case MO_64:
        tcg_gen_st_i64(val, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Store vreg[idx] = val.
 * The index must be in range of VLMAX.
 */
static void vec_element_storei(DisasContext *s, int vreg,
                               int idx, TCGv_i64 val)
{
    store_element(val, tcg_env, endian_ofs(s, vreg, idx), s->sew);
}

/* vmv.x.s rd, vs2 # x[rd] = vs2[0] */
static bool trans_vmv_x_s(DisasContext *s, arg_vmv_x_s *a)
{
    if (require_rvv(s) &&
        vext_check_isa_ill(s)) {
        TCGv_i64 t1;
        TCGv dest;

        t1 = tcg_temp_new_i64();
        dest = tcg_temp_new();
        /*
         * load vreg and sign-extend to 64 bits,
         * then truncate to XLEN bits before storing to gpr.
         */
        vec_element_loadi(s, t1, a->rs2, 0, true);
        tcg_gen_trunc_i64_tl(dest, t1);
        gen_set_gpr(s, a->rd, dest);
        tcg_gen_movi_tl(cpu_vstart, 0);
        finalize_rvv_inst(s);
        return true;
    }
    return false;
}

/* vmv.s.x vd, rs1 # vd[0] = rs1 */
static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
{
    if (require_rvv(s) &&
        vext_check_isa_ill(s)) {
        /* This instruction ignores LMUL and vector register groups */
        TCGv_i64 t1;
        TCGv s1;
        TCGLabel *over = gen_new_label();

        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

        t1 = tcg_temp_new_i64();

        /*
         * load gpr and sign-extend to 64 bits,
         * then truncate to SEW bits when storing to vreg.
         */
        s1 = get_gpr(s, a->rs1, EXT_NONE);
        tcg_gen_ext_tl_i64(t1, s1);
        vec_element_storei(s, a->rd, 0, t1);
        gen_set_label(over);
        tcg_gen_movi_tl(cpu_vstart, 0);
        finalize_rvv_inst(s);
        return true;
    }
    return false;
}

/* Floating-Point Scalar Move Instructions */
static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a)
{
    if (require_rvv(s) &&
        require_rvf(s) &&
        vext_check_isa_ill(s)) {
        gen_set_rm(s, RISCV_FRM_DYN);

        unsigned int ofs = (8 << s->sew);
        unsigned int len = 64 - ofs;
        TCGv_i64 t_nan;

        vec_element_loadi(s, cpu_fpr[a->rd], a->rs2, 0, false);
        /* NaN-box f[rd] as necessary for SEW */
        if (len) {
            t_nan = tcg_constant_i64(UINT64_MAX);
            tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rd],
                                t_nan, ofs, len);
        }

        mark_fs_dirty(s);
        tcg_gen_movi_tl(cpu_vstart, 0);
        finalize_rvv_inst(s);
        return true;
    }
    return false;
}
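
/*
 * Worked example for the NaN-boxing above: with SEW=16, ofs = 16 and
 * len = 48, so bits [63:16] of f[rd] are filled with ones while bits
 * [15:0] hold the loaded element; with SEW=64, len is 0 and no boxing
 * is performed.
 */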

/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2=0) */
static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
{
    if (require_rvv(s) &&
        require_rvf(s) &&
        vext_check_isa_ill(s)) {
        gen_set_rm(s, RISCV_FRM_DYN);

        /* The instructions ignore LMUL and vector register group. */
        TCGv_i64 t1;
        TCGLabel *over = gen_new_label();

        /* if vstart >= vl, skip vector register write back */
        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

        /* NaN-box f[rs1] */
        t1 = tcg_temp_new_i64();
        do_nanbox(s, t1, cpu_fpr[a->rs1]);

        vec_element_storei(s, a->rd, 0, t1);

        gen_set_label(over);
        tcg_gen_movi_tl(cpu_vstart, 0);
        finalize_rvv_inst(s);
        return true;
    }
    return false;
}

/* Vector Slide Instructions */
static bool slideup_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_slide(s, a->rd, a->rs2, a->vm, true);
}

GEN_OPIVX_TRANS(vslideup_vx, slideup_check)
GEN_OPIVX_TRANS(vslide1up_vx, slideup_check)
GEN_OPIVI_TRANS(vslideup_vi, IMM_ZX, vslideup_vx, slideup_check)

static bool slidedown_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_slide(s, a->rd, a->rs2, a->vm, false);
}

GEN_OPIVX_TRANS(vslidedown_vx, slidedown_check)
GEN_OPIVX_TRANS(vslide1down_vx, slidedown_check)
GEN_OPIVI_TRANS(vslidedown_vi, IMM_ZX, vslidedown_vx, slidedown_check)

/* Vector Floating-Point Slide Instructions */
static bool fslideup_check(DisasContext *s, arg_rmrr *a)
{
    return slideup_check(s, a) &&
           require_rvf(s);
}

static bool fslidedown_check(DisasContext *s, arg_rmrr *a)
{
    return slidedown_check(s, a) &&
           require_rvf(s);
}

GEN_OPFVF_TRANS(vfslide1up_vf, fslideup_check)
GEN_OPFVF_TRANS(vfslide1down_vf, fslidedown_check)

/* Vector Register Gather Instruction */
static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_input_eew(s, a->rs1, s->sew, a->rs2, s->sew, a->vm) &&
           require_align(a->rd, s->lmul) &&
           require_align(a->rs1, s->lmul) &&
           require_align(a->rs2, s->lmul) &&
           (a->rd != a->rs2 && a->rd != a->rs1) &&
           require_vm(a->vm, a->rd);
}

static bool vrgatherei16_vv_check(DisasContext *s, arg_rmrr *a)
{
    int8_t emul = MO_16 - s->sew + s->lmul;
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_input_eew(s, a->rs1, MO_16, a->rs2, s->sew, a->vm) &&
           (emul >= -3 && emul <= 3) &&
           require_align(a->rd, s->lmul) &&
           require_align(a->rs1, emul) &&
           require_align(a->rs2, s->lmul) &&
           (a->rd != a->rs2 && a->rd != a->rs1) &&
           !is_overlapped(a->rd, 1 << MAX(s->lmul, 0),
                          a->rs1, 1 << MAX(emul, 0)) &&
           !is_overlapped(a->rd, 1 << MAX(s->lmul, 0),
                          a->rs2, 1 << MAX(s->lmul, 0)) &&
           require_vm(a->vm, a->rd);
}
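
/*
 * Worked example for the EMUL computation above: the index operand vs1 of
 * vrgatherei16.vv has a fixed EEW of 16, so EMUL(vs1) = 16 / SEW * LMUL,
 * i.e. emul = MO_16 - s->sew + s->lmul in log2 form.  For SEW=32 and
 * LMUL=2 this gives emul = 1 - 2 + 1 = 0, so vs1 occupies a single
 * vector register.
 */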

GEN_OPIVV_TRANS(vrgather_vv, vrgather_vv_check)
GEN_OPIVV_TRANS(vrgatherei16_vv, vrgatherei16_vv_check)

static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_input_eew(s, -1, MO_64, a->rs2, s->sew, a->vm) &&
           require_align(a->rd, s->lmul) &&
           require_align(a->rs2, s->lmul) &&
           (a->rd != a->rs2) &&
           require_vm(a->vm, a->rd);
}

/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a)
{
    if (!vrgather_vx_check(s, a)) {
        return false;
    }

    if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
        int vlmax = vext_get_vlmax(s->cfg_ptr->vlenb, s->sew, s->lmul);
        TCGv_i64 dest = tcg_temp_new_i64();

        if (a->rs1 == 0) {
            vec_element_loadi(s, dest, a->rs2, 0, false);
        } else {
            vec_element_loadx(s, dest, a->rs2, cpu_gpr[a->rs1], vlmax);
        }

        tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),
                             MAXSZ(s), MAXSZ(s), dest);
        finalize_rvv_inst(s);
    } else {
        static gen_helper_opivx * const fns[4] = {
            gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h,
            gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d
        };
        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);
    }
    return true;
}

/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 0 : vs2[imm] */
static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a)
{
    if (!vrgather_vx_check(s, a)) {
        return false;
    }

    if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
        int vlmax = vext_get_vlmax(s->cfg_ptr->vlenb, s->sew, s->lmul);
        if (a->rs1 >= vlmax) {
            tcg_gen_gvec_dup_imm(MO_64, vreg_ofs(s, a->rd),
                                 MAXSZ(s), MAXSZ(s), 0);
        } else {
            tcg_gen_gvec_dup_mem(s->sew, vreg_ofs(s, a->rd),
                                 endian_ofs(s, a->rs2, a->rs1),
                                 MAXSZ(s), MAXSZ(s));
        }
        finalize_rvv_inst(s);
    } else {
        static gen_helper_opivx * const fns[4] = {
            gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h,
            gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d
        };
        return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew],
                           s, IMM_ZX);
    }
    return true;
}

/*
 * Vector Compress Instruction
 *
 * The destination vector register group cannot overlap the
 * source vector register group or the source mask register.
 */
static bool vcompress_vm_check(DisasContext *s, arg_r *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           require_align(a->rd, s->lmul) &&
           require_align(a->rs2, s->lmul) &&
           (a->rd != a->rs2) &&
           !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs1, 1) &&
           s->vstart_eq_zero;
}

static bool trans_vcompress_vm(DisasContext *s, arg_r *a)
{
    if (vcompress_vm_check(s, a)) {
        uint32_t data = 0;
        static gen_helper_gvec_4_ptr * const fns[4] = {
            gen_helper_vcompress_vm_b, gen_helper_vcompress_vm_h,
            gen_helper_vcompress_vm_w, gen_helper_vcompress_vm_d,
        };

        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
        data = FIELD_DP32(data, VDATA, VTA, s->vta);
        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                           vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
                           tcg_env, s->cfg_ptr->vlenb,
                           s->cfg_ptr->vlenb, data,
                           fns[s->sew]);
        finalize_rvv_inst(s);
        return true;
    }
    return false;
}

/*
 * Whole Vector Register Move Instructions depend on vtype register(vsew).
 * Thus, we need to check vill bit. (Section 16.6)
 */
#define GEN_VMV_WHOLE_TRANS(NAME, LEN)                                  \
static bool trans_##NAME(DisasContext *s, arg_##NAME * a)               \
{                                                                       \
    if (require_rvv(s) &&                                               \
        vext_check_isa_ill(s) &&                                        \
        QEMU_IS_ALIGNED(a->rd, LEN) &&                                  \
        QEMU_IS_ALIGNED(a->rs2, LEN)) {                                 \
        uint32_t maxsz = s->cfg_ptr->vlenb * LEN;                       \
        if (s->vstart_eq_zero) {                                        \
            tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd),                \
                             vreg_ofs(s, a->rs2), maxsz, maxsz);        \
        } else {                                                        \
            tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), \
                               tcg_env, maxsz, maxsz, 0,                \
                               gen_helper_vmvr_v);                      \
        }                                                               \
        finalize_rvv_inst(s);                                           \
        return true;                                                    \
    }                                                                   \
    return false;                                                       \
}

GEN_VMV_WHOLE_TRANS(vmv1r_v, 1)
GEN_VMV_WHOLE_TRANS(vmv2r_v, 2)
GEN_VMV_WHOLE_TRANS(vmv4r_v, 4)
GEN_VMV_WHOLE_TRANS(vmv8r_v, 8)

static bool int_ext_check(DisasContext *s, arg_rmr *a, uint8_t div)
{
    uint8_t from = (s->sew + 3) - div;
    bool ret = require_rvv(s) &&
        (from >= 3 && from <= 8) &&
        (a->rd != a->rs2) &&
        require_align(a->rd, s->lmul) &&
        require_align(a->rs2, s->lmul - div) &&
        require_vm(a->vm, a->rd) &&
        require_noover(a->rd, s->lmul, a->rs2, s->lmul - div) &&
        vext_check_input_eew(s, -1, 0, a->rs2, s->sew, a->vm);

    return ret;
}
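
/*
 * Note on the check above: 'from' is log2 of the source element width in
 * bits (SEW is 8 << s->sew bits wide and the source EEW is SEW / 2^div),
 * so requiring from >= 3 rejects encodings whose source elements would be
 * narrower than 8 bits, e.g. vzext.vf4 with SEW=16.
 */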

static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq)
{
    uint32_t data = 0;
    gen_helper_gvec_3_ptr *fn;

    static gen_helper_gvec_3_ptr * const fns[6][4] = {
        {
            NULL, gen_helper_vzext_vf2_h,
            gen_helper_vzext_vf2_w, gen_helper_vzext_vf2_d
        },
        {
            NULL, NULL,
            gen_helper_vzext_vf4_w, gen_helper_vzext_vf4_d,
        },
        {
            NULL, NULL,
            NULL, gen_helper_vzext_vf8_d
        },
        {
            NULL, gen_helper_vsext_vf2_h,
            gen_helper_vsext_vf2_w, gen_helper_vsext_vf2_d
        },
        {
            NULL, NULL,
            gen_helper_vsext_vf4_w, gen_helper_vsext_vf4_d,
        },
        {
            NULL, NULL,
            NULL, gen_helper_vsext_vf8_d
        }
    };

    fn = fns[seq][s->sew];
    if (fn == NULL) {
        return false;
    }

    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
    data = FIELD_DP32(data, VDATA, VTA, s->vta);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);

    tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                       vreg_ofs(s, a->rs2), tcg_env,
                       s->cfg_ptr->vlenb,
                       s->cfg_ptr->vlenb, data, fn);

    finalize_rvv_inst(s);
    return true;
}

/* Vector Integer Extension */
#define GEN_INT_EXT_TRANS(NAME, DIV, SEQ)               \
static bool trans_##NAME(DisasContext *s, arg_rmr *a)   \
{                                                       \
    if (int_ext_check(s, a, DIV)) {                     \
        return int_ext_op(s, a, SEQ);                   \
    }                                                   \
    return false;                                       \
}

GEN_INT_EXT_TRANS(vzext_vf2, 1, 0)
GEN_INT_EXT_TRANS(vzext_vf4, 2, 1)
GEN_INT_EXT_TRANS(vzext_vf8, 3, 2)
GEN_INT_EXT_TRANS(vsext_vf2, 1, 3)
GEN_INT_EXT_TRANS(vsext_vf4, 2, 4)
GEN_INT_EXT_TRANS(vsext_vf8, 3, 5)
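
/*
 * Note on the instantiations above: DIV is log2 of the extension factor
 * (vf2 -> 1, vf4 -> 2, vf8 -> 3) and SEQ selects the row of fns[] in
 * int_ext_op(), rows 0..2 being the zero-extending and rows 3..5 the
 * sign-extending variants.
 */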