/*
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include "tcg/tcg-op-gvec.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"

static inline bool is_overlapped(const int8_t astart, int8_t asize,
                                 const int8_t bstart, int8_t bsize)
{
    const int8_t aend = astart + asize;
    const int8_t bend = bstart + bsize;

    return MAX(aend, bend) - MIN(astart, bstart) < asize + bsize;
}

static bool require_rvv(DisasContext *s)
{
    return s->mstatus_vs != EXT_STATUS_DISABLED;
}

static bool require_rvf(DisasContext *s)
{
    if (s->mstatus_fs == EXT_STATUS_DISABLED) {
        return false;
    }

    switch (s->sew) {
    case MO_16:
        return s->cfg_ptr->ext_zvfh;
    case MO_32:
        return s->cfg_ptr->ext_zve32f;
    case MO_64:
        return s->cfg_ptr->ext_zve64d;
    default:
        return false;
    }
}

static bool require_rvfmin(DisasContext *s)
{
    if (s->mstatus_fs == EXT_STATUS_DISABLED) {
        return false;
    }

    switch (s->sew) {
    case MO_16:
        return s->cfg_ptr->ext_zvfhmin;
    case MO_32:
        return s->cfg_ptr->ext_zve32f;
    default:
        return false;
    }
}

static bool require_scale_rvf(DisasContext *s)
{
    if (s->mstatus_fs == EXT_STATUS_DISABLED) {
        return false;
    }

    switch (s->sew) {
    case MO_8:
        return s->cfg_ptr->ext_zvfh;
    case MO_16:
        return s->cfg_ptr->ext_zve32f;
    case MO_32:
        return s->cfg_ptr->ext_zve64d;
    default:
        return false;
    }
}

static bool require_scale_rvfmin(DisasContext *s)
{
    if (s->mstatus_fs == EXT_STATUS_DISABLED) {
        return false;
    }

    switch (s->sew) {
    case MO_16:
        return s->cfg_ptr->ext_zve32f;
    case MO_32:
        return s->cfg_ptr->ext_zve64d;
    default:
        return false;
    }
}
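/*
 * Worked example for is_overlapped() (illustrative register numbers): the
 * groups v4..v7 (astart = 4, asize = 4) and v6..v13 (bstart = 6, bsize = 8)
 * give MAX(8, 14) - MIN(4, 6) = 10, which is less than asize + bsize = 12,
 * so they overlap.  For disjoint groups the left-hand side is at least the
 * combined size and the function returns false.
 */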
/*
 * Source and destination vector register groups cannot overlap the source
 * mask register:
 *
 * A vector register cannot be used to provide source operands with more than
 * one EEW for a single instruction.  A mask register source is considered to
 * have EEW=1 for this constraint.  An encoding that would result in the same
 * vector register being read with two or more different EEWs, including when
 * the vector register appears at different positions within two or more vector
 * register groups, is reserved.
 * (Section 5.2)
 *
 * A destination vector register group can overlap a source vector
 * register group only if one of the following holds:
 *   1. The destination EEW equals the source EEW.
 *   2. The destination EEW is smaller than the source EEW and the overlap
 *      is in the lowest-numbered part of the source register group.
 *   3. The destination EEW is greater than the source EEW, the source EMUL
 *      is at least 1, and the overlap is in the highest-numbered part of
 *      the destination register group.
 * For the purpose of determining register group overlap constraints, mask
 * elements have EEW=1.
 * (Section 5.2)
 */
static bool require_vm(int vm, int v)
{
    return (vm != 0 || v != 0);
}

static bool require_nf(int vd, int nf, int lmul)
{
    int size = nf << MAX(lmul, 0);
    return size <= 8 && vd + size <= 32;
}

/*
 * A vector register number must be aligned to the passed-in LMUL (EMUL).
 * If LMUL < 0, i.e. fractional LMUL, any vector register is allowed.
 */
static bool require_align(const int8_t val, const int8_t lmul)
{
    return lmul <= 0 || extract32(val, 0, lmul) == 0;
}
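/*
 * Illustrative checks: with LMUL = 4 (lmul = 2) a register group must start
 * at a multiple of 4, so require_align(6, 2) fails while require_align(8, 2)
 * passes.  For a segment access with nf = 3 at the same LMUL,
 * require_nf(8, 3, 2) also fails because 3 << 2 = 12 exceeds the 8-register
 * group limit.
 */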
/*
 * A destination vector register group can overlap a source vector
 * register group only if one of the following holds:
 *   1. The destination EEW equals the source EEW.
 *   2. The destination EEW is smaller than the source EEW and the overlap
 *      is in the lowest-numbered part of the source register group.
 *   3. The destination EEW is greater than the source EEW, the source EMUL
 *      is at least 1, and the overlap is in the highest-numbered part of
 *      the destination register group.
 * (Section 5.2)
 *
 * This function returns true if one of the following holds:
 *   * Destination vector register group does not overlap a source vector
 *     register group.
 *   * Rule 3 met.
 * For rule 1, overlap is allowed so this function doesn't need to be called.
 * For rule 2, (vd == vs).  Caller has to check whether (vd != vs) before
 * calling this function.
 */
static bool require_noover(const int8_t dst, const int8_t dst_lmul,
                           const int8_t src, const int8_t src_lmul)
{
    int8_t dst_size = dst_lmul <= 0 ? 1 : 1 << dst_lmul;
    int8_t src_size = src_lmul <= 0 ? 1 : 1 << src_lmul;

    /* Destination EEW is greater than the source EEW, check rule 3. */
    if (dst_size > src_size) {
        if (dst < src &&
            src_lmul >= 0 &&
            is_overlapped(dst, dst_size, src, src_size) &&
            !is_overlapped(dst, dst_size, src + src_size, src_size)) {
            return true;
        }
    }

    return !is_overlapped(dst, dst_size, src, src_size);
}

static bool do_vsetvl(DisasContext *s, int rd, int rs1, TCGv s2)
{
    TCGv s1, dst;

    if (!require_rvv(s) || !s->cfg_ptr->ext_zve32x) {
        return false;
    }

    dst = dest_gpr(s, rd);

    if (rd == 0 && rs1 == 0) {
        s1 = tcg_temp_new();
        tcg_gen_mov_tl(s1, cpu_vl);
    } else if (rs1 == 0) {
        /* As the mask is at least one bit, RV_VLEN_MAX is >= VLMAX */
        s1 = tcg_constant_tl(RV_VLEN_MAX);
    } else {
        s1 = get_gpr(s, rs1, EXT_ZERO);
    }

    gen_helper_vsetvl(dst, tcg_env, s1, s2,
                      tcg_constant_tl((int)(rd == 0 && rs1 == 0)));
    gen_set_gpr(s, rd, dst);
    finalize_rvv_inst(s);

    gen_update_pc(s, s->cur_insn_len);
    lookup_and_goto_ptr(s);
    s->base.is_jmp = DISAS_NORETURN;
    return true;
}

static bool do_vsetivli(DisasContext *s, int rd, TCGv s1, TCGv s2)
{
    TCGv dst;

    if (!require_rvv(s) || !s->cfg_ptr->ext_zve32x) {
        return false;
    }

    dst = dest_gpr(s, rd);

    gen_helper_vsetvl(dst, tcg_env, s1, s2, tcg_constant_tl(0));
    gen_set_gpr(s, rd, dst);
    finalize_rvv_inst(s);
    gen_update_pc(s, s->cur_insn_len);
    lookup_and_goto_ptr(s);
    s->base.is_jmp = DISAS_NORETURN;

    return true;
}

static bool trans_vsetvl(DisasContext *s, arg_vsetvl *a)
{
    TCGv s2 = get_gpr(s, a->rs2, EXT_ZERO);
    return do_vsetvl(s, a->rd, a->rs1, s2);
}

static bool trans_vsetvli(DisasContext *s, arg_vsetvli *a)
{
    TCGv s2 = tcg_constant_tl(a->zimm);
    return do_vsetvl(s, a->rd, a->rs1, s2);
}

static bool trans_vsetivli(DisasContext *s, arg_vsetivli *a)
{
    TCGv s1 = tcg_constant_tl(a->rs1);
    TCGv s2 = tcg_constant_tl(a->zimm);
    return do_vsetivli(s, a->rd, s1, s2);
}

/* vector register offset from env */
static uint32_t vreg_ofs(DisasContext *s, int reg)
{
    return offsetof(CPURISCVState, vreg) + reg * s->cfg_ptr->vlenb;
}

/* check functions */

/*
 * Vector unit-stride, strided, unit-stride segment, strided segment
 * store check function.
 *
 * Rules to be checked here:
 *   1. EMUL must be within the range: 1/8 <= EMUL <= 8. (Section 7.3)
 *   2. Destination vector register number is a multiple of EMUL.
 *      (Section 3.4.2, 7.3)
 *   3. The EMUL setting must be such that EMUL * NFIELDS ≤ 8. (Section 7.8)
 *   4. Vector register numbers accessed by the segment load or store
 *      cannot increment past 31. (Section 7.8)
 */
static bool vext_check_store(DisasContext *s, int vd, int nf, uint8_t eew)
{
    int8_t emul = eew - s->sew + s->lmul;
    return (emul >= -3 && emul <= 3) &&
           require_align(vd, emul) &&
           require_nf(vd, nf, emul);
}
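/*
 * EMUL is tracked as a signed power of two, like s->lmul.  Illustrative
 * case: a vle16.v executed under SEW = 32 (s->sew = MO_32) and LMUL = 2
 * (s->lmul = 1) gives emul = 1 - 2 + 1 = 0, i.e. EMUL = 1, so the data
 * register group only needs single-register alignment.
 */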
/*
 * Vector unit-stride, strided, unit-stride segment, strided segment
 * load check function.
 *
 * Rules to be checked here:
 *   1. All rules that apply to store instructions also apply to
 *      load instructions.
 *   2. Destination vector register group for a masked vector
 *      instruction cannot overlap the source mask register (v0).
 *      (Section 5.3)
 */
static bool vext_check_load(DisasContext *s, int vd, int nf, int vm,
                            uint8_t eew)
{
    return vext_check_store(s, vd, nf, eew) && require_vm(vm, vd);
}

/*
 * Vector indexed, indexed segment store check function.
 *
 * Rules to be checked here:
 *   1. EMUL must be within the range: 1/8 <= EMUL <= 8. (Section 7.3)
 *   2. Index vector register number is a multiple of EMUL.
 *      (Section 3.4.2, 7.3)
 *   3. Destination vector register number is a multiple of LMUL.
 *      (Section 3.4.2, 7.3)
 *   4. The EMUL setting must be such that EMUL * NFIELDS ≤ 8. (Section 7.8)
 *   5. Vector register numbers accessed by the segment load or store
 *      cannot increment past 31. (Section 7.8)
 */
static bool vext_check_st_index(DisasContext *s, int vd, int vs2, int nf,
                                uint8_t eew)
{
    int8_t emul = eew - s->sew + s->lmul;
    bool ret = (emul >= -3 && emul <= 3) &&
               require_align(vs2, emul) &&
               require_align(vd, s->lmul) &&
               require_nf(vd, nf, s->lmul);

    /*
     * V extension supports all vector load and store instructions,
     * except V extension does not support EEW=64 for index values
     * when XLEN=32. (Section 18.3)
     */
    if (get_xl(s) == MXL_RV32) {
        ret &= (eew != MO_64);
    }

    return ret;
}

/*
 * Vector indexed, indexed segment load check function.
 *
 * Rules to be checked here:
 *   1. All rules that apply to store instructions also apply to
 *      load instructions.
 *   2. Destination vector register group for a masked vector
 *      instruction cannot overlap the source mask register (v0).
 *      (Section 5.3)
 *   3. Destination vector register cannot overlap a source vector
 *      register (vs2) group.
 *      (Section 5.2)
 *   4. Destination vector register groups cannot overlap
 *      the source vector register (vs2) group for
 *      indexed segment load instructions. (Section 7.8.3)
 */
static bool vext_check_ld_index(DisasContext *s, int vd, int vs2,
                                int nf, int vm, uint8_t eew)
{
    int8_t seg_vd;
    int8_t emul = eew - s->sew + s->lmul;
    bool ret = vext_check_st_index(s, vd, vs2, nf, eew) &&
               require_vm(vm, vd);

    /* Each segment register group has to follow overlap rules. */
    for (int i = 0; i < nf; ++i) {
        seg_vd = vd + (1 << MAX(s->lmul, 0)) * i;

        if (eew > s->sew) {
            if (seg_vd != vs2) {
                ret &= require_noover(seg_vd, s->lmul, vs2, emul);
            }
        } else if (eew < s->sew) {
            ret &= require_noover(seg_vd, s->lmul, vs2, emul);
        }

        /*
         * Destination vector register groups cannot overlap
         * the source vector register (vs2) group for
         * indexed segment load instructions.
         */
        if (nf > 1) {
            ret &= !is_overlapped(seg_vd, 1 << MAX(s->lmul, 0),
                                  vs2, 1 << MAX(emul, 0));
        }
    }
    return ret;
}
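/*
 * Example of rule 4 above (hypothetical operands): a two-field indexed
 * segment load such as vluxseg2ei8.v with LMUL = 1 writes the groups vd and
 * vd + 1; if the index register vs2 happens to be vd + 1, is_overlapped()
 * flags the collision for the second segment and the encoding is rejected.
 */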
/*
 * Check whether a vector register is used to provide source operands with
 * more than one EEW for the vector instruction.
 * Returns true if the instruction has a valid encoding.
 * Returns false if the encoding violates the mismatched input EEWs constraint.
 */
static bool vext_check_input_eew(DisasContext *s, int vs1, uint8_t eew_vs1,
                                 int vs2, uint8_t eew_vs2, int vm)
{
    bool is_valid = true;
    int8_t emul_vs1 = eew_vs1 - s->sew + s->lmul;
    int8_t emul_vs2 = eew_vs2 - s->sew + s->lmul;

    /* When vm is 0, vs1 & vs2(EEW!=1) group can't overlap v0 (EEW=1) */
    if ((vs1 != -1 && !require_vm(vm, vs1)) ||
        (vs2 != -1 && !require_vm(vm, vs2))) {
        is_valid = false;
    }

    /* When eew_vs1 != eew_vs2, check whether vs1 and vs2 are overlapped */
    if ((vs1 != -1 && vs2 != -1) && (eew_vs1 != eew_vs2) &&
        is_overlapped(vs1, 1 << MAX(emul_vs1, 0),
                      vs2, 1 << MAX(emul_vs2, 0))) {
        is_valid = false;
    }

    return is_valid;
}

static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm)
{
    return require_vm(vm, vd) &&
           require_align(vd, s->lmul) &&
           require_align(vs, s->lmul) &&
           vext_check_input_eew(s, vs, s->sew, -1, s->sew, vm);
}

/*
 * Check function for vector instruction with format:
 * single-width result and single-width sources (SEW = SEW op SEW)
 *
 * Rules to be checked here:
 *   1. Destination vector register group for a masked vector
 *      instruction cannot overlap the source mask register (v0).
 *      (Section 5.3)
 *   2. Destination vector register number is a multiple of LMUL.
 *      (Section 3.4.2)
 *   3. Source (vs2, vs1) vector register numbers are multiples of LMUL.
 *      (Section 3.4.2)
 */
static bool vext_check_sss(DisasContext *s, int vd, int vs1, int vs2, int vm)
{
    return vext_check_ss(s, vd, vs2, vm) &&
           vext_check_input_eew(s, vs1, s->sew, vs2, s->sew, vm) &&
           require_align(vs1, s->lmul);
}

static bool vext_check_ms(DisasContext *s, int vd, int vs)
{
    bool ret = require_align(vs, s->lmul);
    if (vd != vs) {
        ret &= require_noover(vd, 0, vs, s->lmul);
    }
    return ret;
}

/*
 * Check function for maskable vector instruction with format:
 * single-width result and single-width sources (SEW = SEW op SEW)
 *
 * Rules to be checked here:
 *   1. Source (vs2, vs1) vector register numbers are multiples of LMUL.
 *      (Section 3.4.2)
 *   2. Destination vector register cannot overlap a source vector
 *      register (vs2, vs1) group.
 *      (Section 5.2)
 *   3. The destination vector register group for a masked vector
 *      instruction cannot overlap the source mask register (v0),
 *      unless the destination vector register is being written
 *      with a mask value (e.g., comparisons) or the scalar result
 *      of a reduction. (Section 5.3)
 */
static bool vext_check_mss(DisasContext *s, int vd, int vs1, int vs2)
{
    bool ret = vext_check_ms(s, vd, vs2) &&
               require_align(vs1, s->lmul);
    if (vd != vs1) {
        ret &= require_noover(vd, 0, vs1, s->lmul);
    }
    return ret;
}
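/*
 * The mask-producing checks above treat the destination as a single mask
 * register (EEW = 1), which is why require_noover() is called with
 * dst_lmul = 0.  For example, vd = v2 with vs2 = v4..v7 (LMUL = 4) is
 * accepted because the one-register destination does not intersect the
 * source group.
 */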
/*
 * Common check function for vector widening instructions
 * of double-width result (2*SEW).
 *
 * Rules to be checked here:
 *   1. The largest vector register group used by an instruction
 *      can not be greater than 8 vector registers (Section 5.2):
 *      => LMUL < 8.
 *      => SEW < 64.
 *   2. Double-width SEW cannot be greater than ELEN.
 *   3. Destination vector register number is a multiple of 2 * LMUL.
 *      (Section 3.4.2)
 *   4. Destination vector register group for a masked vector
 *      instruction cannot overlap the source mask register (v0).
 *      (Section 5.3)
 */
static bool vext_wide_check_common(DisasContext *s, int vd, int vm)
{
    return (s->lmul <= 2) &&
           (s->sew < MO_64) &&
           ((s->sew + 1) <= (s->cfg_ptr->elen >> 4)) &&
           require_align(vd, s->lmul + 1) &&
           require_vm(vm, vd);
}

/*
 * Common check function for vector narrowing instructions
 * of single-width result (SEW) and double-width source (2*SEW).
 *
 * Rules to be checked here:
 *   1. The largest vector register group used by an instruction
 *      can not be greater than 8 vector registers (Section 5.2):
 *      => LMUL < 8.
 *      => SEW < 64.
 *   2. Double-width SEW cannot be greater than ELEN.
 *   3. Source vector register number is a multiple of 2 * LMUL.
 *      (Section 3.4.2)
 *   4. Destination vector register number is a multiple of LMUL.
 *      (Section 3.4.2)
 *   5. Destination vector register group for a masked vector
 *      instruction cannot overlap the source mask register (v0).
 *      (Section 5.3)
 */
static bool vext_narrow_check_common(DisasContext *s, int vd, int vs2,
                                     int vm)
{
    return (s->lmul <= 2) &&
           (s->sew < MO_64) &&
           ((s->sew + 1) <= (s->cfg_ptr->elen >> 4)) &&
           require_align(vs2, s->lmul + 1) &&
           require_align(vd, s->lmul) &&
           require_vm(vm, vd);
}

static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm)
{
    return vext_wide_check_common(s, vd, vm) &&
           vext_check_input_eew(s, vs, s->sew, -1, 0, vm) &&
           require_align(vs, s->lmul) &&
           require_noover(vd, s->lmul + 1, vs, s->lmul);
}

static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm)
{
    return vext_wide_check_common(s, vd, vm) &&
           vext_check_input_eew(s, vs, s->sew + 1, -1, 0, vm) &&
           require_align(vs, s->lmul + 1);
}

/*
 * Check function for vector instruction with format:
 * double-width result and single-width sources (2*SEW = SEW op SEW)
 *
 * Rules to be checked here:
 *   1. All rules defined in the widen common rules apply.
 *   2. Source (vs2, vs1) vector register numbers are multiples of LMUL.
 *      (Section 3.4.2)
 *   3. Destination vector register cannot overlap a source vector
 *      register (vs2, vs1) group.
 *      (Section 5.2)
 */
static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm)
{
    return vext_check_ds(s, vd, vs2, vm) &&
           vext_check_input_eew(s, vs1, s->sew, vs2, s->sew, vm) &&
           require_align(vs1, s->lmul) &&
           require_noover(vd, s->lmul + 1, vs1, s->lmul);
}
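/*
 * Widening example (illustrative): with SEW = 16 and LMUL = 1 the result
 * group uses EEW = 32 and EMUL = 2, so vext_check_ds() requires vd to be
 * even (aligned to s->lmul + 1) and only tolerates overlap with the
 * single-width source in the layout permitted by overlap rule 3.
 */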
/*
 * Check function for vector instruction with format:
 * double-width result and double-width source1 and single-width
 * source2 (2*SEW = 2*SEW op SEW)
 *
 * Rules to be checked here:
 *   1. All rules defined in the widen common rules apply.
 *   2. Source 1 (vs2) vector register number is a multiple of 2 * LMUL.
 *      (Section 3.4.2)
 *   3. Source 2 (vs1) vector register number is a multiple of LMUL.
 *      (Section 3.4.2)
 *   4. Destination vector register cannot overlap a source vector
 *      register (vs1) group.
 *      (Section 5.2)
 */
static bool vext_check_dds(DisasContext *s, int vd, int vs1, int vs2, int vm)
{
    return vext_check_ds(s, vd, vs1, vm) &&
           vext_check_input_eew(s, vs1, s->sew, vs2, s->sew + 1, vm) &&
           require_align(vs2, s->lmul + 1);
}

static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm)
{
    bool ret = vext_narrow_check_common(s, vd, vs, vm) &&
               vext_check_input_eew(s, vs, s->sew + 1, -1, 0, vm);
    if (vd != vs) {
        ret &= require_noover(vd, s->lmul, vs, s->lmul + 1);
    }
    return ret;
}

/*
 * Check function for vector instruction with format:
 * single-width result and double-width source 1 and single-width
 * source 2 (SEW = 2*SEW op SEW)
 *
 * Rules to be checked here:
 *   1. All rules defined in the narrow common rules apply.
 *   2. Destination vector register cannot overlap a source vector
 *      register (vs2) group.
 *      (Section 5.2)
 *   3. Source 2 (vs1) vector register number is a multiple of LMUL.
 *      (Section 3.4.2)
 */
static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm)
{
    return vext_check_sd(s, vd, vs2, vm) &&
           vext_check_input_eew(s, vs1, s->sew, vs2, s->sew + 1, vm) &&
           require_align(vs1, s->lmul);
}

/*
 * Check function for vector reduction instructions.
 *
 * Rules to be checked here:
 *   1. Source 1 (vs2) vector register number is a multiple of LMUL.
 *      (Section 3.4.2)
 */
static bool vext_check_reduction(DisasContext *s, int vs2)
{
    return require_align(vs2, s->lmul) && s->vstart_eq_zero;
}

/*
 * Check function for vector slide instructions.
 *
 * Rules to be checked here:
 *   1. Source 1 (vs2) vector register number is a multiple of LMUL.
 *      (Section 3.4.2)
 *   2. Destination vector register number is a multiple of LMUL.
 *      (Section 3.4.2)
 *   3. Destination vector register group for a masked vector
 *      instruction cannot overlap the source mask register (v0).
 *      (Section 5.3)
 *   4. The destination vector register group for vslideup, vslide1up,
 *      vfslide1up, cannot overlap the source vector register (vs2) group.
 *      (Section 5.2, 16.3.1, 16.3.3)
 */
static bool vext_check_slide(DisasContext *s, int vd, int vs2,
                             int vm, bool is_over)
{
    bool ret = require_align(vs2, s->lmul) &&
               require_align(vd, s->lmul) &&
               require_vm(vm, vd) &&
               vext_check_input_eew(s, -1, 0, vs2, s->sew, vm);

    if (is_over) {
        ret &= (vd != vs2);
    }
    return ret;
}

/*
 * cpu_get_tb_cpu_state() sets VILL when RVV is not present, so RVV is
 * also effectively checked by this function.
 */
static bool vext_check_isa_ill(DisasContext *s)
{
    return !s->vill;
}

/* common translation macro */
#define GEN_VEXT_TRANS(NAME, EEW, ARGTYPE, OP, CHECK)         \
static bool trans_##NAME(DisasContext *s, arg_##ARGTYPE * a)  \
{                                                             \
    if (CHECK(s, a, EEW)) {                                   \
        return OP(s, a, EEW);                                 \
    }                                                         \
    return false;                                             \
}
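/*
 * For instance, GEN_VEXT_TRANS(vle8_v, MO_8, r2nfvm, ld_us_op, ld_us_check)
 * below expands to:
 *
 *   static bool trans_vle8_v(DisasContext *s, arg_r2nfvm *a)
 *   {
 *       if (ld_us_check(s, a, MO_8)) {
 *           return ld_us_op(s, a, MO_8);
 *       }
 *       return false;
 *   }
 */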
static uint8_t vext_get_emul(DisasContext *s, uint8_t eew)
{
    int8_t emul = eew - s->sew + s->lmul;
    return emul < 0 ? 0 : emul;
}

/*
 *** unit stride load and store
 */
typedef void gen_helper_ldst_us(TCGv_ptr, TCGv_ptr, TCGv,
                                TCGv_env, TCGv_i32);

static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data,
                          gen_helper_ldst_us *fn, DisasContext *s,
                          bool is_store)
{
    TCGv_ptr dest, mask;
    TCGv base;
    TCGv_i32 desc;

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    base = get_gpr(s, rs1, EXT_NONE);

    /*
     * As simd_desc supports at most 2048 bytes, and in this implementation,
     * the max vector group length is 4096 bytes. So split it into two parts.
     *
     * The first part is vlen in bytes (vlenb), encoded in maxsz of simd_desc.
     * The second part is lmul, encoded in data of simd_desc.
     */
    desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
                                      s->cfg_ptr->vlenb, data));

    tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0));

    /*
     * According to the specification
     *
     * Additionally, if the Ztso extension is implemented, then vector memory
     * instructions in the V extension and Zve family of extensions follow
     * RVTSO at the instruction level. The Ztso extension does not
     * strengthen the ordering of intra-instruction element accesses.
     *
     * as a result neither ordered nor unordered accesses from the V
     * instructions need ordering within the loop but we do still need barriers
     * around the loop.
     */
    if (is_store && s->ztso) {
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
    }

    mark_vs_dirty(s);

    fn(dest, mask, base, tcg_env, desc);

    if (!is_store && s->ztso) {
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
    }

    finalize_rvv_inst(s);
    return true;
}
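/*
 * Illustrative numbers for the simd_desc() split above: even with
 * VLEN = 1024 (vlenb = 128) and LMUL = 8, where a register group spans
 * 1024 bytes, the descriptor only stores vlenb in its maxsz field, and the
 * helpers rebuild the full group size from vlenb plus the LMUL carried in
 * the data field.
 */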
static bool ld_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_us *fn;
    static gen_helper_ldst_us * const fns[2][4] = {
        /* masked unit stride load */
        { gen_helper_vle8_v_mask, gen_helper_vle16_v_mask,
          gen_helper_vle32_v_mask, gen_helper_vle64_v_mask },
        /* unmasked unit stride load */
        { gen_helper_vle8_v, gen_helper_vle16_v,
          gen_helper_vle32_v, gen_helper_vle64_v }
    };

    fn = fns[a->vm][eew];
    if (fn == NULL) {
        return false;
    }

    /*
     * Vector load/store instructions have the EEW encoded
     * directly in the instructions. The maximum vector size is
     * calculated with EMUL rather than LMUL.
     */
    uint8_t emul = vext_get_emul(s, eew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    data = FIELD_DP32(data, VDATA, VTA, s->vta);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);
    return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
}

static bool ld_us_check(DisasContext *s, arg_r2nfvm* a, uint8_t eew)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_load(s, a->rd, a->nf, a->vm, eew);
}

GEN_VEXT_TRANS(vle8_v,  MO_8,  r2nfvm, ld_us_op, ld_us_check)
GEN_VEXT_TRANS(vle16_v, MO_16, r2nfvm, ld_us_op, ld_us_check)
GEN_VEXT_TRANS(vle32_v, MO_32, r2nfvm, ld_us_op, ld_us_check)
GEN_VEXT_TRANS(vle64_v, MO_64, r2nfvm, ld_us_op, ld_us_check)

static bool st_us_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_us *fn;
    static gen_helper_ldst_us * const fns[2][4] = {
        /* masked unit stride store */
        { gen_helper_vse8_v_mask, gen_helper_vse16_v_mask,
          gen_helper_vse32_v_mask, gen_helper_vse64_v_mask },
        /* unmasked unit stride store */
        { gen_helper_vse8_v, gen_helper_vse16_v,
          gen_helper_vse32_v, gen_helper_vse64_v }
    };

    fn = fns[a->vm][eew];
    if (fn == NULL) {
        return false;
    }

    uint8_t emul = vext_get_emul(s, eew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
}

static bool st_us_check(DisasContext *s, arg_r2nfvm* a, uint8_t eew)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_store(s, a->rd, a->nf, eew);
}

GEN_VEXT_TRANS(vse8_v,  MO_8,  r2nfvm, st_us_op, st_us_check)
GEN_VEXT_TRANS(vse16_v, MO_16, r2nfvm, st_us_op, st_us_check)
GEN_VEXT_TRANS(vse32_v, MO_32, r2nfvm, st_us_op, st_us_check)
GEN_VEXT_TRANS(vse64_v, MO_64, r2nfvm, st_us_op, st_us_check)

/*
 *** unit stride mask load and store
 */
static bool ld_us_mask_op(DisasContext *s, arg_vlm_v *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_us *fn = gen_helper_vlm_v;

    /* EMUL = 1, NFIELDS = 1 */
    data = FIELD_DP32(data, VDATA, LMUL, 0);
    data = FIELD_DP32(data, VDATA, NF, 1);
    /* Mask destination registers are always tail-agnostic */
    data = FIELD_DP32(data, VDATA, VTA, s->cfg_vta_all_1s);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);
    data = FIELD_DP32(data, VDATA, VM, 1);
    return ldst_us_trans(a->rd, a->rs1, data, fn, s, false);
}

static bool ld_us_mask_check(DisasContext *s, arg_vlm_v *a, uint8_t eew)
{
    /* EMUL = 1, NFIELDS = 1 */
    return require_rvv(s) && vext_check_isa_ill(s);
}

static bool st_us_mask_op(DisasContext *s, arg_vsm_v *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_us *fn = gen_helper_vsm_v;

    /* EMUL = 1, NFIELDS = 1 */
    data = FIELD_DP32(data, VDATA, LMUL, 0);
    data = FIELD_DP32(data, VDATA, NF, 1);
    data = FIELD_DP32(data, VDATA, VM, 1);
    return ldst_us_trans(a->rd, a->rs1, data, fn, s, true);
}

static bool st_us_mask_check(DisasContext *s, arg_vsm_v *a, uint8_t eew)
{
    /* EMUL = 1, NFIELDS = 1 */
    return require_rvv(s) && vext_check_isa_ill(s);
}

GEN_VEXT_TRANS(vlm_v, MO_8, vlm_v, ld_us_mask_op, ld_us_mask_check)
GEN_VEXT_TRANS(vsm_v, MO_8, vsm_v, st_us_mask_op, st_us_mask_check)
/*
 * MAXSZ returns the maximum vector size that can be operated on, in bytes,
 * which is used in GVEC IR when the vl_eq_vlmax flag is set to true
 * to accelerate vector operations.
 */
static inline uint32_t MAXSZ(DisasContext *s)
{
    int max_sz = s->cfg_ptr->vlenb << 3;
    return max_sz >> (3 - s->lmul);
}

/* Return floor(log2(a)); 'a' is expected to be a non-zero power of two. */
static inline uint32_t get_log2(uint32_t a)
{
    uint32_t i = 0;
    for (; a > 0;) {
        a >>= 1;
        i++;
    }
    return i - 1;
}
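/*
 * Example for MAXSZ() above (assuming VLEN = 128, i.e. vlenb = 16): max_sz
 * is 128 bytes, the size of a full 8-register group.  With LMUL = 2
 * (s->lmul = 1) MAXSZ() returns 128 >> 2 = 32 bytes; with LMUL = 1/2
 * (s->lmul = -1) it returns 128 >> 4 = 8 bytes.
 */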
typedef void gen_tl_ldst(TCGv, TCGv_ptr, tcg_target_long);

/*
 * Simulate the strided load/store main loop:
 *
 * for (i = env->vstart; i < env->vl; env->vstart = ++i) {
 *     k = 0;
 *     while (k < nf) {
 *         if (!vm && !vext_elem_mask(v0, i)) {
 *             vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
 *                               (i + k * max_elems + 1) * esz);
 *             k++;
 *             continue;
 *         }
 *         target_ulong addr = base + stride * i + (k << log2_esz);
 *         ldst(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
 *         k++;
 *     }
 * }
 */
static void gen_ldst_stride_main_loop(DisasContext *s, TCGv dest, uint32_t rs1,
                                      uint32_t rs2, uint32_t vm, uint32_t nf,
                                      gen_tl_ldst *ld_fn, gen_tl_ldst *st_fn,
                                      bool is_load)
{
    TCGv addr = tcg_temp_new();
    TCGv base = get_gpr(s, rs1, EXT_NONE);
    TCGv stride = get_gpr(s, rs2, EXT_NONE);

    TCGv i = tcg_temp_new();
    TCGv i_esz = tcg_temp_new();
    TCGv k = tcg_temp_new();
    TCGv k_esz = tcg_temp_new();
    TCGv k_max = tcg_temp_new();
    TCGv mask = tcg_temp_new();
    TCGv mask_offs = tcg_temp_new();
    TCGv mask_offs_64 = tcg_temp_new();
    TCGv mask_elem = tcg_temp_new();
    TCGv mask_offs_rem = tcg_temp_new();
    TCGv vreg = tcg_temp_new();
    TCGv dest_offs = tcg_temp_new();
    TCGv stride_offs = tcg_temp_new();

    uint32_t max_elems = MAXSZ(s) >> s->sew;

    TCGLabel *start = gen_new_label();
    TCGLabel *end = gen_new_label();
    TCGLabel *start_k = gen_new_label();
    TCGLabel *inc_k = gen_new_label();
    TCGLabel *end_k = gen_new_label();

    MemOp atomicity = MO_ATOM_NONE;
    if (s->sew == 0) {
        atomicity = MO_ATOM_NONE;
    } else {
        atomicity = MO_ATOM_IFALIGN_PAIR;
    }

    mark_vs_dirty(s);

    tcg_gen_addi_tl(mask, (TCGv)tcg_env, vreg_ofs(s, 0));

    /* Start of outer loop. */
    tcg_gen_mov_tl(i, cpu_vstart);
    gen_set_label(start);
    tcg_gen_brcond_tl(TCG_COND_GE, i, cpu_vl, end);
    tcg_gen_shli_tl(i_esz, i, s->sew);
    /* Start of inner loop. */
    tcg_gen_movi_tl(k, 0);
    gen_set_label(start_k);
    tcg_gen_brcond_tl(TCG_COND_GE, k, tcg_constant_tl(nf), end_k);
    /*
     * If we are in mask agnostic regime and the operation is not unmasked we
     * set the inactive elements to 1.
     */
    if (!vm && s->vma) {
        TCGLabel *active_element = gen_new_label();
        /* (i + k * max_elems) * esz */
        tcg_gen_shli_tl(mask_offs, k, get_log2(max_elems << s->sew));
        tcg_gen_add_tl(mask_offs, mask_offs, i_esz);

        /*
         * Check whether the i bit of the mask is 0 or 1.
         *
         * static inline int vext_elem_mask(void *v0, int index)
         * {
         *     int idx = index / 64;
         *     int pos = index % 64;
         *     return (((uint64_t *)v0)[idx] >> pos) & 1;
         * }
         */
        tcg_gen_shri_tl(mask_offs_64, mask_offs, 3);
        tcg_gen_add_tl(mask_offs_64, mask_offs_64, mask);
        tcg_gen_ld_i64((TCGv_i64)mask_elem, (TCGv_ptr)mask_offs_64, 0);
        tcg_gen_rem_tl(mask_offs_rem, mask_offs, tcg_constant_tl(8));
        tcg_gen_shr_tl(mask_elem, mask_elem, mask_offs_rem);
        tcg_gen_andi_tl(mask_elem, mask_elem, 1);
        tcg_gen_brcond_tl(TCG_COND_NE, mask_elem, tcg_constant_tl(0),
                          active_element);
        /*
         * Set masked-off elements in the destination vector register to 1s.
         * Store instructions simply skip this bit as memory ops access memory
         * only for active elements.
         */
        if (is_load) {
            tcg_gen_shli_tl(mask_offs, mask_offs, s->sew);
            tcg_gen_add_tl(mask_offs, mask_offs, dest);
            st_fn(tcg_constant_tl(-1), (TCGv_ptr)mask_offs, 0);
        }
        tcg_gen_br(inc_k);
        gen_set_label(active_element);
    }
    /*
     * The element is active, calculate the address with stride:
     * target_ulong addr = base + stride * i + (k << log2_esz);
     */
    tcg_gen_mul_tl(stride_offs, stride, i);
    tcg_gen_shli_tl(k_esz, k, s->sew);
    tcg_gen_add_tl(stride_offs, stride_offs, k_esz);
    tcg_gen_add_tl(addr, base, stride_offs);
    /* Calculate the offset in the dst/src vector register. */
    tcg_gen_shli_tl(k_max, k, get_log2(max_elems));
    tcg_gen_add_tl(dest_offs, i, k_max);
    tcg_gen_shli_tl(dest_offs, dest_offs, s->sew);
    tcg_gen_add_tl(dest_offs, dest_offs, dest);
    if (is_load) {
        tcg_gen_qemu_ld_tl(vreg, addr, s->mem_idx, MO_LE | s->sew | atomicity);
        st_fn((TCGv)vreg, (TCGv_ptr)dest_offs, 0);
    } else {
        ld_fn((TCGv)vreg, (TCGv_ptr)dest_offs, 0);
        tcg_gen_qemu_st_tl(vreg, addr, s->mem_idx, MO_LE | s->sew | atomicity);
    }
    /*
     * We don't execute the load/store above if the element was inactive.
     * We jump instead directly to incrementing k and continuing the loop.
     */
    if (!vm && s->vma) {
        gen_set_label(inc_k);
    }
    tcg_gen_addi_tl(k, k, 1);
    tcg_gen_br(start_k);
    /* End of the inner loop. */
    gen_set_label(end_k);

    tcg_gen_addi_tl(i, i, 1);
    tcg_gen_mov_tl(cpu_vstart, i);
    tcg_gen_br(start);

    /* End of the outer loop. */
    gen_set_label(end);

    return;
}
/*
 * Set the tail bytes of the strided loads/stores to 1:
 *
 * for (k = 0; k < nf; ++k) {
 *     cnt = (k * max_elems + vl) * esz;
 *     tot = (k * max_elems + max_elems) * esz;
 *     for (i = cnt; i < tot; i += esz) {
 *         store_1s(-1, vd[vl+i]);
 *     }
 * }
 */
static void gen_ldst_stride_tail_loop(DisasContext *s, TCGv dest, uint32_t nf,
                                      gen_tl_ldst *st_fn)
{
    TCGv i = tcg_temp_new();
    TCGv k = tcg_temp_new();
    TCGv tail_cnt = tcg_temp_new();
    TCGv tail_tot = tcg_temp_new();
    TCGv tail_addr = tcg_temp_new();

    TCGLabel *start = gen_new_label();
    TCGLabel *end = gen_new_label();
    TCGLabel *start_i = gen_new_label();
    TCGLabel *end_i = gen_new_label();

    uint32_t max_elems_b = MAXSZ(s);
    uint32_t esz = 1 << s->sew;

    /* Start of the outer loop. */
    tcg_gen_movi_tl(k, 0);
    tcg_gen_shli_tl(tail_cnt, cpu_vl, s->sew);
    tcg_gen_movi_tl(tail_tot, max_elems_b);
    tcg_gen_add_tl(tail_addr, dest, tail_cnt);
    gen_set_label(start);
    tcg_gen_brcond_tl(TCG_COND_GE, k, tcg_constant_tl(nf), end);
    /* Start of the inner loop. */
    tcg_gen_mov_tl(i, tail_cnt);
    gen_set_label(start_i);
    tcg_gen_brcond_tl(TCG_COND_GE, i, tail_tot, end_i);
    /* store_1s(-1, vd[vl+i]); */
    st_fn(tcg_constant_tl(-1), (TCGv_ptr)tail_addr, 0);
    tcg_gen_addi_tl(tail_addr, tail_addr, esz);
    tcg_gen_addi_tl(i, i, esz);
    tcg_gen_br(start_i);
    /* End of the inner loop. */
    gen_set_label(end_i);
    /* Update the counts */
    tcg_gen_addi_tl(tail_cnt, tail_cnt, max_elems_b);
    tcg_gen_addi_tl(tail_tot, tail_cnt, max_elems_b);
    tcg_gen_addi_tl(k, k, 1);
    tcg_gen_br(start);
    /* End of the outer loop. */
    gen_set_label(end);

    return;
}

static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2,
                              uint32_t data, DisasContext *s, bool is_load)
{
    if (!s->vstart_eq_zero) {
        return false;
    }

    TCGv dest = tcg_temp_new();

    uint32_t nf = FIELD_EX32(data, VDATA, NF);
    uint32_t vm = FIELD_EX32(data, VDATA, VM);

    /* Destination register and mask register */
    tcg_gen_addi_tl(dest, (TCGv)tcg_env, vreg_ofs(s, vd));

    /*
     * Select the appropriate load/store to retrieve data from the vector
     * register given a specific SEW.
     */
    static gen_tl_ldst * const ld_fns[4] = {
        tcg_gen_ld8u_tl, tcg_gen_ld16u_tl,
        tcg_gen_ld32u_tl, tcg_gen_ld_tl
    };

    static gen_tl_ldst * const st_fns[4] = {
        tcg_gen_st8_tl, tcg_gen_st16_tl,
        tcg_gen_st32_tl, tcg_gen_st_tl
    };

    gen_tl_ldst *ld_fn = ld_fns[s->sew];
    gen_tl_ldst *st_fn = st_fns[s->sew];

    if (ld_fn == NULL || st_fn == NULL) {
        return false;
    }

    mark_vs_dirty(s);

    gen_ldst_stride_main_loop(s, dest, rs1, rs2, vm, nf, ld_fn, st_fn, is_load);

    tcg_gen_movi_tl(cpu_vstart, 0);

    /*
     * Set the tail bytes to 1 if tail agnostic:
     */
    if (s->vta != 0 && is_load) {
        gen_ldst_stride_tail_loop(s, dest, nf, st_fn);
    }

    finalize_rvv_inst(s);
    return true;
}

static bool ld_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
{
    uint32_t data = 0;

    uint8_t emul = vext_get_emul(s, eew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    data = FIELD_DP32(data, VDATA, VTA, s->vta);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);
    return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, s, true);
}

static bool ld_stride_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_load(s, a->rd, a->nf, a->vm, eew);
}

GEN_VEXT_TRANS(vlse8_v,  MO_8,  rnfvm, ld_stride_op, ld_stride_check)
GEN_VEXT_TRANS(vlse16_v, MO_16, rnfvm, ld_stride_op, ld_stride_check)
GEN_VEXT_TRANS(vlse32_v, MO_32, rnfvm, ld_stride_op, ld_stride_check)
GEN_VEXT_TRANS(vlse64_v, MO_64, rnfvm, ld_stride_op, ld_stride_check)
static bool st_stride_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
{
    uint32_t data = 0;

    uint8_t emul = vext_get_emul(s, eew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);

    return ldst_stride_trans(a->rd, a->rs1, a->rs2, data, s, false);
}

static bool st_stride_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_store(s, a->rd, a->nf, eew);
}

GEN_VEXT_TRANS(vsse8_v,  MO_8,  rnfvm, st_stride_op, st_stride_check)
GEN_VEXT_TRANS(vsse16_v, MO_16, rnfvm, st_stride_op, st_stride_check)
GEN_VEXT_TRANS(vsse32_v, MO_32, rnfvm, st_stride_op, st_stride_check)
GEN_VEXT_TRANS(vsse64_v, MO_64, rnfvm, st_stride_op, st_stride_check)

/*
 *** index load and store
 */
typedef void gen_helper_ldst_index(TCGv_ptr, TCGv_ptr, TCGv,
                                   TCGv_ptr, TCGv_env, TCGv_i32);

static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2,
                             uint32_t data, gen_helper_ldst_index *fn,
                             DisasContext *s)
{
    TCGv_ptr dest, mask, index;
    TCGv base;
    TCGv_i32 desc;

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    index = tcg_temp_new_ptr();
    base = get_gpr(s, rs1, EXT_NONE);
    desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
                                      s->cfg_ptr->vlenb, data));

    tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(index, tcg_env, vreg_ofs(s, vs2));
    tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0));

    mark_vs_dirty(s);

    fn(dest, mask, base, index, tcg_env, desc);

    finalize_rvv_inst(s);
    return true;
}

static bool ld_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_index *fn;
    static gen_helper_ldst_index * const fns[4][4] = {
        /*
         * offset vector register group EEW = 8,
         * data vector register group EEW = SEW
         */
        { gen_helper_vlxei8_8_v, gen_helper_vlxei8_16_v,
          gen_helper_vlxei8_32_v, gen_helper_vlxei8_64_v },
        /*
         * offset vector register group EEW = 16,
         * data vector register group EEW = SEW
         */
        { gen_helper_vlxei16_8_v, gen_helper_vlxei16_16_v,
          gen_helper_vlxei16_32_v, gen_helper_vlxei16_64_v },
        /*
         * offset vector register group EEW = 32,
         * data vector register group EEW = SEW
         */
        { gen_helper_vlxei32_8_v, gen_helper_vlxei32_16_v,
          gen_helper_vlxei32_32_v, gen_helper_vlxei32_64_v },
        /*
         * offset vector register group EEW = 64,
         * data vector register group EEW = SEW
         */
        { gen_helper_vlxei64_8_v, gen_helper_vlxei64_16_v,
          gen_helper_vlxei64_32_v, gen_helper_vlxei64_64_v }
    };

    fn = fns[eew][s->sew];

    uint8_t emul = vext_get_emul(s, s->sew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    data = FIELD_DP32(data, VDATA, VTA, s->vta);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);
    return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
}

static bool ld_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_ld_index(s, a->rd, a->rs2, a->nf, a->vm, eew) &&
           vext_check_input_eew(s, -1, 0, a->rs2, eew, a->vm);
}

GEN_VEXT_TRANS(vlxei8_v,  MO_8,  rnfvm, ld_index_op, ld_index_check)
GEN_VEXT_TRANS(vlxei16_v, MO_16, rnfvm, ld_index_op, ld_index_check)
GEN_VEXT_TRANS(vlxei32_v, MO_32, rnfvm, ld_index_op, ld_index_check)
GEN_VEXT_TRANS(vlxei64_v, MO_64, rnfvm, ld_index_op, ld_index_check)
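/*
 * The helper is selected by both the index EEW (first dimension) and the
 * data SEW (second dimension): e.g. a 16-bit-indexed load executed with
 * SEW = 32 picks fns[MO_16][MO_32], i.e. gen_helper_vlxei16_32_v.  The
 * store table below follows the same layout.
 */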
static bool st_index_op(DisasContext *s, arg_rnfvm *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_index *fn;
    static gen_helper_ldst_index * const fns[4][4] = {
        /*
         * offset vector register group EEW = 8,
         * data vector register group EEW = SEW
         */
        { gen_helper_vsxei8_8_v, gen_helper_vsxei8_16_v,
          gen_helper_vsxei8_32_v, gen_helper_vsxei8_64_v },
        /*
         * offset vector register group EEW = 16,
         * data vector register group EEW = SEW
         */
        { gen_helper_vsxei16_8_v, gen_helper_vsxei16_16_v,
          gen_helper_vsxei16_32_v, gen_helper_vsxei16_64_v },
        /*
         * offset vector register group EEW = 32,
         * data vector register group EEW = SEW
         */
        { gen_helper_vsxei32_8_v, gen_helper_vsxei32_16_v,
          gen_helper_vsxei32_32_v, gen_helper_vsxei32_64_v },
        /*
         * offset vector register group EEW = 64,
         * data vector register group EEW = SEW
         */
        { gen_helper_vsxei64_8_v, gen_helper_vsxei64_16_v,
          gen_helper_vsxei64_32_v, gen_helper_vsxei64_64_v }
    };

    fn = fns[eew][s->sew];

    uint8_t emul = vext_get_emul(s, s->sew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    return ldst_index_trans(a->rd, a->rs1, a->rs2, data, fn, s);
}

static bool st_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_st_index(s, a->rd, a->rs2, a->nf, eew) &&
           vext_check_input_eew(s, a->rd, s->sew, a->rs2, eew, a->vm);
}

GEN_VEXT_TRANS(vsxei8_v,  MO_8,  rnfvm, st_index_op, st_index_check)
GEN_VEXT_TRANS(vsxei16_v, MO_16, rnfvm, st_index_op, st_index_check)
GEN_VEXT_TRANS(vsxei32_v, MO_32, rnfvm, st_index_op, st_index_check)
GEN_VEXT_TRANS(vsxei64_v, MO_64, rnfvm, st_index_op, st_index_check)

/*
 *** unit stride fault-only-first load
 */
static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data,
                       gen_helper_ldst_us *fn, DisasContext *s)
{
    TCGv_ptr dest, mask;
    TCGv base;
    TCGv_i32 desc;

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    base = get_gpr(s, rs1, EXT_NONE);
    desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
                                      s->cfg_ptr->vlenb, data));

    tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0));

    fn(dest, mask, base, tcg_env, desc);

    finalize_rvv_inst(s);

    /* vector unit-stride fault-only-first load may modify vl CSR */
    gen_update_pc(s, s->cur_insn_len);
    lookup_and_goto_ptr(s);
    s->base.is_jmp = DISAS_NORETURN;

    return true;
}

static bool ldff_op(DisasContext *s, arg_r2nfvm *a, uint8_t eew)
{
    uint32_t data = 0;
    gen_helper_ldst_us *fn;
    static gen_helper_ldst_us * const fns[4] = {
        gen_helper_vle8ff_v, gen_helper_vle16ff_v,
        gen_helper_vle32ff_v, gen_helper_vle64ff_v
    };

    fn = fns[eew];
    if (fn == NULL) {
        return false;
    }

    uint8_t emul = vext_get_emul(s, eew);
    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, emul);
    data = FIELD_DP32(data, VDATA, NF, a->nf);
    data = FIELD_DP32(data, VDATA, VTA, s->vta);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);
    return ldff_trans(a->rd, a->rs1, data, fn, s);
}
GEN_VEXT_TRANS(vle8ff_v,  MO_8,  r2nfvm, ldff_op, ld_us_check)
GEN_VEXT_TRANS(vle16ff_v, MO_16, r2nfvm, ldff_op, ld_us_check)
GEN_VEXT_TRANS(vle32ff_v, MO_32, r2nfvm, ldff_op, ld_us_check)
GEN_VEXT_TRANS(vle64ff_v, MO_64, r2nfvm, ldff_op, ld_us_check)

/*
 * load and store whole register instructions
 */
typedef void gen_helper_ldst_whole(TCGv_ptr, TCGv, TCGv_env, TCGv_i32);

static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
                             uint32_t log2_esz, gen_helper_ldst_whole *fn,
                             DisasContext *s, bool is_load)
{
    mark_vs_dirty(s);

    /*
     * Load/store multiple bytes per iteration.
     * When possible do this atomically.
     * Update vstart with the number of processed elements.
     * Use the helper function if either:
     * - vstart is not 0.
     * - the target has 32 bit registers and we are loading/storing 64 bit long
     *   elements. This is to ensure that we process every element with a
     *   single memory instruction.
     */

    bool use_helper_fn = !(s->vstart_eq_zero) ||
                         (TCG_TARGET_REG_BITS == 32 && log2_esz == 3);

    if (!use_helper_fn) {
        TCGv addr = tcg_temp_new();
        uint32_t size = s->cfg_ptr->vlenb * nf;
        TCGv_i64 t8 = tcg_temp_new_i64();
        TCGv_i32 t4 = tcg_temp_new_i32();
        MemOp atomicity = MO_ATOM_NONE;
        if (log2_esz == 0) {
            atomicity = MO_ATOM_NONE;
        } else {
            atomicity = MO_ATOM_IFALIGN_PAIR;
        }
        if (TCG_TARGET_REG_BITS == 64) {
            for (int i = 0; i < size; i += 8) {
                addr = get_address(s, rs1, i);
                if (is_load) {
                    tcg_gen_qemu_ld_i64(t8, addr, s->mem_idx,
                                        MO_LE | MO_64 | atomicity);
                    tcg_gen_st_i64(t8, tcg_env, vreg_ofs(s, vd) + i);
                } else {
                    tcg_gen_ld_i64(t8, tcg_env, vreg_ofs(s, vd) + i);
                    tcg_gen_qemu_st_i64(t8, addr, s->mem_idx,
                                        MO_LE | MO_64 | atomicity);
                }
                if (i == size - 8) {
                    tcg_gen_movi_tl(cpu_vstart, 0);
                } else {
                    tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 8 >> log2_esz);
                }
            }
        } else {
            for (int i = 0; i < size; i += 4) {
                addr = get_address(s, rs1, i);
                if (is_load) {
                    tcg_gen_qemu_ld_i32(t4, addr, s->mem_idx,
                                        MO_LE | MO_32 | atomicity);
                    tcg_gen_st_i32(t4, tcg_env, vreg_ofs(s, vd) + i);
                } else {
                    tcg_gen_ld_i32(t4, tcg_env, vreg_ofs(s, vd) + i);
                    tcg_gen_qemu_st_i32(t4, addr, s->mem_idx,
                                        MO_LE | MO_32 | atomicity);
                }
                if (i == size - 4) {
                    tcg_gen_movi_tl(cpu_vstart, 0);
                } else {
                    tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 4 >> log2_esz);
                }
            }
        }
    } else {
        TCGv_ptr dest;
        TCGv base;
        TCGv_i32 desc;
        uint32_t data = FIELD_DP32(0, VDATA, NF, nf);
        data = FIELD_DP32(data, VDATA, VM, 1);
        dest = tcg_temp_new_ptr();
        desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
                                          s->cfg_ptr->vlenb, data));
        base = get_gpr(s, rs1, EXT_NONE);
        tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
        fn(dest, base, tcg_env, desc);
    }

    finalize_rvv_inst(s);
    return true;
}
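/*
 * Sizing example for the inline path above (assuming VLEN = 128, so
 * vlenb = 16): vl2re32.v copies size = 16 * 2 = 32 bytes, i.e. four 8-byte
 * iterations on a 64-bit host, and vstart is advanced by 8 >> 2 = 2
 * elements per iteration until it is reset to 0 on the last one.
 */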
/*
 * Load and store whole register instructions ignore vtype and vl setting.
 * Thus, we don't need to check vill bit. (Section 7.9)
 */
#define GEN_LDST_WHOLE_TRANS(NAME, ETYPE, ARG_NF, IS_LOAD)                  \
static bool trans_##NAME(DisasContext *s, arg_##NAME * a)                   \
{                                                                           \
    if (require_rvv(s) &&                                                   \
        QEMU_IS_ALIGNED(a->rd, ARG_NF)) {                                   \
        return ldst_whole_trans(a->rd, a->rs1, ARG_NF, ctzl(sizeof(ETYPE)), \
                                gen_helper_##NAME, s, IS_LOAD);             \
    }                                                                       \
    return false;                                                           \
}

GEN_LDST_WHOLE_TRANS(vl1re8_v,  int8_t,  1, true)
GEN_LDST_WHOLE_TRANS(vl1re16_v, int16_t, 1, true)
GEN_LDST_WHOLE_TRANS(vl1re32_v, int32_t, 1, true)
GEN_LDST_WHOLE_TRANS(vl1re64_v, int64_t, 1, true)
GEN_LDST_WHOLE_TRANS(vl2re8_v,  int8_t,  2, true)
GEN_LDST_WHOLE_TRANS(vl2re16_v, int16_t, 2, true)
GEN_LDST_WHOLE_TRANS(vl2re32_v, int32_t, 2, true)
GEN_LDST_WHOLE_TRANS(vl2re64_v, int64_t, 2, true)
GEN_LDST_WHOLE_TRANS(vl4re8_v,  int8_t,  4, true)
GEN_LDST_WHOLE_TRANS(vl4re16_v, int16_t, 4, true)
GEN_LDST_WHOLE_TRANS(vl4re32_v, int32_t, 4, true)
GEN_LDST_WHOLE_TRANS(vl4re64_v, int64_t, 4, true)
GEN_LDST_WHOLE_TRANS(vl8re8_v,  int8_t,  8, true)
GEN_LDST_WHOLE_TRANS(vl8re16_v, int16_t, 8, true)
GEN_LDST_WHOLE_TRANS(vl8re32_v, int32_t, 8, true)
GEN_LDST_WHOLE_TRANS(vl8re64_v, int64_t, 8, true)

/*
 * The vector whole register store instructions are encoded similarly to
 * unmasked unit-stride stores of elements with EEW=8.
 */
GEN_LDST_WHOLE_TRANS(vs1r_v, int8_t, 1, false)
GEN_LDST_WHOLE_TRANS(vs2r_v, int8_t, 2, false)
GEN_LDST_WHOLE_TRANS(vs4r_v, int8_t, 4, false)
GEN_LDST_WHOLE_TRANS(vs8r_v, int8_t, 8, false)

/*
 *** Vector Integer Arithmetic Instructions
 */

static bool opivv_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm);
}

typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
                        uint32_t, uint32_t, uint32_t);

static inline bool
do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn,
              gen_helper_gvec_4_ptr *fn)
{
    if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
        gvec_fn(s->sew, vreg_ofs(s, a->rd),
                vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),
                MAXSZ(s), MAXSZ(s));
    } else {
        uint32_t data = 0;

        data = FIELD_DP32(data, VDATA, VM, a->vm);
        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
        data = FIELD_DP32(data, VDATA, VTA, s->vta);
        data = FIELD_DP32(data, VDATA, VMA, s->vma);
        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                           vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
                           tcg_env, s->cfg_ptr->vlenb,
                           s->cfg_ptr->vlenb, data, fn);
    }
    finalize_rvv_inst(s);
    return true;
}

/* OPIVV with GVEC IR */
#define GEN_OPIVV_GVEC_TRANS(NAME, SUF)                           \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)            \
{                                                                 \
    static gen_helper_gvec_4_ptr * const fns[4] = {               \
        gen_helper_##NAME##_b, gen_helper_##NAME##_h,             \
        gen_helper_##NAME##_w, gen_helper_##NAME##_d,             \
    };                                                            \
    if (!opivv_check(s, a)) {                                     \
        return false;                                             \
    }                                                             \
    return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]);  \
}

GEN_OPIVV_GVEC_TRANS(vadd_vv, add)
GEN_OPIVV_GVEC_TRANS(vsub_vv, sub)
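/*
 * Note on the fast path in do_opivv_gvec() above: the inline GVEC expansion
 * is only used for unmasked operations where vl equals VLMAX and no
 * tail-agnostic handling can be required (no VTA with a fractional LMUL);
 * all other cases fall back to the out-of-line helper, which applies the
 * mask, tail and policy bits itself.
 */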
typedef void gen_helper_opivx(TCGv_ptr, TCGv_ptr, TCGv, TCGv_ptr,
                              TCGv_env, TCGv_i32);

static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm,
                        gen_helper_opivx *fn, DisasContext *s)
{
    TCGv_ptr dest, src2, mask;
    TCGv src1;
    TCGv_i32 desc;
    uint32_t data = 0;

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    src2 = tcg_temp_new_ptr();
    src1 = get_gpr(s, rs1, EXT_SIGN);

    data = FIELD_DP32(data, VDATA, VM, vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
    data = FIELD_DP32(data, VDATA, VTA, s->vta);
    data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);
    desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
                                      s->cfg_ptr->vlenb, data));

    tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(src2, tcg_env, vreg_ofs(s, vs2));
    tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0));

    fn(dest, mask, src1, src2, tcg_env, desc);

    finalize_rvv_inst(s);
    return true;
}

static bool opivx_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_ss(s, a->rd, a->rs2, a->vm);
}

typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t, TCGv_i64,
                         uint32_t, uint32_t);

static inline bool
do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn,
              gen_helper_opivx *fn)
{
    if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
        TCGv_i64 src1 = tcg_temp_new_i64();

        tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN));
        gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
                src1, MAXSZ(s), MAXSZ(s));

        finalize_rvv_inst(s);
        return true;
    }
    return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s);
}

/* OPIVX with GVEC IR */
#define GEN_OPIVX_GVEC_TRANS(NAME, SUF)                           \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)            \
{                                                                 \
    static gen_helper_opivx * const fns[4] = {                    \
        gen_helper_##NAME##_b, gen_helper_##NAME##_h,             \
        gen_helper_##NAME##_w, gen_helper_##NAME##_d,             \
    };                                                            \
    if (!opivx_check(s, a)) {                                     \
        return false;                                             \
    }                                                             \
    return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]);  \
}

GEN_OPIVX_GVEC_TRANS(vadd_vx, adds)
GEN_OPIVX_GVEC_TRANS(vsub_vx, subs)
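/*
 * The helpers below implement reverse subtraction for vrsub.vx/vrsub.vi,
 * i.e. vd[i] = (rs1 or imm) - vs2[i], so the underlying TCG subtract is
 * emitted with its operands swapped relative to vsub.
 */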
static void gen_vec_rsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_vec_sub8_i64(d, b, a);
}

static void gen_vec_rsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_vec_sub16_i64(d, b, a);
}

static void gen_rsub_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
    tcg_gen_sub_i32(ret, arg2, arg1);
}

static void gen_rsub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
{
    tcg_gen_sub_i64(ret, arg2, arg1);
}

static void gen_rsub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_sub_vec(vece, r, b, a);
}

static void tcg_gen_gvec_rsubs(unsigned vece, uint32_t dofs, uint32_t aofs,
                               TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
{
    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
    static const GVecGen2s rsub_op[4] = {
        { .fni8 = gen_vec_rsub8_i64,
          .fniv = gen_rsub_vec,
          .fno = gen_helper_vec_rsubs8,
          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = gen_vec_rsub16_i64,
          .fniv = gen_rsub_vec,
          .fno = gen_helper_vec_rsubs16,
          .opt_opc = vecop_list,
          .vece = MO_16 },
        { .fni4 = gen_rsub_i32,
          .fniv = gen_rsub_vec,
          .fno = gen_helper_vec_rsubs32,
          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = gen_rsub_i64,
          .fniv = gen_rsub_vec,
          .fno = gen_helper_vec_rsubs64,
          .opt_opc = vecop_list,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };

    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]);
}

GEN_OPIVX_GVEC_TRANS(vrsub_vx, rsubs)

typedef enum {
    IMM_ZX,         /* Zero-extended */
    IMM_SX,         /* Sign-extended */
    IMM_TRUNC_SEW,  /* Truncate to log(SEW) bits */
    IMM_TRUNC_2SEW, /* Truncate to log(2*SEW) bits */
} imm_mode_t;

static int64_t extract_imm(DisasContext *s, uint32_t imm, imm_mode_t imm_mode)
{
    switch (imm_mode) {
    case IMM_ZX:
        return extract64(imm, 0, 5);
    case IMM_SX:
        return sextract64(imm, 0, 5);
    case IMM_TRUNC_SEW:
        return extract64(imm, 0, s->sew + 3);
    case IMM_TRUNC_2SEW:
        return extract64(imm, 0, s->sew + 4);
    default:
        g_assert_not_reached();
    }
}

static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm,
                        gen_helper_opivx *fn, DisasContext *s,
                        imm_mode_t imm_mode)
{
    TCGv_ptr dest, src2, mask;
    TCGv src1;
    TCGv_i32 desc;
    uint32_t data = 0;

    dest = tcg_temp_new_ptr();
    mask = tcg_temp_new_ptr();
    src2 = tcg_temp_new_ptr();
    src1 = tcg_constant_tl(extract_imm(s, imm, imm_mode));

    data = FIELD_DP32(data, VDATA, VM, vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
    data = FIELD_DP32(data, VDATA, VTA, s->vta);
    data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);
    desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
                                      s->cfg_ptr->vlenb, data));

    tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
    tcg_gen_addi_ptr(src2, tcg_env, vreg_ofs(s, vs2));
    tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0));

    fn(dest, mask, src1, src2, tcg_env, desc);

    finalize_rvv_inst(s);
    return true;
}

typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
                         uint32_t, uint32_t);

static inline bool
do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn,
              gen_helper_opivx *fn, imm_mode_t imm_mode)
{
    if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
        gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2),
                extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s));
        finalize_rvv_inst(s);
        return true;
    }
    return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s, imm_mode);
}

/* OPIVI with GVEC IR */
#define GEN_OPIVI_GVEC_TRANS(NAME, IMM_MODE, OPIVX, SUF)          \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a)            \
{                                                                 \
    static gen_helper_opivx * const fns[4] = {                    \
        gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h,           \
        gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d,           \
    };                                                            \
    if (!opivx_check(s, a)) {                                     \
        return false;                                             \
    }                                                             \
    return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF,                \
                         fns[s->sew], IMM_MODE);                  \
}

GEN_OPIVI_GVEC_TRANS(vadd_vi, IMM_SX, vadd_vx, addi)

static void tcg_gen_gvec_rsubi(unsigned vece, uint32_t dofs, uint32_t aofs,
                               int64_t c, uint32_t oprsz, uint32_t maxsz)
{
    TCGv_i64 tmp = tcg_constant_i64(c);
    tcg_gen_gvec_rsubs(vece, dofs, aofs, tmp, oprsz, maxsz);
}

GEN_OPIVI_GVEC_TRANS(vrsub_vi, IMM_SX, vrsub_vx, rsubi)
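/*
 * extract_imm() above interprets the raw 5-bit immediate field according to
 * the instruction class: e.g. vadd.vi uses IMM_SX, so a field value of 0x1f
 * becomes -1, whereas IMM_ZX would yield 31.
 */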
opivv_widen_check(DisasContext *s, arg_rmrr *a) 1826{ 1827 return require_rvv(s) && 1828 vext_check_isa_ill(s) && 1829 vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm); 1830} 1831 1832/* OPIVV with overwrite and WIDEN */ 1833static bool opivv_overwrite_widen_check(DisasContext *s, arg_rmrr *a) 1834{ 1835 return require_rvv(s) && 1836 vext_check_isa_ill(s) && 1837 vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) && 1838 vext_check_input_eew(s, a->rd, s->sew + 1, a->rs1, s->sew, a->vm) && 1839 vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); 1840} 1841 1842static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, 1843 gen_helper_gvec_4_ptr *fn, 1844 bool (*checkfn)(DisasContext *, arg_rmrr *)) 1845{ 1846 if (checkfn(s, a)) { 1847 uint32_t data = 0; 1848 1849 data = FIELD_DP32(data, VDATA, VM, a->vm); 1850 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 1851 data = FIELD_DP32(data, VDATA, VTA, s->vta); 1852 data = FIELD_DP32(data, VDATA, VMA, s->vma); 1853 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), 1854 vreg_ofs(s, a->rs1), 1855 vreg_ofs(s, a->rs2), 1856 tcg_env, s->cfg_ptr->vlenb, 1857 s->cfg_ptr->vlenb, 1858 data, fn); 1859 finalize_rvv_inst(s); 1860 return true; 1861 } 1862 return false; 1863} 1864 1865#define GEN_OPIVV_WIDEN_TRANS(NAME, CHECK) \ 1866static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 1867{ \ 1868 static gen_helper_gvec_4_ptr * const fns[3] = { \ 1869 gen_helper_##NAME##_b, \ 1870 gen_helper_##NAME##_h, \ 1871 gen_helper_##NAME##_w \ 1872 }; \ 1873 return do_opivv_widen(s, a, fns[s->sew], CHECK); \ 1874} 1875 1876GEN_OPIVV_WIDEN_TRANS(vwaddu_vv, opivv_widen_check) 1877GEN_OPIVV_WIDEN_TRANS(vwadd_vv, opivv_widen_check) 1878GEN_OPIVV_WIDEN_TRANS(vwsubu_vv, opivv_widen_check) 1879GEN_OPIVV_WIDEN_TRANS(vwsub_vv, opivv_widen_check) 1880 1881/* OPIVX with WIDEN */ 1882static bool opivx_widen_check(DisasContext *s, arg_rmrr *a) 1883{ 1884 return require_rvv(s) && 1885 vext_check_isa_ill(s) && 1886 vext_check_ds(s, a->rd, a->rs2, a->vm); 1887} 1888 1889static bool opivx_overwrite_widen_check(DisasContext *s, arg_rmrr *a) 1890{ 1891 return require_rvv(s) && 1892 vext_check_isa_ill(s) && 1893 vext_check_ds(s, a->rd, a->rs2, a->vm) && 1894 vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); 1895} 1896 1897#define GEN_OPIVX_WIDEN_TRANS(NAME, CHECK) \ 1898static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 1899{ \ 1900 if (CHECK(s, a)) { \ 1901 static gen_helper_opivx * const fns[3] = { \ 1902 gen_helper_##NAME##_b, \ 1903 gen_helper_##NAME##_h, \ 1904 gen_helper_##NAME##_w \ 1905 }; \ 1906 return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s); \ 1907 } \ 1908 return false; \ 1909} 1910 1911GEN_OPIVX_WIDEN_TRANS(vwaddu_vx, opivx_widen_check) 1912GEN_OPIVX_WIDEN_TRANS(vwadd_vx, opivx_widen_check) 1913GEN_OPIVX_WIDEN_TRANS(vwsubu_vx, opivx_widen_check) 1914GEN_OPIVX_WIDEN_TRANS(vwsub_vx, opivx_widen_check) 1915 1916/* WIDEN OPIVV with WIDEN */ 1917static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a) 1918{ 1919 return require_rvv(s) && 1920 vext_check_isa_ill(s) && 1921 vext_check_dds(s, a->rd, a->rs1, a->rs2, a->vm); 1922} 1923 1924static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, 1925 gen_helper_gvec_4_ptr *fn) 1926{ 1927 if (opiwv_widen_check(s, a)) { 1928 uint32_t data = 0; 1929 1930 data = FIELD_DP32(data, VDATA, VM, a->vm); 1931 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 1932 data = FIELD_DP32(data, VDATA, VTA, s->vta); 1933 data = FIELD_DP32(data, VDATA, VMA, s->vma); 1934 tcg_gen_gvec_4_ptr(vreg_ofs(s, 
a->rd), vreg_ofs(s, 0), 1935 vreg_ofs(s, a->rs1), 1936 vreg_ofs(s, a->rs2), 1937 tcg_env, s->cfg_ptr->vlenb, 1938 s->cfg_ptr->vlenb, data, fn); 1939 finalize_rvv_inst(s); 1940 return true; 1941 } 1942 return false; 1943} 1944 1945#define GEN_OPIWV_WIDEN_TRANS(NAME) \ 1946static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 1947{ \ 1948 static gen_helper_gvec_4_ptr * const fns[3] = { \ 1949 gen_helper_##NAME##_b, \ 1950 gen_helper_##NAME##_h, \ 1951 gen_helper_##NAME##_w \ 1952 }; \ 1953 return do_opiwv_widen(s, a, fns[s->sew]); \ 1954} 1955 1956GEN_OPIWV_WIDEN_TRANS(vwaddu_wv) 1957GEN_OPIWV_WIDEN_TRANS(vwadd_wv) 1958GEN_OPIWV_WIDEN_TRANS(vwsubu_wv) 1959GEN_OPIWV_WIDEN_TRANS(vwsub_wv) 1960 1961/* WIDEN OPIVX with WIDEN */ 1962static bool opiwx_widen_check(DisasContext *s, arg_rmrr *a) 1963{ 1964 return require_rvv(s) && 1965 vext_check_isa_ill(s) && 1966 vext_check_dd(s, a->rd, a->rs2, a->vm); 1967} 1968 1969static bool do_opiwx_widen(DisasContext *s, arg_rmrr *a, 1970 gen_helper_opivx *fn) 1971{ 1972 if (opiwx_widen_check(s, a)) { 1973 return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); 1974 } 1975 return false; 1976} 1977 1978#define GEN_OPIWX_WIDEN_TRANS(NAME) \ 1979static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 1980{ \ 1981 static gen_helper_opivx * const fns[3] = { \ 1982 gen_helper_##NAME##_b, \ 1983 gen_helper_##NAME##_h, \ 1984 gen_helper_##NAME##_w \ 1985 }; \ 1986 return do_opiwx_widen(s, a, fns[s->sew]); \ 1987} 1988 1989GEN_OPIWX_WIDEN_TRANS(vwaddu_wx) 1990GEN_OPIWX_WIDEN_TRANS(vwadd_wx) 1991GEN_OPIWX_WIDEN_TRANS(vwsubu_wx) 1992GEN_OPIWX_WIDEN_TRANS(vwsub_wx) 1993 1994static bool opivv_trans(uint32_t vd, uint32_t vs1, uint32_t vs2, uint32_t vm, 1995 gen_helper_gvec_4_ptr *fn, DisasContext *s) 1996{ 1997 uint32_t data = 0; 1998 1999 data = FIELD_DP32(data, VDATA, VM, vm); 2000 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 2001 data = FIELD_DP32(data, VDATA, VTA, s->vta); 2002 data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); 2003 data = FIELD_DP32(data, VDATA, VMA, s->vma); 2004 tcg_gen_gvec_4_ptr(vreg_ofs(s, vd), vreg_ofs(s, 0), vreg_ofs(s, vs1), 2005 vreg_ofs(s, vs2), tcg_env, s->cfg_ptr->vlenb, 2006 s->cfg_ptr->vlenb, data, fn); 2007 finalize_rvv_inst(s); 2008 return true; 2009} 2010 2011/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 2012/* OPIVV without GVEC IR */ 2013#define GEN_OPIVV_TRANS(NAME, CHECK) \ 2014static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2015{ \ 2016 if (CHECK(s, a)) { \ 2017 static gen_helper_gvec_4_ptr * const fns[4] = { \ 2018 gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ 2019 gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ 2020 }; \ 2021 return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ 2022 } \ 2023 return false; \ 2024} 2025 2026/* 2027 * For vadc and vsbc, an illegal instruction exception is raised if the 2028 * destination vector register is v0 and LMUL > 1. (Section 11.4) 2029 */ 2030static bool opivv_vadc_check(DisasContext *s, arg_rmrr *a) 2031{ 2032 return require_rvv(s) && 2033 vext_check_isa_ill(s) && 2034 (a->rd != 0) && 2035 vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm); 2036} 2037 2038GEN_OPIVV_TRANS(vadc_vvm, opivv_vadc_check) 2039GEN_OPIVV_TRANS(vsbc_vvm, opivv_vadc_check) 2040 2041/* 2042 * For vmadc and vmsbc, an illegal instruction exception is raised if the 2043 * destination vector register overlaps a source vector register group. 
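 * Unlike vadc/vsbc there is no vd != v0 requirement here; the checks below
 * only enforce the mask-destination layout via vext_check_mss()/vext_check_ms().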
2044 */ 2045static bool opivv_vmadc_check(DisasContext *s, arg_rmrr *a) 2046{ 2047 return require_rvv(s) && 2048 vext_check_isa_ill(s) && 2049 vext_check_mss(s, a->rd, a->rs1, a->rs2); 2050} 2051 2052GEN_OPIVV_TRANS(vmadc_vvm, opivv_vmadc_check) 2053GEN_OPIVV_TRANS(vmsbc_vvm, opivv_vmadc_check) 2054 2055static bool opivx_vadc_check(DisasContext *s, arg_rmrr *a) 2056{ 2057 return require_rvv(s) && 2058 vext_check_isa_ill(s) && 2059 (a->rd != 0) && 2060 vext_check_ss(s, a->rd, a->rs2, a->vm); 2061} 2062 2063/* OPIVX without GVEC IR */ 2064#define GEN_OPIVX_TRANS(NAME, CHECK) \ 2065static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2066{ \ 2067 if (CHECK(s, a)) { \ 2068 static gen_helper_opivx * const fns[4] = { \ 2069 gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ 2070 gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ 2071 }; \ 2072 \ 2073 return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ 2074 } \ 2075 return false; \ 2076} 2077 2078GEN_OPIVX_TRANS(vadc_vxm, opivx_vadc_check) 2079GEN_OPIVX_TRANS(vsbc_vxm, opivx_vadc_check) 2080 2081static bool opivx_vmadc_check(DisasContext *s, arg_rmrr *a) 2082{ 2083 return require_rvv(s) && 2084 vext_check_isa_ill(s) && 2085 vext_check_ms(s, a->rd, a->rs2); 2086} 2087 2088GEN_OPIVX_TRANS(vmadc_vxm, opivx_vmadc_check) 2089GEN_OPIVX_TRANS(vmsbc_vxm, opivx_vmadc_check) 2090 2091/* OPIVI without GVEC IR */ 2092#define GEN_OPIVI_TRANS(NAME, IMM_MODE, OPIVX, CHECK) \ 2093static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2094{ \ 2095 if (CHECK(s, a)) { \ 2096 static gen_helper_opivx * const fns[4] = { \ 2097 gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ 2098 gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ 2099 }; \ 2100 return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ 2101 fns[s->sew], s, IMM_MODE); \ 2102 } \ 2103 return false; \ 2104} 2105 2106GEN_OPIVI_TRANS(vadc_vim, IMM_SX, vadc_vxm, opivx_vadc_check) 2107GEN_OPIVI_TRANS(vmadc_vim, IMM_SX, vmadc_vxm, opivx_vmadc_check) 2108 2109/* Vector Bitwise Logical Instructions */ 2110GEN_OPIVV_GVEC_TRANS(vand_vv, and) 2111GEN_OPIVV_GVEC_TRANS(vor_vv, or) 2112GEN_OPIVV_GVEC_TRANS(vxor_vv, xor) 2113GEN_OPIVX_GVEC_TRANS(vand_vx, ands) 2114GEN_OPIVX_GVEC_TRANS(vor_vx, ors) 2115GEN_OPIVX_GVEC_TRANS(vxor_vx, xors) 2116GEN_OPIVI_GVEC_TRANS(vand_vi, IMM_SX, vand_vx, andi) 2117GEN_OPIVI_GVEC_TRANS(vor_vi, IMM_SX, vor_vx, ori) 2118GEN_OPIVI_GVEC_TRANS(vxor_vi, IMM_SX, vxor_vx, xori) 2119 2120/* Vector Single-Width Bit Shift Instructions */ 2121GEN_OPIVV_GVEC_TRANS(vsll_vv, shlv) 2122GEN_OPIVV_GVEC_TRANS(vsrl_vv, shrv) 2123GEN_OPIVV_GVEC_TRANS(vsra_vv, sarv) 2124 2125typedef void GVecGen2sFn32(unsigned, uint32_t, uint32_t, TCGv_i32, 2126 uint32_t, uint32_t); 2127 2128static inline bool 2129do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn, 2130 gen_helper_opivx *fn) 2131{ 2132 if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { 2133 TCGv_i32 src1 = tcg_temp_new_i32(); 2134 2135 tcg_gen_trunc_tl_i32(src1, get_gpr(s, a->rs1, EXT_NONE)); 2136 tcg_gen_extract_i32(src1, src1, 0, s->sew + 3); 2137 gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), 2138 src1, MAXSZ(s), MAXSZ(s)); 2139 2140 finalize_rvv_inst(s); 2141 return true; 2142 } 2143 return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); 2144} 2145 2146#define GEN_OPIVX_GVEC_SHIFT_TRANS(NAME, SUF) \ 2147static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2148{ \ 2149 static gen_helper_opivx * const fns[4] = { \ 2150 gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ 2151 gen_helper_##NAME##_w, 
gen_helper_##NAME##_d, \ 2152 }; \ 2153 if (!opivx_check(s, a)) { \ 2154 return false; \ 2155 } \ 2156 return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ 2157} 2158 2159GEN_OPIVX_GVEC_SHIFT_TRANS(vsll_vx, shls) 2160GEN_OPIVX_GVEC_SHIFT_TRANS(vsrl_vx, shrs) 2161GEN_OPIVX_GVEC_SHIFT_TRANS(vsra_vx, sars) 2162 2163GEN_OPIVI_GVEC_TRANS(vsll_vi, IMM_TRUNC_SEW, vsll_vx, shli) 2164GEN_OPIVI_GVEC_TRANS(vsrl_vi, IMM_TRUNC_SEW, vsrl_vx, shri) 2165GEN_OPIVI_GVEC_TRANS(vsra_vi, IMM_TRUNC_SEW, vsra_vx, sari) 2166 2167/* Vector Narrowing Integer Right Shift Instructions */ 2168static bool opiwv_narrow_check(DisasContext *s, arg_rmrr *a) 2169{ 2170 return require_rvv(s) && 2171 vext_check_isa_ill(s) && 2172 vext_check_sds(s, a->rd, a->rs1, a->rs2, a->vm); 2173} 2174 2175/* OPIVV with NARROW */ 2176#define GEN_OPIWV_NARROW_TRANS(NAME) \ 2177static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2178{ \ 2179 if (opiwv_narrow_check(s, a)) { \ 2180 uint32_t data = 0; \ 2181 static gen_helper_gvec_4_ptr * const fns[3] = { \ 2182 gen_helper_##NAME##_b, \ 2183 gen_helper_##NAME##_h, \ 2184 gen_helper_##NAME##_w, \ 2185 }; \ 2186 \ 2187 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 2188 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 2189 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 2190 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 2191 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 2192 vreg_ofs(s, a->rs1), \ 2193 vreg_ofs(s, a->rs2), tcg_env, \ 2194 s->cfg_ptr->vlenb, \ 2195 s->cfg_ptr->vlenb, data, \ 2196 fns[s->sew]); \ 2197 finalize_rvv_inst(s); \ 2198 return true; \ 2199 } \ 2200 return false; \ 2201} 2202GEN_OPIWV_NARROW_TRANS(vnsra_wv) 2203GEN_OPIWV_NARROW_TRANS(vnsrl_wv) 2204 2205static bool opiwx_narrow_check(DisasContext *s, arg_rmrr *a) 2206{ 2207 return require_rvv(s) && 2208 vext_check_isa_ill(s) && 2209 vext_check_sd(s, a->rd, a->rs2, a->vm); 2210} 2211 2212/* OPIVX with NARROW */ 2213#define GEN_OPIWX_NARROW_TRANS(NAME) \ 2214static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2215{ \ 2216 if (opiwx_narrow_check(s, a)) { \ 2217 static gen_helper_opivx * const fns[3] = { \ 2218 gen_helper_##NAME##_b, \ 2219 gen_helper_##NAME##_h, \ 2220 gen_helper_##NAME##_w, \ 2221 }; \ 2222 return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ 2223 } \ 2224 return false; \ 2225} 2226 2227GEN_OPIWX_NARROW_TRANS(vnsra_wx) 2228GEN_OPIWX_NARROW_TRANS(vnsrl_wx) 2229 2230/* OPIWI with NARROW */ 2231#define GEN_OPIWI_NARROW_TRANS(NAME, IMM_MODE, OPIVX) \ 2232static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2233{ \ 2234 if (opiwx_narrow_check(s, a)) { \ 2235 static gen_helper_opivx * const fns[3] = { \ 2236 gen_helper_##OPIVX##_b, \ 2237 gen_helper_##OPIVX##_h, \ 2238 gen_helper_##OPIVX##_w, \ 2239 }; \ 2240 return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, \ 2241 fns[s->sew], s, IMM_MODE); \ 2242 } \ 2243 return false; \ 2244} 2245 2246GEN_OPIWI_NARROW_TRANS(vnsra_wi, IMM_ZX, vnsra_wx) 2247GEN_OPIWI_NARROW_TRANS(vnsrl_wi, IMM_ZX, vnsrl_wx) 2248 2249/* Vector Integer Comparison Instructions */ 2250/* 2251 * For all comparison instructions, an illegal instruction exception is raised 2252 * if the destination vector register overlaps a source vector register group 2253 * and LMUL > 1. 
2254 */ 2255static bool opivv_cmp_check(DisasContext *s, arg_rmrr *a) 2256{ 2257 return require_rvv(s) && 2258 vext_check_isa_ill(s) && 2259 vext_check_mss(s, a->rd, a->rs1, a->rs2); 2260} 2261 2262GEN_OPIVV_TRANS(vmseq_vv, opivv_cmp_check) 2263GEN_OPIVV_TRANS(vmsne_vv, opivv_cmp_check) 2264GEN_OPIVV_TRANS(vmsltu_vv, opivv_cmp_check) 2265GEN_OPIVV_TRANS(vmslt_vv, opivv_cmp_check) 2266GEN_OPIVV_TRANS(vmsleu_vv, opivv_cmp_check) 2267GEN_OPIVV_TRANS(vmsle_vv, opivv_cmp_check) 2268 2269static bool opivx_cmp_check(DisasContext *s, arg_rmrr *a) 2270{ 2271 return require_rvv(s) && 2272 vext_check_isa_ill(s) && 2273 vext_check_ms(s, a->rd, a->rs2); 2274} 2275 2276GEN_OPIVX_TRANS(vmseq_vx, opivx_cmp_check) 2277GEN_OPIVX_TRANS(vmsne_vx, opivx_cmp_check) 2278GEN_OPIVX_TRANS(vmsltu_vx, opivx_cmp_check) 2279GEN_OPIVX_TRANS(vmslt_vx, opivx_cmp_check) 2280GEN_OPIVX_TRANS(vmsleu_vx, opivx_cmp_check) 2281GEN_OPIVX_TRANS(vmsle_vx, opivx_cmp_check) 2282GEN_OPIVX_TRANS(vmsgtu_vx, opivx_cmp_check) 2283GEN_OPIVX_TRANS(vmsgt_vx, opivx_cmp_check) 2284 2285GEN_OPIVI_TRANS(vmseq_vi, IMM_SX, vmseq_vx, opivx_cmp_check) 2286GEN_OPIVI_TRANS(vmsne_vi, IMM_SX, vmsne_vx, opivx_cmp_check) 2287GEN_OPIVI_TRANS(vmsleu_vi, IMM_SX, vmsleu_vx, opivx_cmp_check) 2288GEN_OPIVI_TRANS(vmsle_vi, IMM_SX, vmsle_vx, opivx_cmp_check) 2289GEN_OPIVI_TRANS(vmsgtu_vi, IMM_SX, vmsgtu_vx, opivx_cmp_check) 2290GEN_OPIVI_TRANS(vmsgt_vi, IMM_SX, vmsgt_vx, opivx_cmp_check) 2291 2292/* Vector Integer Min/Max Instructions */ 2293GEN_OPIVV_GVEC_TRANS(vminu_vv, umin) 2294GEN_OPIVV_GVEC_TRANS(vmin_vv, smin) 2295GEN_OPIVV_GVEC_TRANS(vmaxu_vv, umax) 2296GEN_OPIVV_GVEC_TRANS(vmax_vv, smax) 2297GEN_OPIVX_TRANS(vminu_vx, opivx_check) 2298GEN_OPIVX_TRANS(vmin_vx, opivx_check) 2299GEN_OPIVX_TRANS(vmaxu_vx, opivx_check) 2300GEN_OPIVX_TRANS(vmax_vx, opivx_check) 2301 2302/* Vector Single-Width Integer Multiply Instructions */ 2303 2304static bool vmulh_vv_check(DisasContext *s, arg_rmrr *a) 2305{ 2306 /* 2307 * All Zve* extensions support all vector integer instructions, 2308 * except that the vmulh integer multiply variants 2309 * that return the high word of the product 2310 * (vmulh.vv, vmulh.vx, vmulhu.vv, vmulhu.vx, vmulhsu.vv, vmulhsu.vx) 2311 * are not included for EEW=64 in Zve64*. (Section 18.2) 2312 */ 2313 return opivv_check(s, a) && 2314 (!has_ext(s, RVV) ? s->sew != MO_64 : true); 2315} 2316 2317static bool vmulh_vx_check(DisasContext *s, arg_rmrr *a) 2318{ 2319 /* 2320 * All Zve* extensions support all vector integer instructions, 2321 * except that the vmulh integer multiply variants 2322 * that return the high word of the product 2323 * (vmulh.vv, vmulh.vx, vmulhu.vv, vmulhu.vx, vmulhsu.vv, vmulhsu.vx) 2324 * are not included for EEW=64 in Zve64*. (Section 18.2) 2325 */ 2326 return opivx_check(s, a) && 2327 (!has_ext(s, RVV) ? 
s->sew != MO_64 : true); 2328} 2329 2330GEN_OPIVV_GVEC_TRANS(vmul_vv, mul) 2331GEN_OPIVV_TRANS(vmulh_vv, vmulh_vv_check) 2332GEN_OPIVV_TRANS(vmulhu_vv, vmulh_vv_check) 2333GEN_OPIVV_TRANS(vmulhsu_vv, vmulh_vv_check) 2334GEN_OPIVX_GVEC_TRANS(vmul_vx, muls) 2335GEN_OPIVX_TRANS(vmulh_vx, vmulh_vx_check) 2336GEN_OPIVX_TRANS(vmulhu_vx, vmulh_vx_check) 2337GEN_OPIVX_TRANS(vmulhsu_vx, vmulh_vx_check) 2338 2339/* Vector Integer Divide Instructions */ 2340GEN_OPIVV_TRANS(vdivu_vv, opivv_check) 2341GEN_OPIVV_TRANS(vdiv_vv, opivv_check) 2342GEN_OPIVV_TRANS(vremu_vv, opivv_check) 2343GEN_OPIVV_TRANS(vrem_vv, opivv_check) 2344GEN_OPIVX_TRANS(vdivu_vx, opivx_check) 2345GEN_OPIVX_TRANS(vdiv_vx, opivx_check) 2346GEN_OPIVX_TRANS(vremu_vx, opivx_check) 2347GEN_OPIVX_TRANS(vrem_vx, opivx_check) 2348 2349/* Vector Widening Integer Multiply Instructions */ 2350GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check) 2351GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check) 2352GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check) 2353GEN_OPIVX_WIDEN_TRANS(vwmul_vx, opivx_widen_check) 2354GEN_OPIVX_WIDEN_TRANS(vwmulu_vx, opivx_widen_check) 2355GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx, opivx_widen_check) 2356 2357/* Vector Single-Width Integer Multiply-Add Instructions */ 2358GEN_OPIVV_TRANS(vmacc_vv, opivv_check) 2359GEN_OPIVV_TRANS(vnmsac_vv, opivv_check) 2360GEN_OPIVV_TRANS(vmadd_vv, opivv_check) 2361GEN_OPIVV_TRANS(vnmsub_vv, opivv_check) 2362GEN_OPIVX_TRANS(vmacc_vx, opivx_check) 2363GEN_OPIVX_TRANS(vnmsac_vx, opivx_check) 2364GEN_OPIVX_TRANS(vmadd_vx, opivx_check) 2365GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) 2366 2367/* Vector Widening Integer Multiply-Add Instructions */ 2368GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_overwrite_widen_check) 2369GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_overwrite_widen_check) 2370GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_overwrite_widen_check) 2371GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_overwrite_widen_check) 2372GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_overwrite_widen_check) 2373GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_overwrite_widen_check) 2374GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_overwrite_widen_check) 2375 2376/* Vector Integer Merge and Move Instructions */ 2377static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) 2378{ 2379 if (require_rvv(s) && 2380 vext_check_isa_ill(s) && 2381 /* vmv.v.v has rs2 = 0 and vm = 1 */ 2382 vext_check_sss(s, a->rd, a->rs1, 0, 1)) { 2383 if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { 2384 tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd), 2385 vreg_ofs(s, a->rs1), 2386 MAXSZ(s), MAXSZ(s)); 2387 } else { 2388 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); 2389 data = FIELD_DP32(data, VDATA, VTA, s->vta); 2390 static gen_helper_gvec_2_ptr * const fns[4] = { 2391 gen_helper_vmv_v_v_b, gen_helper_vmv_v_v_h, 2392 gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, 2393 }; 2394 2395 tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), 2396 tcg_env, s->cfg_ptr->vlenb, 2397 s->cfg_ptr->vlenb, data, 2398 fns[s->sew]); 2399 } 2400 finalize_rvv_inst(s); 2401 return true; 2402 } 2403 return false; 2404} 2405 2406typedef void gen_helper_vmv_vx(TCGv_ptr, TCGv_i64, TCGv_env, TCGv_i32); 2407static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) 2408{ 2409 if (require_rvv(s) && 2410 vext_check_isa_ill(s) && 2411 /* vmv.v.x has rs2 = 0 and vm = 1 */ 2412 vext_check_ss(s, a->rd, 0, 1)) { 2413 TCGv s1; 2414 2415 s1 = get_gpr(s, a->rs1, EXT_SIGN); 2416 2417 if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { 2418 if (get_xl(s) == MXL_RV32 && s->sew == MO_64) { 2419 
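/* On RV32 the scalar is a 32-bit TCGv, so sign-extend it to i64 before the 64-bit element dup. */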
TCGv_i64 s1_i64 = tcg_temp_new_i64(); 2420 tcg_gen_ext_tl_i64(s1_i64, s1); 2421 tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd), 2422 MAXSZ(s), MAXSZ(s), s1_i64); 2423 } else { 2424 tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd), 2425 MAXSZ(s), MAXSZ(s), s1); 2426 } 2427 } else { 2428 TCGv_i32 desc; 2429 TCGv_i64 s1_i64 = tcg_temp_new_i64(); 2430 TCGv_ptr dest = tcg_temp_new_ptr(); 2431 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); 2432 data = FIELD_DP32(data, VDATA, VTA, s->vta); 2433 static gen_helper_vmv_vx * const fns[4] = { 2434 gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, 2435 gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, 2436 }; 2437 2438 tcg_gen_ext_tl_i64(s1_i64, s1); 2439 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, 2440 s->cfg_ptr->vlenb, data)); 2441 tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, a->rd)); 2442 fns[s->sew](dest, s1_i64, tcg_env, desc); 2443 } 2444 2445 finalize_rvv_inst(s); 2446 return true; 2447 } 2448 return false; 2449} 2450 2451static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) 2452{ 2453 if (require_rvv(s) && 2454 vext_check_isa_ill(s) && 2455 /* vmv.v.i has rs2 = 0 and vm = 1 */ 2456 vext_check_ss(s, a->rd, 0, 1)) { 2457 int64_t simm = sextract64(a->rs1, 0, 5); 2458 if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { 2459 tcg_gen_gvec_dup_imm(s->sew, vreg_ofs(s, a->rd), 2460 MAXSZ(s), MAXSZ(s), simm); 2461 } else { 2462 TCGv_i32 desc; 2463 TCGv_i64 s1; 2464 TCGv_ptr dest; 2465 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); 2466 data = FIELD_DP32(data, VDATA, VTA, s->vta); 2467 static gen_helper_vmv_vx * const fns[4] = { 2468 gen_helper_vmv_v_x_b, gen_helper_vmv_v_x_h, 2469 gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, 2470 }; 2471 2472 s1 = tcg_constant_i64(simm); 2473 dest = tcg_temp_new_ptr(); 2474 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, 2475 s->cfg_ptr->vlenb, data)); 2476 tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, a->rd)); 2477 fns[s->sew](dest, s1, tcg_env, desc); 2478 } 2479 finalize_rvv_inst(s); 2480 return true; 2481 } 2482 return false; 2483} 2484 2485GEN_OPIVV_TRANS(vmerge_vvm, opivv_vadc_check) 2486GEN_OPIVX_TRANS(vmerge_vxm, opivx_vadc_check) 2487GEN_OPIVI_TRANS(vmerge_vim, IMM_SX, vmerge_vxm, opivx_vadc_check) 2488 2489/* 2490 *** Vector Fixed-Point Arithmetic Instructions 2491 */ 2492 2493/* Vector Single-Width Saturating Add and Subtract */ 2494GEN_OPIVV_TRANS(vsaddu_vv, opivv_check) 2495GEN_OPIVV_TRANS(vsadd_vv, opivv_check) 2496GEN_OPIVV_TRANS(vssubu_vv, opivv_check) 2497GEN_OPIVV_TRANS(vssub_vv, opivv_check) 2498GEN_OPIVX_TRANS(vsaddu_vx, opivx_check) 2499GEN_OPIVX_TRANS(vsadd_vx, opivx_check) 2500GEN_OPIVX_TRANS(vssubu_vx, opivx_check) 2501GEN_OPIVX_TRANS(vssub_vx, opivx_check) 2502GEN_OPIVI_TRANS(vsaddu_vi, IMM_SX, vsaddu_vx, opivx_check) 2503GEN_OPIVI_TRANS(vsadd_vi, IMM_SX, vsadd_vx, opivx_check) 2504 2505/* Vector Single-Width Averaging Add and Subtract */ 2506GEN_OPIVV_TRANS(vaadd_vv, opivv_check) 2507GEN_OPIVV_TRANS(vaaddu_vv, opivv_check) 2508GEN_OPIVV_TRANS(vasub_vv, opivv_check) 2509GEN_OPIVV_TRANS(vasubu_vv, opivv_check) 2510GEN_OPIVX_TRANS(vaadd_vx, opivx_check) 2511GEN_OPIVX_TRANS(vaaddu_vx, opivx_check) 2512GEN_OPIVX_TRANS(vasub_vx, opivx_check) 2513GEN_OPIVX_TRANS(vasubu_vx, opivx_check) 2514 2515/* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2516 2517static bool vsmul_vv_check(DisasContext *s, arg_rmrr *a) 2518{ 2519 /* 2520 * All Zve* extensions support all vector fixed-point arithmetic 2521 * instructions, except that vsmul.vv and vsmul.vx are not supported 
2522 * for EEW=64 in Zve64*. (Section 18.2) 2523 */ 2524 return opivv_check(s, a) && 2525 (!has_ext(s, RVV) ? s->sew != MO_64 : true); 2526} 2527 2528static bool vsmul_vx_check(DisasContext *s, arg_rmrr *a) 2529{ 2530 /* 2531 * All Zve* extensions support all vector fixed-point arithmetic 2532 * instructions, except that vsmul.vv and vsmul.vx are not supported 2533 * for EEW=64 in Zve64*. (Section 18.2) 2534 */ 2535 return opivx_check(s, a) && 2536 (!has_ext(s, RVV) ? s->sew != MO_64 : true); 2537} 2538 2539GEN_OPIVV_TRANS(vsmul_vv, vsmul_vv_check) 2540GEN_OPIVX_TRANS(vsmul_vx, vsmul_vx_check) 2541 2542/* Vector Single-Width Scaling Shift Instructions */ 2543GEN_OPIVV_TRANS(vssrl_vv, opivv_check) 2544GEN_OPIVV_TRANS(vssra_vv, opivv_check) 2545GEN_OPIVX_TRANS(vssrl_vx, opivx_check) 2546GEN_OPIVX_TRANS(vssra_vx, opivx_check) 2547GEN_OPIVI_TRANS(vssrl_vi, IMM_TRUNC_SEW, vssrl_vx, opivx_check) 2548GEN_OPIVI_TRANS(vssra_vi, IMM_TRUNC_SEW, vssra_vx, opivx_check) 2549 2550/* Vector Narrowing Fixed-Point Clip Instructions */ 2551GEN_OPIWV_NARROW_TRANS(vnclipu_wv) 2552GEN_OPIWV_NARROW_TRANS(vnclip_wv) 2553GEN_OPIWX_NARROW_TRANS(vnclipu_wx) 2554GEN_OPIWX_NARROW_TRANS(vnclip_wx) 2555GEN_OPIWI_NARROW_TRANS(vnclipu_wi, IMM_ZX, vnclipu_wx) 2556GEN_OPIWI_NARROW_TRANS(vnclip_wi, IMM_ZX, vnclip_wx) 2557 2558/* 2559 *** Vector Float Point Arithmetic Instructions 2560 */ 2561 2562/* 2563 * As RVF-only cpus always have values NaN-boxed to 64-bits, 2564 * RVF and RVD can be treated equally. 2565 * We don't have to deal with the cases of: SEW > FLEN. 2566 * 2567 * If SEW < FLEN, check whether input fp register is a valid 2568 * NaN-boxed value, in which case the least-significant SEW bits 2569 * of the f register are used, else the canonical NaN value is used. 2570 */ 2571static void do_nanbox(DisasContext *s, TCGv_i64 out, TCGv_i64 in) 2572{ 2573 switch (s->sew) { 2574 case 1: 2575 gen_check_nanbox_h(out, in); 2576 break; 2577 case 2: 2578 gen_check_nanbox_s(out, in); 2579 break; 2580 case 3: 2581 tcg_gen_mov_i64(out, in); 2582 break; 2583 default: 2584 g_assert_not_reached(); 2585 } 2586} 2587 2588/* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2589 2590/* 2591 * If the current SEW does not correspond to a supported IEEE floating-point 2592 * type, an illegal instruction exception is raised. 
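 * The require_rvf() call below implements this SEW-dependent check.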
2593 */ 2594static bool opfvv_check(DisasContext *s, arg_rmrr *a) 2595{ 2596 return require_rvv(s) && 2597 require_rvf(s) && 2598 vext_check_isa_ill(s) && 2599 vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm); 2600} 2601 2602/* OPFVV without GVEC IR */ 2603#define GEN_OPFVV_TRANS(NAME, CHECK) \ 2604static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2605{ \ 2606 if (CHECK(s, a)) { \ 2607 uint32_t data = 0; \ 2608 static gen_helper_gvec_4_ptr * const fns[3] = { \ 2609 gen_helper_##NAME##_h, \ 2610 gen_helper_##NAME##_w, \ 2611 gen_helper_##NAME##_d, \ 2612 }; \ 2613 gen_set_rm(s, RISCV_FRM_DYN); \ 2614 \ 2615 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 2616 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 2617 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 2618 data = \ 2619 FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ 2620 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 2621 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 2622 vreg_ofs(s, a->rs1), \ 2623 vreg_ofs(s, a->rs2), tcg_env, \ 2624 s->cfg_ptr->vlenb, \ 2625 s->cfg_ptr->vlenb, data, \ 2626 fns[s->sew - 1]); \ 2627 finalize_rvv_inst(s); \ 2628 return true; \ 2629 } \ 2630 return false; \ 2631} 2632GEN_OPFVV_TRANS(vfadd_vv, opfvv_check) 2633GEN_OPFVV_TRANS(vfsub_vv, opfvv_check) 2634 2635typedef void gen_helper_opfvf(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr, 2636 TCGv_env, TCGv_i32); 2637 2638static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, 2639 uint32_t data, gen_helper_opfvf *fn, DisasContext *s) 2640{ 2641 TCGv_ptr dest, src2, mask; 2642 TCGv_i32 desc; 2643 TCGv_i64 t1; 2644 2645 dest = tcg_temp_new_ptr(); 2646 mask = tcg_temp_new_ptr(); 2647 src2 = tcg_temp_new_ptr(); 2648 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, 2649 s->cfg_ptr->vlenb, data)); 2650 2651 tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd)); 2652 tcg_gen_addi_ptr(src2, tcg_env, vreg_ofs(s, vs2)); 2653 tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0)); 2654 2655 /* NaN-box f[rs1] */ 2656 t1 = tcg_temp_new_i64(); 2657 do_nanbox(s, t1, cpu_fpr[rs1]); 2658 2659 fn(dest, mask, t1, src2, tcg_env, desc); 2660 2661 finalize_rvv_inst(s); 2662 return true; 2663} 2664 2665/* 2666 * If the current SEW does not correspond to a supported IEEE floating-point 2667 * type, an illegal instruction exception is raised 2668 */ 2669static bool opfvf_check(DisasContext *s, arg_rmrr *a) 2670{ 2671 return require_rvv(s) && 2672 require_rvf(s) && 2673 vext_check_isa_ill(s) && 2674 vext_check_ss(s, a->rd, a->rs2, a->vm); 2675} 2676 2677/* OPFVF without GVEC IR */ 2678#define GEN_OPFVF_TRANS(NAME, CHECK) \ 2679static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2680{ \ 2681 if (CHECK(s, a)) { \ 2682 uint32_t data = 0; \ 2683 static gen_helper_opfvf *const fns[3] = { \ 2684 gen_helper_##NAME##_h, \ 2685 gen_helper_##NAME##_w, \ 2686 gen_helper_##NAME##_d, \ 2687 }; \ 2688 gen_set_rm(s, RISCV_FRM_DYN); \ 2689 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 2690 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 2691 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 2692 data = FIELD_DP32(data, VDATA, VTA_ALL_1S, \ 2693 s->cfg_vta_all_1s); \ 2694 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 2695 return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ 2696 fns[s->sew - 1], s); \ 2697 } \ 2698 return false; \ 2699} 2700 2701GEN_OPFVF_TRANS(vfadd_vf, opfvf_check) 2702GEN_OPFVF_TRANS(vfsub_vf, opfvf_check) 2703GEN_OPFVF_TRANS(vfrsub_vf, opfvf_check) 2704 2705/* Vector Widening Floating-Point Add/Subtract Instructions */ 2706static bool 
opfvv_widen_check(DisasContext *s, arg_rmrr *a) 2707{ 2708 return require_rvv(s) && 2709 require_rvf(s) && 2710 require_scale_rvf(s) && 2711 vext_check_isa_ill(s) && 2712 vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm); 2713} 2714 2715static bool opfvv_overwrite_widen_check(DisasContext *s, arg_rmrr *a) 2716{ 2717 return require_rvv(s) && 2718 require_rvf(s) && 2719 require_scale_rvf(s) && 2720 vext_check_isa_ill(s) && 2721 vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) && 2722 vext_check_input_eew(s, a->rd, s->sew + 1, a->rs1, s->sew, a->vm) && 2723 vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); 2724} 2725 2726/* OPFVV with WIDEN */ 2727#define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK) \ 2728static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2729{ \ 2730 if (CHECK(s, a)) { \ 2731 uint32_t data = 0; \ 2732 static gen_helper_gvec_4_ptr * const fns[2] = { \ 2733 gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ 2734 }; \ 2735 gen_set_rm(s, RISCV_FRM_DYN); \ 2736 \ 2737 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 2738 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 2739 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 2740 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 2741 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 2742 vreg_ofs(s, a->rs1), \ 2743 vreg_ofs(s, a->rs2), tcg_env, \ 2744 s->cfg_ptr->vlenb, \ 2745 s->cfg_ptr->vlenb, data, \ 2746 fns[s->sew - 1]); \ 2747 finalize_rvv_inst(s); \ 2748 return true; \ 2749 } \ 2750 return false; \ 2751} 2752 2753GEN_OPFVV_WIDEN_TRANS(vfwadd_vv, opfvv_widen_check) 2754GEN_OPFVV_WIDEN_TRANS(vfwsub_vv, opfvv_widen_check) 2755 2756static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a) 2757{ 2758 return require_rvv(s) && 2759 require_rvf(s) && 2760 require_scale_rvf(s) && 2761 vext_check_isa_ill(s) && 2762 vext_check_ds(s, a->rd, a->rs2, a->vm); 2763} 2764 2765static bool opfvf_overwrite_widen_check(DisasContext *s, arg_rmrr *a) 2766{ 2767 return require_rvv(s) && 2768 require_rvf(s) && 2769 require_scale_rvf(s) && 2770 vext_check_isa_ill(s) && 2771 vext_check_ds(s, a->rd, a->rs2, a->vm) && 2772 vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm); 2773} 2774 2775/* OPFVF with WIDEN */ 2776#define GEN_OPFVF_WIDEN_TRANS(NAME, CHECK) \ 2777static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2778{ \ 2779 if (CHECK(s, a)) { \ 2780 uint32_t data = 0; \ 2781 static gen_helper_opfvf *const fns[2] = { \ 2782 gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ 2783 }; \ 2784 gen_set_rm(s, RISCV_FRM_DYN); \ 2785 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 2786 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 2787 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 2788 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 2789 return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ 2790 fns[s->sew - 1], s); \ 2791 } \ 2792 return false; \ 2793} 2794 2795GEN_OPFVF_WIDEN_TRANS(vfwadd_vf, opfvf_widen_check) 2796GEN_OPFVF_WIDEN_TRANS(vfwsub_vf, opfvf_widen_check) 2797 2798static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a) 2799{ 2800 return require_rvv(s) && 2801 require_rvf(s) && 2802 require_scale_rvf(s) && 2803 vext_check_isa_ill(s) && 2804 vext_check_dds(s, a->rd, a->rs1, a->rs2, a->vm); 2805} 2806 2807/* WIDEN OPFVV with WIDEN */ 2808#define GEN_OPFWV_WIDEN_TRANS(NAME) \ 2809static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2810{ \ 2811 if (opfwv_widen_check(s, a)) { \ 2812 uint32_t data = 0; \ 2813 static gen_helper_gvec_4_ptr * const fns[2] = { \ 2814 gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ 2815 }; \ 2816 
gen_set_rm(s, RISCV_FRM_DYN); \ 2817 \ 2818 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 2819 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 2820 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 2821 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 2822 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 2823 vreg_ofs(s, a->rs1), \ 2824 vreg_ofs(s, a->rs2), tcg_env, \ 2825 s->cfg_ptr->vlenb, \ 2826 s->cfg_ptr->vlenb, data, \ 2827 fns[s->sew - 1]); \ 2828 finalize_rvv_inst(s); \ 2829 return true; \ 2830 } \ 2831 return false; \ 2832} 2833 2834GEN_OPFWV_WIDEN_TRANS(vfwadd_wv) 2835GEN_OPFWV_WIDEN_TRANS(vfwsub_wv) 2836 2837static bool opfwf_widen_check(DisasContext *s, arg_rmrr *a) 2838{ 2839 return require_rvv(s) && 2840 require_rvf(s) && 2841 require_scale_rvf(s) && 2842 vext_check_isa_ill(s) && 2843 vext_check_dd(s, a->rd, a->rs2, a->vm); 2844} 2845 2846/* WIDEN OPFVF with WIDEN */ 2847#define GEN_OPFWF_WIDEN_TRANS(NAME) \ 2848static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ 2849{ \ 2850 if (opfwf_widen_check(s, a)) { \ 2851 uint32_t data = 0; \ 2852 static gen_helper_opfvf *const fns[2] = { \ 2853 gen_helper_##NAME##_h, gen_helper_##NAME##_w, \ 2854 }; \ 2855 gen_set_rm(s, RISCV_FRM_DYN); \ 2856 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 2857 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 2858 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 2859 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 2860 return opfvf_trans(a->rd, a->rs1, a->rs2, data, \ 2861 fns[s->sew - 1], s); \ 2862 } \ 2863 return false; \ 2864} 2865 2866GEN_OPFWF_WIDEN_TRANS(vfwadd_wf) 2867GEN_OPFWF_WIDEN_TRANS(vfwsub_wf) 2868 2869/* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 2870GEN_OPFVV_TRANS(vfmul_vv, opfvv_check) 2871GEN_OPFVV_TRANS(vfdiv_vv, opfvv_check) 2872GEN_OPFVF_TRANS(vfmul_vf, opfvf_check) 2873GEN_OPFVF_TRANS(vfdiv_vf, opfvf_check) 2874GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check) 2875 2876/* Vector Widening Floating-Point Multiply */ 2877GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check) 2878GEN_OPFVF_WIDEN_TRANS(vfwmul_vf, opfvf_widen_check) 2879 2880/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 2881GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check) 2882GEN_OPFVV_TRANS(vfnmacc_vv, opfvv_check) 2883GEN_OPFVV_TRANS(vfmsac_vv, opfvv_check) 2884GEN_OPFVV_TRANS(vfnmsac_vv, opfvv_check) 2885GEN_OPFVV_TRANS(vfmadd_vv, opfvv_check) 2886GEN_OPFVV_TRANS(vfnmadd_vv, opfvv_check) 2887GEN_OPFVV_TRANS(vfmsub_vv, opfvv_check) 2888GEN_OPFVV_TRANS(vfnmsub_vv, opfvv_check) 2889GEN_OPFVF_TRANS(vfmacc_vf, opfvf_check) 2890GEN_OPFVF_TRANS(vfnmacc_vf, opfvf_check) 2891GEN_OPFVF_TRANS(vfmsac_vf, opfvf_check) 2892GEN_OPFVF_TRANS(vfnmsac_vf, opfvf_check) 2893GEN_OPFVF_TRANS(vfmadd_vf, opfvf_check) 2894GEN_OPFVF_TRANS(vfnmadd_vf, opfvf_check) 2895GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check) 2896GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check) 2897 2898/* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 2899GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_overwrite_widen_check) 2900GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_overwrite_widen_check) 2901GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_overwrite_widen_check) 2902GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_overwrite_widen_check) 2903GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf, opfvf_overwrite_widen_check) 2904GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf, opfvf_overwrite_widen_check) 2905GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf, opfvf_overwrite_widen_check) 2906GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf, opfvf_overwrite_widen_check) 2907 2908/* Vector Floating-Point Square-Root 
Instruction */ 2909 2910/* 2911 * If the current SEW does not correspond to a supported IEEE floating-point 2912 * type, an illegal instruction exception is raised 2913 */ 2914static bool opfv_check(DisasContext *s, arg_rmr *a) 2915{ 2916 return require_rvv(s) && 2917 require_rvf(s) && 2918 vext_check_isa_ill(s) && 2919 /* OPFV instructions ignore vs1 check */ 2920 vext_check_ss(s, a->rd, a->rs2, a->vm); 2921} 2922 2923static bool do_opfv(DisasContext *s, arg_rmr *a, 2924 gen_helper_gvec_3_ptr *fn, 2925 bool (*checkfn)(DisasContext *, arg_rmr *), 2926 int rm) 2927{ 2928 if (checkfn(s, a)) { 2929 uint32_t data = 0; 2930 gen_set_rm_chkfrm(s, rm); 2931 2932 data = FIELD_DP32(data, VDATA, VM, a->vm); 2933 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 2934 data = FIELD_DP32(data, VDATA, VTA, s->vta); 2935 data = FIELD_DP32(data, VDATA, VMA, s->vma); 2936 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), 2937 vreg_ofs(s, a->rs2), tcg_env, 2938 s->cfg_ptr->vlenb, 2939 s->cfg_ptr->vlenb, data, fn); 2940 finalize_rvv_inst(s); 2941 return true; 2942 } 2943 return false; 2944} 2945 2946#define GEN_OPFV_TRANS(NAME, CHECK, FRM) \ 2947static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ 2948{ \ 2949 static gen_helper_gvec_3_ptr * const fns[3] = { \ 2950 gen_helper_##NAME##_h, \ 2951 gen_helper_##NAME##_w, \ 2952 gen_helper_##NAME##_d \ 2953 }; \ 2954 return do_opfv(s, a, fns[s->sew - 1], CHECK, FRM); \ 2955} 2956 2957GEN_OPFV_TRANS(vfsqrt_v, opfv_check, RISCV_FRM_DYN) 2958GEN_OPFV_TRANS(vfrsqrt7_v, opfv_check, RISCV_FRM_DYN) 2959GEN_OPFV_TRANS(vfrec7_v, opfv_check, RISCV_FRM_DYN) 2960 2961/* Vector Floating-Point MIN/MAX Instructions */ 2962GEN_OPFVV_TRANS(vfmin_vv, opfvv_check) 2963GEN_OPFVV_TRANS(vfmax_vv, opfvv_check) 2964GEN_OPFVF_TRANS(vfmin_vf, opfvf_check) 2965GEN_OPFVF_TRANS(vfmax_vf, opfvf_check) 2966 2967/* Vector Floating-Point Sign-Injection Instructions */ 2968GEN_OPFVV_TRANS(vfsgnj_vv, opfvv_check) 2969GEN_OPFVV_TRANS(vfsgnjn_vv, opfvv_check) 2970GEN_OPFVV_TRANS(vfsgnjx_vv, opfvv_check) 2971GEN_OPFVF_TRANS(vfsgnj_vf, opfvf_check) 2972GEN_OPFVF_TRANS(vfsgnjn_vf, opfvf_check) 2973GEN_OPFVF_TRANS(vfsgnjx_vf, opfvf_check) 2974 2975/* Vector Floating-Point Compare Instructions */ 2976static bool opfvv_cmp_check(DisasContext *s, arg_rmrr *a) 2977{ 2978 return require_rvv(s) && 2979 require_rvf(s) && 2980 vext_check_isa_ill(s) && 2981 vext_check_mss(s, a->rd, a->rs1, a->rs2); 2982} 2983 2984GEN_OPFVV_TRANS(vmfeq_vv, opfvv_cmp_check) 2985GEN_OPFVV_TRANS(vmfne_vv, opfvv_cmp_check) 2986GEN_OPFVV_TRANS(vmflt_vv, opfvv_cmp_check) 2987GEN_OPFVV_TRANS(vmfle_vv, opfvv_cmp_check) 2988 2989static bool opfvf_cmp_check(DisasContext *s, arg_rmrr *a) 2990{ 2991 return require_rvv(s) && 2992 require_rvf(s) && 2993 vext_check_isa_ill(s) && 2994 vext_check_ms(s, a->rd, a->rs2); 2995} 2996 2997GEN_OPFVF_TRANS(vmfeq_vf, opfvf_cmp_check) 2998GEN_OPFVF_TRANS(vmfne_vf, opfvf_cmp_check) 2999GEN_OPFVF_TRANS(vmflt_vf, opfvf_cmp_check) 3000GEN_OPFVF_TRANS(vmfle_vf, opfvf_cmp_check) 3001GEN_OPFVF_TRANS(vmfgt_vf, opfvf_cmp_check) 3002GEN_OPFVF_TRANS(vmfge_vf, opfvf_cmp_check) 3003 3004/* Vector Floating-Point Classify Instruction */ 3005GEN_OPFV_TRANS(vfclass_v, opfv_check, RISCV_FRM_DYN) 3006 3007/* Vector Floating-Point Merge Instruction */ 3008GEN_OPFVF_TRANS(vfmerge_vfm, opfvf_check) 3009 3010static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) 3011{ 3012 if (require_rvv(s) && 3013 require_rvf(s) && 3014 vext_check_isa_ill(s) && 3015 require_align(a->rd, s->lmul)) { 3016 gen_set_rm(s, RISCV_FRM_DYN); 
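/* Splat the NaN-boxed f[rs1] into vd, either with a gvec dup or via the vmv_v_x helpers. */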
3017 3018 TCGv_i64 t1; 3019 3020 if (s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { 3021 t1 = tcg_temp_new_i64(); 3022 /* NaN-box f[rs1] */ 3023 do_nanbox(s, t1, cpu_fpr[a->rs1]); 3024 3025 tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd), 3026 MAXSZ(s), MAXSZ(s), t1); 3027 } else { 3028 TCGv_ptr dest; 3029 TCGv_i32 desc; 3030 uint32_t data = FIELD_DP32(0, VDATA, LMUL, s->lmul); 3031 data = FIELD_DP32(data, VDATA, VTA, s->vta); 3032 data = FIELD_DP32(data, VDATA, VMA, s->vma); 3033 static gen_helper_vmv_vx * const fns[3] = { 3034 gen_helper_vmv_v_x_h, 3035 gen_helper_vmv_v_x_w, 3036 gen_helper_vmv_v_x_d, 3037 }; 3038 3039 t1 = tcg_temp_new_i64(); 3040 /* NaN-box f[rs1] */ 3041 do_nanbox(s, t1, cpu_fpr[a->rs1]); 3042 3043 dest = tcg_temp_new_ptr(); 3044 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, 3045 s->cfg_ptr->vlenb, data)); 3046 tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, a->rd)); 3047 3048 fns[s->sew - 1](dest, t1, tcg_env, desc); 3049 } 3050 finalize_rvv_inst(s); 3051 return true; 3052 } 3053 return false; 3054} 3055 3056/* Single-Width Floating-Point/Integer Type-Convert Instructions */ 3057#define GEN_OPFV_CVT_TRANS(NAME, HELPER, FRM) \ 3058static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ 3059{ \ 3060 static gen_helper_gvec_3_ptr * const fns[3] = { \ 3061 gen_helper_##HELPER##_h, \ 3062 gen_helper_##HELPER##_w, \ 3063 gen_helper_##HELPER##_d \ 3064 }; \ 3065 return do_opfv(s, a, fns[s->sew - 1], opfv_check, FRM); \ 3066} 3067 3068GEN_OPFV_CVT_TRANS(vfcvt_xu_f_v, vfcvt_xu_f_v, RISCV_FRM_DYN) 3069GEN_OPFV_CVT_TRANS(vfcvt_x_f_v, vfcvt_x_f_v, RISCV_FRM_DYN) 3070GEN_OPFV_CVT_TRANS(vfcvt_f_xu_v, vfcvt_f_xu_v, RISCV_FRM_DYN) 3071GEN_OPFV_CVT_TRANS(vfcvt_f_x_v, vfcvt_f_x_v, RISCV_FRM_DYN) 3072/* Reuse the helper functions from vfcvt.xu.f.v and vfcvt.x.f.v */ 3073GEN_OPFV_CVT_TRANS(vfcvt_rtz_xu_f_v, vfcvt_xu_f_v, RISCV_FRM_RTZ) 3074GEN_OPFV_CVT_TRANS(vfcvt_rtz_x_f_v, vfcvt_x_f_v, RISCV_FRM_RTZ) 3075 3076/* Widening Floating-Point/Integer Type-Convert Instructions */ 3077 3078/* 3079 * If the current SEW does not correspond to a supported IEEE floating-point 3080 * type, an illegal instruction exception is raised 3081 */ 3082static bool opfv_widen_check(DisasContext *s, arg_rmr *a) 3083{ 3084 return require_rvv(s) && 3085 vext_check_isa_ill(s) && 3086 vext_check_ds(s, a->rd, a->rs2, a->vm); 3087} 3088 3089static bool opxfv_widen_check(DisasContext *s, arg_rmr *a) 3090{ 3091 return opfv_widen_check(s, a) && 3092 require_rvf(s); 3093} 3094 3095static bool opffv_widen_check(DisasContext *s, arg_rmr *a) 3096{ 3097 return opfv_widen_check(s, a) && 3098 require_rvfmin(s) && 3099 require_scale_rvfmin(s); 3100} 3101 3102#define GEN_OPFV_WIDEN_TRANS(NAME, CHECK, HELPER, FRM) \ 3103static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ 3104{ \ 3105 if (CHECK(s, a)) { \ 3106 uint32_t data = 0; \ 3107 static gen_helper_gvec_3_ptr * const fns[2] = { \ 3108 gen_helper_##HELPER##_h, \ 3109 gen_helper_##HELPER##_w, \ 3110 }; \ 3111 gen_set_rm_chkfrm(s, FRM); \ 3112 \ 3113 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 3114 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 3115 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 3116 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 3117 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 3118 vreg_ofs(s, a->rs2), tcg_env, \ 3119 s->cfg_ptr->vlenb, \ 3120 s->cfg_ptr->vlenb, data, \ 3121 fns[s->sew - 1]); \ 3122 finalize_rvv_inst(s); \ 3123 return true; \ 3124 } \ 3125 return false; \ 3126} 3127 3128GEN_OPFV_WIDEN_TRANS(vfwcvt_xu_f_v, 
opxfv_widen_check, vfwcvt_xu_f_v, 3129 RISCV_FRM_DYN) 3130GEN_OPFV_WIDEN_TRANS(vfwcvt_x_f_v, opxfv_widen_check, vfwcvt_x_f_v, 3131 RISCV_FRM_DYN) 3132GEN_OPFV_WIDEN_TRANS(vfwcvt_f_f_v, opffv_widen_check, vfwcvt_f_f_v, 3133 RISCV_FRM_DYN) 3134/* Reuse the helper functions from vfwcvt.xu.f.v and vfwcvt.x.f.v */ 3135GEN_OPFV_WIDEN_TRANS(vfwcvt_rtz_xu_f_v, opxfv_widen_check, vfwcvt_xu_f_v, 3136 RISCV_FRM_RTZ) 3137GEN_OPFV_WIDEN_TRANS(vfwcvt_rtz_x_f_v, opxfv_widen_check, vfwcvt_x_f_v, 3138 RISCV_FRM_RTZ) 3139 3140static bool opfxv_widen_check(DisasContext *s, arg_rmr *a) 3141{ 3142 return require_rvv(s) && 3143 require_scale_rvf(s) && 3144 vext_check_isa_ill(s) && 3145 /* OPFV widening instructions ignore vs1 check */ 3146 vext_check_ds(s, a->rd, a->rs2, a->vm); 3147} 3148 3149#define GEN_OPFXV_WIDEN_TRANS(NAME) \ 3150static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ 3151{ \ 3152 if (opfxv_widen_check(s, a)) { \ 3153 uint32_t data = 0; \ 3154 static gen_helper_gvec_3_ptr * const fns[3] = { \ 3155 gen_helper_##NAME##_b, \ 3156 gen_helper_##NAME##_h, \ 3157 gen_helper_##NAME##_w, \ 3158 }; \ 3159 gen_set_rm(s, RISCV_FRM_DYN); \ 3160 \ 3161 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 3162 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 3163 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 3164 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 3165 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 3166 vreg_ofs(s, a->rs2), tcg_env, \ 3167 s->cfg_ptr->vlenb, \ 3168 s->cfg_ptr->vlenb, data, \ 3169 fns[s->sew]); \ 3170 finalize_rvv_inst(s); \ 3171 return true; \ 3172 } \ 3173 return false; \ 3174} 3175 3176GEN_OPFXV_WIDEN_TRANS(vfwcvt_f_xu_v) 3177GEN_OPFXV_WIDEN_TRANS(vfwcvt_f_x_v) 3178 3179/* Narrowing Floating-Point/Integer Type-Convert Instructions */ 3180 3181/* 3182 * If the current SEW does not correspond to a supported IEEE floating-point 3183 * type, an illegal instruction exception is raised 3184 */ 3185static bool opfv_narrow_check(DisasContext *s, arg_rmr *a) 3186{ 3187 return require_rvv(s) && 3188 vext_check_isa_ill(s) && 3189 /* OPFV narrowing instructions ignore vs1 check */ 3190 vext_check_sd(s, a->rd, a->rs2, a->vm); 3191} 3192 3193static bool opfxv_narrow_check(DisasContext *s, arg_rmr *a) 3194{ 3195 return opfv_narrow_check(s, a) && 3196 require_rvf(s) && 3197 (s->sew != MO_64); 3198} 3199 3200static bool opffv_narrow_check(DisasContext *s, arg_rmr *a) 3201{ 3202 return opfv_narrow_check(s, a) && 3203 require_rvfmin(s) && 3204 require_scale_rvfmin(s); 3205} 3206 3207static bool opffv_rod_narrow_check(DisasContext *s, arg_rmr *a) 3208{ 3209 return opfv_narrow_check(s, a) && 3210 require_rvf(s) && 3211 require_scale_rvf(s); 3212} 3213 3214#define GEN_OPFV_NARROW_TRANS(NAME, CHECK, HELPER, FRM) \ 3215static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ 3216{ \ 3217 if (CHECK(s, a)) { \ 3218 uint32_t data = 0; \ 3219 static gen_helper_gvec_3_ptr * const fns[2] = { \ 3220 gen_helper_##HELPER##_h, \ 3221 gen_helper_##HELPER##_w, \ 3222 }; \ 3223 gen_set_rm_chkfrm(s, FRM); \ 3224 \ 3225 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 3226 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 3227 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 3228 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 3229 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 3230 vreg_ofs(s, a->rs2), tcg_env, \ 3231 s->cfg_ptr->vlenb, \ 3232 s->cfg_ptr->vlenb, data, \ 3233 fns[s->sew - 1]); \ 3234 finalize_rvv_inst(s); \ 3235 return true; \ 3236 } \ 3237 return false; \ 3238} 3239 
3240GEN_OPFV_NARROW_TRANS(vfncvt_f_xu_w, opfxv_narrow_check, vfncvt_f_xu_w, 3241 RISCV_FRM_DYN) 3242GEN_OPFV_NARROW_TRANS(vfncvt_f_x_w, opfxv_narrow_check, vfncvt_f_x_w, 3243 RISCV_FRM_DYN) 3244GEN_OPFV_NARROW_TRANS(vfncvt_f_f_w, opffv_narrow_check, vfncvt_f_f_w, 3245 RISCV_FRM_DYN) 3246/* Reuse the helper function from vfncvt.f.f.w */ 3247GEN_OPFV_NARROW_TRANS(vfncvt_rod_f_f_w, opffv_rod_narrow_check, vfncvt_f_f_w, 3248 RISCV_FRM_ROD) 3249 3250static bool opxfv_narrow_check(DisasContext *s, arg_rmr *a) 3251{ 3252 return require_rvv(s) && 3253 require_scale_rvf(s) && 3254 vext_check_isa_ill(s) && 3255 /* OPFV narrowing instructions ignore vs1 check */ 3256 vext_check_sd(s, a->rd, a->rs2, a->vm); 3257} 3258 3259#define GEN_OPXFV_NARROW_TRANS(NAME, HELPER, FRM) \ 3260static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ 3261{ \ 3262 if (opxfv_narrow_check(s, a)) { \ 3263 uint32_t data = 0; \ 3264 static gen_helper_gvec_3_ptr * const fns[3] = { \ 3265 gen_helper_##HELPER##_b, \ 3266 gen_helper_##HELPER##_h, \ 3267 gen_helper_##HELPER##_w, \ 3268 }; \ 3269 gen_set_rm_chkfrm(s, FRM); \ 3270 \ 3271 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 3272 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 3273 data = FIELD_DP32(data, VDATA, VTA, s->vta); \ 3274 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 3275 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 3276 vreg_ofs(s, a->rs2), tcg_env, \ 3277 s->cfg_ptr->vlenb, \ 3278 s->cfg_ptr->vlenb, data, \ 3279 fns[s->sew]); \ 3280 finalize_rvv_inst(s); \ 3281 return true; \ 3282 } \ 3283 return false; \ 3284} 3285 3286GEN_OPXFV_NARROW_TRANS(vfncvt_xu_f_w, vfncvt_xu_f_w, RISCV_FRM_DYN) 3287GEN_OPXFV_NARROW_TRANS(vfncvt_x_f_w, vfncvt_x_f_w, RISCV_FRM_DYN) 3288/* Reuse the helper functions from vfncvt.xu.f.w and vfncvt.x.f.w */ 3289GEN_OPXFV_NARROW_TRANS(vfncvt_rtz_xu_f_w, vfncvt_xu_f_w, RISCV_FRM_RTZ) 3290GEN_OPXFV_NARROW_TRANS(vfncvt_rtz_x_f_w, vfncvt_x_f_w, RISCV_FRM_RTZ) 3291 3292/* 3293 *** Vector Reduction Operations 3294 */ 3295/* Vector Single-Width Integer Reduction Instructions */ 3296static bool reduction_check(DisasContext *s, arg_rmrr *a) 3297{ 3298 return require_rvv(s) && 3299 vext_check_isa_ill(s) && 3300 vext_check_reduction(s, a->rs2); 3301} 3302 3303GEN_OPIVV_TRANS(vredsum_vs, reduction_check) 3304GEN_OPIVV_TRANS(vredmaxu_vs, reduction_check) 3305GEN_OPIVV_TRANS(vredmax_vs, reduction_check) 3306GEN_OPIVV_TRANS(vredminu_vs, reduction_check) 3307GEN_OPIVV_TRANS(vredmin_vs, reduction_check) 3308GEN_OPIVV_TRANS(vredand_vs, reduction_check) 3309GEN_OPIVV_TRANS(vredor_vs, reduction_check) 3310GEN_OPIVV_TRANS(vredxor_vs, reduction_check) 3311 3312/* Vector Widening Integer Reduction Instructions */ 3313static bool reduction_widen_check(DisasContext *s, arg_rmrr *a) 3314{ 3315 return reduction_check(s, a) && (s->sew < MO_64) && 3316 ((s->sew + 1) <= (s->cfg_ptr->elen >> 4)); 3317} 3318 3319GEN_OPIVV_WIDEN_TRANS(vwredsum_vs, reduction_widen_check) 3320GEN_OPIVV_WIDEN_TRANS(vwredsumu_vs, reduction_widen_check) 3321 3322/* Vector Single-Width Floating-Point Reduction Instructions */ 3323static bool freduction_check(DisasContext *s, arg_rmrr *a) 3324{ 3325 return reduction_check(s, a) && 3326 require_rvf(s); 3327} 3328 3329GEN_OPFVV_TRANS(vfredusum_vs, freduction_check) 3330GEN_OPFVV_TRANS(vfredosum_vs, freduction_check) 3331GEN_OPFVV_TRANS(vfredmax_vs, freduction_check) 3332GEN_OPFVV_TRANS(vfredmin_vs, freduction_check) 3333 3334/* Vector Widening Floating-Point Reduction Instructions */ 3335static bool freduction_widen_check(DisasContext *s, 
arg_rmrr *a) 3336{ 3337 return reduction_widen_check(s, a) && 3338 require_rvf(s) && 3339 require_scale_rvf(s); 3340} 3341 3342GEN_OPFVV_WIDEN_TRANS(vfwredusum_vs, freduction_widen_check) 3343GEN_OPFVV_WIDEN_TRANS(vfwredosum_vs, freduction_widen_check) 3344 3345/* 3346 *** Vector Mask Operations 3347 */ 3348 3349/* Vector Mask-Register Logical Instructions */ 3350#define GEN_MM_TRANS(NAME) \ 3351static bool trans_##NAME(DisasContext *s, arg_r *a) \ 3352{ \ 3353 if (require_rvv(s) && \ 3354 vext_check_isa_ill(s)) { \ 3355 uint32_t data = 0; \ 3356 gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \ 3357 \ 3358 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 3359 data = \ 3360 FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ 3361 tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ 3362 vreg_ofs(s, a->rs1), \ 3363 vreg_ofs(s, a->rs2), tcg_env, \ 3364 s->cfg_ptr->vlenb, \ 3365 s->cfg_ptr->vlenb, data, fn); \ 3366 finalize_rvv_inst(s); \ 3367 return true; \ 3368 } \ 3369 return false; \ 3370} 3371 3372GEN_MM_TRANS(vmand_mm) 3373GEN_MM_TRANS(vmnand_mm) 3374GEN_MM_TRANS(vmandn_mm) 3375GEN_MM_TRANS(vmxor_mm) 3376GEN_MM_TRANS(vmor_mm) 3377GEN_MM_TRANS(vmnor_mm) 3378GEN_MM_TRANS(vmorn_mm) 3379GEN_MM_TRANS(vmxnor_mm) 3380 3381/* Vector count population in mask vcpop */ 3382static bool trans_vcpop_m(DisasContext *s, arg_rmr *a) 3383{ 3384 if (require_rvv(s) && 3385 vext_check_isa_ill(s) && 3386 s->vstart_eq_zero) { 3387 TCGv_ptr src2, mask; 3388 TCGv dst; 3389 TCGv_i32 desc; 3390 uint32_t data = 0; 3391 data = FIELD_DP32(data, VDATA, VM, a->vm); 3392 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 3393 3394 mask = tcg_temp_new_ptr(); 3395 src2 = tcg_temp_new_ptr(); 3396 dst = dest_gpr(s, a->rd); 3397 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, 3398 s->cfg_ptr->vlenb, data)); 3399 3400 tcg_gen_addi_ptr(src2, tcg_env, vreg_ofs(s, a->rs2)); 3401 tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0)); 3402 3403 gen_helper_vcpop_m(dst, mask, src2, tcg_env, desc); 3404 gen_set_gpr(s, a->rd, dst); 3405 return true; 3406 } 3407 return false; 3408} 3409 3410/* vmfirst find-first-set mask bit */ 3411static bool trans_vfirst_m(DisasContext *s, arg_rmr *a) 3412{ 3413 if (require_rvv(s) && 3414 vext_check_isa_ill(s) && 3415 s->vstart_eq_zero) { 3416 TCGv_ptr src2, mask; 3417 TCGv dst; 3418 TCGv_i32 desc; 3419 uint32_t data = 0; 3420 data = FIELD_DP32(data, VDATA, VM, a->vm); 3421 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 3422 3423 mask = tcg_temp_new_ptr(); 3424 src2 = tcg_temp_new_ptr(); 3425 dst = dest_gpr(s, a->rd); 3426 desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, 3427 s->cfg_ptr->vlenb, data)); 3428 3429 tcg_gen_addi_ptr(src2, tcg_env, vreg_ofs(s, a->rs2)); 3430 tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0)); 3431 3432 gen_helper_vfirst_m(dst, mask, src2, tcg_env, desc); 3433 gen_set_gpr(s, a->rd, dst); 3434 return true; 3435 } 3436 return false; 3437} 3438 3439/* 3440 * vmsbf.m set-before-first mask bit 3441 * vmsif.m set-including-first mask bit 3442 * vmsof.m set-only-first mask bit 3443 */ 3444#define GEN_M_TRANS(NAME) \ 3445static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ 3446{ \ 3447 if (require_rvv(s) && \ 3448 vext_check_isa_ill(s) && \ 3449 require_vm(a->vm, a->rd) && \ 3450 (a->rd != a->rs2) && \ 3451 s->vstart_eq_zero) { \ 3452 uint32_t data = 0; \ 3453 gen_helper_gvec_3_ptr *fn = gen_helper_##NAME; \ 3454 \ 3455 data = FIELD_DP32(data, VDATA, VM, a->vm); \ 3456 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ 3457 data = \ 3458 FIELD_DP32(data, VDATA, VTA_ALL_1S, 
s->cfg_vta_all_1s);\ 3459 data = FIELD_DP32(data, VDATA, VMA, s->vma); \ 3460 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), \ 3461 vreg_ofs(s, 0), vreg_ofs(s, a->rs2), \ 3462 tcg_env, s->cfg_ptr->vlenb, \ 3463 s->cfg_ptr->vlenb, \ 3464 data, fn); \ 3465 finalize_rvv_inst(s); \ 3466 return true; \ 3467 } \ 3468 return false; \ 3469} 3470 3471GEN_M_TRANS(vmsbf_m) 3472GEN_M_TRANS(vmsif_m) 3473GEN_M_TRANS(vmsof_m) 3474 3475/* 3476 * Vector Iota Instruction 3477 * 3478 * 1. The destination register cannot overlap the source register. 3479 * 2. If masked, cannot overlap the mask register ('v0'). 3480 * 3. An illegal instruction exception is raised if vstart is non-zero. 3481 */ 3482static bool trans_viota_m(DisasContext *s, arg_viota_m *a) 3483{ 3484 if (require_rvv(s) && 3485 vext_check_isa_ill(s) && 3486 !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs2, 1) && 3487 require_vm(a->vm, a->rd) && 3488 require_align(a->rd, s->lmul) && 3489 s->vstart_eq_zero) { 3490 uint32_t data = 0; 3491 3492 data = FIELD_DP32(data, VDATA, VM, a->vm); 3493 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 3494 data = FIELD_DP32(data, VDATA, VTA, s->vta); 3495 data = FIELD_DP32(data, VDATA, VMA, s->vma); 3496 static gen_helper_gvec_3_ptr * const fns[4] = { 3497 gen_helper_viota_m_b, gen_helper_viota_m_h, 3498 gen_helper_viota_m_w, gen_helper_viota_m_d, 3499 }; 3500 tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), 3501 vreg_ofs(s, a->rs2), tcg_env, 3502 s->cfg_ptr->vlenb, 3503 s->cfg_ptr->vlenb, data, fns[s->sew]); 3504 finalize_rvv_inst(s); 3505 return true; 3506 } 3507 return false; 3508} 3509 3510/* Vector Element Index Instruction */ 3511static bool trans_vid_v(DisasContext *s, arg_vid_v *a) 3512{ 3513 if (require_rvv(s) && 3514 vext_check_isa_ill(s) && 3515 require_align(a->rd, s->lmul) && 3516 require_vm(a->vm, a->rd)) { 3517 uint32_t data = 0; 3518 3519 data = FIELD_DP32(data, VDATA, VM, a->vm); 3520 data = FIELD_DP32(data, VDATA, LMUL, s->lmul); 3521 data = FIELD_DP32(data, VDATA, VTA, s->vta); 3522 data = FIELD_DP32(data, VDATA, VMA, s->vma); 3523 static gen_helper_gvec_2_ptr * const fns[4] = { 3524 gen_helper_vid_v_b, gen_helper_vid_v_h, 3525 gen_helper_vid_v_w, gen_helper_vid_v_d, 3526 }; 3527 tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), 3528 tcg_env, s->cfg_ptr->vlenb, 3529 s->cfg_ptr->vlenb, 3530 data, fns[s->sew]); 3531 finalize_rvv_inst(s); 3532 return true; 3533 } 3534 return false; 3535} 3536 3537/* 3538 *** Vector Permutation Instructions 3539 */ 3540 3541static void load_element(TCGv_i64 dest, TCGv_ptr base, 3542 int ofs, int sew, bool sign) 3543{ 3544 switch (sew) { 3545 case MO_8: 3546 if (!sign) { 3547 tcg_gen_ld8u_i64(dest, base, ofs); 3548 } else { 3549 tcg_gen_ld8s_i64(dest, base, ofs); 3550 } 3551 break; 3552 case MO_16: 3553 if (!sign) { 3554 tcg_gen_ld16u_i64(dest, base, ofs); 3555 } else { 3556 tcg_gen_ld16s_i64(dest, base, ofs); 3557 } 3558 break; 3559 case MO_32: 3560 if (!sign) { 3561 tcg_gen_ld32u_i64(dest, base, ofs); 3562 } else { 3563 tcg_gen_ld32s_i64(dest, base, ofs); 3564 } 3565 break; 3566 case MO_64: 3567 tcg_gen_ld_i64(dest, base, ofs); 3568 break; 3569 default: 3570 g_assert_not_reached(); 3571 } 3572} 3573 3574/* offset of the idx element with base register r */ 3575static uint32_t endian_ofs(DisasContext *s, int r, int idx) 3576{ 3577#if HOST_BIG_ENDIAN 3578 return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew); 3579#else 3580 return vreg_ofs(s, r) + (idx << s->sew); 3581#endif 3582} 3583 3584/* adjust the index according to the endian */ 3585static void 
/* adjust the index according to the endian */
static void endian_adjust(TCGv_i32 ofs, int sew)
{
#if HOST_BIG_ENDIAN
    tcg_gen_xori_i32(ofs, ofs, 7 >> sew);
#endif
}

/* Load idx >= VLMAX ? 0 : vreg[idx] */
static void vec_element_loadx(DisasContext *s, TCGv_i64 dest,
                              int vreg, TCGv idx, int vlmax)
{
    TCGv_i32 ofs = tcg_temp_new_i32();
    TCGv_ptr base = tcg_temp_new_ptr();
    TCGv_i64 t_idx = tcg_temp_new_i64();
    TCGv_i64 t_vlmax, t_zero;

    /*
     * Mask the index to the length so that we do
     * not produce an out-of-range load.
     */
    tcg_gen_trunc_tl_i32(ofs, idx);
    tcg_gen_andi_i32(ofs, ofs, vlmax - 1);

    /* Convert the index to an offset. */
    endian_adjust(ofs, s->sew);
    tcg_gen_shli_i32(ofs, ofs, s->sew);

    /* Convert the index to a pointer. */
    tcg_gen_ext_i32_ptr(base, ofs);
    tcg_gen_add_ptr(base, base, tcg_env);

    /* Perform the load. */
    load_element(dest, base,
                 vreg_ofs(s, vreg), s->sew, false);

    /* Flush out-of-range indexing to zero. */
    t_vlmax = tcg_constant_i64(vlmax);
    t_zero = tcg_constant_i64(0);
    tcg_gen_extu_tl_i64(t_idx, idx);

    tcg_gen_movcond_i64(TCG_COND_LTU, dest, t_idx,
                        t_vlmax, dest, t_zero);
}

static void vec_element_loadi(DisasContext *s, TCGv_i64 dest,
                              int vreg, int idx, bool sign)
{
    load_element(dest, tcg_env, endian_ofs(s, vreg, idx), s->sew, sign);
}

/* Integer Scalar Move Instruction */

static void store_element(TCGv_i64 val, TCGv_ptr base,
                          int ofs, int sew)
{
    switch (sew) {
    case MO_8:
        tcg_gen_st8_i64(val, base, ofs);
        break;
    case MO_16:
        tcg_gen_st16_i64(val, base, ofs);
        break;
    case MO_32:
        tcg_gen_st32_i64(val, base, ofs);
        break;
    case MO_64:
        tcg_gen_st_i64(val, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Store vreg[idx] = val.
 * The index must be in range of VLMAX.
 */
static void vec_element_storei(DisasContext *s, int vreg,
                               int idx, TCGv_i64 val)
{
    store_element(val, tcg_env, endian_ofs(s, vreg, idx), s->sew);
}

/* vmv.x.s rd, vs2 # x[rd] = vs2[0] */
static bool trans_vmv_x_s(DisasContext *s, arg_vmv_x_s *a)
{
    if (require_rvv(s) &&
        vext_check_isa_ill(s)) {
        TCGv_i64 t1;
        TCGv dest;

        t1 = tcg_temp_new_i64();
        dest = tcg_temp_new();
        /*
         * load vreg and sign-extend to 64 bits,
         * then truncate to XLEN bits before storing to gpr.
         */
        vec_element_loadi(s, t1, a->rs2, 0, true);
        tcg_gen_trunc_i64_tl(dest, t1);
        gen_set_gpr(s, a->rd, dest);
        tcg_gen_movi_tl(cpu_vstart, 0);
        finalize_rvv_inst(s);
        return true;
    }
    return false;
}

/* vmv.s.x vd, rs1 # vd[0] = rs1 */
static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a)
{
    if (require_rvv(s) &&
        vext_check_isa_ill(s)) {
        /* This instruction ignores LMUL and vector register groups */
        TCGv_i64 t1;
        TCGv s1;
        TCGLabel *over = gen_new_label();

        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

        t1 = tcg_temp_new_i64();

        /*
         * load gpr and sign-extend to 64 bits,
         * then truncate to SEW bits when storing to vreg.
         */
        s1 = get_gpr(s, a->rs1, EXT_NONE);
        tcg_gen_ext_tl_i64(t1, s1);
        vec_element_storei(s, a->rd, 0, t1);
        gen_set_label(over);
        tcg_gen_movi_tl(cpu_vstart, 0);
        finalize_rvv_inst(s);
        return true;
    }
    return false;
}
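/*
 * Illustrative example of the integer scalar moves: with SEW=8,
 * vmv.s.x vd, a0 stores only the low 8 bits of a0 into vd[0], while
 * vmv.x.s rd, vs2 sign-extends vs2[0] from 8 bits to XLEN before
 * writing rd.
 */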
/* Floating-Point Scalar Move Instructions */
static bool trans_vfmv_f_s(DisasContext *s, arg_vfmv_f_s *a)
{
    if (require_rvv(s) &&
        require_rvf(s) &&
        vext_check_isa_ill(s)) {
        gen_set_rm(s, RISCV_FRM_DYN);

        unsigned int ofs = (8 << s->sew);
        unsigned int len = 64 - ofs;
        TCGv_i64 t_nan;

        vec_element_loadi(s, cpu_fpr[a->rd], a->rs2, 0, false);
        /* NaN-box f[rd] as necessary for SEW */
        if (len) {
            t_nan = tcg_constant_i64(UINT64_MAX);
            tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rd],
                                t_nan, ofs, len);
        }

        mark_fs_dirty(s);
        tcg_gen_movi_tl(cpu_vstart, 0);
        finalize_rvv_inst(s);
        return true;
    }
    return false;
}

/* vfmv.s.f vd, rs1 # vd[0] = rs1 (vs2=0) */
static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a)
{
    if (require_rvv(s) &&
        require_rvf(s) &&
        vext_check_isa_ill(s)) {
        gen_set_rm(s, RISCV_FRM_DYN);

        /* The instructions ignore LMUL and vector register group. */
        TCGv_i64 t1;
        TCGLabel *over = gen_new_label();

        /* if vstart >= vl, skip vector register write back */
        tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);

        /* NaN-box f[rs1] */
        t1 = tcg_temp_new_i64();
        do_nanbox(s, t1, cpu_fpr[a->rs1]);

        vec_element_storei(s, a->rd, 0, t1);

        gen_set_label(over);
        tcg_gen_movi_tl(cpu_vstart, 0);
        finalize_rvv_inst(s);
        return true;
    }
    return false;
}

/* Vector Slide Instructions */
static bool slideup_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_slide(s, a->rd, a->rs2, a->vm, true);
}

GEN_OPIVX_TRANS(vslideup_vx, slideup_check)
GEN_OPIVX_TRANS(vslide1up_vx, slideup_check)
GEN_OPIVI_TRANS(vslideup_vi, IMM_ZX, vslideup_vx, slideup_check)

static bool slidedown_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_slide(s, a->rd, a->rs2, a->vm, false);
}

GEN_OPIVX_TRANS(vslidedown_vx, slidedown_check)
GEN_OPIVX_TRANS(vslide1down_vx, slidedown_check)
GEN_OPIVI_TRANS(vslidedown_vi, IMM_ZX, vslidedown_vx, slidedown_check)

/* Vector Floating-Point Slide Instructions */
static bool fslideup_check(DisasContext *s, arg_rmrr *a)
{
    return slideup_check(s, a) &&
           require_rvf(s);
}

static bool fslidedown_check(DisasContext *s, arg_rmrr *a)
{
    return slidedown_check(s, a) &&
           require_rvf(s);
}

GEN_OPFVF_TRANS(vfslide1up_vf, fslideup_check)
GEN_OPFVF_TRANS(vfslide1down_vf, fslidedown_check)

/* Vector Register Gather Instruction */
static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_input_eew(s, a->rs1, s->sew, a->rs2, s->sew, a->vm) &&
           require_align(a->rd, s->lmul) &&
           require_align(a->rs1, s->lmul) &&
           require_align(a->rs2, s->lmul) &&
           (a->rd != a->rs2 && a->rd != a->rs1) &&
           require_vm(a->vm, a->rd);
}
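/*
 * vrgatherei16.vv always uses EEW=16 for the index operand vs1, so the
 * index register group has EMUL = (16 / SEW) * LMUL.  The "emul" value
 * computed below is log2 of that, i.e. MO_16 - sew + lmul, and must lie
 * in the legal range [-3, 3] (EMUL between 1/8 and 8).
 */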
static bool vrgatherei16_vv_check(DisasContext *s, arg_rmrr *a)
{
    int8_t emul = MO_16 - s->sew + s->lmul;
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_input_eew(s, a->rs1, MO_16, a->rs2, s->sew, a->vm) &&
           (emul >= -3 && emul <= 3) &&
           require_align(a->rd, s->lmul) &&
           require_align(a->rs1, emul) &&
           require_align(a->rs2, s->lmul) &&
           (a->rd != a->rs2 && a->rd != a->rs1) &&
           !is_overlapped(a->rd, 1 << MAX(s->lmul, 0),
                          a->rs1, 1 << MAX(emul, 0)) &&
           !is_overlapped(a->rd, 1 << MAX(s->lmul, 0),
                          a->rs2, 1 << MAX(s->lmul, 0)) &&
           require_vm(a->vm, a->rd);
}

GEN_OPIVV_TRANS(vrgather_vv, vrgather_vv_check)
GEN_OPIVV_TRANS(vrgatherei16_vv, vrgatherei16_vv_check)

static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           vext_check_input_eew(s, -1, MO_64, a->rs2, s->sew, a->vm) &&
           require_align(a->rd, s->lmul) &&
           require_align(a->rs2, s->lmul) &&
           (a->rd != a->rs2) &&
           require_vm(a->vm, a->rd);
}

/* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
static bool trans_vrgather_vx(DisasContext *s, arg_rmrr *a)
{
    if (!vrgather_vx_check(s, a)) {
        return false;
    }

    if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
        int vlmax = vext_get_vlmax(s->cfg_ptr->vlenb, s->sew, s->lmul);
        TCGv_i64 dest = tcg_temp_new_i64();

        if (a->rs1 == 0) {
            vec_element_loadi(s, dest, a->rs2, 0, false);
        } else {
            vec_element_loadx(s, dest, a->rs2, cpu_gpr[a->rs1], vlmax);
        }

        tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),
                             MAXSZ(s), MAXSZ(s), dest);
        finalize_rvv_inst(s);
    } else {
        static gen_helper_opivx * const fns[4] = {
            gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h,
            gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d
        };
        return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);
    }
    return true;
}

/* vrgather.vi vd, vs2, imm, vm # vd[i] = (imm >= VLMAX) ? 0 : vs2[imm] */
static bool trans_vrgather_vi(DisasContext *s, arg_rmrr *a)
{
    if (!vrgather_vx_check(s, a)) {
        return false;
    }

    if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) {
        int vlmax = vext_get_vlmax(s->cfg_ptr->vlenb, s->sew, s->lmul);
        if (a->rs1 >= vlmax) {
            tcg_gen_gvec_dup_imm(MO_64, vreg_ofs(s, a->rd),
                                 MAXSZ(s), MAXSZ(s), 0);
        } else {
            tcg_gen_gvec_dup_mem(s->sew, vreg_ofs(s, a->rd),
                                 endian_ofs(s, a->rs2, a->rs1),
                                 MAXSZ(s), MAXSZ(s));
        }
        finalize_rvv_inst(s);
    } else {
        static gen_helper_opivx * const fns[4] = {
            gen_helper_vrgather_vx_b, gen_helper_vrgather_vx_h,
            gen_helper_vrgather_vx_w, gen_helper_vrgather_vx_d
        };
        return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew],
                           s, IMM_ZX);
    }
    return true;
}

/*
 * Vector Compress Instruction
 *
 * The destination vector register group cannot overlap the
 * source vector register group or the source mask register.
 */
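/*
 * For example, with vs1 = 0b...0101 and vl = 4, vcompress.vm copies
 * vs2[0] and vs2[2] into vd[0] and vd[1]; the remaining elements of vd
 * follow the tail policy.
 */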
static bool vcompress_vm_check(DisasContext *s, arg_r *a)
{
    return require_rvv(s) &&
           vext_check_isa_ill(s) &&
           require_align(a->rd, s->lmul) &&
           require_align(a->rs2, s->lmul) &&
           (a->rd != a->rs2) &&
           !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs1, 1) &&
           s->vstart_eq_zero;
}

static bool trans_vcompress_vm(DisasContext *s, arg_r *a)
{
    if (vcompress_vm_check(s, a)) {
        uint32_t data = 0;
        static gen_helper_gvec_4_ptr * const fns[4] = {
            gen_helper_vcompress_vm_b, gen_helper_vcompress_vm_h,
            gen_helper_vcompress_vm_w, gen_helper_vcompress_vm_d,
        };

        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
        data = FIELD_DP32(data, VDATA, VTA, s->vta);
        tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                           vreg_ofs(s, a->rs1), vreg_ofs(s, a->rs2),
                           tcg_env, s->cfg_ptr->vlenb,
                           s->cfg_ptr->vlenb, data,
                           fns[s->sew]);
        finalize_rvv_inst(s);
        return true;
    }
    return false;
}

/*
 * Whole Vector Register Move Instructions depend on vtype register(vsew).
 * Thus, we need to check vill bit. (Section 16.6)
 */
#define GEN_VMV_WHOLE_TRANS(NAME, LEN)                                  \
static bool trans_##NAME(DisasContext *s, arg_##NAME * a)               \
{                                                                       \
    if (require_rvv(s) &&                                               \
        vext_check_isa_ill(s) &&                                        \
        QEMU_IS_ALIGNED(a->rd, LEN) &&                                  \
        QEMU_IS_ALIGNED(a->rs2, LEN)) {                                 \
        uint32_t maxsz = s->cfg_ptr->vlenb * LEN;                       \
        if (s->vstart_eq_zero) {                                        \
            tcg_gen_gvec_mov(s->sew, vreg_ofs(s, a->rd),                \
                             vreg_ofs(s, a->rs2), maxsz, maxsz);        \
        } else {                                                        \
            tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), \
                               tcg_env, maxsz, maxsz, 0, gen_helper_vmvr_v); \
        }                                                               \
        finalize_rvv_inst(s);                                           \
        return true;                                                    \
    }                                                                   \
    return false;                                                       \
}

GEN_VMV_WHOLE_TRANS(vmv1r_v, 1)
GEN_VMV_WHOLE_TRANS(vmv2r_v, 2)
GEN_VMV_WHOLE_TRANS(vmv4r_v, 4)
GEN_VMV_WHOLE_TRANS(vmv8r_v, 8)

static bool int_ext_check(DisasContext *s, arg_rmr *a, uint8_t div)
{
    uint8_t from = (s->sew + 3) - div;
    bool ret = require_rvv(s) &&
               (from >= 3 && from <= 8) &&
               (a->rd != a->rs2) &&
               require_align(a->rd, s->lmul) &&
               require_align(a->rs2, s->lmul - div) &&
               require_vm(a->vm, a->rd) &&
               require_noover(a->rd, s->lmul, a->rs2, s->lmul - div) &&
               vext_check_input_eew(s, -1, 0, a->rs2, s->sew, a->vm);

    return ret;
}
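/*
 * In the check above, "div" is log2 of the extension factor, so the
 * source operand has EEW = SEW >> div and EMUL = LMUL >> div, and
 * "from" is log2 of the source EEW in bits.  For example, vzext.vf4
 * with SEW=32 reads 8-bit source elements with EMUL = LMUL / 4.
 */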
static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq)
{
    uint32_t data = 0;
    gen_helper_gvec_3_ptr *fn;

    static gen_helper_gvec_3_ptr * const fns[6][4] = {
        {
            NULL, gen_helper_vzext_vf2_h,
            gen_helper_vzext_vf2_w, gen_helper_vzext_vf2_d
        },
        {
            NULL, NULL,
            gen_helper_vzext_vf4_w, gen_helper_vzext_vf4_d,
        },
        {
            NULL, NULL,
            NULL, gen_helper_vzext_vf8_d
        },
        {
            NULL, gen_helper_vsext_vf2_h,
            gen_helper_vsext_vf2_w, gen_helper_vsext_vf2_d
        },
        {
            NULL, NULL,
            gen_helper_vsext_vf4_w, gen_helper_vsext_vf4_d,
        },
        {
            NULL, NULL,
            NULL, gen_helper_vsext_vf8_d
        }
    };

    fn = fns[seq][s->sew];
    if (fn == NULL) {
        return false;
    }

    data = FIELD_DP32(data, VDATA, VM, a->vm);
    data = FIELD_DP32(data, VDATA, LMUL, s->lmul);
    data = FIELD_DP32(data, VDATA, VTA, s->vta);
    data = FIELD_DP32(data, VDATA, VMA, s->vma);

    tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0),
                       vreg_ofs(s, a->rs2), tcg_env,
                       s->cfg_ptr->vlenb,
                       s->cfg_ptr->vlenb, data, fn);

    finalize_rvv_inst(s);
    return true;
}

/* Vector Integer Extension */
#define GEN_INT_EXT_TRANS(NAME, DIV, SEQ)              \
static bool trans_##NAME(DisasContext *s, arg_rmr *a)  \
{                                                      \
    if (int_ext_check(s, a, DIV)) {                    \
        return int_ext_op(s, a, SEQ);                  \
    }                                                  \
    return false;                                      \
}

GEN_INT_EXT_TRANS(vzext_vf2, 1, 0)
GEN_INT_EXT_TRANS(vzext_vf4, 2, 1)
GEN_INT_EXT_TRANS(vzext_vf8, 3, 2)
GEN_INT_EXT_TRANS(vsext_vf2, 1, 3)
GEN_INT_EXT_TRANS(vsext_vf4, 2, 4)
GEN_INT_EXT_TRANS(vsext_vf8, 3, 5)
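/*
 * Per the fns[][] table in int_ext_op(), each extension factor is only
 * available where the source EEW is a supported width: e.g. vf8 needs
 * SEW=64 (8-bit source), while vf2 works for SEW=16/32/64.
 */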