1 /* 2 * RISC-V Vector Extension Helpers for QEMU. 3 * 4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2 or later, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "qemu/host-utils.h" 21 #include "qemu/bitops.h" 22 #include "cpu.h" 23 #include "exec/memop.h" 24 #include "exec/exec-all.h" 25 #include "exec/helper-proto.h" 26 #include "fpu/softfloat.h" 27 #include "tcg/tcg-gvec-desc.h" 28 #include "internals.h" 29 #include <math.h> 30 31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, 32 target_ulong s2) 33 { 34 int vlmax, vl; 35 RISCVCPU *cpu = env_archcpu(env); 36 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL); 37 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW); 38 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); 39 int xlen = riscv_cpu_xlen(env); 40 bool vill = (s2 >> (xlen - 1)) & 0x1; 41 target_ulong reserved = s2 & 42 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT, 43 xlen - 1 - R_VTYPE_RESERVED_SHIFT); 44 45 if (lmul & 4) { 46 /* Fractional LMUL. */ 47 if (lmul == 4 || 48 cpu->cfg.elen >> (8 - lmul) < sew) { 49 vill = true; 50 } 51 } 52 53 if ((sew > cpu->cfg.elen) 54 || vill 55 || (ediv != 0) 56 || (reserved != 0)) { 57 /* only set vill bit. 
*/ 58 env->vill = 1; 59 env->vtype = 0; 60 env->vl = 0; 61 env->vstart = 0; 62 return 0; 63 } 64 65 vlmax = vext_get_vlmax(cpu, s2); 66 if (s1 <= vlmax) { 67 vl = s1; 68 } else { 69 vl = vlmax; 70 } 71 env->vl = vl; 72 env->vtype = s2; 73 env->vstart = 0; 74 env->vill = 0; 75 return vl; 76 } 77 78 /* 79 * Note that vector data is stored in host-endian 64-bit chunks, 80 * so addressing units smaller than that needs a host-endian fixup. 81 */ 82 #if HOST_BIG_ENDIAN 83 #define H1(x) ((x) ^ 7) 84 #define H1_2(x) ((x) ^ 6) 85 #define H1_4(x) ((x) ^ 4) 86 #define H2(x) ((x) ^ 3) 87 #define H4(x) ((x) ^ 1) 88 #define H8(x) ((x)) 89 #else 90 #define H1(x) (x) 91 #define H1_2(x) (x) 92 #define H1_4(x) (x) 93 #define H2(x) (x) 94 #define H4(x) (x) 95 #define H8(x) (x) 96 #endif 97 98 static inline uint32_t vext_nf(uint32_t desc) 99 { 100 return FIELD_EX32(simd_data(desc), VDATA, NF); 101 } 102 103 static inline uint32_t vext_vm(uint32_t desc) 104 { 105 return FIELD_EX32(simd_data(desc), VDATA, VM); 106 } 107 108 /* 109 * Encode LMUL to lmul as following: 110 * LMUL vlmul lmul 111 * 1 000 0 112 * 2 001 1 113 * 4 010 2 114 * 8 011 3 115 * - 100 - 116 * 1/8 101 -3 117 * 1/4 110 -2 118 * 1/2 111 -1 119 */ 120 static inline int32_t vext_lmul(uint32_t desc) 121 { 122 return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); 123 } 124 125 static inline uint32_t vext_vta(uint32_t desc) 126 { 127 return FIELD_EX32(simd_data(desc), VDATA, VTA); 128 } 129 130 static inline uint32_t vext_vma(uint32_t desc) 131 { 132 return FIELD_EX32(simd_data(desc), VDATA, VMA); 133 } 134 135 static inline uint32_t vext_vta_all_1s(uint32_t desc) 136 { 137 return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); 138 } 139 140 /* 141 * Get the maximum number of elements can be operated. 142 * 143 * log2_esz: log2 of element size in bytes. 
144 */ 145 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) 146 { 147 /* 148 * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits. 149 * so vlen in bytes (vlenb) is encoded as maxsz. 150 */ 151 uint32_t vlenb = simd_maxsz(desc); 152 153 /* Return VLMAX */ 154 int scale = vext_lmul(desc) - log2_esz; 155 return scale < 0 ? vlenb >> -scale : vlenb << scale; 156 } 157 158 /* 159 * Get number of total elements, including prestart, body and tail elements. 160 * Note that when LMUL < 1, the tail includes the elements past VLMAX that 161 * are held in the same vector register. 162 */ 163 static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, 164 uint32_t esz) 165 { 166 uint32_t vlenb = simd_maxsz(desc); 167 uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); 168 int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 : 169 ctzl(esz) - ctzl(sew) + vext_lmul(desc); 170 return (vlenb << emul) / esz; 171 } 172 173 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) 174 { 175 return (addr & env->cur_pmmask) | env->cur_pmbase; 176 } 177 178 /* 179 * This function checks watchpoint before real load operation. 180 * 181 * In softmmu mode, the TLB API probe_access is enough for watchpoint check. 182 * In user mode, there is no watchpoint support now. 183 * 184 * It will trigger an exception if there is no mapping in TLB 185 * and page table walk can't fill the TLB entry. Then the guest 186 * software can return here after process the exception or never return. 
187 */ 188 static void probe_pages(CPURISCVState *env, target_ulong addr, 189 target_ulong len, uintptr_t ra, 190 MMUAccessType access_type) 191 { 192 target_ulong pagelen = -(addr | TARGET_PAGE_MASK); 193 target_ulong curlen = MIN(pagelen, len); 194 195 probe_access(env, adjust_addr(env, addr), curlen, access_type, 196 cpu_mmu_index(env, false), ra); 197 if (len > curlen) { 198 addr += curlen; 199 curlen = len - curlen; 200 probe_access(env, adjust_addr(env, addr), curlen, access_type, 201 cpu_mmu_index(env, false), ra); 202 } 203 } 204 205 /* set agnostic elements to 1s */ 206 static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, 207 uint32_t tot) 208 { 209 if (is_agnostic == 0) { 210 /* policy undisturbed */ 211 return; 212 } 213 if (tot - cnt == 0) { 214 return ; 215 } 216 memset(base + cnt, -1, tot - cnt); 217 } 218 219 static inline void vext_set_elem_mask(void *v0, int index, 220 uint8_t value) 221 { 222 int idx = index / 64; 223 int pos = index % 64; 224 uint64_t old = ((uint64_t *)v0)[idx]; 225 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); 226 } 227 228 /* 229 * Earlier designs (pre-0.9) had a varying number of bits 230 * per mask value (MLEN). In the 0.9 design, MLEN=1. 
231 * (Section 4.5) 232 */ 233 static inline int vext_elem_mask(void *v0, int index) 234 { 235 int idx = index / 64; 236 int pos = index % 64; 237 return (((uint64_t *)v0)[idx] >> pos) & 1; 238 } 239 240 /* elements operations for load and store */ 241 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, 242 uint32_t idx, void *vd, uintptr_t retaddr); 243 244 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 245 static void NAME(CPURISCVState *env, abi_ptr addr, \ 246 uint32_t idx, void *vd, uintptr_t retaddr)\ 247 { \ 248 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 249 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 250 } \ 251 252 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) 253 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw) 254 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) 255 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq) 256 257 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 258 static void NAME(CPURISCVState *env, abi_ptr addr, \ 259 uint32_t idx, void *vd, uintptr_t retaddr)\ 260 { \ 261 ETYPE data = *((ETYPE *)vd + H(idx)); \ 262 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 263 } 264 265 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) 266 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) 267 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) 268 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) 269 270 /* 271 *** stride: access vector element from strided memory 272 */ 273 static void 274 vext_ldst_stride(void *vd, void *v0, target_ulong base, 275 target_ulong stride, CPURISCVState *env, 276 uint32_t desc, uint32_t vm, 277 vext_ldst_elem_fn *ldst_elem, 278 uint32_t log2_esz, uintptr_t ra) 279 { 280 uint32_t i, k; 281 uint32_t nf = vext_nf(desc); 282 uint32_t max_elems = vext_max_elems(desc, log2_esz); 283 uint32_t esz = 1 << log2_esz; 284 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 285 uint32_t vta = vext_vta(desc); 286 uint32_t vma = vext_vma(desc); 287 288 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 289 k = 0; 290 while (k < nf) { 291 if (!vm && 
!vext_elem_mask(v0, i)) { 292 /* set masked-off elements to 1s */ 293 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 294 (i + k * max_elems + 1) * esz); 295 k++; 296 continue; 297 } 298 target_ulong addr = base + stride * i + (k << log2_esz); 299 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 300 k++; 301 } 302 } 303 env->vstart = 0; 304 /* set tail elements to 1s */ 305 for (k = 0; k < nf; ++k) { 306 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, 307 (k * max_elems + max_elems) * esz); 308 } 309 if (nf * max_elems % total_elems != 0) { 310 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 311 uint32_t registers_used = 312 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 313 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 314 registers_used * vlenb); 315 } 316 } 317 318 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 319 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 320 target_ulong stride, CPURISCVState *env, \ 321 uint32_t desc) \ 322 { \ 323 uint32_t vm = vext_vm(desc); \ 324 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 325 ctzl(sizeof(ETYPE)), GETPC()); \ 326 } 327 328 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 329 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 330 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 331 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 332 333 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 334 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 335 target_ulong stride, CPURISCVState *env, \ 336 uint32_t desc) \ 337 { \ 338 uint32_t vm = vext_vm(desc); \ 339 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 340 ctzl(sizeof(ETYPE)), GETPC()); \ 341 } 342 343 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 344 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 345 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 346 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 347 348 /* 349 *** unit-stride: access elements stored contiguously in memory 350 */ 351 352 /* 
unmasked unit-stride load and store operation*/ 353 static void 354 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 355 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, 356 uintptr_t ra) 357 { 358 uint32_t i, k; 359 uint32_t nf = vext_nf(desc); 360 uint32_t max_elems = vext_max_elems(desc, log2_esz); 361 uint32_t esz = 1 << log2_esz; 362 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 363 uint32_t vta = vext_vta(desc); 364 365 /* load bytes from guest memory */ 366 for (i = env->vstart; i < evl; i++, env->vstart++) { 367 k = 0; 368 while (k < nf) { 369 target_ulong addr = base + ((i * nf + k) << log2_esz); 370 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 371 k++; 372 } 373 } 374 env->vstart = 0; 375 /* set tail elements to 1s */ 376 for (k = 0; k < nf; ++k) { 377 vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz, 378 (k * max_elems + max_elems) * esz); 379 } 380 if (nf * max_elems % total_elems != 0) { 381 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 382 uint32_t registers_used = 383 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 384 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 385 registers_used * vlenb); 386 } 387 } 388 389 /* 390 * masked unit-stride load and store operation will be a special case of stride, 391 * stride = NF * sizeof (MTYPE) 392 */ 393 394 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 395 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 396 CPURISCVState *env, uint32_t desc) \ 397 { \ 398 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 399 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 400 ctzl(sizeof(ETYPE)), GETPC()); \ 401 } \ 402 \ 403 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 404 CPURISCVState *env, uint32_t desc) \ 405 { \ 406 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 407 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 408 } 409 410 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 411 
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 412 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 413 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 414 415 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 416 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 417 CPURISCVState *env, uint32_t desc) \ 418 { \ 419 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 420 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 421 ctzl(sizeof(ETYPE)), GETPC()); \ 422 } \ 423 \ 424 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 425 CPURISCVState *env, uint32_t desc) \ 426 { \ 427 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 428 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 429 } 430 431 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 432 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 433 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 434 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 435 436 /* 437 *** unit stride mask load and store, EEW = 1 438 */ 439 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 440 CPURISCVState *env, uint32_t desc) 441 { 442 /* evl = ceil(vl/8) */ 443 uint8_t evl = (env->vl + 7) >> 3; 444 vext_ldst_us(vd, base, env, desc, lde_b, 445 0, evl, GETPC()); 446 } 447 448 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 449 CPURISCVState *env, uint32_t desc) 450 { 451 /* evl = ceil(vl/8) */ 452 uint8_t evl = (env->vl + 7) >> 3; 453 vext_ldst_us(vd, base, env, desc, ste_b, 454 0, evl, GETPC()); 455 } 456 457 /* 458 *** index: access vector element from indexed memory 459 */ 460 typedef target_ulong vext_get_index_addr(target_ulong base, 461 uint32_t idx, void *vs2); 462 463 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 464 static target_ulong NAME(target_ulong base, \ 465 uint32_t idx, void *vs2) \ 466 { \ 467 return (base + *((ETYPE *)vs2 + H(idx))); \ 468 } 469 470 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 471 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 472 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 473 GEN_VEXT_GET_INDEX_ADDR(idx_d, 
uint64_t, H8) 474 475 static inline void 476 vext_ldst_index(void *vd, void *v0, target_ulong base, 477 void *vs2, CPURISCVState *env, uint32_t desc, 478 vext_get_index_addr get_index_addr, 479 vext_ldst_elem_fn *ldst_elem, 480 uint32_t log2_esz, uintptr_t ra) 481 { 482 uint32_t i, k; 483 uint32_t nf = vext_nf(desc); 484 uint32_t vm = vext_vm(desc); 485 uint32_t max_elems = vext_max_elems(desc, log2_esz); 486 uint32_t esz = 1 << log2_esz; 487 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 488 uint32_t vta = vext_vta(desc); 489 uint32_t vma = vext_vma(desc); 490 491 /* load bytes from guest memory */ 492 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 493 k = 0; 494 while (k < nf) { 495 if (!vm && !vext_elem_mask(v0, i)) { 496 /* set masked-off elements to 1s */ 497 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 498 (i + k * max_elems + 1) * esz); 499 k++; 500 continue; 501 } 502 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); 503 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 504 k++; 505 } 506 } 507 env->vstart = 0; 508 /* set tail elements to 1s */ 509 for (k = 0; k < nf; ++k) { 510 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, 511 (k * max_elems + max_elems) * esz); 512 } 513 if (nf * max_elems % total_elems != 0) { 514 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 515 uint32_t registers_used = 516 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 517 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 518 registers_used * vlenb); 519 } 520 } 521 522 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 523 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 524 void *vs2, CPURISCVState *env, uint32_t desc) \ 525 { \ 526 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 527 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ 528 } 529 530 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 531 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 532 
GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 533 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 534 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 535 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 536 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 537 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 538 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 539 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 540 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 541 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 542 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 543 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 544 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 545 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 546 547 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 548 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 549 void *vs2, CPURISCVState *env, uint32_t desc) \ 550 { \ 551 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 552 STORE_FN, ctzl(sizeof(ETYPE)), \ 553 GETPC()); \ 554 } 555 556 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b) 557 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h) 558 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w) 559 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d) 560 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b) 561 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h) 562 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w) 563 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d) 564 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b) 565 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h) 566 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w) 567 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d) 568 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b) 569 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h) 570 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, 
ste_w) 571 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d) 572 573 /* 574 *** unit-stride fault-only-fisrt load instructions 575 */ 576 static inline void 577 vext_ldff(void *vd, void *v0, target_ulong base, 578 CPURISCVState *env, uint32_t desc, 579 vext_ldst_elem_fn *ldst_elem, 580 uint32_t log2_esz, uintptr_t ra) 581 { 582 void *host; 583 uint32_t i, k, vl = 0; 584 uint32_t nf = vext_nf(desc); 585 uint32_t vm = vext_vm(desc); 586 uint32_t max_elems = vext_max_elems(desc, log2_esz); 587 uint32_t esz = 1 << log2_esz; 588 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 589 uint32_t vta = vext_vta(desc); 590 uint32_t vma = vext_vma(desc); 591 target_ulong addr, offset, remain; 592 593 /* probe every access*/ 594 for (i = env->vstart; i < env->vl; i++) { 595 if (!vm && !vext_elem_mask(v0, i)) { 596 continue; 597 } 598 addr = adjust_addr(env, base + i * (nf << log2_esz)); 599 if (i == 0) { 600 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD); 601 } else { 602 /* if it triggers an exception, no need to check watchpoint */ 603 remain = nf << log2_esz; 604 while (remain > 0) { 605 offset = -(addr | TARGET_PAGE_MASK); 606 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, 607 cpu_mmu_index(env, false)); 608 if (host) { 609 #ifdef CONFIG_USER_ONLY 610 if (page_check_range(addr, offset, PAGE_READ) < 0) { 611 vl = i; 612 goto ProbeSuccess; 613 } 614 #else 615 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD); 616 #endif 617 } else { 618 vl = i; 619 goto ProbeSuccess; 620 } 621 if (remain <= offset) { 622 break; 623 } 624 remain -= offset; 625 addr = adjust_addr(env, addr + offset); 626 } 627 } 628 } 629 ProbeSuccess: 630 /* load bytes from guest memory */ 631 if (vl != 0) { 632 env->vl = vl; 633 } 634 for (i = env->vstart; i < env->vl; i++) { 635 k = 0; 636 while (k < nf) { 637 if (!vm && !vext_elem_mask(v0, i)) { 638 /* set masked-off elements to 1s */ 639 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 640 (i + k * max_elems + 1) * esz); 
641 k++; 642 continue; 643 } 644 target_ulong addr = base + ((i * nf + k) << log2_esz); 645 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 646 k++; 647 } 648 } 649 env->vstart = 0; 650 /* set tail elements to 1s */ 651 for (k = 0; k < nf; ++k) { 652 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, 653 (k * max_elems + max_elems) * esz); 654 } 655 if (nf * max_elems % total_elems != 0) { 656 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 657 uint32_t registers_used = 658 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 659 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 660 registers_used * vlenb); 661 } 662 } 663 664 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ 665 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 666 CPURISCVState *env, uint32_t desc) \ 667 { \ 668 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \ 669 ctzl(sizeof(ETYPE)), GETPC()); \ 670 } 671 672 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b) 673 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h) 674 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w) 675 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) 676 677 #define DO_SWAP(N, M) (M) 678 #define DO_AND(N, M) (N & M) 679 #define DO_XOR(N, M) (N ^ M) 680 #define DO_OR(N, M) (N | M) 681 #define DO_ADD(N, M) (N + M) 682 683 /* Signed min/max */ 684 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 685 #define DO_MIN(N, M) ((N) >= (M) ? 
(M) : (N)) 686 687 /* Unsigned min/max */ 688 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 689 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 690 691 /* 692 *** load and store whole register instructions 693 */ 694 static void 695 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 696 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra) 697 { 698 uint32_t i, k, off, pos; 699 uint32_t nf = vext_nf(desc); 700 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 701 uint32_t max_elems = vlenb >> log2_esz; 702 703 k = env->vstart / max_elems; 704 off = env->vstart % max_elems; 705 706 if (off) { 707 /* load/store rest of elements of current segment pointed by vstart */ 708 for (pos = off; pos < max_elems; pos++, env->vstart++) { 709 target_ulong addr = base + ((pos + k * max_elems) << log2_esz); 710 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra); 711 } 712 k++; 713 } 714 715 /* load/store elements for rest of segments */ 716 for (; k < nf; k++) { 717 for (i = 0; i < max_elems; i++, env->vstart++) { 718 target_ulong addr = base + ((i + k * max_elems) << log2_esz); 719 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 720 } 721 } 722 723 env->vstart = 0; 724 } 725 726 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 727 void HELPER(NAME)(void *vd, target_ulong base, \ 728 CPURISCVState *env, uint32_t desc) \ 729 { \ 730 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 731 ctzl(sizeof(ETYPE)), GETPC()); \ 732 } 733 734 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 735 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 736 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 737 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 738 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 739 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 740 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 741 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 742 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 743 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 
744 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 745 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 746 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 747 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 748 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 749 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 750 751 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 752 void HELPER(NAME)(void *vd, target_ulong base, \ 753 CPURISCVState *env, uint32_t desc) \ 754 { \ 755 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 756 ctzl(sizeof(ETYPE)), GETPC()); \ 757 } 758 759 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 760 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 761 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 762 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 763 764 /* 765 *** Vector Integer Arithmetic Instructions 766 */ 767 768 /* expand macro args before macro */ 769 #define RVVCALL(macro, ...) macro(__VA_ARGS__) 770 771 /* (TD, T1, T2, TX1, TX2) */ 772 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 773 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 774 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 775 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 776 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 777 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 778 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 779 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 780 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 781 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 782 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 783 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 784 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 785 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 786 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 787 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 
788 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 789 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 790 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 791 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 792 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 793 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 794 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 795 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 796 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 797 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 798 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 799 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 800 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 801 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 802 803 /* operation of two vector elements */ 804 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 805 806 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 807 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 808 { \ 809 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 810 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 811 *((TD *)vd + HD(i)) = OP(s2, s1); \ 812 } 813 #define DO_SUB(N, M) (N - M) 814 #define DO_RSUB(N, M) (M - N) 815 816 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 817 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 818 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 819 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 820 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 821 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 822 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 823 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 824 825 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 826 CPURISCVState *env, uint32_t desc, 827 
opivv2_fn *fn, uint32_t esz) 828 { 829 uint32_t vm = vext_vm(desc); 830 uint32_t vl = env->vl; 831 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 832 uint32_t vta = vext_vta(desc); 833 uint32_t vma = vext_vma(desc); 834 uint32_t i; 835 836 for (i = env->vstart; i < vl; i++) { 837 if (!vm && !vext_elem_mask(v0, i)) { 838 /* set masked-off elements to 1s */ 839 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 840 continue; 841 } 842 fn(vd, vs1, vs2, i); 843 } 844 env->vstart = 0; 845 /* set tail elements to 1s */ 846 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 847 } 848 849 /* generate the helpers for OPIVV */ 850 #define GEN_VEXT_VV(NAME, ESZ) \ 851 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 852 void *vs2, CPURISCVState *env, \ 853 uint32_t desc) \ 854 { \ 855 do_vext_vv(vd, v0, vs1, vs2, env, desc, \ 856 do_##NAME, ESZ); \ 857 } 858 859 GEN_VEXT_VV(vadd_vv_b, 1) 860 GEN_VEXT_VV(vadd_vv_h, 2) 861 GEN_VEXT_VV(vadd_vv_w, 4) 862 GEN_VEXT_VV(vadd_vv_d, 8) 863 GEN_VEXT_VV(vsub_vv_b, 1) 864 GEN_VEXT_VV(vsub_vv_h, 2) 865 GEN_VEXT_VV(vsub_vv_w, 4) 866 GEN_VEXT_VV(vsub_vv_d, 8) 867 868 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 869 870 /* 871 * (T1)s1 gives the real operator type. 872 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 
873 */ 874 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 875 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 876 { \ 877 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 878 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 879 } 880 881 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 882 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 883 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 884 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 885 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 886 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 887 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 888 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 889 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 890 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 891 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 892 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 893 894 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 895 CPURISCVState *env, uint32_t desc, 896 opivx2_fn fn, uint32_t esz) 897 { 898 uint32_t vm = vext_vm(desc); 899 uint32_t vl = env->vl; 900 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 901 uint32_t vta = vext_vta(desc); 902 uint32_t vma = vext_vma(desc); 903 uint32_t i; 904 905 for (i = env->vstart; i < vl; i++) { 906 if (!vm && !vext_elem_mask(v0, i)) { 907 /* set masked-off elements to 1s */ 908 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 909 continue; 910 } 911 fn(vd, s1, vs2, i); 912 } 913 env->vstart = 0; 914 /* set tail elements to 1s */ 915 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 916 } 917 918 /* generate the helpers for OPIVX */ 919 #define GEN_VEXT_VX(NAME, ESZ) \ 920 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 921 void *vs2, CPURISCVState *env, \ 922 uint32_t desc) \ 923 { \ 924 do_vext_vx(vd, v0, s1, vs2, env, desc, \ 925 do_##NAME, ESZ); \ 926 } 927 928 GEN_VEXT_VX(vadd_vx_b, 1) 929 
GEN_VEXT_VX(vadd_vx_h, 2) 930 GEN_VEXT_VX(vadd_vx_w, 4) 931 GEN_VEXT_VX(vadd_vx_d, 8) 932 GEN_VEXT_VX(vsub_vx_b, 1) 933 GEN_VEXT_VX(vsub_vx_h, 2) 934 GEN_VEXT_VX(vsub_vx_w, 4) 935 GEN_VEXT_VX(vsub_vx_d, 8) 936 GEN_VEXT_VX(vrsub_vx_b, 1) 937 GEN_VEXT_VX(vrsub_vx_h, 2) 938 GEN_VEXT_VX(vrsub_vx_w, 4) 939 GEN_VEXT_VX(vrsub_vx_d, 8) 940 941 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 942 { 943 intptr_t oprsz = simd_oprsz(desc); 944 intptr_t i; 945 946 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 947 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 948 } 949 } 950 951 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 952 { 953 intptr_t oprsz = simd_oprsz(desc); 954 intptr_t i; 955 956 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 957 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 958 } 959 } 960 961 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 962 { 963 intptr_t oprsz = simd_oprsz(desc); 964 intptr_t i; 965 966 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 967 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 968 } 969 } 970 971 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 972 { 973 intptr_t oprsz = simd_oprsz(desc); 974 intptr_t i; 975 976 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 977 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 978 } 979 } 980 981 /* Vector Widening Integer Add/Subtract */ 982 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 983 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 984 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 985 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 986 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 987 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 988 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 989 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 990 
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 991 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 992 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 993 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 994 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 995 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 996 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 997 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 998 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 999 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 1000 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 1001 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 1002 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 1003 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 1004 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 1005 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 1006 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 1007 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 1008 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 1009 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 1010 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 1011 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 1012 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 1013 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 1014 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 1015 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 1016 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 1017 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 1018 GEN_VEXT_VV(vwaddu_vv_b, 2) 1019 GEN_VEXT_VV(vwaddu_vv_h, 4) 1020 GEN_VEXT_VV(vwaddu_vv_w, 8) 1021 GEN_VEXT_VV(vwsubu_vv_b, 2) 1022 GEN_VEXT_VV(vwsubu_vv_h, 4) 1023 
GEN_VEXT_VV(vwsubu_vv_w, 8) 1024 GEN_VEXT_VV(vwadd_vv_b, 2) 1025 GEN_VEXT_VV(vwadd_vv_h, 4) 1026 GEN_VEXT_VV(vwadd_vv_w, 8) 1027 GEN_VEXT_VV(vwsub_vv_b, 2) 1028 GEN_VEXT_VV(vwsub_vv_h, 4) 1029 GEN_VEXT_VV(vwsub_vv_w, 8) 1030 GEN_VEXT_VV(vwaddu_wv_b, 2) 1031 GEN_VEXT_VV(vwaddu_wv_h, 4) 1032 GEN_VEXT_VV(vwaddu_wv_w, 8) 1033 GEN_VEXT_VV(vwsubu_wv_b, 2) 1034 GEN_VEXT_VV(vwsubu_wv_h, 4) 1035 GEN_VEXT_VV(vwsubu_wv_w, 8) 1036 GEN_VEXT_VV(vwadd_wv_b, 2) 1037 GEN_VEXT_VV(vwadd_wv_h, 4) 1038 GEN_VEXT_VV(vwadd_wv_w, 8) 1039 GEN_VEXT_VV(vwsub_wv_b, 2) 1040 GEN_VEXT_VV(vwsub_wv_h, 4) 1041 GEN_VEXT_VV(vwsub_wv_w, 8) 1042 1043 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 1044 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 1045 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 1046 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 1047 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 1048 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 1049 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 1050 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 1051 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 1052 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 1053 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 1054 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 1055 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 1056 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 1057 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 1058 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 1059 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 1060 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 1061 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 1062 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 1063 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 1064 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 1065 RVVCALL(OPIVX2, vwsub_wx_h, 
WOP_WSSS_H, H4, H2, DO_SUB) 1066 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 1067 GEN_VEXT_VX(vwaddu_vx_b, 2) 1068 GEN_VEXT_VX(vwaddu_vx_h, 4) 1069 GEN_VEXT_VX(vwaddu_vx_w, 8) 1070 GEN_VEXT_VX(vwsubu_vx_b, 2) 1071 GEN_VEXT_VX(vwsubu_vx_h, 4) 1072 GEN_VEXT_VX(vwsubu_vx_w, 8) 1073 GEN_VEXT_VX(vwadd_vx_b, 2) 1074 GEN_VEXT_VX(vwadd_vx_h, 4) 1075 GEN_VEXT_VX(vwadd_vx_w, 8) 1076 GEN_VEXT_VX(vwsub_vx_b, 2) 1077 GEN_VEXT_VX(vwsub_vx_h, 4) 1078 GEN_VEXT_VX(vwsub_vx_w, 8) 1079 GEN_VEXT_VX(vwaddu_wx_b, 2) 1080 GEN_VEXT_VX(vwaddu_wx_h, 4) 1081 GEN_VEXT_VX(vwaddu_wx_w, 8) 1082 GEN_VEXT_VX(vwsubu_wx_b, 2) 1083 GEN_VEXT_VX(vwsubu_wx_h, 4) 1084 GEN_VEXT_VX(vwsubu_wx_w, 8) 1085 GEN_VEXT_VX(vwadd_wx_b, 2) 1086 GEN_VEXT_VX(vwadd_wx_h, 4) 1087 GEN_VEXT_VX(vwadd_wx_w, 8) 1088 GEN_VEXT_VX(vwsub_wx_b, 2) 1089 GEN_VEXT_VX(vwsub_wx_h, 4) 1090 GEN_VEXT_VX(vwsub_wx_w, 8) 1091 1092 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 1093 #define DO_VADC(N, M, C) (N + M + C) 1094 #define DO_VSBC(N, M, C) (N - M - C) 1095 1096 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 1097 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1098 CPURISCVState *env, uint32_t desc) \ 1099 { \ 1100 uint32_t vl = env->vl; \ 1101 uint32_t esz = sizeof(ETYPE); \ 1102 uint32_t total_elems = \ 1103 vext_get_total_elems(env, desc, esz); \ 1104 uint32_t vta = vext_vta(desc); \ 1105 uint32_t i; \ 1106 \ 1107 for (i = env->vstart; i < vl; i++) { \ 1108 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1109 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1110 ETYPE carry = vext_elem_mask(v0, i); \ 1111 \ 1112 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 1113 } \ 1114 env->vstart = 0; \ 1115 /* set tail elements to 1s */ \ 1116 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1117 } 1118 1119 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 1120 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 1121 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 1122 
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 1123 1124 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 1125 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 1126 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 1127 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 1128 1129 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 1130 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1131 CPURISCVState *env, uint32_t desc) \ 1132 { \ 1133 uint32_t vl = env->vl; \ 1134 uint32_t esz = sizeof(ETYPE); \ 1135 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1136 uint32_t vta = vext_vta(desc); \ 1137 uint32_t i; \ 1138 \ 1139 for (i = env->vstart; i < vl; i++) { \ 1140 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1141 ETYPE carry = vext_elem_mask(v0, i); \ 1142 \ 1143 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1144 } \ 1145 env->vstart = 0; \ 1146 /* set tail elements to 1s */ \ 1147 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1148 } 1149 1150 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1151 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1152 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1153 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1154 1155 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1156 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1157 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1158 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1159 1160 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1161 (__typeof(N))(N + M) < N) 1162 #define DO_MSBC(N, M, C) (C ? 
N <= M : N < M) 1163 1164 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1165 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1166 CPURISCVState *env, uint32_t desc) \ 1167 { \ 1168 uint32_t vl = env->vl; \ 1169 uint32_t vm = vext_vm(desc); \ 1170 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1171 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1172 uint32_t i; \ 1173 \ 1174 for (i = env->vstart; i < vl; i++) { \ 1175 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1176 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1177 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1178 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1179 } \ 1180 env->vstart = 0; \ 1181 /* mask destination register are always tail-agnostic */ \ 1182 /* set tail elements to 1s */ \ 1183 if (vta_all_1s) { \ 1184 for (; i < total_elems; i++) { \ 1185 vext_set_elem_mask(vd, i, 1); \ 1186 } \ 1187 } \ 1188 } 1189 1190 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1191 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1192 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1193 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1194 1195 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1196 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1197 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1198 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1199 1200 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1201 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1202 void *vs2, CPURISCVState *env, uint32_t desc) \ 1203 { \ 1204 uint32_t vl = env->vl; \ 1205 uint32_t vm = vext_vm(desc); \ 1206 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1207 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1208 uint32_t i; \ 1209 \ 1210 for (i = env->vstart; i < vl; i++) { \ 1211 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1212 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1213 vext_set_elem_mask(vd, i, \ 1214 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1215 } \ 
1216 env->vstart = 0; \ 1217 /* mask destination register are always tail-agnostic */ \ 1218 /* set tail elements to 1s */ \ 1219 if (vta_all_1s) { \ 1220 for (; i < total_elems; i++) { \ 1221 vext_set_elem_mask(vd, i, 1); \ 1222 } \ 1223 } \ 1224 } 1225 1226 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1227 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1228 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1229 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1230 1231 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1232 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1233 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1234 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1235 1236 /* Vector Bitwise Logical Instructions */ 1237 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1238 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1239 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1240 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1241 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1242 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1243 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1244 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1245 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1246 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1247 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1248 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1249 GEN_VEXT_VV(vand_vv_b, 1) 1250 GEN_VEXT_VV(vand_vv_h, 2) 1251 GEN_VEXT_VV(vand_vv_w, 4) 1252 GEN_VEXT_VV(vand_vv_d, 8) 1253 GEN_VEXT_VV(vor_vv_b, 1) 1254 GEN_VEXT_VV(vor_vv_h, 2) 1255 GEN_VEXT_VV(vor_vv_w, 4) 1256 GEN_VEXT_VV(vor_vv_d, 8) 1257 GEN_VEXT_VV(vxor_vv_b, 1) 1258 GEN_VEXT_VV(vxor_vv_h, 2) 1259 GEN_VEXT_VV(vxor_vv_w, 4) 1260 GEN_VEXT_VV(vxor_vv_d, 8) 1261 1262 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1263 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, 
H2, H2, DO_AND) 1264 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1265 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1266 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1267 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1268 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1269 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1270 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1271 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1272 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1273 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1274 GEN_VEXT_VX(vand_vx_b, 1) 1275 GEN_VEXT_VX(vand_vx_h, 2) 1276 GEN_VEXT_VX(vand_vx_w, 4) 1277 GEN_VEXT_VX(vand_vx_d, 8) 1278 GEN_VEXT_VX(vor_vx_b, 1) 1279 GEN_VEXT_VX(vor_vx_h, 2) 1280 GEN_VEXT_VX(vor_vx_w, 4) 1281 GEN_VEXT_VX(vor_vx_d, 8) 1282 GEN_VEXT_VX(vxor_vx_b, 1) 1283 GEN_VEXT_VX(vxor_vx_h, 2) 1284 GEN_VEXT_VX(vxor_vx_w, 4) 1285 GEN_VEXT_VX(vxor_vx_d, 8) 1286 1287 /* Vector Single-Width Bit Shift Instructions */ 1288 #define DO_SLL(N, M) (N << (M)) 1289 #define DO_SRL(N, M) (N >> (M)) 1290 1291 /* generate the helpers for shift instructions with two vector operators */ 1292 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1293 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1294 void *vs2, CPURISCVState *env, uint32_t desc) \ 1295 { \ 1296 uint32_t vm = vext_vm(desc); \ 1297 uint32_t vl = env->vl; \ 1298 uint32_t esz = sizeof(TS1); \ 1299 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1300 uint32_t vta = vext_vta(desc); \ 1301 uint32_t vma = vext_vma(desc); \ 1302 uint32_t i; \ 1303 \ 1304 for (i = env->vstart; i < vl; i++) { \ 1305 if (!vm && !vext_elem_mask(v0, i)) { \ 1306 /* set masked-off elements to 1s */ \ 1307 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 1308 continue; \ 1309 } \ 1310 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1311 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1312 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1313 } \ 1314 
env->vstart = 0; \ 1315 /* set tail elements to 1s */ \ 1316 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1317 } 1318 1319 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1320 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1321 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1322 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1323 1324 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1325 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1326 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1327 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1328 1329 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1330 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1331 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1332 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1333 1334 /* generate the helpers for shift instructions with one vector and one scalar */ 1335 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1336 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1337 void *vs2, CPURISCVState *env, uint32_t desc) \ 1338 { \ 1339 uint32_t vm = vext_vm(desc); \ 1340 uint32_t vl = env->vl; \ 1341 uint32_t esz = sizeof(TD); \ 1342 uint32_t total_elems = \ 1343 vext_get_total_elems(env, desc, esz); \ 1344 uint32_t vta = vext_vta(desc); \ 1345 uint32_t vma = vext_vma(desc); \ 1346 uint32_t i; \ 1347 \ 1348 for (i = env->vstart; i < vl; i++) { \ 1349 if (!vm && !vext_elem_mask(v0, i)) { \ 1350 /* set masked-off elements to 1s */ \ 1351 vext_set_elems_1s(vd, vma, i * esz, \ 1352 (i + 1) * esz); \ 1353 continue; \ 1354 } \ 1355 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1356 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1357 } \ 1358 env->vstart = 0; \ 1359 /* set tail elements to 1s */ \ 1360 
vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ 1361 } 1362 1363 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1364 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1365 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1366 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1367 1368 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1369 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1370 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1371 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1372 1373 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1374 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1375 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1376 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1377 1378 /* Vector Narrowing Integer Right Shift Instructions */ 1379 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1380 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1381 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1382 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1383 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1384 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1385 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1386 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1387 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1388 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1389 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1390 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1391 1392 /* Vector Integer Comparison Instructions */ 
1393 #define DO_MSEQ(N, M) (N == M) 1394 #define DO_MSNE(N, M) (N != M) 1395 #define DO_MSLT(N, M) (N < M) 1396 #define DO_MSLE(N, M) (N <= M) 1397 #define DO_MSGT(N, M) (N > M) 1398 1399 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1400 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1401 CPURISCVState *env, uint32_t desc) \ 1402 { \ 1403 uint32_t vm = vext_vm(desc); \ 1404 uint32_t vl = env->vl; \ 1405 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1406 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1407 uint32_t vma = vext_vma(desc); \ 1408 uint32_t i; \ 1409 \ 1410 for (i = env->vstart; i < vl; i++) { \ 1411 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1412 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1413 if (!vm && !vext_elem_mask(v0, i)) { \ 1414 /* set masked-off elements to 1s */ \ 1415 if (vma) { \ 1416 vext_set_elem_mask(vd, i, 1); \ 1417 } \ 1418 continue; \ 1419 } \ 1420 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1421 } \ 1422 env->vstart = 0; \ 1423 /* mask destination register are always tail-agnostic */ \ 1424 /* set tail elements to 1s */ \ 1425 if (vta_all_1s) { \ 1426 for (; i < total_elems; i++) { \ 1427 vext_set_elem_mask(vd, i, 1); \ 1428 } \ 1429 } \ 1430 } 1431 1432 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1433 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1434 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1435 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1436 1437 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1438 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1439 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1440 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1441 1442 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1443 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1444 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1445 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1446 1447 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1448 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, 
H2, DO_MSLT) 1449 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1450 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1451 1452 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1453 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1454 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1455 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1456 1457 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1458 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1459 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1460 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1461 1462 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1463 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1464 CPURISCVState *env, uint32_t desc) \ 1465 { \ 1466 uint32_t vm = vext_vm(desc); \ 1467 uint32_t vl = env->vl; \ 1468 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1469 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1470 uint32_t vma = vext_vma(desc); \ 1471 uint32_t i; \ 1472 \ 1473 for (i = env->vstart; i < vl; i++) { \ 1474 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1475 if (!vm && !vext_elem_mask(v0, i)) { \ 1476 /* set masked-off elements to 1s */ \ 1477 if (vma) { \ 1478 vext_set_elem_mask(vd, i, 1); \ 1479 } \ 1480 continue; \ 1481 } \ 1482 vext_set_elem_mask(vd, i, \ 1483 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1484 } \ 1485 env->vstart = 0; \ 1486 /* mask destination register are always tail-agnostic */ \ 1487 /* set tail elements to 1s */ \ 1488 if (vta_all_1s) { \ 1489 for (; i < total_elems; i++) { \ 1490 vext_set_elem_mask(vd, i, 1); \ 1491 } \ 1492 } \ 1493 } 1494 1495 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1496 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1497 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1498 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1499 1500 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1501 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1502 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 
1503 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1504 1505 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1506 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1507 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1508 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1509 1510 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1511 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1512 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1513 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1514 1515 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1516 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1517 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1518 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1519 1520 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1521 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1522 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1523 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1524 1525 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1526 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1527 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1528 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1529 1530 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1531 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1532 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1533 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1534 1535 /* Vector Integer Min/Max Instructions */ 1536 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1537 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1538 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1539 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1540 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1541 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1542 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1543 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1544 RVVCALL(OPIVV2, 
vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1545 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1546 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1547 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1548 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1549 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1550 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1551 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1552 GEN_VEXT_VV(vminu_vv_b, 1) 1553 GEN_VEXT_VV(vminu_vv_h, 2) 1554 GEN_VEXT_VV(vminu_vv_w, 4) 1555 GEN_VEXT_VV(vminu_vv_d, 8) 1556 GEN_VEXT_VV(vmin_vv_b, 1) 1557 GEN_VEXT_VV(vmin_vv_h, 2) 1558 GEN_VEXT_VV(vmin_vv_w, 4) 1559 GEN_VEXT_VV(vmin_vv_d, 8) 1560 GEN_VEXT_VV(vmaxu_vv_b, 1) 1561 GEN_VEXT_VV(vmaxu_vv_h, 2) 1562 GEN_VEXT_VV(vmaxu_vv_w, 4) 1563 GEN_VEXT_VV(vmaxu_vv_d, 8) 1564 GEN_VEXT_VV(vmax_vv_b, 1) 1565 GEN_VEXT_VV(vmax_vv_h, 2) 1566 GEN_VEXT_VV(vmax_vv_w, 4) 1567 GEN_VEXT_VV(vmax_vv_d, 8) 1568 1569 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1570 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1571 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1572 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1573 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1574 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1575 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1576 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1577 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1578 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1579 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1580 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1581 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1582 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1583 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1584 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1585 GEN_VEXT_VX(vminu_vx_b, 1) 1586 GEN_VEXT_VX(vminu_vx_h, 2) 1587 
GEN_VEXT_VX(vminu_vx_w, 4) 1588 GEN_VEXT_VX(vminu_vx_d, 8) 1589 GEN_VEXT_VX(vmin_vx_b, 1) 1590 GEN_VEXT_VX(vmin_vx_h, 2) 1591 GEN_VEXT_VX(vmin_vx_w, 4) 1592 GEN_VEXT_VX(vmin_vx_d, 8) 1593 GEN_VEXT_VX(vmaxu_vx_b, 1) 1594 GEN_VEXT_VX(vmaxu_vx_h, 2) 1595 GEN_VEXT_VX(vmaxu_vx_w, 4) 1596 GEN_VEXT_VX(vmaxu_vx_d, 8) 1597 GEN_VEXT_VX(vmax_vx_b, 1) 1598 GEN_VEXT_VX(vmax_vx_h, 2) 1599 GEN_VEXT_VX(vmax_vx_w, 4) 1600 GEN_VEXT_VX(vmax_vx_d, 8) 1601 1602 /* Vector Single-Width Integer Multiply Instructions */ 1603 #define DO_MUL(N, M) (N * M) 1604 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1605 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1606 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1607 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1608 GEN_VEXT_VV(vmul_vv_b, 1) 1609 GEN_VEXT_VV(vmul_vv_h, 2) 1610 GEN_VEXT_VV(vmul_vv_w, 4) 1611 GEN_VEXT_VV(vmul_vv_d, 8) 1612 1613 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1614 { 1615 return (int16_t)s2 * (int16_t)s1 >> 8; 1616 } 1617 1618 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1619 { 1620 return (int32_t)s2 * (int32_t)s1 >> 16; 1621 } 1622 1623 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1624 { 1625 return (int64_t)s2 * (int64_t)s1 >> 32; 1626 } 1627 1628 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1629 { 1630 uint64_t hi_64, lo_64; 1631 1632 muls64(&lo_64, &hi_64, s1, s2); 1633 return hi_64; 1634 } 1635 1636 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1637 { 1638 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1639 } 1640 1641 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1642 { 1643 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1644 } 1645 1646 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1647 { 1648 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1649 } 1650 1651 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1652 { 1653 uint64_t hi_64, lo_64; 1654 1655 mulu64(&lo_64, &hi_64, s2, s1); 1656 return hi_64; 1657 } 1658 1659 static int8_t do_mulhsu_b(int8_t s2, 
uint8_t s1) 1660 { 1661 return (int16_t)s2 * (uint16_t)s1 >> 8; 1662 } 1663 1664 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1665 { 1666 return (int32_t)s2 * (uint32_t)s1 >> 16; 1667 } 1668 1669 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1670 { 1671 return (int64_t)s2 * (uint64_t)s1 >> 32; 1672 } 1673 1674 /* 1675 * Let A = signed operand, 1676 * B = unsigned operand 1677 * P = mulu64(A, B), unsigned product 1678 * 1679 * LET X = 2 ** 64 - A, 2's complement of A 1680 * SP = signed product 1681 * THEN 1682 * IF A < 0 1683 * SP = -X * B 1684 * = -(2 ** 64 - A) * B 1685 * = A * B - 2 ** 64 * B 1686 * = P - 2 ** 64 * B 1687 * ELSE 1688 * SP = P 1689 * THEN 1690 * HI_P -= (A < 0 ? B : 0) 1691 */ 1692 1693 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1694 { 1695 uint64_t hi_64, lo_64; 1696 1697 mulu64(&lo_64, &hi_64, s2, s1); 1698 1699 hi_64 -= s2 < 0 ? s1 : 0; 1700 return hi_64; 1701 } 1702 1703 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1704 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1705 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1706 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1707 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1708 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1709 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1710 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1711 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1712 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1713 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1714 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1715 GEN_VEXT_VV(vmulh_vv_b, 1) 1716 GEN_VEXT_VV(vmulh_vv_h, 2) 1717 GEN_VEXT_VV(vmulh_vv_w, 4) 1718 GEN_VEXT_VV(vmulh_vv_d, 8) 1719 GEN_VEXT_VV(vmulhu_vv_b, 1) 1720 GEN_VEXT_VV(vmulhu_vv_h, 2) 1721 GEN_VEXT_VV(vmulhu_vv_w, 4) 1722 GEN_VEXT_VV(vmulhu_vv_d, 8) 
1723 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1724 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1725 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1726 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1727 1728 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1729 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1730 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1731 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1732 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1733 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1734 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1735 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1736 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1737 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1738 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1739 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1740 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1741 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1742 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1743 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1744 GEN_VEXT_VX(vmul_vx_b, 1) 1745 GEN_VEXT_VX(vmul_vx_h, 2) 1746 GEN_VEXT_VX(vmul_vx_w, 4) 1747 GEN_VEXT_VX(vmul_vx_d, 8) 1748 GEN_VEXT_VX(vmulh_vx_b, 1) 1749 GEN_VEXT_VX(vmulh_vx_h, 2) 1750 GEN_VEXT_VX(vmulh_vx_w, 4) 1751 GEN_VEXT_VX(vmulh_vx_d, 8) 1752 GEN_VEXT_VX(vmulhu_vx_b, 1) 1753 GEN_VEXT_VX(vmulhu_vx_h, 2) 1754 GEN_VEXT_VX(vmulhu_vx_w, 4) 1755 GEN_VEXT_VX(vmulhu_vx_d, 8) 1756 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1757 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1758 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1759 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1760 1761 /* Vector Integer Divide Instructions */ 1762 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1763 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1764 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1765 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
N : N / M) 1766 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1767 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1768 1769 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1770 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1771 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1772 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1773 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1774 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1775 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1776 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1777 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1778 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1779 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1780 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1781 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1782 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1783 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1784 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1785 GEN_VEXT_VV(vdivu_vv_b, 1) 1786 GEN_VEXT_VV(vdivu_vv_h, 2) 1787 GEN_VEXT_VV(vdivu_vv_w, 4) 1788 GEN_VEXT_VV(vdivu_vv_d, 8) 1789 GEN_VEXT_VV(vdiv_vv_b, 1) 1790 GEN_VEXT_VV(vdiv_vv_h, 2) 1791 GEN_VEXT_VV(vdiv_vv_w, 4) 1792 GEN_VEXT_VV(vdiv_vv_d, 8) 1793 GEN_VEXT_VV(vremu_vv_b, 1) 1794 GEN_VEXT_VV(vremu_vv_h, 2) 1795 GEN_VEXT_VV(vremu_vv_w, 4) 1796 GEN_VEXT_VV(vremu_vv_d, 8) 1797 GEN_VEXT_VV(vrem_vv_b, 1) 1798 GEN_VEXT_VV(vrem_vv_h, 2) 1799 GEN_VEXT_VV(vrem_vv_w, 4) 1800 GEN_VEXT_VV(vrem_vv_d, 8) 1801 1802 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1803 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1804 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1805 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1806 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1807 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, 
DO_DIV) 1808 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1809 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1810 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1811 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1812 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1813 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1814 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1815 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1816 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1817 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1818 GEN_VEXT_VX(vdivu_vx_b, 1) 1819 GEN_VEXT_VX(vdivu_vx_h, 2) 1820 GEN_VEXT_VX(vdivu_vx_w, 4) 1821 GEN_VEXT_VX(vdivu_vx_d, 8) 1822 GEN_VEXT_VX(vdiv_vx_b, 1) 1823 GEN_VEXT_VX(vdiv_vx_h, 2) 1824 GEN_VEXT_VX(vdiv_vx_w, 4) 1825 GEN_VEXT_VX(vdiv_vx_d, 8) 1826 GEN_VEXT_VX(vremu_vx_b, 1) 1827 GEN_VEXT_VX(vremu_vx_h, 2) 1828 GEN_VEXT_VX(vremu_vx_w, 4) 1829 GEN_VEXT_VX(vremu_vx_d, 8) 1830 GEN_VEXT_VX(vrem_vx_b, 1) 1831 GEN_VEXT_VX(vrem_vx_h, 2) 1832 GEN_VEXT_VX(vrem_vx_w, 4) 1833 GEN_VEXT_VX(vrem_vx_d, 8) 1834 1835 /* Vector Widening Integer Multiply Instructions */ 1836 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1837 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1838 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1839 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1840 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1841 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1842 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1843 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1844 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1845 GEN_VEXT_VV(vwmul_vv_b, 2) 1846 GEN_VEXT_VV(vwmul_vv_h, 4) 1847 GEN_VEXT_VV(vwmul_vv_w, 8) 1848 GEN_VEXT_VV(vwmulu_vv_b, 2) 1849 GEN_VEXT_VV(vwmulu_vv_h, 4) 1850 GEN_VEXT_VV(vwmulu_vv_w, 8) 1851 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1852 
GEN_VEXT_VV(vwmulsu_vv_h, 4)
GEN_VEXT_VV(vwmulsu_vv_w, 8)

/* Widening multiply, vector-scalar forms. */
RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
GEN_VEXT_VX(vwmul_vx_b, 2)
GEN_VEXT_VX(vwmul_vx_h, 4)
GEN_VEXT_VX(vwmul_vx_w, 8)
GEN_VEXT_VX(vwmulu_vx_b, 2)
GEN_VEXT_VX(vwmulu_vx_h, 4)
GEN_VEXT_VX(vwmulu_vx_w, 8)
GEN_VEXT_VX(vwmulsu_vx_b, 2)
GEN_VEXT_VX(vwmulsu_vx_h, 4)
GEN_VEXT_VX(vwmulsu_vx_w, 8)

/* Vector Single-Width Integer Multiply-Add Instructions */

/*
 * OPIVV3 defines do_<NAME>(), the per-element worker of a three-operand
 * vector-vector op: it loads element i of vs1 and vs2 plus the current
 * destination element, and stores OP(s2, s1, d) back into vd[i].
 * HD/HS1/HS2 map the element index to its host-endian position.
 */
#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
{ \
    TX1 s1 = *((T1 *)vs1 + HS1(i)); \
    TX2 s2 = *((T2 *)vs2 + HS2(i)); \
    TD d = *((TD *)vd + HD(i)); \
    *((TD *)vd + HD(i)) = OP(s2, s1, d); \
}

/*
 * Multiply-add flavours; as invoked above N is the vs2 element, M the
 * vs1 element (or scalar) and D the old destination element.
 */
#define DO_MACC(N, M, D) (M * N + D)
#define DO_NMSAC(N, M, D) (-(M * N) + D)
#define DO_MADD(N, M, D) (M * D + N)
#define DO_NMSUB(N, M, D) (-(M * D) + N)
RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1897 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1898 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1899 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1900 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1901 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1902 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1903 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1904 GEN_VEXT_VV(vmacc_vv_b, 1) 1905 GEN_VEXT_VV(vmacc_vv_h, 2) 1906 GEN_VEXT_VV(vmacc_vv_w, 4) 1907 GEN_VEXT_VV(vmacc_vv_d, 8) 1908 GEN_VEXT_VV(vnmsac_vv_b, 1) 1909 GEN_VEXT_VV(vnmsac_vv_h, 2) 1910 GEN_VEXT_VV(vnmsac_vv_w, 4) 1911 GEN_VEXT_VV(vnmsac_vv_d, 8) 1912 GEN_VEXT_VV(vmadd_vv_b, 1) 1913 GEN_VEXT_VV(vmadd_vv_h, 2) 1914 GEN_VEXT_VV(vmadd_vv_w, 4) 1915 GEN_VEXT_VV(vmadd_vv_d, 8) 1916 GEN_VEXT_VV(vnmsub_vv_b, 1) 1917 GEN_VEXT_VV(vnmsub_vv_h, 2) 1918 GEN_VEXT_VV(vnmsub_vv_w, 4) 1919 GEN_VEXT_VV(vnmsub_vv_d, 8) 1920 1921 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1922 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1923 { \ 1924 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1925 TD d = *((TD *)vd + HD(i)); \ 1926 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1927 } 1928 1929 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1930 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1931 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1932 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1933 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1934 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1935 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1936 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1937 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1938 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1939 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1940 
RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1941 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1942 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1943 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1944 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1945 GEN_VEXT_VX(vmacc_vx_b, 1) 1946 GEN_VEXT_VX(vmacc_vx_h, 2) 1947 GEN_VEXT_VX(vmacc_vx_w, 4) 1948 GEN_VEXT_VX(vmacc_vx_d, 8) 1949 GEN_VEXT_VX(vnmsac_vx_b, 1) 1950 GEN_VEXT_VX(vnmsac_vx_h, 2) 1951 GEN_VEXT_VX(vnmsac_vx_w, 4) 1952 GEN_VEXT_VX(vnmsac_vx_d, 8) 1953 GEN_VEXT_VX(vmadd_vx_b, 1) 1954 GEN_VEXT_VX(vmadd_vx_h, 2) 1955 GEN_VEXT_VX(vmadd_vx_w, 4) 1956 GEN_VEXT_VX(vmadd_vx_d, 8) 1957 GEN_VEXT_VX(vnmsub_vx_b, 1) 1958 GEN_VEXT_VX(vnmsub_vx_h, 2) 1959 GEN_VEXT_VX(vnmsub_vx_w, 4) 1960 GEN_VEXT_VX(vnmsub_vx_d, 8) 1961 1962 /* Vector Widening Integer Multiply-Add Instructions */ 1963 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1964 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1965 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1966 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1967 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1968 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1969 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1970 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1971 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1972 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1973 GEN_VEXT_VV(vwmaccu_vv_h, 4) 1974 GEN_VEXT_VV(vwmaccu_vv_w, 8) 1975 GEN_VEXT_VV(vwmacc_vv_b, 2) 1976 GEN_VEXT_VV(vwmacc_vv_h, 4) 1977 GEN_VEXT_VV(vwmacc_vv_w, 8) 1978 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 1979 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 1980 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 1981 1982 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1983 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1984 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1985 
RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1986 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1987 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1988 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1989 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1990 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1991 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1992 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1993 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1994 GEN_VEXT_VX(vwmaccu_vx_b, 2) 1995 GEN_VEXT_VX(vwmaccu_vx_h, 4) 1996 GEN_VEXT_VX(vwmaccu_vx_w, 8) 1997 GEN_VEXT_VX(vwmacc_vx_b, 2) 1998 GEN_VEXT_VX(vwmacc_vx_h, 4) 1999 GEN_VEXT_VX(vwmacc_vx_w, 8) 2000 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 2001 GEN_VEXT_VX(vwmaccsu_vx_h, 4) 2002 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 2003 GEN_VEXT_VX(vwmaccus_vx_b, 2) 2004 GEN_VEXT_VX(vwmaccus_vx_h, 4) 2005 GEN_VEXT_VX(vwmaccus_vx_w, 8) 2006 2007 /* Vector Integer Merge and Move Instructions */ 2008 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 2009 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 2010 uint32_t desc) \ 2011 { \ 2012 uint32_t vl = env->vl; \ 2013 uint32_t esz = sizeof(ETYPE); \ 2014 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2015 uint32_t vta = vext_vta(desc); \ 2016 uint32_t i; \ 2017 \ 2018 for (i = env->vstart; i < vl; i++) { \ 2019 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 2020 *((ETYPE *)vd + H(i)) = s1; \ 2021 } \ 2022 env->vstart = 0; \ 2023 /* set tail elements to 1s */ \ 2024 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2025 } 2026 2027 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 2028 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 2029 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 2030 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 2031 2032 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 2033 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2034 uint32_t desc) \ 2035 { \ 2036 uint32_t vl = 
env->vl; \ 2037 uint32_t esz = sizeof(ETYPE); \ 2038 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2039 uint32_t vta = vext_vta(desc); \ 2040 uint32_t i; \ 2041 \ 2042 for (i = env->vstart; i < vl; i++) { \ 2043 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2044 } \ 2045 env->vstart = 0; \ 2046 /* set tail elements to 1s */ \ 2047 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2048 } 2049 2050 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2051 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2052 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2053 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2054 2055 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2056 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2057 CPURISCVState *env, uint32_t desc) \ 2058 { \ 2059 uint32_t vl = env->vl; \ 2060 uint32_t esz = sizeof(ETYPE); \ 2061 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2062 uint32_t vta = vext_vta(desc); \ 2063 uint32_t i; \ 2064 \ 2065 for (i = env->vstart; i < vl; i++) { \ 2066 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2067 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2068 } \ 2069 env->vstart = 0; \ 2070 /* set tail elements to 1s */ \ 2071 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2072 } 2073 2074 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2075 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2076 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2077 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2078 2079 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2080 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2081 void *vs2, CPURISCVState *env, uint32_t desc) \ 2082 { \ 2083 uint32_t vl = env->vl; \ 2084 uint32_t esz = sizeof(ETYPE); \ 2085 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2086 uint32_t vta = vext_vta(desc); \ 2087 uint32_t i; \ 2088 \ 2089 for (i = env->vstart; i < vl; i++) { \ 2090 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2091 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 2092 (ETYPE)(target_long)s1); \ 2093 *((ETYPE *)vd + H(i)) = d; \ 2094 } \ 2095 env->vstart = 0; \ 2096 /* set tail elements to 1s */ \ 2097 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2098 } 2099 2100 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2101 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2102 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2103 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2104 2105 /* 2106 *** Vector Fixed-Point Arithmetic Instructions 2107 */ 2108 2109 /* Vector Single-Width Saturating Add and Subtract */ 2110 2111 /* 2112 * As fixed point instructions probably have round mode and saturation, 2113 * define common macros for fixed point here. 2114 */ 2115 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2116 CPURISCVState *env, int vxrm); 2117 2118 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2119 static inline void \ 2120 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2121 CPURISCVState *env, int vxrm) \ 2122 { \ 2123 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2124 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2125 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2126 } 2127 2128 static inline void 2129 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2130 CPURISCVState *env, 2131 uint32_t vl, uint32_t vm, int vxrm, 2132 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 2133 { 2134 for (uint32_t i = env->vstart; i < vl; i++) { 2135 if (!vm && !vext_elem_mask(v0, i)) { 2136 /* set masked-off elements to 1s */ 2137 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2138 continue; 2139 } 2140 fn(vd, vs1, vs2, i, env, vxrm); 2141 } 2142 env->vstart = 0; 2143 } 2144 2145 static inline void 2146 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2147 CPURISCVState *env, 2148 uint32_t desc, 2149 opivv2_rm_fn *fn, uint32_t esz) 2150 { 2151 uint32_t vm = vext_vm(desc); 2152 uint32_t vl = env->vl; 2153 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2154 uint32_t vta = vext_vta(desc); 2155 
uint32_t vma = vext_vma(desc); 2156 2157 switch (env->vxrm) { 2158 case 0: /* rnu */ 2159 vext_vv_rm_1(vd, v0, vs1, vs2, 2160 env, vl, vm, 0, fn, vma, esz); 2161 break; 2162 case 1: /* rne */ 2163 vext_vv_rm_1(vd, v0, vs1, vs2, 2164 env, vl, vm, 1, fn, vma, esz); 2165 break; 2166 case 2: /* rdn */ 2167 vext_vv_rm_1(vd, v0, vs1, vs2, 2168 env, vl, vm, 2, fn, vma, esz); 2169 break; 2170 default: /* rod */ 2171 vext_vv_rm_1(vd, v0, vs1, vs2, 2172 env, vl, vm, 3, fn, vma, esz); 2173 break; 2174 } 2175 /* set tail elements to 1s */ 2176 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2177 } 2178 2179 /* generate helpers for fixed point instructions with OPIVV format */ 2180 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2181 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2182 CPURISCVState *env, uint32_t desc) \ 2183 { \ 2184 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2185 do_##NAME, ESZ); \ 2186 } 2187 2188 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2189 { 2190 uint8_t res = a + b; 2191 if (res < a) { 2192 res = UINT8_MAX; 2193 env->vxsat = 0x1; 2194 } 2195 return res; 2196 } 2197 2198 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2199 uint16_t b) 2200 { 2201 uint16_t res = a + b; 2202 if (res < a) { 2203 res = UINT16_MAX; 2204 env->vxsat = 0x1; 2205 } 2206 return res; 2207 } 2208 2209 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2210 uint32_t b) 2211 { 2212 uint32_t res = a + b; 2213 if (res < a) { 2214 res = UINT32_MAX; 2215 env->vxsat = 0x1; 2216 } 2217 return res; 2218 } 2219 2220 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2221 uint64_t b) 2222 { 2223 uint64_t res = a + b; 2224 if (res < a) { 2225 res = UINT64_MAX; 2226 env->vxsat = 0x1; 2227 } 2228 return res; 2229 } 2230 2231 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2232 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2233 RVVCALL(OPIVV2_RM, 
vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2234 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2235 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2236 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2237 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2238 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2239 2240 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2241 CPURISCVState *env, int vxrm); 2242 2243 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2244 static inline void \ 2245 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2246 CPURISCVState *env, int vxrm) \ 2247 { \ 2248 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2249 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2250 } 2251 2252 static inline void 2253 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2254 CPURISCVState *env, 2255 uint32_t vl, uint32_t vm, int vxrm, 2256 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2257 { 2258 for (uint32_t i = env->vstart; i < vl; i++) { 2259 if (!vm && !vext_elem_mask(v0, i)) { 2260 /* set masked-off elements to 1s */ 2261 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2262 continue; 2263 } 2264 fn(vd, s1, vs2, i, env, vxrm); 2265 } 2266 env->vstart = 0; 2267 } 2268 2269 static inline void 2270 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2271 CPURISCVState *env, 2272 uint32_t desc, 2273 opivx2_rm_fn *fn, uint32_t esz) 2274 { 2275 uint32_t vm = vext_vm(desc); 2276 uint32_t vl = env->vl; 2277 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2278 uint32_t vta = vext_vta(desc); 2279 uint32_t vma = vext_vma(desc); 2280 2281 switch (env->vxrm) { 2282 case 0: /* rnu */ 2283 vext_vx_rm_1(vd, v0, s1, vs2, 2284 env, vl, vm, 0, fn, vma, esz); 2285 break; 2286 case 1: /* rne */ 2287 vext_vx_rm_1(vd, v0, s1, vs2, 2288 env, vl, vm, 1, fn, vma, esz); 2289 break; 2290 case 2: /* rdn */ 2291 vext_vx_rm_1(vd, v0, s1, vs2, 2292 env, vl, vm, 2, fn, vma, esz); 2293 break; 2294 default: /* rod */ 2295 vext_vx_rm_1(vd, v0, s1, vs2, 2296 env, vl, vm, 3, 
fn, vma, esz); 2297 break; 2298 } 2299 /* set tail elements to 1s */ 2300 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2301 } 2302 2303 /* generate helpers for fixed point instructions with OPIVX format */ 2304 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2305 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2306 void *vs2, CPURISCVState *env, uint32_t desc) \ 2307 { \ 2308 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2309 do_##NAME, ESZ); \ 2310 } 2311 2312 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2313 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2314 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2315 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2316 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2317 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2318 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2319 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2320 2321 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2322 { 2323 int8_t res = a + b; 2324 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2325 res = a > 0 ? INT8_MAX : INT8_MIN; 2326 env->vxsat = 0x1; 2327 } 2328 return res; 2329 } 2330 2331 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2332 { 2333 int16_t res = a + b; 2334 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2335 res = a > 0 ? INT16_MAX : INT16_MIN; 2336 env->vxsat = 0x1; 2337 } 2338 return res; 2339 } 2340 2341 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2342 { 2343 int32_t res = a + b; 2344 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2345 res = a > 0 ? INT32_MAX : INT32_MIN; 2346 env->vxsat = 0x1; 2347 } 2348 return res; 2349 } 2350 2351 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2352 { 2353 int64_t res = a + b; 2354 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2355 res = a > 0 ? 
INT64_MAX : INT64_MIN; 2356 env->vxsat = 0x1; 2357 } 2358 return res; 2359 } 2360 2361 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2362 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2363 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2364 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2365 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2366 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2367 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2368 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2369 2370 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2371 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2372 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2373 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2374 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2375 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2376 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2377 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2378 2379 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2380 { 2381 uint8_t res = a - b; 2382 if (res > a) { 2383 res = 0; 2384 env->vxsat = 0x1; 2385 } 2386 return res; 2387 } 2388 2389 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2390 uint16_t b) 2391 { 2392 uint16_t res = a - b; 2393 if (res > a) { 2394 res = 0; 2395 env->vxsat = 0x1; 2396 } 2397 return res; 2398 } 2399 2400 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2401 uint32_t b) 2402 { 2403 uint32_t res = a - b; 2404 if (res > a) { 2405 res = 0; 2406 env->vxsat = 0x1; 2407 } 2408 return res; 2409 } 2410 2411 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2412 uint64_t b) 2413 { 2414 uint64_t res = a - b; 2415 if (res > a) { 2416 res = 0; 2417 env->vxsat = 0x1; 2418 } 2419 return res; 2420 } 2421 2422 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2423 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2424 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2425 RVVCALL(OPIVV2_RM, vssubu_vv_d, 
OP_UUU_D, H8, H8, H8, ssubu64) 2426 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2427 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2428 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2429 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2430 2431 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2432 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2433 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2434 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2435 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2436 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2437 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2438 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2439 2440 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2441 { 2442 int8_t res = a - b; 2443 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2444 res = a >= 0 ? INT8_MAX : INT8_MIN; 2445 env->vxsat = 0x1; 2446 } 2447 return res; 2448 } 2449 2450 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2451 { 2452 int16_t res = a - b; 2453 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2454 res = a >= 0 ? INT16_MAX : INT16_MIN; 2455 env->vxsat = 0x1; 2456 } 2457 return res; 2458 } 2459 2460 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2461 { 2462 int32_t res = a - b; 2463 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2464 res = a >= 0 ? INT32_MAX : INT32_MIN; 2465 env->vxsat = 0x1; 2466 } 2467 return res; 2468 } 2469 2470 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2471 { 2472 int64_t res = a - b; 2473 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2474 res = a >= 0 ? 
INT64_MAX : INT64_MIN; 2475 env->vxsat = 0x1; 2476 } 2477 return res; 2478 } 2479 2480 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2481 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2482 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2483 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2484 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2485 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2486 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2487 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2488 2489 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2490 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2491 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2492 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2493 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2494 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2495 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2496 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2497 2498 /* Vector Single-Width Averaging Add and Subtract */ 2499 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2500 { 2501 uint8_t d = extract64(v, shift, 1); 2502 uint8_t d1; 2503 uint64_t D1, D2; 2504 2505 if (shift == 0 || shift > 64) { 2506 return 0; 2507 } 2508 2509 d1 = extract64(v, shift - 1, 1); 2510 D1 = extract64(v, 0, shift); 2511 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2512 return d1; 2513 } else if (vxrm == 1) { /* round-to-nearest-even */ 2514 if (shift > 1) { 2515 D2 = extract64(v, 0, shift - 1); 2516 return d1 & ((D2 != 0) | d); 2517 } else { 2518 return d1 & d; 2519 } 2520 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2521 return !d & (D1 != 0); 2522 } 2523 return 0; /* round-down (truncate) */ 2524 } 2525 2526 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2527 { 2528 int64_t res = (int64_t)a + b; 2529 uint8_t round = get_round(vxrm, res, 1); 2530 2531 return (res >> 1) + round; 2532 } 2533 2534 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2535 { 
2536 int64_t res = a + b; 2537 uint8_t round = get_round(vxrm, res, 1); 2538 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2539 2540 /* With signed overflow, bit 64 is inverse of bit 63. */ 2541 return ((res >> 1) ^ over) + round; 2542 } 2543 2544 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2545 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2546 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2547 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2548 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2549 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2550 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2551 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2552 2553 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2554 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2555 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2556 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2557 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2558 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2559 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2560 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2561 2562 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2563 uint32_t a, uint32_t b) 2564 { 2565 uint64_t res = (uint64_t)a + b; 2566 uint8_t round = get_round(vxrm, res, 1); 2567 2568 return (res >> 1) + round; 2569 } 2570 2571 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2572 uint64_t a, uint64_t b) 2573 { 2574 uint64_t res = a + b; 2575 uint8_t round = get_round(vxrm, res, 1); 2576 uint64_t over = (uint64_t)(res < a) << 63; 2577 2578 return ((res >> 1) | over) + round; 2579 } 2580 2581 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2582 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2583 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2584 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2585 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2586 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2587 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2588 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2589 2590 
RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2591 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2592 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2593 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2594 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2595 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2596 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2597 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2598 2599 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2600 { 2601 int64_t res = (int64_t)a - b; 2602 uint8_t round = get_round(vxrm, res, 1); 2603 2604 return (res >> 1) + round; 2605 } 2606 2607 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2608 { 2609 int64_t res = (int64_t)a - b; 2610 uint8_t round = get_round(vxrm, res, 1); 2611 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2612 2613 /* With signed overflow, bit 64 is inverse of bit 63. */ 2614 return ((res >> 1) ^ over) + round; 2615 } 2616 2617 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2618 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2619 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2620 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2621 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2622 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2623 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2624 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2625 2626 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2627 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2628 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2629 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2630 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2631 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2632 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2633 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2634 2635 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2636 uint32_t a, uint32_t b) 2637 { 2638 int64_t res = (int64_t)a - b; 2639 uint8_t round = get_round(vxrm, res, 1); 2640 2641 return (res >> 1) + round; 2642 } 
2643 2644 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2645 uint64_t a, uint64_t b) 2646 { 2647 uint64_t res = (uint64_t)a - b; 2648 uint8_t round = get_round(vxrm, res, 1); 2649 uint64_t over = (uint64_t)(res > a) << 63; 2650 2651 return ((res >> 1) | over) + round; 2652 } 2653 2654 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2655 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2656 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2657 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2658 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2659 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2660 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2661 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2662 2663 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2664 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2665 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2666 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2667 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2668 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2669 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2670 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2671 2672 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2673 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2674 { 2675 uint8_t round; 2676 int16_t res; 2677 2678 res = (int16_t)a * (int16_t)b; 2679 round = get_round(vxrm, res, 7); 2680 res = (res >> 7) + round; 2681 2682 if (res > INT8_MAX) { 2683 env->vxsat = 0x1; 2684 return INT8_MAX; 2685 } else if (res < INT8_MIN) { 2686 env->vxsat = 0x1; 2687 return INT8_MIN; 2688 } else { 2689 return res; 2690 } 2691 } 2692 2693 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2694 { 2695 uint8_t round; 2696 int32_t res; 2697 2698 res = (int32_t)a * (int32_t)b; 2699 round = get_round(vxrm, res, 15); 2700 res = (res >> 15) + round; 2701 2702 if (res > INT16_MAX) { 2703 env->vxsat = 0x1; 2704 return INT16_MAX; 2705 } else if (res < INT16_MIN) { 
2706 env->vxsat = 0x1; 2707 return INT16_MIN; 2708 } else { 2709 return res; 2710 } 2711 } 2712 2713 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2714 { 2715 uint8_t round; 2716 int64_t res; 2717 2718 res = (int64_t)a * (int64_t)b; 2719 round = get_round(vxrm, res, 31); 2720 res = (res >> 31) + round; 2721 2722 if (res > INT32_MAX) { 2723 env->vxsat = 0x1; 2724 return INT32_MAX; 2725 } else if (res < INT32_MIN) { 2726 env->vxsat = 0x1; 2727 return INT32_MIN; 2728 } else { 2729 return res; 2730 } 2731 } 2732 2733 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2734 { 2735 uint8_t round; 2736 uint64_t hi_64, lo_64; 2737 int64_t res; 2738 2739 if (a == INT64_MIN && b == INT64_MIN) { 2740 env->vxsat = 1; 2741 return INT64_MAX; 2742 } 2743 2744 muls64(&lo_64, &hi_64, a, b); 2745 round = get_round(vxrm, lo_64, 63); 2746 /* 2747 * Cannot overflow, as there are always 2748 * 2 sign bits after multiply. 2749 */ 2750 res = (hi_64 << 1) | (lo_64 >> 63); 2751 if (round) { 2752 if (res == INT64_MAX) { 2753 env->vxsat = 1; 2754 } else { 2755 res += 1; 2756 } 2757 } 2758 return res; 2759 } 2760 2761 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2762 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2763 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2764 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2765 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2766 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2767 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2768 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2769 2770 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2771 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2772 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2773 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2774 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2775 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2776 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2777 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2778 2779 /* Vector Single-Width Scaling 
Shift Instructions */ 2780 static inline uint8_t 2781 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2782 { 2783 uint8_t round, shift = b & 0x7; 2784 uint8_t res; 2785 2786 round = get_round(vxrm, a, shift); 2787 res = (a >> shift) + round; 2788 return res; 2789 } 2790 static inline uint16_t 2791 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2792 { 2793 uint8_t round, shift = b & 0xf; 2794 uint16_t res; 2795 2796 round = get_round(vxrm, a, shift); 2797 res = (a >> shift) + round; 2798 return res; 2799 } 2800 static inline uint32_t 2801 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2802 { 2803 uint8_t round, shift = b & 0x1f; 2804 uint32_t res; 2805 2806 round = get_round(vxrm, a, shift); 2807 res = (a >> shift) + round; 2808 return res; 2809 } 2810 static inline uint64_t 2811 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2812 { 2813 uint8_t round, shift = b & 0x3f; 2814 uint64_t res; 2815 2816 round = get_round(vxrm, a, shift); 2817 res = (a >> shift) + round; 2818 return res; 2819 } 2820 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2821 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2822 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2823 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2824 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2825 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2826 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2827 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2828 2829 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2830 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2831 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2832 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2833 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2834 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2835 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2836 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2837 2838 static inline int8_t 2839 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2840 { 2841 uint8_t round, shift = b & 0x7; 
2842 int8_t res; 2843 2844 round = get_round(vxrm, a, shift); 2845 res = (a >> shift) + round; 2846 return res; 2847 } 2848 static inline int16_t 2849 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2850 { 2851 uint8_t round, shift = b & 0xf; 2852 int16_t res; 2853 2854 round = get_round(vxrm, a, shift); 2855 res = (a >> shift) + round; 2856 return res; 2857 } 2858 static inline int32_t 2859 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2860 { 2861 uint8_t round, shift = b & 0x1f; 2862 int32_t res; 2863 2864 round = get_round(vxrm, a, shift); 2865 res = (a >> shift) + round; 2866 return res; 2867 } 2868 static inline int64_t 2869 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2870 { 2871 uint8_t round, shift = b & 0x3f; 2872 int64_t res; 2873 2874 round = get_round(vxrm, a, shift); 2875 res = (a >> shift) + round; 2876 return res; 2877 } 2878 2879 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2880 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2881 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2882 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2883 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2884 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2885 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2886 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2887 2888 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2889 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2890 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2891 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2892 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2893 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2894 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2895 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2896 2897 /* Vector Narrowing Fixed-Point Clip Instructions */ 2898 static inline int8_t 2899 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2900 { 2901 uint8_t round, shift = b & 0xf; 2902 int16_t res; 2903 2904 round = get_round(vxrm, a, shift); 2905 res = (a >> shift) + round; 2906 if 
(res > INT8_MAX) { 2907 env->vxsat = 0x1; 2908 return INT8_MAX; 2909 } else if (res < INT8_MIN) { 2910 env->vxsat = 0x1; 2911 return INT8_MIN; 2912 } else { 2913 return res; 2914 } 2915 } 2916 2917 static inline int16_t 2918 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2919 { 2920 uint8_t round, shift = b & 0x1f; 2921 int32_t res; 2922 2923 round = get_round(vxrm, a, shift); 2924 res = (a >> shift) + round; 2925 if (res > INT16_MAX) { 2926 env->vxsat = 0x1; 2927 return INT16_MAX; 2928 } else if (res < INT16_MIN) { 2929 env->vxsat = 0x1; 2930 return INT16_MIN; 2931 } else { 2932 return res; 2933 } 2934 } 2935 2936 static inline int32_t 2937 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2938 { 2939 uint8_t round, shift = b & 0x3f; 2940 int64_t res; 2941 2942 round = get_round(vxrm, a, shift); 2943 res = (a >> shift) + round; 2944 if (res > INT32_MAX) { 2945 env->vxsat = 0x1; 2946 return INT32_MAX; 2947 } else if (res < INT32_MIN) { 2948 env->vxsat = 0x1; 2949 return INT32_MIN; 2950 } else { 2951 return res; 2952 } 2953 } 2954 2955 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2956 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2957 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2958 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 2959 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 2960 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 2961 2962 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2963 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2964 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2965 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 2966 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 2967 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 2968 2969 static inline uint8_t 2970 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2971 { 2972 uint8_t round, shift = b & 0xf; 2973 uint16_t res; 2974 2975 round = get_round(vxrm, a, shift); 2976 res = (a >> shift) + round; 2977 if (res > UINT8_MAX) { 2978 env->vxsat = 0x1; 2979 
return UINT8_MAX; 2980 } else { 2981 return res; 2982 } 2983 } 2984 2985 static inline uint16_t 2986 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2987 { 2988 uint8_t round, shift = b & 0x1f; 2989 uint32_t res; 2990 2991 round = get_round(vxrm, a, shift); 2992 res = (a >> shift) + round; 2993 if (res > UINT16_MAX) { 2994 env->vxsat = 0x1; 2995 return UINT16_MAX; 2996 } else { 2997 return res; 2998 } 2999 } 3000 3001 static inline uint32_t 3002 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 3003 { 3004 uint8_t round, shift = b & 0x3f; 3005 uint64_t res; 3006 3007 round = get_round(vxrm, a, shift); 3008 res = (a >> shift) + round; 3009 if (res > UINT32_MAX) { 3010 env->vxsat = 0x1; 3011 return UINT32_MAX; 3012 } else { 3013 return res; 3014 } 3015 } 3016 3017 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 3018 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 3019 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 3020 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 3021 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 3022 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 3023 3024 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 3025 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 3026 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 3027 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 3028 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 3029 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 3030 3031 /* 3032 *** Vector Float Point Arithmetic Instructions 3033 */ 3034 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 3035 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3036 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3037 CPURISCVState *env) \ 3038 { \ 3039 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3040 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3041 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3042 } 3043 3044 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 3045 void HELPER(NAME)(void *vd, void *v0, void 
*vs1, \ 3046 void *vs2, CPURISCVState *env, \ 3047 uint32_t desc) \ 3048 { \ 3049 uint32_t vm = vext_vm(desc); \ 3050 uint32_t vl = env->vl; \ 3051 uint32_t total_elems = \ 3052 vext_get_total_elems(env, desc, ESZ); \ 3053 uint32_t vta = vext_vta(desc); \ 3054 uint32_t vma = vext_vma(desc); \ 3055 uint32_t i; \ 3056 \ 3057 for (i = env->vstart; i < vl; i++) { \ 3058 if (!vm && !vext_elem_mask(v0, i)) { \ 3059 /* set masked-off elements to 1s */ \ 3060 vext_set_elems_1s(vd, vma, i * ESZ, \ 3061 (i + 1) * ESZ); \ 3062 continue; \ 3063 } \ 3064 do_##NAME(vd, vs1, vs2, i, env); \ 3065 } \ 3066 env->vstart = 0; \ 3067 /* set tail elements to 1s */ \ 3068 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3069 total_elems * ESZ); \ 3070 } 3071 3072 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3073 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3074 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3075 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 3076 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 3077 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 3078 3079 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3080 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3081 CPURISCVState *env) \ 3082 { \ 3083 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3084 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 3085 } 3086 3087 #define GEN_VEXT_VF(NAME, ESZ) \ 3088 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3089 void *vs2, CPURISCVState *env, \ 3090 uint32_t desc) \ 3091 { \ 3092 uint32_t vm = vext_vm(desc); \ 3093 uint32_t vl = env->vl; \ 3094 uint32_t total_elems = \ 3095 vext_get_total_elems(env, desc, ESZ); \ 3096 uint32_t vta = vext_vta(desc); \ 3097 uint32_t vma = vext_vma(desc); \ 3098 uint32_t i; \ 3099 \ 3100 for (i = env->vstart; i < vl; i++) { \ 3101 if (!vm && !vext_elem_mask(v0, i)) { \ 3102 /* set masked-off elements to 1s */ \ 3103 vext_set_elems_1s(vd, vma, i * ESZ, \ 3104 (i + 1) * ESZ); \ 3105 continue; \ 3106 } \ 3107 do_##NAME(vd, s1, vs2, 
i, env); \ 3108 } \ 3109 env->vstart = 0; \ 3110 /* set tail elements to 1s */ \ 3111 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3112 total_elems * ESZ); \ 3113 } 3114 3115 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3116 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3117 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3118 GEN_VEXT_VF(vfadd_vf_h, 2) 3119 GEN_VEXT_VF(vfadd_vf_w, 4) 3120 GEN_VEXT_VF(vfadd_vf_d, 8) 3121 3122 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3123 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3124 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3125 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 3126 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 3127 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 3128 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3129 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3130 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3131 GEN_VEXT_VF(vfsub_vf_h, 2) 3132 GEN_VEXT_VF(vfsub_vf_w, 4) 3133 GEN_VEXT_VF(vfsub_vf_d, 8) 3134 3135 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3136 { 3137 return float16_sub(b, a, s); 3138 } 3139 3140 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3141 { 3142 return float32_sub(b, a, s); 3143 } 3144 3145 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3146 { 3147 return float64_sub(b, a, s); 3148 } 3149 3150 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3151 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3152 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3153 GEN_VEXT_VF(vfrsub_vf_h, 2) 3154 GEN_VEXT_VF(vfrsub_vf_w, 4) 3155 GEN_VEXT_VF(vfrsub_vf_d, 8) 3156 3157 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3158 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3159 { 3160 return float32_add(float16_to_float32(a, true, s), 3161 float16_to_float32(b, true, s), s); 3162 } 
3163 3164 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3165 { 3166 return float64_add(float32_to_float64(a, s), 3167 float32_to_float64(b, s), s); 3168 3169 } 3170 3171 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3172 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3173 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3174 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3175 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3176 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3177 GEN_VEXT_VF(vfwadd_vf_h, 4) 3178 GEN_VEXT_VF(vfwadd_vf_w, 8) 3179 3180 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3181 { 3182 return float32_sub(float16_to_float32(a, true, s), 3183 float16_to_float32(b, true, s), s); 3184 } 3185 3186 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3187 { 3188 return float64_sub(float32_to_float64(a, s), 3189 float32_to_float64(b, s), s); 3190 3191 } 3192 3193 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3194 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3195 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3196 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3197 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3198 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3199 GEN_VEXT_VF(vfwsub_vf_h, 4) 3200 GEN_VEXT_VF(vfwsub_vf_w, 8) 3201 3202 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3203 { 3204 return float32_add(a, float16_to_float32(b, true, s), s); 3205 } 3206 3207 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3208 { 3209 return float64_add(a, float32_to_float64(b, s), s); 3210 } 3211 3212 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3213 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3214 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3215 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3216 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3217 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, 
vfwaddw32) 3218 GEN_VEXT_VF(vfwadd_wf_h, 4) 3219 GEN_VEXT_VF(vfwadd_wf_w, 8) 3220 3221 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3222 { 3223 return float32_sub(a, float16_to_float32(b, true, s), s); 3224 } 3225 3226 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3227 { 3228 return float64_sub(a, float32_to_float64(b, s), s); 3229 } 3230 3231 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3232 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3233 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3234 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3235 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3236 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3237 GEN_VEXT_VF(vfwsub_wf_h, 4) 3238 GEN_VEXT_VF(vfwsub_wf_w, 8) 3239 3240 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3241 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3242 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3243 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3244 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3245 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3246 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3247 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3248 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3249 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3250 GEN_VEXT_VF(vfmul_vf_h, 2) 3251 GEN_VEXT_VF(vfmul_vf_w, 4) 3252 GEN_VEXT_VF(vfmul_vf_d, 8) 3253 3254 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3255 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3256 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3257 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3258 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3259 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3260 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3261 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3262 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3263 
GEN_VEXT_VF(vfdiv_vf_h, 2) 3264 GEN_VEXT_VF(vfdiv_vf_w, 4) 3265 GEN_VEXT_VF(vfdiv_vf_d, 8) 3266 3267 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3268 { 3269 return float16_div(b, a, s); 3270 } 3271 3272 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3273 { 3274 return float32_div(b, a, s); 3275 } 3276 3277 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3278 { 3279 return float64_div(b, a, s); 3280 } 3281 3282 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3283 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3284 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3285 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3286 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3287 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3288 3289 /* Vector Widening Floating-Point Multiply */ 3290 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3291 { 3292 return float32_mul(float16_to_float32(a, true, s), 3293 float16_to_float32(b, true, s), s); 3294 } 3295 3296 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3297 { 3298 return float64_mul(float32_to_float64(a, s), 3299 float32_to_float64(b, s), s); 3300 3301 } 3302 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3303 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3304 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3305 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3306 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3307 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3308 GEN_VEXT_VF(vfwmul_vf_h, 4) 3309 GEN_VEXT_VF(vfwmul_vf_w, 8) 3310 3311 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3312 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3313 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3314 CPURISCVState *env) \ 3315 { \ 3316 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3317 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3318 TD d = *((TD *)vd + HD(i)); \ 3319 *((TD *)vd + HD(i)) = OP(s2, 
s1, d, &env->fp_status); \ 3320 } 3321 3322 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3323 { 3324 return float16_muladd(a, b, d, 0, s); 3325 } 3326 3327 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3328 { 3329 return float32_muladd(a, b, d, 0, s); 3330 } 3331 3332 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3333 { 3334 return float64_muladd(a, b, d, 0, s); 3335 } 3336 3337 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3338 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3339 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3340 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3341 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3342 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3343 3344 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3345 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3346 CPURISCVState *env) \ 3347 { \ 3348 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3349 TD d = *((TD *)vd + HD(i)); \ 3350 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3351 } 3352 3353 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3354 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3355 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3356 GEN_VEXT_VF(vfmacc_vf_h, 2) 3357 GEN_VEXT_VF(vfmacc_vf_w, 4) 3358 GEN_VEXT_VF(vfmacc_vf_d, 8) 3359 3360 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3361 { 3362 return float16_muladd(a, b, d, 3363 float_muladd_negate_c | float_muladd_negate_product, s); 3364 } 3365 3366 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3367 { 3368 return float32_muladd(a, b, d, 3369 float_muladd_negate_c | float_muladd_negate_product, s); 3370 } 3371 3372 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3373 { 3374 return float64_muladd(a, b, d, 3375 float_muladd_negate_c | float_muladd_negate_product, s); 3376 } 3377 3378 
RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3379 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3380 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3381 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3382 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3383 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3384 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3385 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3386 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3387 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3388 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3389 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3390 3391 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3392 { 3393 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3394 } 3395 3396 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3397 { 3398 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3399 } 3400 3401 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3402 { 3403 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3404 } 3405 3406 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3407 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3408 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3409 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3410 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3411 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3412 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3413 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3414 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3415 GEN_VEXT_VF(vfmsac_vf_h, 2) 3416 GEN_VEXT_VF(vfmsac_vf_w, 4) 3417 GEN_VEXT_VF(vfmsac_vf_d, 8) 3418 3419 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3420 { 3421 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3422 } 3423 3424 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3425 { 3426 return float32_muladd(a, b, d, 
float_muladd_negate_product, s); 3427 } 3428 3429 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3430 { 3431 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3432 } 3433 3434 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3435 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3436 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3437 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3438 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3439 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3440 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3441 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3442 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3443 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3444 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3445 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3446 3447 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3448 { 3449 return float16_muladd(d, b, a, 0, s); 3450 } 3451 3452 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3453 { 3454 return float32_muladd(d, b, a, 0, s); 3455 } 3456 3457 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3458 { 3459 return float64_muladd(d, b, a, 0, s); 3460 } 3461 3462 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3463 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3464 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3465 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) 3466 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3467 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3468 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3469 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3470 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3471 GEN_VEXT_VF(vfmadd_vf_h, 2) 3472 GEN_VEXT_VF(vfmadd_vf_w, 4) 3473 GEN_VEXT_VF(vfmadd_vf_d, 8) 3474 3475 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3476 { 3477 return float16_muladd(d, b, a, 3478 
float_muladd_negate_c | float_muladd_negate_product, s); 3479 } 3480 3481 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3482 { 3483 return float32_muladd(d, b, a, 3484 float_muladd_negate_c | float_muladd_negate_product, s); 3485 } 3486 3487 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3488 { 3489 return float64_muladd(d, b, a, 3490 float_muladd_negate_c | float_muladd_negate_product, s); 3491 } 3492 3493 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3494 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3495 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3496 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3497 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3498 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3499 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3500 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3501 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3502 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3503 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3504 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3505 3506 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3507 { 3508 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3509 } 3510 3511 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3512 { 3513 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3514 } 3515 3516 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3517 { 3518 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3519 } 3520 3521 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3522 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3523 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3524 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3525 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3526 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3527 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3528 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 
3529 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3530 GEN_VEXT_VF(vfmsub_vf_h, 2) 3531 GEN_VEXT_VF(vfmsub_vf_w, 4) 3532 GEN_VEXT_VF(vfmsub_vf_d, 8) 3533 3534 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3535 { 3536 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3537 } 3538 3539 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3540 { 3541 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3542 } 3543 3544 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3545 { 3546 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3547 } 3548 3549 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3550 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3551 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3552 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3553 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3554 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3555 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3556 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3557 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3558 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3559 GEN_VEXT_VF(vfnmsub_vf_w, 4) 3560 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3561 3562 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3563 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3564 { 3565 return float32_muladd(float16_to_float32(a, true, s), 3566 float16_to_float32(b, true, s), d, 0, s); 3567 } 3568 3569 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3570 { 3571 return float64_muladd(float32_to_float64(a, s), 3572 float32_to_float64(b, s), d, 0, s); 3573 } 3574 3575 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3576 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3577 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3578 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3579 RVVCALL(OPFVF3, 
vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3580 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3581 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3582 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3583 3584 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3585 { 3586 return float32_muladd(float16_to_float32(a, true, s), 3587 float16_to_float32(b, true, s), d, 3588 float_muladd_negate_c | float_muladd_negate_product, s); 3589 } 3590 3591 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3592 { 3593 return float64_muladd(float32_to_float64(a, s), 3594 float32_to_float64(b, s), d, 3595 float_muladd_negate_c | float_muladd_negate_product, s); 3596 } 3597 3598 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3599 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3600 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3601 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3602 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3603 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3604 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3605 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3606 3607 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3608 { 3609 return float32_muladd(float16_to_float32(a, true, s), 3610 float16_to_float32(b, true, s), d, 3611 float_muladd_negate_c, s); 3612 } 3613 3614 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3615 { 3616 return float64_muladd(float32_to_float64(a, s), 3617 float32_to_float64(b, s), d, 3618 float_muladd_negate_c, s); 3619 } 3620 3621 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3622 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3623 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3624 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3625 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3626 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3627 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3628 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3629 3630 static 
uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3631 { 3632 return float32_muladd(float16_to_float32(a, true, s), 3633 float16_to_float32(b, true, s), d, 3634 float_muladd_negate_product, s); 3635 } 3636 3637 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3638 { 3639 return float64_muladd(float32_to_float64(a, s), 3640 float32_to_float64(b, s), d, 3641 float_muladd_negate_product, s); 3642 } 3643 3644 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3645 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3646 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3647 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3648 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3649 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3650 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3651 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3652 3653 /* Vector Floating-Point Square-Root Instruction */ 3654 /* (TD, T2, TX2) */ 3655 #define OP_UU_H uint16_t, uint16_t, uint16_t 3656 #define OP_UU_W uint32_t, uint32_t, uint32_t 3657 #define OP_UU_D uint64_t, uint64_t, uint64_t 3658 3659 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3660 static void do_##NAME(void *vd, void *vs2, int i, \ 3661 CPURISCVState *env) \ 3662 { \ 3663 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3664 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3665 } 3666 3667 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3668 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3669 CPURISCVState *env, uint32_t desc) \ 3670 { \ 3671 uint32_t vm = vext_vm(desc); \ 3672 uint32_t vl = env->vl; \ 3673 uint32_t total_elems = \ 3674 vext_get_total_elems(env, desc, ESZ); \ 3675 uint32_t vta = vext_vta(desc); \ 3676 uint32_t vma = vext_vma(desc); \ 3677 uint32_t i; \ 3678 \ 3679 if (vl == 0) { \ 3680 return; \ 3681 } \ 3682 for (i = env->vstart; i < vl; i++) { \ 3683 if (!vm && !vext_elem_mask(v0, i)) { \ 3684 /* set masked-off elements to 1s */ \ 3685 vext_set_elems_1s(vd, vma, i * ESZ, \ 3686 (i + 1) * 
ESZ); \ 3687 continue; \ 3688 } \ 3689 do_##NAME(vd, vs2, i, env); \ 3690 } \ 3691 env->vstart = 0; \ 3692 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3693 total_elems * ESZ); \ 3694 } 3695 3696 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3697 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3698 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3699 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3700 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3701 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3702 3703 /* 3704 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3705 * 3706 * Adapted from riscv-v-spec recip.c: 3707 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3708 */ 3709 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3710 { 3711 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3712 uint64_t exp = extract64(f, frac_size, exp_size); 3713 uint64_t frac = extract64(f, 0, frac_size); 3714 3715 const uint8_t lookup_table[] = { 3716 52, 51, 50, 48, 47, 46, 44, 43, 3717 42, 41, 40, 39, 38, 36, 35, 34, 3718 33, 32, 31, 30, 30, 29, 28, 27, 3719 26, 25, 24, 23, 23, 22, 21, 20, 3720 19, 19, 18, 17, 16, 16, 15, 14, 3721 14, 13, 12, 12, 11, 10, 10, 9, 3722 9, 8, 7, 7, 6, 6, 5, 4, 3723 4, 3, 3, 2, 2, 1, 1, 0, 3724 127, 125, 123, 121, 119, 118, 116, 114, 3725 113, 111, 109, 108, 106, 105, 103, 102, 3726 100, 99, 97, 96, 95, 93, 92, 91, 3727 90, 88, 87, 86, 85, 84, 83, 82, 3728 80, 79, 78, 77, 76, 75, 74, 73, 3729 72, 71, 70, 70, 69, 68, 67, 66, 3730 65, 64, 63, 63, 62, 61, 60, 59, 3731 59, 58, 57, 56, 56, 55, 54, 53 3732 }; 3733 const int precision = 7; 3734 3735 if (exp == 0 && frac != 0) { /* subnormal */ 3736 /* Normalize the subnormal. 
*/ 3737 while (extract64(frac, frac_size - 1, 1) == 0) { 3738 exp--; 3739 frac <<= 1; 3740 } 3741 3742 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3743 } 3744 3745 int idx = ((exp & 1) << (precision - 1)) | 3746 (frac >> (frac_size - precision + 1)); 3747 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3748 (frac_size - precision); 3749 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3750 3751 uint64_t val = 0; 3752 val = deposit64(val, 0, frac_size, out_frac); 3753 val = deposit64(val, frac_size, exp_size, out_exp); 3754 val = deposit64(val, frac_size + exp_size, 1, sign); 3755 return val; 3756 } 3757 3758 static float16 frsqrt7_h(float16 f, float_status *s) 3759 { 3760 int exp_size = 5, frac_size = 10; 3761 bool sign = float16_is_neg(f); 3762 3763 /* 3764 * frsqrt7(sNaN) = canonical NaN 3765 * frsqrt7(-inf) = canonical NaN 3766 * frsqrt7(-normal) = canonical NaN 3767 * frsqrt7(-subnormal) = canonical NaN 3768 */ 3769 if (float16_is_signaling_nan(f, s) || 3770 (float16_is_infinity(f) && sign) || 3771 (float16_is_normal(f) && sign) || 3772 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3773 s->float_exception_flags |= float_flag_invalid; 3774 return float16_default_nan(s); 3775 } 3776 3777 /* frsqrt7(qNaN) = canonical NaN */ 3778 if (float16_is_quiet_nan(f, s)) { 3779 return float16_default_nan(s); 3780 } 3781 3782 /* frsqrt7(+-0) = +-inf */ 3783 if (float16_is_zero(f)) { 3784 s->float_exception_flags |= float_flag_divbyzero; 3785 return float16_set_sign(float16_infinity, sign); 3786 } 3787 3788 /* frsqrt7(+inf) = +0 */ 3789 if (float16_is_infinity(f) && !sign) { 3790 return float16_set_sign(float16_zero, sign); 3791 } 3792 3793 /* +normal, +subnormal */ 3794 uint64_t val = frsqrt7(f, exp_size, frac_size); 3795 return make_float16(val); 3796 } 3797 3798 static float32 frsqrt7_s(float32 f, float_status *s) 3799 { 3800 int exp_size = 8, frac_size = 23; 3801 bool sign = float32_is_neg(f); 3802 3803 /* 3804 * 
frsqrt7(sNaN) = canonical NaN 3805 * frsqrt7(-inf) = canonical NaN 3806 * frsqrt7(-normal) = canonical NaN 3807 * frsqrt7(-subnormal) = canonical NaN 3808 */ 3809 if (float32_is_signaling_nan(f, s) || 3810 (float32_is_infinity(f) && sign) || 3811 (float32_is_normal(f) && sign) || 3812 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3813 s->float_exception_flags |= float_flag_invalid; 3814 return float32_default_nan(s); 3815 } 3816 3817 /* frsqrt7(qNaN) = canonical NaN */ 3818 if (float32_is_quiet_nan(f, s)) { 3819 return float32_default_nan(s); 3820 } 3821 3822 /* frsqrt7(+-0) = +-inf */ 3823 if (float32_is_zero(f)) { 3824 s->float_exception_flags |= float_flag_divbyzero; 3825 return float32_set_sign(float32_infinity, sign); 3826 } 3827 3828 /* frsqrt7(+inf) = +0 */ 3829 if (float32_is_infinity(f) && !sign) { 3830 return float32_set_sign(float32_zero, sign); 3831 } 3832 3833 /* +normal, +subnormal */ 3834 uint64_t val = frsqrt7(f, exp_size, frac_size); 3835 return make_float32(val); 3836 } 3837 3838 static float64 frsqrt7_d(float64 f, float_status *s) 3839 { 3840 int exp_size = 11, frac_size = 52; 3841 bool sign = float64_is_neg(f); 3842 3843 /* 3844 * frsqrt7(sNaN) = canonical NaN 3845 * frsqrt7(-inf) = canonical NaN 3846 * frsqrt7(-normal) = canonical NaN 3847 * frsqrt7(-subnormal) = canonical NaN 3848 */ 3849 if (float64_is_signaling_nan(f, s) || 3850 (float64_is_infinity(f) && sign) || 3851 (float64_is_normal(f) && sign) || 3852 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3853 s->float_exception_flags |= float_flag_invalid; 3854 return float64_default_nan(s); 3855 } 3856 3857 /* frsqrt7(qNaN) = canonical NaN */ 3858 if (float64_is_quiet_nan(f, s)) { 3859 return float64_default_nan(s); 3860 } 3861 3862 /* frsqrt7(+-0) = +-inf */ 3863 if (float64_is_zero(f)) { 3864 s->float_exception_flags |= float_flag_divbyzero; 3865 return float64_set_sign(float64_infinity, sign); 3866 } 3867 3868 /* frsqrt7(+inf) = +0 */ 3869 if 
(float64_is_infinity(f) && !sign) { 3870 return float64_set_sign(float64_zero, sign); 3871 } 3872 3873 /* +normal, +subnormal */ 3874 uint64_t val = frsqrt7(f, exp_size, frac_size); 3875 return make_float64(val); 3876 } 3877 3878 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3879 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3880 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3881 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3882 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3883 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3884 3885 /* 3886 * Vector Floating-Point Reciprocal Estimate Instruction 3887 * 3888 * Adapted from riscv-v-spec recip.c: 3889 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3890 */ 3891 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3892 float_status *s) 3893 { 3894 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3895 uint64_t exp = extract64(f, frac_size, exp_size); 3896 uint64_t frac = extract64(f, 0, frac_size); 3897 3898 const uint8_t lookup_table[] = { 3899 127, 125, 123, 121, 119, 117, 116, 114, 3900 112, 110, 109, 107, 105, 104, 102, 100, 3901 99, 97, 96, 94, 93, 91, 90, 88, 3902 87, 85, 84, 83, 81, 80, 79, 77, 3903 76, 75, 74, 72, 71, 70, 69, 68, 3904 66, 65, 64, 63, 62, 61, 60, 59, 3905 58, 57, 56, 55, 54, 53, 52, 51, 3906 50, 49, 48, 47, 46, 45, 44, 43, 3907 42, 41, 40, 40, 39, 38, 37, 36, 3908 35, 35, 34, 33, 32, 31, 31, 30, 3909 29, 28, 28, 27, 26, 25, 25, 24, 3910 23, 23, 22, 21, 21, 20, 19, 19, 3911 18, 17, 17, 16, 15, 15, 14, 14, 3912 13, 12, 12, 11, 11, 10, 9, 9, 3913 8, 8, 7, 7, 6, 5, 5, 4, 3914 4, 3, 3, 2, 2, 1, 1, 0 3915 }; 3916 const int precision = 7; 3917 3918 if (exp == 0 && frac != 0) { /* subnormal */ 3919 /* Normalize the subnormal. 
*/ 3920 while (extract64(frac, frac_size - 1, 1) == 0) { 3921 exp--; 3922 frac <<= 1; 3923 } 3924 3925 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3926 3927 if (exp != 0 && exp != UINT64_MAX) { 3928 /* 3929 * Overflow to inf or max value of same sign, 3930 * depending on sign and rounding mode. 3931 */ 3932 s->float_exception_flags |= (float_flag_inexact | 3933 float_flag_overflow); 3934 3935 if ((s->float_rounding_mode == float_round_to_zero) || 3936 ((s->float_rounding_mode == float_round_down) && !sign) || 3937 ((s->float_rounding_mode == float_round_up) && sign)) { 3938 /* Return greatest/negative finite value. */ 3939 return (sign << (exp_size + frac_size)) | 3940 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3941 } else { 3942 /* Return +-inf. */ 3943 return (sign << (exp_size + frac_size)) | 3944 MAKE_64BIT_MASK(frac_size, exp_size); 3945 } 3946 } 3947 } 3948 3949 int idx = frac >> (frac_size - precision); 3950 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3951 (frac_size - precision); 3952 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3953 3954 if (out_exp == 0 || out_exp == UINT64_MAX) { 3955 /* 3956 * The result is subnormal, but don't raise the underflow exception, 3957 * because there's no additional loss of precision. 
3958 */ 3959 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3960 if (out_exp == UINT64_MAX) { 3961 out_frac >>= 1; 3962 out_exp = 0; 3963 } 3964 } 3965 3966 uint64_t val = 0; 3967 val = deposit64(val, 0, frac_size, out_frac); 3968 val = deposit64(val, frac_size, exp_size, out_exp); 3969 val = deposit64(val, frac_size + exp_size, 1, sign); 3970 return val; 3971 } 3972 3973 static float16 frec7_h(float16 f, float_status *s) 3974 { 3975 int exp_size = 5, frac_size = 10; 3976 bool sign = float16_is_neg(f); 3977 3978 /* frec7(+-inf) = +-0 */ 3979 if (float16_is_infinity(f)) { 3980 return float16_set_sign(float16_zero, sign); 3981 } 3982 3983 /* frec7(+-0) = +-inf */ 3984 if (float16_is_zero(f)) { 3985 s->float_exception_flags |= float_flag_divbyzero; 3986 return float16_set_sign(float16_infinity, sign); 3987 } 3988 3989 /* frec7(sNaN) = canonical NaN */ 3990 if (float16_is_signaling_nan(f, s)) { 3991 s->float_exception_flags |= float_flag_invalid; 3992 return float16_default_nan(s); 3993 } 3994 3995 /* frec7(qNaN) = canonical NaN */ 3996 if (float16_is_quiet_nan(f, s)) { 3997 return float16_default_nan(s); 3998 } 3999 4000 /* +-normal, +-subnormal */ 4001 uint64_t val = frec7(f, exp_size, frac_size, s); 4002 return make_float16(val); 4003 } 4004 4005 static float32 frec7_s(float32 f, float_status *s) 4006 { 4007 int exp_size = 8, frac_size = 23; 4008 bool sign = float32_is_neg(f); 4009 4010 /* frec7(+-inf) = +-0 */ 4011 if (float32_is_infinity(f)) { 4012 return float32_set_sign(float32_zero, sign); 4013 } 4014 4015 /* frec7(+-0) = +-inf */ 4016 if (float32_is_zero(f)) { 4017 s->float_exception_flags |= float_flag_divbyzero; 4018 return float32_set_sign(float32_infinity, sign); 4019 } 4020 4021 /* frec7(sNaN) = canonical NaN */ 4022 if (float32_is_signaling_nan(f, s)) { 4023 s->float_exception_flags |= float_flag_invalid; 4024 return float32_default_nan(s); 4025 } 4026 4027 /* frec7(qNaN) = canonical NaN */ 4028 if (float32_is_quiet_nan(f, s)) { 4029 
return float32_default_nan(s); 4030 } 4031 4032 /* +-normal, +-subnormal */ 4033 uint64_t val = frec7(f, exp_size, frac_size, s); 4034 return make_float32(val); 4035 } 4036 4037 static float64 frec7_d(float64 f, float_status *s) 4038 { 4039 int exp_size = 11, frac_size = 52; 4040 bool sign = float64_is_neg(f); 4041 4042 /* frec7(+-inf) = +-0 */ 4043 if (float64_is_infinity(f)) { 4044 return float64_set_sign(float64_zero, sign); 4045 } 4046 4047 /* frec7(+-0) = +-inf */ 4048 if (float64_is_zero(f)) { 4049 s->float_exception_flags |= float_flag_divbyzero; 4050 return float64_set_sign(float64_infinity, sign); 4051 } 4052 4053 /* frec7(sNaN) = canonical NaN */ 4054 if (float64_is_signaling_nan(f, s)) { 4055 s->float_exception_flags |= float_flag_invalid; 4056 return float64_default_nan(s); 4057 } 4058 4059 /* frec7(qNaN) = canonical NaN */ 4060 if (float64_is_quiet_nan(f, s)) { 4061 return float64_default_nan(s); 4062 } 4063 4064 /* +-normal, +-subnormal */ 4065 uint64_t val = frec7(f, exp_size, frac_size, s); 4066 return make_float64(val); 4067 } 4068 4069 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4070 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4071 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4072 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4073 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4074 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4075 4076 /* Vector Floating-Point MIN/MAX Instructions */ 4077 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4078 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) 4079 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4080 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4081 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4082 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4083 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4084 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4085 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4086 
GEN_VEXT_VF(vfmin_vf_h, 2) 4087 GEN_VEXT_VF(vfmin_vf_w, 4) 4088 GEN_VEXT_VF(vfmin_vf_d, 8) 4089 4090 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4091 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4092 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4093 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 4094 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 4095 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 4096 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4097 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4098 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4099 GEN_VEXT_VF(vfmax_vf_h, 2) 4100 GEN_VEXT_VF(vfmax_vf_w, 4) 4101 GEN_VEXT_VF(vfmax_vf_d, 8) 4102 4103 /* Vector Floating-Point Sign-Injection Instructions */ 4104 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4105 { 4106 return deposit64(b, 0, 15, a); 4107 } 4108 4109 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4110 { 4111 return deposit64(b, 0, 31, a); 4112 } 4113 4114 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4115 { 4116 return deposit64(b, 0, 63, a); 4117 } 4118 4119 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4120 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4121 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4122 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 4123 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 4124 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 4125 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4126 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4127 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4128 GEN_VEXT_VF(vfsgnj_vf_h, 2) 4129 GEN_VEXT_VF(vfsgnj_vf_w, 4) 4130 GEN_VEXT_VF(vfsgnj_vf_d, 8) 4131 4132 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4133 { 4134 return deposit64(~b, 0, 15, a); 4135 } 4136 4137 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4138 
{ 4139 return deposit64(~b, 0, 31, a); 4140 } 4141 4142 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4143 { 4144 return deposit64(~b, 0, 63, a); 4145 } 4146 4147 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4148 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4149 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4150 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 4151 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 4152 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 4153 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4154 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4155 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4156 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4157 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4158 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4159 4160 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4161 { 4162 return deposit64(b ^ a, 0, 15, a); 4163 } 4164 4165 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4166 { 4167 return deposit64(b ^ a, 0, 31, a); 4168 } 4169 4170 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 4171 { 4172 return deposit64(b ^ a, 0, 63, a); 4173 } 4174 4175 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4176 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4177 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4178 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 4179 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 4180 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 4181 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4182 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4183 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4184 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 4185 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 4186 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 4187 4188 /* Vector Floating-Point Compare Instructions */ 4189 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4190 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4191 CPURISCVState *env, 
uint32_t desc) \ 4192 { \ 4193 uint32_t vm = vext_vm(desc); \ 4194 uint32_t vl = env->vl; \ 4195 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 4196 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4197 uint32_t vma = vext_vma(desc); \ 4198 uint32_t i; \ 4199 \ 4200 for (i = env->vstart; i < vl; i++) { \ 4201 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4202 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4203 if (!vm && !vext_elem_mask(v0, i)) { \ 4204 /* set masked-off elements to 1s */ \ 4205 if (vma) { \ 4206 vext_set_elem_mask(vd, i, 1); \ 4207 } \ 4208 continue; \ 4209 } \ 4210 vext_set_elem_mask(vd, i, \ 4211 DO_OP(s2, s1, &env->fp_status)); \ 4212 } \ 4213 env->vstart = 0; \ 4214 /* mask destination register are always tail-agnostic */ \ 4215 /* set tail elements to 1s */ \ 4216 if (vta_all_1s) { \ 4217 for (; i < total_elems; i++) { \ 4218 vext_set_elem_mask(vd, i, 1); \ 4219 } \ 4220 } \ 4221 } 4222 4223 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4224 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4225 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4226 4227 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4228 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4229 CPURISCVState *env, uint32_t desc) \ 4230 { \ 4231 uint32_t vm = vext_vm(desc); \ 4232 uint32_t vl = env->vl; \ 4233 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 4234 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4235 uint32_t vma = vext_vma(desc); \ 4236 uint32_t i; \ 4237 \ 4238 for (i = env->vstart; i < vl; i++) { \ 4239 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4240 if (!vm && !vext_elem_mask(v0, i)) { \ 4241 /* set masked-off elements to 1s */ \ 4242 if (vma) { \ 4243 vext_set_elem_mask(vd, i, 1); \ 4244 } \ 4245 continue; \ 4246 } \ 4247 vext_set_elem_mask(vd, i, \ 4248 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4249 } \ 4250 env->vstart = 0; \ 4251 /* mask destination register are always tail-agnostic */ \ 4252 /* set tail elements 
to 1s */ \ 4253 if (vta_all_1s) { \ 4254 for (; i < total_elems; i++) { \ 4255 vext_set_elem_mask(vd, i, 1); \ 4256 } \ 4257 } \ 4258 } 4259 4260 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4261 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4262 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4263 4264 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4265 { 4266 FloatRelation compare = float16_compare_quiet(a, b, s); 4267 return compare != float_relation_equal; 4268 } 4269 4270 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4271 { 4272 FloatRelation compare = float32_compare_quiet(a, b, s); 4273 return compare != float_relation_equal; 4274 } 4275 4276 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4277 { 4278 FloatRelation compare = float64_compare_quiet(a, b, s); 4279 return compare != float_relation_equal; 4280 } 4281 4282 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4283 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4284 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4285 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4286 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4287 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4288 4289 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4290 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4291 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4292 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4293 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4294 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4295 4296 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4297 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4298 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4299 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4300 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4301 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, 
float64_le) 4302 4303 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4304 { 4305 FloatRelation compare = float16_compare(a, b, s); 4306 return compare == float_relation_greater; 4307 } 4308 4309 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4310 { 4311 FloatRelation compare = float32_compare(a, b, s); 4312 return compare == float_relation_greater; 4313 } 4314 4315 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4316 { 4317 FloatRelation compare = float64_compare(a, b, s); 4318 return compare == float_relation_greater; 4319 } 4320 4321 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4322 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4323 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4324 4325 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4326 { 4327 FloatRelation compare = float16_compare(a, b, s); 4328 return compare == float_relation_greater || 4329 compare == float_relation_equal; 4330 } 4331 4332 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4333 { 4334 FloatRelation compare = float32_compare(a, b, s); 4335 return compare == float_relation_greater || 4336 compare == float_relation_equal; 4337 } 4338 4339 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4340 { 4341 FloatRelation compare = float64_compare(a, b, s); 4342 return compare == float_relation_greater || 4343 compare == float_relation_equal; 4344 } 4345 4346 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4347 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4348 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4349 4350 /* Vector Floating-Point Classify Instruction */ 4351 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4352 static void do_##NAME(void *vd, void *vs2, int i) \ 4353 { \ 4354 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4355 *((TD *)vd + HD(i)) = OP(s2); \ 4356 } 4357 4358 #define GEN_VEXT_V(NAME, ESZ) \ 4359 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4360 CPURISCVState *env, uint32_t desc) 
\ 4361 { \ 4362 uint32_t vm = vext_vm(desc); \ 4363 uint32_t vl = env->vl; \ 4364 uint32_t total_elems = \ 4365 vext_get_total_elems(env, desc, ESZ); \ 4366 uint32_t vta = vext_vta(desc); \ 4367 uint32_t vma = vext_vma(desc); \ 4368 uint32_t i; \ 4369 \ 4370 for (i = env->vstart; i < vl; i++) { \ 4371 if (!vm && !vext_elem_mask(v0, i)) { \ 4372 /* set masked-off elements to 1s */ \ 4373 vext_set_elems_1s(vd, vma, i * ESZ, \ 4374 (i + 1) * ESZ); \ 4375 continue; \ 4376 } \ 4377 do_##NAME(vd, vs2, i); \ 4378 } \ 4379 env->vstart = 0; \ 4380 /* set tail elements to 1s */ \ 4381 vext_set_elems_1s(vd, vta, vl * ESZ, \ 4382 total_elems * ESZ); \ 4383 } 4384 4385 target_ulong fclass_h(uint64_t frs1) 4386 { 4387 float16 f = frs1; 4388 bool sign = float16_is_neg(f); 4389 4390 if (float16_is_infinity(f)) { 4391 return sign ? 1 << 0 : 1 << 7; 4392 } else if (float16_is_zero(f)) { 4393 return sign ? 1 << 3 : 1 << 4; 4394 } else if (float16_is_zero_or_denormal(f)) { 4395 return sign ? 1 << 2 : 1 << 5; 4396 } else if (float16_is_any_nan(f)) { 4397 float_status s = { }; /* for snan_bit_is_one */ 4398 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4399 } else { 4400 return sign ? 1 << 1 : 1 << 6; 4401 } 4402 } 4403 4404 target_ulong fclass_s(uint64_t frs1) 4405 { 4406 float32 f = frs1; 4407 bool sign = float32_is_neg(f); 4408 4409 if (float32_is_infinity(f)) { 4410 return sign ? 1 << 0 : 1 << 7; 4411 } else if (float32_is_zero(f)) { 4412 return sign ? 1 << 3 : 1 << 4; 4413 } else if (float32_is_zero_or_denormal(f)) { 4414 return sign ? 1 << 2 : 1 << 5; 4415 } else if (float32_is_any_nan(f)) { 4416 float_status s = { }; /* for snan_bit_is_one */ 4417 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4418 } else { 4419 return sign ? 1 << 1 : 1 << 6; 4420 } 4421 } 4422 4423 target_ulong fclass_d(uint64_t frs1) 4424 { 4425 float64 f = frs1; 4426 bool sign = float64_is_neg(f); 4427 4428 if (float64_is_infinity(f)) { 4429 return sign ? 
1 << 0 : 1 << 7; 4430 } else if (float64_is_zero(f)) { 4431 return sign ? 1 << 3 : 1 << 4; 4432 } else if (float64_is_zero_or_denormal(f)) { 4433 return sign ? 1 << 2 : 1 << 5; 4434 } else if (float64_is_any_nan(f)) { 4435 float_status s = { }; /* for snan_bit_is_one */ 4436 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4437 } else { 4438 return sign ? 1 << 1 : 1 << 6; 4439 } 4440 } 4441 4442 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4443 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4444 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4445 GEN_VEXT_V(vfclass_v_h, 2) 4446 GEN_VEXT_V(vfclass_v_w, 4) 4447 GEN_VEXT_V(vfclass_v_d, 8) 4448 4449 /* Vector Floating-Point Merge Instruction */ 4450 4451 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4452 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4453 CPURISCVState *env, uint32_t desc) \ 4454 { \ 4455 uint32_t vm = vext_vm(desc); \ 4456 uint32_t vl = env->vl; \ 4457 uint32_t esz = sizeof(ETYPE); \ 4458 uint32_t total_elems = \ 4459 vext_get_total_elems(env, desc, esz); \ 4460 uint32_t vta = vext_vta(desc); \ 4461 uint32_t i; \ 4462 \ 4463 for (i = env->vstart; i < vl; i++) { \ 4464 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4465 *((ETYPE *)vd + H(i)) \ 4466 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4467 } \ 4468 env->vstart = 0; \ 4469 /* set tail elements to 1s */ \ 4470 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4471 } 4472 4473 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4474 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4475 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4476 4477 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4478 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
*/ 4479 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4480 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4481 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4482 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4483 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4484 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4485 4486 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4487 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4488 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4489 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4490 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4491 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4492 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4493 4494 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4495 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4496 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4497 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4498 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4499 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4500 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4501 4502 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. 
*/ 4503 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4504 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4505 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4506 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4507 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4508 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4509 4510 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4511 /* (TD, T2, TX2) */ 4512 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4513 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4514 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4515 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4516 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4517 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4518 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4519 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4520 4521 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4522 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4523 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4524 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4525 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4526 4527 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4528 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4529 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4530 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4531 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4532 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4533 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4534 4535 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. 
*/ 4536 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4537 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4538 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4539 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4540 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4541 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4542 4543 /* 4544 * vfwcvt.f.f.v vd, vs2, vm 4545 * Convert single-width float to double-width float. 4546 */ 4547 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4548 { 4549 return float16_to_float32(a, true, s); 4550 } 4551 4552 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4553 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4554 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4555 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4556 4557 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4558 /* (TD, T2, TX2) */ 4559 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4560 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4561 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4562 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4563 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4564 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4565 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4566 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4567 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4568 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4569 4570 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. 
*/ 4571 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4572 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4573 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4574 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4575 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4576 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4577 4578 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4579 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4580 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4581 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4582 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4583 4584 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4585 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4586 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4587 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4588 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4589 4590 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4591 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4592 { 4593 return float32_to_float16(a, true, s); 4594 } 4595 4596 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4597 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4598 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4599 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4600 4601 /* 4602 *** Vector Reduction Operations 4603 */ 4604 /* Vector Single-Width Integer Reduction Instructions */ 4605 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4606 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4607 void *vs2, CPURISCVState *env, uint32_t desc) \ 4608 { \ 4609 uint32_t vm = vext_vm(desc); \ 4610 uint32_t vl = env->vl; \ 4611 uint32_t esz = sizeof(TD); \ 4612 uint32_t vlenb = simd_maxsz(desc); \ 4613 uint32_t vta = vext_vta(desc); \ 4614 uint32_t i; \ 4615 TD s1 = *((TD *)vs1 + HD(0)); \ 4616 \ 4617 for (i = env->vstart; i < vl; i++) { \ 4618 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4619 if (!vm && !vext_elem_mask(v0, i)) { \ 4620 continue; \ 4621 } \ 4622 s1 = OP(s1, (TD)s2); \ 4623 } \ 4624 *((TD *)vd + HD(0)) = s1; \ 4625 env->vstart = 0; \ 4626 /* set tail elements to 1s */ \ 4627 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4628 } 4629 4630 /* vd[0] = sum(vs1[0], vs2[*]) */ 4631 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4632 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4633 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4634 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4635 4636 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4637 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4638 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4639 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4640 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4641 4642 /* vd[0] = max(vs1[0], vs2[*]) */ 4643 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4644 GEN_VEXT_RED(vredmax_vs_h, int16_t, 
int16_t, H2, H2, DO_MAX) 4645 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4646 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4647 4648 /* vd[0] = minu(vs1[0], vs2[*]) */ 4649 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4650 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4651 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4652 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4653 4654 /* vd[0] = min(vs1[0], vs2[*]) */ 4655 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4656 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4657 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4658 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4659 4660 /* vd[0] = and(vs1[0], vs2[*]) */ 4661 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4662 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4663 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4664 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4665 4666 /* vd[0] = or(vs1[0], vs2[*]) */ 4667 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4668 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4669 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4670 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4671 4672 /* vd[0] = xor(vs1[0], vs2[*]) */ 4673 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4674 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4675 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4676 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4677 4678 /* Vector Widening Integer Reduction Instructions */ 4679 /* signed sum reduction into double-width accumulator */ 4680 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4681 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4682 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, 
H8, H4, DO_ADD) 4683 4684 /* Unsigned sum reduction into double-width accumulator */ 4685 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4686 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4687 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4688 4689 /* Vector Single-Width Floating-Point Reduction Instructions */ 4690 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4691 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4692 void *vs2, CPURISCVState *env, \ 4693 uint32_t desc) \ 4694 { \ 4695 uint32_t vm = vext_vm(desc); \ 4696 uint32_t vl = env->vl; \ 4697 uint32_t esz = sizeof(TD); \ 4698 uint32_t vlenb = simd_maxsz(desc); \ 4699 uint32_t vta = vext_vta(desc); \ 4700 uint32_t i; \ 4701 TD s1 = *((TD *)vs1 + HD(0)); \ 4702 \ 4703 for (i = env->vstart; i < vl; i++) { \ 4704 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4705 if (!vm && !vext_elem_mask(v0, i)) { \ 4706 continue; \ 4707 } \ 4708 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4709 } \ 4710 *((TD *)vd + HD(0)) = s1; \ 4711 env->vstart = 0; \ 4712 /* set tail elements to 1s */ \ 4713 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4714 } 4715 4716 /* Unordered sum */ 4717 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4718 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4719 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4720 4721 /* Ordered sum */ 4722 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4723 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4724 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4725 4726 /* Maximum value */ 4727 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4728 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4729 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4730 4731 /* Minimum value */ 4732 
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4733 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4734 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4735 4736 /* Vector Widening Floating-Point Add Instructions */ 4737 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4738 { 4739 return float32_add(a, float16_to_float32(b, true, s), s); 4740 } 4741 4742 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4743 { 4744 return float64_add(a, float32_to_float64(b, s), s); 4745 } 4746 4747 /* Vector Widening Floating-Point Reduction Instructions */ 4748 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4749 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4750 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4751 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4752 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4753 4754 /* 4755 *** Vector Mask Operations 4756 */ 4757 /* Vector Mask-Register Logical Instructions */ 4758 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4759 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4760 void *vs2, CPURISCVState *env, \ 4761 uint32_t desc) \ 4762 { \ 4763 uint32_t vl = env->vl; \ 4764 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 4765 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4766 uint32_t i; \ 4767 int a, b; \ 4768 \ 4769 for (i = env->vstart; i < vl; i++) { \ 4770 a = vext_elem_mask(vs1, i); \ 4771 b = vext_elem_mask(vs2, i); \ 4772 vext_set_elem_mask(vd, i, OP(b, a)); \ 4773 } \ 4774 env->vstart = 0; \ 4775 /* mask destination register are always tail- \ 4776 * agnostic \ 4777 */ \ 4778 /* set tail elements to 1s */ \ 4779 if (vta_all_1s) { \ 4780 for (; i < total_elems; i++) { \ 4781 vext_set_elem_mask(vd, i, 1); \ 4782 } \ 4783 } \ 4784 } 4785 4786 #define DO_NAND(N, M) (!(N & M)) 4787 #define 
DO_ANDNOT(N, M) (N & !M) 4788 #define DO_NOR(N, M) (!(N | M)) 4789 #define DO_ORNOT(N, M) (N | !M) 4790 #define DO_XNOR(N, M) (!(N ^ M)) 4791 4792 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4793 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4794 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4795 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4796 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4797 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4798 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4799 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4800 4801 /* Vector count population in mask vcpop */ 4802 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4803 uint32_t desc) 4804 { 4805 target_ulong cnt = 0; 4806 uint32_t vm = vext_vm(desc); 4807 uint32_t vl = env->vl; 4808 int i; 4809 4810 for (i = env->vstart; i < vl; i++) { 4811 if (vm || vext_elem_mask(v0, i)) { 4812 if (vext_elem_mask(vs2, i)) { 4813 cnt++; 4814 } 4815 } 4816 } 4817 env->vstart = 0; 4818 return cnt; 4819 } 4820 4821 /* vfirst find-first-set mask bit*/ 4822 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4823 uint32_t desc) 4824 { 4825 uint32_t vm = vext_vm(desc); 4826 uint32_t vl = env->vl; 4827 int i; 4828 4829 for (i = env->vstart; i < vl; i++) { 4830 if (vm || vext_elem_mask(v0, i)) { 4831 if (vext_elem_mask(vs2, i)) { 4832 return i; 4833 } 4834 } 4835 } 4836 env->vstart = 0; 4837 return -1LL; 4838 } 4839 4840 enum set_mask_type { 4841 ONLY_FIRST = 1, 4842 INCLUDE_FIRST, 4843 BEFORE_FIRST, 4844 }; 4845 4846 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4847 uint32_t desc, enum set_mask_type type) 4848 { 4849 uint32_t vm = vext_vm(desc); 4850 uint32_t vl = env->vl; 4851 uint32_t total_elems = env_archcpu(env)->cfg.vlen; 4852 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4853 uint32_t vma = vext_vma(desc); 4854 int i; 4855 bool first_mask_bit = false; 4856 4857 for (i = env->vstart; i < vl; i++) { 4858 if (!vm && !vext_elem_mask(v0, i)) { 4859 /* set masked-off elements to 1s */ 4860 if (vma) { 4861 
vext_set_elem_mask(vd, i, 1); 4862 } 4863 continue; 4864 } 4865 /* write a zero to all following active elements */ 4866 if (first_mask_bit) { 4867 vext_set_elem_mask(vd, i, 0); 4868 continue; 4869 } 4870 if (vext_elem_mask(vs2, i)) { 4871 first_mask_bit = true; 4872 if (type == BEFORE_FIRST) { 4873 vext_set_elem_mask(vd, i, 0); 4874 } else { 4875 vext_set_elem_mask(vd, i, 1); 4876 } 4877 } else { 4878 if (type == ONLY_FIRST) { 4879 vext_set_elem_mask(vd, i, 0); 4880 } else { 4881 vext_set_elem_mask(vd, i, 1); 4882 } 4883 } 4884 } 4885 env->vstart = 0; 4886 /* mask destination register are always tail-agnostic */ 4887 /* set tail elements to 1s */ 4888 if (vta_all_1s) { 4889 for (; i < total_elems; i++) { 4890 vext_set_elem_mask(vd, i, 1); 4891 } 4892 } 4893 } 4894 4895 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4896 uint32_t desc) 4897 { 4898 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4899 } 4900 4901 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4902 uint32_t desc) 4903 { 4904 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4905 } 4906 4907 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4908 uint32_t desc) 4909 { 4910 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4911 } 4912 4913 /* Vector Iota Instruction */ 4914 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4915 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4916 uint32_t desc) \ 4917 { \ 4918 uint32_t vm = vext_vm(desc); \ 4919 uint32_t vl = env->vl; \ 4920 uint32_t esz = sizeof(ETYPE); \ 4921 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4922 uint32_t vta = vext_vta(desc); \ 4923 uint32_t vma = vext_vma(desc); \ 4924 uint32_t sum = 0; \ 4925 int i; \ 4926 \ 4927 for (i = env->vstart; i < vl; i++) { \ 4928 if (!vm && !vext_elem_mask(v0, i)) { \ 4929 /* set masked-off elements to 1s */ \ 4930 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4931 continue; \ 4932 } \ 4933 *((ETYPE *)vd + 
H(i)) = sum; \ 4934 if (vext_elem_mask(vs2, i)) { \ 4935 sum++; \ 4936 } \ 4937 } \ 4938 env->vstart = 0; \ 4939 /* set tail elements to 1s */ \ 4940 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4941 } 4942 4943 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4944 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4945 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4946 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4947 4948 /* Vector Element Index Instruction */ 4949 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4950 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4951 { \ 4952 uint32_t vm = vext_vm(desc); \ 4953 uint32_t vl = env->vl; \ 4954 uint32_t esz = sizeof(ETYPE); \ 4955 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4956 uint32_t vta = vext_vta(desc); \ 4957 uint32_t vma = vext_vma(desc); \ 4958 int i; \ 4959 \ 4960 for (i = env->vstart; i < vl; i++) { \ 4961 if (!vm && !vext_elem_mask(v0, i)) { \ 4962 /* set masked-off elements to 1s */ \ 4963 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4964 continue; \ 4965 } \ 4966 *((ETYPE *)vd + H(i)) = i; \ 4967 } \ 4968 env->vstart = 0; \ 4969 /* set tail elements to 1s */ \ 4970 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4971 } 4972 4973 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4974 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4975 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4976 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4977 4978 /* 4979 *** Vector Permutation Instructions 4980 */ 4981 4982 /* Vector Slide Instructions */ 4983 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4984 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4985 CPURISCVState *env, uint32_t desc) \ 4986 { \ 4987 uint32_t vm = vext_vm(desc); \ 4988 uint32_t vl = env->vl; \ 4989 uint32_t esz = sizeof(ETYPE); \ 4990 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4991 uint32_t vta = vext_vta(desc); \ 4992 uint32_t vma = vext_vma(desc); \ 4993 target_ulong offset = s1, i_min, i; \ 
4994 \ 4995 i_min = MAX(env->vstart, offset); \ 4996 for (i = i_min; i < vl; i++) { \ 4997 if (!vm && !vext_elem_mask(v0, i)) { \ 4998 /* set masked-off elements to 1s */ \ 4999 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 5000 continue; \ 5001 } \ 5002 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 5003 } \ 5004 /* set tail elements to 1s */ \ 5005 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5006 } 5007 5008 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 5009 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 5010 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 5011 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 5012 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 5013 5014 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 5015 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 5016 CPURISCVState *env, uint32_t desc) \ 5017 { \ 5018 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 5019 uint32_t vm = vext_vm(desc); \ 5020 uint32_t vl = env->vl; \ 5021 uint32_t esz = sizeof(ETYPE); \ 5022 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5023 uint32_t vta = vext_vta(desc); \ 5024 uint32_t vma = vext_vma(desc); \ 5025 target_ulong i_max, i; \ 5026 \ 5027 i_max = MAX(MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl), env->vstart); \ 5028 for (i = env->vstart; i < i_max; ++i) { \ 5029 if (!vm && !vext_elem_mask(v0, i)) { \ 5030 /* set masked-off elements to 1s */ \ 5031 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 5032 continue; \ 5033 } \ 5034 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 5035 } \ 5036 \ 5037 for (i = i_max; i < vl; ++i) { \ 5038 if (vm || vext_elem_mask(v0, i)) { \ 5039 *((ETYPE *)vd + H(i)) = 0; \ 5040 } \ 5041 } \ 5042 \ 5043 env->vstart = 0; \ 5044 /* set tail elements to 1s */ \ 5045 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5046 } 5047 5048 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 5049 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 5050 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 5051 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 5052 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 5053 5054 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ 5055 static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ 5056 void *vs2, CPURISCVState *env, uint32_t desc) \ 5057 { \ 5058 typedef uint##BITWIDTH##_t ETYPE; \ 5059 uint32_t vm = vext_vm(desc); \ 5060 uint32_t vl = env->vl; \ 5061 uint32_t esz = sizeof(ETYPE); \ 5062 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5063 uint32_t vta = vext_vta(desc); \ 5064 uint32_t vma = vext_vma(desc); \ 5065 uint32_t i; \ 5066 \ 5067 for (i = env->vstart; i < vl; i++) { \ 5068 if (!vm && !vext_elem_mask(v0, i)) { \ 5069 /* set masked-off elements to 1s */ \ 5070 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 5071 continue; \ 5072 } \ 5073 if (i == 0) { \ 5074 *((ETYPE *)vd + H(i)) = s1; \ 5075 } else { \ 5076 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 5077 } \ 5078 } \ 5079 env->vstart = 0; \ 5080 /* set tail elements to 1s */ \ 5081 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5082 } 5083 5084 GEN_VEXT_VSLIE1UP(8, H1) 5085 GEN_VEXT_VSLIE1UP(16, H2) 5086 GEN_VEXT_VSLIE1UP(32, 
H4) 5087 GEN_VEXT_VSLIE1UP(64, H8) 5088 5089 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ 5090 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 5091 CPURISCVState *env, uint32_t desc) \ 5092 { \ 5093 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 5094 } 5095 5096 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 5097 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 5098 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 5099 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 5100 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 5101 5102 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \ 5103 static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ 5104 void *vs2, CPURISCVState *env, uint32_t desc) \ 5105 { \ 5106 typedef uint##BITWIDTH##_t ETYPE; \ 5107 uint32_t vm = vext_vm(desc); \ 5108 uint32_t vl = env->vl; \ 5109 uint32_t esz = sizeof(ETYPE); \ 5110 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5111 uint32_t vta = vext_vta(desc); \ 5112 uint32_t vma = vext_vma(desc); \ 5113 uint32_t i; \ 5114 \ 5115 for (i = env->vstart; i < vl; i++) { \ 5116 if (!vm && !vext_elem_mask(v0, i)) { \ 5117 /* set masked-off elements to 1s */ \ 5118 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 5119 continue; \ 5120 } \ 5121 if (i == vl - 1) { \ 5122 *((ETYPE *)vd + H(i)) = s1; \ 5123 } else { \ 5124 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 5125 } \ 5126 } \ 5127 env->vstart = 0; \ 5128 /* set tail elements to 1s */ \ 5129 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5130 } 5131 5132 GEN_VEXT_VSLIDE1DOWN(8, H1) 5133 GEN_VEXT_VSLIDE1DOWN(16, H2) 5134 GEN_VEXT_VSLIDE1DOWN(32, H4) 5135 GEN_VEXT_VSLIDE1DOWN(64, H8) 5136 5137 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ 5138 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 5139 CPURISCVState *env, uint32_t desc) \ 5140 { \ 5141 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 5142 } 5143 5144 /* vslide1down.vx vd, vs2, rs1, vm # 
vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 5145 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 5146 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 5147 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 5148 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 5149 5150 /* Vector Floating-Point Slide Instructions */ 5151 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \ 5152 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 5153 CPURISCVState *env, uint32_t desc) \ 5154 { \ 5155 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 5156 } 5157 5158 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */ 5159 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 5160 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 5161 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 5162 5163 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \ 5164 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 5165 CPURISCVState *env, uint32_t desc) \ 5166 { \ 5167 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 5168 } 5169 5170 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 5171 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 5172 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 5173 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 5174 5175 /* Vector Register Gather Instruction */ 5176 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 5177 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 5178 CPURISCVState *env, uint32_t desc) \ 5179 { \ 5180 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 5181 uint32_t vm = vext_vm(desc); \ 5182 uint32_t vl = env->vl; \ 5183 uint32_t esz = sizeof(TS2); \ 5184 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5185 uint32_t vta = vext_vta(desc); \ 5186 uint32_t vma = vext_vma(desc); \ 5187 uint64_t index; \ 5188 uint32_t i; \ 5189 \ 5190 for (i = env->vstart; i < vl; i++) { \ 5191 if (!vm && !vext_elem_mask(v0, i)) { \ 5192 /* set masked-off elements to 1s */ \ 5193 
vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 5194 continue; \ 5195 } \ 5196 index = *((TS1 *)vs1 + HS1(i)); \ 5197 if (index >= vlmax) { \ 5198 *((TS2 *)vd + HS2(i)) = 0; \ 5199 } else { \ 5200 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 5201 } \ 5202 } \ 5203 env->vstart = 0; \ 5204 /* set tail elements to 1s */ \ 5205 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5206 } 5207 5208 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 5209 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 5210 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 5211 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 5212 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 5213 5214 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 5215 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 5216 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 5217 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 5218 5219 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 5220 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 5221 CPURISCVState *env, uint32_t desc) \ 5222 { \ 5223 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 5224 uint32_t vm = vext_vm(desc); \ 5225 uint32_t vl = env->vl; \ 5226 uint32_t esz = sizeof(ETYPE); \ 5227 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5228 uint32_t vta = vext_vta(desc); \ 5229 uint32_t vma = vext_vma(desc); \ 5230 uint64_t index = s1; \ 5231 uint32_t i; \ 5232 \ 5233 for (i = env->vstart; i < vl; i++) { \ 5234 if (!vm && !vext_elem_mask(v0, i)) { \ 5235 /* set masked-off elements to 1s */ \ 5236 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 5237 continue; \ 5238 } \ 5239 if (index >= vlmax) { \ 5240 *((ETYPE *)vd + H(i)) = 0; \ 5241 } else { \ 5242 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 5243 } \ 5244 } \ 5245 env->vstart = 0; \ 5246 /* set tail 
elements to 1s */ \ 5247 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5248 } 5249 5250 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ 5251 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 5252 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 5253 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 5254 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 5255 5256 /* Vector Compress Instruction */ 5257 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 5258 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 5259 CPURISCVState *env, uint32_t desc) \ 5260 { \ 5261 uint32_t vl = env->vl; \ 5262 uint32_t esz = sizeof(ETYPE); \ 5263 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5264 uint32_t vta = vext_vta(desc); \ 5265 uint32_t num = 0, i; \ 5266 \ 5267 for (i = env->vstart; i < vl; i++) { \ 5268 if (!vext_elem_mask(vs1, i)) { \ 5269 continue; \ 5270 } \ 5271 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 5272 num++; \ 5273 } \ 5274 env->vstart = 0; \ 5275 /* set tail elements to 1s */ \ 5276 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5277 } 5278 5279 /* Compress into vd elements of vs2 where vs1 is enabled */ 5280 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 5281 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 5282 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 5283 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 5284 5285 /* Vector Whole Register Move */ 5286 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) 5287 { 5288 /* EEW = SEW */ 5289 uint32_t maxsz = simd_maxsz(desc); 5290 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); 5291 uint32_t startb = env->vstart * sewb; 5292 uint32_t i = startb; 5293 5294 memcpy((uint8_t *)vd + H1(i), 5295 (uint8_t *)vs2 + H1(i), 5296 maxsz - startb); 5297 5298 env->vstart = 0; 5299 } 5300 5301 /* Vector Integer Extension */ 5302 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \ 5303 void HELPER(NAME)(void *vd, void 
*v0, void *vs2, \ 5304 CPURISCVState *env, uint32_t desc) \ 5305 { \ 5306 uint32_t vl = env->vl; \ 5307 uint32_t vm = vext_vm(desc); \ 5308 uint32_t esz = sizeof(ETYPE); \ 5309 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5310 uint32_t vta = vext_vta(desc); \ 5311 uint32_t vma = vext_vma(desc); \ 5312 uint32_t i; \ 5313 \ 5314 for (i = env->vstart; i < vl; i++) { \ 5315 if (!vm && !vext_elem_mask(v0, i)) { \ 5316 /* set masked-off elements to 1s */ \ 5317 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 5318 continue; \ 5319 } \ 5320 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ 5321 } \ 5322 env->vstart = 0; \ 5323 /* set tail elements to 1s */ \ 5324 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5325 } 5326 5327 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) 5328 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2) 5329 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4) 5330 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1) 5331 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2) 5332 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1) 5333 5334 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1) 5335 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2) 5336 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) 5337 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) 5338 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) 5339 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) 5340