/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
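 *
 * For example, a byte access to logical element 0 must reach the
 * least-significant byte of the first 64-bit chunk.  A big-endian host
 * stores that byte at offset 7, so H1 below maps 0 -> 7, 1 -> 6, ...,
 * 7 -> 0, while 64-bit accesses (H8) need no adjustment.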
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

static inline uint32_t vext_vta(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA);
}

static inline uint32_t vext_vma(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VMA);
}

static inline uint32_t vext_vta_all_1s(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * Get the number of total elements, including prestart, body and tail
 * elements.  Note that when LMUL < 1, the tail includes the elements past
 * VLMAX that are held in the same vector register.
 */
static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
                                            uint32_t esz)
{
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
    return (vlenb << emul) / esz;
}

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check.  In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB and the
 * page table walk can't fill the TLB entry.  Then the guest software can
 * return here after processing the exception, or never return.
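 *
 * The probed range may span a page boundary, in which case both pages
 * are probed: e.g. a 16-byte access starting 4 bytes before the end of
 * a page probes 4 bytes on the first page and 12 bytes on the next.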
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

/* set agnostic elements to 1s */
static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
                              uint32_t tot)
{
    if (is_agnostic == 0) {
        /* policy undisturbed */
        return;
    }
    if (tot - cnt == 0) {
        return;
    }
    memset(base + cnt, -1, tot - cnt);
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
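            /* move on to the next field of the current segment */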
k++; 301 } 302 } 303 env->vstart = 0; 304 /* set tail elements to 1s */ 305 for (k = 0; k < nf; ++k) { 306 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, 307 (k * max_elems + max_elems) * esz); 308 } 309 if (nf * max_elems % total_elems != 0) { 310 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 311 uint32_t registers_used = 312 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 313 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 314 registers_used * vlenb); 315 } 316 } 317 318 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 319 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 320 target_ulong stride, CPURISCVState *env, \ 321 uint32_t desc) \ 322 { \ 323 uint32_t vm = vext_vm(desc); \ 324 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 325 ctzl(sizeof(ETYPE)), GETPC()); \ 326 } 327 328 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 329 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 330 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 331 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 332 333 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 334 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 335 target_ulong stride, CPURISCVState *env, \ 336 uint32_t desc) \ 337 { \ 338 uint32_t vm = vext_vm(desc); \ 339 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 340 ctzl(sizeof(ETYPE)), GETPC()); \ 341 } 342 343 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 344 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 345 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 346 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 347 348 /* 349 *** unit-stride: access elements stored contiguously in memory 350 */ 351 352 /* unmasked unit-stride load and store operation*/ 353 static void 354 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 355 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, 356 uintptr_t ra) 357 { 358 uint32_t i, k; 359 uint32_t nf = vext_nf(desc); 360 uint32_t max_elems = vext_max_elems(desc, log2_esz); 361 uint32_t esz = 1 << log2_esz; 362 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 363 uint32_t vta = vext_vta(desc); 364 365 /* load bytes from guest memory */ 366 for (i = env->vstart; i < evl; i++, env->vstart++) { 367 k = 0; 368 while (k < nf) { 369 target_ulong addr = base + ((i * nf + k) << log2_esz); 370 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 371 k++; 372 } 373 } 374 env->vstart = 0; 375 /* set tail elements to 1s */ 376 for (k = 0; k < nf; ++k) { 377 vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz, 378 (k * max_elems + max_elems) * esz); 379 } 380 if (nf * max_elems % total_elems != 0) { 381 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 382 uint32_t registers_used = 383 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 384 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 385 registers_used * vlenb); 386 } 387 } 388 389 /* 390 * masked unit-stride load and store operation will be a special case of stride, 391 * stride = NF * sizeof (MTYPE) 392 */ 393 394 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 395 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 396 CPURISCVState *env, uint32_t desc) \ 397 { \ 398 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 399 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 400 ctzl(sizeof(ETYPE)), GETPC()); \ 401 } \ 402 \ 403 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 404 CPURISCVState *env, uint32_t desc) \ 405 { \ 406 vext_ldst_us(vd, base, env, desc, 
LOAD_FN, \ 407 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 408 } 409 410 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 411 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 412 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 413 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 414 415 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 416 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 417 CPURISCVState *env, uint32_t desc) \ 418 { \ 419 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 420 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 421 ctzl(sizeof(ETYPE)), GETPC()); \ 422 } \ 423 \ 424 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 425 CPURISCVState *env, uint32_t desc) \ 426 { \ 427 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 428 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 429 } 430 431 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 432 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 433 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 434 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 435 436 /* 437 *** unit stride mask load and store, EEW = 1 438 */ 439 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 440 CPURISCVState *env, uint32_t desc) 441 { 442 /* evl = ceil(vl/8) */ 443 uint8_t evl = (env->vl + 7) >> 3; 444 vext_ldst_us(vd, base, env, desc, lde_b, 445 0, evl, GETPC()); 446 } 447 448 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 449 CPURISCVState *env, uint32_t desc) 450 { 451 /* evl = ceil(vl/8) */ 452 uint8_t evl = (env->vl + 7) >> 3; 453 vext_ldst_us(vd, base, env, desc, ste_b, 454 0, evl, GETPC()); 455 } 456 457 /* 458 *** index: access vector element from indexed memory 459 */ 460 typedef target_ulong vext_get_index_addr(target_ulong base, 461 uint32_t idx, void *vs2); 462 463 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 464 static target_ulong NAME(target_ulong base, \ 465 uint32_t idx, void *vs2) \ 466 { \ 467 return (base + *((ETYPE *)vs2 + H(idx))); \ 468 } 469 470 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 471 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 472 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 473 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 474 475 static inline void 476 vext_ldst_index(void *vd, void *v0, target_ulong base, 477 void *vs2, CPURISCVState *env, uint32_t desc, 478 vext_get_index_addr get_index_addr, 479 vext_ldst_elem_fn *ldst_elem, 480 uint32_t log2_esz, uintptr_t ra) 481 { 482 uint32_t i, k; 483 uint32_t nf = vext_nf(desc); 484 uint32_t vm = vext_vm(desc); 485 uint32_t max_elems = vext_max_elems(desc, log2_esz); 486 uint32_t esz = 1 << log2_esz; 487 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 488 uint32_t vta = vext_vta(desc); 489 uint32_t vma = vext_vma(desc); 490 491 /* load bytes from guest memory */ 492 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 493 k = 0; 494 while (k < nf) { 495 if (!vm && !vext_elem_mask(v0, i)) { 496 /* set masked-off elements to 1s */ 497 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 498 (i + k * max_elems + 1) * esz); 499 k++; 500 continue; 501 } 502 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); 503 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 504 k++; 505 } 506 } 507 env->vstart = 0; 508 /* set tail elements to 1s */ 509 for (k = 0; k < nf; ++k) { 510 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, 511 (k * max_elems + max_elems) * esz); 512 } 513 if (nf * max_elems % total_elems != 0) { 514 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 515 uint32_t registers_used = 516 ((nf * max_elems) * esz + (vlenb - 1)) 
/ vlenb; 517 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 518 registers_used * vlenb); 519 } 520 } 521 522 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 523 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 524 void *vs2, CPURISCVState *env, uint32_t desc) \ 525 { \ 526 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 527 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ 528 } 529 530 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 531 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 532 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 533 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 534 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 535 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 536 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 537 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 538 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 539 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 540 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 541 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 542 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 543 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 544 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 545 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 546 547 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 548 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 549 void *vs2, CPURISCVState *env, uint32_t desc) \ 550 { \ 551 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 552 STORE_FN, ctzl(sizeof(ETYPE)), \ 553 GETPC()); \ 554 } 555 556 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b) 557 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h) 558 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w) 559 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d) 560 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b) 561 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h) 562 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w) 563 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d) 564 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b) 565 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h) 566 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w) 567 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d) 568 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b) 569 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h) 570 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w) 571 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d) 572 573 /* 574 *** unit-stride fault-only-fisrt load instructions 575 */ 576 static inline void 577 vext_ldff(void *vd, void *v0, target_ulong base, 578 CPURISCVState *env, uint32_t desc, 579 vext_ldst_elem_fn *ldst_elem, 580 uint32_t log2_esz, uintptr_t ra) 581 { 582 void *host; 583 uint32_t i, k, vl = 0; 584 uint32_t nf = vext_nf(desc); 585 uint32_t vm = vext_vm(desc); 586 uint32_t max_elems = vext_max_elems(desc, log2_esz); 587 uint32_t esz = 1 << log2_esz; 588 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 589 uint32_t vta = vext_vta(desc); 590 uint32_t vma = vext_vma(desc); 591 target_ulong addr, offset, remain; 592 593 /* probe every access*/ 594 for (i = env->vstart; i < env->vl; i++) { 595 if (!vm && !vext_elem_mask(v0, i)) { 596 continue; 597 } 598 addr = adjust_addr(env, base + i * (nf << log2_esz)); 599 if (i == 0) { 600 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD); 601 } else { 602 /* if it triggers an 
exception, no need to check watchpoint */ 603 remain = nf << log2_esz; 604 while (remain > 0) { 605 offset = -(addr | TARGET_PAGE_MASK); 606 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, 607 cpu_mmu_index(env, false)); 608 if (host) { 609 #ifdef CONFIG_USER_ONLY 610 if (page_check_range(addr, offset, PAGE_READ) < 0) { 611 vl = i; 612 goto ProbeSuccess; 613 } 614 #else 615 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD); 616 #endif 617 } else { 618 vl = i; 619 goto ProbeSuccess; 620 } 621 if (remain <= offset) { 622 break; 623 } 624 remain -= offset; 625 addr = adjust_addr(env, addr + offset); 626 } 627 } 628 } 629 ProbeSuccess: 630 /* load bytes from guest memory */ 631 if (vl != 0) { 632 env->vl = vl; 633 } 634 for (i = env->vstart; i < env->vl; i++) { 635 k = 0; 636 while (k < nf) { 637 if (!vm && !vext_elem_mask(v0, i)) { 638 /* set masked-off elements to 1s */ 639 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 640 (i + k * max_elems + 1) * esz); 641 k++; 642 continue; 643 } 644 target_ulong addr = base + ((i * nf + k) << log2_esz); 645 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 646 k++; 647 } 648 } 649 env->vstart = 0; 650 /* set tail elements to 1s */ 651 for (k = 0; k < nf; ++k) { 652 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, 653 (k * max_elems + max_elems) * esz); 654 } 655 if (nf * max_elems % total_elems != 0) { 656 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 657 uint32_t registers_used = 658 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 659 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 660 registers_used * vlenb); 661 } 662 } 663 664 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ 665 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 666 CPURISCVState *env, uint32_t desc) \ 667 { \ 668 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \ 669 ctzl(sizeof(ETYPE)), GETPC()); \ 670 } 671 672 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b) 673 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h) 674 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w) 675 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) 676 677 #define DO_SWAP(N, M) (M) 678 #define DO_AND(N, M) (N & M) 679 #define DO_XOR(N, M) (N ^ M) 680 #define DO_OR(N, M) (N | M) 681 #define DO_ADD(N, M) (N + M) 682 683 /* Signed min/max */ 684 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 685 #define DO_MIN(N, M) ((N) >= (M) ? 
(M) : (N)) 686 687 /* Unsigned min/max */ 688 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 689 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 690 691 /* 692 *** load and store whole register instructions 693 */ 694 static void 695 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 696 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra) 697 { 698 uint32_t i, k, off, pos; 699 uint32_t nf = vext_nf(desc); 700 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 701 uint32_t max_elems = vlenb >> log2_esz; 702 703 k = env->vstart / max_elems; 704 off = env->vstart % max_elems; 705 706 if (off) { 707 /* load/store rest of elements of current segment pointed by vstart */ 708 for (pos = off; pos < max_elems; pos++, env->vstart++) { 709 target_ulong addr = base + ((pos + k * max_elems) << log2_esz); 710 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra); 711 } 712 k++; 713 } 714 715 /* load/store elements for rest of segments */ 716 for (; k < nf; k++) { 717 for (i = 0; i < max_elems; i++, env->vstart++) { 718 target_ulong addr = base + ((i + k * max_elems) << log2_esz); 719 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 720 } 721 } 722 723 env->vstart = 0; 724 } 725 726 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 727 void HELPER(NAME)(void *vd, target_ulong base, \ 728 CPURISCVState *env, uint32_t desc) \ 729 { \ 730 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 731 ctzl(sizeof(ETYPE)), GETPC()); \ 732 } 733 734 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 735 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 736 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 737 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 738 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 739 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 740 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 741 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 742 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 743 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 744 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 745 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 746 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 747 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 748 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 749 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 750 751 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 752 void HELPER(NAME)(void *vd, target_ulong base, \ 753 CPURISCVState *env, uint32_t desc) \ 754 { \ 755 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 756 ctzl(sizeof(ETYPE)), GETPC()); \ 757 } 758 759 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 760 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 761 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 762 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 763 764 /* 765 *** Vector Integer Arithmetic Instructions 766 */ 767 768 /* expand macro args before macro */ 769 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 770 771 /* (TD, T1, T2, TX1, TX2) */ 772 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 773 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 774 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 775 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 776 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 777 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 778 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 779 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 780 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 781 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 782 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 783 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 784 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 785 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 786 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 787 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 788 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 789 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 790 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 791 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 792 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 793 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 794 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 795 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 796 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 797 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 798 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 799 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 800 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 801 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 802 803 /* operation of two vector elements */ 804 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 805 806 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 807 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 808 { \ 809 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 810 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 811 *((TD *)vd + HD(i)) = OP(s2, s1); \ 812 } 813 #define DO_SUB(N, M) (N - M) 814 #define DO_RSUB(N, M) (M - N) 815 816 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 817 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 818 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 819 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 820 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 821 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 822 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 823 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 824 825 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 826 CPURISCVState *env, uint32_t desc, 827 opivv2_fn *fn, uint32_t esz) 828 { 829 uint32_t vm = vext_vm(desc); 830 uint32_t vl = env->vl; 831 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 832 uint32_t vta = vext_vta(desc); 833 uint32_t vma = vext_vma(desc); 834 uint32_t i; 835 836 for (i = env->vstart; i < vl; i++) { 837 if (!vm && !vext_elem_mask(v0, i)) { 838 /* set masked-off elements to 1s */ 839 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 840 
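            /* with mask-undisturbed (vma == 0) the call above is a no-op */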
continue; 841 } 842 fn(vd, vs1, vs2, i); 843 } 844 env->vstart = 0; 845 /* set tail elements to 1s */ 846 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 847 } 848 849 /* generate the helpers for OPIVV */ 850 #define GEN_VEXT_VV(NAME, ESZ) \ 851 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 852 void *vs2, CPURISCVState *env, \ 853 uint32_t desc) \ 854 { \ 855 do_vext_vv(vd, v0, vs1, vs2, env, desc, \ 856 do_##NAME, ESZ); \ 857 } 858 859 GEN_VEXT_VV(vadd_vv_b, 1) 860 GEN_VEXT_VV(vadd_vv_h, 2) 861 GEN_VEXT_VV(vadd_vv_w, 4) 862 GEN_VEXT_VV(vadd_vv_d, 8) 863 GEN_VEXT_VV(vsub_vv_b, 1) 864 GEN_VEXT_VV(vsub_vv_h, 2) 865 GEN_VEXT_VV(vsub_vv_w, 4) 866 GEN_VEXT_VV(vsub_vv_d, 8) 867 868 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 869 870 /* 871 * (T1)s1 gives the real operator type. 872 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 873 */ 874 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 875 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 876 { \ 877 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 878 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 879 } 880 881 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 882 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 883 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 884 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 885 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 886 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 887 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 888 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 889 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 890 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 891 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 892 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 893 894 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 895 CPURISCVState *env, uint32_t desc, 896 opivx2_fn fn, uint32_t esz) 897 { 898 uint32_t vm = vext_vm(desc); 899 uint32_t vl = env->vl; 900 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 901 uint32_t vta = vext_vta(desc); 902 uint32_t vma = vext_vma(desc); 903 uint32_t i; 904 905 for (i = env->vstart; i < vl; i++) { 906 if (!vm && !vext_elem_mask(v0, i)) { 907 /* set masked-off elements to 1s */ 908 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 909 continue; 910 } 911 fn(vd, s1, vs2, i); 912 } 913 env->vstart = 0; 914 /* set tail elements to 1s */ 915 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 916 } 917 918 /* generate the helpers for OPIVX */ 919 #define GEN_VEXT_VX(NAME, ESZ) \ 920 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 921 void *vs2, CPURISCVState *env, \ 922 uint32_t desc) \ 923 { \ 924 do_vext_vx(vd, v0, s1, vs2, env, desc, \ 925 do_##NAME, ESZ); \ 926 } 927 928 GEN_VEXT_VX(vadd_vx_b, 1) 929 GEN_VEXT_VX(vadd_vx_h, 2) 930 GEN_VEXT_VX(vadd_vx_w, 4) 931 GEN_VEXT_VX(vadd_vx_d, 8) 932 GEN_VEXT_VX(vsub_vx_b, 1) 933 GEN_VEXT_VX(vsub_vx_h, 2) 934 GEN_VEXT_VX(vsub_vx_w, 4) 935 GEN_VEXT_VX(vsub_vx_d, 8) 936 GEN_VEXT_VX(vrsub_vx_b, 1) 937 GEN_VEXT_VX(vrsub_vx_h, 2) 938 GEN_VEXT_VX(vrsub_vx_w, 4) 939 GEN_VEXT_VX(vrsub_vx_d, 8) 940 941 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 942 { 943 intptr_t oprsz = simd_oprsz(desc); 944 intptr_t i; 945 946 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 947 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 948 } 949 } 950 951 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 952 { 
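    /* reversed subtraction with a scalar: d[i] = b - a[i] per 16-bit lane */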
953 intptr_t oprsz = simd_oprsz(desc); 954 intptr_t i; 955 956 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 957 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 958 } 959 } 960 961 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 962 { 963 intptr_t oprsz = simd_oprsz(desc); 964 intptr_t i; 965 966 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 967 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 968 } 969 } 970 971 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 972 { 973 intptr_t oprsz = simd_oprsz(desc); 974 intptr_t i; 975 976 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 977 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 978 } 979 } 980 981 /* Vector Widening Integer Add/Subtract */ 982 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 983 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 984 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 985 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 986 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 987 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 988 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 989 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 990 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 991 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 992 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 993 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 994 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 995 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 996 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 997 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 998 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 999 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 1000 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 1001 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 1002 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 1003 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 1004 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 1005 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 1006 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 1007 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 1008 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 1009 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 1010 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 1011 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 1012 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 1013 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 1014 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 1015 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 1016 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 1017 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 1018 GEN_VEXT_VV(vwaddu_vv_b, 2) 1019 GEN_VEXT_VV(vwaddu_vv_h, 4) 1020 GEN_VEXT_VV(vwaddu_vv_w, 8) 1021 GEN_VEXT_VV(vwsubu_vv_b, 2) 1022 GEN_VEXT_VV(vwsubu_vv_h, 4) 1023 GEN_VEXT_VV(vwsubu_vv_w, 8) 1024 GEN_VEXT_VV(vwadd_vv_b, 2) 1025 GEN_VEXT_VV(vwadd_vv_h, 4) 1026 GEN_VEXT_VV(vwadd_vv_w, 8) 1027 GEN_VEXT_VV(vwsub_vv_b, 2) 1028 GEN_VEXT_VV(vwsub_vv_h, 4) 1029 GEN_VEXT_VV(vwsub_vv_w, 8) 1030 
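/*
 * The *_wv and *_wx variants below take an already-widened (2*SEW)
 * operand in vs2, so only vs1/rs1 is sign- or zero-extended before the
 * add or subtract; see the WOP_W* type lists above.
 */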
GEN_VEXT_VV(vwaddu_wv_b, 2) 1031 GEN_VEXT_VV(vwaddu_wv_h, 4) 1032 GEN_VEXT_VV(vwaddu_wv_w, 8) 1033 GEN_VEXT_VV(vwsubu_wv_b, 2) 1034 GEN_VEXT_VV(vwsubu_wv_h, 4) 1035 GEN_VEXT_VV(vwsubu_wv_w, 8) 1036 GEN_VEXT_VV(vwadd_wv_b, 2) 1037 GEN_VEXT_VV(vwadd_wv_h, 4) 1038 GEN_VEXT_VV(vwadd_wv_w, 8) 1039 GEN_VEXT_VV(vwsub_wv_b, 2) 1040 GEN_VEXT_VV(vwsub_wv_h, 4) 1041 GEN_VEXT_VV(vwsub_wv_w, 8) 1042 1043 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 1044 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 1045 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 1046 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 1047 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 1048 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 1049 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 1050 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 1051 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 1052 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 1053 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 1054 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 1055 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 1056 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 1057 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 1058 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 1059 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 1060 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 1061 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 1062 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 1063 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 1064 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 1065 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 1066 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 1067 GEN_VEXT_VX(vwaddu_vx_b, 2) 1068 GEN_VEXT_VX(vwaddu_vx_h, 4) 1069 GEN_VEXT_VX(vwaddu_vx_w, 8) 1070 GEN_VEXT_VX(vwsubu_vx_b, 2) 1071 GEN_VEXT_VX(vwsubu_vx_h, 4) 1072 GEN_VEXT_VX(vwsubu_vx_w, 8) 1073 GEN_VEXT_VX(vwadd_vx_b, 2) 1074 GEN_VEXT_VX(vwadd_vx_h, 4) 1075 GEN_VEXT_VX(vwadd_vx_w, 8) 1076 GEN_VEXT_VX(vwsub_vx_b, 2) 1077 GEN_VEXT_VX(vwsub_vx_h, 4) 1078 GEN_VEXT_VX(vwsub_vx_w, 8) 1079 GEN_VEXT_VX(vwaddu_wx_b, 2) 1080 GEN_VEXT_VX(vwaddu_wx_h, 4) 1081 GEN_VEXT_VX(vwaddu_wx_w, 8) 1082 GEN_VEXT_VX(vwsubu_wx_b, 2) 1083 GEN_VEXT_VX(vwsubu_wx_h, 4) 1084 GEN_VEXT_VX(vwsubu_wx_w, 8) 1085 GEN_VEXT_VX(vwadd_wx_b, 2) 1086 GEN_VEXT_VX(vwadd_wx_h, 4) 1087 GEN_VEXT_VX(vwadd_wx_w, 8) 1088 GEN_VEXT_VX(vwsub_wx_b, 2) 1089 GEN_VEXT_VX(vwsub_wx_h, 4) 1090 GEN_VEXT_VX(vwsub_wx_w, 8) 1091 1092 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 1093 #define DO_VADC(N, M, C) (N + M + C) 1094 #define DO_VSBC(N, M, C) (N - M - C) 1095 1096 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 1097 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1098 CPURISCVState *env, uint32_t desc) \ 1099 { \ 1100 uint32_t vl = env->vl; \ 1101 uint32_t esz = sizeof(ETYPE); \ 1102 uint32_t total_elems = \ 1103 vext_get_total_elems(env, desc, esz); \ 1104 uint32_t vta = vext_vta(desc); \ 1105 uint32_t i; \ 1106 \ 1107 for (i = env->vstart; i < vl; i++) { \ 1108 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1109 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1110 ETYPE carry = vext_elem_mask(v0, i); \ 1111 \ 1112 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 1113 } \ 1114 env->vstart = 0; \ 1115 /* set tail elements to 1s */ \ 1116 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 
1117 } 1118 1119 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 1120 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 1121 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 1122 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 1123 1124 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 1125 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 1126 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 1127 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 1128 1129 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 1130 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1131 CPURISCVState *env, uint32_t desc) \ 1132 { \ 1133 uint32_t vl = env->vl; \ 1134 uint32_t esz = sizeof(ETYPE); \ 1135 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1136 uint32_t vta = vext_vta(desc); \ 1137 uint32_t i; \ 1138 \ 1139 for (i = env->vstart; i < vl; i++) { \ 1140 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1141 ETYPE carry = vext_elem_mask(v0, i); \ 1142 \ 1143 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1144 } \ 1145 env->vstart = 0; \ 1146 /* set tail elements to 1s */ \ 1147 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1148 } 1149 1150 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1151 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1152 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1153 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1154 1155 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1156 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1157 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1158 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1159 1160 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1161 (__typeof(N))(N + M) < N) 1162 #define DO_MSBC(N, M, C) (C ? 
N <= M : N < M) 1163 1164 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1165 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1166 CPURISCVState *env, uint32_t desc) \ 1167 { \ 1168 uint32_t vl = env->vl; \ 1169 uint32_t vm = vext_vm(desc); \ 1170 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1171 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1172 uint32_t i; \ 1173 \ 1174 for (i = env->vstart; i < vl; i++) { \ 1175 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1176 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1177 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1178 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1179 } \ 1180 env->vstart = 0; \ 1181 /* mask destination register are always tail-agnostic */ \ 1182 /* set tail elements to 1s */ \ 1183 if (vta_all_1s) { \ 1184 for (; i < total_elems; i++) { \ 1185 vext_set_elem_mask(vd, i, 1); \ 1186 } \ 1187 } \ 1188 } 1189 1190 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1191 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1192 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1193 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1194 1195 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1196 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1197 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1198 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1199 1200 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1201 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1202 void *vs2, CPURISCVState *env, uint32_t desc) \ 1203 { \ 1204 uint32_t vl = env->vl; \ 1205 uint32_t vm = vext_vm(desc); \ 1206 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1207 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1208 uint32_t i; \ 1209 \ 1210 for (i = env->vstart; i < vl; i++) { \ 1211 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1212 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1213 vext_set_elem_mask(vd, i, \ 1214 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1215 } \ 1216 env->vstart = 0; \ 1217 /* mask destination register are always tail-agnostic */ \ 1218 /* set tail elements to 1s */ \ 1219 if (vta_all_1s) { \ 1220 for (; i < total_elems; i++) { \ 1221 vext_set_elem_mask(vd, i, 1); \ 1222 } \ 1223 } \ 1224 } 1225 1226 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1227 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1228 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1229 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1230 1231 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1232 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1233 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1234 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1235 1236 /* Vector Bitwise Logical Instructions */ 1237 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1238 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1239 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1240 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1241 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1242 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1243 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1244 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1245 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1246 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1247 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1248 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1249 GEN_VEXT_VV(vand_vv_b, 1) 
1250 GEN_VEXT_VV(vand_vv_h, 2) 1251 GEN_VEXT_VV(vand_vv_w, 4) 1252 GEN_VEXT_VV(vand_vv_d, 8) 1253 GEN_VEXT_VV(vor_vv_b, 1) 1254 GEN_VEXT_VV(vor_vv_h, 2) 1255 GEN_VEXT_VV(vor_vv_w, 4) 1256 GEN_VEXT_VV(vor_vv_d, 8) 1257 GEN_VEXT_VV(vxor_vv_b, 1) 1258 GEN_VEXT_VV(vxor_vv_h, 2) 1259 GEN_VEXT_VV(vxor_vv_w, 4) 1260 GEN_VEXT_VV(vxor_vv_d, 8) 1261 1262 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1263 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1264 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1265 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1266 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1267 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1268 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1269 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1270 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1271 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1272 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1273 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1274 GEN_VEXT_VX(vand_vx_b, 1) 1275 GEN_VEXT_VX(vand_vx_h, 2) 1276 GEN_VEXT_VX(vand_vx_w, 4) 1277 GEN_VEXT_VX(vand_vx_d, 8) 1278 GEN_VEXT_VX(vor_vx_b, 1) 1279 GEN_VEXT_VX(vor_vx_h, 2) 1280 GEN_VEXT_VX(vor_vx_w, 4) 1281 GEN_VEXT_VX(vor_vx_d, 8) 1282 GEN_VEXT_VX(vxor_vx_b, 1) 1283 GEN_VEXT_VX(vxor_vx_h, 2) 1284 GEN_VEXT_VX(vxor_vx_w, 4) 1285 GEN_VEXT_VX(vxor_vx_d, 8) 1286 1287 /* Vector Single-Width Bit Shift Instructions */ 1288 #define DO_SLL(N, M) (N << (M)) 1289 #define DO_SRL(N, M) (N >> (M)) 1290 1291 /* generate the helpers for shift instructions with two vector operators */ 1292 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1293 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1294 void *vs2, CPURISCVState *env, uint32_t desc) \ 1295 { \ 1296 uint32_t vm = vext_vm(desc); \ 1297 uint32_t vl = env->vl; \ 1298 uint32_t esz = sizeof(TS1); \ 1299 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1300 uint32_t vta = vext_vta(desc); \ 1301 uint32_t vma = vext_vma(desc); \ 1302 uint32_t i; \ 1303 \ 1304 for (i = env->vstart; i < vl; i++) { \ 1305 if (!vm && !vext_elem_mask(v0, i)) { \ 1306 /* set masked-off elements to 1s */ \ 1307 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 1308 continue; \ 1309 } \ 1310 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1311 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1312 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1313 } \ 1314 env->vstart = 0; \ 1315 /* set tail elements to 1s */ \ 1316 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1317 } 1318 1319 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1320 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1321 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1322 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1323 1324 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1325 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1326 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1327 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1328 1329 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1330 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1331 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1332 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1333 1334 /* generate the helpers for shift instructions with one vector and one 
scalar */ 1335 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1336 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1337 void *vs2, CPURISCVState *env, uint32_t desc) \ 1338 { \ 1339 uint32_t vm = vext_vm(desc); \ 1340 uint32_t vl = env->vl; \ 1341 uint32_t esz = sizeof(TD); \ 1342 uint32_t total_elems = \ 1343 vext_get_total_elems(env, desc, esz); \ 1344 uint32_t vta = vext_vta(desc); \ 1345 uint32_t vma = vext_vma(desc); \ 1346 uint32_t i; \ 1347 \ 1348 for (i = env->vstart; i < vl; i++) { \ 1349 if (!vm && !vext_elem_mask(v0, i)) { \ 1350 /* set masked-off elements to 1s */ \ 1351 vext_set_elems_1s(vd, vma, i * esz, \ 1352 (i + 1) * esz); \ 1353 continue; \ 1354 } \ 1355 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1356 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1357 } \ 1358 env->vstart = 0; \ 1359 /* set tail elements to 1s */ \ 1360 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ 1361 } 1362 1363 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1364 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1365 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1366 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1367 1368 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1369 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1370 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1371 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1372 1373 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1374 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1375 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1376 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1377 1378 /* Vector Narrowing Integer Right Shift Instructions */ 1379 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1380 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1381 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1382 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1383 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1384 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1385 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1386 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1387 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1388 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1389 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1390 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1391 1392 /* Vector Integer Comparison Instructions */ 1393 #define DO_MSEQ(N, M) (N == M) 1394 #define DO_MSNE(N, M) (N != M) 1395 #define DO_MSLT(N, M) (N < M) 1396 #define DO_MSLE(N, M) (N <= M) 1397 #define DO_MSGT(N, M) (N > M) 1398 1399 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1400 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1401 CPURISCVState *env, uint32_t desc) \ 1402 { \ 1403 uint32_t vm = vext_vm(desc); \ 1404 uint32_t vl = env->vl; \ 1405 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1406 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1407 uint32_t i; \ 1408 \ 1409 for (i = env->vstart; i < vl; i++) { \ 1410 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1411 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1412 if 
(!vm && !vext_elem_mask(v0, i)) { \ 1413 continue; \ 1414 } \ 1415 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1416 } \ 1417 env->vstart = 0; \ 1418 /* mask destination register are always tail-agnostic */ \ 1419 /* set tail elements to 1s */ \ 1420 if (vta_all_1s) { \ 1421 for (; i < total_elems; i++) { \ 1422 vext_set_elem_mask(vd, i, 1); \ 1423 } \ 1424 } \ 1425 } 1426 1427 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1428 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1429 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1430 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1431 1432 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1433 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1434 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1435 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1436 1437 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1438 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1439 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1440 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1441 1442 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1443 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1444 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1445 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1446 1447 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1448 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1449 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1450 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1451 1452 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1453 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1454 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1455 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1456 1457 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1458 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1459 CPURISCVState *env, uint32_t desc) \ 1460 { \ 1461 uint32_t vm = vext_vm(desc); \ 1462 uint32_t vl = env->vl; \ 1463 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1464 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1465 uint32_t i; \ 1466 \ 1467 for (i = env->vstart; i < vl; i++) { \ 1468 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1469 if (!vm && !vext_elem_mask(v0, i)) { \ 1470 continue; \ 1471 } \ 1472 vext_set_elem_mask(vd, i, \ 1473 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1474 } \ 1475 env->vstart = 0; \ 1476 /* mask destination register are always tail-agnostic */ \ 1477 /* set tail elements to 1s */ \ 1478 if (vta_all_1s) { \ 1479 for (; i < total_elems; i++) { \ 1480 vext_set_elem_mask(vd, i, 1); \ 1481 } \ 1482 } \ 1483 } 1484 1485 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1486 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1487 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1488 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1489 1490 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1491 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1492 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1493 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1494 1495 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1496 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1497 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1498 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1499 1500 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1501 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1502 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1503 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1504 1505 
GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1506 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1507 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1508 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1509 1510 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1511 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1512 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1513 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1514 1515 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1516 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1517 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1518 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1519 1520 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1521 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1522 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1523 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1524 1525 /* Vector Integer Min/Max Instructions */ 1526 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1527 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1528 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1529 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1530 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1531 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1532 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1533 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1534 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1535 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1536 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1537 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1538 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1539 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1540 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1541 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1542 GEN_VEXT_VV(vminu_vv_b, 1) 1543 GEN_VEXT_VV(vminu_vv_h, 2) 1544 GEN_VEXT_VV(vminu_vv_w, 4) 1545 GEN_VEXT_VV(vminu_vv_d, 8) 1546 GEN_VEXT_VV(vmin_vv_b, 1) 1547 GEN_VEXT_VV(vmin_vv_h, 2) 1548 GEN_VEXT_VV(vmin_vv_w, 4) 1549 GEN_VEXT_VV(vmin_vv_d, 8) 1550 GEN_VEXT_VV(vmaxu_vv_b, 1) 1551 GEN_VEXT_VV(vmaxu_vv_h, 2) 1552 GEN_VEXT_VV(vmaxu_vv_w, 4) 1553 GEN_VEXT_VV(vmaxu_vv_d, 8) 1554 GEN_VEXT_VV(vmax_vv_b, 1) 1555 GEN_VEXT_VV(vmax_vv_h, 2) 1556 GEN_VEXT_VV(vmax_vv_w, 4) 1557 GEN_VEXT_VV(vmax_vv_d, 8) 1558 1559 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1560 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1561 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1562 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1563 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1564 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1565 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1566 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1567 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1568 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1569 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1570 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1571 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1572 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1573 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1574 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1575 GEN_VEXT_VX(vminu_vx_b, 1) 1576 GEN_VEXT_VX(vminu_vx_h, 2) 1577 GEN_VEXT_VX(vminu_vx_w, 4) 1578 GEN_VEXT_VX(vminu_vx_d, 8) 1579 GEN_VEXT_VX(vmin_vx_b, 
1) 1580 GEN_VEXT_VX(vmin_vx_h, 2) 1581 GEN_VEXT_VX(vmin_vx_w, 4) 1582 GEN_VEXT_VX(vmin_vx_d, 8) 1583 GEN_VEXT_VX(vmaxu_vx_b, 1) 1584 GEN_VEXT_VX(vmaxu_vx_h, 2) 1585 GEN_VEXT_VX(vmaxu_vx_w, 4) 1586 GEN_VEXT_VX(vmaxu_vx_d, 8) 1587 GEN_VEXT_VX(vmax_vx_b, 1) 1588 GEN_VEXT_VX(vmax_vx_h, 2) 1589 GEN_VEXT_VX(vmax_vx_w, 4) 1590 GEN_VEXT_VX(vmax_vx_d, 8) 1591 1592 /* Vector Single-Width Integer Multiply Instructions */ 1593 #define DO_MUL(N, M) (N * M) 1594 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1595 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1596 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1597 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1598 GEN_VEXT_VV(vmul_vv_b, 1) 1599 GEN_VEXT_VV(vmul_vv_h, 2) 1600 GEN_VEXT_VV(vmul_vv_w, 4) 1601 GEN_VEXT_VV(vmul_vv_d, 8) 1602 1603 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1604 { 1605 return (int16_t)s2 * (int16_t)s1 >> 8; 1606 } 1607 1608 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1609 { 1610 return (int32_t)s2 * (int32_t)s1 >> 16; 1611 } 1612 1613 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1614 { 1615 return (int64_t)s2 * (int64_t)s1 >> 32; 1616 } 1617 1618 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1619 { 1620 uint64_t hi_64, lo_64; 1621 1622 muls64(&lo_64, &hi_64, s1, s2); 1623 return hi_64; 1624 } 1625 1626 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1627 { 1628 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1629 } 1630 1631 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1632 { 1633 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1634 } 1635 1636 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1637 { 1638 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1639 } 1640 1641 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1642 { 1643 uint64_t hi_64, lo_64; 1644 1645 mulu64(&lo_64, &hi_64, s2, s1); 1646 return hi_64; 1647 } 1648 1649 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1650 { 1651 return (int16_t)s2 * (uint16_t)s1 >> 8; 1652 } 1653 1654 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1655 { 1656 return (int32_t)s2 * (uint32_t)s1 >> 16; 1657 } 1658 1659 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1660 { 1661 return (int64_t)s2 * (uint64_t)s1 >> 32; 1662 } 1663 1664 /* 1665 * Let A = signed operand, 1666 * B = unsigned operand 1667 * P = mulu64(A, B), unsigned product 1668 * 1669 * LET X = 2 ** 64 - A, 2's complement of A 1670 * SP = signed product 1671 * THEN 1672 * IF A < 0 1673 * SP = -X * B 1674 * = -(2 ** 64 - A) * B 1675 * = A * B - 2 ** 64 * B 1676 * = P - 2 ** 64 * B 1677 * ELSE 1678 * SP = P 1679 * THEN 1680 * HI_P -= (A < 0 ? B : 0) 1681 */ 1682 1683 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1684 { 1685 uint64_t hi_64, lo_64; 1686 1687 mulu64(&lo_64, &hi_64, s2, s1); 1688 1689 hi_64 -= s2 < 0 ? 
s1 : 0; 1690 return hi_64; 1691 } 1692 1693 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1694 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1695 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1696 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1697 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1698 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1699 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1700 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1701 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1702 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1703 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1704 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1705 GEN_VEXT_VV(vmulh_vv_b, 1) 1706 GEN_VEXT_VV(vmulh_vv_h, 2) 1707 GEN_VEXT_VV(vmulh_vv_w, 4) 1708 GEN_VEXT_VV(vmulh_vv_d, 8) 1709 GEN_VEXT_VV(vmulhu_vv_b, 1) 1710 GEN_VEXT_VV(vmulhu_vv_h, 2) 1711 GEN_VEXT_VV(vmulhu_vv_w, 4) 1712 GEN_VEXT_VV(vmulhu_vv_d, 8) 1713 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1714 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1715 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1716 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1717 1718 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1719 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1720 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1721 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1722 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1723 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1724 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1725 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1726 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1727 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1728 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1729 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1730 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1731 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1732 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1733 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1734 GEN_VEXT_VX(vmul_vx_b, 1) 1735 GEN_VEXT_VX(vmul_vx_h, 2) 1736 GEN_VEXT_VX(vmul_vx_w, 4) 1737 GEN_VEXT_VX(vmul_vx_d, 8) 1738 GEN_VEXT_VX(vmulh_vx_b, 1) 1739 GEN_VEXT_VX(vmulh_vx_h, 2) 1740 GEN_VEXT_VX(vmulh_vx_w, 4) 1741 GEN_VEXT_VX(vmulh_vx_d, 8) 1742 GEN_VEXT_VX(vmulhu_vx_b, 1) 1743 GEN_VEXT_VX(vmulhu_vx_h, 2) 1744 GEN_VEXT_VX(vmulhu_vx_w, 4) 1745 GEN_VEXT_VX(vmulhu_vx_d, 8) 1746 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1747 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1748 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1749 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1750 1751 /* Vector Integer Divide Instructions */ 1752 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1753 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1754 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1755 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1756 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1757 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1758 1759 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1760 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1761 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1762 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1763 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1764 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1765 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1766 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1767 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1768 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1769 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1770 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1771 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1772 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1773 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1774 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1775 GEN_VEXT_VV(vdivu_vv_b, 1) 1776 GEN_VEXT_VV(vdivu_vv_h, 2) 1777 GEN_VEXT_VV(vdivu_vv_w, 4) 1778 GEN_VEXT_VV(vdivu_vv_d, 8) 1779 GEN_VEXT_VV(vdiv_vv_b, 1) 1780 GEN_VEXT_VV(vdiv_vv_h, 2) 1781 GEN_VEXT_VV(vdiv_vv_w, 4) 1782 GEN_VEXT_VV(vdiv_vv_d, 8) 1783 GEN_VEXT_VV(vremu_vv_b, 1) 1784 GEN_VEXT_VV(vremu_vv_h, 2) 1785 GEN_VEXT_VV(vremu_vv_w, 4) 1786 GEN_VEXT_VV(vremu_vv_d, 8) 1787 GEN_VEXT_VV(vrem_vv_b, 1) 1788 GEN_VEXT_VV(vrem_vv_h, 2) 1789 GEN_VEXT_VV(vrem_vv_w, 4) 1790 GEN_VEXT_VV(vrem_vv_d, 8) 1791 1792 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1793 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1794 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1795 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1796 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1797 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1798 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1799 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1800 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1801 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1802 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1803 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1804 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1805 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1806 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1807 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1808 GEN_VEXT_VX(vdivu_vx_b, 1) 1809 GEN_VEXT_VX(vdivu_vx_h, 2) 1810 GEN_VEXT_VX(vdivu_vx_w, 4) 1811 GEN_VEXT_VX(vdivu_vx_d, 8) 1812 GEN_VEXT_VX(vdiv_vx_b, 1) 1813 GEN_VEXT_VX(vdiv_vx_h, 2) 1814 GEN_VEXT_VX(vdiv_vx_w, 4) 1815 GEN_VEXT_VX(vdiv_vx_d, 8) 1816 GEN_VEXT_VX(vremu_vx_b, 1) 1817 GEN_VEXT_VX(vremu_vx_h, 2) 1818 GEN_VEXT_VX(vremu_vx_w, 4) 1819 GEN_VEXT_VX(vremu_vx_d, 8) 1820 GEN_VEXT_VX(vrem_vx_b, 1) 1821 GEN_VEXT_VX(vrem_vx_h, 2) 1822 GEN_VEXT_VX(vrem_vx_w, 4) 1823 GEN_VEXT_VX(vrem_vx_d, 8) 1824 1825 /* Vector Widening Integer Multiply Instructions */ 1826 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1827 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1828 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1829 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1830 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1831 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1832 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1833 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, 
DO_MUL) 1834 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1835 GEN_VEXT_VV(vwmul_vv_b, 2) 1836 GEN_VEXT_VV(vwmul_vv_h, 4) 1837 GEN_VEXT_VV(vwmul_vv_w, 8) 1838 GEN_VEXT_VV(vwmulu_vv_b, 2) 1839 GEN_VEXT_VV(vwmulu_vv_h, 4) 1840 GEN_VEXT_VV(vwmulu_vv_w, 8) 1841 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1842 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1843 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1844 1845 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1846 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1847 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1848 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1849 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1850 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1851 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1852 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1853 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1854 GEN_VEXT_VX(vwmul_vx_b, 2) 1855 GEN_VEXT_VX(vwmul_vx_h, 4) 1856 GEN_VEXT_VX(vwmul_vx_w, 8) 1857 GEN_VEXT_VX(vwmulu_vx_b, 2) 1858 GEN_VEXT_VX(vwmulu_vx_h, 4) 1859 GEN_VEXT_VX(vwmulu_vx_w, 8) 1860 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1861 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1862 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1863 1864 /* Vector Single-Width Integer Multiply-Add Instructions */ 1865 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1866 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1867 { \ 1868 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1869 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1870 TD d = *((TD *)vd + HD(i)); \ 1871 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1872 } 1873 1874 #define DO_MACC(N, M, D) (M * N + D) 1875 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1876 #define DO_MADD(N, M, D) (M * D + N) 1877 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1878 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1879 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1880 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1881 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1882 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1883 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1884 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1885 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1886 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1887 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1888 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1889 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1890 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1891 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1892 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1893 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1894 GEN_VEXT_VV(vmacc_vv_b, 1) 1895 GEN_VEXT_VV(vmacc_vv_h, 2) 1896 GEN_VEXT_VV(vmacc_vv_w, 4) 1897 GEN_VEXT_VV(vmacc_vv_d, 8) 1898 GEN_VEXT_VV(vnmsac_vv_b, 1) 1899 GEN_VEXT_VV(vnmsac_vv_h, 2) 1900 GEN_VEXT_VV(vnmsac_vv_w, 4) 1901 GEN_VEXT_VV(vnmsac_vv_d, 8) 1902 GEN_VEXT_VV(vmadd_vv_b, 1) 1903 GEN_VEXT_VV(vmadd_vv_h, 2) 1904 GEN_VEXT_VV(vmadd_vv_w, 4) 1905 GEN_VEXT_VV(vmadd_vv_d, 8) 1906 GEN_VEXT_VV(vnmsub_vv_b, 1) 1907 GEN_VEXT_VV(vnmsub_vv_h, 2) 1908 GEN_VEXT_VV(vnmsub_vv_w, 4) 1909 GEN_VEXT_VV(vnmsub_vv_d, 8) 1910 1911 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1912 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1913 { \ 1914 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1915 TD d = *((TD *)vd 
+ HD(i)); \ 1916 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1917 } 1918 1919 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1920 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1921 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1922 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1923 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1924 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1925 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1926 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1927 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1928 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1929 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1930 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1931 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1932 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1933 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1934 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1935 GEN_VEXT_VX(vmacc_vx_b, 1) 1936 GEN_VEXT_VX(vmacc_vx_h, 2) 1937 GEN_VEXT_VX(vmacc_vx_w, 4) 1938 GEN_VEXT_VX(vmacc_vx_d, 8) 1939 GEN_VEXT_VX(vnmsac_vx_b, 1) 1940 GEN_VEXT_VX(vnmsac_vx_h, 2) 1941 GEN_VEXT_VX(vnmsac_vx_w, 4) 1942 GEN_VEXT_VX(vnmsac_vx_d, 8) 1943 GEN_VEXT_VX(vmadd_vx_b, 1) 1944 GEN_VEXT_VX(vmadd_vx_h, 2) 1945 GEN_VEXT_VX(vmadd_vx_w, 4) 1946 GEN_VEXT_VX(vmadd_vx_d, 8) 1947 GEN_VEXT_VX(vnmsub_vx_b, 1) 1948 GEN_VEXT_VX(vnmsub_vx_h, 2) 1949 GEN_VEXT_VX(vnmsub_vx_w, 4) 1950 GEN_VEXT_VX(vnmsub_vx_d, 8) 1951 1952 /* Vector Widening Integer Multiply-Add Instructions */ 1953 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1954 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1955 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1956 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1957 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1958 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1959 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1960 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1961 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1962 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1963 GEN_VEXT_VV(vwmaccu_vv_h, 4) 1964 GEN_VEXT_VV(vwmaccu_vv_w, 8) 1965 GEN_VEXT_VV(vwmacc_vv_b, 2) 1966 GEN_VEXT_VV(vwmacc_vv_h, 4) 1967 GEN_VEXT_VV(vwmacc_vv_w, 8) 1968 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 1969 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 1970 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 1971 1972 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1973 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1974 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1975 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1976 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1977 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1978 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1979 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1980 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1981 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1982 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1983 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1984 GEN_VEXT_VX(vwmaccu_vx_b, 2) 1985 GEN_VEXT_VX(vwmaccu_vx_h, 4) 1986 GEN_VEXT_VX(vwmaccu_vx_w, 8) 1987 GEN_VEXT_VX(vwmacc_vx_b, 2) 1988 GEN_VEXT_VX(vwmacc_vx_h, 4) 1989 GEN_VEXT_VX(vwmacc_vx_w, 8) 1990 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 1991 
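/*
 * Illustration only, not used by any helper above: the signed-by-unsigned
 * high-half correction derived in the comment above do_mulhsu_d() can be
 * checked at a narrower width, where a plain 64-bit multiply provides an
 * exact reference.  mulhsu32_sketch() is a hypothetical standalone
 * function; like do_mulhsu_d() it performs the unsigned multiply first and
 * then applies HI_P -= (A < 0 ? B : 0).
 */
static inline int32_t mulhsu32_sketch(int32_t a, uint32_t b)
{
    /* Unsigned product of the raw bit patterns, as mulu64() would give. */
    uint64_t p = (uint64_t)(uint32_t)a * b;
    uint32_t hi = (uint32_t)(p >> 32);

    /* The correction from the derivation: subtract B from the high half
     * when A is negative. */
    if (a < 0) {
        hi -= b;
    }
    /*
     * On the two's-complement, arithmetic-shift hosts QEMU targets this
     * matches (int32_t)(((int64_t)a * b) >> 32) for all inputs.
     */
    return (int32_t)hi;
}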
GEN_VEXT_VX(vwmaccsu_vx_h, 4) 1992 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 1993 GEN_VEXT_VX(vwmaccus_vx_b, 2) 1994 GEN_VEXT_VX(vwmaccus_vx_h, 4) 1995 GEN_VEXT_VX(vwmaccus_vx_w, 8) 1996 1997 /* Vector Integer Merge and Move Instructions */ 1998 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1999 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 2000 uint32_t desc) \ 2001 { \ 2002 uint32_t vl = env->vl; \ 2003 uint32_t esz = sizeof(ETYPE); \ 2004 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2005 uint32_t vta = vext_vta(desc); \ 2006 uint32_t i; \ 2007 \ 2008 for (i = env->vstart; i < vl; i++) { \ 2009 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 2010 *((ETYPE *)vd + H(i)) = s1; \ 2011 } \ 2012 env->vstart = 0; \ 2013 /* set tail elements to 1s */ \ 2014 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2015 } 2016 2017 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 2018 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 2019 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 2020 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 2021 2022 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 2023 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2024 uint32_t desc) \ 2025 { \ 2026 uint32_t vl = env->vl; \ 2027 uint32_t esz = sizeof(ETYPE); \ 2028 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2029 uint32_t vta = vext_vta(desc); \ 2030 uint32_t i; \ 2031 \ 2032 for (i = env->vstart; i < vl; i++) { \ 2033 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2034 } \ 2035 env->vstart = 0; \ 2036 /* set tail elements to 1s */ \ 2037 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2038 } 2039 2040 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2041 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2042 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2043 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2044 2045 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2046 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2047 CPURISCVState *env, uint32_t desc) \ 2048 { \ 2049 uint32_t vl = env->vl; \ 2050 uint32_t esz = sizeof(ETYPE); \ 2051 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2052 uint32_t vta = vext_vta(desc); \ 2053 uint32_t i; \ 2054 \ 2055 for (i = env->vstart; i < vl; i++) { \ 2056 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2057 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2058 } \ 2059 env->vstart = 0; \ 2060 /* set tail elements to 1s */ \ 2061 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2062 } 2063 2064 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2065 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2066 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2067 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2068 2069 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2070 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2071 void *vs2, CPURISCVState *env, uint32_t desc) \ 2072 { \ 2073 uint32_t vl = env->vl; \ 2074 uint32_t esz = sizeof(ETYPE); \ 2075 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2076 uint32_t vta = vext_vta(desc); \ 2077 uint32_t i; \ 2078 \ 2079 for (i = env->vstart; i < vl; i++) { \ 2080 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2081 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 2082 (ETYPE)(target_long)s1); \ 2083 *((ETYPE *)vd + H(i)) = d; \ 2084 } \ 2085 env->vstart = 0; \ 2086 /* set tail elements to 1s */ \ 2087 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2088 } 2089 2090 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2091 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2092 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2093 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2094 2095 /* 2096 *** Vector Fixed-Point Arithmetic Instructions 2097 */ 2098 2099 /* Vector Single-Width Saturating Add and Subtract */ 2100 2101 /* 2102 * As fixed point instructions probably have round mode and saturation, 2103 * define common macros for fixed point here. 2104 */ 2105 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2106 CPURISCVState *env, int vxrm); 2107 2108 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2109 static inline void \ 2110 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2111 CPURISCVState *env, int vxrm) \ 2112 { \ 2113 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2114 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2115 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2116 } 2117 2118 static inline void 2119 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2120 CPURISCVState *env, 2121 uint32_t vl, uint32_t vm, int vxrm, 2122 opivv2_rm_fn *fn) 2123 { 2124 for (uint32_t i = env->vstart; i < vl; i++) { 2125 if (!vm && !vext_elem_mask(v0, i)) { 2126 continue; 2127 } 2128 fn(vd, vs1, vs2, i, env, vxrm); 2129 } 2130 env->vstart = 0; 2131 } 2132 2133 static inline void 2134 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2135 CPURISCVState *env, 2136 uint32_t desc, 2137 opivv2_rm_fn *fn, uint32_t esz) 2138 { 2139 uint32_t vm = vext_vm(desc); 2140 uint32_t vl = env->vl; 2141 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2142 uint32_t vta = vext_vta(desc); 2143 2144 switch (env->vxrm) { 2145 case 0: /* rnu */ 2146 vext_vv_rm_1(vd, v0, vs1, vs2, 2147 env, vl, vm, 0, fn); 2148 break; 2149 case 1: /* rne */ 2150 vext_vv_rm_1(vd, v0, vs1, vs2, 2151 env, vl, vm, 1, fn); 2152 break; 2153 case 2: /* rdn */ 2154 vext_vv_rm_1(vd, v0, vs1, vs2, 2155 env, vl, vm, 2, fn); 2156 break; 2157 default: /* rod */ 2158 vext_vv_rm_1(vd, v0, vs1, vs2, 2159 env, vl, vm, 3, fn); 2160 break; 2161 } 2162 /* set tail elements to 1s */ 2163 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2164 } 2165 2166 /* generate helpers for fixed point instructions with OPIVV format */ 2167 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2168 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2169 CPURISCVState *env, uint32_t desc) \ 2170 { \ 2171 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2172 do_##NAME, ESZ); \ 2173 } 2174 2175 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2176 { 2177 uint8_t res = a + b; 2178 if (res < a) { 2179 res = UINT8_MAX; 2180 env->vxsat = 0x1; 2181 } 2182 return res; 2183 } 2184 2185 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2186 uint16_t b) 2187 { 2188 uint16_t res = a + b; 2189 if (res < a) { 2190 res = UINT16_MAX; 2191 env->vxsat = 0x1; 2192 } 2193 return res; 2194 } 2195 2196 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2197 uint32_t b) 2198 { 2199 uint32_t res = a + b; 2200 if (res < a) { 2201 res = UINT32_MAX; 2202 env->vxsat = 0x1; 2203 } 2204 return res; 2205 } 2206 2207 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2208 uint64_t b) 2209 { 2210 uint64_t res = a + b; 2211 if (res < a) { 
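        /*
         * Unsigned overflow: a wrapped sum is always smaller than either
         * addend, so clamp the result to UINT64_MAX and record the
         * saturation in vxsat.
         */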
2212 res = UINT64_MAX; 2213 env->vxsat = 0x1; 2214 } 2215 return res; 2216 } 2217 2218 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2219 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2220 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2221 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2222 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2223 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2224 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2225 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2226 2227 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2228 CPURISCVState *env, int vxrm); 2229 2230 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2231 static inline void \ 2232 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2233 CPURISCVState *env, int vxrm) \ 2234 { \ 2235 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2236 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2237 } 2238 2239 static inline void 2240 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2241 CPURISCVState *env, 2242 uint32_t vl, uint32_t vm, int vxrm, 2243 opivx2_rm_fn *fn) 2244 { 2245 for (uint32_t i = env->vstart; i < vl; i++) { 2246 if (!vm && !vext_elem_mask(v0, i)) { 2247 continue; 2248 } 2249 fn(vd, s1, vs2, i, env, vxrm); 2250 } 2251 env->vstart = 0; 2252 } 2253 2254 static inline void 2255 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2256 CPURISCVState *env, 2257 uint32_t desc, 2258 opivx2_rm_fn *fn, uint32_t esz) 2259 { 2260 uint32_t vm = vext_vm(desc); 2261 uint32_t vl = env->vl; 2262 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2263 uint32_t vta = vext_vta(desc); 2264 2265 switch (env->vxrm) { 2266 case 0: /* rnu */ 2267 vext_vx_rm_1(vd, v0, s1, vs2, 2268 env, vl, vm, 0, fn); 2269 break; 2270 case 1: /* rne */ 2271 vext_vx_rm_1(vd, v0, s1, vs2, 2272 env, vl, vm, 1, fn); 2273 break; 2274 case 2: /* rdn */ 2275 vext_vx_rm_1(vd, v0, s1, vs2, 2276 env, vl, vm, 2, fn); 2277 break; 2278 default: /* rod */ 2279 vext_vx_rm_1(vd, v0, s1, vs2, 2280 env, vl, vm, 3, fn); 2281 break; 2282 } 2283 /* set tail elements to 1s */ 2284 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2285 } 2286 2287 /* generate helpers for fixed point instructions with OPIVX format */ 2288 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2289 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2290 void *vs2, CPURISCVState *env, uint32_t desc) \ 2291 { \ 2292 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2293 do_##NAME, ESZ); \ 2294 } 2295 2296 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2297 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2298 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2299 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2300 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2301 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2302 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2303 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2304 2305 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2306 { 2307 int8_t res = a + b; 2308 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2309 res = a > 0 ? INT8_MAX : INT8_MIN; 2310 env->vxsat = 0x1; 2311 } 2312 return res; 2313 } 2314 2315 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2316 { 2317 int16_t res = a + b; 2318 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2319 res = a > 0 ? 
INT16_MAX : INT16_MIN; 2320 env->vxsat = 0x1; 2321 } 2322 return res; 2323 } 2324 2325 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2326 { 2327 int32_t res = a + b; 2328 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2329 res = a > 0 ? INT32_MAX : INT32_MIN; 2330 env->vxsat = 0x1; 2331 } 2332 return res; 2333 } 2334 2335 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2336 { 2337 int64_t res = a + b; 2338 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2339 res = a > 0 ? INT64_MAX : INT64_MIN; 2340 env->vxsat = 0x1; 2341 } 2342 return res; 2343 } 2344 2345 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2346 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2347 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2348 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2349 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2350 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2351 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2352 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2353 2354 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2355 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2356 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2357 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2358 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2359 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2360 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2361 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2362 2363 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2364 { 2365 uint8_t res = a - b; 2366 if (res > a) { 2367 res = 0; 2368 env->vxsat = 0x1; 2369 } 2370 return res; 2371 } 2372 2373 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2374 uint16_t b) 2375 { 2376 uint16_t res = a - b; 2377 if (res > a) { 2378 res = 0; 2379 env->vxsat = 0x1; 2380 } 2381 return res; 2382 } 2383 2384 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2385 uint32_t b) 2386 { 2387 uint32_t res = a - b; 2388 if (res > a) { 2389 res = 0; 2390 env->vxsat = 0x1; 2391 } 2392 return res; 2393 } 2394 2395 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2396 uint64_t b) 2397 { 2398 uint64_t res = a - b; 2399 if (res > a) { 2400 res = 0; 2401 env->vxsat = 0x1; 2402 } 2403 return res; 2404 } 2405 2406 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2407 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2408 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2409 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2410 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2411 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2412 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2413 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2414 2415 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2416 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2417 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2418 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2419 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2420 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2421 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2422 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2423 2424 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2425 { 2426 int8_t res = a - b; 2427 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2428 res = a >= 0 ? 
INT8_MAX : INT8_MIN; 2429 env->vxsat = 0x1; 2430 } 2431 return res; 2432 } 2433 2434 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2435 { 2436 int16_t res = a - b; 2437 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2438 res = a >= 0 ? INT16_MAX : INT16_MIN; 2439 env->vxsat = 0x1; 2440 } 2441 return res; 2442 } 2443 2444 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2445 { 2446 int32_t res = a - b; 2447 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2448 res = a >= 0 ? INT32_MAX : INT32_MIN; 2449 env->vxsat = 0x1; 2450 } 2451 return res; 2452 } 2453 2454 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2455 { 2456 int64_t res = a - b; 2457 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2458 res = a >= 0 ? INT64_MAX : INT64_MIN; 2459 env->vxsat = 0x1; 2460 } 2461 return res; 2462 } 2463 2464 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2465 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2466 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2467 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2468 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2469 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2470 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2471 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2472 2473 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2474 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2475 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2476 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2477 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2478 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2479 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2480 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2481 2482 /* Vector Single-Width Averaging Add and Subtract */ 2483 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2484 { 2485 uint8_t d = extract64(v, shift, 1); 2486 uint8_t d1; 2487 uint64_t D1, D2; 2488 2489 if (shift == 0 || shift > 64) { 2490 return 0; 2491 } 2492 2493 d1 = extract64(v, shift - 1, 1); 2494 D1 = extract64(v, 0, shift); 2495 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2496 return d1; 2497 } else if (vxrm == 1) { /* round-to-nearest-even */ 2498 if (shift > 1) { 2499 D2 = extract64(v, 0, shift - 1); 2500 return d1 & ((D2 != 0) | d); 2501 } else { 2502 return d1 & d; 2503 } 2504 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2505 return !d & (D1 != 0); 2506 } 2507 return 0; /* round-down (truncate) */ 2508 } 2509 2510 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2511 { 2512 int64_t res = (int64_t)a + b; 2513 uint8_t round = get_round(vxrm, res, 1); 2514 2515 return (res >> 1) + round; 2516 } 2517 2518 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2519 { 2520 int64_t res = a + b; 2521 uint8_t round = get_round(vxrm, res, 1); 2522 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2523 2524 /* With signed overflow, bit 64 is inverse of bit 63. 
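     * When `over` is non-zero, the true 65-bit sum's sign bit is therefore
     * the complement of bit 63 of the wrapped result, so XOR-ing `over`
     * into (res >> 1) restores the correct top bit of the halved value
     * before the rounding increment is added.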
*/ 2525 return ((res >> 1) ^ over) + round; 2526 } 2527 2528 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2529 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2530 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2531 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2532 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2533 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2534 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2535 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2536 2537 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2538 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2539 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2540 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2541 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2542 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2543 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2544 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2545 2546 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2547 uint32_t a, uint32_t b) 2548 { 2549 uint64_t res = (uint64_t)a + b; 2550 uint8_t round = get_round(vxrm, res, 1); 2551 2552 return (res >> 1) + round; 2553 } 2554 2555 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2556 uint64_t a, uint64_t b) 2557 { 2558 uint64_t res = a + b; 2559 uint8_t round = get_round(vxrm, res, 1); 2560 uint64_t over = (uint64_t)(res < a) << 63; 2561 2562 return ((res >> 1) | over) + round; 2563 } 2564 2565 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2566 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2567 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2568 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2569 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2570 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2571 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2572 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2573 2574 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2575 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2576 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2577 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2578 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2579 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2580 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2581 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2582 2583 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2584 { 2585 int64_t res = (int64_t)a - b; 2586 uint8_t round = get_round(vxrm, res, 1); 2587 2588 return (res >> 1) + round; 2589 } 2590 2591 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2592 { 2593 int64_t res = (int64_t)a - b; 2594 uint8_t round = get_round(vxrm, res, 1); 2595 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2596 2597 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2598 return ((res >> 1) ^ over) + round; 2599 } 2600 2601 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2602 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2603 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2604 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2605 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2606 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2607 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2608 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2609 2610 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2611 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2612 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2613 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2614 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2615 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2616 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2617 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2618 2619 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2620 uint32_t a, uint32_t b) 2621 { 2622 int64_t res = (int64_t)a - b; 2623 uint8_t round = get_round(vxrm, res, 1); 2624 2625 return (res >> 1) + round; 2626 } 2627 2628 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2629 uint64_t a, uint64_t b) 2630 { 2631 uint64_t res = (uint64_t)a - b; 2632 uint8_t round = get_round(vxrm, res, 1); 2633 uint64_t over = (uint64_t)(res > a) << 63; 2634 2635 return ((res >> 1) | over) + round; 2636 } 2637 2638 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2639 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2640 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2641 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2642 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2643 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2644 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2645 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2646 2647 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2648 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2649 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2650 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2651 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2652 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2653 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2654 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2655 2656 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2657 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2658 { 2659 uint8_t round; 2660 int16_t res; 2661 2662 res = (int16_t)a * (int16_t)b; 2663 round = get_round(vxrm, res, 7); 2664 res = (res >> 7) + round; 2665 2666 if (res > INT8_MAX) { 2667 env->vxsat = 0x1; 2668 return INT8_MAX; 2669 } else if (res < INT8_MIN) { 2670 env->vxsat = 0x1; 2671 return INT8_MIN; 2672 } else { 2673 return res; 2674 } 2675 } 2676 2677 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2678 { 2679 uint8_t round; 2680 int32_t res; 2681 2682 res = (int32_t)a * (int32_t)b; 2683 round = get_round(vxrm, res, 15); 2684 res = (res >> 15) + round; 2685 2686 if (res > INT16_MAX) { 2687 env->vxsat = 0x1; 2688 return INT16_MAX; 2689 } else if (res < INT16_MIN) { 2690 env->vxsat = 0x1; 2691 return INT16_MIN; 2692 } else { 2693 return res; 2694 } 2695 } 2696 2697 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2698 { 2699 uint8_t round; 2700 int64_t res; 2701 2702 res = (int64_t)a * (int64_t)b; 2703 round = get_round(vxrm, res, 31); 2704 res = (res >> 31) + round; 2705 2706 if (res > INT32_MAX) { 2707 env->vxsat = 0x1; 2708 return INT32_MAX; 2709 } else if (res < INT32_MIN) { 2710 env->vxsat = 0x1; 
2711 return INT32_MIN; 2712 } else { 2713 return res; 2714 } 2715 } 2716 2717 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2718 { 2719 uint8_t round; 2720 uint64_t hi_64, lo_64; 2721 int64_t res; 2722 2723 if (a == INT64_MIN && b == INT64_MIN) { 2724 env->vxsat = 1; 2725 return INT64_MAX; 2726 } 2727 2728 muls64(&lo_64, &hi_64, a, b); 2729 round = get_round(vxrm, lo_64, 63); 2730 /* 2731 * Cannot overflow, as there are always 2732 * 2 sign bits after multiply. 2733 */ 2734 res = (hi_64 << 1) | (lo_64 >> 63); 2735 if (round) { 2736 if (res == INT64_MAX) { 2737 env->vxsat = 1; 2738 } else { 2739 res += 1; 2740 } 2741 } 2742 return res; 2743 } 2744 2745 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2746 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2747 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2748 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2749 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2750 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2751 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2752 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2753 2754 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2755 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2756 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2757 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2758 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2759 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2760 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2761 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2762 2763 /* Vector Single-Width Scaling Shift Instructions */ 2764 static inline uint8_t 2765 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2766 { 2767 uint8_t round, shift = b & 0x7; 2768 uint8_t res; 2769 2770 round = get_round(vxrm, a, shift); 2771 res = (a >> shift) + round; 2772 return res; 2773 } 2774 static inline uint16_t 2775 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2776 { 2777 uint8_t round, shift = b & 0xf; 2778 uint16_t res; 2779 2780 round = get_round(vxrm, a, shift); 2781 res = (a >> shift) + round; 2782 return res; 2783 } 2784 static inline uint32_t 2785 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2786 { 2787 uint8_t round, shift = b & 0x1f; 2788 uint32_t res; 2789 2790 round = get_round(vxrm, a, shift); 2791 res = (a >> shift) + round; 2792 return res; 2793 } 2794 static inline uint64_t 2795 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2796 { 2797 uint8_t round, shift = b & 0x3f; 2798 uint64_t res; 2799 2800 round = get_round(vxrm, a, shift); 2801 res = (a >> shift) + round; 2802 return res; 2803 } 2804 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2805 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2806 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2807 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2808 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2809 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2810 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2811 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2812 2813 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2814 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2815 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2816 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2817 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2818 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2819 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2820 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2821 2822 static inline int8_t 2823 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2824 { 2825 uint8_t round, shift = b & 0x7; 2826 int8_t res; 2827 2828 
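    /*
     * Only the low log2(SEW) bits of b are used as the shift amount.
     * get_round() inspects the bits that are about to be shifted out of a
     * and returns the 0/1 increment required by the fixed-point rounding
     * mode in vxrm (rnu, rne, rdn or rod); it is added after the
     * arithmetic shift below.
     */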
round = get_round(vxrm, a, shift); 2829 res = (a >> shift) + round; 2830 return res; 2831 } 2832 static inline int16_t 2833 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2834 { 2835 uint8_t round, shift = b & 0xf; 2836 int16_t res; 2837 2838 round = get_round(vxrm, a, shift); 2839 res = (a >> shift) + round; 2840 return res; 2841 } 2842 static inline int32_t 2843 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2844 { 2845 uint8_t round, shift = b & 0x1f; 2846 int32_t res; 2847 2848 round = get_round(vxrm, a, shift); 2849 res = (a >> shift) + round; 2850 return res; 2851 } 2852 static inline int64_t 2853 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2854 { 2855 uint8_t round, shift = b & 0x3f; 2856 int64_t res; 2857 2858 round = get_round(vxrm, a, shift); 2859 res = (a >> shift) + round; 2860 return res; 2861 } 2862 2863 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2864 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2865 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2866 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2867 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2868 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2869 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2870 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2871 2872 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2873 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2874 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2875 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2876 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2877 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2878 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2879 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2880 2881 /* Vector Narrowing Fixed-Point Clip Instructions */ 2882 static inline int8_t 2883 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2884 { 2885 uint8_t round, shift = b & 0xf; 2886 int16_t res; 2887 2888 round = get_round(vxrm, a, shift); 2889 res = (a >> shift) + round; 2890 if (res > INT8_MAX) { 2891 env->vxsat = 0x1; 2892 return INT8_MAX; 2893 } else if (res < INT8_MIN) { 2894 env->vxsat = 0x1; 2895 return INT8_MIN; 2896 } else { 2897 return res; 2898 } 2899 } 2900 2901 static inline int16_t 2902 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2903 { 2904 uint8_t round, shift = b & 0x1f; 2905 int32_t res; 2906 2907 round = get_round(vxrm, a, shift); 2908 res = (a >> shift) + round; 2909 if (res > INT16_MAX) { 2910 env->vxsat = 0x1; 2911 return INT16_MAX; 2912 } else if (res < INT16_MIN) { 2913 env->vxsat = 0x1; 2914 return INT16_MIN; 2915 } else { 2916 return res; 2917 } 2918 } 2919 2920 static inline int32_t 2921 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2922 { 2923 uint8_t round, shift = b & 0x3f; 2924 int64_t res; 2925 2926 round = get_round(vxrm, a, shift); 2927 res = (a >> shift) + round; 2928 if (res > INT32_MAX) { 2929 env->vxsat = 0x1; 2930 return INT32_MAX; 2931 } else if (res < INT32_MIN) { 2932 env->vxsat = 0x1; 2933 return INT32_MIN; 2934 } else { 2935 return res; 2936 } 2937 } 2938 2939 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2940 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2941 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2942 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 2943 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 2944 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 2945 2946 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2947 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2948 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, 
vnclip32) 2949 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 2950 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 2951 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 2952 2953 static inline uint8_t 2954 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2955 { 2956 uint8_t round, shift = b & 0xf; 2957 uint16_t res; 2958 2959 round = get_round(vxrm, a, shift); 2960 res = (a >> shift) + round; 2961 if (res > UINT8_MAX) { 2962 env->vxsat = 0x1; 2963 return UINT8_MAX; 2964 } else { 2965 return res; 2966 } 2967 } 2968 2969 static inline uint16_t 2970 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2971 { 2972 uint8_t round, shift = b & 0x1f; 2973 uint32_t res; 2974 2975 round = get_round(vxrm, a, shift); 2976 res = (a >> shift) + round; 2977 if (res > UINT16_MAX) { 2978 env->vxsat = 0x1; 2979 return UINT16_MAX; 2980 } else { 2981 return res; 2982 } 2983 } 2984 2985 static inline uint32_t 2986 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2987 { 2988 uint8_t round, shift = b & 0x3f; 2989 uint64_t res; 2990 2991 round = get_round(vxrm, a, shift); 2992 res = (a >> shift) + round; 2993 if (res > UINT32_MAX) { 2994 env->vxsat = 0x1; 2995 return UINT32_MAX; 2996 } else { 2997 return res; 2998 } 2999 } 3000 3001 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 3002 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 3003 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 3004 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 3005 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 3006 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 3007 3008 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 3009 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 3010 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 3011 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 3012 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 3013 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 3014 3015 /* 3016 *** Vector Float Point Arithmetic Instructions 3017 */ 3018 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 3019 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3020 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3021 CPURISCVState *env) \ 3022 { \ 3023 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3024 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3025 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3026 } 3027 3028 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 3029 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3030 void *vs2, CPURISCVState *env, \ 3031 uint32_t desc) \ 3032 { \ 3033 uint32_t vm = vext_vm(desc); \ 3034 uint32_t vl = env->vl; \ 3035 uint32_t total_elems = \ 3036 vext_get_total_elems(env, desc, ESZ); \ 3037 uint32_t vta = vext_vta(desc); \ 3038 uint32_t i; \ 3039 \ 3040 for (i = env->vstart; i < vl; i++) { \ 3041 if (!vm && !vext_elem_mask(v0, i)) { \ 3042 continue; \ 3043 } \ 3044 do_##NAME(vd, vs1, vs2, i, env); \ 3045 } \ 3046 env->vstart = 0; \ 3047 /* set tail elements to 1s */ \ 3048 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3049 total_elems * ESZ); \ 3050 } 3051 3052 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3053 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3054 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3055 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 3056 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 3057 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 3058 3059 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3060 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3061 CPURISCVState *env) \ 3062 { \ 3063 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3064 *((TD *)vd + HD(i)) = 
OP(s2, (TX1)(T1)s1, &env->fp_status);\ 3065 } 3066 3067 #define GEN_VEXT_VF(NAME, ESZ) \ 3068 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3069 void *vs2, CPURISCVState *env, \ 3070 uint32_t desc) \ 3071 { \ 3072 uint32_t vm = vext_vm(desc); \ 3073 uint32_t vl = env->vl; \ 3074 uint32_t total_elems = \ 3075 vext_get_total_elems(env, desc, ESZ); \ 3076 uint32_t vta = vext_vta(desc); \ 3077 uint32_t i; \ 3078 \ 3079 for (i = env->vstart; i < vl; i++) { \ 3080 if (!vm && !vext_elem_mask(v0, i)) { \ 3081 continue; \ 3082 } \ 3083 do_##NAME(vd, s1, vs2, i, env); \ 3084 } \ 3085 env->vstart = 0; \ 3086 /* set tail elements to 1s */ \ 3087 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3088 total_elems * ESZ); \ 3089 } 3090 3091 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3092 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3093 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3094 GEN_VEXT_VF(vfadd_vf_h, 2) 3095 GEN_VEXT_VF(vfadd_vf_w, 4) 3096 GEN_VEXT_VF(vfadd_vf_d, 8) 3097 3098 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3099 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3100 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3101 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 3102 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 3103 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 3104 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3105 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3106 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3107 GEN_VEXT_VF(vfsub_vf_h, 2) 3108 GEN_VEXT_VF(vfsub_vf_w, 4) 3109 GEN_VEXT_VF(vfsub_vf_d, 8) 3110 3111 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3112 { 3113 return float16_sub(b, a, s); 3114 } 3115 3116 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3117 { 3118 return float32_sub(b, a, s); 3119 } 3120 3121 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3122 { 3123 return float64_sub(b, a, s); 3124 } 3125 3126 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3127 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3128 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3129 GEN_VEXT_VF(vfrsub_vf_h, 2) 3130 GEN_VEXT_VF(vfrsub_vf_w, 4) 3131 GEN_VEXT_VF(vfrsub_vf_d, 8) 3132 3133 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3134 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3135 { 3136 return float32_add(float16_to_float32(a, true, s), 3137 float16_to_float32(b, true, s), s); 3138 } 3139 3140 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3141 { 3142 return float64_add(float32_to_float64(a, s), 3143 float32_to_float64(b, s), s); 3144 3145 } 3146 3147 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3148 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3149 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3150 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3151 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3152 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3153 GEN_VEXT_VF(vfwadd_vf_h, 4) 3154 GEN_VEXT_VF(vfwadd_vf_w, 8) 3155 3156 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3157 { 3158 return float32_sub(float16_to_float32(a, true, s), 3159 float16_to_float32(b, true, s), s); 3160 } 3161 3162 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3163 { 3164 return float64_sub(float32_to_float64(a, s), 3165 float32_to_float64(b, s), s); 3166 3167 } 3168 3169 RVVCALL(OPFVV2, vfwsub_vv_h, 
WOP_UUU_H, H4, H2, H2, vfwsub16) 3170 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3171 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3172 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3173 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3174 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3175 GEN_VEXT_VF(vfwsub_vf_h, 4) 3176 GEN_VEXT_VF(vfwsub_vf_w, 8) 3177 3178 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3179 { 3180 return float32_add(a, float16_to_float32(b, true, s), s); 3181 } 3182 3183 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3184 { 3185 return float64_add(a, float32_to_float64(b, s), s); 3186 } 3187 3188 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3189 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3190 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3191 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3192 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3193 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3194 GEN_VEXT_VF(vfwadd_wf_h, 4) 3195 GEN_VEXT_VF(vfwadd_wf_w, 8) 3196 3197 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3198 { 3199 return float32_sub(a, float16_to_float32(b, true, s), s); 3200 } 3201 3202 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3203 { 3204 return float64_sub(a, float32_to_float64(b, s), s); 3205 } 3206 3207 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3208 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3209 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3210 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3211 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3212 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3213 GEN_VEXT_VF(vfwsub_wf_h, 4) 3214 GEN_VEXT_VF(vfwsub_wf_w, 8) 3215 3216 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3217 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3218 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3219 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3220 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3221 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3222 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3223 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3224 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3225 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3226 GEN_VEXT_VF(vfmul_vf_h, 2) 3227 GEN_VEXT_VF(vfmul_vf_w, 4) 3228 GEN_VEXT_VF(vfmul_vf_d, 8) 3229 3230 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3231 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3232 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3233 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3234 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3235 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3236 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3237 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3238 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3239 GEN_VEXT_VF(vfdiv_vf_h, 2) 3240 GEN_VEXT_VF(vfdiv_vf_w, 4) 3241 GEN_VEXT_VF(vfdiv_vf_d, 8) 3242 3243 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3244 { 3245 return float16_div(b, a, s); 3246 } 3247 3248 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3249 { 3250 return float32_div(b, a, s); 3251 } 3252 3253 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3254 { 3255 return float64_div(b, a, s); 3256 } 3257 3258 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3259 
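/*
 * Illustration only: roughly what the two macro layers above generate for
 * one concrete case, with RVVCALL(OPFVF2, vfrdiv_vf_h, ...) and
 * GEN_VEXT_VF(vfrdiv_vf_h, 2) flattened by hand into a single function.
 * The name is hypothetical; the real helper is produced by the macros.
 */
static void vfrdiv_vf_h_sketch(void *vd, void *v0, uint64_t s1, void *vs2,
                               CPURISCVState *env, uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, 2);
    uint32_t vta = vext_vta(desc);
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;                     /* masked-off element is skipped */
        }
        uint16_t s2 = *((uint16_t *)vs2 + H2(i));
        /* "Reverse" divide: the scalar is the dividend and the element the
         * divisor, which is what float16_rdiv() encodes by swapping its
         * arguments. */
        *((uint16_t *)vd + H2(i)) =
            float16_div((uint16_t)s1, s2, &env->fp_status);
    }
    env->vstart = 0;
    /* Tail-agnostic policy: optionally fill elements past vl with 1s. */
    vext_set_elems_1s(vd, vta, vl * 2, total_elems * 2);
}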
RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3260 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3261 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3262 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3263 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3264 3265 /* Vector Widening Floating-Point Multiply */ 3266 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3267 { 3268 return float32_mul(float16_to_float32(a, true, s), 3269 float16_to_float32(b, true, s), s); 3270 } 3271 3272 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3273 { 3274 return float64_mul(float32_to_float64(a, s), 3275 float32_to_float64(b, s), s); 3276 3277 } 3278 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3279 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3280 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3281 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3282 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3283 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3284 GEN_VEXT_VF(vfwmul_vf_h, 4) 3285 GEN_VEXT_VF(vfwmul_vf_w, 8) 3286 3287 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3288 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3289 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3290 CPURISCVState *env) \ 3291 { \ 3292 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3293 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3294 TD d = *((TD *)vd + HD(i)); \ 3295 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3296 } 3297 3298 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3299 { 3300 return float16_muladd(a, b, d, 0, s); 3301 } 3302 3303 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3304 { 3305 return float32_muladd(a, b, d, 0, s); 3306 } 3307 3308 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3309 { 3310 return float64_muladd(a, b, d, 0, s); 3311 } 3312 3313 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3314 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3315 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3316 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3317 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3318 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3319 3320 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3321 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3322 CPURISCVState *env) \ 3323 { \ 3324 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3325 TD d = *((TD *)vd + HD(i)); \ 3326 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3327 } 3328 3329 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3330 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3331 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3332 GEN_VEXT_VF(vfmacc_vf_h, 2) 3333 GEN_VEXT_VF(vfmacc_vf_w, 4) 3334 GEN_VEXT_VF(vfmacc_vf_d, 8) 3335 3336 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3337 { 3338 return float16_muladd(a, b, d, 3339 float_muladd_negate_c | float_muladd_negate_product, s); 3340 } 3341 3342 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3343 { 3344 return float32_muladd(a, b, d, 3345 float_muladd_negate_c | float_muladd_negate_product, s); 3346 } 3347 3348 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3349 { 3350 return float64_muladd(a, b, d, 3351 float_muladd_negate_c | float_muladd_negate_product, s); 3352 } 3353 3354 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3355 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, 
H4, H4, fnmacc32) 3356 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3357 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3358 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3359 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3360 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3361 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3362 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3363 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3364 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3365 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3366 3367 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3368 { 3369 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3370 } 3371 3372 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3373 { 3374 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3375 } 3376 3377 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3378 { 3379 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3380 } 3381 3382 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3383 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3384 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3385 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3386 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3387 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3388 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3389 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3390 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3391 GEN_VEXT_VF(vfmsac_vf_h, 2) 3392 GEN_VEXT_VF(vfmsac_vf_w, 4) 3393 GEN_VEXT_VF(vfmsac_vf_d, 8) 3394 3395 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3396 { 3397 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3398 } 3399 3400 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3401 { 3402 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3403 } 3404 3405 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3406 { 3407 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3408 } 3409 3410 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3411 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3412 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3413 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3414 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3415 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3416 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3417 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3418 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3419 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3420 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3421 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3422 3423 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3424 { 3425 return float16_muladd(d, b, a, 0, s); 3426 } 3427 3428 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3429 { 3430 return float32_muladd(d, b, a, 0, s); 3431 } 3432 3433 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3434 { 3435 return float64_muladd(d, b, a, 0, s); 3436 } 3437 3438 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3439 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3440 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3441 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) 3442 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3443 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3444 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3445 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, 
fmadd32) 3446 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3447 GEN_VEXT_VF(vfmadd_vf_h, 2) 3448 GEN_VEXT_VF(vfmadd_vf_w, 4) 3449 GEN_VEXT_VF(vfmadd_vf_d, 8) 3450 3451 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3452 { 3453 return float16_muladd(d, b, a, 3454 float_muladd_negate_c | float_muladd_negate_product, s); 3455 } 3456 3457 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3458 { 3459 return float32_muladd(d, b, a, 3460 float_muladd_negate_c | float_muladd_negate_product, s); 3461 } 3462 3463 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3464 { 3465 return float64_muladd(d, b, a, 3466 float_muladd_negate_c | float_muladd_negate_product, s); 3467 } 3468 3469 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3470 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3471 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3472 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3473 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3474 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3475 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3476 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3477 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3478 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3479 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3480 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3481 3482 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3483 { 3484 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3485 } 3486 3487 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3488 { 3489 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3490 } 3491 3492 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3493 { 3494 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3495 } 3496 3497 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3498 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3499 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3500 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3501 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3502 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3503 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3504 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3505 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3506 GEN_VEXT_VF(vfmsub_vf_h, 2) 3507 GEN_VEXT_VF(vfmsub_vf_w, 4) 3508 GEN_VEXT_VF(vfmsub_vf_d, 8) 3509 3510 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3511 { 3512 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3513 } 3514 3515 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3516 { 3517 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3518 } 3519 3520 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3521 { 3522 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3523 } 3524 3525 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3526 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3527 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3528 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3529 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3530 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3531 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3532 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3533 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3534 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3535 GEN_VEXT_VF(vfnmsub_vf_w, 4) 
3536 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3537 3538 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3539 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3540 { 3541 return float32_muladd(float16_to_float32(a, true, s), 3542 float16_to_float32(b, true, s), d, 0, s); 3543 } 3544 3545 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3546 { 3547 return float64_muladd(float32_to_float64(a, s), 3548 float32_to_float64(b, s), d, 0, s); 3549 } 3550 3551 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3552 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3553 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3554 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3555 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3556 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3557 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3558 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3559 3560 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3561 { 3562 return float32_muladd(float16_to_float32(a, true, s), 3563 float16_to_float32(b, true, s), d, 3564 float_muladd_negate_c | float_muladd_negate_product, s); 3565 } 3566 3567 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3568 { 3569 return float64_muladd(float32_to_float64(a, s), 3570 float32_to_float64(b, s), d, 3571 float_muladd_negate_c | float_muladd_negate_product, s); 3572 } 3573 3574 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3575 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3576 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3577 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3578 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3579 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3580 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3581 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3582 3583 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3584 { 3585 return float32_muladd(float16_to_float32(a, true, s), 3586 float16_to_float32(b, true, s), d, 3587 float_muladd_negate_c, s); 3588 } 3589 3590 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3591 { 3592 return float64_muladd(float32_to_float64(a, s), 3593 float32_to_float64(b, s), d, 3594 float_muladd_negate_c, s); 3595 } 3596 3597 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3598 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3599 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3600 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3601 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3602 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3603 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3604 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3605 3606 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3607 { 3608 return float32_muladd(float16_to_float32(a, true, s), 3609 float16_to_float32(b, true, s), d, 3610 float_muladd_negate_product, s); 3611 } 3612 3613 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3614 { 3615 return float64_muladd(float32_to_float64(a, s), 3616 float32_to_float64(b, s), d, 3617 float_muladd_negate_product, s); 3618 } 3619 3620 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3621 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3622 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3623 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3624 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3625 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, 
fwnmsac32) 3626 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3627 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3628 3629 /* Vector Floating-Point Square-Root Instruction */ 3630 /* (TD, T2, TX2) */ 3631 #define OP_UU_H uint16_t, uint16_t, uint16_t 3632 #define OP_UU_W uint32_t, uint32_t, uint32_t 3633 #define OP_UU_D uint64_t, uint64_t, uint64_t 3634 3635 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3636 static void do_##NAME(void *vd, void *vs2, int i, \ 3637 CPURISCVState *env) \ 3638 { \ 3639 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3640 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3641 } 3642 3643 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3644 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3645 CPURISCVState *env, uint32_t desc) \ 3646 { \ 3647 uint32_t vm = vext_vm(desc); \ 3648 uint32_t vl = env->vl; \ 3649 uint32_t total_elems = \ 3650 vext_get_total_elems(env, desc, ESZ); \ 3651 uint32_t vta = vext_vta(desc); \ 3652 uint32_t i; \ 3653 \ 3654 if (vl == 0) { \ 3655 return; \ 3656 } \ 3657 for (i = env->vstart; i < vl; i++) { \ 3658 if (!vm && !vext_elem_mask(v0, i)) { \ 3659 continue; \ 3660 } \ 3661 do_##NAME(vd, vs2, i, env); \ 3662 } \ 3663 env->vstart = 0; \ 3664 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3665 total_elems * ESZ); \ 3666 } 3667 3668 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3669 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3670 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3671 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3672 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3673 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3674 3675 /* 3676 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3677 * 3678 * Adapted from riscv-v-spec recip.c: 3679 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3680 */ 3681 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3682 { 3683 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3684 uint64_t exp = extract64(f, frac_size, exp_size); 3685 uint64_t frac = extract64(f, 0, frac_size); 3686 3687 const uint8_t lookup_table[] = { 3688 52, 51, 50, 48, 47, 46, 44, 43, 3689 42, 41, 40, 39, 38, 36, 35, 34, 3690 33, 32, 31, 30, 30, 29, 28, 27, 3691 26, 25, 24, 23, 23, 22, 21, 20, 3692 19, 19, 18, 17, 16, 16, 15, 14, 3693 14, 13, 12, 12, 11, 10, 10, 9, 3694 9, 8, 7, 7, 6, 6, 5, 4, 3695 4, 3, 3, 2, 2, 1, 1, 0, 3696 127, 125, 123, 121, 119, 118, 116, 114, 3697 113, 111, 109, 108, 106, 105, 103, 102, 3698 100, 99, 97, 96, 95, 93, 92, 91, 3699 90, 88, 87, 86, 85, 84, 83, 82, 3700 80, 79, 78, 77, 76, 75, 74, 73, 3701 72, 71, 70, 70, 69, 68, 67, 66, 3702 65, 64, 63, 63, 62, 61, 60, 59, 3703 59, 58, 57, 56, 56, 55, 54, 53 3704 }; 3705 const int precision = 7; 3706 3707 if (exp == 0 && frac != 0) { /* subnormal */ 3708 /* Normalize the subnormal. 
*/ 3709 while (extract64(frac, frac_size - 1, 1) == 0) { 3710 exp--; 3711 frac <<= 1; 3712 } 3713 3714 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3715 } 3716 3717 int idx = ((exp & 1) << (precision - 1)) | 3718 (frac >> (frac_size - precision + 1)); 3719 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3720 (frac_size - precision); 3721 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3722 3723 uint64_t val = 0; 3724 val = deposit64(val, 0, frac_size, out_frac); 3725 val = deposit64(val, frac_size, exp_size, out_exp); 3726 val = deposit64(val, frac_size + exp_size, 1, sign); 3727 return val; 3728 } 3729 3730 static float16 frsqrt7_h(float16 f, float_status *s) 3731 { 3732 int exp_size = 5, frac_size = 10; 3733 bool sign = float16_is_neg(f); 3734 3735 /* 3736 * frsqrt7(sNaN) = canonical NaN 3737 * frsqrt7(-inf) = canonical NaN 3738 * frsqrt7(-normal) = canonical NaN 3739 * frsqrt7(-subnormal) = canonical NaN 3740 */ 3741 if (float16_is_signaling_nan(f, s) || 3742 (float16_is_infinity(f) && sign) || 3743 (float16_is_normal(f) && sign) || 3744 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3745 s->float_exception_flags |= float_flag_invalid; 3746 return float16_default_nan(s); 3747 } 3748 3749 /* frsqrt7(qNaN) = canonical NaN */ 3750 if (float16_is_quiet_nan(f, s)) { 3751 return float16_default_nan(s); 3752 } 3753 3754 /* frsqrt7(+-0) = +-inf */ 3755 if (float16_is_zero(f)) { 3756 s->float_exception_flags |= float_flag_divbyzero; 3757 return float16_set_sign(float16_infinity, sign); 3758 } 3759 3760 /* frsqrt7(+inf) = +0 */ 3761 if (float16_is_infinity(f) && !sign) { 3762 return float16_set_sign(float16_zero, sign); 3763 } 3764 3765 /* +normal, +subnormal */ 3766 uint64_t val = frsqrt7(f, exp_size, frac_size); 3767 return make_float16(val); 3768 } 3769 3770 static float32 frsqrt7_s(float32 f, float_status *s) 3771 { 3772 int exp_size = 8, frac_size = 23; 3773 bool sign = float32_is_neg(f); 3774 3775 /* 3776 * frsqrt7(sNaN) = canonical NaN 3777 * frsqrt7(-inf) = canonical NaN 3778 * frsqrt7(-normal) = canonical NaN 3779 * frsqrt7(-subnormal) = canonical NaN 3780 */ 3781 if (float32_is_signaling_nan(f, s) || 3782 (float32_is_infinity(f) && sign) || 3783 (float32_is_normal(f) && sign) || 3784 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3785 s->float_exception_flags |= float_flag_invalid; 3786 return float32_default_nan(s); 3787 } 3788 3789 /* frsqrt7(qNaN) = canonical NaN */ 3790 if (float32_is_quiet_nan(f, s)) { 3791 return float32_default_nan(s); 3792 } 3793 3794 /* frsqrt7(+-0) = +-inf */ 3795 if (float32_is_zero(f)) { 3796 s->float_exception_flags |= float_flag_divbyzero; 3797 return float32_set_sign(float32_infinity, sign); 3798 } 3799 3800 /* frsqrt7(+inf) = +0 */ 3801 if (float32_is_infinity(f) && !sign) { 3802 return float32_set_sign(float32_zero, sign); 3803 } 3804 3805 /* +normal, +subnormal */ 3806 uint64_t val = frsqrt7(f, exp_size, frac_size); 3807 return make_float32(val); 3808 } 3809 3810 static float64 frsqrt7_d(float64 f, float_status *s) 3811 { 3812 int exp_size = 11, frac_size = 52; 3813 bool sign = float64_is_neg(f); 3814 3815 /* 3816 * frsqrt7(sNaN) = canonical NaN 3817 * frsqrt7(-inf) = canonical NaN 3818 * frsqrt7(-normal) = canonical NaN 3819 * frsqrt7(-subnormal) = canonical NaN 3820 */ 3821 if (float64_is_signaling_nan(f, s) || 3822 (float64_is_infinity(f) && sign) || 3823 (float64_is_normal(f) && sign) || 3824 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3825 
s->float_exception_flags |= float_flag_invalid; 3826 return float64_default_nan(s); 3827 } 3828 3829 /* frsqrt7(qNaN) = canonical NaN */ 3830 if (float64_is_quiet_nan(f, s)) { 3831 return float64_default_nan(s); 3832 } 3833 3834 /* frsqrt7(+-0) = +-inf */ 3835 if (float64_is_zero(f)) { 3836 s->float_exception_flags |= float_flag_divbyzero; 3837 return float64_set_sign(float64_infinity, sign); 3838 } 3839 3840 /* frsqrt7(+inf) = +0 */ 3841 if (float64_is_infinity(f) && !sign) { 3842 return float64_set_sign(float64_zero, sign); 3843 } 3844 3845 /* +normal, +subnormal */ 3846 uint64_t val = frsqrt7(f, exp_size, frac_size); 3847 return make_float64(val); 3848 } 3849 3850 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3851 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3852 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3853 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3854 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3855 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3856 3857 /* 3858 * Vector Floating-Point Reciprocal Estimate Instruction 3859 * 3860 * Adapted from riscv-v-spec recip.c: 3861 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3862 */ 3863 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3864 float_status *s) 3865 { 3866 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3867 uint64_t exp = extract64(f, frac_size, exp_size); 3868 uint64_t frac = extract64(f, 0, frac_size); 3869 3870 const uint8_t lookup_table[] = { 3871 127, 125, 123, 121, 119, 117, 116, 114, 3872 112, 110, 109, 107, 105, 104, 102, 100, 3873 99, 97, 96, 94, 93, 91, 90, 88, 3874 87, 85, 84, 83, 81, 80, 79, 77, 3875 76, 75, 74, 72, 71, 70, 69, 68, 3876 66, 65, 64, 63, 62, 61, 60, 59, 3877 58, 57, 56, 55, 54, 53, 52, 51, 3878 50, 49, 48, 47, 46, 45, 44, 43, 3879 42, 41, 40, 40, 39, 38, 37, 36, 3880 35, 35, 34, 33, 32, 31, 31, 30, 3881 29, 28, 28, 27, 26, 25, 25, 24, 3882 23, 23, 22, 21, 21, 20, 19, 19, 3883 18, 17, 17, 16, 15, 15, 14, 14, 3884 13, 12, 12, 11, 11, 10, 9, 9, 3885 8, 8, 7, 7, 6, 5, 5, 4, 3886 4, 3, 3, 2, 2, 1, 1, 0 3887 }; 3888 const int precision = 7; 3889 3890 if (exp == 0 && frac != 0) { /* subnormal */ 3891 /* Normalize the subnormal. */ 3892 while (extract64(frac, frac_size - 1, 1) == 0) { 3893 exp--; 3894 frac <<= 1; 3895 } 3896 3897 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3898 3899 if (exp != 0 && exp != UINT64_MAX) { 3900 /* 3901 * Overflow to inf or max value of same sign, 3902 * depending on sign and rounding mode. 3903 */ 3904 s->float_exception_flags |= (float_flag_inexact | 3905 float_flag_overflow); 3906 3907 if ((s->float_rounding_mode == float_round_to_zero) || 3908 ((s->float_rounding_mode == float_round_down) && !sign) || 3909 ((s->float_rounding_mode == float_round_up) && sign)) { 3910 /* Return greatest/negative finite value. */ 3911 return (sign << (exp_size + frac_size)) | 3912 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3913 } else { 3914 /* Return +-inf. */ 3915 return (sign << (exp_size + frac_size)) | 3916 MAKE_64BIT_MASK(frac_size, exp_size); 3917 } 3918 } 3919 } 3920 3921 int idx = frac >> (frac_size - precision); 3922 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3923 (frac_size - precision); 3924 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3925 3926 if (out_exp == 0 || out_exp == UINT64_MAX) { 3927 /* 3928 * The result is subnormal, but don't raise the underflow exception, 3929 * because there's no additional loss of precision. 
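 * For example, with binary32 inputs (exp_size = 8) a biased input exponent
 * of 254 gives out_exp = 253 - 254, which wraps to UINT64_MAX: the true
 * result exponent is two below the minimum normal exponent, so the implicit
 * leading one is shifted into the fraction twice and the stored exponent
 * field is forced to zero by the adjustment below.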
3930 */ 3931 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3932 if (out_exp == UINT64_MAX) { 3933 out_frac >>= 1; 3934 out_exp = 0; 3935 } 3936 } 3937 3938 uint64_t val = 0; 3939 val = deposit64(val, 0, frac_size, out_frac); 3940 val = deposit64(val, frac_size, exp_size, out_exp); 3941 val = deposit64(val, frac_size + exp_size, 1, sign); 3942 return val; 3943 } 3944 3945 static float16 frec7_h(float16 f, float_status *s) 3946 { 3947 int exp_size = 5, frac_size = 10; 3948 bool sign = float16_is_neg(f); 3949 3950 /* frec7(+-inf) = +-0 */ 3951 if (float16_is_infinity(f)) { 3952 return float16_set_sign(float16_zero, sign); 3953 } 3954 3955 /* frec7(+-0) = +-inf */ 3956 if (float16_is_zero(f)) { 3957 s->float_exception_flags |= float_flag_divbyzero; 3958 return float16_set_sign(float16_infinity, sign); 3959 } 3960 3961 /* frec7(sNaN) = canonical NaN */ 3962 if (float16_is_signaling_nan(f, s)) { 3963 s->float_exception_flags |= float_flag_invalid; 3964 return float16_default_nan(s); 3965 } 3966 3967 /* frec7(qNaN) = canonical NaN */ 3968 if (float16_is_quiet_nan(f, s)) { 3969 return float16_default_nan(s); 3970 } 3971 3972 /* +-normal, +-subnormal */ 3973 uint64_t val = frec7(f, exp_size, frac_size, s); 3974 return make_float16(val); 3975 } 3976 3977 static float32 frec7_s(float32 f, float_status *s) 3978 { 3979 int exp_size = 8, frac_size = 23; 3980 bool sign = float32_is_neg(f); 3981 3982 /* frec7(+-inf) = +-0 */ 3983 if (float32_is_infinity(f)) { 3984 return float32_set_sign(float32_zero, sign); 3985 } 3986 3987 /* frec7(+-0) = +-inf */ 3988 if (float32_is_zero(f)) { 3989 s->float_exception_flags |= float_flag_divbyzero; 3990 return float32_set_sign(float32_infinity, sign); 3991 } 3992 3993 /* frec7(sNaN) = canonical NaN */ 3994 if (float32_is_signaling_nan(f, s)) { 3995 s->float_exception_flags |= float_flag_invalid; 3996 return float32_default_nan(s); 3997 } 3998 3999 /* frec7(qNaN) = canonical NaN */ 4000 if (float32_is_quiet_nan(f, s)) { 4001 return float32_default_nan(s); 4002 } 4003 4004 /* +-normal, +-subnormal */ 4005 uint64_t val = frec7(f, exp_size, frac_size, s); 4006 return make_float32(val); 4007 } 4008 4009 static float64 frec7_d(float64 f, float_status *s) 4010 { 4011 int exp_size = 11, frac_size = 52; 4012 bool sign = float64_is_neg(f); 4013 4014 /* frec7(+-inf) = +-0 */ 4015 if (float64_is_infinity(f)) { 4016 return float64_set_sign(float64_zero, sign); 4017 } 4018 4019 /* frec7(+-0) = +-inf */ 4020 if (float64_is_zero(f)) { 4021 s->float_exception_flags |= float_flag_divbyzero; 4022 return float64_set_sign(float64_infinity, sign); 4023 } 4024 4025 /* frec7(sNaN) = canonical NaN */ 4026 if (float64_is_signaling_nan(f, s)) { 4027 s->float_exception_flags |= float_flag_invalid; 4028 return float64_default_nan(s); 4029 } 4030 4031 /* frec7(qNaN) = canonical NaN */ 4032 if (float64_is_quiet_nan(f, s)) { 4033 return float64_default_nan(s); 4034 } 4035 4036 /* +-normal, +-subnormal */ 4037 uint64_t val = frec7(f, exp_size, frac_size, s); 4038 return make_float64(val); 4039 } 4040 4041 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4042 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4043 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4044 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4045 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4046 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4047 4048 /* Vector Floating-Point MIN/MAX Instructions */ 4049 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4050 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 4051 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4052 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4053 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4054 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4055 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4056 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4057 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4058 GEN_VEXT_VF(vfmin_vf_h, 2) 4059 GEN_VEXT_VF(vfmin_vf_w, 4) 4060 GEN_VEXT_VF(vfmin_vf_d, 8) 4061 4062 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4063 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4064 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4065 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 4066 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 4067 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 4068 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4069 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4070 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4071 GEN_VEXT_VF(vfmax_vf_h, 2) 4072 GEN_VEXT_VF(vfmax_vf_w, 4) 4073 GEN_VEXT_VF(vfmax_vf_d, 8) 4074 4075 /* Vector Floating-Point Sign-Injection Instructions */ 4076 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4077 { 4078 return deposit64(b, 0, 15, a); 4079 } 4080 4081 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4082 { 4083 return deposit64(b, 0, 31, a); 4084 } 4085 4086 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4087 { 4088 return deposit64(b, 0, 63, a); 4089 } 4090 4091 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4092 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4093 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4094 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 4095 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 4096 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 4097 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4098 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4099 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4100 GEN_VEXT_VF(vfsgnj_vf_h, 2) 4101 GEN_VEXT_VF(vfsgnj_vf_w, 4) 4102 GEN_VEXT_VF(vfsgnj_vf_d, 8) 4103 4104 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4105 { 4106 return deposit64(~b, 0, 15, a); 4107 } 4108 4109 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4110 { 4111 return deposit64(~b, 0, 31, a); 4112 } 4113 4114 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4115 { 4116 return deposit64(~b, 0, 63, a); 4117 } 4118 4119 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4120 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4121 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4122 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 4123 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 4124 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 4125 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4126 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4127 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4128 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4129 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4130 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4131 4132 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4133 { 4134 return deposit64(b ^ a, 0, 15, a); 4135 } 4136 4137 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4138 { 4139 return deposit64(b ^ a, 0, 31, a); 4140 } 4141 4142 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
float_status *s) 4143 { 4144 return deposit64(b ^ a, 0, 63, a); 4145 } 4146 4147 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4148 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4149 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4150 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 4151 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 4152 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 4153 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4154 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4155 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4156 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 4157 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 4158 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 4159 4160 /* Vector Floating-Point Compare Instructions */ 4161 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4162 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4163 CPURISCVState *env, uint32_t desc) \ 4164 { \ 4165 uint32_t vm = vext_vm(desc); \ 4166 uint32_t vl = env->vl; \ 4167 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 4168 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4169 uint32_t i; \ 4170 \ 4171 for (i = env->vstart; i < vl; i++) { \ 4172 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4173 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4174 if (!vm && !vext_elem_mask(v0, i)) { \ 4175 continue; \ 4176 } \ 4177 vext_set_elem_mask(vd, i, \ 4178 DO_OP(s2, s1, &env->fp_status)); \ 4179 } \ 4180 env->vstart = 0; \ 4181 /* mask destination register are always tail-agnostic */ \ 4182 /* set tail elements to 1s */ \ 4183 if (vta_all_1s) { \ 4184 for (; i < total_elems; i++) { \ 4185 vext_set_elem_mask(vd, i, 1); \ 4186 } \ 4187 } \ 4188 } 4189 4190 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4191 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4192 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4193 4194 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4195 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4196 CPURISCVState *env, uint32_t desc) \ 4197 { \ 4198 uint32_t vm = vext_vm(desc); \ 4199 uint32_t vl = env->vl; \ 4200 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 4201 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4202 uint32_t i; \ 4203 \ 4204 for (i = env->vstart; i < vl; i++) { \ 4205 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4206 if (!vm && !vext_elem_mask(v0, i)) { \ 4207 continue; \ 4208 } \ 4209 vext_set_elem_mask(vd, i, \ 4210 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4211 } \ 4212 env->vstart = 0; \ 4213 /* mask destination register are always tail-agnostic */ \ 4214 /* set tail elements to 1s */ \ 4215 if (vta_all_1s) { \ 4216 for (; i < total_elems; i++) { \ 4217 vext_set_elem_mask(vd, i, 1); \ 4218 } \ 4219 } \ 4220 } 4221 4222 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4223 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4224 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4225 4226 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4227 { 4228 FloatRelation compare = float16_compare_quiet(a, b, s); 4229 return compare != float_relation_equal; 4230 } 4231 4232 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4233 { 4234 FloatRelation compare = float32_compare_quiet(a, b, s); 4235 return compare != float_relation_equal; 4236 } 4237 4238 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4239 { 4240 FloatRelation compare = float64_compare_quiet(a, b, s); 4241 return compare != float_relation_equal; 4242 } 4243 4244 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, 
uint16_t, H2, vmfne16) 4245 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4246 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4247 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4248 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4249 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4250 4251 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4252 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4253 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4254 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4255 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4256 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4257 4258 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4259 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4260 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4261 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4262 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4263 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4264 4265 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4266 { 4267 FloatRelation compare = float16_compare(a, b, s); 4268 return compare == float_relation_greater; 4269 } 4270 4271 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4272 { 4273 FloatRelation compare = float32_compare(a, b, s); 4274 return compare == float_relation_greater; 4275 } 4276 4277 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4278 { 4279 FloatRelation compare = float64_compare(a, b, s); 4280 return compare == float_relation_greater; 4281 } 4282 4283 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4284 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4285 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4286 4287 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4288 { 4289 FloatRelation compare = float16_compare(a, b, s); 4290 return compare == float_relation_greater || 4291 compare == float_relation_equal; 4292 } 4293 4294 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4295 { 4296 FloatRelation compare = float32_compare(a, b, s); 4297 return compare == float_relation_greater || 4298 compare == float_relation_equal; 4299 } 4300 4301 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4302 { 4303 FloatRelation compare = float64_compare(a, b, s); 4304 return compare == float_relation_greater || 4305 compare == float_relation_equal; 4306 } 4307 4308 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4309 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4310 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4311 4312 /* Vector Floating-Point Classify Instruction */ 4313 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4314 static void do_##NAME(void *vd, void *vs2, int i) \ 4315 { \ 4316 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4317 *((TD *)vd + HD(i)) = OP(s2); \ 4318 } 4319 4320 #define GEN_VEXT_V(NAME, ESZ) \ 4321 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4322 CPURISCVState *env, uint32_t desc) \ 4323 { \ 4324 uint32_t vm = vext_vm(desc); \ 4325 uint32_t vl = env->vl; \ 4326 uint32_t total_elems = \ 4327 vext_get_total_elems(env, desc, ESZ); \ 4328 uint32_t vta = vext_vta(desc); \ 4329 uint32_t i; \ 4330 \ 4331 for (i = env->vstart; i < vl; i++) { \ 4332 if (!vm && !vext_elem_mask(v0, i)) { \ 4333 continue; \ 4334 } \ 4335 do_##NAME(vd, vs2, i); \ 4336 } \ 4337 env->vstart = 0; \ 4338 /* set tail elements to 1s */ \ 4339 vext_set_elems_1s(vd, vta, vl * ESZ, \ 4340 total_elems * ESZ); \ 4341 } 4342 
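/*
 * Illustrative sketch (not part of the build): the fclass_h/fclass_s/fclass_d
 * helpers that follow return the standard RISC-V 10-bit class mask
 * (bit 0 = -inf, 1 = -normal, 2 = -subnormal, 3 = -0, 4 = +0,
 *  5 = +subnormal, 6 = +normal, 7 = +inf, 8 = sNaN, 9 = qNaN).
 * The guarded, hypothetical classify_f32_bits() below decodes a raw binary32
 * pattern directly and spot-checks a few encodings; it assumes the usual
 * "MSB of the fraction set means quiet NaN" convention, matching the default
 * float_status used by the helpers.
 */
#if 0
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static unsigned classify_f32_bits(uint32_t bits)
{
    unsigned sign = bits >> 31;
    unsigned exp = (bits >> 23) & 0xff;
    uint32_t frac = bits & 0x7fffff;

    if (exp == 0xff) {
        if (frac == 0) {
            return sign ? 1u << 0 : 1u << 7;      /* +-infinity */
        }
        return (frac >> 22) ? 1u << 9 : 1u << 8;  /* quiet vs. signaling NaN */
    }
    if (exp == 0) {
        if (frac == 0) {
            return sign ? 1u << 3 : 1u << 4;      /* +-zero */
        }
        return sign ? 1u << 2 : 1u << 5;          /* +-subnormal */
    }
    return sign ? 1u << 1 : 1u << 6;              /* +-normal */
}

int main(void)
{
    assert(classify_f32_bits(0xbf800000u) == 1u << 1); /* -1.0f: negative normal */
    assert(classify_f32_bits(0x00000001u) == 1u << 5); /* smallest positive subnormal */
    assert(classify_f32_bits(0x7f800000u) == 1u << 7); /* +infinity */
    assert(classify_f32_bits(0x7fc00000u) == 1u << 9); /* quiet NaN */
    printf("fclass mask sketch: all checks passed\n");
    return 0;
}
#endif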
4343 target_ulong fclass_h(uint64_t frs1) 4344 { 4345 float16 f = frs1; 4346 bool sign = float16_is_neg(f); 4347 4348 if (float16_is_infinity(f)) { 4349 return sign ? 1 << 0 : 1 << 7; 4350 } else if (float16_is_zero(f)) { 4351 return sign ? 1 << 3 : 1 << 4; 4352 } else if (float16_is_zero_or_denormal(f)) { 4353 return sign ? 1 << 2 : 1 << 5; 4354 } else if (float16_is_any_nan(f)) { 4355 float_status s = { }; /* for snan_bit_is_one */ 4356 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4357 } else { 4358 return sign ? 1 << 1 : 1 << 6; 4359 } 4360 } 4361 4362 target_ulong fclass_s(uint64_t frs1) 4363 { 4364 float32 f = frs1; 4365 bool sign = float32_is_neg(f); 4366 4367 if (float32_is_infinity(f)) { 4368 return sign ? 1 << 0 : 1 << 7; 4369 } else if (float32_is_zero(f)) { 4370 return sign ? 1 << 3 : 1 << 4; 4371 } else if (float32_is_zero_or_denormal(f)) { 4372 return sign ? 1 << 2 : 1 << 5; 4373 } else if (float32_is_any_nan(f)) { 4374 float_status s = { }; /* for snan_bit_is_one */ 4375 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4376 } else { 4377 return sign ? 1 << 1 : 1 << 6; 4378 } 4379 } 4380 4381 target_ulong fclass_d(uint64_t frs1) 4382 { 4383 float64 f = frs1; 4384 bool sign = float64_is_neg(f); 4385 4386 if (float64_is_infinity(f)) { 4387 return sign ? 1 << 0 : 1 << 7; 4388 } else if (float64_is_zero(f)) { 4389 return sign ? 1 << 3 : 1 << 4; 4390 } else if (float64_is_zero_or_denormal(f)) { 4391 return sign ? 1 << 2 : 1 << 5; 4392 } else if (float64_is_any_nan(f)) { 4393 float_status s = { }; /* for snan_bit_is_one */ 4394 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4395 } else { 4396 return sign ? 1 << 1 : 1 << 6; 4397 } 4398 } 4399 4400 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4401 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4402 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4403 GEN_VEXT_V(vfclass_v_h, 2) 4404 GEN_VEXT_V(vfclass_v_w, 4) 4405 GEN_VEXT_V(vfclass_v_d, 8) 4406 4407 /* Vector Floating-Point Merge Instruction */ 4408 4409 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4410 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4411 CPURISCVState *env, uint32_t desc) \ 4412 { \ 4413 uint32_t vm = vext_vm(desc); \ 4414 uint32_t vl = env->vl; \ 4415 uint32_t esz = sizeof(ETYPE); \ 4416 uint32_t total_elems = \ 4417 vext_get_total_elems(env, desc, esz); \ 4418 uint32_t vta = vext_vta(desc); \ 4419 uint32_t i; \ 4420 \ 4421 for (i = env->vstart; i < vl; i++) { \ 4422 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4423 *((ETYPE *)vd + H(i)) \ 4424 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4425 } \ 4426 env->vstart = 0; \ 4427 /* set tail elements to 1s */ \ 4428 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4429 } 4430 4431 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4432 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4433 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4434 4435 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4436 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4437 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4438 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4439 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4440 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4441 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4442 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4443 4444 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. 
*/ 4445 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4446 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4447 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4448 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4449 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4450 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4451 4452 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4453 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4454 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4455 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4456 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4457 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4458 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4459 4460 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4461 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4462 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4463 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4464 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4465 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4466 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4467 4468 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4469 /* (TD, T2, TX2) */ 4470 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4471 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4472 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4473 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4474 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4475 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4476 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4477 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4478 4479 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4480 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4481 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4482 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4483 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4484 4485 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4486 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4487 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4488 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4489 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4490 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4491 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4492 4493 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4494 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4495 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4496 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4497 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4498 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4499 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4500 4501 /* 4502 * vfwcvt.f.f.v vd, vs2, vm 4503 * Convert single-width float to double-width float. 
4504 */ 4505 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4506 { 4507 return float16_to_float32(a, true, s); 4508 } 4509 4510 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4511 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4512 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4513 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4514 4515 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4516 /* (TD, T2, TX2) */ 4517 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4518 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4519 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4520 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4521 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4522 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4523 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4524 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4525 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4526 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4527 4528 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4529 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4530 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4531 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4532 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4533 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4534 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4535 4536 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4537 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4538 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4539 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4540 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4541 4542 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4543 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4544 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4545 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4546 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4547 4548 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4549 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4550 { 4551 return float32_to_float16(a, true, s); 4552 } 4553 4554 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4555 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4556 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4557 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4558 4559 /* 4560 *** Vector Reduction Operations 4561 */ 4562 /* Vector Single-Width Integer Reduction Instructions */ 4563 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4564 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4565 void *vs2, CPURISCVState *env, uint32_t desc) \ 4566 { \ 4567 uint32_t vm = vext_vm(desc); \ 4568 uint32_t vl = env->vl; \ 4569 uint32_t esz = sizeof(TD); \ 4570 uint32_t vlenb = simd_maxsz(desc); \ 4571 uint32_t vta = vext_vta(desc); \ 4572 uint32_t i; \ 4573 TD s1 = *((TD *)vs1 + HD(0)); \ 4574 \ 4575 for (i = env->vstart; i < vl; i++) { \ 4576 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4577 if (!vm && !vext_elem_mask(v0, i)) { \ 4578 continue; \ 4579 } \ 4580 s1 = OP(s1, (TD)s2); \ 4581 } \ 4582 *((TD *)vd + HD(0)) = s1; \ 4583 env->vstart = 0; \ 4584 /* set tail elements to 1s */ \ 4585 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4586 } 4587 4588 /* vd[0] = sum(vs1[0], vs2[*]) */ 4589 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4590 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4591 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4592 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4593 4594 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4595 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4596 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4597 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4598 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4599 4600 /* vd[0] = max(vs1[0], vs2[*]) */ 4601 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4602 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4603 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4604 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4605 4606 /* vd[0] = minu(vs1[0], vs2[*]) */ 4607 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4608 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4609 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4610 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4611 4612 /* vd[0] = min(vs1[0], vs2[*]) */ 4613 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4614 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4615 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4616 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4617 4618 /* vd[0] = and(vs1[0], vs2[*]) */ 4619 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4620 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4621 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4622 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4623 4624 /* vd[0] = or(vs1[0], vs2[*]) */ 4625 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4626 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4627 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4628 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4629 4630 /* vd[0] = xor(vs1[0], vs2[*]) */ 4631 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4632 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, 
DO_XOR) 4633 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4634 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4635 4636 /* Vector Widening Integer Reduction Instructions */ 4637 /* signed sum reduction into double-width accumulator */ 4638 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4639 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4640 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4641 4642 /* Unsigned sum reduction into double-width accumulator */ 4643 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4644 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4645 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4646 4647 /* Vector Single-Width Floating-Point Reduction Instructions */ 4648 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4649 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4650 void *vs2, CPURISCVState *env, \ 4651 uint32_t desc) \ 4652 { \ 4653 uint32_t vm = vext_vm(desc); \ 4654 uint32_t vl = env->vl; \ 4655 uint32_t esz = sizeof(TD); \ 4656 uint32_t vlenb = simd_maxsz(desc); \ 4657 uint32_t vta = vext_vta(desc); \ 4658 uint32_t i; \ 4659 TD s1 = *((TD *)vs1 + HD(0)); \ 4660 \ 4661 for (i = env->vstart; i < vl; i++) { \ 4662 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4663 if (!vm && !vext_elem_mask(v0, i)) { \ 4664 continue; \ 4665 } \ 4666 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4667 } \ 4668 *((TD *)vd + HD(0)) = s1; \ 4669 env->vstart = 0; \ 4670 /* set tail elements to 1s */ \ 4671 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4672 } 4673 4674 /* Unordered sum */ 4675 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4676 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4677 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4678 4679 /* Maximum value */ 4680 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4681 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4682 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4683 4684 /* Minimum value */ 4685 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4686 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4687 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4688 4689 /* Vector Widening Floating-Point Reduction Instructions */ 4690 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4691 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4692 void *vs2, CPURISCVState *env, uint32_t desc) 4693 { 4694 uint32_t vm = vext_vm(desc); 4695 uint32_t vl = env->vl; 4696 uint32_t esz = sizeof(uint32_t); 4697 uint32_t vlenb = simd_maxsz(desc); 4698 uint32_t vta = vext_vta(desc); 4699 uint32_t i; 4700 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4701 4702 for (i = env->vstart; i < vl; i++) { 4703 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4704 if (!vm && !vext_elem_mask(v0, i)) { 4705 continue; 4706 } 4707 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4708 &env->fp_status); 4709 } 4710 *((uint32_t *)vd + H4(0)) = s1; 4711 env->vstart = 0; 4712 /* set tail elements to 1s */ 4713 vext_set_elems_1s(vd, vta, esz, vlenb); 4714 } 4715 4716 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4717 void *vs2, CPURISCVState *env, uint32_t desc) 4718 { 4719 uint32_t vm = vext_vm(desc); 4720 uint32_t vl = env->vl; 4721 uint32_t esz = 
sizeof(uint64_t); 4722 uint32_t vlenb = simd_maxsz(desc); 4723 uint32_t vta = vext_vta(desc); 4724 uint32_t i; 4725 uint64_t s1 = *((uint64_t *)vs1); 4726 4727 for (i = env->vstart; i < vl; i++) { 4728 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4729 if (!vm && !vext_elem_mask(v0, i)) { 4730 continue; 4731 } 4732 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4733 &env->fp_status); 4734 } 4735 *((uint64_t *)vd) = s1; 4736 env->vstart = 0; 4737 /* set tail elements to 1s */ 4738 vext_set_elems_1s(vd, vta, esz, vlenb); 4739 } 4740 4741 /* 4742 *** Vector Mask Operations 4743 */ 4744 /* Vector Mask-Register Logical Instructions */ 4745 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4746 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4747 void *vs2, CPURISCVState *env, \ 4748 uint32_t desc) \ 4749 { \ 4750 uint32_t vl = env->vl; \ 4751 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 4752 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4753 uint32_t i; \ 4754 int a, b; \ 4755 \ 4756 for (i = env->vstart; i < vl; i++) { \ 4757 a = vext_elem_mask(vs1, i); \ 4758 b = vext_elem_mask(vs2, i); \ 4759 vext_set_elem_mask(vd, i, OP(b, a)); \ 4760 } \ 4761 env->vstart = 0; \ 4762 /* mask destination register are always tail- \ 4763 * agnostic \ 4764 */ \ 4765 /* set tail elements to 1s */ \ 4766 if (vta_all_1s) { \ 4767 for (; i < total_elems; i++) { \ 4768 vext_set_elem_mask(vd, i, 1); \ 4769 } \ 4770 } \ 4771 } 4772 4773 #define DO_NAND(N, M) (!(N & M)) 4774 #define DO_ANDNOT(N, M) (N & !M) 4775 #define DO_NOR(N, M) (!(N | M)) 4776 #define DO_ORNOT(N, M) (N | !M) 4777 #define DO_XNOR(N, M) (!(N ^ M)) 4778 4779 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4780 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4781 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4782 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4783 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4784 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4785 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4786 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4787 4788 /* Vector count population in mask vcpop */ 4789 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4790 uint32_t desc) 4791 { 4792 target_ulong cnt = 0; 4793 uint32_t vm = vext_vm(desc); 4794 uint32_t vl = env->vl; 4795 int i; 4796 4797 for (i = env->vstart; i < vl; i++) { 4798 if (vm || vext_elem_mask(v0, i)) { 4799 if (vext_elem_mask(vs2, i)) { 4800 cnt++; 4801 } 4802 } 4803 } 4804 env->vstart = 0; 4805 return cnt; 4806 } 4807 4808 /* vfirst find-first-set mask bit*/ 4809 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4810 uint32_t desc) 4811 { 4812 uint32_t vm = vext_vm(desc); 4813 uint32_t vl = env->vl; 4814 int i; 4815 4816 for (i = env->vstart; i < vl; i++) { 4817 if (vm || vext_elem_mask(v0, i)) { 4818 if (vext_elem_mask(vs2, i)) { 4819 return i; 4820 } 4821 } 4822 } 4823 env->vstart = 0; 4824 return -1LL; 4825 } 4826 4827 enum set_mask_type { 4828 ONLY_FIRST = 1, 4829 INCLUDE_FIRST, 4830 BEFORE_FIRST, 4831 }; 4832 4833 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4834 uint32_t desc, enum set_mask_type type) 4835 { 4836 uint32_t vm = vext_vm(desc); 4837 uint32_t vl = env->vl; 4838 uint32_t total_elems = env_archcpu(env)->cfg.vlen; 4839 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4840 int i; 4841 bool first_mask_bit = false; 4842 4843 for (i = env->vstart; i < vl; i++) { 4844 if (!vm && !vext_elem_mask(v0, i)) { 4845 continue; 4846 } 4847 /* write a zero to all following active elements */ 4848 if (first_mask_bit) { 4849 vext_set_elem_mask(vd, i, 0); 4850 continue; 4851 } 4852 if 

static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = env_archcpu(env)->cfg.vlen;
    uint32_t vta_all_1s = vext_vta_all_1s(desc);
    int i;
    bool first_mask_bit = false;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
    /* the mask destination register is always tail-agnostic */
    /* set tail elements to 1s */
    if (vta_all_1s) {
        for (; i < total_elems; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }
}

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}

/* Vector Iota Instruction */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
                  uint32_t desc)                                          \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t sum = 0;                                                     \
    int i;                                                                \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = sum;                                      \
        if (vext_elem_mask(vs2, i)) {                                     \
            sum++;                                                        \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    int i;                                                                \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
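
/*
 * Worked example (illustrative only): with all elements active and
 * vs2 = 1,0,0,1,0,1,... (element 0 first), viota.m writes the running count
 * of set mask bits strictly before each element, i.e. vd = 0,1,1,1,2,2,...,
 * while vid.v simply writes vd[i] = i for every active body element.
 */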

/*
 *** Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    target_ulong i_max, i;                                                \
                                                                          \
    i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
        }                                                                 \
    }                                                                     \
                                                                          \
    for (i = i_max; i < vl; ++i) {                                        \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        }                                                                 \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
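
/*
 * Quick reference for the slide family (OFFSET = x[rs1], active elements
 * only): vslideup.vx writes vs2[i] to vd[i + OFFSET] and leaves
 * vd[0..OFFSET-1] untouched, while vslidedown.vx writes vs2[i + OFFSET] to
 * vd[i] and writes zero wherever the source index would reach or exceed
 * VLMAX.  The slide1 variants defined below shift by exactly one element
 * and place the scalar operand in the vacated slot (vd[0] for vslide1up,
 * vd[vl - 1] for vslide1down).
 */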

#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                      \
static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1,       \
                                 void *vs2, CPURISCVState *env,             \
                                 uint32_t desc)                             \
{                                                                           \
    typedef uint##BITWIDTH##_t ETYPE;                                       \
    uint32_t vm = vext_vm(desc);                                            \
    uint32_t vl = env->vl;                                                  \
    uint32_t esz = sizeof(ETYPE);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);            \
    uint32_t vta = vext_vta(desc);                                          \
    uint32_t i;                                                             \
                                                                            \
    for (i = env->vstart; i < vl; i++) {                                    \
        if (!vm && !vext_elem_mask(v0, i)) {                                \
            continue;                                                       \
        }                                                                   \
        if (i == 0) {                                                       \
            *((ETYPE *)vd + H(i)) = s1;                                     \
        } else {                                                            \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));             \
        }                                                                   \
    }                                                                       \
    env->vstart = 0;                                                        \
    /* set tail elements to 1s */                                           \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);                \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                     \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);             \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                   \
static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1,     \
                                   void *vs2, CPURISCVState *env,           \
                                   uint32_t desc)                           \
{                                                                           \
    typedef uint##BITWIDTH##_t ETYPE;                                       \
    uint32_t vm = vext_vm(desc);                                            \
    uint32_t vl = env->vl;                                                  \
    uint32_t esz = sizeof(ETYPE);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);            \
    uint32_t vta = vext_vta(desc);                                          \
    uint32_t i;                                                             \
                                                                            \
    for (i = env->vstart; i < vl; i++) {                                    \
        if (!vm && !vext_elem_mask(v0, i)) {                                \
            continue;                                                       \
        }                                                                   \
        if (i == vl - 1) {                                                  \
            *((ETYPE *)vd + H(i)) = s1;                                     \
        } else {                                                            \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));             \
        }                                                                   \
    }                                                                       \
    env->vstart = 0;                                                        \
    /* set tail elements to 1s */                                           \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);                \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                   \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);           \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);         \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)              \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);       \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
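
/*
 * Note that vfslide1up.vf / vfslide1down.vf reuse the integer slide1
 * helpers above: the scalar operand arrives as a uint64_t and is simply
 * truncated to the element width on store, so no separate floating-point
 * path is needed in these helpers.
 */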

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}
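
/*
 * Note on vmvr_v above: vmv<nr>r.v copies a whole register group regardless
 * of the current vl, so the helper is unmasked and simply copies bytes from
 * the resume point (env->vstart elements of SEW bytes each) up to the group
 * size supplied in the descriptor's maxsz field by the translator.
 */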

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
                  CPURISCVState *env, uint32_t desc)             \
{                                                                \
    uint32_t vl = env->vl;                                       \
    uint32_t vm = vext_vm(desc);                                 \
    uint32_t esz = sizeof(ETYPE);                                \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc);                               \
    uint32_t i;                                                  \
                                                                 \
    for (i = env->vstart; i < vl; i++) {                         \
        if (!vm && !vext_elem_mask(v0, i)) {                     \
            continue;                                            \
        }                                                        \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
    }                                                            \
    env->vstart = 0;                                             \
    /* set tail elements to 1s */                                \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);     \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
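
/*
 * The vf2/vf4/vf8 suffixes mean the source EEW is SEW/2, SEW/4 or SEW/8;
 * the extension itself falls out of the C assignment between the two
 * element types.  For example, with SEW=16 a source byte of 0x80 becomes
 * 0x0080 under vzext.vf2 (uint16_t = uint8_t) and 0xff80 under vsext.vf2
 * (int16_t = int8_t).
 */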