/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

/*
 * vsetvl/vsetvli helper: validate the requested vtype (s2) against the
 * CPU configuration, compute the new vl from the requested AVL (s1),
 * and commit vtype/vl/vstart/vill to the CPU state.
 *
 * An illegal configuration (vill bit set, reserved vtype bits set,
 * EDIV requested, SEW > ELEN, or a reserved/too-small fractional LMUL)
 * does not trap: it only sets vill and zeroes vtype/vl, per the spec.
 * Returns the new vl (0 on an illegal configuration).
 */
target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    /* vill is the most-significant bit of vtype for the current XLEN */
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        /*
         * vlmul == 4 (0b100) is reserved; otherwise the fraction of ELEN
         * (ELEN >> (8 - lmul) == ELEN/8, ELEN/4 or ELEN/2) must still be
         * able to hold at least one SEW-wide element.
         */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    /* Clamp the requested AVL to VLMAX for this vtype */
    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that needs a host-endian fixup.
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

/*
 * The accessors below unpack fields from the VDATA word of the gvec
 * descriptor built at translation time.
 */

/* NF: number of fields per segment for segment loads/stores */
static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

/* VM: 1 when the operation is unmasked */
static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as following:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

/* VTA: tail-agnostic policy flag */
static inline uint32_t vext_vta(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA);
}

/* VMA: mask-agnostic policy flag */
static inline uint32_t vext_vma(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VMA);
}

/* VTA_ALL_1S: set when agnostic tail elements must read as all-1s */
static inline uint32_t vext_vta_all_1s(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
}

/*
 * Get the maximum number of elements can be operated.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits.
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    /* VLMAX = vlenb * LMUL / esz, computed with log2 shifts */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * Get number of total elements, including prestart, body and tail elements.
 * Note that when LMUL < 1, the tail includes the elements past VLMAX that
 * are held in the same vector register.
 */
static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
                                            uint32_t esz)
{
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    /*
     * emul is log2(EMUL) clamped to >= 0, where
     * EMUL = (esz / sew) * LMUL; fractions round up to one register.
     */
    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
    return (vlenb << emul) / esz;
}

/* Apply the pointer-masking transformation to a guest virtual address */
static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoint before real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in TLB
 * and page table walk can't fill the TLB entry. Then the guest
 * software can return here after process the exception or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    /* Bytes left on the page containing addr (two's-complement trick) */
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        /* The access crosses into a second page; probe that one too */
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

/* set agnostic elements to 1s */
static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
                              uint32_t tot)
{
    if (is_agnostic == 0) {
        /* policy undisturbed */
        return;
    }
    if (tot - cnt == 0) {
        /* nothing to fill */
        return;
    }
    /* fill bytes [cnt, tot) of the register group with 1s */
    memset(base + cnt, -1, tot - cnt);
}

/* Write one bit of the mask register v0 at element 'index' */
static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* elements operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

/* Load one ETYPE element from guest memory into slot idx of vd */
#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

/* Store one ETYPE element from slot idx of vd to guest memory */
#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 * After a load, fill the tail of every destination register in the
 * group with 1s when the tail-agnostic policy is in effect.
 */
static void vext_set_tail_elems_1s(CPURISCVState *env, target_ulong vl,
                                   void *vd, uint32_t desc, uint32_t nf,
                                   uint32_t esz, uint32_t max_elems)
{
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3;
    uint32_t vta = vext_vta(desc);
    uint32_t registers_used;
    int k;

    /* tail of each of the nf field register groups */
    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }

    if (nf * max_elems % total_elems != 0) {
        /* EMUL < 1: also fill the unused remainder of the last register */
        registers_used = ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
                          registers_used * vlenb);
    }
}

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    /*
     * vstart is bumped per element so a trapping access can be resumed
     * from the faulting element after the trap is handled.
     */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void * v0, target_ulong base,               \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
             uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, evl, vd, desc, nf, esz, max_elems);
}

/*
 * masked unit-stride load and store operation will be a special case of
 * stride, stride = NF * sizeof (MTYPE)
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 *** unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    /*
     * NOTE(review): evl is uint8_t; this fits while max vlen is 1024 bits
     * (max vl 1024 -> evl 128, see comment in vext_max_elems) — confirm
     * if larger VLEN is ever allowed.
     */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC());
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC());
}

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

/* Compute base + zero-extended index element vs2[idx] */
#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,        \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,     \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());     \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)      \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,        \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,     \
                    STORE_FN, ctzl(sizeof(ETYPE)),              \
                    GETPC());                                   \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
/*
 * Probe every element's pages first; element 0 must succeed (or trap),
 * while a fault on any later element only truncates vl to that element
 * instead of trapping.  Then perform the actual loads up to the
 * (possibly reduced) vl.
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            /* element 0 takes the trap if unmapped, per the spec */
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    /* unmapped: truncate vl to the faulting element */
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/* Unsigned min/max */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)

/*
 *** load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3;
    uint32_t max_elems = vlenb >> log2_esz;

    /* resume point after a trap: register k, element off within it */
    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /* load/store rest of elements of current segment pointed by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
        }
        k++;
    }

    /* load/store elements for rest of segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}

#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
                    ctzl(sizeof(ETYPE)), GETPC());   \
}

GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
                    ctzl(sizeof(ETYPE)), GETPC());   \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)

/*
 *** Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)

/* (TD, T1, T2, TX1, TX2) */
/*
 * Operand type tuples: destination, two sources, and the types the
 * sources are extended to before the operation.  OP_* keep width,
 * WOP_* widen the destination, NOP_* narrow it.
 */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

/* Generate do_NAME performing vd[i] = OP(vs2[i], vs1[i]) */
#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
}
#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

/*
 * Common driver for vector-vector integer ops: apply fn per active
 * element, honour the mask-agnostic and tail-agnostic policies.
 */
static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivv2_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME, ESZ)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1,  \
                  void *vs2, CPURISCVState *env,  \
                  uint32_t desc)                  \
{                                                 \
    do_vext_vv(vd, v0, vs1, vs2, env, desc,       \
               do_##NAME, ESZ);                   \
}

GEN_VEXT_VV(vadd_vv_b, 1)
GEN_VEXT_VV(vadd_vv_h, 2)
GEN_VEXT_VV(vadd_vv_w, 4)
GEN_VEXT_VV(vadd_vv_d, 8)
GEN_VEXT_VV(vsub_vv_b, 1)
GEN_VEXT_VV(vsub_vv_h, 2)
GEN_VEXT_VV(vsub_vv_w, 4)
GEN_VEXT_VV(vsub_vv_d, 8)

typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operator type.
 * (TX1)(T1)s1 expands the operator type of widen or narrow operations.
 */
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
}

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

/*
 * Common driver for vector-scalar integer ops; same masking and
 * tail policies as do_vext_vv.
 */
static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivx2_fn fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
            continue;
        }
        fn(vd, s1, vs2, i);
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME, ESZ)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    do_vext_vx(vd, v0, s1, vs2, env, desc,                \
               do_##NAME, ESZ);                           \
}

GEN_VEXT_VX(vadd_vx_b, 1)
GEN_VEXT_VX(vadd_vx_h, 2)
GEN_VEXT_VX(vadd_vx_w, 4)
GEN_VEXT_VX(vadd_vx_d, 8)
GEN_VEXT_VX(vsub_vx_b, 1)
GEN_VEXT_VX(vsub_vx_h, 2)
GEN_VEXT_VX(vsub_vx_w, 4)
GEN_VEXT_VX(vsub_vx_d, 8)
GEN_VEXT_VX(vrsub_vx_b, 1)
GEN_VEXT_VX(vrsub_vx_h, 2)
GEN_VEXT_VX(vrsub_vx_w, 4)
GEN_VEXT_VX(vrsub_vx_d, 8)

/* gvec fallback helpers: d[i] = b - a[i] (reverse subtract by scalar) */
void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
/*
 * NOTE(review): WOP_UUU_* / WOP_SSS_* below re-define macros already
 * defined above with identical replacement lists — a benign (legal)
 * redefinition. WOP_W*: the first source is already destination-width.
 */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 986 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 987 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 988 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 989 GEN_VEXT_VV(vwaddu_vv_b, 2) 990 GEN_VEXT_VV(vwaddu_vv_h, 4) 991 GEN_VEXT_VV(vwaddu_vv_w, 8) 992 GEN_VEXT_VV(vwsubu_vv_b, 2) 993 GEN_VEXT_VV(vwsubu_vv_h, 4) 994 GEN_VEXT_VV(vwsubu_vv_w, 8) 995 GEN_VEXT_VV(vwadd_vv_b, 2) 996 GEN_VEXT_VV(vwadd_vv_h, 4) 997 GEN_VEXT_VV(vwadd_vv_w, 8) 998 GEN_VEXT_VV(vwsub_vv_b, 2) 999 GEN_VEXT_VV(vwsub_vv_h, 4) 1000 GEN_VEXT_VV(vwsub_vv_w, 8) 1001 GEN_VEXT_VV(vwaddu_wv_b, 2) 1002 GEN_VEXT_VV(vwaddu_wv_h, 4) 1003 GEN_VEXT_VV(vwaddu_wv_w, 8) 1004 GEN_VEXT_VV(vwsubu_wv_b, 2) 1005 GEN_VEXT_VV(vwsubu_wv_h, 4) 1006 GEN_VEXT_VV(vwsubu_wv_w, 8) 1007 GEN_VEXT_VV(vwadd_wv_b, 2) 1008 GEN_VEXT_VV(vwadd_wv_h, 4) 1009 GEN_VEXT_VV(vwadd_wv_w, 8) 1010 GEN_VEXT_VV(vwsub_wv_b, 2) 1011 GEN_VEXT_VV(vwsub_wv_h, 4) 1012 GEN_VEXT_VV(vwsub_wv_w, 8) 1013 1014 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 1015 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 1016 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 1017 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 1018 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 1019 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 1020 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 1021 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 1022 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 1023 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 1024 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 1025 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 1026 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 1027 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 1028 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 1029 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
/* helpers for the widening vector-scalar forms (dest element size) */
GEN_VEXT_VX(vwaddu_vx_b, 2)
GEN_VEXT_VX(vwaddu_vx_h, 4)
GEN_VEXT_VX(vwaddu_vx_w, 8)
GEN_VEXT_VX(vwsubu_vx_b, 2)
GEN_VEXT_VX(vwsubu_vx_h, 4)
GEN_VEXT_VX(vwsubu_vx_w, 8)
GEN_VEXT_VX(vwadd_vx_b, 2)
GEN_VEXT_VX(vwadd_vx_h, 4)
GEN_VEXT_VX(vwadd_vx_w, 8)
GEN_VEXT_VX(vwsub_vx_b, 2)
GEN_VEXT_VX(vwsub_vx_h, 4)
GEN_VEXT_VX(vwsub_vx_w, 8)
GEN_VEXT_VX(vwaddu_wx_b, 2)
GEN_VEXT_VX(vwaddu_wx_h, 4)
GEN_VEXT_VX(vwaddu_wx_w, 8)
GEN_VEXT_VX(vwsubu_wx_b, 2)
GEN_VEXT_VX(vwsubu_wx_h, 4)
GEN_VEXT_VX(vwsubu_wx_w, 8)
GEN_VEXT_VX(vwadd_wx_b, 2)
GEN_VEXT_VX(vwadd_wx_h, 4)
GEN_VEXT_VX(vwadd_wx_w, 8)
GEN_VEXT_VX(vwsub_wx_b, 2)
GEN_VEXT_VX(vwsub_wx_h, 4)
GEN_VEXT_VX(vwsub_wx_w, 8)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

/*
 * vadc/vsbc vector-vector executor.  The mask register v0 supplies the
 * carry/borrow-in bit per element; these instructions are unmasked, so
 * every element up to vl is written.
 */
#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t esz = sizeof(ETYPE);                             \
    uint32_t total_elems =                                    \
        vext_get_total_elems(env, desc, esz);                 \
    uint32_t vta = vext_vta(desc);                            \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
    /* set tail elements to 1s */                             \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

/*
 * vadc/vsbc vector-scalar executor; s1 is sign-extended from XLEN and
 * truncated to the element type before the operation.
 */
#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t esz = sizeof(ETYPE);                                        \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t i;                                                          \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
    /* set tail elements to 1s */                                        \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

/*
 * Carry-out of N + M + C without widening: when C is set, the sum wraps
 * iff (N + M + 1) <= N; otherwise iff (N + M) < N.  DO_MSBC is the
 * analogous borrow-out of N - M - C.
 */
#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
                          (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)

/*
 * vmadc/vmsbc vector-vector executor: writes the carry/borrow-out into
 * a mask register.  A mask register always holds VLEN bits, hence
 * total_elems = vlen here rather than vext_get_total_elems().
 */
#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
    /* mask destination register are always tail-agnostic */  \
    /* set tail elements to 1s */                             \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

/* vmadc/vmsbc vector-scalar executor (see VVM variant above) */
#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,        \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i,                             \
                DO_OP(s2, (ETYPE)(target_long)s1, carry));    \
    }                                                         \
    env->vstart = 0;                                          \
    /* mask destination register are always tail-agnostic */  \
    /* set tail elements to 1s */                             \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)
RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M)  (N << (M))
#define DO_SRL(N, M)  (N >> (M))

/*
 * generate the helpers for shift instructions with two vector operators
 *
 * TS1/TS2 are the destination/shift-source and value-source element
 * types (arithmetic right shift is obtained by making TS2 signed so
 * ">>" sign-extends); MASK clamps the shift amount to the element
 * width, avoiding undefined over-wide shifts in C.
 */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)     \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                  \
                  void *vs2, CPURISCVState *env, uint32_t desc)   \
{                                                                 \
    uint32_t vm = vext_vm(desc);                                  \
    uint32_t vl = env->vl;                                        \
    uint32_t esz = sizeof(TS1);                                   \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);  \
    uint32_t vta = vext_vta(desc);                                \
    uint32_t vma = vext_vma(desc);                                \
    uint32_t i;                                                   \
                                                                  \
    for (i = env->vstart; i < vl; i++) {                          \
        if (!vm && !vext_elem_mask(v0, i)) {                      \
            /* set masked-off elements to 1s */                   \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);   \
            continue;                                             \
        }                                                         \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                          \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                          \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                \
    }                                                             \
    env->vstart = 0;                                              \
    /* set tail elements to 1s */                                 \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);      \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

/* signed source type makes DO_SRL an arithmetic shift */
GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* generate the helpers for shift instructions with one vector and one scalar */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
                  void *vs2, CPURISCVState *env,            \
                  uint32_t desc)                            \
{                                                           \
    uint32_t vm = vext_vm(desc);                            \
    uint32_t vl = env->vl;                                  \
    uint32_t esz = sizeof(TD);                              \
    uint32_t total_elems =                                  \
        vext_get_total_elems(env, desc, esz);               \
    uint32_t vta = vext_vta(desc);                          \
    uint32_t vma = vext_vma(desc);                          \
    uint32_t i;                                             \
                                                            \
    for (i = env->vstart; i < vl; i++) {                    \
        if (!vm && !vext_elem_mask(v0, i)) {                \
            /* set masked-off elements to 1s */             \
            vext_set_elems_1s(vd, vma, i * esz,             \
                              (i + 1) * esz);               \
            continue;                                       \
        }                                                   \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
    }                                                       \
    env->vstart = 0;                                        \
    /* set tail elements to 1s */                           \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
/* 2*SEW source, SEW destination; shift amount masked to 2*SEW - 1 */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

/*
 * Compare executor writing into a mask register; like vmadc above, a
 * mask register holds VLEN bits, so the tail runs to vlen elements.
 */
#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
    /* mask destination register are always tail-agnostic */  \
    /* set tail elements to 1s */                             \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1420 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1421 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1422 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1423 1424 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1425 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1426 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1427 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1428 1429 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1430 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1431 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1432 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1433 1434 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1435 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1436 CPURISCVState *env, uint32_t desc) \ 1437 { \ 1438 uint32_t vm = vext_vm(desc); \ 1439 uint32_t vl = env->vl; \ 1440 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 1441 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1442 uint32_t vma = vext_vma(desc); \ 1443 uint32_t i; \ 1444 \ 1445 for (i = env->vstart; i < vl; i++) { \ 1446 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1447 if (!vm && !vext_elem_mask(v0, i)) { \ 1448 /* set masked-off elements to 1s */ \ 1449 if (vma) { \ 1450 vext_set_elem_mask(vd, i, 1); \ 1451 } \ 1452 continue; \ 1453 } \ 1454 vext_set_elem_mask(vd, i, \ 1455 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1456 } \ 1457 env->vstart = 0; \ 1458 /* mask destination register are always tail-agnostic */ \ 1459 /* set tail elements to 1s */ \ 1460 if (vta_all_1s) { \ 1461 for (; i < total_elems; i++) { \ 1462 vext_set_elem_mask(vd, i, 1); \ 1463 } \ 1464 } \ 1465 } 1466 1467 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1468 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1469 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1470 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1471 1472 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1473 
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

/* greater-than exists only in the vector-scalar form */
GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1)
GEN_VEXT_VV(vminu_vv_h, 2)
GEN_VEXT_VV(vminu_vv_w, 4)
GEN_VEXT_VV(vminu_vv_d, 8)
GEN_VEXT_VV(vmin_vv_b, 1)
GEN_VEXT_VV(vmin_vv_h, 2)
GEN_VEXT_VV(vmin_vv_w, 4)
GEN_VEXT_VV(vmin_vv_d, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8)
GEN_VEXT_VV(vmax_vv_b, 1)
GEN_VEXT_VV(vmax_vv_h, 2)
GEN_VEXT_VV(vmax_vv_w, 4)
GEN_VEXT_VV(vmax_vv_d, 8)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1)
GEN_VEXT_VX(vminu_vx_h, 2)
GEN_VEXT_VX(vminu_vx_w, 4)
GEN_VEXT_VX(vminu_vx_d, 8)
GEN_VEXT_VX(vmin_vx_b, 1)
GEN_VEXT_VX(vmin_vx_h, 2)
GEN_VEXT_VX(vmin_vx_w, 4)
GEN_VEXT_VX(vmin_vx_d, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8)
GEN_VEXT_VX(vmax_vx_b, 1)
GEN_VEXT_VX(vmax_vx_h, 2)
GEN_VEXT_VX(vmax_vx_w, 4)
GEN_VEXT_VX(vmax_vx_d, 8)

/* Vector Single-Width Integer Multiply Instructions */
#define DO_MUL(N, M) (N * M)
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b, 1)
GEN_VEXT_VV(vmul_vv_h, 2)
GEN_VEXT_VV(vmul_vv_w, 4)
GEN_VEXT_VV(vmul_vv_d, 8)

/*
 * High half of the signed product: compute in twice the width and
 * shift down.  The 64-bit case has no wider type, so it uses the
 * host-utils 128-bit multiply primitives instead.
 */
static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    return (int16_t)s2 * (int16_t)s1 >> 8;
}

static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    return (int32_t)s2 * (int32_t)s1 >> 16;
}

static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    return (int64_t)s2 * (int64_t)s1 >> 32;
}

static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t hi_64, lo_64;

    muls64(&lo_64, &hi_64, s1, s2);
    return hi_64;
}

/* high half of the unsigned product */
static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    return (uint16_t)s2 * (uint16_t)s1 >> 8;
}

static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    return (uint32_t)s2 * (uint32_t)s1 >> 16;
}

static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    return (uint64_t)s2 * (uint64_t)s1 >> 32;
}

static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);
    return hi_64;
}

/* high half of the signed(s2) x unsigned(s1) product */
static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    return (int16_t)s2 * (uint16_t)s1 >> 8;
}

static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    return (int32_t)s2 * (uint32_t)s1 >> 16;
}

static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    return (int64_t)s2 * (uint64_t)s1 >> 32;
}

/*
 * Let  A = signed operand,
 *      B = unsigned operand
 *      P = mulu64(A, B), unsigned product
 *
 * LET  X = 2 ** 64  - A, 2's complement of A
 *      SP = signed product
 * THEN
 *      IF A < 0
 *          SP = -X * B
 *             = -(2 ** 64 - A) * B
 *             = A * B - 2 ** 64 * B
 *             = P - 2 ** 64 * B
 *      ELSE
 *          SP = P
 * THEN
 *      HI_P -= (A < 0 ? B : 0)
 */

static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);

    /* correction term derived above: subtract B from the high word */
    hi_64 -= s2 < 0 ? s1 : 0;
    return hi_64;
}

RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
GEN_VEXT_VV(vmulh_vv_b, 1)
GEN_VEXT_VV(vmulh_vv_h, 2)
GEN_VEXT_VV(vmulh_vv_w, 4)
GEN_VEXT_VV(vmulh_vv_d, 8)
GEN_VEXT_VV(vmulhu_vv_b, 1)
GEN_VEXT_VV(vmulhu_vv_h, 2)
GEN_VEXT_VV(vmulhu_vv_w, 4)
GEN_VEXT_VV(vmulhu_vv_d, 8)
GEN_VEXT_VV(vmulhsu_vv_b, 1)
GEN_VEXT_VV(vmulhsu_vv_h, 2)
GEN_VEXT_VV(vmulhsu_vv_w, 4)
GEN_VEXT_VV(vmulhsu_vv_d, 8)

RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
GEN_VEXT_VX(vmul_vx_b, 1)
GEN_VEXT_VX(vmul_vx_h, 2)
GEN_VEXT_VX(vmul_vx_w, 4)
GEN_VEXT_VX(vmul_vx_d, 8)
GEN_VEXT_VX(vmulh_vx_b, 1)
GEN_VEXT_VX(vmulh_vx_h, 2)
GEN_VEXT_VX(vmulh_vx_w, 4)
GEN_VEXT_VX(vmulh_vx_d, 8)
GEN_VEXT_VX(vmulhu_vx_b, 1)
GEN_VEXT_VX(vmulhu_vx_h, 2)
GEN_VEXT_VX(vmulhu_vx_w, 4)
GEN_VEXT_VX(vmulhu_vx_d, 8)
GEN_VEXT_VX(vmulhsu_vx_b, 1)
GEN_VEXT_VX(vmulhsu_vx_h, 2)
GEN_VEXT_VX(vmulhsu_vx_w, 4)
GEN_VEXT_VX(vmulhsu_vx_d, 8)

/* Vector Integer Divide Instructions */
/*
 * The RVV spec does not trap on divide-by-zero or overflow: x/0 yields
 * all-ones, x%0 yields x.  (N == -N) is true only for 0 and the
 * minimum signed value, so combined with M == -1 it detects the
 * MIN / -1 overflow case (for N == 0 the fallthrough N / M is 0
 * anyway), returning N resp. 0 as required.
 */
#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
#define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :       \
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
#define DO_REM(N, M)  (unlikely(M == 0) ? N :                       \
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)

RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
GEN_VEXT_VV(vdivu_vv_b, 1)
GEN_VEXT_VV(vdivu_vv_h, 2)
GEN_VEXT_VV(vdivu_vv_w, 4)
GEN_VEXT_VV(vdivu_vv_d, 8)
GEN_VEXT_VV(vdiv_vv_b, 1)
GEN_VEXT_VV(vdiv_vv_h, 2)
GEN_VEXT_VV(vdiv_vv_w, 4)
GEN_VEXT_VV(vdiv_vv_d, 8)
GEN_VEXT_VV(vremu_vv_b, 1)
GEN_VEXT_VV(vremu_vv_h, 2)
GEN_VEXT_VV(vremu_vv_w, 4)
GEN_VEXT_VV(vremu_vv_d, 8)
GEN_VEXT_VV(vrem_vv_b, 1)
GEN_VEXT_VV(vrem_vv_h, 2)
GEN_VEXT_VV(vrem_vv_w, 4)
GEN_VEXT_VV(vrem_vv_d, 8)

RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
GEN_VEXT_VX(vdivu_vx_b, 1)
GEN_VEXT_VX(vdivu_vx_h, 2)
GEN_VEXT_VX(vdivu_vx_w, 4)
GEN_VEXT_VX(vdivu_vx_d, 8)
GEN_VEXT_VX(vdiv_vx_b, 1)
GEN_VEXT_VX(vdiv_vx_h, 2)
GEN_VEXT_VX(vdiv_vx_w, 4)
GEN_VEXT_VX(vdiv_vx_d, 8)
GEN_VEXT_VX(vremu_vx_b, 1)
GEN_VEXT_VX(vremu_vx_h, 2)
GEN_VEXT_VX(vremu_vx_w, 4)
GEN_VEXT_VX(vremu_vx_d, 8)
GEN_VEXT_VX(vrem_vx_b, 1)
GEN_VEXT_VX(vrem_vx_h, 2)
GEN_VEXT_VX(vrem_vx_w, 4)
GEN_VEXT_VX(vrem_vx_d, 8)

/* Vector Widening Integer Multiply Instructions */
/* operands are widened first, so DO_MUL never overflows here */
RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
GEN_VEXT_VV(vwmul_vv_b, 2)
GEN_VEXT_VV(vwmul_vv_h, 4)
GEN_VEXT_VV(vwmul_vv_w, 8)
GEN_VEXT_VV(vwmulu_vv_b, 2)
GEN_VEXT_VV(vwmulu_vv_h, 4)
GEN_VEXT_VV(vwmulu_vv_w, 8)
GEN_VEXT_VV(vwmulsu_vv_b, 2)
GEN_VEXT_VV(vwmulsu_vv_h, 4)
GEN_VEXT_VV(vwmulsu_vv_w, 8)

/* vector-scalar widening multiply */
RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
GEN_VEXT_VX(vwmul_vx_b, 2)
GEN_VEXT_VX(vwmul_vx_h, 4)
GEN_VEXT_VX(vwmul_vx_w, 8)
GEN_VEXT_VX(vwmulu_vx_b, 2)
GEN_VEXT_VX(vwmulu_vx_h, 4)
GEN_VEXT_VX(vwmulu_vx_w, 8)
GEN_VEXT_VX(vwmulsu_vx_b, 2)
GEN_VEXT_VX(vwmulsu_vx_h, 4)
GEN_VEXT_VX(vwmulsu_vx_w, 8)

/* Vector Single-Width Integer Multiply-Add Instructions */
/*
 * Three-operand vector-vector element op: unlike OPIVV2, the current
 * destination element d is read and passed to OP as the accumulator.
 */
#define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)       \
{                                                                  \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
    TD d = *((TD *)vd + HD(i));                                    \
    *((TD *)vd + HD(i)) = OP(s2, s1, d);                           \
}

/* N = vs2 element, M = vs1 element (or scalar), D = old vd element */
#define DO_MACC(N, M, D) (M * N + D)
#define DO_NMSAC(N, M, D) (-(M * N) + D)
#define DO_MADD(N, M, D) (M * D + N)
#define DO_NMSUB(N, M, D) (-(M * D) + N)
RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
GEN_VEXT_VV(vmacc_vv_b, 1)
GEN_VEXT_VV(vmacc_vv_h, 2)
GEN_VEXT_VV(vmacc_vv_w, 4)
GEN_VEXT_VV(vmacc_vv_d, 8)
GEN_VEXT_VV(vnmsac_vv_b, 1)
GEN_VEXT_VV(vnmsac_vv_h, 2)
GEN_VEXT_VV(vnmsac_vv_w, 4)
GEN_VEXT_VV(vnmsac_vv_d, 8)
GEN_VEXT_VV(vmadd_vv_b, 1)
GEN_VEXT_VV(vmadd_vv_h, 2)
GEN_VEXT_VV(vmadd_vv_w, 4)
GEN_VEXT_VV(vmadd_vv_d, 8)
GEN_VEXT_VV(vnmsub_vv_b, 1)
GEN_VEXT_VV(vnmsub_vv_h, 2)
GEN_VEXT_VV(vnmsub_vv_w, 4)
GEN_VEXT_VV(vnmsub_vv_d, 8)

/*
 * Three-operand vector-scalar element op: the scalar s1 is truncated
 * to T1 then widened to TX1 (sign/zero extension chosen by the OP_* /
 * WOP_* type pack), and the old vd element is the accumulator.
 */
#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    TD d = *((TD *)vd + HD(i));                                     \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                   \
}

RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
GEN_VEXT_VX(vmacc_vx_b, 1)
GEN_VEXT_VX(vmacc_vx_h, 2)
GEN_VEXT_VX(vmacc_vx_w, 4)
GEN_VEXT_VX(vmacc_vx_d, 8)
GEN_VEXT_VX(vnmsac_vx_b, 1)
GEN_VEXT_VX(vnmsac_vx_h, 2)
GEN_VEXT_VX(vnmsac_vx_w, 4)
GEN_VEXT_VX(vnmsac_vx_d, 8)
GEN_VEXT_VX(vmadd_vx_b, 1)
GEN_VEXT_VX(vmadd_vx_h, 2)
GEN_VEXT_VX(vmadd_vx_w, 4)
GEN_VEXT_VX(vmadd_vx_d, 8)
GEN_VEXT_VX(vnmsub_vx_b, 1)
GEN_VEXT_VX(vnmsub_vx_h, 2)
GEN_VEXT_VX(vnmsub_vx_w, 4)
GEN_VEXT_VX(vnmsub_vx_d, 8)

/* Vector Widening Integer Multiply-Add Instructions */
RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
GEN_VEXT_VV(vwmaccu_vv_b, 2)
GEN_VEXT_VV(vwmaccu_vv_h, 4)
GEN_VEXT_VV(vwmaccu_vv_w, 8)
GEN_VEXT_VV(vwmacc_vv_b, 2)
GEN_VEXT_VV(vwmacc_vv_h, 4)
GEN_VEXT_VV(vwmacc_vv_w, 8)
GEN_VEXT_VV(vwmaccsu_vv_b, 2)
GEN_VEXT_VV(vwmaccsu_vv_h, 4)
GEN_VEXT_VV(vwmaccsu_vv_w, 8)

RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
/* vwmaccus has no vv form; scalar is unsigned, vector signed (WOP_SUS) */
RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
GEN_VEXT_VX(vwmaccu_vx_b, 2)
GEN_VEXT_VX(vwmaccu_vx_h, 4)
GEN_VEXT_VX(vwmaccu_vx_w, 8)
GEN_VEXT_VX(vwmacc_vx_b, 2)
GEN_VEXT_VX(vwmacc_vx_h, 4)
GEN_VEXT_VX(vwmacc_vx_w, 8)
GEN_VEXT_VX(vwmaccsu_vx_b, 2)
GEN_VEXT_VX(vwmaccsu_vx_h, 4)
GEN_VEXT_VX(vwmaccsu_vx_w, 8)
GEN_VEXT_VX(vwmaccus_vx_b, 2)
GEN_VEXT_VX(vwmaccus_vx_h, 4)
GEN_VEXT_VX(vwmaccus_vx_w, 8)

/* Vector Integer Merge and Move Instructions */
/*
 * vmv.v.v: copy elements [vstart, vl) of vs1 into vd, then apply the
 * tail-agnostic policy (vta) to the elements past vl.
 */
#define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
                  uint32_t desc)                                     \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
        *((ETYPE *)vd + H(i)) = s1;                                  \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)

/* vmv.v.x: splat the scalar s1 (truncated to ETYPE) into vd. */
#define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
                  uint32_t desc)                                     \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)

/*
 * vmerge.vvm: per-element select under mask v0 -- mask bit clear takes
 * the element from vs2 (old value operand), mask bit set takes vs1.
 */
#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
                  CPURISCVState *env, uint32_t desc)                 \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
        *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)

/*
 * vmerge.vxm: like vmerge.vvm but the "set" source is the scalar s1,
 * sign-extended through target_long before truncation to ETYPE.
 */
#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
                  void *vs2, CPURISCVState *env, uint32_t desc)      \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
        ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
                   (ETYPE)(target_long)s1);                          \
        *((ETYPE *)vd + H(i)) = d;                                   \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)

/*
 *** Vector Fixed-Point Arithmetic Instructions
 */

/* Vector Single-Width Saturating Add and Subtract */

/*
 * As fixed point instructions probably have round mode and saturation,
 * define common macros for fixed point here.
 */
/* per-element worker signature shared by all fixed-point vv helpers */
typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
                          CPURISCVState *env, int vxrm);

/* two-operand vv element op that also receives env and rounding mode */
#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
static inline void                                                  \
do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
          CPURISCVState *env, int vxrm)                             \
{                                                                   \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
}

/*
 * Inner loop over [vstart, vl): skip (and mask-policy-fill) inactive
 * elements, apply fn to active ones.  vxrm is a compile-time-ish
 * constant here so the rounding branch can be hoisted by the caller.
 */
static inline void
vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
             uint32_t vl, uint32_t vm, int vxrm,
             opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
{
    for (uint32_t i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
            continue;
        }
        fn(vd, vs1, vs2, i, env, vxrm);
    }
    env->vstart = 0;
}

/*
 * Dispatch on the current vxrm so each specialization of vext_vv_rm_1
 * sees a constant rounding mode, then apply the tail policy.
 */
static inline void
vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
             uint32_t desc,
             opivv2_rm_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);

    switch (env->vxrm) {
    case 0: /* rnu */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 0, fn, vma, esz);
        break;
    case 1: /* rne */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 1, fn, vma, esz);
        break;
    case 2: /* rdn */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 2, fn, vma, esz);
        break;
    default: /* rod */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 3, fn, vma, esz);
        break;
    }
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate helpers for fixed point instructions with OPIVV format */
#define GEN_VEXT_VV_RM(NAME, ESZ)                               \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)            \
{                                                               \
    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc,                   \
                 do_##NAME, ESZ);                               \
}

/*
 * Unsigned saturating add: unsigned wraparound (res < a) means
 * overflow -- clamp to the type maximum and raise vxsat.
 */
static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
{
    uint8_t res = a + b;
    if (res < a) {
        res = UINT8_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
                               uint16_t b)
{
    uint16_t res = a + b;
    if (res < a) {
        res = UINT16_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
                               uint32_t b)
{
    uint32_t res = a + b;
    if (res < a) {
        res = UINT32_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
                               uint64_t b)
{
    uint64_t res = a + b;
    if (res < a) {
        res = UINT64_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
GEN_VEXT_VV_RM(vsaddu_vv_d, 8)

/* per-element worker signature for fixed-point vx helpers */
typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
                          CPURISCVState *env, int vxrm);

#define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)          \
static inline void                                                  \
do_##NAME(void *vd, target_long s1, void *vs2, int i,               \
          CPURISCVState *env, int vxrm)                             \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);           \
}
/* vx counterpart of vext_vv_rm_1: same mask/tail handling, scalar s1 */
static inline void
vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
             CPURISCVState *env,
             uint32_t vl, uint32_t vm, int vxrm,
             opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
{
    for (uint32_t i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
            continue;
        }
        fn(vd, s1, vs2, i, env, vxrm);
    }
    env->vstart = 0;
}

/* vx counterpart of vext_vv_rm_2: specialize on vxrm, then tail policy */
static inline void
vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
             CPURISCVState *env,
             uint32_t desc,
             opivx2_rm_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);

    switch (env->vxrm) {
    case 0: /* rnu */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 0, fn, vma, esz);
        break;
    case 1: /* rne */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 1, fn, vma, esz);
        break;
    case 2: /* rdn */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 2, fn, vma, esz);
        break;
    default: /* rod */
        vext_vx_rm_1(vd, v0, s1, vs2,
                     env, vl, vm, 3, fn, vma, esz);
        break;
    }
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate helpers for fixed point instructions with OPIVX format */
#define GEN_VEXT_VX_RM(NAME, ESZ)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    vext_vx_rm_2(vd, v0, s1, vs2, env, desc,              \
                 do_##NAME, ESZ);                         \
}

RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
RVVCALL(OPIVX2_RM,
vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
GEN_VEXT_VX_RM(vsaddu_vx_d, 8)

/*
 * Signed saturating add: overflow occurred iff a and b share a sign
 * and the result's sign differs from both -- that is exactly when
 * (res ^ a) and (res ^ b) both have the sign bit set.  On overflow,
 * clamp toward the operands' common sign and raise vxsat.
 */
static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    int8_t res = a + b;
    if ((res ^ a) & (res ^ b) & INT8_MIN) {
        res = a > 0 ? INT8_MAX : INT8_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
{
    int16_t res = a + b;
    if ((res ^ a) & (res ^ b) & INT16_MIN) {
        res = a > 0 ? INT16_MAX : INT16_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    int32_t res = a + b;
    if ((res ^ a) & (res ^ b) & INT32_MIN) {
        res = a > 0 ? INT32_MAX : INT32_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    int64_t res = a + b;
    if ((res ^ a) & (res ^ b) & INT64_MIN) {
        res = a > 0 ?
INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
GEN_VEXT_VV_RM(vsadd_vv_b, 1)
GEN_VEXT_VV_RM(vsadd_vv_h, 2)
GEN_VEXT_VV_RM(vsadd_vv_w, 4)
GEN_VEXT_VV_RM(vsadd_vv_d, 8)

RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
GEN_VEXT_VX_RM(vsadd_vx_b, 1)
GEN_VEXT_VX_RM(vsadd_vx_h, 2)
GEN_VEXT_VX_RM(vsadd_vx_w, 4)
GEN_VEXT_VX_RM(vsadd_vx_d, 8)

/*
 * Unsigned saturating subtract: a borrow shows up as res > a;
 * clamp to 0 and raise vxsat.
 */
static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
{
    uint8_t res = a - b;
    if (res > a) {
        res = 0;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
                               uint16_t b)
{
    uint16_t res = a - b;
    if (res > a) {
        res = 0;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
                               uint32_t b)
{
    uint32_t res = a - b;
    if (res > a) {
        res = 0;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
                               uint64_t b)
{
    uint64_t res = a - b;
    if (res > a) {
        res = 0;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
RVVCALL(OPIVV2_RM, vssubu_vv_d,
OP_UUU_D, H8, H8, H8, ssubu64)
GEN_VEXT_VV_RM(vssubu_vv_b, 1)
GEN_VEXT_VV_RM(vssubu_vv_h, 2)
GEN_VEXT_VV_RM(vssubu_vv_w, 4)
GEN_VEXT_VV_RM(vssubu_vv_d, 8)

RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
GEN_VEXT_VX_RM(vssubu_vx_b, 1)
GEN_VEXT_VX_RM(vssubu_vx_h, 2)
GEN_VEXT_VX_RM(vssubu_vx_w, 4)
GEN_VEXT_VX_RM(vssubu_vx_d, 8)

/*
 * Signed saturating subtract: overflow occurred iff a and b have
 * opposite signs and the result's sign differs from a's; clamp
 * toward a's sign (a >= 0 saturates high) and raise vxsat.
 */
static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    int8_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT8_MIN) {
        res = a >= 0 ? INT8_MAX : INT8_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
{
    int16_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT16_MIN) {
        res = a >= 0 ? INT16_MAX : INT16_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    int32_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT32_MIN) {
        res = a >= 0 ? INT32_MAX : INT32_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    int64_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT64_MIN) {
        res = a >= 0 ?
INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
GEN_VEXT_VV_RM(vssub_vv_b, 1)
GEN_VEXT_VV_RM(vssub_vv_h, 2)
GEN_VEXT_VV_RM(vssub_vv_w, 4)
GEN_VEXT_VV_RM(vssub_vv_d, 8)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
GEN_VEXT_VX_RM(vssub_vx_b, 1)
GEN_VEXT_VX_RM(vssub_vx_h, 2)
GEN_VEXT_VX_RM(vssub_vx_w, 4)
GEN_VEXT_VX_RM(vssub_vx_d, 8)

/* Vector Single-Width Averaging Add and Subtract */
/*
 * Return the rounding increment (0 or 1) to add after shifting @v
 * right by @shift bits, per the fixed-point rounding mode @vxrm
 * (vxrm = 0 rnu, 1 rne, 2 rdn, 3 rod).  shift == 0 (nothing is
 * discarded) always yields 0.
 *
 * NOTE(review): d is extracted before the shift range check; all
 * visible callers mask or bound shift to <= 63, where extract64 is
 * valid -- confirm if new callers appear.
 */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d = extract64(v, shift, 1);   /* result LSB after shifting */
    uint8_t d1;                           /* most significant discarded bit */
    uint64_t D1, D2;                      /* discarded bit runs */

    if (shift == 0 || shift > 64) {
        return 0;
    }

    d1 = extract64(v, shift - 1, 1);
    D1 = extract64(v, 0, shift);
    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            D2 = extract64(v, 0, shift - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}

/* averaging add: (a + b + rounding) >> 1, computed in 64-bit, no overflow */
static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    int64_t res = (int64_t)a + b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    int64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    /* non-zero only when the 64-bit add overflowed (same operand signs,
       different result sign) */
    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63. */
    return ((res >> 1) ^ over) + round;
}

RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
GEN_VEXT_VV_RM(vaadd_vv_b, 1)
GEN_VEXT_VV_RM(vaadd_vv_h, 2)
GEN_VEXT_VV_RM(vaadd_vv_w, 4)
GEN_VEXT_VV_RM(vaadd_vv_d, 8)

RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
GEN_VEXT_VX_RM(vaadd_vx_b, 1)
GEN_VEXT_VX_RM(vaadd_vx_h, 2)
GEN_VEXT_VX_RM(vaadd_vx_w, 4)
GEN_VEXT_VX_RM(vaadd_vx_d, 8)

/* unsigned averaging add, 64-bit intermediate avoids overflow */
static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
                               uint32_t a, uint32_t b)
{
    uint64_t res = (uint64_t)a + b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
                               uint64_t a, uint64_t b)
{
    uint64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    /* carry out of the 64-bit add becomes bit 63 of the halved result */
    uint64_t over = (uint64_t)(res < a) << 63;

    return ((res >> 1) | over) + round;
}

RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
GEN_VEXT_VV_RM(vaaddu_vv_d, 8)

RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
GEN_VEXT_VX_RM(vaaddu_vx_d, 8)

/* averaging subtract: (a - b + rounding) >> 1, 64-bit intermediate */
static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);
    /* subtraction overflows iff operand signs differ and res's differs
       from a's */
    int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63. */
    return ((res >> 1) ^ over) + round;
}

RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
GEN_VEXT_VV_RM(vasub_vv_b, 1)
GEN_VEXT_VV_RM(vasub_vv_h, 2)
GEN_VEXT_VV_RM(vasub_vv_w, 4)
GEN_VEXT_VV_RM(vasub_vv_d, 8)

RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
GEN_VEXT_VX_RM(vasub_vx_b, 1)
GEN_VEXT_VX_RM(vasub_vx_h, 2)
GEN_VEXT_VX_RM(vasub_vx_w, 4)
GEN_VEXT_VX_RM(vasub_vx_d, 8)

/* unsigned averaging subtract; signed 64-bit holds the full difference */
static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
                               uint32_t a, uint32_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
                               uint64_t a, uint64_t b)
{
    uint64_t res = (uint64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);
    /* a borrow out of the 64-bit subtract becomes bit 63 of the halved
       (65-bit) result */
    uint64_t over = (uint64_t)(res > a) << 63;

    return ((res >> 1) | over) + round;
}

RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
GEN_VEXT_VV_RM(vasubu_vv_b, 1)
GEN_VEXT_VV_RM(vasubu_vv_h, 2)
GEN_VEXT_VV_RM(vasubu_vv_w, 4)
GEN_VEXT_VV_RM(vasubu_vv_d, 8)

RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
GEN_VEXT_VX_RM(vasubu_vx_b, 1)
GEN_VEXT_VX_RM(vasubu_vx_h, 2)
GEN_VEXT_VX_RM(vasubu_vx_w, 4)
GEN_VEXT_VX_RM(vasubu_vx_d, 8)

/* Vector Single-Width Fractional Multiply with Rounding and Saturation */
/*
 * vsmul: (a * b) >> (SEW - 1) with rounding, saturated to SEW bits.
 * The product is formed in double width; after the shift only the
 * INT_MIN * INT_MIN case can exceed INT_MAX, which the range checks
 * below (or the explicit special case in vsmul64) catch.
 */
static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    uint8_t round;
    int16_t res;

    res = (int16_t)a * (int16_t)b;
    round = get_round(vxrm, res, 7);
    res = (res >> 7) + round;

    if (res > INT8_MAX) {
        env->vxsat = 0x1;
        return INT8_MAX;
    } else if (res < INT8_MIN) {
        env->vxsat = 0x1;
        return INT8_MIN;
    } else {
        return res;
    }
}

static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
{
    uint8_t round;
    int32_t res;

    res = (int32_t)a * (int32_t)b;
    round = get_round(vxrm, res, 15);
    res = (res >> 15) + round;

    if (res > INT16_MAX) {
        env->vxsat = 0x1;
        return INT16_MAX;
    } else if (res < INT16_MIN) {
        env->vxsat = 0x1;
        return INT16_MIN;
    } else {
        return res;
    }
}

static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    uint8_t round;
    int64_t res;

    res = (int64_t)a * (int64_t)b;
    round = get_round(vxrm, res, 31);
    res = (res >> 31) + round;

    if (res > INT32_MAX) {
        env->vxsat = 0x1;
        return INT32_MAX;
    } else if (res < INT32_MIN) {
        env->vxsat = 0x1;
        return INT32_MIN;
    } else {
        return res;
    }
}

/*
 * 64-bit variant: no 128-bit type, so use muls64 for the full product
 * and special-case INT64_MIN * INT64_MIN (the only saturating product)
 * up front.
 */
static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    uint8_t round;
    uint64_t hi_64, lo_64;
    int64_t res;

    if (a == INT64_MIN && b == INT64_MIN) {
        env->vxsat = 1;
        return INT64_MAX;
    }

    muls64(&lo_64, &hi_64, a, b);
    round = get_round(vxrm, lo_64, 63);
    /*
     * Cannot overflow, as there are always
     * 2 sign bits after multiply.
     */
    res = (hi_64 << 1) | (lo_64 >> 63);
    if (round) {
        if (res == INT64_MAX) {
            env->vxsat = 1;
        } else {
            res += 1;
        }
    }
    return res;
}

RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
GEN_VEXT_VV_RM(vsmul_vv_b, 1)
GEN_VEXT_VV_RM(vsmul_vv_h, 2)
GEN_VEXT_VV_RM(vsmul_vv_w, 4)
GEN_VEXT_VV_RM(vsmul_vv_d, 8)

RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
GEN_VEXT_VX_RM(vsmul_vx_b, 1)
GEN_VEXT_VX_RM(vsmul_vx_h, 2)
GEN_VEXT_VX_RM(vsmul_vx_w, 4)
GEN_VEXT_VX_RM(vsmul_vx_d, 8)

/* Vector Single-Width Scaling
Shift Instructions */
/*
 * vssrl: logical right shift with rounding; the shift amount is the
 * low log2(SEW) bits of b.  No saturation is possible here.
 */
static inline uint8_t
vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
{
    uint8_t round, shift = b & 0x7;
    uint8_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    return res;
}
static inline uint16_t
vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
{
    uint8_t round, shift = b & 0xf;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
static inline uint32_t
vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x1f;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
static inline uint64_t
vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
{
    uint8_t round, shift = b & 0x3f;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
GEN_VEXT_VV_RM(vssrl_vv_b, 1)
GEN_VEXT_VV_RM(vssrl_vv_h, 2)
GEN_VEXT_VV_RM(vssrl_vv_w, 4)
GEN_VEXT_VV_RM(vssrl_vv_d, 8)

RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
GEN_VEXT_VX_RM(vssrl_vx_b, 1)
GEN_VEXT_VX_RM(vssrl_vx_h, 2)
GEN_VEXT_VX_RM(vssrl_vx_w, 4)
GEN_VEXT_VX_RM(vssrl_vx_d, 8)

/* vssra: arithmetic right shift with rounding (signed operands) */
static inline int8_t
vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    uint8_t round, shift = b & 0x7;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
static inline
int16_t 2814 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2815 { 2816 uint8_t round, shift = b & 0xf; 2817 2818 round = get_round(vxrm, a, shift); 2819 return (a >> shift) + round; 2820 } 2821 static inline int32_t 2822 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2823 { 2824 uint8_t round, shift = b & 0x1f; 2825 2826 round = get_round(vxrm, a, shift); 2827 return (a >> shift) + round; 2828 } 2829 static inline int64_t 2830 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2831 { 2832 uint8_t round, shift = b & 0x3f; 2833 2834 round = get_round(vxrm, a, shift); 2835 return (a >> shift) + round; 2836 } 2837 2838 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2839 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2840 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2841 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2842 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2843 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2844 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2845 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2846 2847 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2848 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2849 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2850 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2851 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2852 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2853 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2854 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2855 2856 /* Vector Narrowing Fixed-Point Clip Instructions */ 2857 static inline int8_t 2858 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2859 { 2860 uint8_t round, shift = b & 0xf; 2861 int16_t res; 2862 2863 round = get_round(vxrm, a, shift); 2864 res = (a >> shift) + round; 2865 if (res > INT8_MAX) { 2866 env->vxsat = 0x1; 2867 return INT8_MAX; 2868 } else if (res < INT8_MIN) { 2869 env->vxsat = 0x1; 2870 return INT8_MIN; 2871 } else { 2872 return res; 2873 } 2874 } 2875 2876 static inline int16_t 2877 
vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2878 { 2879 uint8_t round, shift = b & 0x1f; 2880 int32_t res; 2881 2882 round = get_round(vxrm, a, shift); 2883 res = (a >> shift) + round; 2884 if (res > INT16_MAX) { 2885 env->vxsat = 0x1; 2886 return INT16_MAX; 2887 } else if (res < INT16_MIN) { 2888 env->vxsat = 0x1; 2889 return INT16_MIN; 2890 } else { 2891 return res; 2892 } 2893 } 2894 2895 static inline int32_t 2896 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2897 { 2898 uint8_t round, shift = b & 0x3f; 2899 int64_t res; 2900 2901 round = get_round(vxrm, a, shift); 2902 res = (a >> shift) + round; 2903 if (res > INT32_MAX) { 2904 env->vxsat = 0x1; 2905 return INT32_MAX; 2906 } else if (res < INT32_MIN) { 2907 env->vxsat = 0x1; 2908 return INT32_MIN; 2909 } else { 2910 return res; 2911 } 2912 } 2913 2914 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2915 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2916 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2917 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 2918 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 2919 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 2920 2921 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2922 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2923 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2924 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 2925 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 2926 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 2927 2928 static inline uint8_t 2929 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2930 { 2931 uint8_t round, shift = b & 0xf; 2932 uint16_t res; 2933 2934 round = get_round(vxrm, a, shift); 2935 res = (a >> shift) + round; 2936 if (res > UINT8_MAX) { 2937 env->vxsat = 0x1; 2938 return UINT8_MAX; 2939 } else { 2940 return res; 2941 } 2942 } 2943 2944 static inline uint16_t 2945 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2946 { 2947 uint8_t round, shift = b & 0x1f; 2948 uint32_t res; 
2949 2950 round = get_round(vxrm, a, shift); 2951 res = (a >> shift) + round; 2952 if (res > UINT16_MAX) { 2953 env->vxsat = 0x1; 2954 return UINT16_MAX; 2955 } else { 2956 return res; 2957 } 2958 } 2959 2960 static inline uint32_t 2961 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2962 { 2963 uint8_t round, shift = b & 0x3f; 2964 uint64_t res; 2965 2966 round = get_round(vxrm, a, shift); 2967 res = (a >> shift) + round; 2968 if (res > UINT32_MAX) { 2969 env->vxsat = 0x1; 2970 return UINT32_MAX; 2971 } else { 2972 return res; 2973 } 2974 } 2975 2976 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2977 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2978 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2979 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 2980 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 2981 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 2982 2983 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2984 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2985 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 2986 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 2987 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 2988 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 2989 2990 /* 2991 *** Vector Float Point Arithmetic Instructions 2992 */ 2993 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2994 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2995 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2996 CPURISCVState *env) \ 2997 { \ 2998 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2999 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3000 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3001 } 3002 3003 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 3004 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3005 void *vs2, CPURISCVState *env, \ 3006 uint32_t desc) \ 3007 { \ 3008 uint32_t vm = vext_vm(desc); \ 3009 uint32_t vl = env->vl; \ 3010 uint32_t total_elems = \ 3011 vext_get_total_elems(env, desc, ESZ); \ 3012 
    uint32_t vta = vext_vta(desc);                        \
    uint32_t vma = vext_vma(desc);                        \
    uint32_t i;                                           \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            /* set masked-off elements to 1s */           \
            vext_set_elems_1s(vd, vma, i * ESZ,           \
                              (i + 1) * ESZ);             \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, vs1, vs2, i, env);                  \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, vl * ESZ,                  \
                      total_elems * ESZ);                 \
}

RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8)

/* Per-element body for vector-scalar FP ops: vd[i] = OP(vs2[i], s1). */
#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}

/* Vector-scalar helper entry point, same mask/tail policy as GEN_VEXT_VV_ENV. */
#define GEN_VEXT_VF(NAME, ESZ)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems =                                \
        vext_get_total_elems(env, desc, ESZ);             \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t vma = vext_vma(desc);                        \
    uint32_t i;                                           \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            /* set masked-off elements to 1s */           \
            vext_set_elems_1s(vd, vma, i * ESZ,           \
                              (i + 1) * ESZ);             \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, s1, vs2, i, env);                   \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, vl * ESZ,                  \
                      total_elems * ESZ);                 \
}

RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
GEN_VEXT_VF(vfadd_vf_h, 2)
GEN_VEXT_VF(vfadd_vf_w, 4)
GEN_VEXT_VF(vfadd_vf_d, 8)

RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
GEN_VEXT_VF(vfsub_vf_h, 2)
GEN_VEXT_VF(vfsub_vf_w, 4)
GEN_VEXT_VF(vfsub_vf_d, 8)

/* Reverse subtract (vfrsub): computes b - a, i.e. scalar - vector element. */
static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
{
    return float16_sub(b, a, s);
}

static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
{
    return float32_sub(b, a, s);
}

static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
{
    return float64_sub(b, a, s);
}

RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
GEN_VEXT_VF(vfrsub_vf_h, 2)
GEN_VEXT_VF(vfrsub_vf_w, 4)
GEN_VEXT_VF(vfrsub_vf_d, 8)

/* Vector Widening Floating-Point Add/Subtract Instructions */
/* Widen both operands to 2*SEW, then add in the wider format. */
static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
{
    return float32_add(float16_to_float32(a, true, s),
                       float16_to_float32(b, true, s), s);
}

static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
{
    return float64_add(float32_to_float64(a, s),
                       float32_to_float64(b, s), s);

}

RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
GEN_VEXT_VF(vfwadd_vf_h, 4)
GEN_VEXT_VF(vfwadd_vf_w, 8)

static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
{
    return float32_sub(float16_to_float32(a, true, s),
                       float16_to_float32(b, true, s), s);
}

static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
{
    return float64_sub(float32_to_float64(a, s),
                       float32_to_float64(b, s), s);

}

RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
GEN_VEXT_VF(vfwsub_vf_h, 4)
GEN_VEXT_VF(vfwsub_vf_w, 8)

/* .wv/.wf forms: first operand is already in the wide (2*SEW) format. */
static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
{
    return float32_add(a, float16_to_float32(b, true, s), s);
}

static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
{
    return float64_add(a, float32_to_float64(b, s), s);
}

RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
GEN_VEXT_VF(vfwadd_wf_h, 4)
GEN_VEXT_VF(vfwadd_wf_w, 8)

static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
{
    return float32_sub(a, float16_to_float32(b, true, s), s);
}

static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
{
    return float64_sub(a, float32_to_float64(b, s), s);
}

RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
GEN_VEXT_VF(vfwsub_wf_h, 4)
GEN_VEXT_VF(vfwsub_wf_w, 8)

/* Vector Single-Width Floating-Point Multiply/Divide Instructions */
RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
GEN_VEXT_VF(vfmul_vf_h, 2)
GEN_VEXT_VF(vfmul_vf_w, 4)
GEN_VEXT_VF(vfmul_vf_d, 8)

RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
GEN_VEXT_VF(vfdiv_vf_h, 2)
GEN_VEXT_VF(vfdiv_vf_w, 4)
GEN_VEXT_VF(vfdiv_vf_d, 8)

/* Reverse divide (vfrdiv): computes b / a, i.e. scalar / vector element. */
static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
{
    return float16_div(b, a, s);
}

static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
{
    return float32_div(b, a, s);
}

static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
{
    return float64_div(b, a, s);
}

RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
GEN_VEXT_VF(vfrdiv_vf_h, 2)
GEN_VEXT_VF(vfrdiv_vf_w, 4)
GEN_VEXT_VF(vfrdiv_vf_d, 8)

/* Vector Widening Floating-Point Multiply */
/* Widen both operands to 2*SEW, then multiply in the wider format. */
static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
{
    return float32_mul(float16_to_float32(a, true, s),
                       float16_to_float32(b, true, s), s);
}

static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
{
    return float64_mul(float32_to_float64(a, s),
                       float32_to_float64(b, s), s);

}
RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
GEN_VEXT_VF(vfwmul_vf_h, 4)
GEN_VEXT_VF(vfwmul_vf_w, 8)

/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
/*
 * Three-operand per-element body: the destination element is read as
 * the accumulator and passed to OP along with both sources.
 */
#define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)       \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,       \
                      CPURISCVState *env)                          \
{                                                                  \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
    TD d = *((TD *)vd + HD(i));                                    \
    *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);          \
}

/* fmacc: fused (a * b) + d */
static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, 0, s);
}

static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, 0, s);
}

static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, 0, s);
}

RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)

/* Three-operand vector-scalar body: accumulator comes from vd. */
#define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)            \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,     \
                      CPURISCVState *env)                          \
{                                                                  \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
    TD d = *((TD *)vd + HD(i));                                    \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
}

RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
GEN_VEXT_VF(vfmacc_vf_h, 2)
GEN_VEXT_VF(vfmacc_vf_w, 4)
GEN_VEXT_VF(vfmacc_vf_d, 8)

/* fnmacc: fused -(a * b) - d */
static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
GEN_VEXT_VF(vfnmacc_vf_h, 2)
GEN_VEXT_VF(vfnmacc_vf_w, 4)
GEN_VEXT_VF(vfnmacc_vf_d, 8)

/* fmsac: fused (a * b) - d */
static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, float_muladd_negate_c, s);
}

static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, float_muladd_negate_c, s);
}

static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, float_muladd_negate_c, s);
}

RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
GEN_VEXT_VF(vfmsac_vf_h, 2)
GEN_VEXT_VF(vfmsac_vf_w, 4)
GEN_VEXT_VF(vfmsac_vf_d, 8)

/* fnmsac: fused -(a * b) + d */
static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(a, b, d, float_muladd_negate_product, s);
}

static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(a, b, d, float_muladd_negate_product, s);
}

static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(a, b, d, float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
GEN_VEXT_VF(vfnmsac_vf_h, 2)
GEN_VEXT_VF(vfnmsac_vf_w, 4)
GEN_VEXT_VF(vfnmsac_vf_d, 8)

/* fmadd: fused (d * b) + a -- the accumulator d is the multiplicand. */
static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, 0, s);
}

static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, 0, s);
}

static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, 0, s);
}

RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
GEN_VEXT_VF(vfmadd_vf_h, 2)
GEN_VEXT_VF(vfmadd_vf_w, 4)
GEN_VEXT_VF(vfmadd_vf_d, 8)

/* fnmadd: fused -(d * b) - a */
static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, float_muladd_negate_c |
                                   float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
GEN_VEXT_VF(vfnmadd_vf_h, 2)
GEN_VEXT_VF(vfnmadd_vf_w, 4)
GEN_VEXT_VF(vfnmadd_vf_d, 8)

/* fmsub: fused (d * b) - a */
static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, float_muladd_negate_c, s);
}

static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, float_muladd_negate_c, s);
}

static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, float_muladd_negate_c, s);
}

RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
GEN_VEXT_VF(vfmsub_vf_h, 2)
GEN_VEXT_VF(vfmsub_vf_w, 4)
GEN_VEXT_VF(vfmsub_vf_d, 8)

/* fnmsub: fused -(d * b) + a */
static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
{
    return float16_muladd(d, b, a, float_muladd_negate_product, s);
}

static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
{
    return float32_muladd(d, b, a, float_muladd_negate_product, s);
}

static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
{
    return float64_muladd(d, b, a, float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
GEN_VEXT_VF(vfnmsub_vf_h, 2)
GEN_VEXT_VF(vfnmsub_vf_w, 4)
GEN_VEXT_VF(vfnmsub_vf_d, 8)

/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
/* fwmacc: widen a and b to 2*SEW, then fused (a * b) + d in the wide format. */
static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d, 0, s);
}

static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d, 0, s);
}

RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
GEN_VEXT_VF(vfwmacc_vf_h, 4)
GEN_VEXT_VF(vfwmacc_vf_w, 8)

/* fwnmacc: widening fused -(a * b) - d */
static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}

static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
                          d, float_muladd_negate_c |
                             float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
GEN_VEXT_VF(vfwnmacc_vf_h, 4)
GEN_VEXT_VF(vfwnmacc_vf_w, 8)

/* fwmsac: widening fused (a * b) - d */
static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c, s);
}

static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_c, s);
}

RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
GEN_VEXT_VF(vfwmsac_vf_h, 4)
GEN_VEXT_VF(vfwmsac_vf_w, 8)

/* fwnmsac: widening fused -(a * b) + d */
static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_product, s);
}

static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
GEN_VEXT_VF(vfwnmsac_vf_h, 4)
GEN_VEXT_VF(vfwnmsac_vf_w, 8)

/* Vector Floating-Point Square-Root Instruction */
/* (TD, T2, TX2) */
#define OP_UU_H uint16_t, uint16_t, uint16_t
#define OP_UU_W uint32_t, uint32_t, uint32_t
#define OP_UU_D uint64_t, uint64_t, uint64_t

/* Single-operand per-element body: vd[i] = OP(vs2[i]). */
#define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP)         \
static void do_##NAME(void *vd, void *vs2, int i,      \
                      CPURISCVState *env)              \
{                                                      \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                    \
    *((TD *)vd + HD(i)) = OP(s2, &env->fp_status);     \
}

/* Single-operand helper entry point with mask/tail handling. */
#define GEN_VEXT_V_ENV(NAME, ESZ)                      \
void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)   \
{                                                      \
    uint32_t vm = vext_vm(desc);                       \
    uint32_t vl = env->vl;                             \
    uint32_t total_elems =                             \
        vext_get_total_elems(env, desc, ESZ);          \
    uint32_t vta = vext_vta(desc);                     \
    uint32_t vma = vext_vma(desc);                     \
    uint32_t i;                                        \
                                                       \
    if (vl == 0) {                                     \
        return;                                        \
    }                                                  \
    for (i = env->vstart; i < vl; i++) {               \
        if (!vm && !vext_elem_mask(v0, i)) {           \
            /* set masked-off elements to 1s */        \
            vext_set_elems_1s(vd, vma, i * ESZ,        \
                              (i + 1) * ESZ);          \
            continue;                                  \
        }                                              \
        do_##NAME(vd, vs2, i, env);                    \
    }                                                  \
    env->vstart = 0;                                   \
    /* set tail elements to 1s */                      \
    vext_set_elems_1s(vd, vta, vl * ESZ,               \
                      total_elems * ESZ);              \
}


RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
GEN_VEXT_V_ENV(vfsqrt_v_d, 8)

/*
 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
 *
 * Adapted from riscv-v-spec recip.c:
 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
 */
/*
 * Bit-level 1/sqrt(f) estimate, 7 bits of precision, for a float with
 * the given exponent/fraction field widths.  Must produce the exact
 * bit pattern of the spec's reference model -- do not "simplify".
 */
static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
{
    uint64_t sign = extract64(f, frac_size + exp_size, 1);
    uint64_t exp = extract64(f, frac_size, exp_size);
    uint64_t frac = extract64(f, 0, frac_size);

    const uint8_t lookup_table[] = {
        52, 51, 50, 48, 47, 46, 44, 43,
        42, 41, 40, 39, 38, 36, 35, 34,
        33, 32, 31, 30, 30, 29, 28, 27,
        26, 25, 24, 23, 23, 22, 21, 20,
        19, 19, 18, 17, 16, 16, 15, 14,
        14, 13, 12, 12, 11, 10, 10, 9,
        9, 8, 7, 7, 6, 6, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0,
        127, 125, 123, 121, 119, 118, 116, 114,
        113, 111, 109, 108, 106, 105, 103, 102,
        100, 99, 97, 96, 95, 93, 92, 91,
        90, 88, 87, 86, 85, 84, 83, 82,
        80, 79, 78, 77, 76, 75, 74, 73,
        72, 71, 70, 70, 69, 68, 67, 66,
        65, 64, 63, 63, 62, 61, 60, 59,
        59, 58, 57, 56, 56, 55, 54, 53
    };
    const int precision = 7;

    if (exp == 0 && frac != 0) { /* subnormal */
        /* Normalize the subnormal. */
        while (extract64(frac, frac_size - 1, 1) == 0) {
            exp--;
            frac <<= 1;
        }

        frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
    }

    /* Table is indexed by the exponent's parity and the top fraction bits. */
    int idx = ((exp & 1) << (precision - 1)) |
              (frac >> (frac_size - precision + 1));
    uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
                        (frac_size - precision);
    uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;

    uint64_t val = 0;
    val = deposit64(val, 0, frac_size, out_frac);
    val = deposit64(val, frac_size, exp_size, out_exp);
    val = deposit64(val, frac_size + exp_size, 1, sign);
    return val;
}

static float16 frsqrt7_h(float16 f, float_status *s)
{
    int exp_size = 5, frac_size = 10;
    bool sign = float16_is_neg(f);

    /*
     * frsqrt7(sNaN) = canonical NaN
     * frsqrt7(-inf) = canonical NaN
     * frsqrt7(-normal) = canonical NaN
     * frsqrt7(-subnormal) = canonical NaN
     */
    if (float16_is_signaling_nan(f, s) ||
        (float16_is_infinity(f) && sign) ||
        (float16_is_normal(f) && sign) ||
        (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
        s->float_exception_flags |= float_flag_invalid;
        return float16_default_nan(s);
    }

    /* frsqrt7(qNaN) = canonical NaN */
    if (float16_is_quiet_nan(f, s)) {
        return float16_default_nan(s);
    }

    /* frsqrt7(+-0) = +-inf */
    if (float16_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float16_set_sign(float16_infinity, sign);
    }

    /* frsqrt7(+inf) = +0 */
    if (float16_is_infinity(f) && !sign) {
        return float16_set_sign(float16_zero, sign);
    }

    /* +normal, +subnormal */
    uint64_t val = frsqrt7(f, exp_size, frac_size);
    return make_float16(val);
}

static float32 frsqrt7_s(float32 f, float_status *s)
{
    int exp_size = 8, frac_size = 23;
    bool sign = float32_is_neg(f);

    /*
     * frsqrt7(sNaN) = canonical NaN
     * frsqrt7(-inf) = canonical NaN
     * frsqrt7(-normal) = canonical NaN
     * frsqrt7(-subnormal) = canonical NaN
     */
    if (float32_is_signaling_nan(f, s) ||
        (float32_is_infinity(f) && sign) ||
        (float32_is_normal(f) && sign) ||
        (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
        s->float_exception_flags |= float_flag_invalid;
        return float32_default_nan(s);
    }

    /* frsqrt7(qNaN) = canonical NaN */
    if (float32_is_quiet_nan(f, s)) {
        return float32_default_nan(s);
    }

    /* frsqrt7(+-0) = +-inf */
    if (float32_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float32_set_sign(float32_infinity, sign);
    }

    /* frsqrt7(+inf) = +0 */
    if (float32_is_infinity(f) && !sign) {
        return float32_set_sign(float32_zero, sign);
    }

    /* +normal, +subnormal */
    uint64_t val = frsqrt7(f, exp_size, frac_size);
    return make_float32(val);
}

static float64 frsqrt7_d(float64 f, float_status *s)
{
    int exp_size = 11, frac_size = 52;
    bool sign = float64_is_neg(f);

    /*
     * frsqrt7(sNaN) = canonical NaN
     * frsqrt7(-inf) = canonical NaN
     * frsqrt7(-normal) = canonical NaN
     * frsqrt7(-subnormal) = canonical NaN
     */
    if (float64_is_signaling_nan(f, s) ||
        (float64_is_infinity(f) && sign) ||
        (float64_is_normal(f) && sign) ||
        (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
        s->float_exception_flags |= float_flag_invalid;
        return float64_default_nan(s);
    }

    /* frsqrt7(qNaN) = canonical NaN */
    if (float64_is_quiet_nan(f, s)) {
        return float64_default_nan(s);
    }

    /* frsqrt7(+-0) = +-inf */
    if (float64_is_zero(f)) {
        s->float_exception_flags |= float_flag_divbyzero;
        return float64_set_sign(float64_infinity, sign);
    }

    /* frsqrt7(+inf) = +0 */
    if (float64_is_infinity(f) && !sign) {
        return float64_set_sign(float64_zero, sign);
    }

    /* +normal, +subnormal */
    uint64_t val = frsqrt7(f, exp_size, frac_size);
    return make_float64(val);
}

RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)

/*
 * Vector Floating-Point Reciprocal Estimate Instruction
 *
 * Adapted from riscv-v-spec recip.c:
 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
 */
static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
                      float_status *s)
{
    uint64_t sign = extract64(f, frac_size + exp_size, 1);
    uint64_t exp = extract64(f, frac_size, exp_size);
    uint64_t frac = extract64(f, 0, frac_size);

    const uint8_t lookup_table[] = {
        127, 125, 123, 121, 119, 117, 116, 114,
        112, 110, 109, 107, 105, 104, 102, 100,
        99, 97, 96, 94, 93, 91, 90, 88,
        87, 85, 84, 83, 81, 80, 79, 77,
        76, 75, 74, 72, 71, 70, 69, 68,
        66, 65, 64, 63, 62, 61, 60, 59,
        58, 57, 56, 55, 54, 53, 52, 51,
        50, 49, 48, 47, 46, 45, 44, 43,
        42, 41, 40, 40, 39, 38, 37, 36,
        35, 35, 34, 33, 32, 31, 31, 30,
        29, 28, 28, 27, 26, 25, 25, 24,
        23, 23, 22, 21, 21, 20, 19, 19,
        18, 17, 17, 16, 15, 15, 14, 14,
        13, 12, 12, 11, 11, 10, 9, 9,
        8, 8, 7, 7, 6, 5, 5, 4,
        4, 3, 3, 2, 2, 1, 1, 0
    };
    const int precision = 7;

    if (exp == 0 && frac != 0) { /* subnormal */
        /* Normalize the subnormal.
*/ 3880 while (extract64(frac, frac_size - 1, 1) == 0) { 3881 exp--; 3882 frac <<= 1; 3883 } 3884 3885 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3886 3887 if (exp != 0 && exp != UINT64_MAX) { 3888 /* 3889 * Overflow to inf or max value of same sign, 3890 * depending on sign and rounding mode. 3891 */ 3892 s->float_exception_flags |= (float_flag_inexact | 3893 float_flag_overflow); 3894 3895 if ((s->float_rounding_mode == float_round_to_zero) || 3896 ((s->float_rounding_mode == float_round_down) && !sign) || 3897 ((s->float_rounding_mode == float_round_up) && sign)) { 3898 /* Return greatest/negative finite value. */ 3899 return (sign << (exp_size + frac_size)) | 3900 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3901 } else { 3902 /* Return +-inf. */ 3903 return (sign << (exp_size + frac_size)) | 3904 MAKE_64BIT_MASK(frac_size, exp_size); 3905 } 3906 } 3907 } 3908 3909 int idx = frac >> (frac_size - precision); 3910 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3911 (frac_size - precision); 3912 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3913 3914 if (out_exp == 0 || out_exp == UINT64_MAX) { 3915 /* 3916 * The result is subnormal, but don't raise the underflow exception, 3917 * because there's no additional loss of precision. 
3918 */ 3919 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3920 if (out_exp == UINT64_MAX) { 3921 out_frac >>= 1; 3922 out_exp = 0; 3923 } 3924 } 3925 3926 uint64_t val = 0; 3927 val = deposit64(val, 0, frac_size, out_frac); 3928 val = deposit64(val, frac_size, exp_size, out_exp); 3929 val = deposit64(val, frac_size + exp_size, 1, sign); 3930 return val; 3931 } 3932 3933 static float16 frec7_h(float16 f, float_status *s) 3934 { 3935 int exp_size = 5, frac_size = 10; 3936 bool sign = float16_is_neg(f); 3937 3938 /* frec7(+-inf) = +-0 */ 3939 if (float16_is_infinity(f)) { 3940 return float16_set_sign(float16_zero, sign); 3941 } 3942 3943 /* frec7(+-0) = +-inf */ 3944 if (float16_is_zero(f)) { 3945 s->float_exception_flags |= float_flag_divbyzero; 3946 return float16_set_sign(float16_infinity, sign); 3947 } 3948 3949 /* frec7(sNaN) = canonical NaN */ 3950 if (float16_is_signaling_nan(f, s)) { 3951 s->float_exception_flags |= float_flag_invalid; 3952 return float16_default_nan(s); 3953 } 3954 3955 /* frec7(qNaN) = canonical NaN */ 3956 if (float16_is_quiet_nan(f, s)) { 3957 return float16_default_nan(s); 3958 } 3959 3960 /* +-normal, +-subnormal */ 3961 uint64_t val = frec7(f, exp_size, frac_size, s); 3962 return make_float16(val); 3963 } 3964 3965 static float32 frec7_s(float32 f, float_status *s) 3966 { 3967 int exp_size = 8, frac_size = 23; 3968 bool sign = float32_is_neg(f); 3969 3970 /* frec7(+-inf) = +-0 */ 3971 if (float32_is_infinity(f)) { 3972 return float32_set_sign(float32_zero, sign); 3973 } 3974 3975 /* frec7(+-0) = +-inf */ 3976 if (float32_is_zero(f)) { 3977 s->float_exception_flags |= float_flag_divbyzero; 3978 return float32_set_sign(float32_infinity, sign); 3979 } 3980 3981 /* frec7(sNaN) = canonical NaN */ 3982 if (float32_is_signaling_nan(f, s)) { 3983 s->float_exception_flags |= float_flag_invalid; 3984 return float32_default_nan(s); 3985 } 3986 3987 /* frec7(qNaN) = canonical NaN */ 3988 if (float32_is_quiet_nan(f, s)) { 3989 
return float32_default_nan(s); 3990 } 3991 3992 /* +-normal, +-subnormal */ 3993 uint64_t val = frec7(f, exp_size, frac_size, s); 3994 return make_float32(val); 3995 } 3996 3997 static float64 frec7_d(float64 f, float_status *s) 3998 { 3999 int exp_size = 11, frac_size = 52; 4000 bool sign = float64_is_neg(f); 4001 4002 /* frec7(+-inf) = +-0 */ 4003 if (float64_is_infinity(f)) { 4004 return float64_set_sign(float64_zero, sign); 4005 } 4006 4007 /* frec7(+-0) = +-inf */ 4008 if (float64_is_zero(f)) { 4009 s->float_exception_flags |= float_flag_divbyzero; 4010 return float64_set_sign(float64_infinity, sign); 4011 } 4012 4013 /* frec7(sNaN) = canonical NaN */ 4014 if (float64_is_signaling_nan(f, s)) { 4015 s->float_exception_flags |= float_flag_invalid; 4016 return float64_default_nan(s); 4017 } 4018 4019 /* frec7(qNaN) = canonical NaN */ 4020 if (float64_is_quiet_nan(f, s)) { 4021 return float64_default_nan(s); 4022 } 4023 4024 /* +-normal, +-subnormal */ 4025 uint64_t val = frec7(f, exp_size, frac_size, s); 4026 return make_float64(val); 4027 } 4028 4029 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4030 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4031 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4032 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4033 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4034 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4035 4036 /* Vector Floating-Point MIN/MAX Instructions */ 4037 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4038 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) 4039 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4040 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4041 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4042 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4043 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4044 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4045 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4046 
GEN_VEXT_VF(vfmin_vf_h, 2)
GEN_VEXT_VF(vfmin_vf_w, 4)
GEN_VEXT_VF(vfmin_vf_d, 8)

RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
GEN_VEXT_VF(vfmax_vf_h, 2)
GEN_VEXT_VF(vfmax_vf_w, 4)
GEN_VEXT_VF(vfmax_vf_d, 8)

/* Vector Floating-Point Sign-Injection Instructions */
/* Result takes its magnitude from a and its sign bit from b (bit 15). */
static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
{
    return deposit64(b, 0, 15, a);
}

/* Result takes its magnitude from a and its sign bit from b (bit 31). */
static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
{
    return deposit64(b, 0, 31, a);
}

/* Result takes its magnitude from a and its sign bit from b (bit 63). */
static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
{
    return deposit64(b, 0, 63, a);
}

RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
GEN_VEXT_VF(vfsgnj_vf_h, 2)
GEN_VEXT_VF(vfsgnj_vf_w, 4)
GEN_VEXT_VF(vfsgnj_vf_d, 8)

/* Magnitude from a, negated sign of b. */
static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
{
    return deposit64(~b, 0, 15, a);
}

static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
{
    return deposit64(~b, 0, 31, a);
}

static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
{
    return deposit64(~b, 0, 63, a);
}

RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
GEN_VEXT_VF(vfsgnjn_vf_h, 2)
GEN_VEXT_VF(vfsgnjn_vf_w, 4)
GEN_VEXT_VF(vfsgnjn_vf_d, 8)

/* Magnitude from a, sign is XOR of the two sign bits. */
static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
{
    return deposit64(b ^ a, 0, 15, a);
}

static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
{
    return deposit64(b ^ a, 0, 31, a);
}

static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
{
    return deposit64(b ^ a, 0, 63, a);
}

RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
GEN_VEXT_VF(vfsgnjx_vf_h, 2)
GEN_VEXT_VF(vfsgnjx_vf_w, 4)
GEN_VEXT_VF(vfsgnjx_vf_d, 8)

/* Vector Floating-Point Compare Instructions */
/*
 * Generate a vector-vector FP compare helper writing one mask bit per
 * element: vd[i] = DO_OP(vs2[i], vs1[i]).  Note the operand order: the
 * vs2 element is the first DO_OP argument.
 */
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i,                             \
                           DO_OP(s2, s1, &env->fp_status));   \
    }                                                         \
    env->vstart = 0;                                          \
    /* mask destination register are always tail-agnostic */  \
    /* set tail elements to 1s */                             \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)

/*
 * Generate a vector-scalar FP compare helper writing one mask bit per
 * element: vd[i] = DO_OP(vs2[i], s1).  s1 is the scalar FP operand,
 * passed in an x/f register and truncated to ETYPE.
 */
#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;                \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            if (vma) {                                              \
                vext_set_elem_mask(vd, i, 1);                       \
            }                                                       \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                           DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
    }                                                               \
    env->vstart = 0;                                                \
    /* mask destination register are always tail-agnostic */        \
    /* set tail elements to 1s */                                   \
    if (vta_all_1s) {                                               \
        for (; i < total_elems; i++) {                              \
            vext_set_elem_mask(vd, i, 1);                           \
        }                                                           \
    }                                                               \
}

GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)

/* Not-equal is "compare result is anything but equal" (quiet, NaN => true). */
static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)

GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)

GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)

/* Signaling greater-than (vmfgt only exists in the vector-scalar form). */
static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare(a, b, s);
    return compare == float_relation_greater;
}

static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare(a, b, s);
    return compare == float_relation_greater;
}

static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare(a, b, s);
    return compare == float_relation_greater;
}

GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)

/* Signaling greater-or-equal (vector-scalar form only). */
static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)

/* Vector Floating-Point Classify Instruction */
/* Per-element unary op that does not take a float_status argument. */
#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP)   \
static void do_##NAME(void *vd, void *vs2, int i) \
{                                                 \
    TX2 s2 = *((T2 *)vs2 + HS2(i));               \
    *((TD *)vd + HD(i)) = OP(s2);                 \
}

/*
 * Generate a masked unary helper (no float_status), honouring
 * mask-agnostic and tail-agnostic settings.
 */
#define GEN_VEXT_V(NAME, ESZ)                          \
void HELPER(NAME)(void *vd, void *v0, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)   \
{                                                      \
    uint32_t vm = vext_vm(desc);                       \
    uint32_t vl = env->vl;                             \
    uint32_t total_elems =                             \
        vext_get_total_elems(env, desc, ESZ);          \
    uint32_t vta = vext_vta(desc);                     \
    uint32_t vma = vext_vma(desc);                     \
    uint32_t i;                                        \
                                                       \
    for (i = env->vstart; i < vl; i++) {               \
        if (!vm && !vext_elem_mask(v0, i)) {           \
            /* set masked-off elements to 1s */        \
            vext_set_elems_1s(vd, vma, i * ESZ,        \
                              (i + 1) * ESZ);          \
            continue;                                  \
        }                                              \
        do_##NAME(vd, vs2, i);                         \
    }                                                  \
    env->vstart = 0;                                   \
    /* set tail elements to 1s */                      \
    vext_set_elems_1s(vd, vta, vl * ESZ,               \
                      total_elems * ESZ);              \
}

/*
 * Classify a half-precision value: returns the 10-bit fclass mask
 * (bit 0 = -inf ... bit 4 = +0 ... bit 7 = +inf, bits 8/9 = sNaN/qNaN).
 */
target_ulong fclass_h(uint64_t frs1)
{
    float16 f = frs1;
    bool sign = float16_is_neg(f);

    if (float16_is_infinity(f)) {
        return sign ? 1 << 0 : 1 << 7;
    } else if (float16_is_zero(f)) {
        return sign ? 1 << 3 : 1 << 4;
    } else if (float16_is_zero_or_denormal(f)) {
        return sign ? 1 << 2 : 1 << 5;
    } else if (float16_is_any_nan(f)) {
        float_status s = { }; /* for snan_bit_is_one */
        return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
    } else {
        return sign ? 1 << 1 : 1 << 6;
    }
}

/* Classify a single-precision value; same bit layout as fclass_h. */
target_ulong fclass_s(uint64_t frs1)
{
    float32 f = frs1;
    bool sign = float32_is_neg(f);

    if (float32_is_infinity(f)) {
        return sign ? 1 << 0 : 1 << 7;
    } else if (float32_is_zero(f)) {
        return sign ? 1 << 3 : 1 << 4;
    } else if (float32_is_zero_or_denormal(f)) {
        return sign ? 1 << 2 : 1 << 5;
    } else if (float32_is_any_nan(f)) {
        float_status s = { }; /* for snan_bit_is_one */
        return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
    } else {
        return sign ? 1 << 1 : 1 << 6;
    }
}

/* Classify a double-precision value; same bit layout as fclass_h. */
target_ulong fclass_d(uint64_t frs1)
{
    float64 f = frs1;
    bool sign = float64_is_neg(f);

    if (float64_is_infinity(f)) {
        return sign ? 1 << 0 : 1 << 7;
    } else if (float64_is_zero(f)) {
        return sign ? 1 << 3 : 1 << 4;
    } else if (float64_is_zero_or_denormal(f)) {
        return sign ? 1 << 2 : 1 << 5;
    } else if (float64_is_any_nan(f)) {
        float_status s = { }; /* for snan_bit_is_one */
        return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
    } else {
        return sign ? 1 << 1 : 1 << 6;
    }
}

RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
GEN_VEXT_V(vfclass_v_h, 2)
GEN_VEXT_V(vfclass_v_w, 4)
GEN_VEXT_V(vfclass_v_d, 8)

/* Vector Floating-Point Merge Instruction */

/*
 * vd[i] = v0.mask[i] ? s1 (scalar) : vs2[i].  Unlike the arithmetic
 * helpers, masked-off elements are copied from vs2 rather than skipped.
 */
#define GEN_VFMERGE_VF(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,    \
                  CPURISCVState *env, uint32_t desc)             \
{                                                                \
    uint32_t vm = vext_vm(desc);                                 \
    uint32_t vl = env->vl;                                       \
    uint32_t esz = sizeof(ETYPE);                                \
    uint32_t total_elems =                                       \
        vext_get_total_elems(env, desc, esz);                    \
    uint32_t vta = vext_vta(desc);                               \
    uint32_t i;                                                  \
                                                                 \
    for (i = env->vstart; i < vl; i++) {                         \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                       \
        *((ETYPE *)vd + H(i)) =                                  \
            (!vm && !vext_elem_mask(v0, i) ? s2 : s1);           \
    }                                                            \
    env->vstart = 0;                                             \
    /* set tail elements to 1s */                                \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);     \
}

GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)

/* Single-Width Floating-Point/Integer Type-Convert Instructions */
/* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)

/* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)

/* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)

/* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)

/* Widening Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2): destination is double the source element width. */
#define WOP_UU_B uint16_t, uint8_t, uint8_t
#define WOP_UU_H uint32_t, uint16_t, uint16_t
#define WOP_UU_W uint64_t, uint32_t, uint32_t
/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)

/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)

/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)

/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)

/*
 * vfwcvt.f.f.v vd, vs2, vm
 * Convert single-width float to double-width float.
 */
/* Adapter: fixes the ieee=true flag so the helper matches OPFVV1's shape. */
static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
{
    return float16_to_float32(a, true, s);
}

RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)

/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2): destination is half the source element width. */
#define NOP_UU_B uint8_t, uint16_t, uint32_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)

/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)

/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)

/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)

/* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */
/* Adapter: fixes the ieee=true flag so the helper matches OPFVV1's shape. */
static uint16_t vfncvtffv16(uint32_t a, float_status *s)
{
    return float32_to_float16(a, true, s);
}

RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)

/*
 *** Vector Reduction Operations
 */
/* Vector Single-Width Integer Reduction Instructions */
/*
 * Generate a reduction helper: vd[0] = OP over vs1[0] and all active
 * vs2 elements.  Masked-off elements are simply skipped; only element 0
 * of vd is written (tail starts at esz).
 */
#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)          \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t esz = sizeof(TD);                            \
    uint32_t vlenb = simd_maxsz(desc);                    \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t i;                                           \
    TD s1 = *((TD *)vs1 + HD(0));                         \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        s1 = OP(s1, (TD)s2);                              \
    }                                                     \
    *((TD *)vd + HD(0)) = s1;                             \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, esz, vlenb);               \
}

/* vd[0] = sum(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)

/* vd[0] = maxu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)

/* vd[0] = max(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)

/* vd[0] = minu(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)

/* vd[0] = min(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)

/* vd[0] = and(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)

/* vd[0] = or(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)

/* vd[0] = xor(vs1[0], vs2[*]) */
GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)

/* Vector Widening Integer Reduction Instructions */
/* signed sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)

/* Unsigned sum reduction into double-width accumulator */
GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)

/* Vector Single-Width Floating-Point Reduction Instructions */
/* Same shape as GEN_VEXT_RED, but OP also takes &env->fp_status. */
#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)         \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t esz = sizeof(TD);                            \
    uint32_t vlenb = simd_maxsz(desc);                    \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t i;                                           \
    TD s1 = *((TD *)vs1 + HD(0));                         \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        s1 = OP(s1, (TD)s2, &env->fp_status);             \
    }                                                     \
    *((TD *)vd + HD(0)) = s1;                             \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, esz, vlenb);               \
}

/* Unordered sum */
GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Ordered sum */
GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Maximum value */
GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)

/* Minimum value */
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4694 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4695 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4696 4697 /* Vector Widening Floating-Point Add Instructions */ 4698 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4699 { 4700 return float32_add(a, float16_to_float32(b, true, s), s); 4701 } 4702 4703 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4704 { 4705 return float64_add(a, float32_to_float64(b, s), s); 4706 } 4707 4708 /* Vector Widening Floating-Point Reduction Instructions */ 4709 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4710 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4711 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4712 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4713 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4714 4715 /* 4716 *** Vector Mask Operations 4717 */ 4718 /* Vector Mask-Register Logical Instructions */ 4719 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4720 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4721 void *vs2, CPURISCVState *env, \ 4722 uint32_t desc) \ 4723 { \ 4724 uint32_t vl = env->vl; \ 4725 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4726 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4727 uint32_t i; \ 4728 int a, b; \ 4729 \ 4730 for (i = env->vstart; i < vl; i++) { \ 4731 a = vext_elem_mask(vs1, i); \ 4732 b = vext_elem_mask(vs2, i); \ 4733 vext_set_elem_mask(vd, i, OP(b, a)); \ 4734 } \ 4735 env->vstart = 0; \ 4736 /* mask destination register are always tail- \ 4737 * agnostic \ 4738 */ \ 4739 /* set tail elements to 1s */ \ 4740 if (vta_all_1s) { \ 4741 for (; i < total_elems; i++) { \ 4742 vext_set_elem_mask(vd, i, 1); \ 4743 } \ 4744 } \ 4745 } 4746 4747 #define DO_NAND(N, M) (!(N & M)) 4748 #define DO_ANDNOT(N, 
M) (N & !M) 4749 #define DO_NOR(N, M) (!(N | M)) 4750 #define DO_ORNOT(N, M) (N | !M) 4751 #define DO_XNOR(N, M) (!(N ^ M)) 4752 4753 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4754 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4755 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4756 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4757 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4758 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4759 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4760 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4761 4762 /* Vector count population in mask vcpop */ 4763 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4764 uint32_t desc) 4765 { 4766 target_ulong cnt = 0; 4767 uint32_t vm = vext_vm(desc); 4768 uint32_t vl = env->vl; 4769 int i; 4770 4771 for (i = env->vstart; i < vl; i++) { 4772 if (vm || vext_elem_mask(v0, i)) { 4773 if (vext_elem_mask(vs2, i)) { 4774 cnt++; 4775 } 4776 } 4777 } 4778 env->vstart = 0; 4779 return cnt; 4780 } 4781 4782 /* vfirst find-first-set mask bit*/ 4783 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4784 uint32_t desc) 4785 { 4786 uint32_t vm = vext_vm(desc); 4787 uint32_t vl = env->vl; 4788 int i; 4789 4790 for (i = env->vstart; i < vl; i++) { 4791 if (vm || vext_elem_mask(v0, i)) { 4792 if (vext_elem_mask(vs2, i)) { 4793 return i; 4794 } 4795 } 4796 } 4797 env->vstart = 0; 4798 return -1LL; 4799 } 4800 4801 enum set_mask_type { 4802 ONLY_FIRST = 1, 4803 INCLUDE_FIRST, 4804 BEFORE_FIRST, 4805 }; 4806 4807 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4808 uint32_t desc, enum set_mask_type type) 4809 { 4810 uint32_t vm = vext_vm(desc); 4811 uint32_t vl = env->vl; 4812 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; 4813 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4814 uint32_t vma = vext_vma(desc); 4815 int i; 4816 bool first_mask_bit = false; 4817 4818 for (i = env->vstart; i < vl; i++) { 4819 if (!vm && !vext_elem_mask(v0, i)) { 4820 /* set masked-off elements to 1s */ 4821 if (vma) { 4822 vext_set_elem_mask(vd, i, 
1); 4823 } 4824 continue; 4825 } 4826 /* write a zero to all following active elements */ 4827 if (first_mask_bit) { 4828 vext_set_elem_mask(vd, i, 0); 4829 continue; 4830 } 4831 if (vext_elem_mask(vs2, i)) { 4832 first_mask_bit = true; 4833 if (type == BEFORE_FIRST) { 4834 vext_set_elem_mask(vd, i, 0); 4835 } else { 4836 vext_set_elem_mask(vd, i, 1); 4837 } 4838 } else { 4839 if (type == ONLY_FIRST) { 4840 vext_set_elem_mask(vd, i, 0); 4841 } else { 4842 vext_set_elem_mask(vd, i, 1); 4843 } 4844 } 4845 } 4846 env->vstart = 0; 4847 /* mask destination register are always tail-agnostic */ 4848 /* set tail elements to 1s */ 4849 if (vta_all_1s) { 4850 for (; i < total_elems; i++) { 4851 vext_set_elem_mask(vd, i, 1); 4852 } 4853 } 4854 } 4855 4856 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4857 uint32_t desc) 4858 { 4859 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4860 } 4861 4862 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4863 uint32_t desc) 4864 { 4865 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4866 } 4867 4868 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4869 uint32_t desc) 4870 { 4871 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4872 } 4873 4874 /* Vector Iota Instruction */ 4875 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4876 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4877 uint32_t desc) \ 4878 { \ 4879 uint32_t vm = vext_vm(desc); \ 4880 uint32_t vl = env->vl; \ 4881 uint32_t esz = sizeof(ETYPE); \ 4882 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4883 uint32_t vta = vext_vta(desc); \ 4884 uint32_t vma = vext_vma(desc); \ 4885 uint32_t sum = 0; \ 4886 int i; \ 4887 \ 4888 for (i = env->vstart; i < vl; i++) { \ 4889 if (!vm && !vext_elem_mask(v0, i)) { \ 4890 /* set masked-off elements to 1s */ \ 4891 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4892 continue; \ 4893 } \ 4894 *((ETYPE *)vd + H(i)) = sum; \ 4895 if 
(vext_elem_mask(vs2, i)) { \ 4896 sum++; \ 4897 } \ 4898 } \ 4899 env->vstart = 0; \ 4900 /* set tail elements to 1s */ \ 4901 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4902 } 4903 4904 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4905 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4906 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4907 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4908 4909 /* Vector Element Index Instruction */ 4910 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4911 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4912 { \ 4913 uint32_t vm = vext_vm(desc); \ 4914 uint32_t vl = env->vl; \ 4915 uint32_t esz = sizeof(ETYPE); \ 4916 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4917 uint32_t vta = vext_vta(desc); \ 4918 uint32_t vma = vext_vma(desc); \ 4919 int i; \ 4920 \ 4921 for (i = env->vstart; i < vl; i++) { \ 4922 if (!vm && !vext_elem_mask(v0, i)) { \ 4923 /* set masked-off elements to 1s */ \ 4924 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4925 continue; \ 4926 } \ 4927 *((ETYPE *)vd + H(i)) = i; \ 4928 } \ 4929 env->vstart = 0; \ 4930 /* set tail elements to 1s */ \ 4931 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4932 } 4933 4934 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4935 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4936 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4937 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4938 4939 /* 4940 *** Vector Permutation Instructions 4941 */ 4942 4943 /* Vector Slide Instructions */ 4944 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4945 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4946 CPURISCVState *env, uint32_t desc) \ 4947 { \ 4948 uint32_t vm = vext_vm(desc); \ 4949 uint32_t vl = env->vl; \ 4950 uint32_t esz = sizeof(ETYPE); \ 4951 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4952 uint32_t vta = vext_vta(desc); \ 4953 uint32_t vma = vext_vma(desc); \ 4954 target_ulong offset = s1, i_min, i; \ 4955 \ 4956 i_min = 
MAX(env->vstart, offset); \ 4957 for (i = i_min; i < vl; i++) { \ 4958 if (!vm && !vext_elem_mask(v0, i)) { \ 4959 /* set masked-off elements to 1s */ \ 4960 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4961 continue; \ 4962 } \ 4963 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4964 } \ 4965 /* set tail elements to 1s */ \ 4966 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4967 } 4968 4969 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4970 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4971 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4972 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4973 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4974 4975 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4976 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4977 CPURISCVState *env, uint32_t desc) \ 4978 { \ 4979 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4980 uint32_t vm = vext_vm(desc); \ 4981 uint32_t vl = env->vl; \ 4982 uint32_t esz = sizeof(ETYPE); \ 4983 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4984 uint32_t vta = vext_vta(desc); \ 4985 uint32_t vma = vext_vma(desc); \ 4986 target_ulong i_max, i; \ 4987 \ 4988 i_max = MAX(MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl), env->vstart); \ 4989 for (i = env->vstart; i < i_max; ++i) { \ 4990 if (!vm && !vext_elem_mask(v0, i)) { \ 4991 /* set masked-off elements to 1s */ \ 4992 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4993 continue; \ 4994 } \ 4995 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4996 } \ 4997 \ 4998 for (i = i_max; i < vl; ++i) { \ 4999 if (vm || vext_elem_mask(v0, i)) { \ 5000 *((ETYPE *)vd + H(i)) = 0; \ 5001 } \ 5002 } \ 5003 \ 5004 env->vstart = 0; \ 5005 /* set tail elements to 1s */ \ 5006 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5007 } 5008 5009 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 5010 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 5011 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 5012 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 5013 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 5014 5015 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ 5016 static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ 5017 void *vs2, CPURISCVState *env, \ 5018 uint32_t desc) \ 5019 { \ 5020 typedef uint##BITWIDTH##_t ETYPE; \ 5021 uint32_t vm = vext_vm(desc); \ 5022 uint32_t vl = env->vl; \ 5023 uint32_t esz = sizeof(ETYPE); \ 5024 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5025 uint32_t vta = vext_vta(desc); \ 5026 uint32_t vma = vext_vma(desc); \ 5027 uint32_t i; \ 5028 \ 5029 for (i = env->vstart; i < vl; i++) { \ 5030 if (!vm && !vext_elem_mask(v0, i)) { \ 5031 /* set masked-off elements to 1s */ \ 5032 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 5033 continue; \ 5034 } \ 5035 if (i == 0) { \ 5036 *((ETYPE *)vd + H(i)) = s1; \ 5037 } else { \ 5038 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 5039 } \ 5040 } \ 5041 env->vstart = 0; \ 5042 /* set tail elements to 1s */ \ 5043 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5044 } 5045 5046 GEN_VEXT_VSLIE1UP(8, H1) 5047 GEN_VEXT_VSLIE1UP(16, H2) 5048 
GEN_VEXT_VSLIE1UP(32, H4) 5049 GEN_VEXT_VSLIE1UP(64, H8) 5050 5051 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ 5052 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 5053 CPURISCVState *env, uint32_t desc) \ 5054 { \ 5055 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 5056 } 5057 5058 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 5059 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 5060 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 5061 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 5062 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 5063 5064 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \ 5065 static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ 5066 void *vs2, CPURISCVState *env, \ 5067 uint32_t desc) \ 5068 { \ 5069 typedef uint##BITWIDTH##_t ETYPE; \ 5070 uint32_t vm = vext_vm(desc); \ 5071 uint32_t vl = env->vl; \ 5072 uint32_t esz = sizeof(ETYPE); \ 5073 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5074 uint32_t vta = vext_vta(desc); \ 5075 uint32_t vma = vext_vma(desc); \ 5076 uint32_t i; \ 5077 \ 5078 for (i = env->vstart; i < vl; i++) { \ 5079 if (!vm && !vext_elem_mask(v0, i)) { \ 5080 /* set masked-off elements to 1s */ \ 5081 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 5082 continue; \ 5083 } \ 5084 if (i == vl - 1) { \ 5085 *((ETYPE *)vd + H(i)) = s1; \ 5086 } else { \ 5087 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 5088 } \ 5089 } \ 5090 env->vstart = 0; \ 5091 /* set tail elements to 1s */ \ 5092 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5093 } 5094 5095 GEN_VEXT_VSLIDE1DOWN(8, H1) 5096 GEN_VEXT_VSLIDE1DOWN(16, H2) 5097 GEN_VEXT_VSLIDE1DOWN(32, H4) 5098 GEN_VEXT_VSLIDE1DOWN(64, H8) 5099 5100 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ 5101 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 5102 CPURISCVState *env, uint32_t desc) \ 5103 { \ 5104 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 5105 } 5106 5107 /* 
vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 5108 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 5109 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 5110 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 5111 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 5112 5113 /* Vector Floating-Point Slide Instructions */ 5114 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \ 5115 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 5116 CPURISCVState *env, uint32_t desc) \ 5117 { \ 5118 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 5119 } 5120 5121 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */ 5122 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 5123 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 5124 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 5125 5126 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \ 5127 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 5128 CPURISCVState *env, uint32_t desc) \ 5129 { \ 5130 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 5131 } 5132 5133 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 5134 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 5135 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 5136 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 5137 5138 /* Vector Register Gather Instruction */ 5139 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 5140 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 5141 CPURISCVState *env, uint32_t desc) \ 5142 { \ 5143 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 5144 uint32_t vm = vext_vm(desc); \ 5145 uint32_t vl = env->vl; \ 5146 uint32_t esz = sizeof(TS2); \ 5147 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5148 uint32_t vta = vext_vta(desc); \ 5149 uint32_t vma = vext_vma(desc); \ 5150 uint64_t index; \ 5151 uint32_t i; \ 5152 \ 5153 for (i = env->vstart; i < vl; i++) { \ 5154 if (!vm && !vext_elem_mask(v0, i)) { \ 5155 /* set masked-off elements to 
1s */ \ 5156 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 5157 continue; \ 5158 } \ 5159 index = *((TS1 *)vs1 + HS1(i)); \ 5160 if (index >= vlmax) { \ 5161 *((TS2 *)vd + HS2(i)) = 0; \ 5162 } else { \ 5163 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 5164 } \ 5165 } \ 5166 env->vstart = 0; \ 5167 /* set tail elements to 1s */ \ 5168 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5169 } 5170 5171 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 5172 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 5173 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 5174 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 5175 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 5176 5177 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 5178 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 5179 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 5180 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 5181 5182 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 5183 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 5184 CPURISCVState *env, uint32_t desc) \ 5185 { \ 5186 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 5187 uint32_t vm = vext_vm(desc); \ 5188 uint32_t vl = env->vl; \ 5189 uint32_t esz = sizeof(ETYPE); \ 5190 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5191 uint32_t vta = vext_vta(desc); \ 5192 uint32_t vma = vext_vma(desc); \ 5193 uint64_t index = s1; \ 5194 uint32_t i; \ 5195 \ 5196 for (i = env->vstart; i < vl; i++) { \ 5197 if (!vm && !vext_elem_mask(v0, i)) { \ 5198 /* set masked-off elements to 1s */ \ 5199 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 5200 continue; \ 5201 } \ 5202 if (index >= vlmax) { \ 5203 *((ETYPE *)vd + H(i)) = 0; \ 5204 } else { \ 5205 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 5206 } \ 5207 } \ 5208 env->vstart = 0; \ 
5209 /* set tail elements to 1s */ \ 5210 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5211 } 5212 5213 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ 5214 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 5215 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 5216 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 5217 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 5218 5219 /* Vector Compress Instruction */ 5220 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 5221 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 5222 CPURISCVState *env, uint32_t desc) \ 5223 { \ 5224 uint32_t vl = env->vl; \ 5225 uint32_t esz = sizeof(ETYPE); \ 5226 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5227 uint32_t vta = vext_vta(desc); \ 5228 uint32_t num = 0, i; \ 5229 \ 5230 for (i = env->vstart; i < vl; i++) { \ 5231 if (!vext_elem_mask(vs1, i)) { \ 5232 continue; \ 5233 } \ 5234 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 5235 num++; \ 5236 } \ 5237 env->vstart = 0; \ 5238 /* set tail elements to 1s */ \ 5239 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5240 } 5241 5242 /* Compress into vd elements of vs2 where vs1 is enabled */ 5243 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 5244 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 5245 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 5246 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 5247 5248 /* Vector Whole Register Move */ 5249 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) 5250 { 5251 /* EEW = SEW */ 5252 uint32_t maxsz = simd_maxsz(desc); 5253 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); 5254 uint32_t startb = env->vstart * sewb; 5255 uint32_t i = startb; 5256 5257 memcpy((uint8_t *)vd + H1(i), 5258 (uint8_t *)vs2 + H1(i), 5259 maxsz - startb); 5260 5261 env->vstart = 0; 5262 } 5263 5264 /* Vector Integer Extension */ 5265 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \ 5266 void 
HELPER(NAME)(void *vd, void *v0, void *vs2, \ 5267 CPURISCVState *env, uint32_t desc) \ 5268 { \ 5269 uint32_t vl = env->vl; \ 5270 uint32_t vm = vext_vm(desc); \ 5271 uint32_t esz = sizeof(ETYPE); \ 5272 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5273 uint32_t vta = vext_vta(desc); \ 5274 uint32_t vma = vext_vma(desc); \ 5275 uint32_t i; \ 5276 \ 5277 for (i = env->vstart; i < vl; i++) { \ 5278 if (!vm && !vext_elem_mask(v0, i)) { \ 5279 /* set masked-off elements to 1s */ \ 5280 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 5281 continue; \ 5282 } \ 5283 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ 5284 } \ 5285 env->vstart = 0; \ 5286 /* set tail elements to 1s */ \ 5287 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5288 } 5289 5290 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) 5291 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2) 5292 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4) 5293 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1) 5294 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2) 5295 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1) 5296 5297 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1) 5298 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2) 5299 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) 5300 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) 5301 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) 5302 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) 5303