1 /* 2 * RISC-V Vector Extension Helpers for QEMU. 3 * 4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2 or later, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "qemu/host-utils.h" 21 #include "qemu/bitops.h" 22 #include "cpu.h" 23 #include "exec/memop.h" 24 #include "exec/exec-all.h" 25 #include "exec/helper-proto.h" 26 #include "fpu/softfloat.h" 27 #include "tcg/tcg-gvec-desc.h" 28 #include "internals.h" 29 #include <math.h> 30 31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, 32 target_ulong s2) 33 { 34 int vlmax, vl; 35 RISCVCPU *cpu = env_archcpu(env); 36 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL); 37 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW); 38 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); 39 int xlen = riscv_cpu_xlen(env); 40 bool vill = (s2 >> (xlen - 1)) & 0x1; 41 target_ulong reserved = s2 & 42 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT, 43 xlen - 1 - R_VTYPE_RESERVED_SHIFT); 44 45 if (lmul & 4) { 46 /* Fractional LMUL. */ 47 if (lmul == 4 || 48 cpu->cfg.elen >> (8 - lmul) < sew) { 49 vill = true; 50 } 51 } 52 53 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) { 54 /* only set vill bit. */ 55 env->vill = 1; 56 env->vtype = 0; 57 env->vl = 0; 58 env->vstart = 0; 59 return 0; 60 } 61 62 vlmax = vext_get_vlmax(cpu, s2); 63 if (s1 <= vlmax) { 64 vl = s1; 65 } else { 66 vl = vlmax; 67 } 68 env->vl = vl; 69 env->vtype = s2; 70 env->vstart = 0; 71 env->vill = 0; 72 return vl; 73 } 74 75 /* 76 * Note that vector data is stored in host-endian 64-bit chunks, 77 * so addressing units smaller than that needs a host-endian fixup. 
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

static inline uint32_t vext_vta(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA);
}

static inline uint32_t vext_vma(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VMA);
}

static inline uint32_t vext_vta_all_1s(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits.
     * So vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * Get the total number of elements, including prestart, body and tail
 * elements. Note that when LMUL < 1, the tail includes the elements past
 * VLMAX that are held in the same vector register.
 */
static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
                                            uint32_t esz)
{
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
    return (vlenb << emul) / esz;
}

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check. In user mode, there is no watchpoint support for now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
184 */ 185 static void probe_pages(CPURISCVState *env, target_ulong addr, 186 target_ulong len, uintptr_t ra, 187 MMUAccessType access_type) 188 { 189 target_ulong pagelen = -(addr | TARGET_PAGE_MASK); 190 target_ulong curlen = MIN(pagelen, len); 191 192 probe_access(env, adjust_addr(env, addr), curlen, access_type, 193 cpu_mmu_index(env, false), ra); 194 if (len > curlen) { 195 addr += curlen; 196 curlen = len - curlen; 197 probe_access(env, adjust_addr(env, addr), curlen, access_type, 198 cpu_mmu_index(env, false), ra); 199 } 200 } 201 202 /* set agnostic elements to 1s */ 203 static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, 204 uint32_t tot) 205 { 206 if (is_agnostic == 0) { 207 /* policy undisturbed */ 208 return; 209 } 210 if (tot - cnt == 0) { 211 return; 212 } 213 memset(base + cnt, -1, tot - cnt); 214 } 215 216 static inline void vext_set_elem_mask(void *v0, int index, 217 uint8_t value) 218 { 219 int idx = index / 64; 220 int pos = index % 64; 221 uint64_t old = ((uint64_t *)v0)[idx]; 222 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); 223 } 224 225 /* 226 * Earlier designs (pre-0.9) had a varying number of bits 227 * per mask value (MLEN). In the 0.9 design, MLEN=1. 228 * (Section 4.5) 229 */ 230 static inline int vext_elem_mask(void *v0, int index) 231 { 232 int idx = index / 64; 233 int pos = index % 64; 234 return (((uint64_t *)v0)[idx] >> pos) & 1; 235 } 236 237 /* elements operations for load and store */ 238 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, 239 uint32_t idx, void *vd, uintptr_t retaddr); 240 241 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 242 static void NAME(CPURISCVState *env, abi_ptr addr, \ 243 uint32_t idx, void *vd, uintptr_t retaddr)\ 244 { \ 245 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 246 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 247 } \ 248 249 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) 250 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw) 251 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) 252 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq) 253 254 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 255 static void NAME(CPURISCVState *env, abi_ptr addr, \ 256 uint32_t idx, void *vd, uintptr_t retaddr)\ 257 { \ 258 ETYPE data = *((ETYPE *)vd + H(idx)); \ 259 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 260 } 261 262 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) 263 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) 264 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) 265 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) 266 267 static void vext_set_tail_elems_1s(CPURISCVState *env, target_ulong vl, 268 void *vd, uint32_t desc, uint32_t nf, 269 uint32_t esz, uint32_t max_elems) 270 { 271 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 272 uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3; 273 uint32_t vta = vext_vta(desc); 274 uint32_t registers_used; 275 int k; 276 277 for (k = 0; k < nf; ++k) { 278 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz, 279 (k * max_elems + max_elems) * esz); 280 } 281 282 if (nf * max_elems % total_elems != 0) { 283 registers_used = ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 284 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 285 registers_used * vlenb); 286 } 287 } 288 289 /* 290 * stride: access vector element from strided memory 291 */ 292 static void 293 vext_ldst_stride(void *vd, void *v0, target_ulong base, 294 target_ulong stride, CPURISCVState *env, 295 uint32_t desc, uint32_t vm, 296 vext_ldst_elem_fn *ldst_elem, 297 uint32_t log2_esz, uintptr_t ra) 298 { 299 
uint32_t i, k; 300 uint32_t nf = vext_nf(desc); 301 uint32_t max_elems = vext_max_elems(desc, log2_esz); 302 uint32_t esz = 1 << log2_esz; 303 uint32_t vma = vext_vma(desc); 304 305 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 306 k = 0; 307 while (k < nf) { 308 if (!vm && !vext_elem_mask(v0, i)) { 309 /* set masked-off elements to 1s */ 310 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 311 (i + k * max_elems + 1) * esz); 312 k++; 313 continue; 314 } 315 target_ulong addr = base + stride * i + (k << log2_esz); 316 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 317 k++; 318 } 319 } 320 env->vstart = 0; 321 322 vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems); 323 } 324 325 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 326 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 327 target_ulong stride, CPURISCVState *env, \ 328 uint32_t desc) \ 329 { \ 330 uint32_t vm = vext_vm(desc); \ 331 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 332 ctzl(sizeof(ETYPE)), GETPC()); \ 333 } 334 335 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 336 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 337 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 338 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 339 340 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 341 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 342 target_ulong stride, CPURISCVState *env, \ 343 uint32_t desc) \ 344 { \ 345 uint32_t vm = vext_vm(desc); \ 346 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 347 ctzl(sizeof(ETYPE)), GETPC()); \ 348 } 349 350 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 351 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 352 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 353 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 354 355 /* 356 * unit-stride: access elements stored contiguously in memory 357 */ 358 359 /* unmasked unit-stride load and store operation */ 360 static void 361 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 362 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, 363 uintptr_t ra) 364 { 365 uint32_t i, k; 366 uint32_t nf = vext_nf(desc); 367 uint32_t max_elems = vext_max_elems(desc, log2_esz); 368 uint32_t esz = 1 << log2_esz; 369 370 /* load bytes from guest memory */ 371 for (i = env->vstart; i < evl; i++, env->vstart++) { 372 k = 0; 373 while (k < nf) { 374 target_ulong addr = base + ((i * nf + k) << log2_esz); 375 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 376 k++; 377 } 378 } 379 env->vstart = 0; 380 381 vext_set_tail_elems_1s(env, evl, vd, desc, nf, esz, max_elems); 382 } 383 384 /* 385 * masked unit-stride load and store operation will be a special case of stride, 386 * stride = NF * sizeof (MTYPE) 387 */ 388 389 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 390 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 391 CPURISCVState *env, uint32_t desc) \ 392 { \ 393 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 394 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 395 ctzl(sizeof(ETYPE)), GETPC()); \ 396 } \ 397 \ 398 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 399 CPURISCVState *env, uint32_t desc) \ 400 { \ 401 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 402 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 403 } 404 405 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 406 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 407 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 408 GEN_VEXT_LD_US(vle64_v, 
int64_t, lde_d) 409 410 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 411 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 412 CPURISCVState *env, uint32_t desc) \ 413 { \ 414 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 415 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 416 ctzl(sizeof(ETYPE)), GETPC()); \ 417 } \ 418 \ 419 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 420 CPURISCVState *env, uint32_t desc) \ 421 { \ 422 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 423 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 424 } 425 426 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 427 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 428 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 429 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 430 431 /* 432 * unit stride mask load and store, EEW = 1 433 */ 434 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 435 CPURISCVState *env, uint32_t desc) 436 { 437 /* evl = ceil(vl/8) */ 438 uint8_t evl = (env->vl + 7) >> 3; 439 vext_ldst_us(vd, base, env, desc, lde_b, 440 0, evl, GETPC()); 441 } 442 443 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 444 CPURISCVState *env, uint32_t desc) 445 { 446 /* evl = ceil(vl/8) */ 447 uint8_t evl = (env->vl + 7) >> 3; 448 vext_ldst_us(vd, base, env, desc, ste_b, 449 0, evl, GETPC()); 450 } 451 452 /* 453 * index: access vector element from indexed memory 454 */ 455 typedef target_ulong vext_get_index_addr(target_ulong base, 456 uint32_t idx, void *vs2); 457 458 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 459 static target_ulong NAME(target_ulong base, \ 460 uint32_t idx, void *vs2) \ 461 { \ 462 return (base + *((ETYPE *)vs2 + H(idx))); \ 463 } 464 465 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 466 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 467 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 468 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 469 470 static inline void 471 vext_ldst_index(void *vd, void *v0, target_ulong base, 472 void *vs2, CPURISCVState *env, uint32_t desc, 473 vext_get_index_addr get_index_addr, 474 vext_ldst_elem_fn *ldst_elem, 475 uint32_t log2_esz, uintptr_t ra) 476 { 477 uint32_t i, k; 478 uint32_t nf = vext_nf(desc); 479 uint32_t vm = vext_vm(desc); 480 uint32_t max_elems = vext_max_elems(desc, log2_esz); 481 uint32_t esz = 1 << log2_esz; 482 uint32_t vma = vext_vma(desc); 483 484 /* load bytes from guest memory */ 485 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 486 k = 0; 487 while (k < nf) { 488 if (!vm && !vext_elem_mask(v0, i)) { 489 /* set masked-off elements to 1s */ 490 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 491 (i + k * max_elems + 1) * esz); 492 k++; 493 continue; 494 } 495 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); 496 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 497 k++; 498 } 499 } 500 env->vstart = 0; 501 502 vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems); 503 } 504 505 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 506 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 507 void *vs2, CPURISCVState *env, uint32_t desc) \ 508 { \ 509 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 510 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ 511 } 512 513 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 514 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 515 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 516 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 517 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, 
lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
                    STORE_FN, ctzl(sizeof(ETYPE)), \
                    GETPC()); \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 * unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        while (k <
nf) { 618 if (!vm && !vext_elem_mask(v0, i)) { 619 /* set masked-off elements to 1s */ 620 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 621 (i + k * max_elems + 1) * esz); 622 k++; 623 continue; 624 } 625 target_ulong addr = base + ((i * nf + k) << log2_esz); 626 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 627 k++; 628 } 629 } 630 env->vstart = 0; 631 632 vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems); 633 } 634 635 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ 636 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 637 CPURISCVState *env, uint32_t desc) \ 638 { \ 639 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \ 640 ctzl(sizeof(ETYPE)), GETPC()); \ 641 } 642 643 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b) 644 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h) 645 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w) 646 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) 647 648 #define DO_SWAP(N, M) (M) 649 #define DO_AND(N, M) (N & M) 650 #define DO_XOR(N, M) (N ^ M) 651 #define DO_OR(N, M) (N | M) 652 #define DO_ADD(N, M) (N + M) 653 654 /* Signed min/max */ 655 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 656 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 657 658 /* Unsigned min/max */ 659 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 660 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 661 662 /* 663 * load and store whole register instructions 664 */ 665 static void 666 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 667 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra) 668 { 669 uint32_t i, k, off, pos; 670 uint32_t nf = vext_nf(desc); 671 uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3; 672 uint32_t max_elems = vlenb >> log2_esz; 673 674 k = env->vstart / max_elems; 675 off = env->vstart % max_elems; 676 677 if (off) { 678 /* load/store rest of elements of current segment pointed by vstart */ 679 for (pos = off; pos < max_elems; pos++, env->vstart++) { 680 target_ulong addr = base + ((pos + k * max_elems) << log2_esz); 681 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra); 682 } 683 k++; 684 } 685 686 /* load/store elements for rest of segments */ 687 for (; k < nf; k++) { 688 for (i = 0; i < max_elems; i++, env->vstart++) { 689 target_ulong addr = base + ((i + k * max_elems) << log2_esz); 690 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 691 } 692 } 693 694 env->vstart = 0; 695 } 696 697 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 698 void HELPER(NAME)(void *vd, target_ulong base, \ 699 CPURISCVState *env, uint32_t desc) \ 700 { \ 701 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 702 ctzl(sizeof(ETYPE)), GETPC()); \ 703 } 704 705 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 706 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 707 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 708 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 709 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 710 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 711 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 712 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 713 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 714 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 715 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 716 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 717 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 718 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 719 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 720 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 721 722 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 723 void 
HELPER(NAME)(void *vd, target_ulong base, \ 724 CPURISCVState *env, uint32_t desc) \ 725 { \ 726 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 727 ctzl(sizeof(ETYPE)), GETPC()); \ 728 } 729 730 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 731 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 732 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 733 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 734 735 /* 736 * Vector Integer Arithmetic Instructions 737 */ 738 739 /* expand macro args before macro */ 740 #define RVVCALL(macro, ...) macro(__VA_ARGS__) 741 742 /* (TD, T1, T2, TX1, TX2) */ 743 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 744 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 745 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 746 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 747 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 748 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 749 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 750 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 751 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 752 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 753 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 754 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 755 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 756 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 757 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 758 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 759 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 760 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 761 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 762 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 763 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 764 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 765 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 766 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 767 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 768 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 769 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 770 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 771 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 772 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 773 774 /* operation of two vector elements */ 775 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 776 777 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 778 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 779 { \ 780 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 781 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 782 *((TD *)vd + HD(i)) = OP(s2, s1); \ 783 } 784 #define DO_SUB(N, M) (N - M) 785 #define DO_RSUB(N, M) (M - N) 786 787 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 788 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 789 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 790 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 791 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 792 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 793 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 794 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 795 796 static void do_vext_vv(void 
*vd, void *v0, void *vs1, void *vs2, 797 CPURISCVState *env, uint32_t desc, 798 opivv2_fn *fn, uint32_t esz) 799 { 800 uint32_t vm = vext_vm(desc); 801 uint32_t vl = env->vl; 802 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 803 uint32_t vta = vext_vta(desc); 804 uint32_t vma = vext_vma(desc); 805 uint32_t i; 806 807 for (i = env->vstart; i < vl; i++) { 808 if (!vm && !vext_elem_mask(v0, i)) { 809 /* set masked-off elements to 1s */ 810 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 811 continue; 812 } 813 fn(vd, vs1, vs2, i); 814 } 815 env->vstart = 0; 816 /* set tail elements to 1s */ 817 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 818 } 819 820 /* generate the helpers for OPIVV */ 821 #define GEN_VEXT_VV(NAME, ESZ) \ 822 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 823 void *vs2, CPURISCVState *env, \ 824 uint32_t desc) \ 825 { \ 826 do_vext_vv(vd, v0, vs1, vs2, env, desc, \ 827 do_##NAME, ESZ); \ 828 } 829 830 GEN_VEXT_VV(vadd_vv_b, 1) 831 GEN_VEXT_VV(vadd_vv_h, 2) 832 GEN_VEXT_VV(vadd_vv_w, 4) 833 GEN_VEXT_VV(vadd_vv_d, 8) 834 GEN_VEXT_VV(vsub_vv_b, 1) 835 GEN_VEXT_VV(vsub_vv_h, 2) 836 GEN_VEXT_VV(vsub_vv_w, 4) 837 GEN_VEXT_VV(vsub_vv_d, 8) 838 839 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 840 841 /* 842 * (T1)s1 gives the real operator type. 843 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 844 */ 845 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 846 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 847 { \ 848 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 849 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 850 } 851 852 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 853 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 854 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 855 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 856 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 857 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 858 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 859 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 860 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 861 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 862 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 863 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 864 865 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 866 CPURISCVState *env, uint32_t desc, 867 opivx2_fn fn, uint32_t esz) 868 { 869 uint32_t vm = vext_vm(desc); 870 uint32_t vl = env->vl; 871 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 872 uint32_t vta = vext_vta(desc); 873 uint32_t vma = vext_vma(desc); 874 uint32_t i; 875 876 for (i = env->vstart; i < vl; i++) { 877 if (!vm && !vext_elem_mask(v0, i)) { 878 /* set masked-off elements to 1s */ 879 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 880 continue; 881 } 882 fn(vd, s1, vs2, i); 883 } 884 env->vstart = 0; 885 /* set tail elements to 1s */ 886 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 887 } 888 889 /* generate the helpers for OPIVX */ 890 #define GEN_VEXT_VX(NAME, ESZ) \ 891 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 892 void *vs2, CPURISCVState *env, \ 893 uint32_t desc) \ 894 { \ 895 do_vext_vx(vd, v0, s1, vs2, env, desc, \ 896 do_##NAME, ESZ); \ 897 } 898 899 GEN_VEXT_VX(vadd_vx_b, 1) 900 GEN_VEXT_VX(vadd_vx_h, 2) 901 GEN_VEXT_VX(vadd_vx_w, 4) 902 GEN_VEXT_VX(vadd_vx_d, 8) 903 GEN_VEXT_VX(vsub_vx_b, 1) 904 GEN_VEXT_VX(vsub_vx_h, 2) 905 
GEN_VEXT_VX(vsub_vx_w, 4) 906 GEN_VEXT_VX(vsub_vx_d, 8) 907 GEN_VEXT_VX(vrsub_vx_b, 1) 908 GEN_VEXT_VX(vrsub_vx_h, 2) 909 GEN_VEXT_VX(vrsub_vx_w, 4) 910 GEN_VEXT_VX(vrsub_vx_d, 8) 911 912 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 913 { 914 intptr_t oprsz = simd_oprsz(desc); 915 intptr_t i; 916 917 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 918 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 919 } 920 } 921 922 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 923 { 924 intptr_t oprsz = simd_oprsz(desc); 925 intptr_t i; 926 927 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 928 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 929 } 930 } 931 932 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 933 { 934 intptr_t oprsz = simd_oprsz(desc); 935 intptr_t i; 936 937 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 938 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 939 } 940 } 941 942 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 943 { 944 intptr_t oprsz = simd_oprsz(desc); 945 intptr_t i; 946 947 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 948 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 949 } 950 } 951 952 /* Vector Widening Integer Add/Subtract */ 953 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 954 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 955 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 956 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 957 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 958 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 959 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 960 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 961 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 962 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 963 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 964 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 965 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 966 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 967 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 968 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 969 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 970 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 971 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 972 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 973 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 974 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 975 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 976 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 977 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 978 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 979 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 980 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 981 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 982 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 983 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 984 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 985 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 986 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 987 RVVCALL(OPIVV2, 
vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 988 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 989 GEN_VEXT_VV(vwaddu_vv_b, 2) 990 GEN_VEXT_VV(vwaddu_vv_h, 4) 991 GEN_VEXT_VV(vwaddu_vv_w, 8) 992 GEN_VEXT_VV(vwsubu_vv_b, 2) 993 GEN_VEXT_VV(vwsubu_vv_h, 4) 994 GEN_VEXT_VV(vwsubu_vv_w, 8) 995 GEN_VEXT_VV(vwadd_vv_b, 2) 996 GEN_VEXT_VV(vwadd_vv_h, 4) 997 GEN_VEXT_VV(vwadd_vv_w, 8) 998 GEN_VEXT_VV(vwsub_vv_b, 2) 999 GEN_VEXT_VV(vwsub_vv_h, 4) 1000 GEN_VEXT_VV(vwsub_vv_w, 8) 1001 GEN_VEXT_VV(vwaddu_wv_b, 2) 1002 GEN_VEXT_VV(vwaddu_wv_h, 4) 1003 GEN_VEXT_VV(vwaddu_wv_w, 8) 1004 GEN_VEXT_VV(vwsubu_wv_b, 2) 1005 GEN_VEXT_VV(vwsubu_wv_h, 4) 1006 GEN_VEXT_VV(vwsubu_wv_w, 8) 1007 GEN_VEXT_VV(vwadd_wv_b, 2) 1008 GEN_VEXT_VV(vwadd_wv_h, 4) 1009 GEN_VEXT_VV(vwadd_wv_w, 8) 1010 GEN_VEXT_VV(vwsub_wv_b, 2) 1011 GEN_VEXT_VV(vwsub_wv_h, 4) 1012 GEN_VEXT_VV(vwsub_wv_w, 8) 1013 1014 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 1015 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 1016 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 1017 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 1018 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 1019 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 1020 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 1021 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 1022 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 1023 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 1024 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 1025 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 1026 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 1027 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 1028 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 1029 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 1030 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 1031 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 1032 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 1033 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 1034 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 1035 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 1036 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 1037 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 1038 GEN_VEXT_VX(vwaddu_vx_b, 2) 1039 GEN_VEXT_VX(vwaddu_vx_h, 4) 1040 GEN_VEXT_VX(vwaddu_vx_w, 8) 1041 GEN_VEXT_VX(vwsubu_vx_b, 2) 1042 GEN_VEXT_VX(vwsubu_vx_h, 4) 1043 GEN_VEXT_VX(vwsubu_vx_w, 8) 1044 GEN_VEXT_VX(vwadd_vx_b, 2) 1045 GEN_VEXT_VX(vwadd_vx_h, 4) 1046 GEN_VEXT_VX(vwadd_vx_w, 8) 1047 GEN_VEXT_VX(vwsub_vx_b, 2) 1048 GEN_VEXT_VX(vwsub_vx_h, 4) 1049 GEN_VEXT_VX(vwsub_vx_w, 8) 1050 GEN_VEXT_VX(vwaddu_wx_b, 2) 1051 GEN_VEXT_VX(vwaddu_wx_h, 4) 1052 GEN_VEXT_VX(vwaddu_wx_w, 8) 1053 GEN_VEXT_VX(vwsubu_wx_b, 2) 1054 GEN_VEXT_VX(vwsubu_wx_h, 4) 1055 GEN_VEXT_VX(vwsubu_wx_w, 8) 1056 GEN_VEXT_VX(vwadd_wx_b, 2) 1057 GEN_VEXT_VX(vwadd_wx_h, 4) 1058 GEN_VEXT_VX(vwadd_wx_w, 8) 1059 GEN_VEXT_VX(vwsub_wx_b, 2) 1060 GEN_VEXT_VX(vwsub_wx_h, 4) 1061 GEN_VEXT_VX(vwsub_wx_w, 8) 1062 1063 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 1064 #define DO_VADC(N, M, C) (N + M + C) 1065 #define DO_VSBC(N, M, C) (N - M - C) 1066 1067 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 1068 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1069 CPURISCVState *env, uint32_t desc) \ 1070 { \ 1071 uint32_t vl = env->vl; \ 1072 uint32_t esz = sizeof(ETYPE); \ 1073 uint32_t 
total_elems = \ 1074 vext_get_total_elems(env, desc, esz); \ 1075 uint32_t vta = vext_vta(desc); \ 1076 uint32_t i; \ 1077 \ 1078 for (i = env->vstart; i < vl; i++) { \ 1079 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1080 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1081 ETYPE carry = vext_elem_mask(v0, i); \ 1082 \ 1083 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 1084 } \ 1085 env->vstart = 0; \ 1086 /* set tail elements to 1s */ \ 1087 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1088 } 1089 1090 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 1091 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 1092 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 1093 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 1094 1095 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 1096 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 1097 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 1098 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 1099 1100 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 1101 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1102 CPURISCVState *env, uint32_t desc) \ 1103 { \ 1104 uint32_t vl = env->vl; \ 1105 uint32_t esz = sizeof(ETYPE); \ 1106 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1107 uint32_t vta = vext_vta(desc); \ 1108 uint32_t i; \ 1109 \ 1110 for (i = env->vstart; i < vl; i++) { \ 1111 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1112 ETYPE carry = vext_elem_mask(v0, i); \ 1113 \ 1114 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1115 } \ 1116 env->vstart = 0; \ 1117 /* set tail elements to 1s */ \ 1118 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1119 } 1120 1121 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1122 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1123 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1124 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1125 1126 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1127 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1128 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1129 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1130 1131 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1132 (__typeof(N))(N + M) < N) 1133 #define DO_MSBC(N, M, C) (C ? 
N <= M : N < M)

#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = !vm && vext_elem_mask(v0, i); \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
    } \
    env->vstart = 0; \
    /*
     * mask destination registers are always tail-agnostic
     * set tail elements to 1s
     */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = !vm && vext_elem_mask(v0, i); \
        vext_set_elem_mask(vd, i, \
                           DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
    } \
    env->vstart = 0; \
    /*
     * mask destination registers are always tail-agnostic
     * set tail elements to 1s
     */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
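/*
 * Illustrative note (not part of the generated helpers): the RVVCALL/OPIVV2
 * lines above only define the per-element functions.  For example, the
 * vand_vv_b entry expands to roughly:
 *
 *     static void do_vand_vv_b(void *vd, void *vs1, void *vs2, int i)
 *     {
 *         int8_t s1 = *((int8_t *)vs1 + H1(i));
 *         int8_t s2 = *((int8_t *)vs2 + H1(i));
 *         *((int8_t *)vd + H1(i)) = s2 & s1;
 *     }
 *
 * The GEN_VEXT_VV(vand_vv_b, 1) invocation below then wraps do_vand_vv_b
 * in do_vext_vv(), which supplies the per-element loop, mask handling (vma)
 * and tail-agnostic handling (vta).
 */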
GEN_VEXT_VV(vand_vv_b, 1) 1225 GEN_VEXT_VV(vand_vv_h, 2) 1226 GEN_VEXT_VV(vand_vv_w, 4) 1227 GEN_VEXT_VV(vand_vv_d, 8) 1228 GEN_VEXT_VV(vor_vv_b, 1) 1229 GEN_VEXT_VV(vor_vv_h, 2) 1230 GEN_VEXT_VV(vor_vv_w, 4) 1231 GEN_VEXT_VV(vor_vv_d, 8) 1232 GEN_VEXT_VV(vxor_vv_b, 1) 1233 GEN_VEXT_VV(vxor_vv_h, 2) 1234 GEN_VEXT_VV(vxor_vv_w, 4) 1235 GEN_VEXT_VV(vxor_vv_d, 8) 1236 1237 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1238 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1239 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1240 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1241 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1242 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1243 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1244 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1245 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1246 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1247 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1248 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1249 GEN_VEXT_VX(vand_vx_b, 1) 1250 GEN_VEXT_VX(vand_vx_h, 2) 1251 GEN_VEXT_VX(vand_vx_w, 4) 1252 GEN_VEXT_VX(vand_vx_d, 8) 1253 GEN_VEXT_VX(vor_vx_b, 1) 1254 GEN_VEXT_VX(vor_vx_h, 2) 1255 GEN_VEXT_VX(vor_vx_w, 4) 1256 GEN_VEXT_VX(vor_vx_d, 8) 1257 GEN_VEXT_VX(vxor_vx_b, 1) 1258 GEN_VEXT_VX(vxor_vx_h, 2) 1259 GEN_VEXT_VX(vxor_vx_w, 4) 1260 GEN_VEXT_VX(vxor_vx_d, 8) 1261 1262 /* Vector Single-Width Bit Shift Instructions */ 1263 #define DO_SLL(N, M) (N << (M)) 1264 #define DO_SRL(N, M) (N >> (M)) 1265 1266 /* generate the helpers for shift instructions with two vector operators */ 1267 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1268 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1269 void *vs2, CPURISCVState *env, uint32_t desc) \ 1270 { \ 1271 uint32_t vm = vext_vm(desc); \ 1272 uint32_t vl = env->vl; \ 1273 uint32_t esz = sizeof(TS1); \ 1274 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1275 uint32_t vta = vext_vta(desc); \ 1276 uint32_t vma = vext_vma(desc); \ 1277 uint32_t i; \ 1278 \ 1279 for (i = env->vstart; i < vl; i++) { \ 1280 if (!vm && !vext_elem_mask(v0, i)) { \ 1281 /* set masked-off elements to 1s */ \ 1282 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 1283 continue; \ 1284 } \ 1285 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1286 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1287 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1288 } \ 1289 env->vstart = 0; \ 1290 /* set tail elements to 1s */ \ 1291 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1292 } 1293 1294 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1295 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1296 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1297 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1298 1299 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1300 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1301 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1302 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1303 1304 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1305 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1306 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1307 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1308 1309 /* generate the helpers for shift 
instructions with one vector and one scalar */ 1310 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1311 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1312 void *vs2, CPURISCVState *env, \ 1313 uint32_t desc) \ 1314 { \ 1315 uint32_t vm = vext_vm(desc); \ 1316 uint32_t vl = env->vl; \ 1317 uint32_t esz = sizeof(TD); \ 1318 uint32_t total_elems = \ 1319 vext_get_total_elems(env, desc, esz); \ 1320 uint32_t vta = vext_vta(desc); \ 1321 uint32_t vma = vext_vma(desc); \ 1322 uint32_t i; \ 1323 \ 1324 for (i = env->vstart; i < vl; i++) { \ 1325 if (!vm && !vext_elem_mask(v0, i)) { \ 1326 /* set masked-off elements to 1s */ \ 1327 vext_set_elems_1s(vd, vma, i * esz, \ 1328 (i + 1) * esz); \ 1329 continue; \ 1330 } \ 1331 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1332 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1333 } \ 1334 env->vstart = 0; \ 1335 /* set tail elements to 1s */ \ 1336 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ 1337 } 1338 1339 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1340 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1341 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1342 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1343 1344 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1345 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1346 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1347 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1348 1349 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1350 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1351 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1352 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1353 1354 /* Vector Narrowing Integer Right Shift Instructions */ 1355 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1356 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1357 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1358 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1359 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1360 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1361 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1362 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1363 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1364 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1365 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1366 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1367 1368 /* Vector Integer Comparison Instructions */ 1369 #define DO_MSEQ(N, M) (N == M) 1370 #define DO_MSNE(N, M) (N != M) 1371 #define DO_MSLT(N, M) (N < M) 1372 #define DO_MSLE(N, M) (N <= M) 1373 #define DO_MSGT(N, M) (N > M) 1374 1375 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1376 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1377 CPURISCVState *env, uint32_t desc) \ 1378 { \ 1379 uint32_t vm = vext_vm(desc); \ 1380 uint32_t vl = env->vl; \ 1381 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 1382 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1383 uint32_t vma = vext_vma(desc); \ 1384 uint32_t i; \ 1385 \ 1386 for (i = env->vstart; i < vl; i++) { \ 1387 ETYPE s1 
= *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            if (vma) { \
                vext_set_elem_mask(vd, i, 1); \
            } \
            continue; \
        } \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
    } \
    env->vstart = 0; \
    /*
     * mask destination registers are always tail-agnostic
     * set tail elements to 1s
     */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \
    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            if (vma) { \
                vext_set_elem_mask(vd, i, 1); \
            } \
            continue; \
        } \
        vext_set_elem_mask(vd, i, \
                           DO_OP(s2, (ETYPE)(target_long)s1)); \
    } \
    env->vstart = 0; \
    /*
     * mask destination registers are always tail-agnostic
     * set tail elements to 1s
     */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1488 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1489 1490 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1491 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1492 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1493 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1494 1495 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1496 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1497 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1498 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1499 1500 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1501 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1502 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1503 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1504 1505 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1506 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1507 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1508 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1509 1510 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1511 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1512 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1513 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1514 1515 /* Vector Integer Min/Max Instructions */ 1516 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1517 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1518 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1519 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1520 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1521 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1522 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1523 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1524 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1525 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1526 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1527 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1528 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1529 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1530 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1531 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1532 GEN_VEXT_VV(vminu_vv_b, 1) 1533 GEN_VEXT_VV(vminu_vv_h, 2) 1534 GEN_VEXT_VV(vminu_vv_w, 4) 1535 GEN_VEXT_VV(vminu_vv_d, 8) 1536 GEN_VEXT_VV(vmin_vv_b, 1) 1537 GEN_VEXT_VV(vmin_vv_h, 2) 1538 GEN_VEXT_VV(vmin_vv_w, 4) 1539 GEN_VEXT_VV(vmin_vv_d, 8) 1540 GEN_VEXT_VV(vmaxu_vv_b, 1) 1541 GEN_VEXT_VV(vmaxu_vv_h, 2) 1542 GEN_VEXT_VV(vmaxu_vv_w, 4) 1543 GEN_VEXT_VV(vmaxu_vv_d, 8) 1544 GEN_VEXT_VV(vmax_vv_b, 1) 1545 GEN_VEXT_VV(vmax_vv_h, 2) 1546 GEN_VEXT_VV(vmax_vv_w, 4) 1547 GEN_VEXT_VV(vmax_vv_d, 8) 1548 1549 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1550 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1551 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1552 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1553 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1554 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1555 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1556 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1557 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1558 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1559 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1560 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1561 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, 
H1, H1, DO_MAX) 1562 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1563 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1564 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1565 GEN_VEXT_VX(vminu_vx_b, 1) 1566 GEN_VEXT_VX(vminu_vx_h, 2) 1567 GEN_VEXT_VX(vminu_vx_w, 4) 1568 GEN_VEXT_VX(vminu_vx_d, 8) 1569 GEN_VEXT_VX(vmin_vx_b, 1) 1570 GEN_VEXT_VX(vmin_vx_h, 2) 1571 GEN_VEXT_VX(vmin_vx_w, 4) 1572 GEN_VEXT_VX(vmin_vx_d, 8) 1573 GEN_VEXT_VX(vmaxu_vx_b, 1) 1574 GEN_VEXT_VX(vmaxu_vx_h, 2) 1575 GEN_VEXT_VX(vmaxu_vx_w, 4) 1576 GEN_VEXT_VX(vmaxu_vx_d, 8) 1577 GEN_VEXT_VX(vmax_vx_b, 1) 1578 GEN_VEXT_VX(vmax_vx_h, 2) 1579 GEN_VEXT_VX(vmax_vx_w, 4) 1580 GEN_VEXT_VX(vmax_vx_d, 8) 1581 1582 /* Vector Single-Width Integer Multiply Instructions */ 1583 #define DO_MUL(N, M) (N * M) 1584 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1585 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1586 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1587 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1588 GEN_VEXT_VV(vmul_vv_b, 1) 1589 GEN_VEXT_VV(vmul_vv_h, 2) 1590 GEN_VEXT_VV(vmul_vv_w, 4) 1591 GEN_VEXT_VV(vmul_vv_d, 8) 1592 1593 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1594 { 1595 return (int16_t)s2 * (int16_t)s1 >> 8; 1596 } 1597 1598 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1599 { 1600 return (int32_t)s2 * (int32_t)s1 >> 16; 1601 } 1602 1603 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1604 { 1605 return (int64_t)s2 * (int64_t)s1 >> 32; 1606 } 1607 1608 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1609 { 1610 uint64_t hi_64, lo_64; 1611 1612 muls64(&lo_64, &hi_64, s1, s2); 1613 return hi_64; 1614 } 1615 1616 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1617 { 1618 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1619 } 1620 1621 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1622 { 1623 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1624 } 1625 1626 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1627 { 1628 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1629 } 1630 1631 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1632 { 1633 uint64_t hi_64, lo_64; 1634 1635 mulu64(&lo_64, &hi_64, s2, s1); 1636 return hi_64; 1637 } 1638 1639 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1640 { 1641 return (int16_t)s2 * (uint16_t)s1 >> 8; 1642 } 1643 1644 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1645 { 1646 return (int32_t)s2 * (uint32_t)s1 >> 16; 1647 } 1648 1649 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1650 { 1651 return (int64_t)s2 * (uint64_t)s1 >> 32; 1652 } 1653 1654 /* 1655 * Let A = signed operand, 1656 * B = unsigned operand 1657 * P = mulu64(A, B), unsigned product 1658 * 1659 * LET X = 2 ** 64 - A, 2's complement of A 1660 * SP = signed product 1661 * THEN 1662 * IF A < 0 1663 * SP = -X * B 1664 * = -(2 ** 64 - A) * B 1665 * = A * B - 2 ** 64 * B 1666 * = P - 2 ** 64 * B 1667 * ELSE 1668 * SP = P 1669 * THEN 1670 * HI_P -= (A < 0 ? B : 0) 1671 */ 1672 1673 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1674 { 1675 uint64_t hi_64, lo_64; 1676 1677 mulu64(&lo_64, &hi_64, s2, s1); 1678 1679 hi_64 -= s2 < 0 ? 
s1 : 0; 1680 return hi_64; 1681 } 1682 1683 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1684 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1685 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1686 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1687 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1688 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1689 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1690 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1691 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1692 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1693 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1694 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1695 GEN_VEXT_VV(vmulh_vv_b, 1) 1696 GEN_VEXT_VV(vmulh_vv_h, 2) 1697 GEN_VEXT_VV(vmulh_vv_w, 4) 1698 GEN_VEXT_VV(vmulh_vv_d, 8) 1699 GEN_VEXT_VV(vmulhu_vv_b, 1) 1700 GEN_VEXT_VV(vmulhu_vv_h, 2) 1701 GEN_VEXT_VV(vmulhu_vv_w, 4) 1702 GEN_VEXT_VV(vmulhu_vv_d, 8) 1703 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1704 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1705 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1706 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1707 1708 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1709 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1710 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1711 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1712 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1713 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1714 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1715 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1716 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1717 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1718 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1719 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1720 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1721 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1722 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1723 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1724 GEN_VEXT_VX(vmul_vx_b, 1) 1725 GEN_VEXT_VX(vmul_vx_h, 2) 1726 GEN_VEXT_VX(vmul_vx_w, 4) 1727 GEN_VEXT_VX(vmul_vx_d, 8) 1728 GEN_VEXT_VX(vmulh_vx_b, 1) 1729 GEN_VEXT_VX(vmulh_vx_h, 2) 1730 GEN_VEXT_VX(vmulh_vx_w, 4) 1731 GEN_VEXT_VX(vmulh_vx_d, 8) 1732 GEN_VEXT_VX(vmulhu_vx_b, 1) 1733 GEN_VEXT_VX(vmulhu_vx_h, 2) 1734 GEN_VEXT_VX(vmulhu_vx_w, 4) 1735 GEN_VEXT_VX(vmulhu_vx_d, 8) 1736 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1737 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1738 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1739 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1740 1741 /* Vector Integer Divide Instructions */ 1742 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1743 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1744 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ 1745 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1746 #define DO_REM(N, M) (unlikely(M == 0) ? N : \ 1747 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1748 1749 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1750 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1751 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1752 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1753 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1754 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1755 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1756 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1757 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1758 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1759 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1760 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1761 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1762 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1763 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1764 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1765 GEN_VEXT_VV(vdivu_vv_b, 1) 1766 GEN_VEXT_VV(vdivu_vv_h, 2) 1767 GEN_VEXT_VV(vdivu_vv_w, 4) 1768 GEN_VEXT_VV(vdivu_vv_d, 8) 1769 GEN_VEXT_VV(vdiv_vv_b, 1) 1770 GEN_VEXT_VV(vdiv_vv_h, 2) 1771 GEN_VEXT_VV(vdiv_vv_w, 4) 1772 GEN_VEXT_VV(vdiv_vv_d, 8) 1773 GEN_VEXT_VV(vremu_vv_b, 1) 1774 GEN_VEXT_VV(vremu_vv_h, 2) 1775 GEN_VEXT_VV(vremu_vv_w, 4) 1776 GEN_VEXT_VV(vremu_vv_d, 8) 1777 GEN_VEXT_VV(vrem_vv_b, 1) 1778 GEN_VEXT_VV(vrem_vv_h, 2) 1779 GEN_VEXT_VV(vrem_vv_w, 4) 1780 GEN_VEXT_VV(vrem_vv_d, 8) 1781 1782 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1783 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1784 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1785 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1786 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1787 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1788 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1789 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1790 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1791 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1792 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1793 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1794 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1795 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1796 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1797 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1798 GEN_VEXT_VX(vdivu_vx_b, 1) 1799 GEN_VEXT_VX(vdivu_vx_h, 2) 1800 GEN_VEXT_VX(vdivu_vx_w, 4) 1801 GEN_VEXT_VX(vdivu_vx_d, 8) 1802 GEN_VEXT_VX(vdiv_vx_b, 1) 1803 GEN_VEXT_VX(vdiv_vx_h, 2) 1804 GEN_VEXT_VX(vdiv_vx_w, 4) 1805 GEN_VEXT_VX(vdiv_vx_d, 8) 1806 GEN_VEXT_VX(vremu_vx_b, 1) 1807 GEN_VEXT_VX(vremu_vx_h, 2) 1808 GEN_VEXT_VX(vremu_vx_w, 4) 1809 GEN_VEXT_VX(vremu_vx_d, 8) 1810 GEN_VEXT_VX(vrem_vx_b, 1) 1811 GEN_VEXT_VX(vrem_vx_h, 2) 1812 GEN_VEXT_VX(vrem_vx_w, 4) 1813 GEN_VEXT_VX(vrem_vx_d, 8) 1814 1815 /* Vector Widening Integer Multiply Instructions */ 1816 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1817 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1818 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1819 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1820 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1821 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1822 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1823 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, 
DO_MUL) 1824 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1825 GEN_VEXT_VV(vwmul_vv_b, 2) 1826 GEN_VEXT_VV(vwmul_vv_h, 4) 1827 GEN_VEXT_VV(vwmul_vv_w, 8) 1828 GEN_VEXT_VV(vwmulu_vv_b, 2) 1829 GEN_VEXT_VV(vwmulu_vv_h, 4) 1830 GEN_VEXT_VV(vwmulu_vv_w, 8) 1831 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1832 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1833 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1834 1835 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1836 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1837 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1838 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1839 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1840 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1841 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1842 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1843 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1844 GEN_VEXT_VX(vwmul_vx_b, 2) 1845 GEN_VEXT_VX(vwmul_vx_h, 4) 1846 GEN_VEXT_VX(vwmul_vx_w, 8) 1847 GEN_VEXT_VX(vwmulu_vx_b, 2) 1848 GEN_VEXT_VX(vwmulu_vx_h, 4) 1849 GEN_VEXT_VX(vwmulu_vx_w, 8) 1850 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1851 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1852 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1853 1854 /* Vector Single-Width Integer Multiply-Add Instructions */ 1855 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1856 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1857 { \ 1858 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1859 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1860 TD d = *((TD *)vd + HD(i)); \ 1861 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1862 } 1863 1864 #define DO_MACC(N, M, D) (M * N + D) 1865 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1866 #define DO_MADD(N, M, D) (M * D + N) 1867 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1868 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1869 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1870 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1871 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1872 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1873 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1874 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1875 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1876 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1877 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1878 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1879 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1880 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1881 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1882 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1883 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1884 GEN_VEXT_VV(vmacc_vv_b, 1) 1885 GEN_VEXT_VV(vmacc_vv_h, 2) 1886 GEN_VEXT_VV(vmacc_vv_w, 4) 1887 GEN_VEXT_VV(vmacc_vv_d, 8) 1888 GEN_VEXT_VV(vnmsac_vv_b, 1) 1889 GEN_VEXT_VV(vnmsac_vv_h, 2) 1890 GEN_VEXT_VV(vnmsac_vv_w, 4) 1891 GEN_VEXT_VV(vnmsac_vv_d, 8) 1892 GEN_VEXT_VV(vmadd_vv_b, 1) 1893 GEN_VEXT_VV(vmadd_vv_h, 2) 1894 GEN_VEXT_VV(vmadd_vv_w, 4) 1895 GEN_VEXT_VV(vmadd_vv_d, 8) 1896 GEN_VEXT_VV(vnmsub_vv_b, 1) 1897 GEN_VEXT_VV(vnmsub_vv_h, 2) 1898 GEN_VEXT_VV(vnmsub_vv_w, 4) 1899 GEN_VEXT_VV(vnmsub_vv_d, 8) 1900 1901 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1902 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1903 { \ 1904 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1905 TD d = *((TD *)vd 
+ HD(i)); \ 1906 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1907 } 1908 1909 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1910 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1911 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1912 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1913 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1914 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1915 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1916 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1917 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1918 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1919 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1920 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1921 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1922 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1923 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1924 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1925 GEN_VEXT_VX(vmacc_vx_b, 1) 1926 GEN_VEXT_VX(vmacc_vx_h, 2) 1927 GEN_VEXT_VX(vmacc_vx_w, 4) 1928 GEN_VEXT_VX(vmacc_vx_d, 8) 1929 GEN_VEXT_VX(vnmsac_vx_b, 1) 1930 GEN_VEXT_VX(vnmsac_vx_h, 2) 1931 GEN_VEXT_VX(vnmsac_vx_w, 4) 1932 GEN_VEXT_VX(vnmsac_vx_d, 8) 1933 GEN_VEXT_VX(vmadd_vx_b, 1) 1934 GEN_VEXT_VX(vmadd_vx_h, 2) 1935 GEN_VEXT_VX(vmadd_vx_w, 4) 1936 GEN_VEXT_VX(vmadd_vx_d, 8) 1937 GEN_VEXT_VX(vnmsub_vx_b, 1) 1938 GEN_VEXT_VX(vnmsub_vx_h, 2) 1939 GEN_VEXT_VX(vnmsub_vx_w, 4) 1940 GEN_VEXT_VX(vnmsub_vx_d, 8) 1941 1942 /* Vector Widening Integer Multiply-Add Instructions */ 1943 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1944 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1945 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1946 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1947 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1948 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1949 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1950 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1951 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1952 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1953 GEN_VEXT_VV(vwmaccu_vv_h, 4) 1954 GEN_VEXT_VV(vwmaccu_vv_w, 8) 1955 GEN_VEXT_VV(vwmacc_vv_b, 2) 1956 GEN_VEXT_VV(vwmacc_vv_h, 4) 1957 GEN_VEXT_VV(vwmacc_vv_w, 8) 1958 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 1959 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 1960 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 1961 1962 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1963 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1964 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1965 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1966 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1967 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1968 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1969 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1970 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1971 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1972 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1973 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1974 GEN_VEXT_VX(vwmaccu_vx_b, 2) 1975 GEN_VEXT_VX(vwmaccu_vx_h, 4) 1976 GEN_VEXT_VX(vwmaccu_vx_w, 8) 1977 GEN_VEXT_VX(vwmacc_vx_b, 2) 1978 GEN_VEXT_VX(vwmacc_vx_h, 4) 1979 GEN_VEXT_VX(vwmacc_vx_w, 8) 1980 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 1981 
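
/*
 * Illustrative sketch (hypothetical, not part of the build): for the
 * widening multiply-add forms generated around this point the destination
 * element is twice as wide as the sources, so OPIVV3/OPIVX3 load the
 * narrow operands, widen them through the TX1/TX2 types, and only then
 * apply DO_MACC.  The standalone example below shows the same arithmetic
 * for the SEW=8 signed case (vwmacc with byte sources and a halfword
 * accumulator); as in the generated helpers, the 16-bit accumulation
 * simply wraps on overflow.
 */
static int16_t G_GNUC_UNUSED sketch_wmacc_vv_b(int8_t s2, int8_t s1, int16_t d)
{
    /* form the product at the doubled (destination) width, then add */
    return (int16_t)((int16_t)s2 * (int16_t)s1 + d);
}
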
GEN_VEXT_VX(vwmaccsu_vx_h, 4) 1982 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 1983 GEN_VEXT_VX(vwmaccus_vx_b, 2) 1984 GEN_VEXT_VX(vwmaccus_vx_h, 4) 1985 GEN_VEXT_VX(vwmaccus_vx_w, 8) 1986 1987 /* Vector Integer Merge and Move Instructions */ 1988 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1989 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1990 uint32_t desc) \ 1991 { \ 1992 uint32_t vl = env->vl; \ 1993 uint32_t esz = sizeof(ETYPE); \ 1994 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1995 uint32_t vta = vext_vta(desc); \ 1996 uint32_t i; \ 1997 \ 1998 for (i = env->vstart; i < vl; i++) { \ 1999 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 2000 *((ETYPE *)vd + H(i)) = s1; \ 2001 } \ 2002 env->vstart = 0; \ 2003 /* set tail elements to 1s */ \ 2004 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2005 } 2006 2007 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 2008 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 2009 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 2010 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 2011 2012 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 2013 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2014 uint32_t desc) \ 2015 { \ 2016 uint32_t vl = env->vl; \ 2017 uint32_t esz = sizeof(ETYPE); \ 2018 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2019 uint32_t vta = vext_vta(desc); \ 2020 uint32_t i; \ 2021 \ 2022 for (i = env->vstart; i < vl; i++) { \ 2023 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2024 } \ 2025 env->vstart = 0; \ 2026 /* set tail elements to 1s */ \ 2027 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2028 } 2029 2030 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2031 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2032 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2033 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2034 2035 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2036 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2037 CPURISCVState *env, uint32_t desc) \ 2038 { \ 2039 uint32_t vl = env->vl; \ 2040 uint32_t esz = sizeof(ETYPE); \ 2041 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2042 uint32_t vta = vext_vta(desc); \ 2043 uint32_t i; \ 2044 \ 2045 for (i = env->vstart; i < vl; i++) { \ 2046 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2047 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2048 } \ 2049 env->vstart = 0; \ 2050 /* set tail elements to 1s */ \ 2051 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2052 } 2053 2054 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2055 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2056 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2057 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2058 2059 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2060 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2061 void *vs2, CPURISCVState *env, uint32_t desc) \ 2062 { \ 2063 uint32_t vl = env->vl; \ 2064 uint32_t esz = sizeof(ETYPE); \ 2065 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2066 uint32_t vta = vext_vta(desc); \ 2067 uint32_t i; \ 2068 \ 2069 for (i = env->vstart; i < vl; i++) { \ 2070 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2071 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 2072 (ETYPE)(target_long)s1); \ 2073 *((ETYPE *)vd + H(i)) = d; \ 2074 } \ 2075 env->vstart = 0; \ 2076 /* set tail elements to 1s */ \ 2077 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2078 } 2079 2080 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2081 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2082 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2083 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2084 2085 /* 2086 * Vector Fixed-Point Arithmetic Instructions 2087 */ 2088 2089 /* Vector Single-Width Saturating Add and Subtract */ 2090 2091 /* 2092 * As fixed point instructions probably have round mode and saturation, 2093 * define common macros for fixed point here. 2094 */ 2095 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2096 CPURISCVState *env, int vxrm); 2097 2098 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2099 static inline void \ 2100 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2101 CPURISCVState *env, int vxrm) \ 2102 { \ 2103 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2104 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2105 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2106 } 2107 2108 static inline void 2109 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2110 CPURISCVState *env, 2111 uint32_t vl, uint32_t vm, int vxrm, 2112 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 2113 { 2114 for (uint32_t i = env->vstart; i < vl; i++) { 2115 if (!vm && !vext_elem_mask(v0, i)) { 2116 /* set masked-off elements to 1s */ 2117 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2118 continue; 2119 } 2120 fn(vd, vs1, vs2, i, env, vxrm); 2121 } 2122 env->vstart = 0; 2123 } 2124 2125 static inline void 2126 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2127 CPURISCVState *env, 2128 uint32_t desc, 2129 opivv2_rm_fn *fn, uint32_t esz) 2130 { 2131 uint32_t vm = vext_vm(desc); 2132 uint32_t vl = env->vl; 2133 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2134 uint32_t vta = vext_vta(desc); 2135 uint32_t vma = vext_vma(desc); 2136 2137 switch (env->vxrm) { 2138 case 0: /* rnu */ 2139 vext_vv_rm_1(vd, v0, vs1, vs2, 2140 env, vl, vm, 0, fn, vma, esz); 2141 break; 2142 case 1: /* rne */ 2143 vext_vv_rm_1(vd, v0, vs1, vs2, 2144 env, vl, vm, 1, fn, vma, esz); 2145 break; 2146 case 2: /* rdn */ 2147 vext_vv_rm_1(vd, v0, vs1, vs2, 2148 env, vl, vm, 2, fn, vma, esz); 2149 break; 2150 default: /* rod */ 2151 vext_vv_rm_1(vd, v0, vs1, vs2, 2152 env, vl, vm, 3, fn, vma, esz); 2153 break; 2154 } 2155 /* set tail elements to 1s */ 2156 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2157 } 2158 2159 /* generate helpers for fixed point instructions with OPIVV format */ 2160 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2161 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2162 CPURISCVState *env, uint32_t desc) \ 2163 { \ 2164 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2165 do_##NAME, ESZ); \ 2166 } 2167 2168 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2169 { 2170 uint8_t res = a + b; 2171 if (res < a) { 2172 res = UINT8_MAX; 2173 env->vxsat = 0x1; 2174 } 2175 return res; 2176 } 2177 2178 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2179 uint16_t b) 2180 { 2181 uint16_t res = a + b; 2182 if (res < a) { 2183 res = UINT16_MAX; 2184 env->vxsat = 0x1; 2185 } 2186 return res; 2187 } 2188 2189 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2190 uint32_t b) 2191 { 2192 uint32_t res = a + b; 2193 if (res < a) { 2194 res = UINT32_MAX; 2195 
env->vxsat = 0x1; 2196 } 2197 return res; 2198 } 2199 2200 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2201 uint64_t b) 2202 { 2203 uint64_t res = a + b; 2204 if (res < a) { 2205 res = UINT64_MAX; 2206 env->vxsat = 0x1; 2207 } 2208 return res; 2209 } 2210 2211 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2212 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2213 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2214 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2215 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2216 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2217 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2218 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2219 2220 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2221 CPURISCVState *env, int vxrm); 2222 2223 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2224 static inline void \ 2225 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2226 CPURISCVState *env, int vxrm) \ 2227 { \ 2228 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2229 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2230 } 2231 2232 static inline void 2233 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2234 CPURISCVState *env, 2235 uint32_t vl, uint32_t vm, int vxrm, 2236 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2237 { 2238 for (uint32_t i = env->vstart; i < vl; i++) { 2239 if (!vm && !vext_elem_mask(v0, i)) { 2240 /* set masked-off elements to 1s */ 2241 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2242 continue; 2243 } 2244 fn(vd, s1, vs2, i, env, vxrm); 2245 } 2246 env->vstart = 0; 2247 } 2248 2249 static inline void 2250 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2251 CPURISCVState *env, 2252 uint32_t desc, 2253 opivx2_rm_fn *fn, uint32_t esz) 2254 { 2255 uint32_t vm = vext_vm(desc); 2256 uint32_t vl = env->vl; 2257 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2258 uint32_t vta = vext_vta(desc); 2259 uint32_t vma = vext_vma(desc); 2260 2261 switch (env->vxrm) { 2262 case 0: /* rnu */ 2263 vext_vx_rm_1(vd, v0, s1, vs2, 2264 env, vl, vm, 0, fn, vma, esz); 2265 break; 2266 case 1: /* rne */ 2267 vext_vx_rm_1(vd, v0, s1, vs2, 2268 env, vl, vm, 1, fn, vma, esz); 2269 break; 2270 case 2: /* rdn */ 2271 vext_vx_rm_1(vd, v0, s1, vs2, 2272 env, vl, vm, 2, fn, vma, esz); 2273 break; 2274 default: /* rod */ 2275 vext_vx_rm_1(vd, v0, s1, vs2, 2276 env, vl, vm, 3, fn, vma, esz); 2277 break; 2278 } 2279 /* set tail elements to 1s */ 2280 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2281 } 2282 2283 /* generate helpers for fixed point instructions with OPIVX format */ 2284 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2285 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2286 void *vs2, CPURISCVState *env, \ 2287 uint32_t desc) \ 2288 { \ 2289 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2290 do_##NAME, ESZ); \ 2291 } 2292 2293 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2294 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2295 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2296 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2297 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2298 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2299 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2300 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2301 2302 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2303 { 2304 int8_t res = a + b; 2305 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2306 res = a > 0 ? 
INT8_MAX : INT8_MIN; 2307 env->vxsat = 0x1; 2308 } 2309 return res; 2310 } 2311 2312 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2313 { 2314 int16_t res = a + b; 2315 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2316 res = a > 0 ? INT16_MAX : INT16_MIN; 2317 env->vxsat = 0x1; 2318 } 2319 return res; 2320 } 2321 2322 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2323 { 2324 int32_t res = a + b; 2325 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2326 res = a > 0 ? INT32_MAX : INT32_MIN; 2327 env->vxsat = 0x1; 2328 } 2329 return res; 2330 } 2331 2332 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2333 { 2334 int64_t res = a + b; 2335 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2336 res = a > 0 ? INT64_MAX : INT64_MIN; 2337 env->vxsat = 0x1; 2338 } 2339 return res; 2340 } 2341 2342 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2343 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2344 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2345 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2346 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2347 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2348 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2349 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2350 2351 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2352 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2353 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2354 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2355 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2356 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2357 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2358 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2359 2360 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2361 { 2362 uint8_t res = a - b; 2363 if (res > a) { 2364 res = 0; 2365 env->vxsat = 0x1; 2366 } 2367 return res; 2368 } 2369 2370 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2371 uint16_t b) 2372 { 2373 uint16_t res = a - b; 2374 if (res > a) { 2375 res = 0; 2376 env->vxsat = 0x1; 2377 } 2378 return res; 2379 } 2380 2381 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2382 uint32_t b) 2383 { 2384 uint32_t res = a - b; 2385 if (res > a) { 2386 res = 0; 2387 env->vxsat = 0x1; 2388 } 2389 return res; 2390 } 2391 2392 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2393 uint64_t b) 2394 { 2395 uint64_t res = a - b; 2396 if (res > a) { 2397 res = 0; 2398 env->vxsat = 0x1; 2399 } 2400 return res; 2401 } 2402 2403 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2404 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2405 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2406 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2407 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2408 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2409 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2410 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2411 2412 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2413 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2414 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2415 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2416 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2417 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2418 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2419 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2420 2421 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2422 { 2423 int8_t res = a - b; 2424 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2425 res = a >= 0 
? INT8_MAX : INT8_MIN; 2426 env->vxsat = 0x1; 2427 } 2428 return res; 2429 } 2430 2431 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2432 { 2433 int16_t res = a - b; 2434 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2435 res = a >= 0 ? INT16_MAX : INT16_MIN; 2436 env->vxsat = 0x1; 2437 } 2438 return res; 2439 } 2440 2441 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2442 { 2443 int32_t res = a - b; 2444 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2445 res = a >= 0 ? INT32_MAX : INT32_MIN; 2446 env->vxsat = 0x1; 2447 } 2448 return res; 2449 } 2450 2451 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2452 { 2453 int64_t res = a - b; 2454 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2455 res = a >= 0 ? INT64_MAX : INT64_MIN; 2456 env->vxsat = 0x1; 2457 } 2458 return res; 2459 } 2460 2461 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2462 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2463 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2464 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2465 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2466 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2467 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2468 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2469 2470 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2471 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2472 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2473 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2474 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2475 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2476 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2477 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2478 2479 /* Vector Single-Width Averaging Add and Subtract */ 2480 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2481 { 2482 uint8_t d = extract64(v, shift, 1); 2483 uint8_t d1; 2484 uint64_t D1, D2; 2485 2486 if (shift == 0 || shift > 64) { 2487 return 0; 2488 } 2489 2490 d1 = extract64(v, shift - 1, 1); 2491 D1 = extract64(v, 0, shift); 2492 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2493 return d1; 2494 } else if (vxrm == 1) { /* round-to-nearest-even */ 2495 if (shift > 1) { 2496 D2 = extract64(v, 0, shift - 1); 2497 return d1 & ((D2 != 0) | d); 2498 } else { 2499 return d1 & d; 2500 } 2501 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2502 return !d & (D1 != 0); 2503 } 2504 return 0; /* round-down (truncate) */ 2505 } 2506 2507 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2508 { 2509 int64_t res = (int64_t)a + b; 2510 uint8_t round = get_round(vxrm, res, 1); 2511 2512 return (res >> 1) + round; 2513 } 2514 2515 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2516 { 2517 int64_t res = a + b; 2518 uint8_t round = get_round(vxrm, res, 1); 2519 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2520 2521 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2522 return ((res >> 1) ^ over) + round; 2523 } 2524 2525 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2526 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2527 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2528 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2529 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2530 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2531 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2532 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2533 2534 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2535 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2536 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2537 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2538 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2539 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2540 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2541 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2542 2543 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2544 uint32_t a, uint32_t b) 2545 { 2546 uint64_t res = (uint64_t)a + b; 2547 uint8_t round = get_round(vxrm, res, 1); 2548 2549 return (res >> 1) + round; 2550 } 2551 2552 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2553 uint64_t a, uint64_t b) 2554 { 2555 uint64_t res = a + b; 2556 uint8_t round = get_round(vxrm, res, 1); 2557 uint64_t over = (uint64_t)(res < a) << 63; 2558 2559 return ((res >> 1) | over) + round; 2560 } 2561 2562 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2563 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2564 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2565 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2566 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2567 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2568 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2569 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2570 2571 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2572 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2573 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2574 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2575 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2576 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2577 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2578 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2579 2580 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2581 { 2582 int64_t res = (int64_t)a - b; 2583 uint8_t round = get_round(vxrm, res, 1); 2584 2585 return (res >> 1) + round; 2586 } 2587 2588 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2589 { 2590 int64_t res = (int64_t)a - b; 2591 uint8_t round = get_round(vxrm, res, 1); 2592 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2593 2594 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2595 return ((res >> 1) ^ over) + round; 2596 } 2597 2598 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2599 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2600 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2601 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2602 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2603 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2604 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2605 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2606 2607 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2608 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2609 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2610 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2611 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2612 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2613 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2614 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2615 2616 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2617 uint32_t a, uint32_t b) 2618 { 2619 int64_t res = (int64_t)a - b; 2620 uint8_t round = get_round(vxrm, res, 1); 2621 2622 return (res >> 1) + round; 2623 } 2624 2625 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2626 uint64_t a, uint64_t b) 2627 { 2628 uint64_t res = (uint64_t)a - b; 2629 uint8_t round = get_round(vxrm, res, 1); 2630 uint64_t over = (uint64_t)(res > a) << 63; 2631 2632 return ((res >> 1) | over) + round; 2633 } 2634 2635 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2636 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2637 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2638 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2639 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2640 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2641 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2642 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2643 2644 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2645 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2646 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2647 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2648 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2649 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2650 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2651 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2652 2653 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2654 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2655 { 2656 uint8_t round; 2657 int16_t res; 2658 2659 res = (int16_t)a * (int16_t)b; 2660 round = get_round(vxrm, res, 7); 2661 res = (res >> 7) + round; 2662 2663 if (res > INT8_MAX) { 2664 env->vxsat = 0x1; 2665 return INT8_MAX; 2666 } else if (res < INT8_MIN) { 2667 env->vxsat = 0x1; 2668 return INT8_MIN; 2669 } else { 2670 return res; 2671 } 2672 } 2673 2674 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2675 { 2676 uint8_t round; 2677 int32_t res; 2678 2679 res = (int32_t)a * (int32_t)b; 2680 round = get_round(vxrm, res, 15); 2681 res = (res >> 15) + round; 2682 2683 if (res > INT16_MAX) { 2684 env->vxsat = 0x1; 2685 return INT16_MAX; 2686 } else if (res < INT16_MIN) { 2687 env->vxsat = 0x1; 2688 return INT16_MIN; 2689 } else { 2690 return res; 2691 } 2692 } 2693 2694 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2695 { 2696 uint8_t round; 2697 int64_t res; 2698 2699 res = (int64_t)a * (int64_t)b; 2700 round = get_round(vxrm, res, 31); 2701 res = (res >> 31) + round; 2702 2703 if (res > INT32_MAX) { 2704 env->vxsat = 0x1; 2705 return INT32_MAX; 2706 } else if (res < INT32_MIN) { 2707 env->vxsat = 0x1; 
2708 return INT32_MIN; 2709 } else { 2710 return res; 2711 } 2712 } 2713 2714 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2715 { 2716 uint8_t round; 2717 uint64_t hi_64, lo_64; 2718 int64_t res; 2719 2720 if (a == INT64_MIN && b == INT64_MIN) { 2721 env->vxsat = 1; 2722 return INT64_MAX; 2723 } 2724 2725 muls64(&lo_64, &hi_64, a, b); 2726 round = get_round(vxrm, lo_64, 63); 2727 /* 2728 * Cannot overflow, as there are always 2729 * 2 sign bits after multiply. 2730 */ 2731 res = (hi_64 << 1) | (lo_64 >> 63); 2732 if (round) { 2733 if (res == INT64_MAX) { 2734 env->vxsat = 1; 2735 } else { 2736 res += 1; 2737 } 2738 } 2739 return res; 2740 } 2741 2742 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2743 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2744 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2745 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2746 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2747 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2748 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2749 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2750 2751 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2752 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2753 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2754 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2755 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2756 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2757 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2758 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2759 2760 /* Vector Single-Width Scaling Shift Instructions */ 2761 static inline uint8_t 2762 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2763 { 2764 uint8_t round, shift = b & 0x7; 2765 uint8_t res; 2766 2767 round = get_round(vxrm, a, shift); 2768 res = (a >> shift) + round; 2769 return res; 2770 } 2771 static inline uint16_t 2772 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2773 { 2774 uint8_t round, shift = b & 0xf; 2775 2776 round = get_round(vxrm, a, shift); 2777 return (a >> shift) + round; 2778 } 2779 static inline uint32_t 2780 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2781 { 2782 uint8_t round, shift = b & 0x1f; 2783 2784 round = get_round(vxrm, a, shift); 2785 return (a >> shift) + round; 2786 } 2787 static inline uint64_t 2788 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2789 { 2790 uint8_t round, shift = b & 0x3f; 2791 2792 round = get_round(vxrm, a, shift); 2793 return (a >> shift) + round; 2794 } 2795 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2796 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2797 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2798 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2799 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2800 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2801 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2802 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2803 2804 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2805 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2806 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2807 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2808 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2809 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2810 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2811 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2812 2813 static inline int8_t 2814 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2815 { 2816 uint8_t round, shift = b & 0x7; 2817 2818 round = get_round(vxrm, a, shift); 2819 return (a >> shift) + round; 2820 } 2821 static inline int16_t 2822 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2823 { 2824 uint8_t round, shift = b & 0xf; 2825 2826 round = get_round(vxrm, a, shift); 2827 return (a >> shift) + round; 2828 } 2829 static inline int32_t 2830 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2831 { 2832 uint8_t round, shift = b & 0x1f; 2833 2834 round = get_round(vxrm, a, shift); 2835 return (a >> shift) + round; 2836 } 2837 static inline int64_t 2838 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2839 { 2840 uint8_t round, shift = b & 0x3f; 2841 2842 round = get_round(vxrm, a, shift); 2843 return (a >> shift) + round; 2844 } 2845 2846 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2847 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2848 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2849 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2850 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2851 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2852 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2853 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2854 2855 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2856 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2857 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2858 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2859 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2860 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2861 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2862 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2863 2864 /* Vector Narrowing Fixed-Point Clip Instructions */ 2865 static inline int8_t 2866 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2867 { 2868 uint8_t round, shift = b & 0xf; 2869 int16_t res; 2870 2871 round = get_round(vxrm, a, shift); 2872 res = (a >> shift) + round; 2873 if (res > INT8_MAX) { 2874 env->vxsat = 0x1; 2875 return INT8_MAX; 2876 } else if (res < INT8_MIN) { 2877 env->vxsat = 0x1; 2878 return INT8_MIN; 2879 } else { 2880 return res; 2881 } 2882 } 2883 2884 static inline int16_t 2885 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2886 { 2887 uint8_t round, shift = b & 0x1f; 2888 int32_t res; 2889 2890 round = get_round(vxrm, a, shift); 2891 res = (a >> shift) + round; 2892 if (res > INT16_MAX) { 2893 env->vxsat = 0x1; 2894 return INT16_MAX; 2895 } else if (res < INT16_MIN) { 2896 env->vxsat = 0x1; 2897 return INT16_MIN; 2898 } else { 2899 return res; 2900 } 2901 } 2902 2903 static inline int32_t 2904 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2905 { 2906 uint8_t round, shift = b & 0x3f; 2907 int64_t res; 2908 2909 round = get_round(vxrm, a, shift); 2910 res = (a >> shift) + round; 2911 if (res > INT32_MAX) { 2912 env->vxsat = 0x1; 2913 return INT32_MAX; 2914 } else if (res < INT32_MIN) { 2915 env->vxsat = 0x1; 2916 return INT32_MIN; 2917 } else { 2918 return res; 2919 } 2920 } 2921 2922 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2923 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2924 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2925 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 2926 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 2927 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 2928 2929 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2930 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2931 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2932 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 2933 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 2934 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 2935 2936 static inline uint8_t 2937 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2938 { 2939 
uint8_t round, shift = b & 0xf; 2940 uint16_t res; 2941 2942 round = get_round(vxrm, a, shift); 2943 res = (a >> shift) + round; 2944 if (res > UINT8_MAX) { 2945 env->vxsat = 0x1; 2946 return UINT8_MAX; 2947 } else { 2948 return res; 2949 } 2950 } 2951 2952 static inline uint16_t 2953 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2954 { 2955 uint8_t round, shift = b & 0x1f; 2956 uint32_t res; 2957 2958 round = get_round(vxrm, a, shift); 2959 res = (a >> shift) + round; 2960 if (res > UINT16_MAX) { 2961 env->vxsat = 0x1; 2962 return UINT16_MAX; 2963 } else { 2964 return res; 2965 } 2966 } 2967 2968 static inline uint32_t 2969 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2970 { 2971 uint8_t round, shift = b & 0x3f; 2972 uint64_t res; 2973 2974 round = get_round(vxrm, a, shift); 2975 res = (a >> shift) + round; 2976 if (res > UINT32_MAX) { 2977 env->vxsat = 0x1; 2978 return UINT32_MAX; 2979 } else { 2980 return res; 2981 } 2982 } 2983 2984 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2985 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2986 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2987 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 2988 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 2989 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 2990 2991 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2992 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2993 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 2994 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 2995 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 2996 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 2997 2998 /* 2999 * Vector Float Point Arithmetic Instructions 3000 */ 3001 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 3002 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3003 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3004 CPURISCVState *env) \ 3005 { \ 3006 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3007 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3008 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3009 } 3010 3011 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 3012 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3013 void *vs2, CPURISCVState *env, \ 3014 uint32_t desc) \ 3015 { \ 3016 uint32_t vm = vext_vm(desc); \ 3017 uint32_t vl = env->vl; \ 3018 uint32_t total_elems = \ 3019 vext_get_total_elems(env, desc, ESZ); \ 3020 uint32_t vta = vext_vta(desc); \ 3021 uint32_t vma = vext_vma(desc); \ 3022 uint32_t i; \ 3023 \ 3024 for (i = env->vstart; i < vl; i++) { \ 3025 if (!vm && !vext_elem_mask(v0, i)) { \ 3026 /* set masked-off elements to 1s */ \ 3027 vext_set_elems_1s(vd, vma, i * ESZ, \ 3028 (i + 1) * ESZ); \ 3029 continue; \ 3030 } \ 3031 do_##NAME(vd, vs1, vs2, i, env); \ 3032 } \ 3033 env->vstart = 0; \ 3034 /* set tail elements to 1s */ \ 3035 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3036 total_elems * ESZ); \ 3037 } 3038 3039 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3040 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3041 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3042 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 3043 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 3044 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 3045 3046 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3047 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3048 CPURISCVState *env) \ 3049 { \ 3050 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3051 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 3052 } 3053 3054 #define GEN_VEXT_VF(NAME, 
ESZ) \ 3055 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3056 void *vs2, CPURISCVState *env, \ 3057 uint32_t desc) \ 3058 { \ 3059 uint32_t vm = vext_vm(desc); \ 3060 uint32_t vl = env->vl; \ 3061 uint32_t total_elems = \ 3062 vext_get_total_elems(env, desc, ESZ); \ 3063 uint32_t vta = vext_vta(desc); \ 3064 uint32_t vma = vext_vma(desc); \ 3065 uint32_t i; \ 3066 \ 3067 for (i = env->vstart; i < vl; i++) { \ 3068 if (!vm && !vext_elem_mask(v0, i)) { \ 3069 /* set masked-off elements to 1s */ \ 3070 vext_set_elems_1s(vd, vma, i * ESZ, \ 3071 (i + 1) * ESZ); \ 3072 continue; \ 3073 } \ 3074 do_##NAME(vd, s1, vs2, i, env); \ 3075 } \ 3076 env->vstart = 0; \ 3077 /* set tail elements to 1s */ \ 3078 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3079 total_elems * ESZ); \ 3080 } 3081 3082 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3083 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3084 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3085 GEN_VEXT_VF(vfadd_vf_h, 2) 3086 GEN_VEXT_VF(vfadd_vf_w, 4) 3087 GEN_VEXT_VF(vfadd_vf_d, 8) 3088 3089 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3090 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3091 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3092 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 3093 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 3094 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 3095 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3096 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3097 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3098 GEN_VEXT_VF(vfsub_vf_h, 2) 3099 GEN_VEXT_VF(vfsub_vf_w, 4) 3100 GEN_VEXT_VF(vfsub_vf_d, 8) 3101 3102 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3103 { 3104 return float16_sub(b, a, s); 3105 } 3106 3107 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3108 { 3109 return float32_sub(b, a, s); 3110 } 3111 3112 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3113 { 3114 return float64_sub(b, a, s); 3115 } 3116 3117 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3118 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3119 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3120 GEN_VEXT_VF(vfrsub_vf_h, 2) 3121 GEN_VEXT_VF(vfrsub_vf_w, 4) 3122 GEN_VEXT_VF(vfrsub_vf_d, 8) 3123 3124 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3125 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3126 { 3127 return float32_add(float16_to_float32(a, true, s), 3128 float16_to_float32(b, true, s), s); 3129 } 3130 3131 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3132 { 3133 return float64_add(float32_to_float64(a, s), 3134 float32_to_float64(b, s), s); 3135 3136 } 3137 3138 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3139 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3140 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3141 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3142 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3143 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3144 GEN_VEXT_VF(vfwadd_vf_h, 4) 3145 GEN_VEXT_VF(vfwadd_vf_w, 8) 3146 3147 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3148 { 3149 return float32_sub(float16_to_float32(a, true, s), 3150 float16_to_float32(b, true, s), s); 3151 } 3152 3153 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3154 { 3155 return float64_sub(float32_to_float64(a, s), 3156 
float32_to_float64(b, s), s); 3157 3158 } 3159 3160 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3161 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3162 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3163 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3164 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3165 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3166 GEN_VEXT_VF(vfwsub_vf_h, 4) 3167 GEN_VEXT_VF(vfwsub_vf_w, 8) 3168 3169 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3170 { 3171 return float32_add(a, float16_to_float32(b, true, s), s); 3172 } 3173 3174 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3175 { 3176 return float64_add(a, float32_to_float64(b, s), s); 3177 } 3178 3179 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3180 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3181 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3182 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3183 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3184 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3185 GEN_VEXT_VF(vfwadd_wf_h, 4) 3186 GEN_VEXT_VF(vfwadd_wf_w, 8) 3187 3188 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3189 { 3190 return float32_sub(a, float16_to_float32(b, true, s), s); 3191 } 3192 3193 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3194 { 3195 return float64_sub(a, float32_to_float64(b, s), s); 3196 } 3197 3198 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3199 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3200 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3201 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3202 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3203 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3204 GEN_VEXT_VF(vfwsub_wf_h, 4) 3205 GEN_VEXT_VF(vfwsub_wf_w, 8) 3206 3207 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3208 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3209 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3210 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3211 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3212 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3213 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3214 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3215 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3216 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3217 GEN_VEXT_VF(vfmul_vf_h, 2) 3218 GEN_VEXT_VF(vfmul_vf_w, 4) 3219 GEN_VEXT_VF(vfmul_vf_d, 8) 3220 3221 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3222 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3223 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3224 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3225 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3226 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3227 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3228 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3229 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3230 GEN_VEXT_VF(vfdiv_vf_h, 2) 3231 GEN_VEXT_VF(vfdiv_vf_w, 4) 3232 GEN_VEXT_VF(vfdiv_vf_d, 8) 3233 3234 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3235 { 3236 return float16_div(b, a, s); 3237 } 3238 3239 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3240 { 3241 return float32_div(b, a, s); 3242 } 3243 3244 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3245 { 3246 return float64_div(b, a, s); 3247 } 3248 
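
/*
 * Illustrative sketch (hypothetical, not part of the build): the
 * reversed-operand wrappers above exist because the shared OPFVF2 plumbing
 * always calls OP(s2, s1, &env->fp_status) with the vector element first.
 * Swapping the operands inside float16_rdiv()/float32_rdiv()/float64_rdiv()
 * is what lets vfrdiv.vf compute f[rs1] / vs2[i] through the same code
 * path, roughly as in the per-element step below.
 */
static uint16_t G_GNUC_UNUSED sketch_vfrdiv_vf_h_step(uint16_t vs2_elem,
                                                      uint64_t rs1_bits,
                                                      float_status *s)
{
    /* OPFVF2 truncates the 64-bit scalar register value to the element type */
    uint16_t s1 = (uint16_t)rs1_bits;

    /* float16_rdiv(s2, s1) == float16_div(s1, s2): scalar over element */
    return float16_div(s1, vs2_elem, s);
}
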
3249 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3250 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3251 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3252 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3253 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3254 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3255 3256 /* Vector Widening Floating-Point Multiply */ 3257 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3258 { 3259 return float32_mul(float16_to_float32(a, true, s), 3260 float16_to_float32(b, true, s), s); 3261 } 3262 3263 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3264 { 3265 return float64_mul(float32_to_float64(a, s), 3266 float32_to_float64(b, s), s); 3267 3268 } 3269 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3270 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3271 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3272 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3273 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3274 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3275 GEN_VEXT_VF(vfwmul_vf_h, 4) 3276 GEN_VEXT_VF(vfwmul_vf_w, 8) 3277 3278 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3279 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3280 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3281 CPURISCVState *env) \ 3282 { \ 3283 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3284 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3285 TD d = *((TD *)vd + HD(i)); \ 3286 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3287 } 3288 3289 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3290 { 3291 return float16_muladd(a, b, d, 0, s); 3292 } 3293 3294 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3295 { 3296 return float32_muladd(a, b, d, 0, s); 3297 } 3298 3299 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3300 { 3301 return float64_muladd(a, b, d, 0, s); 3302 } 3303 3304 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3305 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3306 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3307 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3308 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3309 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3310 3311 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3312 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3313 CPURISCVState *env) \ 3314 { \ 3315 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3316 TD d = *((TD *)vd + HD(i)); \ 3317 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3318 } 3319 3320 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3321 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3322 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3323 GEN_VEXT_VF(vfmacc_vf_h, 2) 3324 GEN_VEXT_VF(vfmacc_vf_w, 4) 3325 GEN_VEXT_VF(vfmacc_vf_d, 8) 3326 3327 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3328 { 3329 return float16_muladd(a, b, d, float_muladd_negate_c | 3330 float_muladd_negate_product, s); 3331 } 3332 3333 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3334 { 3335 return float32_muladd(a, b, d, float_muladd_negate_c | 3336 float_muladd_negate_product, s); 3337 } 3338 3339 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3340 { 3341 return float64_muladd(a, b, d, float_muladd_negate_c | 3342 float_muladd_negate_product, s); 3343 } 3344 3345 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, 
H2, H2, H2, fnmacc16) 3346 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3347 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3348 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3349 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3350 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3351 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3352 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3353 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3354 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3355 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3356 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3357 3358 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3359 { 3360 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3361 } 3362 3363 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3364 { 3365 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3366 } 3367 3368 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3369 { 3370 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3371 } 3372 3373 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3374 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3375 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3376 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3377 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3378 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3379 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3380 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3381 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3382 GEN_VEXT_VF(vfmsac_vf_h, 2) 3383 GEN_VEXT_VF(vfmsac_vf_w, 4) 3384 GEN_VEXT_VF(vfmsac_vf_d, 8) 3385 3386 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3387 { 3388 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3389 } 3390 3391 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3392 { 3393 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3394 } 3395 3396 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3397 { 3398 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3399 } 3400 3401 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3402 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3403 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3404 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3405 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3406 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3407 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3408 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3409 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3410 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3411 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3412 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3413 3414 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3415 { 3416 return float16_muladd(d, b, a, 0, s); 3417 } 3418 3419 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3420 { 3421 return float32_muladd(d, b, a, 0, s); 3422 } 3423 3424 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3425 { 3426 return float64_muladd(d, b, a, 0, s); 3427 } 3428 3429 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3430 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3431 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3432 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) 3433 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3434 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3435 RVVCALL(OPFVF3, vfmadd_vf_h, 
OP_UUU_H, H2, H2, fmadd16) 3436 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3437 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3438 GEN_VEXT_VF(vfmadd_vf_h, 2) 3439 GEN_VEXT_VF(vfmadd_vf_w, 4) 3440 GEN_VEXT_VF(vfmadd_vf_d, 8) 3441 3442 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3443 { 3444 return float16_muladd(d, b, a, float_muladd_negate_c | 3445 float_muladd_negate_product, s); 3446 } 3447 3448 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3449 { 3450 return float32_muladd(d, b, a, float_muladd_negate_c | 3451 float_muladd_negate_product, s); 3452 } 3453 3454 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3455 { 3456 return float64_muladd(d, b, a, float_muladd_negate_c | 3457 float_muladd_negate_product, s); 3458 } 3459 3460 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3461 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3462 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3463 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3464 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3465 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3466 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3467 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3468 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3469 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3470 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3471 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3472 3473 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3474 { 3475 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3476 } 3477 3478 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3479 { 3480 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3481 } 3482 3483 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3484 { 3485 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3486 } 3487 3488 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3489 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3490 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3491 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3492 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3493 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3494 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3495 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3496 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3497 GEN_VEXT_VF(vfmsub_vf_h, 2) 3498 GEN_VEXT_VF(vfmsub_vf_w, 4) 3499 GEN_VEXT_VF(vfmsub_vf_d, 8) 3500 3501 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3502 { 3503 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3504 } 3505 3506 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3507 { 3508 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3509 } 3510 3511 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3512 { 3513 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3514 } 3515 3516 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3517 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3518 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3519 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3520 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3521 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3522 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3523 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3524 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, 
H8, fnmsub64) 3525 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3526 GEN_VEXT_VF(vfnmsub_vf_w, 4) 3527 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3528 3529 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3530 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3531 { 3532 return float32_muladd(float16_to_float32(a, true, s), 3533 float16_to_float32(b, true, s), d, 0, s); 3534 } 3535 3536 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3537 { 3538 return float64_muladd(float32_to_float64(a, s), 3539 float32_to_float64(b, s), d, 0, s); 3540 } 3541 3542 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3543 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3544 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3545 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3546 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3547 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3548 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3549 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3550 3551 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3552 { 3553 return float32_muladd(float16_to_float32(a, true, s), 3554 float16_to_float32(b, true, s), d, 3555 float_muladd_negate_c | float_muladd_negate_product, 3556 s); 3557 } 3558 3559 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3560 { 3561 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), 3562 d, float_muladd_negate_c | 3563 float_muladd_negate_product, s); 3564 } 3565 3566 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3567 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3568 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3569 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3570 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3571 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3572 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3573 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3574 3575 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3576 { 3577 return float32_muladd(float16_to_float32(a, true, s), 3578 float16_to_float32(b, true, s), d, 3579 float_muladd_negate_c, s); 3580 } 3581 3582 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3583 { 3584 return float64_muladd(float32_to_float64(a, s), 3585 float32_to_float64(b, s), d, 3586 float_muladd_negate_c, s); 3587 } 3588 3589 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3590 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3591 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3592 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3593 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3594 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3595 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3596 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3597 3598 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3599 { 3600 return float32_muladd(float16_to_float32(a, true, s), 3601 float16_to_float32(b, true, s), d, 3602 float_muladd_negate_product, s); 3603 } 3604 3605 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3606 { 3607 return float64_muladd(float32_to_float64(a, s), 3608 float32_to_float64(b, s), d, 3609 float_muladd_negate_product, s); 3610 } 3611 3612 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3613 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3614 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3615 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3616 RVVCALL(OPFVF3, vfwnmsac_vf_h, 
WOP_UUU_H, H4, H2, fwnmsac16) 3617 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3618 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3619 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3620 3621 /* Vector Floating-Point Square-Root Instruction */ 3622 /* (TD, T2, TX2) */ 3623 #define OP_UU_H uint16_t, uint16_t, uint16_t 3624 #define OP_UU_W uint32_t, uint32_t, uint32_t 3625 #define OP_UU_D uint64_t, uint64_t, uint64_t 3626 3627 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3628 static void do_##NAME(void *vd, void *vs2, int i, \ 3629 CPURISCVState *env) \ 3630 { \ 3631 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3632 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3633 } 3634 3635 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3636 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3637 CPURISCVState *env, uint32_t desc) \ 3638 { \ 3639 uint32_t vm = vext_vm(desc); \ 3640 uint32_t vl = env->vl; \ 3641 uint32_t total_elems = \ 3642 vext_get_total_elems(env, desc, ESZ); \ 3643 uint32_t vta = vext_vta(desc); \ 3644 uint32_t vma = vext_vma(desc); \ 3645 uint32_t i; \ 3646 \ 3647 if (vl == 0) { \ 3648 return; \ 3649 } \ 3650 for (i = env->vstart; i < vl; i++) { \ 3651 if (!vm && !vext_elem_mask(v0, i)) { \ 3652 /* set masked-off elements to 1s */ \ 3653 vext_set_elems_1s(vd, vma, i * ESZ, \ 3654 (i + 1) * ESZ); \ 3655 continue; \ 3656 } \ 3657 do_##NAME(vd, vs2, i, env); \ 3658 } \ 3659 env->vstart = 0; \ 3660 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3661 total_elems * ESZ); \ 3662 } 3663 3664 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3665 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3666 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3667 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3668 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3669 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3670 3671 /* 3672 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3673 * 3674 * Adapted from riscv-v-spec recip.c: 3675 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3676 */ 3677 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3678 { 3679 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3680 uint64_t exp = extract64(f, frac_size, exp_size); 3681 uint64_t frac = extract64(f, 0, frac_size); 3682 3683 const uint8_t lookup_table[] = { 3684 52, 51, 50, 48, 47, 46, 44, 43, 3685 42, 41, 40, 39, 38, 36, 35, 34, 3686 33, 32, 31, 30, 30, 29, 28, 27, 3687 26, 25, 24, 23, 23, 22, 21, 20, 3688 19, 19, 18, 17, 16, 16, 15, 14, 3689 14, 13, 12, 12, 11, 10, 10, 9, 3690 9, 8, 7, 7, 6, 6, 5, 4, 3691 4, 3, 3, 2, 2, 1, 1, 0, 3692 127, 125, 123, 121, 119, 118, 116, 114, 3693 113, 111, 109, 108, 106, 105, 103, 102, 3694 100, 99, 97, 96, 95, 93, 92, 91, 3695 90, 88, 87, 86, 85, 84, 83, 82, 3696 80, 79, 78, 77, 76, 75, 74, 73, 3697 72, 71, 70, 70, 69, 68, 67, 66, 3698 65, 64, 63, 63, 62, 61, 60, 59, 3699 59, 58, 57, 56, 56, 55, 54, 53 3700 }; 3701 const int precision = 7; 3702 3703 if (exp == 0 && frac != 0) { /* subnormal */ 3704 /* Normalize the subnormal. 
*/ 3705 while (extract64(frac, frac_size - 1, 1) == 0) { 3706 exp--; 3707 frac <<= 1; 3708 } 3709 3710 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3711 } 3712 3713 int idx = ((exp & 1) << (precision - 1)) | 3714 (frac >> (frac_size - precision + 1)); 3715 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3716 (frac_size - precision); 3717 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3718 3719 uint64_t val = 0; 3720 val = deposit64(val, 0, frac_size, out_frac); 3721 val = deposit64(val, frac_size, exp_size, out_exp); 3722 val = deposit64(val, frac_size + exp_size, 1, sign); 3723 return val; 3724 } 3725 3726 static float16 frsqrt7_h(float16 f, float_status *s) 3727 { 3728 int exp_size = 5, frac_size = 10; 3729 bool sign = float16_is_neg(f); 3730 3731 /* 3732 * frsqrt7(sNaN) = canonical NaN 3733 * frsqrt7(-inf) = canonical NaN 3734 * frsqrt7(-normal) = canonical NaN 3735 * frsqrt7(-subnormal) = canonical NaN 3736 */ 3737 if (float16_is_signaling_nan(f, s) || 3738 (float16_is_infinity(f) && sign) || 3739 (float16_is_normal(f) && sign) || 3740 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3741 s->float_exception_flags |= float_flag_invalid; 3742 return float16_default_nan(s); 3743 } 3744 3745 /* frsqrt7(qNaN) = canonical NaN */ 3746 if (float16_is_quiet_nan(f, s)) { 3747 return float16_default_nan(s); 3748 } 3749 3750 /* frsqrt7(+-0) = +-inf */ 3751 if (float16_is_zero(f)) { 3752 s->float_exception_flags |= float_flag_divbyzero; 3753 return float16_set_sign(float16_infinity, sign); 3754 } 3755 3756 /* frsqrt7(+inf) = +0 */ 3757 if (float16_is_infinity(f) && !sign) { 3758 return float16_set_sign(float16_zero, sign); 3759 } 3760 3761 /* +normal, +subnormal */ 3762 uint64_t val = frsqrt7(f, exp_size, frac_size); 3763 return make_float16(val); 3764 } 3765 3766 static float32 frsqrt7_s(float32 f, float_status *s) 3767 { 3768 int exp_size = 8, frac_size = 23; 3769 bool sign = float32_is_neg(f); 3770 3771 /* 3772 * frsqrt7(sNaN) = canonical NaN 3773 * frsqrt7(-inf) = canonical NaN 3774 * frsqrt7(-normal) = canonical NaN 3775 * frsqrt7(-subnormal) = canonical NaN 3776 */ 3777 if (float32_is_signaling_nan(f, s) || 3778 (float32_is_infinity(f) && sign) || 3779 (float32_is_normal(f) && sign) || 3780 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3781 s->float_exception_flags |= float_flag_invalid; 3782 return float32_default_nan(s); 3783 } 3784 3785 /* frsqrt7(qNaN) = canonical NaN */ 3786 if (float32_is_quiet_nan(f, s)) { 3787 return float32_default_nan(s); 3788 } 3789 3790 /* frsqrt7(+-0) = +-inf */ 3791 if (float32_is_zero(f)) { 3792 s->float_exception_flags |= float_flag_divbyzero; 3793 return float32_set_sign(float32_infinity, sign); 3794 } 3795 3796 /* frsqrt7(+inf) = +0 */ 3797 if (float32_is_infinity(f) && !sign) { 3798 return float32_set_sign(float32_zero, sign); 3799 } 3800 3801 /* +normal, +subnormal */ 3802 uint64_t val = frsqrt7(f, exp_size, frac_size); 3803 return make_float32(val); 3804 } 3805 3806 static float64 frsqrt7_d(float64 f, float_status *s) 3807 { 3808 int exp_size = 11, frac_size = 52; 3809 bool sign = float64_is_neg(f); 3810 3811 /* 3812 * frsqrt7(sNaN) = canonical NaN 3813 * frsqrt7(-inf) = canonical NaN 3814 * frsqrt7(-normal) = canonical NaN 3815 * frsqrt7(-subnormal) = canonical NaN 3816 */ 3817 if (float64_is_signaling_nan(f, s) || 3818 (float64_is_infinity(f) && sign) || 3819 (float64_is_normal(f) && sign) || 3820 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3821 
s->float_exception_flags |= float_flag_invalid; 3822 return float64_default_nan(s); 3823 } 3824 3825 /* frsqrt7(qNaN) = canonical NaN */ 3826 if (float64_is_quiet_nan(f, s)) { 3827 return float64_default_nan(s); 3828 } 3829 3830 /* frsqrt7(+-0) = +-inf */ 3831 if (float64_is_zero(f)) { 3832 s->float_exception_flags |= float_flag_divbyzero; 3833 return float64_set_sign(float64_infinity, sign); 3834 } 3835 3836 /* frsqrt7(+inf) = +0 */ 3837 if (float64_is_infinity(f) && !sign) { 3838 return float64_set_sign(float64_zero, sign); 3839 } 3840 3841 /* +normal, +subnormal */ 3842 uint64_t val = frsqrt7(f, exp_size, frac_size); 3843 return make_float64(val); 3844 } 3845 3846 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3847 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3848 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3849 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3850 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3851 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3852 3853 /* 3854 * Vector Floating-Point Reciprocal Estimate Instruction 3855 * 3856 * Adapted from riscv-v-spec recip.c: 3857 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3858 */ 3859 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3860 float_status *s) 3861 { 3862 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3863 uint64_t exp = extract64(f, frac_size, exp_size); 3864 uint64_t frac = extract64(f, 0, frac_size); 3865 3866 const uint8_t lookup_table[] = { 3867 127, 125, 123, 121, 119, 117, 116, 114, 3868 112, 110, 109, 107, 105, 104, 102, 100, 3869 99, 97, 96, 94, 93, 91, 90, 88, 3870 87, 85, 84, 83, 81, 80, 79, 77, 3871 76, 75, 74, 72, 71, 70, 69, 68, 3872 66, 65, 64, 63, 62, 61, 60, 59, 3873 58, 57, 56, 55, 54, 53, 52, 51, 3874 50, 49, 48, 47, 46, 45, 44, 43, 3875 42, 41, 40, 40, 39, 38, 37, 36, 3876 35, 35, 34, 33, 32, 31, 31, 30, 3877 29, 28, 28, 27, 26, 25, 25, 24, 3878 23, 23, 22, 21, 21, 20, 19, 19, 3879 18, 17, 17, 16, 15, 15, 14, 14, 3880 13, 12, 12, 11, 11, 10, 9, 9, 3881 8, 8, 7, 7, 6, 5, 5, 4, 3882 4, 3, 3, 2, 2, 1, 1, 0 3883 }; 3884 const int precision = 7; 3885 3886 if (exp == 0 && frac != 0) { /* subnormal */ 3887 /* Normalize the subnormal. */ 3888 while (extract64(frac, frac_size - 1, 1) == 0) { 3889 exp--; 3890 frac <<= 1; 3891 } 3892 3893 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3894 3895 if (exp != 0 && exp != UINT64_MAX) { 3896 /* 3897 * Overflow to inf or max value of same sign, 3898 * depending on sign and rounding mode. 3899 */ 3900 s->float_exception_flags |= (float_flag_inexact | 3901 float_flag_overflow); 3902 3903 if ((s->float_rounding_mode == float_round_to_zero) || 3904 ((s->float_rounding_mode == float_round_down) && !sign) || 3905 ((s->float_rounding_mode == float_round_up) && sign)) { 3906 /* Return greatest/negative finite value. */ 3907 return (sign << (exp_size + frac_size)) | 3908 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3909 } else { 3910 /* Return +-inf. */ 3911 return (sign << (exp_size + frac_size)) | 3912 MAKE_64BIT_MASK(frac_size, exp_size); 3913 } 3914 } 3915 } 3916 3917 int idx = frac >> (frac_size - precision); 3918 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3919 (frac_size - precision); 3920 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3921 3922 if (out_exp == 0 || out_exp == UINT64_MAX) { 3923 /* 3924 * The result is subnormal, but don't raise the underflow exception, 3925 * because there's no additional loss of precision. 
3926 */ 3927 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3928 if (out_exp == UINT64_MAX) { 3929 out_frac >>= 1; 3930 out_exp = 0; 3931 } 3932 } 3933 3934 uint64_t val = 0; 3935 val = deposit64(val, 0, frac_size, out_frac); 3936 val = deposit64(val, frac_size, exp_size, out_exp); 3937 val = deposit64(val, frac_size + exp_size, 1, sign); 3938 return val; 3939 } 3940 3941 static float16 frec7_h(float16 f, float_status *s) 3942 { 3943 int exp_size = 5, frac_size = 10; 3944 bool sign = float16_is_neg(f); 3945 3946 /* frec7(+-inf) = +-0 */ 3947 if (float16_is_infinity(f)) { 3948 return float16_set_sign(float16_zero, sign); 3949 } 3950 3951 /* frec7(+-0) = +-inf */ 3952 if (float16_is_zero(f)) { 3953 s->float_exception_flags |= float_flag_divbyzero; 3954 return float16_set_sign(float16_infinity, sign); 3955 } 3956 3957 /* frec7(sNaN) = canonical NaN */ 3958 if (float16_is_signaling_nan(f, s)) { 3959 s->float_exception_flags |= float_flag_invalid; 3960 return float16_default_nan(s); 3961 } 3962 3963 /* frec7(qNaN) = canonical NaN */ 3964 if (float16_is_quiet_nan(f, s)) { 3965 return float16_default_nan(s); 3966 } 3967 3968 /* +-normal, +-subnormal */ 3969 uint64_t val = frec7(f, exp_size, frac_size, s); 3970 return make_float16(val); 3971 } 3972 3973 static float32 frec7_s(float32 f, float_status *s) 3974 { 3975 int exp_size = 8, frac_size = 23; 3976 bool sign = float32_is_neg(f); 3977 3978 /* frec7(+-inf) = +-0 */ 3979 if (float32_is_infinity(f)) { 3980 return float32_set_sign(float32_zero, sign); 3981 } 3982 3983 /* frec7(+-0) = +-inf */ 3984 if (float32_is_zero(f)) { 3985 s->float_exception_flags |= float_flag_divbyzero; 3986 return float32_set_sign(float32_infinity, sign); 3987 } 3988 3989 /* frec7(sNaN) = canonical NaN */ 3990 if (float32_is_signaling_nan(f, s)) { 3991 s->float_exception_flags |= float_flag_invalid; 3992 return float32_default_nan(s); 3993 } 3994 3995 /* frec7(qNaN) = canonical NaN */ 3996 if (float32_is_quiet_nan(f, s)) { 3997 return float32_default_nan(s); 3998 } 3999 4000 /* +-normal, +-subnormal */ 4001 uint64_t val = frec7(f, exp_size, frac_size, s); 4002 return make_float32(val); 4003 } 4004 4005 static float64 frec7_d(float64 f, float_status *s) 4006 { 4007 int exp_size = 11, frac_size = 52; 4008 bool sign = float64_is_neg(f); 4009 4010 /* frec7(+-inf) = +-0 */ 4011 if (float64_is_infinity(f)) { 4012 return float64_set_sign(float64_zero, sign); 4013 } 4014 4015 /* frec7(+-0) = +-inf */ 4016 if (float64_is_zero(f)) { 4017 s->float_exception_flags |= float_flag_divbyzero; 4018 return float64_set_sign(float64_infinity, sign); 4019 } 4020 4021 /* frec7(sNaN) = canonical NaN */ 4022 if (float64_is_signaling_nan(f, s)) { 4023 s->float_exception_flags |= float_flag_invalid; 4024 return float64_default_nan(s); 4025 } 4026 4027 /* frec7(qNaN) = canonical NaN */ 4028 if (float64_is_quiet_nan(f, s)) { 4029 return float64_default_nan(s); 4030 } 4031 4032 /* +-normal, +-subnormal */ 4033 uint64_t val = frec7(f, exp_size, frac_size, s); 4034 return make_float64(val); 4035 } 4036 4037 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4038 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4039 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4040 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4041 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4042 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4043 4044 /* Vector Floating-Point MIN/MAX Instructions */ 4045 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4046 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 4047 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4048 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4049 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4050 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4051 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4052 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4053 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4054 GEN_VEXT_VF(vfmin_vf_h, 2) 4055 GEN_VEXT_VF(vfmin_vf_w, 4) 4056 GEN_VEXT_VF(vfmin_vf_d, 8) 4057 4058 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4059 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4060 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4061 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 4062 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 4063 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 4064 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4065 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4066 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4067 GEN_VEXT_VF(vfmax_vf_h, 2) 4068 GEN_VEXT_VF(vfmax_vf_w, 4) 4069 GEN_VEXT_VF(vfmax_vf_d, 8) 4070 4071 /* Vector Floating-Point Sign-Injection Instructions */ 4072 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4073 { 4074 return deposit64(b, 0, 15, a); 4075 } 4076 4077 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4078 { 4079 return deposit64(b, 0, 31, a); 4080 } 4081 4082 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4083 { 4084 return deposit64(b, 0, 63, a); 4085 } 4086 4087 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4088 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4089 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4090 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 4091 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 4092 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 4093 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4094 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4095 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4096 GEN_VEXT_VF(vfsgnj_vf_h, 2) 4097 GEN_VEXT_VF(vfsgnj_vf_w, 4) 4098 GEN_VEXT_VF(vfsgnj_vf_d, 8) 4099 4100 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4101 { 4102 return deposit64(~b, 0, 15, a); 4103 } 4104 4105 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4106 { 4107 return deposit64(~b, 0, 31, a); 4108 } 4109 4110 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4111 { 4112 return deposit64(~b, 0, 63, a); 4113 } 4114 4115 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4116 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4117 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4118 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 4119 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 4120 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 4121 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4122 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4123 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4124 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4125 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4126 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4127 4128 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4129 { 4130 return deposit64(b ^ a, 0, 15, a); 4131 } 4132 4133 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4134 { 4135 return deposit64(b ^ a, 0, 31, a); 4136 } 4137 4138 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
float_status *s) 4139 { 4140 return deposit64(b ^ a, 0, 63, a); 4141 } 4142 4143 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4144 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4145 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4146 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 4147 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 4148 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 4149 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4150 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4151 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4152 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 4153 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 4154 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 4155 4156 /* Vector Floating-Point Compare Instructions */ 4157 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4158 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4159 CPURISCVState *env, uint32_t desc) \ 4160 { \ 4161 uint32_t vm = vext_vm(desc); \ 4162 uint32_t vl = env->vl; \ 4163 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4164 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4165 uint32_t vma = vext_vma(desc); \ 4166 uint32_t i; \ 4167 \ 4168 for (i = env->vstart; i < vl; i++) { \ 4169 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4170 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4171 if (!vm && !vext_elem_mask(v0, i)) { \ 4172 /* set masked-off elements to 1s */ \ 4173 if (vma) { \ 4174 vext_set_elem_mask(vd, i, 1); \ 4175 } \ 4176 continue; \ 4177 } \ 4178 vext_set_elem_mask(vd, i, \ 4179 DO_OP(s2, s1, &env->fp_status)); \ 4180 } \ 4181 env->vstart = 0; \ 4182 /* 4183 * mask destination register is always tail-agnostic 4184 * set tail elements to 1s 4185 */ \ 4186 if (vta_all_1s) { \ 4187 for (; i < total_elems; i++) { \ 4188 vext_set_elem_mask(vd, i, 1); \ 4189 } \ 4190 } \ 4191 } 4192 4193 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4194 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4195 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4196 4197 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4198 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4199 CPURISCVState *env, uint32_t desc) \ 4200 { \ 4201 uint32_t vm = vext_vm(desc); \ 4202 uint32_t vl = env->vl; \ 4203 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4204 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4205 uint32_t vma = vext_vma(desc); \ 4206 uint32_t i; \ 4207 \ 4208 for (i = env->vstart; i < vl; i++) { \ 4209 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4210 if (!vm && !vext_elem_mask(v0, i)) { \ 4211 /* set masked-off elements to 1s */ \ 4212 if (vma) { \ 4213 vext_set_elem_mask(vd, i, 1); \ 4214 } \ 4215 continue; \ 4216 } \ 4217 vext_set_elem_mask(vd, i, \ 4218 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4219 } \ 4220 env->vstart = 0; \ 4221 /* 4222 * mask destination register is always tail-agnostic 4223 * set tail elements to 1s 4224 */ \ 4225 if (vta_all_1s) { \ 4226 for (; i < total_elems; i++) { \ 4227 vext_set_elem_mask(vd, i, 1); \ 4228 } \ 4229 } \ 4230 } 4231 4232 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4233 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4234 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4235 4236 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4237 { 4238 FloatRelation compare = float16_compare_quiet(a, b, s); 4239 return compare != float_relation_equal; 4240 } 4241 4242 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4243 { 4244 FloatRelation compare =
float32_compare_quiet(a, b, s); 4245 return compare != float_relation_equal; 4246 } 4247 4248 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4249 { 4250 FloatRelation compare = float64_compare_quiet(a, b, s); 4251 return compare != float_relation_equal; 4252 } 4253 4254 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4255 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4256 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4257 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4258 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4259 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4260 4261 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4262 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4263 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4264 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4265 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4266 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4267 4268 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4269 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4270 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4271 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4272 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4273 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4274 4275 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4276 { 4277 FloatRelation compare = float16_compare(a, b, s); 4278 return compare == float_relation_greater; 4279 } 4280 4281 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4282 { 4283 FloatRelation compare = float32_compare(a, b, s); 4284 return compare == float_relation_greater; 4285 } 4286 4287 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4288 { 4289 FloatRelation compare = float64_compare(a, b, s); 4290 return compare == float_relation_greater; 4291 } 4292 4293 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4294 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4295 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4296 4297 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4298 { 4299 FloatRelation compare = float16_compare(a, b, s); 4300 return compare == float_relation_greater || 4301 compare == float_relation_equal; 4302 } 4303 4304 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4305 { 4306 FloatRelation compare = float32_compare(a, b, s); 4307 return compare == float_relation_greater || 4308 compare == float_relation_equal; 4309 } 4310 4311 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4312 { 4313 FloatRelation compare = float64_compare(a, b, s); 4314 return compare == float_relation_greater || 4315 compare == float_relation_equal; 4316 } 4317 4318 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4319 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4320 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4321 4322 /* Vector Floating-Point Classify Instruction */ 4323 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4324 static void do_##NAME(void *vd, void *vs2, int i) \ 4325 { \ 4326 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4327 *((TD *)vd + HD(i)) = OP(s2); \ 4328 } 4329 4330 #define GEN_VEXT_V(NAME, ESZ) \ 4331 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4332 CPURISCVState *env, uint32_t desc) \ 4333 { \ 4334 uint32_t vm = vext_vm(desc); \ 4335 uint32_t vl = env->vl; \ 4336 uint32_t total_elems = \ 4337 vext_get_total_elems(env, desc, ESZ); \ 4338 uint32_t vta = vext_vta(desc); \ 4339 
uint32_t vma = vext_vma(desc); \ 4340 uint32_t i; \ 4341 \ 4342 for (i = env->vstart; i < vl; i++) { \ 4343 if (!vm && !vext_elem_mask(v0, i)) { \ 4344 /* set masked-off elements to 1s */ \ 4345 vext_set_elems_1s(vd, vma, i * ESZ, \ 4346 (i + 1) * ESZ); \ 4347 continue; \ 4348 } \ 4349 do_##NAME(vd, vs2, i); \ 4350 } \ 4351 env->vstart = 0; \ 4352 /* set tail elements to 1s */ \ 4353 vext_set_elems_1s(vd, vta, vl * ESZ, \ 4354 total_elems * ESZ); \ 4355 } 4356 4357 target_ulong fclass_h(uint64_t frs1) 4358 { 4359 float16 f = frs1; 4360 bool sign = float16_is_neg(f); 4361 4362 if (float16_is_infinity(f)) { 4363 return sign ? 1 << 0 : 1 << 7; 4364 } else if (float16_is_zero(f)) { 4365 return sign ? 1 << 3 : 1 << 4; 4366 } else if (float16_is_zero_or_denormal(f)) { 4367 return sign ? 1 << 2 : 1 << 5; 4368 } else if (float16_is_any_nan(f)) { 4369 float_status s = { }; /* for snan_bit_is_one */ 4370 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4371 } else { 4372 return sign ? 1 << 1 : 1 << 6; 4373 } 4374 } 4375 4376 target_ulong fclass_s(uint64_t frs1) 4377 { 4378 float32 f = frs1; 4379 bool sign = float32_is_neg(f); 4380 4381 if (float32_is_infinity(f)) { 4382 return sign ? 1 << 0 : 1 << 7; 4383 } else if (float32_is_zero(f)) { 4384 return sign ? 1 << 3 : 1 << 4; 4385 } else if (float32_is_zero_or_denormal(f)) { 4386 return sign ? 1 << 2 : 1 << 5; 4387 } else if (float32_is_any_nan(f)) { 4388 float_status s = { }; /* for snan_bit_is_one */ 4389 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4390 } else { 4391 return sign ? 1 << 1 : 1 << 6; 4392 } 4393 } 4394 4395 target_ulong fclass_d(uint64_t frs1) 4396 { 4397 float64 f = frs1; 4398 bool sign = float64_is_neg(f); 4399 4400 if (float64_is_infinity(f)) { 4401 return sign ? 1 << 0 : 1 << 7; 4402 } else if (float64_is_zero(f)) { 4403 return sign ? 1 << 3 : 1 << 4; 4404 } else if (float64_is_zero_or_denormal(f)) { 4405 return sign ? 1 << 2 : 1 << 5; 4406 } else if (float64_is_any_nan(f)) { 4407 float_status s = { }; /* for snan_bit_is_one */ 4408 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4409 } else { 4410 return sign ? 1 << 1 : 1 << 6; 4411 } 4412 } 4413 4414 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4415 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4416 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4417 GEN_VEXT_V(vfclass_v_h, 2) 4418 GEN_VEXT_V(vfclass_v_w, 4) 4419 GEN_VEXT_V(vfclass_v_d, 8) 4420 4421 /* Vector Floating-Point Merge Instruction */ 4422 4423 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4424 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4425 CPURISCVState *env, uint32_t desc) \ 4426 { \ 4427 uint32_t vm = vext_vm(desc); \ 4428 uint32_t vl = env->vl; \ 4429 uint32_t esz = sizeof(ETYPE); \ 4430 uint32_t total_elems = \ 4431 vext_get_total_elems(env, desc, esz); \ 4432 uint32_t vta = vext_vta(desc); \ 4433 uint32_t i; \ 4434 \ 4435 for (i = env->vstart; i < vl; i++) { \ 4436 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4437 *((ETYPE *)vd + H(i)) = \ 4438 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4439 } \ 4440 env->vstart = 0; \ 4441 /* set tail elements to 1s */ \ 4442 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4443 } 4444 4445 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4446 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4447 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4448 4449 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4450 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
*/ 4451 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4452 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4453 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4454 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4455 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4456 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4457 4458 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4459 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4460 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4461 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4462 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4463 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4464 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4465 4466 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4467 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4468 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4469 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4470 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4471 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4472 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4473 4474 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4475 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4476 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4477 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4478 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4479 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4480 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4481 4482 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4483 /* (TD, T2, TX2) */ 4484 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4485 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4486 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4487 /* 4488 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4489 */ 4490 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4491 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4492 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4493 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4494 4495 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4496 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4497 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4498 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4499 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4500 4501 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4502 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4503 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4504 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4505 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4506 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4507 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4508 4509 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4510 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4511 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4512 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4513 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4514 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4515 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4516 4517 /* 4518 * vfwcvt.f.f.v vd, vs2, vm 4519 * Convert single-width float to double-width float. 
4520 */ 4521 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4522 { 4523 return float16_to_float32(a, true, s); 4524 } 4525 4526 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4527 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4528 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4529 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4530 4531 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4532 /* (TD, T2, TX2) */ 4533 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4534 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4535 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4536 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */ 4537 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4538 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4539 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4540 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4541 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4542 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4543 4544 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4545 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4546 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4547 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4548 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4549 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4550 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4551 4552 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4553 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4554 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4555 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4556 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4557 4558 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4559 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4560 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4561 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4562 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4563 4564 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float.
*/ 4565 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4566 { 4567 return float32_to_float16(a, true, s); 4568 } 4569 4570 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4571 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4572 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4573 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4574 4575 /* 4576 * Vector Reduction Operations 4577 */ 4578 /* Vector Single-Width Integer Reduction Instructions */ 4579 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4580 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4581 void *vs2, CPURISCVState *env, \ 4582 uint32_t desc) \ 4583 { \ 4584 uint32_t vm = vext_vm(desc); \ 4585 uint32_t vl = env->vl; \ 4586 uint32_t esz = sizeof(TD); \ 4587 uint32_t vlenb = simd_maxsz(desc); \ 4588 uint32_t vta = vext_vta(desc); \ 4589 uint32_t i; \ 4590 TD s1 = *((TD *)vs1 + HD(0)); \ 4591 \ 4592 for (i = env->vstart; i < vl; i++) { \ 4593 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4594 if (!vm && !vext_elem_mask(v0, i)) { \ 4595 continue; \ 4596 } \ 4597 s1 = OP(s1, (TD)s2); \ 4598 } \ 4599 *((TD *)vd + HD(0)) = s1; \ 4600 env->vstart = 0; \ 4601 /* set tail elements to 1s */ \ 4602 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4603 } 4604 4605 /* vd[0] = sum(vs1[0], vs2[*]) */ 4606 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4607 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4608 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4609 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4610 4611 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4612 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4613 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4614 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4615 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4616 4617 /* vd[0] = max(vs1[0], vs2[*]) */ 4618 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4619 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4620 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4621 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4622 4623 /* vd[0] = minu(vs1[0], vs2[*]) */ 4624 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4625 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4626 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4627 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4628 4629 /* vd[0] = min(vs1[0], vs2[*]) */ 4630 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4631 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4632 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4633 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4634 4635 /* vd[0] = and(vs1[0], vs2[*]) */ 4636 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4637 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4638 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4639 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4640 4641 /* vd[0] = or(vs1[0], vs2[*]) */ 4642 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4643 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4644 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4645 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4646 4647 /* vd[0] = xor(vs1[0], vs2[*]) */ 4648 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4649 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, 
DO_XOR) 4650 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4651 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4652 4653 /* Vector Widening Integer Reduction Instructions */ 4654 /* signed sum reduction into double-width accumulator */ 4655 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4656 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4657 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4658 4659 /* Unsigned sum reduction into double-width accumulator */ 4660 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4661 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4662 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4663 4664 /* Vector Single-Width Floating-Point Reduction Instructions */ 4665 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4666 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4667 void *vs2, CPURISCVState *env, \ 4668 uint32_t desc) \ 4669 { \ 4670 uint32_t vm = vext_vm(desc); \ 4671 uint32_t vl = env->vl; \ 4672 uint32_t esz = sizeof(TD); \ 4673 uint32_t vlenb = simd_maxsz(desc); \ 4674 uint32_t vta = vext_vta(desc); \ 4675 uint32_t i; \ 4676 TD s1 = *((TD *)vs1 + HD(0)); \ 4677 \ 4678 for (i = env->vstart; i < vl; i++) { \ 4679 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4680 if (!vm && !vext_elem_mask(v0, i)) { \ 4681 continue; \ 4682 } \ 4683 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4684 } \ 4685 *((TD *)vd + HD(0)) = s1; \ 4686 env->vstart = 0; \ 4687 /* set tail elements to 1s */ \ 4688 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4689 } 4690 4691 /* Unordered sum */ 4692 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4693 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4694 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4695 4696 /* Ordered sum */ 4697 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4698 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4699 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4700 4701 /* Maximum value */ 4702 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4703 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4704 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4705 4706 /* Minimum value */ 4707 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4708 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4709 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4710 4711 /* Vector Widening Floating-Point Add Instructions */ 4712 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4713 { 4714 return float32_add(a, float16_to_float32(b, true, s), s); 4715 } 4716 4717 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4718 { 4719 return float64_add(a, float32_to_float64(b, s), s); 4720 } 4721 4722 /* Vector Widening Floating-Point Reduction Instructions */ 4723 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4724 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4725 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4726 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4727 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4728 4729 /* 4730 * Vector Mask Operations 4731 */ 4732 /* Vector 
Mask-Register Logical Instructions */ 4733 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4734 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4735 void *vs2, CPURISCVState *env, \ 4736 uint32_t desc) \ 4737 { \ 4738 uint32_t vl = env->vl; \ 4739 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4740 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4741 uint32_t i; \ 4742 int a, b; \ 4743 \ 4744 for (i = env->vstart; i < vl; i++) { \ 4745 a = vext_elem_mask(vs1, i); \ 4746 b = vext_elem_mask(vs2, i); \ 4747 vext_set_elem_mask(vd, i, OP(b, a)); \ 4748 } \ 4749 env->vstart = 0; \ 4750 /* 4751 * mask destination register is always tail-agnostic 4752 * set tail elements to 1s 4753 */ \ 4754 if (vta_all_1s) { \ 4755 for (; i < total_elems; i++) { \ 4756 vext_set_elem_mask(vd, i, 1); \ 4757 } \ 4758 } \ 4759 } 4760 4761 #define DO_NAND(N, M) (!(N & M)) 4762 #define DO_ANDNOT(N, M) (N & !M) 4763 #define DO_NOR(N, M) (!(N | M)) 4764 #define DO_ORNOT(N, M) (N | !M) 4765 #define DO_XNOR(N, M) (!(N ^ M)) 4766 4767 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4768 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4769 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4770 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4771 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4772 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4773 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4774 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4775 4776 /* Vector count population in mask vcpop */ 4777 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4778 uint32_t desc) 4779 { 4780 target_ulong cnt = 0; 4781 uint32_t vm = vext_vm(desc); 4782 uint32_t vl = env->vl; 4783 int i; 4784 4785 for (i = env->vstart; i < vl; i++) { 4786 if (vm || vext_elem_mask(v0, i)) { 4787 if (vext_elem_mask(vs2, i)) { 4788 cnt++; 4789 } 4790 } 4791 } 4792 env->vstart = 0; 4793 return cnt; 4794 } 4795 4796 /* vfirst find-first-set mask bit */ 4797 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4798 uint32_t desc) 4799 { 4800 uint32_t vm = vext_vm(desc); 4801 uint32_t vl = env->vl; 4802 int i; 4803 4804 for (i = env->vstart; i < vl; i++) { 4805 if (vm || vext_elem_mask(v0, i)) { 4806 if (vext_elem_mask(vs2, i)) { 4807 return i; 4808 } 4809 } 4810 } 4811 env->vstart = 0; 4812 return -1LL; 4813 } 4814 4815 enum set_mask_type { 4816 ONLY_FIRST = 1, 4817 INCLUDE_FIRST, 4818 BEFORE_FIRST, 4819 }; 4820 4821 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4822 uint32_t desc, enum set_mask_type type) 4823 { 4824 uint32_t vm = vext_vm(desc); 4825 uint32_t vl = env->vl; 4826 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; 4827 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4828 uint32_t vma = vext_vma(desc); 4829 int i; 4830 bool first_mask_bit = false; 4831 4832 for (i = env->vstart; i < vl; i++) { 4833 if (!vm && !vext_elem_mask(v0, i)) { 4834 /* set masked-off elements to 1s */ 4835 if (vma) { 4836 vext_set_elem_mask(vd, i, 1); 4837 } 4838 continue; 4839 } 4840 /* write a zero to all following active elements */ 4841 if (first_mask_bit) { 4842 vext_set_elem_mask(vd, i, 0); 4843 continue; 4844 } 4845 if (vext_elem_mask(vs2, i)) { 4846 first_mask_bit = true; 4847 if (type == BEFORE_FIRST) { 4848 vext_set_elem_mask(vd, i, 0); 4849 } else { 4850 vext_set_elem_mask(vd, i, 1); 4851 } 4852 } else { 4853 if (type == ONLY_FIRST) { 4854 vext_set_elem_mask(vd, i, 0); 4855 } else { 4856 vext_set_elem_mask(vd, i, 1); 4857 } 4858 } 4859 } 4860 env->vstart = 0; 4861 /* 4862 * mask destination register is always tail-agnostic 4863 * set tail elements to 1s 4864 */ 4865 if (vta_all_1s) { 4866 for (; i <

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}

/* Vector Iota Instruction */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t sum = 0; \
    int i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        *((ETYPE *)vd + H(i)) = sum; \
        if (vext_elem_mask(vs2, i)) { \
            sum++; \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    int i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        *((ETYPE *)vd + H(i)) = i; \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
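
/*
 * Illustrative sketch only (not part of the helper API): viota.m is a
 * prefix count of set mask bits and vid.v is the element index.  A scalar
 * model over byte-per-element arrays with all elements active; the name
 * ref_viota_vid() is hypothetical:
 *   mask  = 1 0 1 1 0
 *   viota = 0 1 1 2 3
 *   vid   = 0 1 2 3 4
 */
static inline void ref_viota_vid(const uint8_t *mask, uint32_t *iota,
                                 uint32_t *vid, uint32_t vl)
{
    uint32_t sum = 0;

    for (uint32_t i = 0; i < vl; i++) {
        iota[i] = sum;           /* set mask bits strictly before element i */
        vid[i] = i;              /* element index, independent of the mask */
        sum += mask[i] ? 1 : 0;
    }
}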

/*
 * Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    target_ulong offset = s1, i_min, i; \
    \
    i_min = MAX(env->vstart, offset); \
    for (i = i_min; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
    } \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    target_ulong i_max, i; \
    \
    i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \
    for (i = env->vstart; i < i_max; ++i) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
    } \
    \
    for (i = i_max; i < vl; ++i) { \
        if (vm || vext_elem_mask(v0, i)) { \
            *((ETYPE *)vd + H(i)) = 0; \
        } \
    } \
    \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
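
/*
 * Illustrative sketch only: the unmasked slide semantics the two macros
 * above implement, over plain arrays.  vslideup leaves vd[0..offset-1]
 * unchanged and copies vs2[i - offset] into vd[i]; vslidedown reads
 * vs2[i + offset] and writes zero once the source index reaches vlmax.
 * ref_slideup()/ref_slidedown() are hypothetical names for this sketch.
 */
static inline void ref_slideup(uint32_t *vd, const uint32_t *vs2,
                               uint32_t vl, uint32_t offset)
{
    for (uint32_t i = offset; i < vl; i++) {
        vd[i] = vs2[i - offset];            /* vd[i + offset] = vs2[i] */
    }
}

static inline void ref_slidedown(uint32_t *vd, const uint32_t *vs2,
                                 uint32_t vl, uint32_t vlmax, uint32_t offset)
{
    for (uint32_t i = 0; i < vl; i++) {
        /* source elements past the end of the register group read as zero */
        vd[i] = (offset < vlmax && i < vlmax - offset) ? vs2[i + offset] : 0;
    }
}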

#define GEN_VEXT_VSLIDE1UP(BITWIDTH, H) \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
                                 void *vs2, CPURISCVState *env, \
                                 uint32_t desc) \
{ \
    typedef uint##BITWIDTH##_t ETYPE; \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        if (i == 0) { \
            *((ETYPE *)vd + H(i)) = s1; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VSLIDE1UP(8, H1)
GEN_VEXT_VSLIDE1UP(16, H2)
GEN_VEXT_VSLIDE1UP(32, H4)
GEN_VEXT_VSLIDE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
                                   void *vs2, CPURISCVState *env, \
                                   uint32_t desc) \
{ \
    typedef uint##BITWIDTH##_t ETYPE; \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        if (i == vl - 1) { \
            *((ETYPE *)vd + H(i)) = s1; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
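
/*
 * Illustrative sketch only: vslide1up shifts the source up by one element
 * and inserts the scalar at element 0; vslide1down shifts down by one and
 * inserts the scalar at element vl - 1.  The floating-point variants above
 * reuse the same data movement, only the scalar comes from an FP register.
 * ref_slide1up()/ref_slide1down() are hypothetical names.
 */
static inline void ref_slide1up(uint64_t *vd, const uint64_t *vs2,
                                uint32_t vl, uint64_t scalar)
{
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = (i == 0) ? scalar : vs2[i - 1];
    }
}

static inline void ref_slide1down(uint64_t *vd, const uint64_t *vs2,
                                  uint32_t vl, uint64_t scalar)
{
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = (i == vl - 1) ? scalar : vs2[i + 1];
    }
}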

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(TS2); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint64_t index; \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        index = *((TS1 *)vs1 + HS1(i)); \
        if (index >= vlmax) { \
            *((TS2 *)vd + HS2(i)) = 0; \
        } else { \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint64_t index = s1; \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        if (index >= vlmax) { \
            *((ETYPE *)vd + H(i)) = 0; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
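
/*
 * Illustrative sketch only: register gather is an indexed read from vs2,
 * and out-of-range indices (>= vlmax) read as zero rather than trapping.
 * ref_vrgather() is a hypothetical name; the index is widened to uint64_t
 * as in the helpers so a large scalar index cannot wrap before the
 * comparison.
 */
static inline void ref_vrgather(uint32_t *vd, const uint32_t *vs2,
                                const uint64_t *index, uint32_t vl,
                                uint32_t vlmax)
{
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = (index[i] >= vlmax) ? 0 : vs2[index[i]];
    }
}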

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t num = 0, i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vext_elem_mask(vs1, i)) { \
            continue; \
        } \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
        num++; \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
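
/*
 * Illustrative sketch only: vcompress packs the vs2 elements whose vs1
 * mask bit is set into the low elements of vd, in order; elements of vd
 * beyond the number of set bits are left to tail handling.  ref_vcompress()
 * is a hypothetical name over a byte-per-element mask and plain arrays.
 */
static inline uint32_t ref_vcompress(uint32_t *vd, const uint32_t *vs2,
                                     const uint8_t *mask, uint32_t vl)
{
    uint32_t num = 0;

    for (uint32_t i = 0; i < vl; i++) {
        if (mask[i]) {
            vd[num++] = vs2[i];     /* next packed destination slot */
        }
    }
    return num;                     /* number of elements written */
}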