/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
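 *
 * As an illustration of the H macros below (a worked example, not spec
 * text): on a big-endian host, byte element 0 of a vector register lives
 * at host byte offset H1(0) = 0 ^ 7 = 7 within the first 64-bit chunk,
 * and 32-bit element 0 lives at word offset H4(0) = 0 ^ 1 = 1; on a
 * little-endian host the H macros are identity mappings and no fixup is
 * needed.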
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

static inline uint32_t vext_vta(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA);
}

static inline uint32_t vext_vma(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VMA);
}

static inline uint32_t vext_vta_all_1s(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * Get the number of total elements, including prestart, body and tail
 * elements. Note that when LMUL < 1, the tail includes the elements past
 * VLMAX that are held in the same vector register.
 */
static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
                                            uint32_t esz)
{
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
    return (vlenb << emul) / esz;
}

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & ~env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks the watchpoint before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check. In user mode, there is no watchpoint support for now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
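 *
 * For example, with 4 KiB pages, probing a 16-byte access that begins 4
 * bytes before a page boundary results in two probe_access() calls below:
 * one for the 4 bytes left on the first page and one for the remaining
 * 12 bytes on the next page.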
184 */ 185 static void probe_pages(CPURISCVState *env, target_ulong addr, 186 target_ulong len, uintptr_t ra, 187 MMUAccessType access_type) 188 { 189 target_ulong pagelen = -(addr | TARGET_PAGE_MASK); 190 target_ulong curlen = MIN(pagelen, len); 191 192 probe_access(env, adjust_addr(env, addr), curlen, access_type, 193 cpu_mmu_index(env, false), ra); 194 if (len > curlen) { 195 addr += curlen; 196 curlen = len - curlen; 197 probe_access(env, adjust_addr(env, addr), curlen, access_type, 198 cpu_mmu_index(env, false), ra); 199 } 200 } 201 202 /* set agnostic elements to 1s */ 203 static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, 204 uint32_t tot) 205 { 206 if (is_agnostic == 0) { 207 /* policy undisturbed */ 208 return; 209 } 210 if (tot - cnt == 0) { 211 return; 212 } 213 memset(base + cnt, -1, tot - cnt); 214 } 215 216 static inline void vext_set_elem_mask(void *v0, int index, 217 uint8_t value) 218 { 219 int idx = index / 64; 220 int pos = index % 64; 221 uint64_t old = ((uint64_t *)v0)[idx]; 222 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); 223 } 224 225 /* 226 * Earlier designs (pre-0.9) had a varying number of bits 227 * per mask value (MLEN). In the 0.9 design, MLEN=1. 228 * (Section 4.5) 229 */ 230 static inline int vext_elem_mask(void *v0, int index) 231 { 232 int idx = index / 64; 233 int pos = index % 64; 234 return (((uint64_t *)v0)[idx] >> pos) & 1; 235 } 236 237 /* elements operations for load and store */ 238 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, 239 uint32_t idx, void *vd, uintptr_t retaddr); 240 241 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 242 static void NAME(CPURISCVState *env, abi_ptr addr, \ 243 uint32_t idx, void *vd, uintptr_t retaddr)\ 244 { \ 245 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 246 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 247 } \ 248 249 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) 250 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw) 251 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) 252 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq) 253 254 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 255 static void NAME(CPURISCVState *env, abi_ptr addr, \ 256 uint32_t idx, void *vd, uintptr_t retaddr)\ 257 { \ 258 ETYPE data = *((ETYPE *)vd + H(idx)); \ 259 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 260 } 261 262 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) 263 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) 264 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) 265 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) 266 267 static void vext_set_tail_elems_1s(target_ulong vl, void *vd, 268 uint32_t desc, uint32_t nf, 269 uint32_t esz, uint32_t max_elems) 270 { 271 uint32_t vta = vext_vta(desc); 272 int k; 273 274 if (vta == 0) { 275 return; 276 } 277 278 for (k = 0; k < nf; ++k) { 279 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz, 280 (k * max_elems + max_elems) * esz); 281 } 282 } 283 284 /* 285 * stride: access vector element from strided memory 286 */ 287 static void 288 vext_ldst_stride(void *vd, void *v0, target_ulong base, 289 target_ulong stride, CPURISCVState *env, 290 uint32_t desc, uint32_t vm, 291 vext_ldst_elem_fn *ldst_elem, 292 uint32_t log2_esz, uintptr_t ra) 293 { 294 uint32_t i, k; 295 uint32_t nf = vext_nf(desc); 296 uint32_t max_elems = vext_max_elems(desc, log2_esz); 297 uint32_t esz = 1 << log2_esz; 298 uint32_t vma = vext_vma(desc); 299 300 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 301 k = 0; 302 while (k < nf) { 303 if (!vm && !vext_elem_mask(v0, i)) { 304 /* set masked-off 
elements to 1s */ 305 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 306 (i + k * max_elems + 1) * esz); 307 k++; 308 continue; 309 } 310 target_ulong addr = base + stride * i + (k << log2_esz); 311 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 312 k++; 313 } 314 } 315 env->vstart = 0; 316 317 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 318 } 319 320 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 321 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 322 target_ulong stride, CPURISCVState *env, \ 323 uint32_t desc) \ 324 { \ 325 uint32_t vm = vext_vm(desc); \ 326 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 327 ctzl(sizeof(ETYPE)), GETPC()); \ 328 } 329 330 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 331 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 332 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 333 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 334 335 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 336 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 337 target_ulong stride, CPURISCVState *env, \ 338 uint32_t desc) \ 339 { \ 340 uint32_t vm = vext_vm(desc); \ 341 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 342 ctzl(sizeof(ETYPE)), GETPC()); \ 343 } 344 345 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 346 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 347 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 348 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 349 350 /* 351 * unit-stride: access elements stored contiguously in memory 352 */ 353 354 /* unmasked unit-stride load and store operation */ 355 static void 356 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 357 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, 358 uintptr_t ra) 359 { 360 uint32_t i, k; 361 uint32_t nf = vext_nf(desc); 362 uint32_t max_elems = vext_max_elems(desc, log2_esz); 363 uint32_t esz = 1 << log2_esz; 364 365 /* load bytes from guest memory */ 366 for (i = env->vstart; i < evl; i++, env->vstart++) { 367 k = 0; 368 while (k < nf) { 369 target_ulong addr = base + ((i * nf + k) << log2_esz); 370 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 371 k++; 372 } 373 } 374 env->vstart = 0; 375 376 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); 377 } 378 379 /* 380 * masked unit-stride load and store operation will be a special case of 381 * stride, stride = NF * sizeof (ETYPE) 382 */ 383 384 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 385 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 386 CPURISCVState *env, uint32_t desc) \ 387 { \ 388 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 389 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 390 ctzl(sizeof(ETYPE)), GETPC()); \ 391 } \ 392 \ 393 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 394 CPURISCVState *env, uint32_t desc) \ 395 { \ 396 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 397 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 398 } 399 400 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 401 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 402 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 403 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 404 405 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 406 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 407 CPURISCVState *env, uint32_t desc) \ 408 { \ 409 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 410 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 411 ctzl(sizeof(ETYPE)), 
GETPC()); \ 412 } \ 413 \ 414 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 415 CPURISCVState *env, uint32_t desc) \ 416 { \ 417 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 418 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 419 } 420 421 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 422 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 423 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 424 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 425 426 /* 427 * unit stride mask load and store, EEW = 1 428 */ 429 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 430 CPURISCVState *env, uint32_t desc) 431 { 432 /* evl = ceil(vl/8) */ 433 uint8_t evl = (env->vl + 7) >> 3; 434 vext_ldst_us(vd, base, env, desc, lde_b, 435 0, evl, GETPC()); 436 } 437 438 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 439 CPURISCVState *env, uint32_t desc) 440 { 441 /* evl = ceil(vl/8) */ 442 uint8_t evl = (env->vl + 7) >> 3; 443 vext_ldst_us(vd, base, env, desc, ste_b, 444 0, evl, GETPC()); 445 } 446 447 /* 448 * index: access vector element from indexed memory 449 */ 450 typedef target_ulong vext_get_index_addr(target_ulong base, 451 uint32_t idx, void *vs2); 452 453 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 454 static target_ulong NAME(target_ulong base, \ 455 uint32_t idx, void *vs2) \ 456 { \ 457 return (base + *((ETYPE *)vs2 + H(idx))); \ 458 } 459 460 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 461 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 462 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 463 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 464 465 static inline void 466 vext_ldst_index(void *vd, void *v0, target_ulong base, 467 void *vs2, CPURISCVState *env, uint32_t desc, 468 vext_get_index_addr get_index_addr, 469 vext_ldst_elem_fn *ldst_elem, 470 uint32_t log2_esz, uintptr_t ra) 471 { 472 uint32_t i, k; 473 uint32_t nf = vext_nf(desc); 474 uint32_t vm = vext_vm(desc); 475 uint32_t max_elems = vext_max_elems(desc, log2_esz); 476 uint32_t esz = 1 << log2_esz; 477 uint32_t vma = vext_vma(desc); 478 479 /* load bytes from guest memory */ 480 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 481 k = 0; 482 while (k < nf) { 483 if (!vm && !vext_elem_mask(v0, i)) { 484 /* set masked-off elements to 1s */ 485 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 486 (i + k * max_elems + 1) * esz); 487 k++; 488 continue; 489 } 490 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); 491 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 492 k++; 493 } 494 } 495 env->vstart = 0; 496 497 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 498 } 499 500 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 501 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 502 void *vs2, CPURISCVState *env, uint32_t desc) \ 503 { \ 504 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 505 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ 506 } 507 508 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 509 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 510 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 511 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 512 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 513 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 514 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 515 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 516 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 517 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 518 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                 \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC());                                     \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 * unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
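
    /*
     * Illustrative summary of the fault-only-first behaviour above (a
     * sketch, not spec text): with vl = 8 and element 3 being the first
     * element whose page turns out not to be accessible, the probe loop
     * trims env->vl to 3 and only elements 0..2 are loaded; element 0
     * alone is probed with probe_pages() and so still faults in the
     * normal way.
     */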
625 env->vstart = 0; 626 627 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 628 } 629 630 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ 631 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 632 CPURISCVState *env, uint32_t desc) \ 633 { \ 634 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \ 635 ctzl(sizeof(ETYPE)), GETPC()); \ 636 } 637 638 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b) 639 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h) 640 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w) 641 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) 642 643 #define DO_SWAP(N, M) (M) 644 #define DO_AND(N, M) (N & M) 645 #define DO_XOR(N, M) (N ^ M) 646 #define DO_OR(N, M) (N | M) 647 #define DO_ADD(N, M) (N + M) 648 649 /* Signed min/max */ 650 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 651 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 652 653 /* 654 * load and store whole register instructions 655 */ 656 static void 657 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 658 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra) 659 { 660 uint32_t i, k, off, pos; 661 uint32_t nf = vext_nf(desc); 662 uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3; 663 uint32_t max_elems = vlenb >> log2_esz; 664 665 k = env->vstart / max_elems; 666 off = env->vstart % max_elems; 667 668 if (off) { 669 /* load/store rest of elements of current segment pointed by vstart */ 670 for (pos = off; pos < max_elems; pos++, env->vstart++) { 671 target_ulong addr = base + ((pos + k * max_elems) << log2_esz); 672 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, 673 ra); 674 } 675 k++; 676 } 677 678 /* load/store elements for rest of segments */ 679 for (; k < nf; k++) { 680 for (i = 0; i < max_elems; i++, env->vstart++) { 681 target_ulong addr = base + ((i + k * max_elems) << log2_esz); 682 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 683 } 684 } 685 686 env->vstart = 0; 687 } 688 689 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 690 void HELPER(NAME)(void *vd, target_ulong base, \ 691 CPURISCVState *env, uint32_t desc) \ 692 { \ 693 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 694 ctzl(sizeof(ETYPE)), GETPC()); \ 695 } 696 697 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 698 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 699 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 700 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 701 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 702 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 703 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 704 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 705 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 706 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 707 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 708 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 709 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 710 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 711 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 712 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 713 714 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 715 void HELPER(NAME)(void *vd, target_ulong base, \ 716 CPURISCVState *env, uint32_t desc) \ 717 { \ 718 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 719 ctzl(sizeof(ETYPE)), GETPC()); \ 720 } 721 722 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 723 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 724 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 725 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 726 727 /* 728 * Vector Integer Arithmetic Instructions 729 */ 730 731 /* expand macro args before macro */ 732 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 733 734 /* (TD, T1, T2, TX1, TX2) */ 735 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 736 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 737 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 738 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 739 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 740 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 741 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 742 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 743 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 744 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 745 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 746 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 747 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 748 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 749 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 750 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 751 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 752 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 753 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 754 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 755 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 756 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 757 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 758 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 759 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 760 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 761 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 762 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 763 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 764 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 765 766 /* operation of two vector elements */ 767 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 768 769 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 770 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 771 { \ 772 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 773 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 774 *((TD *)vd + HD(i)) = OP(s2, s1); \ 775 } 776 #define DO_SUB(N, M) (N - M) 777 #define DO_RSUB(N, M) (M - N) 778 779 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 780 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 781 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 782 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 783 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 784 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 785 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 786 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 787 788 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 789 CPURISCVState *env, uint32_t desc, 790 opivv2_fn *fn, uint32_t esz) 791 { 792 uint32_t vm = vext_vm(desc); 793 uint32_t vl = env->vl; 794 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 795 uint32_t vta = vext_vta(desc); 796 uint32_t vma = vext_vma(desc); 797 uint32_t i; 798 799 for (i = env->vstart; i < vl; i++) { 800 if (!vm && !vext_elem_mask(v0, i)) { 801 /* set masked-off elements to 1s */ 802 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 803 
continue; 804 } 805 fn(vd, vs1, vs2, i); 806 } 807 env->vstart = 0; 808 /* set tail elements to 1s */ 809 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 810 } 811 812 /* generate the helpers for OPIVV */ 813 #define GEN_VEXT_VV(NAME, ESZ) \ 814 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 815 void *vs2, CPURISCVState *env, \ 816 uint32_t desc) \ 817 { \ 818 do_vext_vv(vd, v0, vs1, vs2, env, desc, \ 819 do_##NAME, ESZ); \ 820 } 821 822 GEN_VEXT_VV(vadd_vv_b, 1) 823 GEN_VEXT_VV(vadd_vv_h, 2) 824 GEN_VEXT_VV(vadd_vv_w, 4) 825 GEN_VEXT_VV(vadd_vv_d, 8) 826 GEN_VEXT_VV(vsub_vv_b, 1) 827 GEN_VEXT_VV(vsub_vv_h, 2) 828 GEN_VEXT_VV(vsub_vv_w, 4) 829 GEN_VEXT_VV(vsub_vv_d, 8) 830 831 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 832 833 /* 834 * (T1)s1 gives the real operator type. 835 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 836 */ 837 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 838 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 839 { \ 840 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 841 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 842 } 843 844 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 845 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 846 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 847 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 848 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 849 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 850 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 851 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 852 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 853 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 854 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 855 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 856 857 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 858 CPURISCVState *env, uint32_t desc, 859 opivx2_fn fn, uint32_t esz) 860 { 861 uint32_t vm = vext_vm(desc); 862 uint32_t vl = env->vl; 863 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 864 uint32_t vta = vext_vta(desc); 865 uint32_t vma = vext_vma(desc); 866 uint32_t i; 867 868 for (i = env->vstart; i < vl; i++) { 869 if (!vm && !vext_elem_mask(v0, i)) { 870 /* set masked-off elements to 1s */ 871 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 872 continue; 873 } 874 fn(vd, s1, vs2, i); 875 } 876 env->vstart = 0; 877 /* set tail elements to 1s */ 878 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 879 } 880 881 /* generate the helpers for OPIVX */ 882 #define GEN_VEXT_VX(NAME, ESZ) \ 883 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 884 void *vs2, CPURISCVState *env, \ 885 uint32_t desc) \ 886 { \ 887 do_vext_vx(vd, v0, s1, vs2, env, desc, \ 888 do_##NAME, ESZ); \ 889 } 890 891 GEN_VEXT_VX(vadd_vx_b, 1) 892 GEN_VEXT_VX(vadd_vx_h, 2) 893 GEN_VEXT_VX(vadd_vx_w, 4) 894 GEN_VEXT_VX(vadd_vx_d, 8) 895 GEN_VEXT_VX(vsub_vx_b, 1) 896 GEN_VEXT_VX(vsub_vx_h, 2) 897 GEN_VEXT_VX(vsub_vx_w, 4) 898 GEN_VEXT_VX(vsub_vx_d, 8) 899 GEN_VEXT_VX(vrsub_vx_b, 1) 900 GEN_VEXT_VX(vrsub_vx_h, 2) 901 GEN_VEXT_VX(vrsub_vx_w, 4) 902 GEN_VEXT_VX(vrsub_vx_d, 8) 903 904 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 905 { 906 intptr_t oprsz = simd_oprsz(desc); 907 intptr_t i; 908 909 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 910 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 911 } 912 } 913 914 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 915 { 
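    /*
     * As in vec_rsubs8 above: reverse subtract with a scalar operand,
     * d[i] = b - a[i], over the whole operation size in 16-bit lanes
     * (e.g. b = 10, a[i] = 3 gives 7).
     */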
916 intptr_t oprsz = simd_oprsz(desc); 917 intptr_t i; 918 919 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 920 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 921 } 922 } 923 924 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 925 { 926 intptr_t oprsz = simd_oprsz(desc); 927 intptr_t i; 928 929 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 930 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 931 } 932 } 933 934 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 935 { 936 intptr_t oprsz = simd_oprsz(desc); 937 intptr_t i; 938 939 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 940 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 941 } 942 } 943 944 /* Vector Widening Integer Add/Subtract */ 945 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 946 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 947 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 948 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 949 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 950 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 951 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 952 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 953 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 954 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 955 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 956 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 957 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 958 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 959 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 960 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 961 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 962 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 963 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 964 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 965 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 966 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 967 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 968 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 969 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 970 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 971 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 972 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 973 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 974 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 975 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 976 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 977 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 978 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 979 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 980 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 981 GEN_VEXT_VV(vwaddu_vv_b, 2) 982 GEN_VEXT_VV(vwaddu_vv_h, 4) 983 GEN_VEXT_VV(vwaddu_vv_w, 8) 984 GEN_VEXT_VV(vwsubu_vv_b, 2) 985 GEN_VEXT_VV(vwsubu_vv_h, 4) 986 GEN_VEXT_VV(vwsubu_vv_w, 8) 987 GEN_VEXT_VV(vwadd_vv_b, 2) 988 GEN_VEXT_VV(vwadd_vv_h, 4) 989 GEN_VEXT_VV(vwadd_vv_w, 8) 990 GEN_VEXT_VV(vwsub_vv_b, 2) 991 GEN_VEXT_VV(vwsub_vv_h, 4) 992 GEN_VEXT_VV(vwsub_vv_w, 8) 993 GEN_VEXT_VV(vwaddu_wv_b, 2) 994 
GEN_VEXT_VV(vwaddu_wv_h, 4) 995 GEN_VEXT_VV(vwaddu_wv_w, 8) 996 GEN_VEXT_VV(vwsubu_wv_b, 2) 997 GEN_VEXT_VV(vwsubu_wv_h, 4) 998 GEN_VEXT_VV(vwsubu_wv_w, 8) 999 GEN_VEXT_VV(vwadd_wv_b, 2) 1000 GEN_VEXT_VV(vwadd_wv_h, 4) 1001 GEN_VEXT_VV(vwadd_wv_w, 8) 1002 GEN_VEXT_VV(vwsub_wv_b, 2) 1003 GEN_VEXT_VV(vwsub_wv_h, 4) 1004 GEN_VEXT_VV(vwsub_wv_w, 8) 1005 1006 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 1007 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 1008 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 1009 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 1010 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 1011 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 1012 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 1013 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 1014 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 1015 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 1016 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 1017 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 1018 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 1019 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 1020 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 1021 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 1022 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 1023 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 1024 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 1025 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 1026 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 1027 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 1028 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 1029 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 1030 GEN_VEXT_VX(vwaddu_vx_b, 2) 1031 GEN_VEXT_VX(vwaddu_vx_h, 4) 1032 GEN_VEXT_VX(vwaddu_vx_w, 8) 1033 GEN_VEXT_VX(vwsubu_vx_b, 2) 1034 GEN_VEXT_VX(vwsubu_vx_h, 4) 1035 GEN_VEXT_VX(vwsubu_vx_w, 8) 1036 GEN_VEXT_VX(vwadd_vx_b, 2) 1037 GEN_VEXT_VX(vwadd_vx_h, 4) 1038 GEN_VEXT_VX(vwadd_vx_w, 8) 1039 GEN_VEXT_VX(vwsub_vx_b, 2) 1040 GEN_VEXT_VX(vwsub_vx_h, 4) 1041 GEN_VEXT_VX(vwsub_vx_w, 8) 1042 GEN_VEXT_VX(vwaddu_wx_b, 2) 1043 GEN_VEXT_VX(vwaddu_wx_h, 4) 1044 GEN_VEXT_VX(vwaddu_wx_w, 8) 1045 GEN_VEXT_VX(vwsubu_wx_b, 2) 1046 GEN_VEXT_VX(vwsubu_wx_h, 4) 1047 GEN_VEXT_VX(vwsubu_wx_w, 8) 1048 GEN_VEXT_VX(vwadd_wx_b, 2) 1049 GEN_VEXT_VX(vwadd_wx_h, 4) 1050 GEN_VEXT_VX(vwadd_wx_w, 8) 1051 GEN_VEXT_VX(vwsub_wx_b, 2) 1052 GEN_VEXT_VX(vwsub_wx_h, 4) 1053 GEN_VEXT_VX(vwsub_wx_w, 8) 1054 1055 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 1056 #define DO_VADC(N, M, C) (N + M + C) 1057 #define DO_VSBC(N, M, C) (N - M - C) 1058 1059 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 1060 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1061 CPURISCVState *env, uint32_t desc) \ 1062 { \ 1063 uint32_t vl = env->vl; \ 1064 uint32_t esz = sizeof(ETYPE); \ 1065 uint32_t total_elems = \ 1066 vext_get_total_elems(env, desc, esz); \ 1067 uint32_t vta = vext_vta(desc); \ 1068 uint32_t i; \ 1069 \ 1070 for (i = env->vstart; i < vl; i++) { \ 1071 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1072 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1073 ETYPE carry = vext_elem_mask(v0, i); \ 1074 \ 1075 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 1076 } \ 1077 env->vstart = 0; \ 1078 /* set tail elements to 1s */ \ 1079 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1080 } 1081 1082 
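/*
 * Worked example for DO_VADC (illustrative only, not part of the build):
 * for the 8-bit helper vadc_vvm_b with s2 = 0xff, s1 = 0x01 and mask bit
 * v0[i] = 1, the destination element is (0xff + 0x01 + 1) & 0xff = 0x01.
 * The carry-out discarded here is exactly what the vmadc/vmsbc helpers
 * below record into a mask register via DO_MADC/DO_MSBC.
 */
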
GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 1083 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 1084 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 1085 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 1086 1087 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 1088 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 1089 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 1090 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 1091 1092 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 1093 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1094 CPURISCVState *env, uint32_t desc) \ 1095 { \ 1096 uint32_t vl = env->vl; \ 1097 uint32_t esz = sizeof(ETYPE); \ 1098 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1099 uint32_t vta = vext_vta(desc); \ 1100 uint32_t i; \ 1101 \ 1102 for (i = env->vstart; i < vl; i++) { \ 1103 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1104 ETYPE carry = vext_elem_mask(v0, i); \ 1105 \ 1106 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1107 } \ 1108 env->vstart = 0; \ 1109 /* set tail elements to 1s */ \ 1110 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1111 } 1112 1113 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1114 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1115 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1116 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1117 1118 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1119 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1120 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1121 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1122 1123 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1124 (__typeof(N))(N + M) < N) 1125 #define DO_MSBC(N, M, C) (C ? 
N <= M : N < M)

#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,      \
                  CPURISCVState *env, uint32_t desc)             \
{                                                                 \
    uint32_t vl = env->vl;                                        \
    uint32_t vm = vext_vm(desc);                                  \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;              \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                  \
    uint32_t i;                                                   \
                                                                  \
    for (i = env->vstart; i < vl; i++) {                          \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                        \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                        \
        ETYPE carry = !vm && vext_elem_mask(v0, i);               \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));          \
    }                                                             \
    env->vstart = 0;                                              \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                           \
    if (vta_all_1s) {                                             \
        for (; i < total_elems; i++) {                            \
            vext_set_elem_mask(vd, i, 1);                         \
        }                                                         \
    }                                                             \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,           \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                 \
    uint32_t vl = env->vl;                                        \
    uint32_t vm = vext_vm(desc);                                  \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;              \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                  \
    uint32_t i;                                                   \
                                                                  \
    for (i = env->vstart; i < vl; i++) {                          \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                        \
        ETYPE carry = !vm && vext_elem_mask(v0, i);               \
        vext_set_elem_mask(vd, i,                                 \
                           DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
    }                                                             \
    env->vstart = 0;                                              \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                           \
    if (vta_all_1s) {                                             \
        for (; i < total_elems; i++) {                            \
            vext_set_elem_mask(vd, i, 1);                         \
        }                                                         \
    }                                                             \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b,  OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h,  OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w,  OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d,  OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
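
/*
 * For reference, RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
 * above expands roughly to:
 *
 *   static void do_vand_vv_b(void *vd, void *vs1, void *vs2, int i)
 *   {
 *       int8_t s1 = *((int8_t *)vs1 + H1(i));
 *       int8_t s2 = *((int8_t *)vs2 + H1(i));
 *       *((int8_t *)vd + H1(i)) = s2 & s1;
 *   }
 *
 * and the GEN_VEXT_VV() invocations below wrap each such per-element
 * function with the common masked, tail-agnostic loop in do_vext_vv().
 */
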
GEN_VEXT_VV(vand_vv_b, 1) 1217 GEN_VEXT_VV(vand_vv_h, 2) 1218 GEN_VEXT_VV(vand_vv_w, 4) 1219 GEN_VEXT_VV(vand_vv_d, 8) 1220 GEN_VEXT_VV(vor_vv_b, 1) 1221 GEN_VEXT_VV(vor_vv_h, 2) 1222 GEN_VEXT_VV(vor_vv_w, 4) 1223 GEN_VEXT_VV(vor_vv_d, 8) 1224 GEN_VEXT_VV(vxor_vv_b, 1) 1225 GEN_VEXT_VV(vxor_vv_h, 2) 1226 GEN_VEXT_VV(vxor_vv_w, 4) 1227 GEN_VEXT_VV(vxor_vv_d, 8) 1228 1229 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1230 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1231 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1232 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1233 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1234 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1235 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1236 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1237 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1238 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1239 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1240 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1241 GEN_VEXT_VX(vand_vx_b, 1) 1242 GEN_VEXT_VX(vand_vx_h, 2) 1243 GEN_VEXT_VX(vand_vx_w, 4) 1244 GEN_VEXT_VX(vand_vx_d, 8) 1245 GEN_VEXT_VX(vor_vx_b, 1) 1246 GEN_VEXT_VX(vor_vx_h, 2) 1247 GEN_VEXT_VX(vor_vx_w, 4) 1248 GEN_VEXT_VX(vor_vx_d, 8) 1249 GEN_VEXT_VX(vxor_vx_b, 1) 1250 GEN_VEXT_VX(vxor_vx_h, 2) 1251 GEN_VEXT_VX(vxor_vx_w, 4) 1252 GEN_VEXT_VX(vxor_vx_d, 8) 1253 1254 /* Vector Single-Width Bit Shift Instructions */ 1255 #define DO_SLL(N, M) (N << (M)) 1256 #define DO_SRL(N, M) (N >> (M)) 1257 1258 /* generate the helpers for shift instructions with two vector operators */ 1259 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1260 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1261 void *vs2, CPURISCVState *env, uint32_t desc) \ 1262 { \ 1263 uint32_t vm = vext_vm(desc); \ 1264 uint32_t vl = env->vl; \ 1265 uint32_t esz = sizeof(TS1); \ 1266 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1267 uint32_t vta = vext_vta(desc); \ 1268 uint32_t vma = vext_vma(desc); \ 1269 uint32_t i; \ 1270 \ 1271 for (i = env->vstart; i < vl; i++) { \ 1272 if (!vm && !vext_elem_mask(v0, i)) { \ 1273 /* set masked-off elements to 1s */ \ 1274 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 1275 continue; \ 1276 } \ 1277 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1278 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1279 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1280 } \ 1281 env->vstart = 0; \ 1282 /* set tail elements to 1s */ \ 1283 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1284 } 1285 1286 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1287 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1288 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1289 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1290 1291 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1292 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1293 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1294 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1295 1296 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1297 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1298 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1299 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1300 1301 /* 1302 * generate the helpers for shift 
instructions with one vector and one scalar 1303 */ 1304 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1305 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1306 void *vs2, CPURISCVState *env, \ 1307 uint32_t desc) \ 1308 { \ 1309 uint32_t vm = vext_vm(desc); \ 1310 uint32_t vl = env->vl; \ 1311 uint32_t esz = sizeof(TD); \ 1312 uint32_t total_elems = \ 1313 vext_get_total_elems(env, desc, esz); \ 1314 uint32_t vta = vext_vta(desc); \ 1315 uint32_t vma = vext_vma(desc); \ 1316 uint32_t i; \ 1317 \ 1318 for (i = env->vstart; i < vl; i++) { \ 1319 if (!vm && !vext_elem_mask(v0, i)) { \ 1320 /* set masked-off elements to 1s */ \ 1321 vext_set_elems_1s(vd, vma, i * esz, \ 1322 (i + 1) * esz); \ 1323 continue; \ 1324 } \ 1325 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1326 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1327 } \ 1328 env->vstart = 0; \ 1329 /* set tail elements to 1s */ \ 1330 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ 1331 } 1332 1333 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1334 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1335 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1336 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1337 1338 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1339 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1340 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1341 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1342 1343 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1344 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1345 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1346 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1347 1348 /* Vector Narrowing Integer Right Shift Instructions */ 1349 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1350 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1351 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1352 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1353 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1354 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1355 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1356 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1357 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1358 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1359 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1360 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1361 1362 /* Vector Integer Comparison Instructions */ 1363 #define DO_MSEQ(N, M) (N == M) 1364 #define DO_MSNE(N, M) (N != M) 1365 #define DO_MSLT(N, M) (N < M) 1366 #define DO_MSLE(N, M) (N <= M) 1367 #define DO_MSGT(N, M) (N > M) 1368 1369 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1370 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1371 CPURISCVState *env, uint32_t desc) \ 1372 { \ 1373 uint32_t vm = vext_vm(desc); \ 1374 uint32_t vl = env->vl; \ 1375 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 1376 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1377 uint32_t vma = vext_vma(desc); \ 1378 uint32_t i; \ 1379 \ 1380 for (i = env->vstart; i < vl; i++) { \ 1381 
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                        \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                        \
        if (!vm && !vext_elem_mask(v0, i)) {                      \
            /* set masked-off elements to 1s */                   \
            if (vma) {                                            \
                vext_set_elem_mask(vd, i, 1);                     \
            }                                                     \
            continue;                                             \
        }                                                         \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));                 \
    }                                                             \
    env->vstart = 0;                                              \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                           \
    if (vta_all_1s) {                                             \
        for (; i < total_elems; i++) {                            \
            vext_set_elem_mask(vd, i, 1);                         \
        }                                                         \
    }                                                             \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                    \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                  \
    uint32_t vm = vext_vm(desc);                                   \
    uint32_t vl = env->vl;                                         \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;               \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                   \
    uint32_t vma = vext_vma(desc);                                 \
    uint32_t i;                                                    \
                                                                   \
    for (i = env->vstart; i < vl; i++) {                           \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                         \
        if (!vm && !vext_elem_mask(v0, i)) {                       \
            /* set masked-off elements to 1s */                    \
            if (vma) {                                             \
                vext_set_elem_mask(vd, i, 1);                      \
            }                                                      \
            continue;                                              \
        }                                                          \
        vext_set_elem_mask(vd, i,                                  \
                           DO_OP(s2, (ETYPE)(target_long)s1));     \
    }                                                              \
    env->vstart = 0;                                               \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                            \
    if (vta_all_1s) {                                              \
        for (; i < total_elems; i++) {                             \
            vext_set_elem_mask(vd, i, 1);                          \
        }                                                          \
    }                                                              \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1482 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1483 1484 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1485 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1486 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1487 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1488 1489 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1490 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1491 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1492 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1493 1494 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1495 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1496 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1497 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1498 1499 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1500 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1501 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1502 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1503 1504 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1505 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1506 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1507 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1508 1509 /* Vector Integer Min/Max Instructions */ 1510 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1511 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1512 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1513 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1514 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1515 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1516 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1517 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1518 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1519 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1520 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1521 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1522 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1523 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1524 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1525 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1526 GEN_VEXT_VV(vminu_vv_b, 1) 1527 GEN_VEXT_VV(vminu_vv_h, 2) 1528 GEN_VEXT_VV(vminu_vv_w, 4) 1529 GEN_VEXT_VV(vminu_vv_d, 8) 1530 GEN_VEXT_VV(vmin_vv_b, 1) 1531 GEN_VEXT_VV(vmin_vv_h, 2) 1532 GEN_VEXT_VV(vmin_vv_w, 4) 1533 GEN_VEXT_VV(vmin_vv_d, 8) 1534 GEN_VEXT_VV(vmaxu_vv_b, 1) 1535 GEN_VEXT_VV(vmaxu_vv_h, 2) 1536 GEN_VEXT_VV(vmaxu_vv_w, 4) 1537 GEN_VEXT_VV(vmaxu_vv_d, 8) 1538 GEN_VEXT_VV(vmax_vv_b, 1) 1539 GEN_VEXT_VV(vmax_vv_h, 2) 1540 GEN_VEXT_VV(vmax_vv_w, 4) 1541 GEN_VEXT_VV(vmax_vv_d, 8) 1542 1543 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1544 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1545 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1546 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1547 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1548 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1549 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1550 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1551 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1552 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1553 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1554 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1555 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, 
H1, H1, DO_MAX) 1556 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1557 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1558 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1559 GEN_VEXT_VX(vminu_vx_b, 1) 1560 GEN_VEXT_VX(vminu_vx_h, 2) 1561 GEN_VEXT_VX(vminu_vx_w, 4) 1562 GEN_VEXT_VX(vminu_vx_d, 8) 1563 GEN_VEXT_VX(vmin_vx_b, 1) 1564 GEN_VEXT_VX(vmin_vx_h, 2) 1565 GEN_VEXT_VX(vmin_vx_w, 4) 1566 GEN_VEXT_VX(vmin_vx_d, 8) 1567 GEN_VEXT_VX(vmaxu_vx_b, 1) 1568 GEN_VEXT_VX(vmaxu_vx_h, 2) 1569 GEN_VEXT_VX(vmaxu_vx_w, 4) 1570 GEN_VEXT_VX(vmaxu_vx_d, 8) 1571 GEN_VEXT_VX(vmax_vx_b, 1) 1572 GEN_VEXT_VX(vmax_vx_h, 2) 1573 GEN_VEXT_VX(vmax_vx_w, 4) 1574 GEN_VEXT_VX(vmax_vx_d, 8) 1575 1576 /* Vector Single-Width Integer Multiply Instructions */ 1577 #define DO_MUL(N, M) (N * M) 1578 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1579 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1580 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1581 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1582 GEN_VEXT_VV(vmul_vv_b, 1) 1583 GEN_VEXT_VV(vmul_vv_h, 2) 1584 GEN_VEXT_VV(vmul_vv_w, 4) 1585 GEN_VEXT_VV(vmul_vv_d, 8) 1586 1587 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1588 { 1589 return (int16_t)s2 * (int16_t)s1 >> 8; 1590 } 1591 1592 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1593 { 1594 return (int32_t)s2 * (int32_t)s1 >> 16; 1595 } 1596 1597 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1598 { 1599 return (int64_t)s2 * (int64_t)s1 >> 32; 1600 } 1601 1602 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1603 { 1604 uint64_t hi_64, lo_64; 1605 1606 muls64(&lo_64, &hi_64, s1, s2); 1607 return hi_64; 1608 } 1609 1610 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1611 { 1612 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1613 } 1614 1615 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1616 { 1617 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1618 } 1619 1620 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1621 { 1622 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1623 } 1624 1625 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1626 { 1627 uint64_t hi_64, lo_64; 1628 1629 mulu64(&lo_64, &hi_64, s2, s1); 1630 return hi_64; 1631 } 1632 1633 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1634 { 1635 return (int16_t)s2 * (uint16_t)s1 >> 8; 1636 } 1637 1638 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1639 { 1640 return (int32_t)s2 * (uint32_t)s1 >> 16; 1641 } 1642 1643 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1644 { 1645 return (int64_t)s2 * (uint64_t)s1 >> 32; 1646 } 1647 1648 /* 1649 * Let A = signed operand, 1650 * B = unsigned operand 1651 * P = mulu64(A, B), unsigned product 1652 * 1653 * LET X = 2 ** 64 - A, 2's complement of A 1654 * SP = signed product 1655 * THEN 1656 * IF A < 0 1657 * SP = -X * B 1658 * = -(2 ** 64 - A) * B 1659 * = A * B - 2 ** 64 * B 1660 * = P - 2 ** 64 * B 1661 * ELSE 1662 * SP = P 1663 * THEN 1664 * HI_P -= (A < 0 ? B : 0) 1665 */ 1666 1667 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1668 { 1669 uint64_t hi_64, lo_64; 1670 1671 mulu64(&lo_64, &hi_64, s2, s1); 1672 1673 hi_64 -= s2 < 0 ? 
s1 : 0; 1674 return hi_64; 1675 } 1676 1677 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1678 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1679 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1680 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1681 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1682 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1683 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1684 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1685 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1686 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1687 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1688 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1689 GEN_VEXT_VV(vmulh_vv_b, 1) 1690 GEN_VEXT_VV(vmulh_vv_h, 2) 1691 GEN_VEXT_VV(vmulh_vv_w, 4) 1692 GEN_VEXT_VV(vmulh_vv_d, 8) 1693 GEN_VEXT_VV(vmulhu_vv_b, 1) 1694 GEN_VEXT_VV(vmulhu_vv_h, 2) 1695 GEN_VEXT_VV(vmulhu_vv_w, 4) 1696 GEN_VEXT_VV(vmulhu_vv_d, 8) 1697 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1698 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1699 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1700 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1701 1702 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1703 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1704 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1705 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1706 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1707 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1708 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1709 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1710 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1711 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1712 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1713 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1714 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1715 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1716 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1717 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1718 GEN_VEXT_VX(vmul_vx_b, 1) 1719 GEN_VEXT_VX(vmul_vx_h, 2) 1720 GEN_VEXT_VX(vmul_vx_w, 4) 1721 GEN_VEXT_VX(vmul_vx_d, 8) 1722 GEN_VEXT_VX(vmulh_vx_b, 1) 1723 GEN_VEXT_VX(vmulh_vx_h, 2) 1724 GEN_VEXT_VX(vmulh_vx_w, 4) 1725 GEN_VEXT_VX(vmulh_vx_d, 8) 1726 GEN_VEXT_VX(vmulhu_vx_b, 1) 1727 GEN_VEXT_VX(vmulhu_vx_h, 2) 1728 GEN_VEXT_VX(vmulhu_vx_w, 4) 1729 GEN_VEXT_VX(vmulhu_vx_d, 8) 1730 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1731 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1732 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1733 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1734 1735 /* Vector Integer Divide Instructions */ 1736 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1737 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1738 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ 1739 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1740 #define DO_REM(N, M) (unlikely(M == 0) ? N : \ 1741 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1742 1743 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1744 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1745 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1746 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1747 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1748 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1749 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1750 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1751 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1752 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1753 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1754 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1755 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1756 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1757 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1758 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1759 GEN_VEXT_VV(vdivu_vv_b, 1) 1760 GEN_VEXT_VV(vdivu_vv_h, 2) 1761 GEN_VEXT_VV(vdivu_vv_w, 4) 1762 GEN_VEXT_VV(vdivu_vv_d, 8) 1763 GEN_VEXT_VV(vdiv_vv_b, 1) 1764 GEN_VEXT_VV(vdiv_vv_h, 2) 1765 GEN_VEXT_VV(vdiv_vv_w, 4) 1766 GEN_VEXT_VV(vdiv_vv_d, 8) 1767 GEN_VEXT_VV(vremu_vv_b, 1) 1768 GEN_VEXT_VV(vremu_vv_h, 2) 1769 GEN_VEXT_VV(vremu_vv_w, 4) 1770 GEN_VEXT_VV(vremu_vv_d, 8) 1771 GEN_VEXT_VV(vrem_vv_b, 1) 1772 GEN_VEXT_VV(vrem_vv_h, 2) 1773 GEN_VEXT_VV(vrem_vv_w, 4) 1774 GEN_VEXT_VV(vrem_vv_d, 8) 1775 1776 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1777 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1778 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1779 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1780 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1781 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1782 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1783 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1784 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1785 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1786 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1787 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1788 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1789 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1790 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1791 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1792 GEN_VEXT_VX(vdivu_vx_b, 1) 1793 GEN_VEXT_VX(vdivu_vx_h, 2) 1794 GEN_VEXT_VX(vdivu_vx_w, 4) 1795 GEN_VEXT_VX(vdivu_vx_d, 8) 1796 GEN_VEXT_VX(vdiv_vx_b, 1) 1797 GEN_VEXT_VX(vdiv_vx_h, 2) 1798 GEN_VEXT_VX(vdiv_vx_w, 4) 1799 GEN_VEXT_VX(vdiv_vx_d, 8) 1800 GEN_VEXT_VX(vremu_vx_b, 1) 1801 GEN_VEXT_VX(vremu_vx_h, 2) 1802 GEN_VEXT_VX(vremu_vx_w, 4) 1803 GEN_VEXT_VX(vremu_vx_d, 8) 1804 GEN_VEXT_VX(vrem_vx_b, 1) 1805 GEN_VEXT_VX(vrem_vx_h, 2) 1806 GEN_VEXT_VX(vrem_vx_w, 4) 1807 GEN_VEXT_VX(vrem_vx_d, 8) 1808 1809 /* Vector Widening Integer Multiply Instructions */ 1810 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1811 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1812 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1813 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1814 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1815 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1816 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1817 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, 
DO_MUL) 1818 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1819 GEN_VEXT_VV(vwmul_vv_b, 2) 1820 GEN_VEXT_VV(vwmul_vv_h, 4) 1821 GEN_VEXT_VV(vwmul_vv_w, 8) 1822 GEN_VEXT_VV(vwmulu_vv_b, 2) 1823 GEN_VEXT_VV(vwmulu_vv_h, 4) 1824 GEN_VEXT_VV(vwmulu_vv_w, 8) 1825 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1826 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1827 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1828 1829 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1830 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1831 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1832 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1833 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1834 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1835 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1836 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1837 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1838 GEN_VEXT_VX(vwmul_vx_b, 2) 1839 GEN_VEXT_VX(vwmul_vx_h, 4) 1840 GEN_VEXT_VX(vwmul_vx_w, 8) 1841 GEN_VEXT_VX(vwmulu_vx_b, 2) 1842 GEN_VEXT_VX(vwmulu_vx_h, 4) 1843 GEN_VEXT_VX(vwmulu_vx_w, 8) 1844 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1845 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1846 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1847 1848 /* Vector Single-Width Integer Multiply-Add Instructions */ 1849 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1850 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1851 { \ 1852 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1853 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1854 TD d = *((TD *)vd + HD(i)); \ 1855 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1856 } 1857 1858 #define DO_MACC(N, M, D) (M * N + D) 1859 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1860 #define DO_MADD(N, M, D) (M * D + N) 1861 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1862 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1863 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1864 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1865 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1866 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1867 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1868 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1869 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1870 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1871 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1872 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1873 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1874 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1875 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1876 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1877 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1878 GEN_VEXT_VV(vmacc_vv_b, 1) 1879 GEN_VEXT_VV(vmacc_vv_h, 2) 1880 GEN_VEXT_VV(vmacc_vv_w, 4) 1881 GEN_VEXT_VV(vmacc_vv_d, 8) 1882 GEN_VEXT_VV(vnmsac_vv_b, 1) 1883 GEN_VEXT_VV(vnmsac_vv_h, 2) 1884 GEN_VEXT_VV(vnmsac_vv_w, 4) 1885 GEN_VEXT_VV(vnmsac_vv_d, 8) 1886 GEN_VEXT_VV(vmadd_vv_b, 1) 1887 GEN_VEXT_VV(vmadd_vv_h, 2) 1888 GEN_VEXT_VV(vmadd_vv_w, 4) 1889 GEN_VEXT_VV(vmadd_vv_d, 8) 1890 GEN_VEXT_VV(vnmsub_vv_b, 1) 1891 GEN_VEXT_VV(vnmsub_vv_h, 2) 1892 GEN_VEXT_VV(vnmsub_vv_w, 4) 1893 GEN_VEXT_VV(vnmsub_vv_d, 8) 1894 1895 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1896 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1897 { \ 1898 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1899 TD d = *((TD *)vd 
+ HD(i)); \ 1900 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1901 } 1902 1903 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1904 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1905 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1906 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1907 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1908 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1909 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1910 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1911 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1912 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1913 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1914 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1915 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1916 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1917 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1918 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1919 GEN_VEXT_VX(vmacc_vx_b, 1) 1920 GEN_VEXT_VX(vmacc_vx_h, 2) 1921 GEN_VEXT_VX(vmacc_vx_w, 4) 1922 GEN_VEXT_VX(vmacc_vx_d, 8) 1923 GEN_VEXT_VX(vnmsac_vx_b, 1) 1924 GEN_VEXT_VX(vnmsac_vx_h, 2) 1925 GEN_VEXT_VX(vnmsac_vx_w, 4) 1926 GEN_VEXT_VX(vnmsac_vx_d, 8) 1927 GEN_VEXT_VX(vmadd_vx_b, 1) 1928 GEN_VEXT_VX(vmadd_vx_h, 2) 1929 GEN_VEXT_VX(vmadd_vx_w, 4) 1930 GEN_VEXT_VX(vmadd_vx_d, 8) 1931 GEN_VEXT_VX(vnmsub_vx_b, 1) 1932 GEN_VEXT_VX(vnmsub_vx_h, 2) 1933 GEN_VEXT_VX(vnmsub_vx_w, 4) 1934 GEN_VEXT_VX(vnmsub_vx_d, 8) 1935 1936 /* Vector Widening Integer Multiply-Add Instructions */ 1937 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1938 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1939 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1940 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1941 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1942 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1943 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1944 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1945 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1946 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1947 GEN_VEXT_VV(vwmaccu_vv_h, 4) 1948 GEN_VEXT_VV(vwmaccu_vv_w, 8) 1949 GEN_VEXT_VV(vwmacc_vv_b, 2) 1950 GEN_VEXT_VV(vwmacc_vv_h, 4) 1951 GEN_VEXT_VV(vwmacc_vv_w, 8) 1952 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 1953 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 1954 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 1955 1956 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1957 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1958 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1959 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1960 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1961 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1962 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1963 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1964 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1965 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1966 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1967 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1968 GEN_VEXT_VX(vwmaccu_vx_b, 2) 1969 GEN_VEXT_VX(vwmaccu_vx_h, 4) 1970 GEN_VEXT_VX(vwmaccu_vx_w, 8) 1971 GEN_VEXT_VX(vwmacc_vx_b, 2) 1972 GEN_VEXT_VX(vwmacc_vx_h, 4) 1973 GEN_VEXT_VX(vwmacc_vx_w, 8) 1974 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 1975 
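/*
 * A rough sketch of what the macro layering above produces, for reference
 * only (it assumes WOP_UUU_B names the widening unsigned type list with a
 * uint16_t destination/accumulator and uint8_t sources, as defined earlier
 * in this file).  RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
 * instantiates roughly this per-element function:
 *
 *     static void do_vwmaccu_vx_b(void *vd, target_long s1, void *vs2, int i)
 *     {
 *         uint16_t s2 = *((uint8_t *)vs2 + H1(i));
 *         uint16_t d = *((uint16_t *)vd + H2(i));
 *         *((uint16_t *)vd + H2(i)) = (uint16_t)(uint8_t)s1 * s2 + d;
 *     }
 *
 * GEN_VEXT_VX(NAME, ESZ), defined earlier, then wraps it in the HELPER()
 * entry point that loops from vstart over the body elements, applies the
 * v0 mask and the vma/vta policies, and resets vstart.
 */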
GEN_VEXT_VX(vwmaccsu_vx_h, 4) 1976 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 1977 GEN_VEXT_VX(vwmaccus_vx_b, 2) 1978 GEN_VEXT_VX(vwmaccus_vx_h, 4) 1979 GEN_VEXT_VX(vwmaccus_vx_w, 8) 1980 1981 /* Vector Integer Merge and Move Instructions */ 1982 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1983 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1984 uint32_t desc) \ 1985 { \ 1986 uint32_t vl = env->vl; \ 1987 uint32_t esz = sizeof(ETYPE); \ 1988 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1989 uint32_t vta = vext_vta(desc); \ 1990 uint32_t i; \ 1991 \ 1992 for (i = env->vstart; i < vl; i++) { \ 1993 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1994 *((ETYPE *)vd + H(i)) = s1; \ 1995 } \ 1996 env->vstart = 0; \ 1997 /* set tail elements to 1s */ \ 1998 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1999 } 2000 2001 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 2002 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 2003 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 2004 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 2005 2006 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 2007 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2008 uint32_t desc) \ 2009 { \ 2010 uint32_t vl = env->vl; \ 2011 uint32_t esz = sizeof(ETYPE); \ 2012 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2013 uint32_t vta = vext_vta(desc); \ 2014 uint32_t i; \ 2015 \ 2016 for (i = env->vstart; i < vl; i++) { \ 2017 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2018 } \ 2019 env->vstart = 0; \ 2020 /* set tail elements to 1s */ \ 2021 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2022 } 2023 2024 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2025 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2026 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2027 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2028 2029 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2030 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2031 CPURISCVState *env, uint32_t desc) \ 2032 { \ 2033 uint32_t vl = env->vl; \ 2034 uint32_t esz = sizeof(ETYPE); \ 2035 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2036 uint32_t vta = vext_vta(desc); \ 2037 uint32_t i; \ 2038 \ 2039 for (i = env->vstart; i < vl; i++) { \ 2040 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2041 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2042 } \ 2043 env->vstart = 0; \ 2044 /* set tail elements to 1s */ \ 2045 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2046 } 2047 2048 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2049 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2050 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2051 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2052 2053 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2054 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2055 void *vs2, CPURISCVState *env, uint32_t desc) \ 2056 { \ 2057 uint32_t vl = env->vl; \ 2058 uint32_t esz = sizeof(ETYPE); \ 2059 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2060 uint32_t vta = vext_vta(desc); \ 2061 uint32_t i; \ 2062 \ 2063 for (i = env->vstart; i < vl; i++) { \ 2064 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2065 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 2066 (ETYPE)(target_long)s1); \ 2067 *((ETYPE *)vd + H(i)) = d; \ 2068 } \ 2069 env->vstart = 0; \ 2070 /* set tail elements to 1s */ \ 2071 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2072 } 2073 2074 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2075 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2076 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2077 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2078 2079 /* 2080 * Vector Fixed-Point Arithmetic Instructions 2081 */ 2082 2083 /* Vector Single-Width Saturating Add and Subtract */ 2084 2085 /* 2086 * As fixed point instructions probably have round mode and saturation, 2087 * define common macros for fixed point here. 2088 */ 2089 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2090 CPURISCVState *env, int vxrm); 2091 2092 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2093 static inline void \ 2094 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2095 CPURISCVState *env, int vxrm) \ 2096 { \ 2097 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2098 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2099 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2100 } 2101 2102 static inline void 2103 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2104 CPURISCVState *env, 2105 uint32_t vl, uint32_t vm, int vxrm, 2106 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 2107 { 2108 for (uint32_t i = env->vstart; i < vl; i++) { 2109 if (!vm && !vext_elem_mask(v0, i)) { 2110 /* set masked-off elements to 1s */ 2111 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2112 continue; 2113 } 2114 fn(vd, vs1, vs2, i, env, vxrm); 2115 } 2116 env->vstart = 0; 2117 } 2118 2119 static inline void 2120 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2121 CPURISCVState *env, 2122 uint32_t desc, 2123 opivv2_rm_fn *fn, uint32_t esz) 2124 { 2125 uint32_t vm = vext_vm(desc); 2126 uint32_t vl = env->vl; 2127 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2128 uint32_t vta = vext_vta(desc); 2129 uint32_t vma = vext_vma(desc); 2130 2131 switch (env->vxrm) { 2132 case 0: /* rnu */ 2133 vext_vv_rm_1(vd, v0, vs1, vs2, 2134 env, vl, vm, 0, fn, vma, esz); 2135 break; 2136 case 1: /* rne */ 2137 vext_vv_rm_1(vd, v0, vs1, vs2, 2138 env, vl, vm, 1, fn, vma, esz); 2139 break; 2140 case 2: /* rdn */ 2141 vext_vv_rm_1(vd, v0, vs1, vs2, 2142 env, vl, vm, 2, fn, vma, esz); 2143 break; 2144 default: /* rod */ 2145 vext_vv_rm_1(vd, v0, vs1, vs2, 2146 env, vl, vm, 3, fn, vma, esz); 2147 break; 2148 } 2149 /* set tail elements to 1s */ 2150 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2151 } 2152 2153 /* generate helpers for fixed point instructions with OPIVV format */ 2154 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2155 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2156 CPURISCVState *env, uint32_t desc) \ 2157 { \ 2158 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2159 do_##NAME, ESZ); \ 2160 } 2161 2162 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, 2163 uint8_t b) 2164 { 2165 uint8_t res = a + b; 2166 if (res < a) { 2167 res = UINT8_MAX; 2168 env->vxsat = 0x1; 2169 } 2170 return res; 2171 } 2172 2173 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2174 uint16_t b) 2175 { 2176 uint16_t res = a + b; 2177 if (res < a) { 2178 res = UINT16_MAX; 2179 env->vxsat = 0x1; 2180 } 2181 return res; 2182 } 2183 2184 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2185 uint32_t b) 2186 { 2187 uint32_t res = a + b; 2188 if (res < a) { 2189 res = UINT32_MAX; 
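        /* the add wrapped around: the result clamps to UINT32_MAX and
         * vxsat records the saturation */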
2190 env->vxsat = 0x1; 2191 } 2192 return res; 2193 } 2194 2195 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2196 uint64_t b) 2197 { 2198 uint64_t res = a + b; 2199 if (res < a) { 2200 res = UINT64_MAX; 2201 env->vxsat = 0x1; 2202 } 2203 return res; 2204 } 2205 2206 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2207 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2208 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2209 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2210 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2211 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2212 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2213 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2214 2215 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2216 CPURISCVState *env, int vxrm); 2217 2218 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2219 static inline void \ 2220 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2221 CPURISCVState *env, int vxrm) \ 2222 { \ 2223 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2224 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2225 } 2226 2227 static inline void 2228 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2229 CPURISCVState *env, 2230 uint32_t vl, uint32_t vm, int vxrm, 2231 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2232 { 2233 for (uint32_t i = env->vstart; i < vl; i++) { 2234 if (!vm && !vext_elem_mask(v0, i)) { 2235 /* set masked-off elements to 1s */ 2236 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2237 continue; 2238 } 2239 fn(vd, s1, vs2, i, env, vxrm); 2240 } 2241 env->vstart = 0; 2242 } 2243 2244 static inline void 2245 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2246 CPURISCVState *env, 2247 uint32_t desc, 2248 opivx2_rm_fn *fn, uint32_t esz) 2249 { 2250 uint32_t vm = vext_vm(desc); 2251 uint32_t vl = env->vl; 2252 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2253 uint32_t vta = vext_vta(desc); 2254 uint32_t vma = vext_vma(desc); 2255 2256 switch (env->vxrm) { 2257 case 0: /* rnu */ 2258 vext_vx_rm_1(vd, v0, s1, vs2, 2259 env, vl, vm, 0, fn, vma, esz); 2260 break; 2261 case 1: /* rne */ 2262 vext_vx_rm_1(vd, v0, s1, vs2, 2263 env, vl, vm, 1, fn, vma, esz); 2264 break; 2265 case 2: /* rdn */ 2266 vext_vx_rm_1(vd, v0, s1, vs2, 2267 env, vl, vm, 2, fn, vma, esz); 2268 break; 2269 default: /* rod */ 2270 vext_vx_rm_1(vd, v0, s1, vs2, 2271 env, vl, vm, 3, fn, vma, esz); 2272 break; 2273 } 2274 /* set tail elements to 1s */ 2275 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2276 } 2277 2278 /* generate helpers for fixed point instructions with OPIVX format */ 2279 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2280 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2281 void *vs2, CPURISCVState *env, \ 2282 uint32_t desc) \ 2283 { \ 2284 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2285 do_##NAME, ESZ); \ 2286 } 2287 2288 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2289 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2290 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2291 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2292 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2293 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2294 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2295 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2296 2297 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2298 { 2299 int8_t res = a + b; 2300 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2301 res = a > 0 ? 
INT8_MAX : INT8_MIN; 2302 env->vxsat = 0x1; 2303 } 2304 return res; 2305 } 2306 2307 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2308 int16_t b) 2309 { 2310 int16_t res = a + b; 2311 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2312 res = a > 0 ? INT16_MAX : INT16_MIN; 2313 env->vxsat = 0x1; 2314 } 2315 return res; 2316 } 2317 2318 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2319 int32_t b) 2320 { 2321 int32_t res = a + b; 2322 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2323 res = a > 0 ? INT32_MAX : INT32_MIN; 2324 env->vxsat = 0x1; 2325 } 2326 return res; 2327 } 2328 2329 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2330 int64_t b) 2331 { 2332 int64_t res = a + b; 2333 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2334 res = a > 0 ? INT64_MAX : INT64_MIN; 2335 env->vxsat = 0x1; 2336 } 2337 return res; 2338 } 2339 2340 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2341 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2342 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2343 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2344 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2345 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2346 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2347 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2348 2349 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2350 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2351 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2352 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2353 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2354 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2355 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2356 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2357 2358 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2359 uint8_t b) 2360 { 2361 uint8_t res = a - b; 2362 if (res > a) { 2363 res = 0; 2364 env->vxsat = 0x1; 2365 } 2366 return res; 2367 } 2368 2369 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2370 uint16_t b) 2371 { 2372 uint16_t res = a - b; 2373 if (res > a) { 2374 res = 0; 2375 env->vxsat = 0x1; 2376 } 2377 return res; 2378 } 2379 2380 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2381 uint32_t b) 2382 { 2383 uint32_t res = a - b; 2384 if (res > a) { 2385 res = 0; 2386 env->vxsat = 0x1; 2387 } 2388 return res; 2389 } 2390 2391 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2392 uint64_t b) 2393 { 2394 uint64_t res = a - b; 2395 if (res > a) { 2396 res = 0; 2397 env->vxsat = 0x1; 2398 } 2399 return res; 2400 } 2401 2402 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2403 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2404 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2405 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2406 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2407 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2408 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2409 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2410 2411 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2412 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2413 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2414 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2415 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2416 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2417 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2418 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2419 2420 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2421 { 2422 int8_t res = a - b; 2423 if ((res ^ a) & (a ^ b) & INT8_MIN) 
    {
        res = a >= 0 ? INT8_MAX : INT8_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
                             int16_t b)
{
    int16_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT16_MIN) {
        res = a >= 0 ? INT16_MAX : INT16_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int32_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT32_MIN) {
        res = a >= 0 ? INT32_MAX : INT32_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT64_MIN) {
        res = a >= 0 ? INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
GEN_VEXT_VV_RM(vssub_vv_b, 1)
GEN_VEXT_VV_RM(vssub_vv_h, 2)
GEN_VEXT_VV_RM(vssub_vv_w, 4)
GEN_VEXT_VV_RM(vssub_vv_d, 8)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
GEN_VEXT_VX_RM(vssub_vx_b, 1)
GEN_VEXT_VX_RM(vssub_vx_h, 2)
GEN_VEXT_VX_RM(vssub_vx_w, 4)
GEN_VEXT_VX_RM(vssub_vx_d, 8)

/* Vector Single-Width Averaging Add and Subtract */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d = extract64(v, shift, 1);
    uint8_t d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    d1 = extract64(v, shift - 1, 1);
    D1 = extract64(v, 0, shift);
    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            D2 = extract64(v, 0, shift - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}

static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int64_t res = (int64_t)a + b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63.
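     * The truncated sum "res" then has the wrong bit 63 exactly when the
     * addition overflowed: "over" is INT64_MIN in that case and zero
     * otherwise, so XOR-ing it into (res >> 1) restores the bit that the
     * true 65-bit sum would have after the shift, before the rounding
     * increment is added.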
*/ 2526 return ((res >> 1) ^ over) + round; 2527 } 2528 2529 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2530 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2531 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2532 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2533 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2534 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2535 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2536 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2537 2538 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2539 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2540 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2541 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2542 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2543 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2544 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2545 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2546 2547 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2548 uint32_t a, uint32_t b) 2549 { 2550 uint64_t res = (uint64_t)a + b; 2551 uint8_t round = get_round(vxrm, res, 1); 2552 2553 return (res >> 1) + round; 2554 } 2555 2556 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2557 uint64_t a, uint64_t b) 2558 { 2559 uint64_t res = a + b; 2560 uint8_t round = get_round(vxrm, res, 1); 2561 uint64_t over = (uint64_t)(res < a) << 63; 2562 2563 return ((res >> 1) | over) + round; 2564 } 2565 2566 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2567 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2568 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2569 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2570 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2571 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2572 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2573 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2574 2575 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2576 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2577 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2578 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2579 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2580 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2581 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2582 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2583 2584 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, 2585 int32_t b) 2586 { 2587 int64_t res = (int64_t)a - b; 2588 uint8_t round = get_round(vxrm, res, 1); 2589 2590 return (res >> 1) + round; 2591 } 2592 2593 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, 2594 int64_t b) 2595 { 2596 int64_t res = (int64_t)a - b; 2597 uint8_t round = get_round(vxrm, res, 1); 2598 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2599 2600 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2601 return ((res >> 1) ^ over) + round; 2602 } 2603 2604 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2605 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2606 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2607 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2608 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2609 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2610 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2611 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2612 2613 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2614 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2615 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2616 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2617 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2618 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2619 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2620 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2621 2622 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2623 uint32_t a, uint32_t b) 2624 { 2625 int64_t res = (int64_t)a - b; 2626 uint8_t round = get_round(vxrm, res, 1); 2627 2628 return (res >> 1) + round; 2629 } 2630 2631 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2632 uint64_t a, uint64_t b) 2633 { 2634 uint64_t res = (uint64_t)a - b; 2635 uint8_t round = get_round(vxrm, res, 1); 2636 uint64_t over = (uint64_t)(res > a) << 63; 2637 2638 return ((res >> 1) | over) + round; 2639 } 2640 2641 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2642 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2643 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2644 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2645 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2646 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2647 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2648 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2649 2650 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2651 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2652 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2653 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2654 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2655 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2656 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2657 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2658 2659 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2660 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2661 { 2662 uint8_t round; 2663 int16_t res; 2664 2665 res = (int16_t)a * (int16_t)b; 2666 round = get_round(vxrm, res, 7); 2667 res = (res >> 7) + round; 2668 2669 if (res > INT8_MAX) { 2670 env->vxsat = 0x1; 2671 return INT8_MAX; 2672 } else if (res < INT8_MIN) { 2673 env->vxsat = 0x1; 2674 return INT8_MIN; 2675 } else { 2676 return res; 2677 } 2678 } 2679 2680 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2681 { 2682 uint8_t round; 2683 int32_t res; 2684 2685 res = (int32_t)a * (int32_t)b; 2686 round = get_round(vxrm, res, 15); 2687 res = (res >> 15) + round; 2688 2689 if (res > INT16_MAX) { 2690 env->vxsat = 0x1; 2691 return INT16_MAX; 2692 } else if (res < INT16_MIN) { 2693 env->vxsat = 0x1; 2694 return INT16_MIN; 2695 } else { 2696 return res; 2697 } 2698 } 2699 2700 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2701 { 2702 uint8_t round; 2703 int64_t res; 2704 2705 res = (int64_t)a * (int64_t)b; 2706 round = get_round(vxrm, res, 31); 2707 res = (res >> 31) + round; 2708 2709 if (res > INT32_MAX) { 2710 env->vxsat = 0x1; 2711 return INT32_MAX; 2712 } else if (res < INT32_MIN) { 2713 env->vxsat = 0x1; 
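            /* negative overflow after rounding: saturate to INT32_MIN */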
2714 return INT32_MIN; 2715 } else { 2716 return res; 2717 } 2718 } 2719 2720 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2721 { 2722 uint8_t round; 2723 uint64_t hi_64, lo_64; 2724 int64_t res; 2725 2726 if (a == INT64_MIN && b == INT64_MIN) { 2727 env->vxsat = 1; 2728 return INT64_MAX; 2729 } 2730 2731 muls64(&lo_64, &hi_64, a, b); 2732 round = get_round(vxrm, lo_64, 63); 2733 /* 2734 * Cannot overflow, as there are always 2735 * 2 sign bits after multiply. 2736 */ 2737 res = (hi_64 << 1) | (lo_64 >> 63); 2738 if (round) { 2739 if (res == INT64_MAX) { 2740 env->vxsat = 1; 2741 } else { 2742 res += 1; 2743 } 2744 } 2745 return res; 2746 } 2747 2748 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2749 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2750 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2751 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2752 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2753 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2754 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2755 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2756 2757 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2758 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2759 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2760 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2761 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2762 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2763 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2764 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2765 2766 /* Vector Single-Width Scaling Shift Instructions */ 2767 static inline uint8_t 2768 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2769 { 2770 uint8_t round, shift = b & 0x7; 2771 uint8_t res; 2772 2773 round = get_round(vxrm, a, shift); 2774 res = (a >> shift) + round; 2775 return res; 2776 } 2777 static inline uint16_t 2778 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2779 { 2780 uint8_t round, shift = b & 0xf; 2781 2782 round = get_round(vxrm, a, shift); 2783 return (a >> shift) + round; 2784 } 2785 static inline uint32_t 2786 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2787 { 2788 uint8_t round, shift = b & 0x1f; 2789 2790 round = get_round(vxrm, a, shift); 2791 return (a >> shift) + round; 2792 } 2793 static inline uint64_t 2794 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2795 { 2796 uint8_t round, shift = b & 0x3f; 2797 2798 round = get_round(vxrm, a, shift); 2799 return (a >> shift) + round; 2800 } 2801 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2802 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2803 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2804 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2805 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2806 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2807 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2808 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2809 2810 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2811 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2812 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2813 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2814 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2815 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2816 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2817 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2818 2819 static inline int8_t 2820 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2821 { 2822 uint8_t round, shift = b & 0x7; 2823 2824 round = get_round(vxrm, a, shift); 2825 return (a >> shift) + round; 2826 } 2827 static inline int16_t 2828 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2829 { 2830 uint8_t round, shift = b & 0xf; 2831 2832 round = get_round(vxrm, a, shift); 2833 return (a >> shift) + round; 2834 } 2835 static inline int32_t 2836 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2837 { 2838 uint8_t round, shift = b & 0x1f; 2839 2840 round = get_round(vxrm, a, shift); 2841 return (a >> shift) + round; 2842 } 2843 static inline int64_t 2844 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2845 { 2846 uint8_t round, shift = b & 0x3f; 2847 2848 round = get_round(vxrm, a, shift); 2849 return (a >> shift) + round; 2850 } 2851 2852 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2853 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2854 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2855 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2856 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2857 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2858 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2859 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2860 2861 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2862 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2863 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2864 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2865 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2866 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2867 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2868 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2869 2870 /* Vector Narrowing Fixed-Point Clip Instructions */ 2871 static inline int8_t 2872 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2873 { 2874 uint8_t round, shift = b & 0xf; 2875 int16_t res; 2876 2877 round = get_round(vxrm, a, shift); 2878 res = (a >> shift) + round; 2879 if (res > INT8_MAX) { 2880 env->vxsat = 0x1; 2881 return INT8_MAX; 2882 } else if (res < INT8_MIN) { 2883 env->vxsat = 0x1; 2884 return INT8_MIN; 2885 } else { 2886 return res; 2887 } 2888 } 2889 2890 static inline int16_t 2891 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2892 { 2893 uint8_t round, shift = b & 0x1f; 2894 int32_t res; 2895 2896 round = get_round(vxrm, a, shift); 2897 res = (a >> shift) + round; 2898 if (res > INT16_MAX) { 2899 env->vxsat = 0x1; 2900 return INT16_MAX; 2901 } else if (res < INT16_MIN) { 2902 env->vxsat = 0x1; 2903 return INT16_MIN; 2904 } else { 2905 return res; 2906 } 2907 } 2908 2909 static inline int32_t 2910 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2911 { 2912 uint8_t round, shift = b & 0x3f; 2913 int64_t res; 2914 2915 round = get_round(vxrm, a, shift); 2916 res = (a >> shift) + round; 2917 if (res > INT32_MAX) { 2918 env->vxsat = 0x1; 2919 return INT32_MAX; 2920 } else if (res < INT32_MIN) { 2921 env->vxsat = 0x1; 2922 return INT32_MIN; 2923 } else { 2924 return res; 2925 } 2926 } 2927 2928 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2929 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2930 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2931 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 2932 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 2933 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 2934 2935 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2936 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2937 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2938 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 2939 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 2940 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 2941 2942 static inline uint8_t 2943 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2944 { 2945 
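    /*
     * Unsigned narrowing clip: shift the 16-bit source right by the masked
     * shift amount, add the rounding increment selected by vxrm, and
     * saturate anything above UINT8_MAX.
     */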
    uint8_t round, shift = b & 0xf;
    uint16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT8_MAX) {
        env->vxsat = 0x1;
        return UINT8_MAX;
    } else {
        return res;
    }
}

static inline uint16_t
vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
{
    uint8_t round, shift = b & 0x1f;
    uint32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT16_MAX) {
        env->vxsat = 0x1;
        return UINT16_MAX;
    } else {
        return res;
    }
}

static inline uint32_t
vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x3f;
    uint64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT32_MAX) {
        env->vxsat = 0x1;
        return UINT32_MAX;
    } else {
        return res;
    }
}

RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
GEN_VEXT_VV_RM(vnclipu_wv_w, 4)

RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
GEN_VEXT_VX_RM(vnclipu_wx_w, 4)

/*
 * Vector Floating-Point Arithmetic Instructions
 */
/* Vector Single-Width Floating-Point Add/Subtract Instructions */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
}

#define GEN_VEXT_VV_ENV(NAME, ESZ)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems =                                \
        vext_get_total_elems(env, desc, ESZ);             \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t vma = vext_vma(desc);                        \
    uint32_t i;                                           \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            /* set masked-off elements to 1s */           \
            vext_set_elems_1s(vd, vma, i * ESZ,           \
                              (i + 1) * ESZ);             \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, vs1, vs2, i, env);                  \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, vl * ESZ,                  \
                      total_elems * ESZ);                 \
}

RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8)

#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}

#define GEN_VEXT_VF(NAME,
ESZ) \ 3061 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3062 void *vs2, CPURISCVState *env, \ 3063 uint32_t desc) \ 3064 { \ 3065 uint32_t vm = vext_vm(desc); \ 3066 uint32_t vl = env->vl; \ 3067 uint32_t total_elems = \ 3068 vext_get_total_elems(env, desc, ESZ); \ 3069 uint32_t vta = vext_vta(desc); \ 3070 uint32_t vma = vext_vma(desc); \ 3071 uint32_t i; \ 3072 \ 3073 for (i = env->vstart; i < vl; i++) { \ 3074 if (!vm && !vext_elem_mask(v0, i)) { \ 3075 /* set masked-off elements to 1s */ \ 3076 vext_set_elems_1s(vd, vma, i * ESZ, \ 3077 (i + 1) * ESZ); \ 3078 continue; \ 3079 } \ 3080 do_##NAME(vd, s1, vs2, i, env); \ 3081 } \ 3082 env->vstart = 0; \ 3083 /* set tail elements to 1s */ \ 3084 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3085 total_elems * ESZ); \ 3086 } 3087 3088 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3089 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3090 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3091 GEN_VEXT_VF(vfadd_vf_h, 2) 3092 GEN_VEXT_VF(vfadd_vf_w, 4) 3093 GEN_VEXT_VF(vfadd_vf_d, 8) 3094 3095 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3096 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3097 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3098 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 3099 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 3100 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 3101 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3102 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3103 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3104 GEN_VEXT_VF(vfsub_vf_h, 2) 3105 GEN_VEXT_VF(vfsub_vf_w, 4) 3106 GEN_VEXT_VF(vfsub_vf_d, 8) 3107 3108 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3109 { 3110 return float16_sub(b, a, s); 3111 } 3112 3113 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3114 { 3115 return float32_sub(b, a, s); 3116 } 3117 3118 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3119 { 3120 return float64_sub(b, a, s); 3121 } 3122 3123 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3124 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3125 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3126 GEN_VEXT_VF(vfrsub_vf_h, 2) 3127 GEN_VEXT_VF(vfrsub_vf_w, 4) 3128 GEN_VEXT_VF(vfrsub_vf_d, 8) 3129 3130 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3131 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3132 { 3133 return float32_add(float16_to_float32(a, true, s), 3134 float16_to_float32(b, true, s), s); 3135 } 3136 3137 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3138 { 3139 return float64_add(float32_to_float64(a, s), 3140 float32_to_float64(b, s), s); 3141 3142 } 3143 3144 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3145 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3146 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3147 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3148 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3149 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3150 GEN_VEXT_VF(vfwadd_vf_h, 4) 3151 GEN_VEXT_VF(vfwadd_vf_w, 8) 3152 3153 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3154 { 3155 return float32_sub(float16_to_float32(a, true, s), 3156 float16_to_float32(b, true, s), s); 3157 } 3158 3159 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3160 { 3161 return float64_sub(float32_to_float64(a, s), 3162 
float32_to_float64(b, s), s); 3163 3164 } 3165 3166 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3167 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3168 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3169 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3170 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3171 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3172 GEN_VEXT_VF(vfwsub_vf_h, 4) 3173 GEN_VEXT_VF(vfwsub_vf_w, 8) 3174 3175 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3176 { 3177 return float32_add(a, float16_to_float32(b, true, s), s); 3178 } 3179 3180 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3181 { 3182 return float64_add(a, float32_to_float64(b, s), s); 3183 } 3184 3185 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3186 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3187 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3188 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3189 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3190 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3191 GEN_VEXT_VF(vfwadd_wf_h, 4) 3192 GEN_VEXT_VF(vfwadd_wf_w, 8) 3193 3194 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3195 { 3196 return float32_sub(a, float16_to_float32(b, true, s), s); 3197 } 3198 3199 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3200 { 3201 return float64_sub(a, float32_to_float64(b, s), s); 3202 } 3203 3204 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3205 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3206 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3207 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3208 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3209 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3210 GEN_VEXT_VF(vfwsub_wf_h, 4) 3211 GEN_VEXT_VF(vfwsub_wf_w, 8) 3212 3213 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3214 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3215 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3216 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3217 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3218 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3219 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3220 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3221 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3222 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3223 GEN_VEXT_VF(vfmul_vf_h, 2) 3224 GEN_VEXT_VF(vfmul_vf_w, 4) 3225 GEN_VEXT_VF(vfmul_vf_d, 8) 3226 3227 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3228 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3229 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3230 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3231 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3232 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3233 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3234 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3235 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3236 GEN_VEXT_VF(vfdiv_vf_h, 2) 3237 GEN_VEXT_VF(vfdiv_vf_w, 4) 3238 GEN_VEXT_VF(vfdiv_vf_d, 8) 3239 3240 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3241 { 3242 return float16_div(b, a, s); 3243 } 3244 3245 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3246 { 3247 return float32_div(b, a, s); 3248 } 3249 3250 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3251 { 3252 return float64_div(b, a, s); 3253 } 3254 
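/*
 * Why the *_rsub / *_rdiv wrappers above exist: OPFVF2 always passes the
 * vector element first and the scalar second, so a plain float16_div()
 * would compute vs2[i] / rs1.  Swapping the operands inside the wrapper
 * makes vfrsub.vf compute rs1 - vs2[i] and vfrdiv.vf compute rs1 / vs2[i].
 * As a rough sketch (assuming OP_UUU_H is the all-uint16_t type list
 * defined earlier in this file), the RVVCALL(OPFVF2, vfrdiv_vf_h, ...)
 * just below expands to approximately:
 *
 *     static void do_vfrdiv_vf_h(void *vd, uint64_t s1, void *vs2, int i,
 *                                CPURISCVState *env)
 *     {
 *         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *         *((uint16_t *)vd + H2(i)) =
 *             float16_rdiv(s2, (uint16_t)s1, &env->fp_status);
 *     }
 *
 * which is float16_div((uint16_t)s1, s2, ...), i.e. rs1 / vs2[i].
 */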
3255 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3256 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3257 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3258 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3259 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3260 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3261 3262 /* Vector Widening Floating-Point Multiply */ 3263 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3264 { 3265 return float32_mul(float16_to_float32(a, true, s), 3266 float16_to_float32(b, true, s), s); 3267 } 3268 3269 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3270 { 3271 return float64_mul(float32_to_float64(a, s), 3272 float32_to_float64(b, s), s); 3273 3274 } 3275 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3276 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3277 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3278 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3279 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3280 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3281 GEN_VEXT_VF(vfwmul_vf_h, 4) 3282 GEN_VEXT_VF(vfwmul_vf_w, 8) 3283 3284 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3285 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3286 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3287 CPURISCVState *env) \ 3288 { \ 3289 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3290 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3291 TD d = *((TD *)vd + HD(i)); \ 3292 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3293 } 3294 3295 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3296 { 3297 return float16_muladd(a, b, d, 0, s); 3298 } 3299 3300 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3301 { 3302 return float32_muladd(a, b, d, 0, s); 3303 } 3304 3305 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3306 { 3307 return float64_muladd(a, b, d, 0, s); 3308 } 3309 3310 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3311 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3312 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3313 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3314 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3315 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3316 3317 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3318 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3319 CPURISCVState *env) \ 3320 { \ 3321 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3322 TD d = *((TD *)vd + HD(i)); \ 3323 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3324 } 3325 3326 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3327 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3328 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3329 GEN_VEXT_VF(vfmacc_vf_h, 2) 3330 GEN_VEXT_VF(vfmacc_vf_w, 4) 3331 GEN_VEXT_VF(vfmacc_vf_d, 8) 3332 3333 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3334 { 3335 return float16_muladd(a, b, d, float_muladd_negate_c | 3336 float_muladd_negate_product, s); 3337 } 3338 3339 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3340 { 3341 return float32_muladd(a, b, d, float_muladd_negate_c | 3342 float_muladd_negate_product, s); 3343 } 3344 3345 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3346 { 3347 return float64_muladd(a, b, d, float_muladd_negate_c | 3348 float_muladd_negate_product, s); 3349 } 3350 3351 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, 
H2, H2, H2, fnmacc16) 3352 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3353 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3354 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3355 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3356 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3357 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3358 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3359 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3360 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3361 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3362 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3363 3364 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3365 { 3366 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3367 } 3368 3369 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3370 { 3371 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3372 } 3373 3374 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3375 { 3376 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3377 } 3378 3379 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3380 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3381 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3382 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3383 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3384 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3385 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3386 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3387 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3388 GEN_VEXT_VF(vfmsac_vf_h, 2) 3389 GEN_VEXT_VF(vfmsac_vf_w, 4) 3390 GEN_VEXT_VF(vfmsac_vf_d, 8) 3391 3392 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3393 { 3394 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3395 } 3396 3397 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3398 { 3399 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3400 } 3401 3402 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3403 { 3404 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3405 } 3406 3407 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3408 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3409 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3410 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3411 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3412 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3413 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3414 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3415 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3416 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3417 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3418 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3419 3420 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3421 { 3422 return float16_muladd(d, b, a, 0, s); 3423 } 3424 3425 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3426 { 3427 return float32_muladd(d, b, a, 0, s); 3428 } 3429 3430 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3431 { 3432 return float64_muladd(d, b, a, 0, s); 3433 } 3434 3435 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3436 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3437 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3438 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) 3439 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3440 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3441 RVVCALL(OPFVF3, vfmadd_vf_h, 
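/*
 * Note the operand order: fmaccN(a, b, d) above computes a * b + d, so
 * vfmacc overwrites the addend (vd = vs2 * vs1 + vd), while fmaddN(a, b, d)
 * computes d * b + a, so vfmadd overwrites the multiplicand
 * (vd = vd * vs1 + vs2). The negated variants differ only in the
 * float_muladd_negate_c / float_muladd_negate_product flags passed to the
 * softfloat muladd.
 */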
OP_UUU_H, H2, H2, fmadd16) 3442 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3443 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3444 GEN_VEXT_VF(vfmadd_vf_h, 2) 3445 GEN_VEXT_VF(vfmadd_vf_w, 4) 3446 GEN_VEXT_VF(vfmadd_vf_d, 8) 3447 3448 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3449 { 3450 return float16_muladd(d, b, a, float_muladd_negate_c | 3451 float_muladd_negate_product, s); 3452 } 3453 3454 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3455 { 3456 return float32_muladd(d, b, a, float_muladd_negate_c | 3457 float_muladd_negate_product, s); 3458 } 3459 3460 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3461 { 3462 return float64_muladd(d, b, a, float_muladd_negate_c | 3463 float_muladd_negate_product, s); 3464 } 3465 3466 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3467 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3468 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3469 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3470 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3471 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3472 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3473 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3474 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3475 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3476 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3477 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3478 3479 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3480 { 3481 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3482 } 3483 3484 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3485 { 3486 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3487 } 3488 3489 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3490 { 3491 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3492 } 3493 3494 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3495 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3496 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3497 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3498 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3499 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3500 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3501 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3502 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3503 GEN_VEXT_VF(vfmsub_vf_h, 2) 3504 GEN_VEXT_VF(vfmsub_vf_w, 4) 3505 GEN_VEXT_VF(vfmsub_vf_d, 8) 3506 3507 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3508 { 3509 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3510 } 3511 3512 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3513 { 3514 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3515 } 3516 3517 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3518 { 3519 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3520 } 3521 3522 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3523 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3524 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3525 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3526 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3527 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3528 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3529 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3530 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, 
H8, fnmsub64) 3531 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3532 GEN_VEXT_VF(vfnmsub_vf_w, 4) 3533 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3534 3535 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3536 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3537 { 3538 return float32_muladd(float16_to_float32(a, true, s), 3539 float16_to_float32(b, true, s), d, 0, s); 3540 } 3541 3542 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3543 { 3544 return float64_muladd(float32_to_float64(a, s), 3545 float32_to_float64(b, s), d, 0, s); 3546 } 3547 3548 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3549 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3550 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3551 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3552 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3553 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3554 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3555 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3556 3557 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3558 { 3559 return float32_muladd(float16_to_float32(a, true, s), 3560 float16_to_float32(b, true, s), d, 3561 float_muladd_negate_c | float_muladd_negate_product, 3562 s); 3563 } 3564 3565 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3566 { 3567 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), 3568 d, float_muladd_negate_c | 3569 float_muladd_negate_product, s); 3570 } 3571 3572 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3573 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3574 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3575 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3576 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3577 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3578 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3579 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3580 3581 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3582 { 3583 return float32_muladd(float16_to_float32(a, true, s), 3584 float16_to_float32(b, true, s), d, 3585 float_muladd_negate_c, s); 3586 } 3587 3588 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3589 { 3590 return float64_muladd(float32_to_float64(a, s), 3591 float32_to_float64(b, s), d, 3592 float_muladd_negate_c, s); 3593 } 3594 3595 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3596 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3597 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3598 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3599 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3600 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3601 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3602 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3603 3604 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3605 { 3606 return float32_muladd(float16_to_float32(a, true, s), 3607 float16_to_float32(b, true, s), d, 3608 float_muladd_negate_product, s); 3609 } 3610 3611 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3612 { 3613 return float64_muladd(float32_to_float64(a, s), 3614 float32_to_float64(b, s), d, 3615 float_muladd_negate_product, s); 3616 } 3617 3618 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3619 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3620 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3621 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3622 RVVCALL(OPFVF3, vfwnmsac_vf_h, 
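/*
 * These widening FMA helpers convert both float16 multiplicands to float32
 * (an exact conversion) and then perform a single fused float32_muladd
 * against the wide accumulator, so the whole operation rounds only once.
 * The float32 -> float64 forms work the same way.
 */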
WOP_UUU_H, H4, H2, fwnmsac16) 3623 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3624 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3625 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3626 3627 /* Vector Floating-Point Square-Root Instruction */ 3628 /* (TD, T2, TX2) */ 3629 #define OP_UU_H uint16_t, uint16_t, uint16_t 3630 #define OP_UU_W uint32_t, uint32_t, uint32_t 3631 #define OP_UU_D uint64_t, uint64_t, uint64_t 3632 3633 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3634 static void do_##NAME(void *vd, void *vs2, int i, \ 3635 CPURISCVState *env) \ 3636 { \ 3637 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3638 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3639 } 3640 3641 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3642 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3643 CPURISCVState *env, uint32_t desc) \ 3644 { \ 3645 uint32_t vm = vext_vm(desc); \ 3646 uint32_t vl = env->vl; \ 3647 uint32_t total_elems = \ 3648 vext_get_total_elems(env, desc, ESZ); \ 3649 uint32_t vta = vext_vta(desc); \ 3650 uint32_t vma = vext_vma(desc); \ 3651 uint32_t i; \ 3652 \ 3653 if (vl == 0) { \ 3654 return; \ 3655 } \ 3656 for (i = env->vstart; i < vl; i++) { \ 3657 if (!vm && !vext_elem_mask(v0, i)) { \ 3658 /* set masked-off elements to 1s */ \ 3659 vext_set_elems_1s(vd, vma, i * ESZ, \ 3660 (i + 1) * ESZ); \ 3661 continue; \ 3662 } \ 3663 do_##NAME(vd, vs2, i, env); \ 3664 } \ 3665 env->vstart = 0; \ 3666 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3667 total_elems * ESZ); \ 3668 } 3669 3670 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3671 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3672 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3673 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3674 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3675 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3676 3677 /* 3678 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3679 * 3680 * Adapted from riscv-v-spec recip.c: 3681 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3682 */ 3683 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3684 { 3685 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3686 uint64_t exp = extract64(f, frac_size, exp_size); 3687 uint64_t frac = extract64(f, 0, frac_size); 3688 3689 const uint8_t lookup_table[] = { 3690 52, 51, 50, 48, 47, 46, 44, 43, 3691 42, 41, 40, 39, 38, 36, 35, 34, 3692 33, 32, 31, 30, 30, 29, 28, 27, 3693 26, 25, 24, 23, 23, 22, 21, 20, 3694 19, 19, 18, 17, 16, 16, 15, 14, 3695 14, 13, 12, 12, 11, 10, 10, 9, 3696 9, 8, 7, 7, 6, 6, 5, 4, 3697 4, 3, 3, 2, 2, 1, 1, 0, 3698 127, 125, 123, 121, 119, 118, 116, 114, 3699 113, 111, 109, 108, 106, 105, 103, 102, 3700 100, 99, 97, 96, 95, 93, 92, 91, 3701 90, 88, 87, 86, 85, 84, 83, 82, 3702 80, 79, 78, 77, 76, 75, 74, 73, 3703 72, 71, 70, 70, 69, 68, 67, 66, 3704 65, 64, 63, 63, 62, 61, 60, 59, 3705 59, 58, 57, 56, 56, 55, 54, 53 3706 }; 3707 const int precision = 7; 3708 3709 if (exp == 0 && frac != 0) { /* subnormal */ 3710 /* Normalize the subnormal. 
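 * Each left shift of frac is balanced by decrementing exp (which wraps
 * below zero, as in the spec's reference recip.c), and the final shift
 * plus mask drops the now-implicit leading one. For example, with
 * frac_size = 10 (float16), an input fraction of 0b00_0001_1010 needs
 * five shifts to bring its leading one up to bit 9 (exp decremented five
 * times), leaving frac = 0b10_1000_0000 after the last shift.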
*/ 3711 while (extract64(frac, frac_size - 1, 1) == 0) { 3712 exp--; 3713 frac <<= 1; 3714 } 3715 3716 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3717 } 3718 3719 int idx = ((exp & 1) << (precision - 1)) | 3720 (frac >> (frac_size - precision + 1)); 3721 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3722 (frac_size - precision); 3723 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3724 3725 uint64_t val = 0; 3726 val = deposit64(val, 0, frac_size, out_frac); 3727 val = deposit64(val, frac_size, exp_size, out_exp); 3728 val = deposit64(val, frac_size + exp_size, 1, sign); 3729 return val; 3730 } 3731 3732 static float16 frsqrt7_h(float16 f, float_status *s) 3733 { 3734 int exp_size = 5, frac_size = 10; 3735 bool sign = float16_is_neg(f); 3736 3737 /* 3738 * frsqrt7(sNaN) = canonical NaN 3739 * frsqrt7(-inf) = canonical NaN 3740 * frsqrt7(-normal) = canonical NaN 3741 * frsqrt7(-subnormal) = canonical NaN 3742 */ 3743 if (float16_is_signaling_nan(f, s) || 3744 (float16_is_infinity(f) && sign) || 3745 (float16_is_normal(f) && sign) || 3746 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3747 s->float_exception_flags |= float_flag_invalid; 3748 return float16_default_nan(s); 3749 } 3750 3751 /* frsqrt7(qNaN) = canonical NaN */ 3752 if (float16_is_quiet_nan(f, s)) { 3753 return float16_default_nan(s); 3754 } 3755 3756 /* frsqrt7(+-0) = +-inf */ 3757 if (float16_is_zero(f)) { 3758 s->float_exception_flags |= float_flag_divbyzero; 3759 return float16_set_sign(float16_infinity, sign); 3760 } 3761 3762 /* frsqrt7(+inf) = +0 */ 3763 if (float16_is_infinity(f) && !sign) { 3764 return float16_set_sign(float16_zero, sign); 3765 } 3766 3767 /* +normal, +subnormal */ 3768 uint64_t val = frsqrt7(f, exp_size, frac_size); 3769 return make_float16(val); 3770 } 3771 3772 static float32 frsqrt7_s(float32 f, float_status *s) 3773 { 3774 int exp_size = 8, frac_size = 23; 3775 bool sign = float32_is_neg(f); 3776 3777 /* 3778 * frsqrt7(sNaN) = canonical NaN 3779 * frsqrt7(-inf) = canonical NaN 3780 * frsqrt7(-normal) = canonical NaN 3781 * frsqrt7(-subnormal) = canonical NaN 3782 */ 3783 if (float32_is_signaling_nan(f, s) || 3784 (float32_is_infinity(f) && sign) || 3785 (float32_is_normal(f) && sign) || 3786 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3787 s->float_exception_flags |= float_flag_invalid; 3788 return float32_default_nan(s); 3789 } 3790 3791 /* frsqrt7(qNaN) = canonical NaN */ 3792 if (float32_is_quiet_nan(f, s)) { 3793 return float32_default_nan(s); 3794 } 3795 3796 /* frsqrt7(+-0) = +-inf */ 3797 if (float32_is_zero(f)) { 3798 s->float_exception_flags |= float_flag_divbyzero; 3799 return float32_set_sign(float32_infinity, sign); 3800 } 3801 3802 /* frsqrt7(+inf) = +0 */ 3803 if (float32_is_infinity(f) && !sign) { 3804 return float32_set_sign(float32_zero, sign); 3805 } 3806 3807 /* +normal, +subnormal */ 3808 uint64_t val = frsqrt7(f, exp_size, frac_size); 3809 return make_float32(val); 3810 } 3811 3812 static float64 frsqrt7_d(float64 f, float_status *s) 3813 { 3814 int exp_size = 11, frac_size = 52; 3815 bool sign = float64_is_neg(f); 3816 3817 /* 3818 * frsqrt7(sNaN) = canonical NaN 3819 * frsqrt7(-inf) = canonical NaN 3820 * frsqrt7(-normal) = canonical NaN 3821 * frsqrt7(-subnormal) = canonical NaN 3822 */ 3823 if (float64_is_signaling_nan(f, s) || 3824 (float64_is_infinity(f) && sign) || 3825 (float64_is_normal(f) && sign) || 3826 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3827 
s->float_exception_flags |= float_flag_invalid; 3828 return float64_default_nan(s); 3829 } 3830 3831 /* frsqrt7(qNaN) = canonical NaN */ 3832 if (float64_is_quiet_nan(f, s)) { 3833 return float64_default_nan(s); 3834 } 3835 3836 /* frsqrt7(+-0) = +-inf */ 3837 if (float64_is_zero(f)) { 3838 s->float_exception_flags |= float_flag_divbyzero; 3839 return float64_set_sign(float64_infinity, sign); 3840 } 3841 3842 /* frsqrt7(+inf) = +0 */ 3843 if (float64_is_infinity(f) && !sign) { 3844 return float64_set_sign(float64_zero, sign); 3845 } 3846 3847 /* +normal, +subnormal */ 3848 uint64_t val = frsqrt7(f, exp_size, frac_size); 3849 return make_float64(val); 3850 } 3851 3852 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3853 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3854 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3855 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3856 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3857 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3858 3859 /* 3860 * Vector Floating-Point Reciprocal Estimate Instruction 3861 * 3862 * Adapted from riscv-v-spec recip.c: 3863 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3864 */ 3865 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3866 float_status *s) 3867 { 3868 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3869 uint64_t exp = extract64(f, frac_size, exp_size); 3870 uint64_t frac = extract64(f, 0, frac_size); 3871 3872 const uint8_t lookup_table[] = { 3873 127, 125, 123, 121, 119, 117, 116, 114, 3874 112, 110, 109, 107, 105, 104, 102, 100, 3875 99, 97, 96, 94, 93, 91, 90, 88, 3876 87, 85, 84, 83, 81, 80, 79, 77, 3877 76, 75, 74, 72, 71, 70, 69, 68, 3878 66, 65, 64, 63, 62, 61, 60, 59, 3879 58, 57, 56, 55, 54, 53, 52, 51, 3880 50, 49, 48, 47, 46, 45, 44, 43, 3881 42, 41, 40, 40, 39, 38, 37, 36, 3882 35, 35, 34, 33, 32, 31, 31, 30, 3883 29, 28, 28, 27, 26, 25, 25, 24, 3884 23, 23, 22, 21, 21, 20, 19, 19, 3885 18, 17, 17, 16, 15, 15, 14, 14, 3886 13, 12, 12, 11, 11, 10, 9, 9, 3887 8, 8, 7, 7, 6, 5, 5, 4, 3888 4, 3, 3, 2, 2, 1, 1, 0 3889 }; 3890 const int precision = 7; 3891 3892 if (exp == 0 && frac != 0) { /* subnormal */ 3893 /* Normalize the subnormal. */ 3894 while (extract64(frac, frac_size - 1, 1) == 0) { 3895 exp--; 3896 frac <<= 1; 3897 } 3898 3899 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3900 3901 if (exp != 0 && exp != UINT64_MAX) { 3902 /* 3903 * Overflow to inf or max value of same sign, 3904 * depending on sign and rounding mode. 3905 */ 3906 s->float_exception_flags |= (float_flag_inexact | 3907 float_flag_overflow); 3908 3909 if ((s->float_rounding_mode == float_round_to_zero) || 3910 ((s->float_rounding_mode == float_round_down) && !sign) || 3911 ((s->float_rounding_mode == float_round_up) && sign)) { 3912 /* Return greatest/negative finite value. */ 3913 return (sign << (exp_size + frac_size)) | 3914 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3915 } else { 3916 /* Return +-inf. */ 3917 return (sign << (exp_size + frac_size)) | 3918 MAKE_64BIT_MASK(frac_size, exp_size); 3919 } 3920 } 3921 } 3922 3923 int idx = frac >> (frac_size - precision); 3924 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3925 (frac_size - precision); 3926 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3927 3928 if (out_exp == 0 || out_exp == UINT64_MAX) { 3929 /* 3930 * The result is subnormal, but don't raise the underflow exception, 3931 * because there's no additional loss of precision. 
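 * When out_exp is 0, the estimate is re-encoded as a subnormal by shifting
 * the fraction right one place and or-ing in the formerly implicit leading
 * one; when out_exp has wrapped to -1 (UINT64_MAX), a second right shift is
 * needed and the exponent field is forced to 0.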
3932 */ 3933 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3934 if (out_exp == UINT64_MAX) { 3935 out_frac >>= 1; 3936 out_exp = 0; 3937 } 3938 } 3939 3940 uint64_t val = 0; 3941 val = deposit64(val, 0, frac_size, out_frac); 3942 val = deposit64(val, frac_size, exp_size, out_exp); 3943 val = deposit64(val, frac_size + exp_size, 1, sign); 3944 return val; 3945 } 3946 3947 static float16 frec7_h(float16 f, float_status *s) 3948 { 3949 int exp_size = 5, frac_size = 10; 3950 bool sign = float16_is_neg(f); 3951 3952 /* frec7(+-inf) = +-0 */ 3953 if (float16_is_infinity(f)) { 3954 return float16_set_sign(float16_zero, sign); 3955 } 3956 3957 /* frec7(+-0) = +-inf */ 3958 if (float16_is_zero(f)) { 3959 s->float_exception_flags |= float_flag_divbyzero; 3960 return float16_set_sign(float16_infinity, sign); 3961 } 3962 3963 /* frec7(sNaN) = canonical NaN */ 3964 if (float16_is_signaling_nan(f, s)) { 3965 s->float_exception_flags |= float_flag_invalid; 3966 return float16_default_nan(s); 3967 } 3968 3969 /* frec7(qNaN) = canonical NaN */ 3970 if (float16_is_quiet_nan(f, s)) { 3971 return float16_default_nan(s); 3972 } 3973 3974 /* +-normal, +-subnormal */ 3975 uint64_t val = frec7(f, exp_size, frac_size, s); 3976 return make_float16(val); 3977 } 3978 3979 static float32 frec7_s(float32 f, float_status *s) 3980 { 3981 int exp_size = 8, frac_size = 23; 3982 bool sign = float32_is_neg(f); 3983 3984 /* frec7(+-inf) = +-0 */ 3985 if (float32_is_infinity(f)) { 3986 return float32_set_sign(float32_zero, sign); 3987 } 3988 3989 /* frec7(+-0) = +-inf */ 3990 if (float32_is_zero(f)) { 3991 s->float_exception_flags |= float_flag_divbyzero; 3992 return float32_set_sign(float32_infinity, sign); 3993 } 3994 3995 /* frec7(sNaN) = canonical NaN */ 3996 if (float32_is_signaling_nan(f, s)) { 3997 s->float_exception_flags |= float_flag_invalid; 3998 return float32_default_nan(s); 3999 } 4000 4001 /* frec7(qNaN) = canonical NaN */ 4002 if (float32_is_quiet_nan(f, s)) { 4003 return float32_default_nan(s); 4004 } 4005 4006 /* +-normal, +-subnormal */ 4007 uint64_t val = frec7(f, exp_size, frac_size, s); 4008 return make_float32(val); 4009 } 4010 4011 static float64 frec7_d(float64 f, float_status *s) 4012 { 4013 int exp_size = 11, frac_size = 52; 4014 bool sign = float64_is_neg(f); 4015 4016 /* frec7(+-inf) = +-0 */ 4017 if (float64_is_infinity(f)) { 4018 return float64_set_sign(float64_zero, sign); 4019 } 4020 4021 /* frec7(+-0) = +-inf */ 4022 if (float64_is_zero(f)) { 4023 s->float_exception_flags |= float_flag_divbyzero; 4024 return float64_set_sign(float64_infinity, sign); 4025 } 4026 4027 /* frec7(sNaN) = canonical NaN */ 4028 if (float64_is_signaling_nan(f, s)) { 4029 s->float_exception_flags |= float_flag_invalid; 4030 return float64_default_nan(s); 4031 } 4032 4033 /* frec7(qNaN) = canonical NaN */ 4034 if (float64_is_quiet_nan(f, s)) { 4035 return float64_default_nan(s); 4036 } 4037 4038 /* +-normal, +-subnormal */ 4039 uint64_t val = frec7(f, exp_size, frac_size, s); 4040 return make_float64(val); 4041 } 4042 4043 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4044 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4045 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4046 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4047 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4048 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4049 4050 /* Vector Floating-Point MIN/MAX Instructions */ 4051 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4052 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 4053 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4054 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4055 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4056 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4057 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4058 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4059 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4060 GEN_VEXT_VF(vfmin_vf_h, 2) 4061 GEN_VEXT_VF(vfmin_vf_w, 4) 4062 GEN_VEXT_VF(vfmin_vf_d, 8) 4063 4064 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4065 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4066 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4067 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 4068 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 4069 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 4070 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4071 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4072 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4073 GEN_VEXT_VF(vfmax_vf_h, 2) 4074 GEN_VEXT_VF(vfmax_vf_w, 4) 4075 GEN_VEXT_VF(vfmax_vf_d, 8) 4076 4077 /* Vector Floating-Point Sign-Injection Instructions */ 4078 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4079 { 4080 return deposit64(b, 0, 15, a); 4081 } 4082 4083 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4084 { 4085 return deposit64(b, 0, 31, a); 4086 } 4087 4088 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4089 { 4090 return deposit64(b, 0, 63, a); 4091 } 4092 4093 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4094 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4095 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4096 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 4097 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 4098 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 4099 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4100 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4101 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4102 GEN_VEXT_VF(vfsgnj_vf_h, 2) 4103 GEN_VEXT_VF(vfsgnj_vf_w, 4) 4104 GEN_VEXT_VF(vfsgnj_vf_d, 8) 4105 4106 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4107 { 4108 return deposit64(~b, 0, 15, a); 4109 } 4110 4111 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4112 { 4113 return deposit64(~b, 0, 31, a); 4114 } 4115 4116 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4117 { 4118 return deposit64(~b, 0, 63, a); 4119 } 4120 4121 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4122 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4123 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4124 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 4125 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 4126 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 4127 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4128 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4129 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4130 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4131 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4132 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4133 4134 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4135 { 4136 return deposit64(b ^ a, 0, 15, a); 4137 } 4138 4139 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4140 { 4141 return deposit64(b ^ a, 0, 31, a); 4142 } 4143 4144 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
float_status *s) 4145 { 4146 return deposit64(b ^ a, 0, 63, a); 4147 } 4148 4149 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4150 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4151 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4152 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 4153 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 4154 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 4155 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4156 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4157 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4158 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 4159 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 4160 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 4161 4162 /* Vector Floating-Point Compare Instructions */ 4163 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4164 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4165 CPURISCVState *env, uint32_t desc) \ 4166 { \ 4167 uint32_t vm = vext_vm(desc); \ 4168 uint32_t vl = env->vl; \ 4169 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4170 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4171 uint32_t vma = vext_vma(desc); \ 4172 uint32_t i; \ 4173 \ 4174 for (i = env->vstart; i < vl; i++) { \ 4175 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4176 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4177 if (!vm && !vext_elem_mask(v0, i)) { \ 4178 /* set masked-off elements to 1s */ \ 4179 if (vma) { \ 4180 vext_set_elem_mask(vd, i, 1); \ 4181 } \ 4182 continue; \ 4183 } \ 4184 vext_set_elem_mask(vd, i, \ 4185 DO_OP(s2, s1, &env->fp_status)); \ 4186 } \ 4187 env->vstart = 0; \ 4188 /* 4189 * mask destination register are always tail-agnostic 4190 * set tail elements to 1s 4191 */ \ 4192 if (vta_all_1s) { \ 4193 for (; i < total_elems; i++) { \ 4194 vext_set_elem_mask(vd, i, 1); \ 4195 } \ 4196 } \ 4197 } 4198 4199 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4200 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4201 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4202 4203 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4204 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4205 CPURISCVState *env, uint32_t desc) \ 4206 { \ 4207 uint32_t vm = vext_vm(desc); \ 4208 uint32_t vl = env->vl; \ 4209 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4210 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4211 uint32_t vma = vext_vma(desc); \ 4212 uint32_t i; \ 4213 \ 4214 for (i = env->vstart; i < vl; i++) { \ 4215 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4216 if (!vm && !vext_elem_mask(v0, i)) { \ 4217 /* set masked-off elements to 1s */ \ 4218 if (vma) { \ 4219 vext_set_elem_mask(vd, i, 1); \ 4220 } \ 4221 continue; \ 4222 } \ 4223 vext_set_elem_mask(vd, i, \ 4224 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4225 } \ 4226 env->vstart = 0; \ 4227 /* 4228 * mask destination register are always tail-agnostic 4229 * set tail elements to 1s 4230 */ \ 4231 if (vta_all_1s) { \ 4232 for (; i < total_elems; i++) { \ 4233 vext_set_elem_mask(vd, i, 1); \ 4234 } \ 4235 } \ 4236 } 4237 4238 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4239 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4240 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4241 4242 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4243 { 4244 FloatRelation compare = float16_compare_quiet(a, b, s); 4245 return compare != float_relation_equal; 4246 } 4247 4248 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4249 { 4250 FloatRelation compare = 
float32_compare_quiet(a, b, s); 4251 return compare != float_relation_equal; 4252 } 4253 4254 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4255 { 4256 FloatRelation compare = float64_compare_quiet(a, b, s); 4257 return compare != float_relation_equal; 4258 } 4259 4260 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4261 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4262 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4263 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4264 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4265 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4266 4267 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4268 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4269 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4270 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4271 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4272 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4273 4274 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4275 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4276 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4277 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4278 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4279 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4280 4281 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4282 { 4283 FloatRelation compare = float16_compare(a, b, s); 4284 return compare == float_relation_greater; 4285 } 4286 4287 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4288 { 4289 FloatRelation compare = float32_compare(a, b, s); 4290 return compare == float_relation_greater; 4291 } 4292 4293 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4294 { 4295 FloatRelation compare = float64_compare(a, b, s); 4296 return compare == float_relation_greater; 4297 } 4298 4299 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4300 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4301 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4302 4303 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4304 { 4305 FloatRelation compare = float16_compare(a, b, s); 4306 return compare == float_relation_greater || 4307 compare == float_relation_equal; 4308 } 4309 4310 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4311 { 4312 FloatRelation compare = float32_compare(a, b, s); 4313 return compare == float_relation_greater || 4314 compare == float_relation_equal; 4315 } 4316 4317 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4318 { 4319 FloatRelation compare = float64_compare(a, b, s); 4320 return compare == float_relation_greater || 4321 compare == float_relation_equal; 4322 } 4323 4324 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4325 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4326 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4327 4328 /* Vector Floating-Point Classify Instruction */ 4329 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4330 static void do_##NAME(void *vd, void *vs2, int i) \ 4331 { \ 4332 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4333 *((TD *)vd + HD(i)) = OP(s2); \ 4334 } 4335 4336 #define GEN_VEXT_V(NAME, ESZ) \ 4337 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4338 CPURISCVState *env, uint32_t desc) \ 4339 { \ 4340 uint32_t vm = vext_vm(desc); \ 4341 uint32_t vl = env->vl; \ 4342 uint32_t total_elems = \ 4343 vext_get_total_elems(env, desc, ESZ); \ 4344 uint32_t vta = vext_vta(desc); \ 4345 
uint32_t vma = vext_vma(desc); \ 4346 uint32_t i; \ 4347 \ 4348 for (i = env->vstart; i < vl; i++) { \ 4349 if (!vm && !vext_elem_mask(v0, i)) { \ 4350 /* set masked-off elements to 1s */ \ 4351 vext_set_elems_1s(vd, vma, i * ESZ, \ 4352 (i + 1) * ESZ); \ 4353 continue; \ 4354 } \ 4355 do_##NAME(vd, vs2, i); \ 4356 } \ 4357 env->vstart = 0; \ 4358 /* set tail elements to 1s */ \ 4359 vext_set_elems_1s(vd, vta, vl * ESZ, \ 4360 total_elems * ESZ); \ 4361 } 4362 4363 target_ulong fclass_h(uint64_t frs1) 4364 { 4365 float16 f = frs1; 4366 bool sign = float16_is_neg(f); 4367 4368 if (float16_is_infinity(f)) { 4369 return sign ? 1 << 0 : 1 << 7; 4370 } else if (float16_is_zero(f)) { 4371 return sign ? 1 << 3 : 1 << 4; 4372 } else if (float16_is_zero_or_denormal(f)) { 4373 return sign ? 1 << 2 : 1 << 5; 4374 } else if (float16_is_any_nan(f)) { 4375 float_status s = { }; /* for snan_bit_is_one */ 4376 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4377 } else { 4378 return sign ? 1 << 1 : 1 << 6; 4379 } 4380 } 4381 4382 target_ulong fclass_s(uint64_t frs1) 4383 { 4384 float32 f = frs1; 4385 bool sign = float32_is_neg(f); 4386 4387 if (float32_is_infinity(f)) { 4388 return sign ? 1 << 0 : 1 << 7; 4389 } else if (float32_is_zero(f)) { 4390 return sign ? 1 << 3 : 1 << 4; 4391 } else if (float32_is_zero_or_denormal(f)) { 4392 return sign ? 1 << 2 : 1 << 5; 4393 } else if (float32_is_any_nan(f)) { 4394 float_status s = { }; /* for snan_bit_is_one */ 4395 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4396 } else { 4397 return sign ? 1 << 1 : 1 << 6; 4398 } 4399 } 4400 4401 target_ulong fclass_d(uint64_t frs1) 4402 { 4403 float64 f = frs1; 4404 bool sign = float64_is_neg(f); 4405 4406 if (float64_is_infinity(f)) { 4407 return sign ? 1 << 0 : 1 << 7; 4408 } else if (float64_is_zero(f)) { 4409 return sign ? 1 << 3 : 1 << 4; 4410 } else if (float64_is_zero_or_denormal(f)) { 4411 return sign ? 1 << 2 : 1 << 5; 4412 } else if (float64_is_any_nan(f)) { 4413 float_status s = { }; /* for snan_bit_is_one */ 4414 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4415 } else { 4416 return sign ? 1 << 1 : 1 << 6; 4417 } 4418 } 4419 4420 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4421 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4422 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4423 GEN_VEXT_V(vfclass_v_h, 2) 4424 GEN_VEXT_V(vfclass_v_w, 4) 4425 GEN_VEXT_V(vfclass_v_d, 8) 4426 4427 /* Vector Floating-Point Merge Instruction */ 4428 4429 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4430 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4431 CPURISCVState *env, uint32_t desc) \ 4432 { \ 4433 uint32_t vm = vext_vm(desc); \ 4434 uint32_t vl = env->vl; \ 4435 uint32_t esz = sizeof(ETYPE); \ 4436 uint32_t total_elems = \ 4437 vext_get_total_elems(env, desc, esz); \ 4438 uint32_t vta = vext_vta(desc); \ 4439 uint32_t i; \ 4440 \ 4441 for (i = env->vstart; i < vl; i++) { \ 4442 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4443 *((ETYPE *)vd + H(i)) = \ 4444 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4445 } \ 4446 env->vstart = 0; \ 4447 /* set tail elements to 1s */ \ 4448 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4449 } 4450 4451 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4452 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4453 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4454 4455 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4456 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
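 * The conversions below reuse the OPFVV1/GEN_VEXT_V_ENV templates from the
 * square-root helpers above: each element is read, passed through the
 * softfloat conversion routine together with env->fp_status (so the current
 * dynamic rounding mode and exception flags apply), and written back at the
 * same element index. Roughly, do_vfcvt_xu_f_v_h(vd, vs2, i, env) is just
 * vd[i] = float16_to_uint16(vs2[i], &env->fp_status).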
*/ 4457 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4458 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4459 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4460 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4461 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4462 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4463 4464 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4465 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4466 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4467 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4468 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4469 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4470 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4471 4472 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4473 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4474 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4475 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4476 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4477 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4478 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4479 4480 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4481 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4482 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4483 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4484 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4485 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4486 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4487 4488 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4489 /* (TD, T2, TX2) */ 4490 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4491 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4492 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4493 /* 4494 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4495 */ 4496 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4497 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4498 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4499 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4500 4501 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4502 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4503 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4504 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4505 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4506 4507 /* 4508 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. 4509 */ 4510 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4511 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4512 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4513 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4514 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4515 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4516 4517 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4518 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4519 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4520 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4521 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4522 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4523 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4524 4525 /* 4526 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. 
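 *
 * The half-to-single case cannot use float16_to_float32 directly as an OP
 * because of its extra "ieee" flag, so vfwcvtffv16 below wraps it with
 * ieee = true (plain IEEE binary16), matching the other widening helpers
 * such as vfwmul16.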
4527 */ 4528 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4529 { 4530 return float16_to_float32(a, true, s); 4531 } 4532 4533 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4534 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4535 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4536 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4537 4538 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4539 /* (TD, T2, TX2) */ 4540 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4541 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4542 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4543 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4544 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4545 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4546 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4547 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4548 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4549 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4550 4551 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4552 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4553 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4554 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4555 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4556 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4557 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4558 4559 /* 4560 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. 4561 */ 4562 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4563 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4564 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4565 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4566 4567 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4568 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4569 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4570 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4571 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4572 4573 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
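 * vfncvtffv16 below is the narrowing counterpart of vfwcvtffv16 and likewise
 * passes ieee = true to float32_to_float16; the float64 -> float32 case can
 * use float64_to_float32 directly since its signature already matches the
 * OPFVV1 call shape.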
*/ 4574 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4575 { 4576 return float32_to_float16(a, true, s); 4577 } 4578 4579 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4580 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4581 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4582 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4583 4584 /* 4585 * Vector Reduction Operations 4586 */ 4587 /* Vector Single-Width Integer Reduction Instructions */ 4588 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4589 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4590 void *vs2, CPURISCVState *env, \ 4591 uint32_t desc) \ 4592 { \ 4593 uint32_t vm = vext_vm(desc); \ 4594 uint32_t vl = env->vl; \ 4595 uint32_t esz = sizeof(TD); \ 4596 uint32_t vlenb = simd_maxsz(desc); \ 4597 uint32_t vta = vext_vta(desc); \ 4598 uint32_t i; \ 4599 TD s1 = *((TD *)vs1 + HD(0)); \ 4600 \ 4601 for (i = env->vstart; i < vl; i++) { \ 4602 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4603 if (!vm && !vext_elem_mask(v0, i)) { \ 4604 continue; \ 4605 } \ 4606 s1 = OP(s1, (TD)s2); \ 4607 } \ 4608 *((TD *)vd + HD(0)) = s1; \ 4609 env->vstart = 0; \ 4610 /* set tail elements to 1s */ \ 4611 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4612 } 4613 4614 /* vd[0] = sum(vs1[0], vs2[*]) */ 4615 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4616 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4617 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4618 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4619 4620 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4621 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4622 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4623 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4624 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4625 4626 /* vd[0] = max(vs1[0], vs2[*]) */ 4627 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4628 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4629 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4630 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4631 4632 /* vd[0] = minu(vs1[0], vs2[*]) */ 4633 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4634 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4635 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4636 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4637 4638 /* vd[0] = min(vs1[0], vs2[*]) */ 4639 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4640 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4641 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4642 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4643 4644 /* vd[0] = and(vs1[0], vs2[*]) */ 4645 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4646 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4647 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4648 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4649 4650 /* vd[0] = or(vs1[0], vs2[*]) */ 4651 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4652 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4653 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4654 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4655 4656 /* vd[0] = xor(vs1[0], vs2[*]) */ 4657 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4658 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, 
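/*
 * Illustration: a reduction only ever writes element 0 of vd. With vl = 4
 * and mask bits {1, 1, 0, 1} in v0 (elements 0..3), vredsum.vs computes
 *     vd[0] = vs1[0] + vs2[0] + vs2[1] + vs2[3]
 * and GEN_VEXT_RED then fills the bytes from esz up to vlenb with ones
 * when the tail-agnostic policy is in effect.
 */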
DO_XOR) 4659 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4660 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4661 4662 /* Vector Widening Integer Reduction Instructions */ 4663 /* signed sum reduction into double-width accumulator */ 4664 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4665 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4666 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4667 4668 /* Unsigned sum reduction into double-width accumulator */ 4669 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4670 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4671 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4672 4673 /* Vector Single-Width Floating-Point Reduction Instructions */ 4674 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4675 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4676 void *vs2, CPURISCVState *env, \ 4677 uint32_t desc) \ 4678 { \ 4679 uint32_t vm = vext_vm(desc); \ 4680 uint32_t vl = env->vl; \ 4681 uint32_t esz = sizeof(TD); \ 4682 uint32_t vlenb = simd_maxsz(desc); \ 4683 uint32_t vta = vext_vta(desc); \ 4684 uint32_t i; \ 4685 TD s1 = *((TD *)vs1 + HD(0)); \ 4686 \ 4687 for (i = env->vstart; i < vl; i++) { \ 4688 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4689 if (!vm && !vext_elem_mask(v0, i)) { \ 4690 continue; \ 4691 } \ 4692 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4693 } \ 4694 *((TD *)vd + HD(0)) = s1; \ 4695 env->vstart = 0; \ 4696 /* set tail elements to 1s */ \ 4697 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4698 } 4699 4700 /* Unordered sum */ 4701 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4702 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4703 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4704 4705 /* Ordered sum */ 4706 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4707 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4708 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4709 4710 /* Maximum value */ 4711 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, 4712 float16_maximum_number) 4713 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, 4714 float32_maximum_number) 4715 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, 4716 float64_maximum_number) 4717 4718 /* Minimum value */ 4719 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, 4720 float16_minimum_number) 4721 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, 4722 float32_minimum_number) 4723 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, 4724 float64_minimum_number) 4725 4726 /* Vector Widening Floating-Point Add Instructions */ 4727 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4728 { 4729 return float32_add(a, float16_to_float32(b, true, s), s); 4730 } 4731 4732 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4733 { 4734 return float64_add(a, float32_to_float64(b, s), s); 4735 } 4736 4737 /* Vector Widening Floating-Point Reduction Instructions */ 4738 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4739 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4740 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4741 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4742 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4743 4744 /* 4745 * Vector Mask Operations 4746 
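 *
 * Mask registers hold one bit per element, accessed with vext_elem_mask()
 * and vext_set_elem_mask(), so the DO_NAND/DO_ANDNOT/... macros below
 * operate on 0/1 values. Note the OP(b, a) argument order in
 * GEN_VEXT_MASK_VV: vmandn.mm, for instance, computes vs2 AND NOT vs1.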
*/ 4747 /* Vector Mask-Register Logical Instructions */ 4748 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4749 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4750 void *vs2, CPURISCVState *env, \ 4751 uint32_t desc) \ 4752 { \ 4753 uint32_t vl = env->vl; \ 4754 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4755 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4756 uint32_t i; \ 4757 int a, b; \ 4758 \ 4759 for (i = env->vstart; i < vl; i++) { \ 4760 a = vext_elem_mask(vs1, i); \ 4761 b = vext_elem_mask(vs2, i); \ 4762 vext_set_elem_mask(vd, i, OP(b, a)); \ 4763 } \ 4764 env->vstart = 0; \ 4765 /* 4766 * mask destination register are always tail-agnostic 4767 * set tail elements to 1s 4768 */ \ 4769 if (vta_all_1s) { \ 4770 for (; i < total_elems; i++) { \ 4771 vext_set_elem_mask(vd, i, 1); \ 4772 } \ 4773 } \ 4774 } 4775 4776 #define DO_NAND(N, M) (!(N & M)) 4777 #define DO_ANDNOT(N, M) (N & !M) 4778 #define DO_NOR(N, M) (!(N | M)) 4779 #define DO_ORNOT(N, M) (N | !M) 4780 #define DO_XNOR(N, M) (!(N ^ M)) 4781 4782 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4783 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4784 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4785 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4786 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4787 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4788 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4789 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4790 4791 /* Vector count population in mask vcpop */ 4792 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4793 uint32_t desc) 4794 { 4795 target_ulong cnt = 0; 4796 uint32_t vm = vext_vm(desc); 4797 uint32_t vl = env->vl; 4798 int i; 4799 4800 for (i = env->vstart; i < vl; i++) { 4801 if (vm || vext_elem_mask(v0, i)) { 4802 if (vext_elem_mask(vs2, i)) { 4803 cnt++; 4804 } 4805 } 4806 } 4807 env->vstart = 0; 4808 return cnt; 4809 } 4810 4811 /* vfirst find-first-set mask bit */ 4812 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4813 uint32_t desc) 4814 { 4815 uint32_t vm = vext_vm(desc); 4816 uint32_t vl = env->vl; 4817 int i; 4818 4819 for (i = env->vstart; i < vl; i++) { 4820 if (vm || vext_elem_mask(v0, i)) { 4821 if (vext_elem_mask(vs2, i)) { 4822 return i; 4823 } 4824 } 4825 } 4826 env->vstart = 0; 4827 return -1LL; 4828 } 4829 4830 enum set_mask_type { 4831 ONLY_FIRST = 1, 4832 INCLUDE_FIRST, 4833 BEFORE_FIRST, 4834 }; 4835 4836 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4837 uint32_t desc, enum set_mask_type type) 4838 { 4839 uint32_t vm = vext_vm(desc); 4840 uint32_t vl = env->vl; 4841 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; 4842 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4843 uint32_t vma = vext_vma(desc); 4844 int i; 4845 bool first_mask_bit = false; 4846 4847 for (i = env->vstart; i < vl; i++) { 4848 if (!vm && !vext_elem_mask(v0, i)) { 4849 /* set masked-off elements to 1s */ 4850 if (vma) { 4851 vext_set_elem_mask(vd, i, 1); 4852 } 4853 continue; 4854 } 4855 /* write a zero to all following active elements */ 4856 if (first_mask_bit) { 4857 vext_set_elem_mask(vd, i, 0); 4858 continue; 4859 } 4860 if (vext_elem_mask(vs2, i)) { 4861 first_mask_bit = true; 4862 if (type == BEFORE_FIRST) { 4863 vext_set_elem_mask(vd, i, 0); 4864 } else { 4865 vext_set_elem_mask(vd, i, 1); 4866 } 4867 } else { 4868 if (type == ONLY_FIRST) { 4869 vext_set_elem_mask(vd, i, 0); 4870 } else { 4871 vext_set_elem_mask(vd, i, 1); 4872 } 4873 } 4874 } 4875 env->vstart = 0; 4876 /* 4877 * mask destination register are always tail-agnostic 4878 * set tail elements to 1s 4879 */ 4880 if (vta_all_1s) 
{ 4881 for (; i < total_elems; i++) { 4882 vext_set_elem_mask(vd, i, 1); 4883 } 4884 } 4885 } 4886 4887 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4888 uint32_t desc) 4889 { 4890 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4891 } 4892 4893 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4894 uint32_t desc) 4895 { 4896 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4897 } 4898 4899 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4900 uint32_t desc) 4901 { 4902 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4903 } 4904 4905 /* Vector Iota Instruction */ 4906 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4907 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4908 uint32_t desc) \ 4909 { \ 4910 uint32_t vm = vext_vm(desc); \ 4911 uint32_t vl = env->vl; \ 4912 uint32_t esz = sizeof(ETYPE); \ 4913 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4914 uint32_t vta = vext_vta(desc); \ 4915 uint32_t vma = vext_vma(desc); \ 4916 uint32_t sum = 0; \ 4917 int i; \ 4918 \ 4919 for (i = env->vstart; i < vl; i++) { \ 4920 if (!vm && !vext_elem_mask(v0, i)) { \ 4921 /* set masked-off elements to 1s */ \ 4922 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4923 continue; \ 4924 } \ 4925 *((ETYPE *)vd + H(i)) = sum; \ 4926 if (vext_elem_mask(vs2, i)) { \ 4927 sum++; \ 4928 } \ 4929 } \ 4930 env->vstart = 0; \ 4931 /* set tail elements to 1s */ \ 4932 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4933 } 4934 4935 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4936 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4937 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4938 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4939 4940 /* Vector Element Index Instruction */ 4941 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4942 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4943 { \ 4944 uint32_t vm = vext_vm(desc); \ 4945 uint32_t vl = env->vl; \ 4946 uint32_t esz = sizeof(ETYPE); \ 4947 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4948 uint32_t vta = vext_vta(desc); \ 4949 uint32_t vma = vext_vma(desc); \ 4950 int i; \ 4951 \ 4952 for (i = env->vstart; i < vl; i++) { \ 4953 if (!vm && !vext_elem_mask(v0, i)) { \ 4954 /* set masked-off elements to 1s */ \ 4955 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4956 continue; \ 4957 } \ 4958 *((ETYPE *)vd + H(i)) = i; \ 4959 } \ 4960 env->vstart = 0; \ 4961 /* set tail elements to 1s */ \ 4962 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4963 } 4964 4965 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4966 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4967 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4968 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4969 4970 /* 4971 * Vector Permutation Instructions 4972 */ 4973 4974 /* Vector Slide Instructions */ 4975 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4976 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4977 CPURISCVState *env, uint32_t desc) \ 4978 { \ 4979 uint32_t vm = vext_vm(desc); \ 4980 uint32_t vl = env->vl; \ 4981 uint32_t esz = sizeof(ETYPE); \ 4982 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4983 uint32_t vta = vext_vta(desc); \ 4984 uint32_t vma = vext_vma(desc); \ 4985 target_ulong offset = s1, i_min, i; \ 4986 \ 4987 i_min = MAX(env->vstart, offset); \ 4988 for (i = i_min; i < vl; i++) { \ 4989 if (!vm && !vext_elem_mask(v0, i)) { \ 4990 /* set masked-off elements to 1s */ \ 4991 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4992 

/*
 * Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong i_max, i;                                                \
                                                                          \
    i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));              \
    }                                                                     \
                                                                          \
    for (i = i_max; i < vl; ++i) {                                        \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        }                                                                 \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                    \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1,         \
                                 void *vs2, CPURISCVState *env,           \
                                 uint32_t desc)                           \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == 0) {                                                     \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
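
/*
 * For example, with vl = VLMAX = 4, all elements active and an offset
 * x[rs1] = 2:
 *     vs2                 = {10, 11, 12, 13}
 *     vslideup.vx    vd  -> {vd[0], vd[1], 10, 11}  (elements below the
 *                                                    offset keep their
 *                                                    old value)
 *     vslidedown.vx  vd  -> {12, 13, 0, 0}          (elements sourced
 *                                                    past VLMAX read 0)
 */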

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                 \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1,       \
                                   void *vs2, CPURISCVState *env,         \
                                   uint32_t desc)                         \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)                          \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
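
/*
 * For example, with vl=4, all elements active and a scalar x[rs1] = 9:
 *     vs2                  = {10, 11, 12, 13}
 *     vslide1up.vx    vd  -> { 9, 10, 11, 12}
 *     vslide1down.vx  vd  -> {11, 12, 13,  9}
 *
 * The vfslide1up.vf/vfslide1down.vf wrappers reuse these integer
 * helpers unchanged: the scalar is handed to them as a raw bit pattern
 * in s1 and is only copied, so no softfloat rounding or exception
 * handling is involved.
 */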

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
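
/*
 * For example, with vl = VLMAX = 4 and all elements active,
 * vs1 = {3, 0, 5, 1} gives vd = {vs2[3], vs2[0], 0, vs2[1]}, the
 * out-of-range index 5 reading as zero. The vrgatherei16 instantiations
 * keep the index elements (TS1) at 16 bits whatever the data width
 * (TS2), so an SEW=8 gather can still address indices above 255.
 */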

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
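
/*
 * For example, vzext_vf2_h copies each uint8_t source element into a
 * uint16_t destination element, zero-extending it, while vsext_vf2_h
 * uses int8_t/int16_t so the same assignment sign-extends; the vf2,
 * vf4 and vf8 suffixes give the ratio between destination and source
 * element widths.
 */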