/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL - check LMUL * VLEN >= SEW */
        if (lmul == 4 ||
            cpu->cfg.vlen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}
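
/*
 * Illustrative sketch (not part of the original helpers): for a valid
 * vtype, the vl update above reduces to clamping the requested AVL in
 * s1 to VLMAX, with vstart reset to 0.  The helper name below is
 * hypothetical and nothing in the translator calls it.
 */
static inline target_ulong vsetvl_result_example(target_ulong avl,
                                                 target_ulong vlmax)
{
    /* vl = min(AVL, VLMAX) */
    return avl <= vlmax ? avl : vlmax;
}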

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

static inline uint32_t vext_vta(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA);
}

static inline uint32_t vext_vma(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VMA);
}

static inline uint32_t vext_vta_all_1s(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * Get number of total elements, including prestart, body and tail elements.
 * Note that when LMUL < 1, the tail includes the elements past VLMAX that
 * are held in the same vector register.
 */
static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
                                            uint32_t esz)
{
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
    return (vlenb << emul) / esz;
}

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & ~env->cur_pmmask) | env->cur_pmbase;
}
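
/*
 * Illustrative sketch (hypothetical, unused): vext_max_elems() above
 * evaluates VLMAX = vlenb * LMUL / ESZ purely with shifts, because both
 * LMUL and the element size are powers of two.  For example, vlenb = 16
 * (VLEN = 128), lmul = 1 (LMUL = 2) and log2_esz = 1 (SEW = 16) give
 * 16 << (1 - 1) = 16 elements.
 */
static inline uint32_t vext_vlmax_example(uint32_t vlenb, int32_t lmul,
                                          uint32_t log2_esz)
{
    int scale = lmul - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}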

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

/* set agnostic elements to 1s */
static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
                              uint32_t tot)
{
    if (is_agnostic == 0) {
        /* policy undisturbed */
        return;
    }
    if (tot - cnt == 0) {
        return;
    }
    memset(base + cnt, -1, tot - cnt);
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* elements operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
                                   uint32_t desc, uint32_t nf,
                                   uint32_t esz, uint32_t max_elems)
{
    uint32_t vta = vext_vta(desc);
    int k;

    if (vta == 0) {
        return;
    }

    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
}
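
/*
 * Illustrative sketch (hypothetical, unused): with MLEN = 1, mask
 * element i of v0 is simply bit (i % 64) of the i/64-th host 64-bit
 * chunk, which is what vext_elem_mask()/vext_set_elem_mask() above
 * compute.  The loop below restates that layout by counting the set
 * bits of the first n mask elements.
 */
static inline uint32_t vext_mask_popcount_example(const uint64_t *v0,
                                                  uint32_t n)
{
    uint32_t i, cnt = 0;

    for (i = 0; i < n; i++) {
        cnt += (v0[i / 64] >> (i % 64)) & 1;
    }
    return cnt;
}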

/*
 * stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 * unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
             uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
}

/*
 * A masked unit-stride load or store operation is a special case of a
 * strided one, with stride = NF * sizeof(ETYPE).
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
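
/*
 * Illustrative sketch (hypothetical, unused): for a strided segment
 * access, field k of element i is transferred to/from guest address
 * base + i * stride + k * (1 << log2_esz), exactly as computed inside
 * vext_ldst_stride() above.
 */
static inline target_ulong vext_stride_ea_example(target_ulong base,
                                                  target_ulong stride,
                                                  uint32_t i, uint32_t k,
                                                  uint32_t log2_esz)
{
    return base + stride * i + ((target_ulong)k << log2_esz);
}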

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 * unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC());
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC());
}

/*
 * index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}
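
/*
 * Illustrative sketch (hypothetical, unused): for indexed accesses the
 * offset vector vs2 holds unsigned byte offsets of the index EEW, so
 * the effective address of field k of element i is
 * base + zero_extend(vs2[i]) + k * esz, matching get_index_addr() plus
 * the segment adjustment in vext_ldst_index() above.
 */
static inline target_ulong vext_index_ea_example16(target_ulong base,
                                                   const uint16_t *offsets,
                                                   uint32_t i, uint32_t k,
                                                   uint32_t log2_esz)
{
    return base + offsets[H2(i)] + ((target_ulong)k << log2_esz);
}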

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());      \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC());                                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 * unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ)) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
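
/*
 * Illustrative sketch (hypothetical, unused): vext_ldff() above
 * implements the fault-only-first rule.  A fault on element 0 is
 * reported as usual; a fault on any later element only trims vl so
 * that the faulting element and everything after it are not loaded.
 */
static inline uint32_t vext_ldff_trim_vl_example(uint32_t vl,
                                                 uint32_t first_faulting)
{
    /* element 0 must still fault normally, so only trim for i > 0 */
    return first_faulting > 0 && first_faulting < vl ? first_faulting : vl;
}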

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/*
 * load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3;
    uint32_t max_elems = vlenb >> log2_esz;

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /* load/store rest of elements of current segment pointed by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd,
                      ra);
        }
        k++;
    }

    /* load/store elements for rest of segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}

#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
                    ctzl(sizeof(ETYPE)), GETPC());   \
}

GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
                    ctzl(sizeof(ETYPE)), GETPC());   \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
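
/*
 * Illustrative sketch (hypothetical, unused): whole-register accesses
 * ignore vtype and vl; they always transfer NF * VLENB bytes, i.e.
 * NF * (VLENB >> log2_esz) elements, which is the element count
 * vext_ldst_whole() above iterates over (restarting from vstart).
 */
static inline uint32_t vext_whole_reg_elems_example(uint32_t nf,
                                                    uint32_t vlenb,
                                                    uint32_t log2_esz)
{
    return nf * (vlenb >> log2_esz);
}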

/*
 * Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
}
#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivv2_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME, ESZ)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1,  \
                  void *vs2, CPURISCVState *env,  \
                  uint32_t desc)                  \
{                                                 \
    do_vext_vv(vd, v0, vs1, vs2, env, desc,       \
               do_##NAME, ESZ);                   \
}

GEN_VEXT_VV(vadd_vv_b, 1)
GEN_VEXT_VV(vadd_vv_h, 2)
GEN_VEXT_VV(vadd_vv_w, 4)
GEN_VEXT_VV(vadd_vv_d, 8)
GEN_VEXT_VV(vsub_vv_b, 1)
GEN_VEXT_VV(vsub_vv_h, 2)
GEN_VEXT_VV(vsub_vv_w, 4)
GEN_VEXT_VV(vsub_vv_d, 8)

typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operand type.
 * (TX1)(T1)s1 expands the operand type of widen or narrow operations.
 */
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
}

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivx2_fn fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
            continue;
        }
        fn(vd, s1, vs2, i);
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME, ESZ)                          \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,  \
                  void *vs2, CPURISCVState *env,        \
                  uint32_t desc)                        \
{                                                       \
    do_vext_vx(vd, v0, s1, vs2, env, desc,              \
               do_##NAME, ESZ);                         \
}

GEN_VEXT_VX(vadd_vx_b, 1)
GEN_VEXT_VX(vadd_vx_h, 2)
GEN_VEXT_VX(vadd_vx_w, 4)
GEN_VEXT_VX(vadd_vx_d, 8)
GEN_VEXT_VX(vsub_vx_b, 1)
GEN_VEXT_VX(vsub_vx_h, 2)
GEN_VEXT_VX(vsub_vx_w, 4)
GEN_VEXT_VX(vsub_vx_d, 8)
GEN_VEXT_VX(vrsub_vx_b, 1)
GEN_VEXT_VX(vrsub_vx_h, 2)
GEN_VEXT_VX(vrsub_vx_w, 4)
GEN_VEXT_VX(vrsub_vx_d, 8)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}
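
/*
 * Illustrative expansion (for reading only, already generated above):
 * RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) produces a
 * per-element worker roughly equivalent to
 *
 *   static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *   {
 *       int8_t s1 = *((int8_t *)vs1 + H1(i));
 *       int8_t s2 = *((int8_t *)vs2 + H1(i));
 *       *((int8_t *)vd + H1(i)) = s2 + s1;
 *   }
 *
 * and GEN_VEXT_VV(vadd_vv_b, 1) wraps it in the vl/vm/vta/vma loop of
 * do_vext_vv().
 */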

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 2)
GEN_VEXT_VV(vwaddu_vv_h, 4)
GEN_VEXT_VV(vwaddu_vv_w, 8)
GEN_VEXT_VV(vwsubu_vv_b, 2)
GEN_VEXT_VV(vwsubu_vv_h, 4)
GEN_VEXT_VV(vwsubu_vv_w, 8)
GEN_VEXT_VV(vwadd_vv_b, 2)
GEN_VEXT_VV(vwadd_vv_h, 4)
GEN_VEXT_VV(vwadd_vv_w, 8)
GEN_VEXT_VV(vwsub_vv_b, 2)
GEN_VEXT_VV(vwsub_vv_h, 4)
GEN_VEXT_VV(vwsub_vv_w, 8)
GEN_VEXT_VV(vwaddu_wv_b, 2)
GEN_VEXT_VV(vwaddu_wv_h, 4)
GEN_VEXT_VV(vwaddu_wv_w, 8)
GEN_VEXT_VV(vwsubu_wv_b, 2)
GEN_VEXT_VV(vwsubu_wv_h, 4)
GEN_VEXT_VV(vwsubu_wv_w, 8)
GEN_VEXT_VV(vwadd_wv_b, 2)
GEN_VEXT_VV(vwadd_wv_h, 4)
GEN_VEXT_VV(vwadd_wv_w, 8)
GEN_VEXT_VV(vwsub_wv_b, 2)
GEN_VEXT_VV(vwsub_wv_h, 4)
GEN_VEXT_VV(vwsub_wv_w, 8)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 2)
GEN_VEXT_VX(vwaddu_vx_h, 4)
GEN_VEXT_VX(vwaddu_vx_w, 8)
GEN_VEXT_VX(vwsubu_vx_b, 2)
GEN_VEXT_VX(vwsubu_vx_h, 4)
GEN_VEXT_VX(vwsubu_vx_w, 8)
GEN_VEXT_VX(vwadd_vx_b, 2)
GEN_VEXT_VX(vwadd_vx_h, 4)
GEN_VEXT_VX(vwadd_vx_w, 8)
GEN_VEXT_VX(vwsub_vx_b, 2)
GEN_VEXT_VX(vwsub_vx_h, 4)
GEN_VEXT_VX(vwsub_vx_w, 8)
GEN_VEXT_VX(vwaddu_wx_b, 2)
GEN_VEXT_VX(vwaddu_wx_h, 4)
GEN_VEXT_VX(vwaddu_wx_w, 8)
GEN_VEXT_VX(vwsubu_wx_b, 2)
GEN_VEXT_VX(vwsubu_wx_h, 4)
GEN_VEXT_VX(vwsubu_wx_w, 8)
GEN_VEXT_VX(vwadd_wx_b, 2)
GEN_VEXT_VX(vwadd_wx_h, 4)
GEN_VEXT_VX(vwadd_wx_w, 8)
GEN_VEXT_VX(vwsub_wx_b, 2)
GEN_VEXT_VX(vwsub_wx_h, 4)
GEN_VEXT_VX(vwsub_wx_w, 8)
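
/*
 * Illustrative sketch (hypothetical, unused): the WOP_* type lists make
 * OPIVV2 widen both sources to 2*SEW before the operation (vwaddu.vv),
 * while the WOP_W* lists leave vs2 already at 2*SEW (vwaddu.wv).  For
 * SEW = 8 the per-element arithmetic is simply:
 */
static inline uint16_t vwaddu_vv_b_example(uint8_t s2, uint8_t s1)
{
    return (uint16_t)s2 + (uint16_t)s1;   /* no carry can be lost */
}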

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t esz = sizeof(ETYPE);                             \
    uint32_t total_elems =                                    \
        vext_get_total_elems(env, desc, esz);                 \
    uint32_t vta = vext_vta(desc);                            \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
    /* set tail elements to 1s */                             \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t esz = sizeof(ETYPE);                                        \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t i;                                                          \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
    /* set tail elements to 1s */                                        \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
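
/*
 * Illustrative sketch (hypothetical, unused): vadc.vvm/vsbc.vvm are
 * always unmasked in the usual sense; v0 supplies the carry/borrow bit
 * instead of an execution mask, so element i computes
 * vs2[i] + vs1[i] + v0.mask[i] (or the subtract-with-borrow analogue).
 */
static inline uint8_t vadc_vvm_b_example(uint8_t s2, uint8_t s1,
                                         bool carry_in)
{
    return s2 + s1 + (carry_in ? 1 : 0);
}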

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
                          (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)

#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    uint32_t vl = env->vl;                                      \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;            \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                \
    uint32_t i;                                                 \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
        ETYPE carry = !vm && vext_elem_mask(v0, i);             \
        vext_set_elem_mask(vd, i,                               \
                DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
    }                                                           \
    env->vstart = 0;                                            \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                         \
    if (vta_all_1s) {                                           \
        for (; i < total_elems; i++) {                          \
            vext_set_elem_mask(vd, i, 1);                       \
        }                                                       \
    }                                                           \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
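
/*
 * Illustrative sketch (hypothetical, unused): DO_MADC above detects an
 * unsigned carry-out without widening: with a carry-in of 1 the sum
 * wraps iff (N + M + 1) <= N in the element type, otherwise iff
 * (N + M) < N.  The same test written out for bytes:
 */
static inline bool vmadc_b_example(uint8_t n, uint8_t m, bool carry_in)
{
    return carry_in ? (uint8_t)(n + m + 1) <= n : (uint8_t)(n + m) < n;
}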

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M) (N << (M))
#define DO_SRL(N, M) (N >> (M))

/* generate the helpers for shift instructions with two vector operands */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)       \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                    \
                  void *vs2, CPURISCVState *env, uint32_t desc)     \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t esz = sizeof(TS1);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);    \
    uint32_t vta = vext_vta(desc);                                  \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);     \
            continue;                                               \
        }                                                           \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                            \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                            \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                  \
    }                                                               \
    env->vstart = 0;                                                \
    /* set tail elements to 1s */                                   \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);        \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t,  H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t,  uint8_t,  H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t,  H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
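
/*
 * Illustrative sketch (hypothetical, unused): the MASK argument above
 * (0x7/0xf/0x1f/0x3f) keeps only log2(SEW) bits of the shift amount,
 * so a shift count of SEW or more wraps modulo SEW, as required by the
 * vector spec.  For vsra.vv with SEW = 8:
 */
static inline int8_t vsra_vv_b_example(int8_t s2, uint8_t s1)
{
    return s2 >> (s1 & 0x7);
}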

/*
 * generate the helpers for shift instructions with one vector and one scalar
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK)  \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,       \
                  void *vs2, CPURISCVState *env,             \
                  uint32_t desc)                             \
{                                                            \
    uint32_t vm = vext_vm(desc);                             \
    uint32_t vl = env->vl;                                   \
    uint32_t esz = sizeof(TD);                               \
    uint32_t total_elems =                                   \
        vext_get_total_elems(env, desc, esz);                \
    uint32_t vta = vext_vta(desc);                           \
    uint32_t vma = vext_vma(desc);                           \
    uint32_t i;                                              \
                                                             \
    for (i = env->vstart; i < vl; i++) {                     \
        if (!vm && !vext_elem_mask(v0, i)) {                 \
            /* set masked-off elements to 1s */              \
            vext_set_elems_1s(vd, vma, i * esz,              \
                              (i + 1) * esz);                \
            continue;                                        \
        }                                                    \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                     \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);             \
    }                                                        \
    env->vstart = 0;                                         \
    /* set tail elements to 1s */                            \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;                \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            if (vma) {                                              \
                vext_set_elem_mask(vd, i, 1);                       \
            }                                                       \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                DO_OP(s2, (ETYPE)(target_long)s1));                 \
    }                                                               \
    env->vstart = 0;                                                \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                             \
    if (vta_all_1s) {                                               \
        for (; i < total_elems; i++) {                              \
            vext_set_elem_mask(vd, i, 1);                           \
        }                                                           \
    }                                                               \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1)
GEN_VEXT_VV(vminu_vv_h, 2)
GEN_VEXT_VV(vminu_vv_w, 4)
GEN_VEXT_VV(vminu_vv_d, 8)
GEN_VEXT_VV(vmin_vv_b, 1)
GEN_VEXT_VV(vmin_vv_h, 2)
GEN_VEXT_VV(vmin_vv_w, 4)
GEN_VEXT_VV(vmin_vv_d, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8)
GEN_VEXT_VV(vmax_vv_b, 1)
GEN_VEXT_VV(vmax_vv_h, 2)
GEN_VEXT_VV(vmax_vv_w, 4)
GEN_VEXT_VV(vmax_vv_d, 8)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1)
GEN_VEXT_VX(vminu_vx_h, 2)
GEN_VEXT_VX(vminu_vx_w, 4)
GEN_VEXT_VX(vminu_vx_d, 8)
GEN_VEXT_VX(vmin_vx_b, 1)
GEN_VEXT_VX(vmin_vx_h, 2)
GEN_VEXT_VX(vmin_vx_w, 4)
GEN_VEXT_VX(vmin_vx_d, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8)
GEN_VEXT_VX(vmax_vx_b, 1)
GEN_VEXT_VX(vmax_vx_h, 2)
GEN_VEXT_VX(vmax_vx_w, 4)
GEN_VEXT_VX(vmax_vx_d, 8)

/* Vector Single-Width Integer Multiply Instructions */
#define DO_MUL(N, M) (N * M)
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b, 1)
GEN_VEXT_VV(vmul_vv_h, 2)
GEN_VEXT_VV(vmul_vv_w, 4)
GEN_VEXT_VV(vmul_vv_d, 8)

static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    return (int16_t)s2 * (int16_t)s1 >> 8;
}

static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    return (int32_t)s2 * (int32_t)s1 >> 16;
}

static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    return (int64_t)s2 * (int64_t)s1 >> 32;
}

static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t hi_64, lo_64;

    muls64(&lo_64, &hi_64, s1, s2);
    return hi_64;
}

static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    return (uint16_t)s2 * (uint16_t)s1 >> 8;
}

static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    return (uint32_t)s2 * (uint32_t)s1 >> 16;
}

static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    return (uint64_t)s2 * (uint64_t)s1 >> 32;
}

static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);
    return hi_64;
}

static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    return (int16_t)s2 * (uint16_t)s1 >> 8;
}

static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    return (int32_t)s2 * (uint32_t)s1 >> 16;
}

static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    return (int64_t)s2 * (uint64_t)s1 >> 32;
}

/*
 * Let  A = signed operand,
 *      B = unsigned operand
 *      P = mulu64(A, B), unsigned product
 *
 * LET  X = 2 ** 64 - A, 2's complement of A
 *      SP = signed product
 * THEN
 *      IF A < 0
 *          SP = -X * B
 *             = -(2 ** 64 - A) * B
 *             = A * B - 2 ** 64 * B
 *             = P - 2 ** 64 * B
 *      ELSE
 *          SP = P
 * THEN
 *      HI_P -= (A < 0 ? B : 0)
 */

static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);

    hi_64 -= s2 < 0 ? s1 : 0;
    return hi_64;
}
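
/*
 * Illustrative sketch (hypothetical, unused): the correction above also
 * works at any width.  For 8-bit operands, the high byte of the signed
 * x unsigned product equals the high byte of the unsigned product minus
 * B whenever A is negative, e.g. A = -1 (0xff), B = 2: unsigned
 * 0xff * 2 = 0x01fe, hi = 0x01, hi - B = 0xff = -1, which is the high
 * byte of the true product -2.
 */
static inline int8_t do_mulhsu_b_alt_example(int8_t a, uint8_t b)
{
    uint16_t p = (uint16_t)(uint8_t)a * b;     /* unsigned product */
    uint8_t hi = p >> 8;

    return hi - (a < 0 ? b : 0);               /* same fixup as above */
}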
s1 : 0; 1674 return hi_64; 1675 } 1676 1677 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1678 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1679 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1680 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1681 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1682 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1683 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1684 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1685 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1686 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1687 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1688 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1689 GEN_VEXT_VV(vmulh_vv_b, 1) 1690 GEN_VEXT_VV(vmulh_vv_h, 2) 1691 GEN_VEXT_VV(vmulh_vv_w, 4) 1692 GEN_VEXT_VV(vmulh_vv_d, 8) 1693 GEN_VEXT_VV(vmulhu_vv_b, 1) 1694 GEN_VEXT_VV(vmulhu_vv_h, 2) 1695 GEN_VEXT_VV(vmulhu_vv_w, 4) 1696 GEN_VEXT_VV(vmulhu_vv_d, 8) 1697 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1698 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1699 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1700 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1701 1702 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1703 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1704 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1705 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1706 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1707 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1708 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1709 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1710 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1711 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1712 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1713 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1714 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1715 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1716 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1717 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1718 GEN_VEXT_VX(vmul_vx_b, 1) 1719 GEN_VEXT_VX(vmul_vx_h, 2) 1720 GEN_VEXT_VX(vmul_vx_w, 4) 1721 GEN_VEXT_VX(vmul_vx_d, 8) 1722 GEN_VEXT_VX(vmulh_vx_b, 1) 1723 GEN_VEXT_VX(vmulh_vx_h, 2) 1724 GEN_VEXT_VX(vmulh_vx_w, 4) 1725 GEN_VEXT_VX(vmulh_vx_d, 8) 1726 GEN_VEXT_VX(vmulhu_vx_b, 1) 1727 GEN_VEXT_VX(vmulhu_vx_h, 2) 1728 GEN_VEXT_VX(vmulhu_vx_w, 4) 1729 GEN_VEXT_VX(vmulhu_vx_d, 8) 1730 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1731 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1732 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1733 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1734 1735 /* Vector Integer Divide Instructions */ 1736 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1737 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1738 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ 1739 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1740 #define DO_REM(N, M) (unlikely(M == 0) ? N : \ 1741 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1742 1743 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1744 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1745 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1746 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1747 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1748 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1749 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1750 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1751 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1752 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1753 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1754 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1755 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1756 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1757 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1758 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1759 GEN_VEXT_VV(vdivu_vv_b, 1) 1760 GEN_VEXT_VV(vdivu_vv_h, 2) 1761 GEN_VEXT_VV(vdivu_vv_w, 4) 1762 GEN_VEXT_VV(vdivu_vv_d, 8) 1763 GEN_VEXT_VV(vdiv_vv_b, 1) 1764 GEN_VEXT_VV(vdiv_vv_h, 2) 1765 GEN_VEXT_VV(vdiv_vv_w, 4) 1766 GEN_VEXT_VV(vdiv_vv_d, 8) 1767 GEN_VEXT_VV(vremu_vv_b, 1) 1768 GEN_VEXT_VV(vremu_vv_h, 2) 1769 GEN_VEXT_VV(vremu_vv_w, 4) 1770 GEN_VEXT_VV(vremu_vv_d, 8) 1771 GEN_VEXT_VV(vrem_vv_b, 1) 1772 GEN_VEXT_VV(vrem_vv_h, 2) 1773 GEN_VEXT_VV(vrem_vv_w, 4) 1774 GEN_VEXT_VV(vrem_vv_d, 8) 1775 1776 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1777 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1778 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1779 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1780 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1781 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1782 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1783 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1784 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1785 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1786 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1787 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1788 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1789 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1790 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1791 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1792 GEN_VEXT_VX(vdivu_vx_b, 1) 1793 GEN_VEXT_VX(vdivu_vx_h, 2) 1794 GEN_VEXT_VX(vdivu_vx_w, 4) 1795 GEN_VEXT_VX(vdivu_vx_d, 8) 1796 GEN_VEXT_VX(vdiv_vx_b, 1) 1797 GEN_VEXT_VX(vdiv_vx_h, 2) 1798 GEN_VEXT_VX(vdiv_vx_w, 4) 1799 GEN_VEXT_VX(vdiv_vx_d, 8) 1800 GEN_VEXT_VX(vremu_vx_b, 1) 1801 GEN_VEXT_VX(vremu_vx_h, 2) 1802 GEN_VEXT_VX(vremu_vx_w, 4) 1803 GEN_VEXT_VX(vremu_vx_d, 8) 1804 GEN_VEXT_VX(vrem_vx_b, 1) 1805 GEN_VEXT_VX(vrem_vx_h, 2) 1806 GEN_VEXT_VX(vrem_vx_w, 4) 1807 GEN_VEXT_VX(vrem_vx_d, 8) 1808 1809 /* Vector Widening Integer Multiply Instructions */ 1810 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1811 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1812 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1813 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1814 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1815 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1816 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1817 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, 
DO_MUL) 1818 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1819 GEN_VEXT_VV(vwmul_vv_b, 2) 1820 GEN_VEXT_VV(vwmul_vv_h, 4) 1821 GEN_VEXT_VV(vwmul_vv_w, 8) 1822 GEN_VEXT_VV(vwmulu_vv_b, 2) 1823 GEN_VEXT_VV(vwmulu_vv_h, 4) 1824 GEN_VEXT_VV(vwmulu_vv_w, 8) 1825 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1826 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1827 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1828 1829 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1830 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1831 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1832 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1833 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1834 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1835 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1836 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1837 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1838 GEN_VEXT_VX(vwmul_vx_b, 2) 1839 GEN_VEXT_VX(vwmul_vx_h, 4) 1840 GEN_VEXT_VX(vwmul_vx_w, 8) 1841 GEN_VEXT_VX(vwmulu_vx_b, 2) 1842 GEN_VEXT_VX(vwmulu_vx_h, 4) 1843 GEN_VEXT_VX(vwmulu_vx_w, 8) 1844 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1845 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1846 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1847 1848 /* Vector Single-Width Integer Multiply-Add Instructions */ 1849 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1850 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1851 { \ 1852 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1853 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1854 TD d = *((TD *)vd + HD(i)); \ 1855 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1856 } 1857 1858 #define DO_MACC(N, M, D) (M * N + D) 1859 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1860 #define DO_MADD(N, M, D) (M * D + N) 1861 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1862 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1863 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1864 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1865 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1866 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1867 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1868 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1869 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1870 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1871 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1872 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1873 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1874 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1875 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1876 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1877 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1878 GEN_VEXT_VV(vmacc_vv_b, 1) 1879 GEN_VEXT_VV(vmacc_vv_h, 2) 1880 GEN_VEXT_VV(vmacc_vv_w, 4) 1881 GEN_VEXT_VV(vmacc_vv_d, 8) 1882 GEN_VEXT_VV(vnmsac_vv_b, 1) 1883 GEN_VEXT_VV(vnmsac_vv_h, 2) 1884 GEN_VEXT_VV(vnmsac_vv_w, 4) 1885 GEN_VEXT_VV(vnmsac_vv_d, 8) 1886 GEN_VEXT_VV(vmadd_vv_b, 1) 1887 GEN_VEXT_VV(vmadd_vv_h, 2) 1888 GEN_VEXT_VV(vmadd_vv_w, 4) 1889 GEN_VEXT_VV(vmadd_vv_d, 8) 1890 GEN_VEXT_VV(vnmsub_vv_b, 1) 1891 GEN_VEXT_VV(vnmsub_vv_h, 2) 1892 GEN_VEXT_VV(vnmsub_vv_w, 4) 1893 GEN_VEXT_VV(vnmsub_vv_d, 8) 1894 1895 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1896 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1897 { \ 1898 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1899 TD d = *((TD *)vd 
+ HD(i)); \ 1900 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1901 } 1902 1903 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1904 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1905 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1906 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1907 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1908 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1909 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1910 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1911 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1912 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1913 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1914 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1915 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1916 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1917 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1918 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1919 GEN_VEXT_VX(vmacc_vx_b, 1) 1920 GEN_VEXT_VX(vmacc_vx_h, 2) 1921 GEN_VEXT_VX(vmacc_vx_w, 4) 1922 GEN_VEXT_VX(vmacc_vx_d, 8) 1923 GEN_VEXT_VX(vnmsac_vx_b, 1) 1924 GEN_VEXT_VX(vnmsac_vx_h, 2) 1925 GEN_VEXT_VX(vnmsac_vx_w, 4) 1926 GEN_VEXT_VX(vnmsac_vx_d, 8) 1927 GEN_VEXT_VX(vmadd_vx_b, 1) 1928 GEN_VEXT_VX(vmadd_vx_h, 2) 1929 GEN_VEXT_VX(vmadd_vx_w, 4) 1930 GEN_VEXT_VX(vmadd_vx_d, 8) 1931 GEN_VEXT_VX(vnmsub_vx_b, 1) 1932 GEN_VEXT_VX(vnmsub_vx_h, 2) 1933 GEN_VEXT_VX(vnmsub_vx_w, 4) 1934 GEN_VEXT_VX(vnmsub_vx_d, 8) 1935 1936 /* Vector Widening Integer Multiply-Add Instructions */ 1937 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1938 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1939 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1940 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1941 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1942 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1943 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1944 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1945 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1946 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1947 GEN_VEXT_VV(vwmaccu_vv_h, 4) 1948 GEN_VEXT_VV(vwmaccu_vv_w, 8) 1949 GEN_VEXT_VV(vwmacc_vv_b, 2) 1950 GEN_VEXT_VV(vwmacc_vv_h, 4) 1951 GEN_VEXT_VV(vwmacc_vv_w, 8) 1952 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 1953 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 1954 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 1955 1956 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1957 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1958 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1959 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1960 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1961 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1962 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1963 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1964 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1965 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1966 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1967 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1968 GEN_VEXT_VX(vwmaccu_vx_b, 2) 1969 GEN_VEXT_VX(vwmaccu_vx_h, 4) 1970 GEN_VEXT_VX(vwmaccu_vx_w, 8) 1971 GEN_VEXT_VX(vwmacc_vx_b, 2) 1972 GEN_VEXT_VX(vwmacc_vx_h, 4) 1973 GEN_VEXT_VX(vwmacc_vx_w, 8) 1974 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 1975 
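/*
 * Note on this widening multiply-accumulate group: the WOP_* type packs
 * (defined earlier in this file) make the destination element twice the
 * source SEW and choose how each source is extended, so DO_MACC already
 * sees widened operands.  For vwmaccsu the multiplier s1 is sign-extended
 * and vs2 is zero-extended; vwmaccus (vx form only) is the mirror image.
 * For example, with SEW=8: s1 = -3, s2 = 200, d = 100 gives
 * d + (-3 * 200) = -500, which still fits the 16-bit destination element.
 */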
GEN_VEXT_VX(vwmaccsu_vx_h, 4) 1976 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 1977 GEN_VEXT_VX(vwmaccus_vx_b, 2) 1978 GEN_VEXT_VX(vwmaccus_vx_h, 4) 1979 GEN_VEXT_VX(vwmaccus_vx_w, 8) 1980 1981 /* Vector Integer Merge and Move Instructions */ 1982 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1983 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1984 uint32_t desc) \ 1985 { \ 1986 uint32_t vl = env->vl; \ 1987 uint32_t esz = sizeof(ETYPE); \ 1988 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1989 uint32_t vta = vext_vta(desc); \ 1990 uint32_t i; \ 1991 \ 1992 for (i = env->vstart; i < vl; i++) { \ 1993 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1994 *((ETYPE *)vd + H(i)) = s1; \ 1995 } \ 1996 env->vstart = 0; \ 1997 /* set tail elements to 1s */ \ 1998 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1999 } 2000 2001 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 2002 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 2003 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 2004 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 2005 2006 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 2007 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2008 uint32_t desc) \ 2009 { \ 2010 uint32_t vl = env->vl; \ 2011 uint32_t esz = sizeof(ETYPE); \ 2012 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2013 uint32_t vta = vext_vta(desc); \ 2014 uint32_t i; \ 2015 \ 2016 for (i = env->vstart; i < vl; i++) { \ 2017 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2018 } \ 2019 env->vstart = 0; \ 2020 /* set tail elements to 1s */ \ 2021 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2022 } 2023 2024 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2025 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2026 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2027 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2028 2029 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2030 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2031 CPURISCVState *env, uint32_t desc) \ 2032 { \ 2033 uint32_t vl = env->vl; \ 2034 uint32_t esz = sizeof(ETYPE); \ 2035 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2036 uint32_t vta = vext_vta(desc); \ 2037 uint32_t i; \ 2038 \ 2039 for (i = env->vstart; i < vl; i++) { \ 2040 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2041 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2042 } \ 2043 env->vstart = 0; \ 2044 /* set tail elements to 1s */ \ 2045 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2046 } 2047 2048 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2049 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2050 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2051 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2052 2053 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2054 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2055 void *vs2, CPURISCVState *env, uint32_t desc) \ 2056 { \ 2057 uint32_t vl = env->vl; \ 2058 uint32_t esz = sizeof(ETYPE); \ 2059 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2060 uint32_t vta = vext_vta(desc); \ 2061 uint32_t i; \ 2062 \ 2063 for (i = env->vstart; i < vl; i++) { \ 2064 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2065 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 2066 (ETYPE)(target_long)s1); \ 2067 *((ETYPE *)vd + H(i)) = d; \ 2068 } \ 2069 env->vstart = 0; \ 2070 /* set tail elements to 1s */ \ 2071 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2072 } 2073 2074 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2075 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2076 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2077 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2078 2079 /* 2080 * Vector Fixed-Point Arithmetic Instructions 2081 */ 2082 2083 /* Vector Single-Width Saturating Add and Subtract */ 2084 2085 /* 2086 * As fixed point instructions probably have round mode and saturation, 2087 * define common macros for fixed point here. 2088 */ 2089 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2090 CPURISCVState *env, int vxrm); 2091 2092 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2093 static inline void \ 2094 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2095 CPURISCVState *env, int vxrm) \ 2096 { \ 2097 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2098 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2099 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2100 } 2101 2102 static inline void 2103 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2104 CPURISCVState *env, 2105 uint32_t vl, uint32_t vm, int vxrm, 2106 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 2107 { 2108 for (uint32_t i = env->vstart; i < vl; i++) { 2109 if (!vm && !vext_elem_mask(v0, i)) { 2110 /* set masked-off elements to 1s */ 2111 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2112 continue; 2113 } 2114 fn(vd, vs1, vs2, i, env, vxrm); 2115 } 2116 env->vstart = 0; 2117 } 2118 2119 static inline void 2120 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2121 CPURISCVState *env, 2122 uint32_t desc, 2123 opivv2_rm_fn *fn, uint32_t esz) 2124 { 2125 uint32_t vm = vext_vm(desc); 2126 uint32_t vl = env->vl; 2127 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2128 uint32_t vta = vext_vta(desc); 2129 uint32_t vma = vext_vma(desc); 2130 2131 switch (env->vxrm) { 2132 case 0: /* rnu */ 2133 vext_vv_rm_1(vd, v0, vs1, vs2, 2134 env, vl, vm, 0, fn, vma, esz); 2135 break; 2136 case 1: /* rne */ 2137 vext_vv_rm_1(vd, v0, vs1, vs2, 2138 env, vl, vm, 1, fn, vma, esz); 2139 break; 2140 case 2: /* rdn */ 2141 vext_vv_rm_1(vd, v0, vs1, vs2, 2142 env, vl, vm, 2, fn, vma, esz); 2143 break; 2144 default: /* rod */ 2145 vext_vv_rm_1(vd, v0, vs1, vs2, 2146 env, vl, vm, 3, fn, vma, esz); 2147 break; 2148 } 2149 /* set tail elements to 1s */ 2150 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2151 } 2152 2153 /* generate helpers for fixed point instructions with OPIVV format */ 2154 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2155 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2156 CPURISCVState *env, uint32_t desc) \ 2157 { \ 2158 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2159 do_##NAME, ESZ); \ 2160 } 2161 2162 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, 2163 uint8_t b) 2164 { 2165 uint8_t res = a + b; 2166 if (res < a) { 2167 res = UINT8_MAX; 2168 env->vxsat = 0x1; 2169 } 2170 return res; 2171 } 2172 2173 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2174 uint16_t b) 2175 { 2176 uint16_t res = a + b; 2177 if (res < a) { 2178 res = UINT16_MAX; 2179 env->vxsat = 0x1; 2180 } 2181 return res; 2182 } 2183 2184 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2185 uint32_t b) 2186 { 2187 uint32_t res = a + b; 2188 if (res < a) { 2189 res = UINT32_MAX; 
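        /*
         * The truncated sum wrapping around (res < a) is the unsigned
         * overflow condition, so the result is clamped to the type
         * maximum and vxsat is raised, matching the 8-, 16- and 64-bit
         * variants around this one.
         */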
2190 env->vxsat = 0x1; 2191 } 2192 return res; 2193 } 2194 2195 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2196 uint64_t b) 2197 { 2198 uint64_t res = a + b; 2199 if (res < a) { 2200 res = UINT64_MAX; 2201 env->vxsat = 0x1; 2202 } 2203 return res; 2204 } 2205 2206 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2207 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2208 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2209 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2210 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2211 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2212 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2213 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2214 2215 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2216 CPURISCVState *env, int vxrm); 2217 2218 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2219 static inline void \ 2220 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2221 CPURISCVState *env, int vxrm) \ 2222 { \ 2223 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2224 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2225 } 2226 2227 static inline void 2228 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2229 CPURISCVState *env, 2230 uint32_t vl, uint32_t vm, int vxrm, 2231 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2232 { 2233 for (uint32_t i = env->vstart; i < vl; i++) { 2234 if (!vm && !vext_elem_mask(v0, i)) { 2235 /* set masked-off elements to 1s */ 2236 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2237 continue; 2238 } 2239 fn(vd, s1, vs2, i, env, vxrm); 2240 } 2241 env->vstart = 0; 2242 } 2243 2244 static inline void 2245 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2246 CPURISCVState *env, 2247 uint32_t desc, 2248 opivx2_rm_fn *fn, uint32_t esz) 2249 { 2250 uint32_t vm = vext_vm(desc); 2251 uint32_t vl = env->vl; 2252 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2253 uint32_t vta = vext_vta(desc); 2254 uint32_t vma = vext_vma(desc); 2255 2256 switch (env->vxrm) { 2257 case 0: /* rnu */ 2258 vext_vx_rm_1(vd, v0, s1, vs2, 2259 env, vl, vm, 0, fn, vma, esz); 2260 break; 2261 case 1: /* rne */ 2262 vext_vx_rm_1(vd, v0, s1, vs2, 2263 env, vl, vm, 1, fn, vma, esz); 2264 break; 2265 case 2: /* rdn */ 2266 vext_vx_rm_1(vd, v0, s1, vs2, 2267 env, vl, vm, 2, fn, vma, esz); 2268 break; 2269 default: /* rod */ 2270 vext_vx_rm_1(vd, v0, s1, vs2, 2271 env, vl, vm, 3, fn, vma, esz); 2272 break; 2273 } 2274 /* set tail elements to 1s */ 2275 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2276 } 2277 2278 /* generate helpers for fixed point instructions with OPIVX format */ 2279 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2280 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2281 void *vs2, CPURISCVState *env, \ 2282 uint32_t desc) \ 2283 { \ 2284 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2285 do_##NAME, ESZ); \ 2286 } 2287 2288 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2289 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2290 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2291 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2292 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2293 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2294 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2295 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2296 2297 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2298 { 2299 int8_t res = a + b; 2300 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2301 res = a > 0 ? 
INT8_MAX : INT8_MIN; 2302 env->vxsat = 0x1; 2303 } 2304 return res; 2305 } 2306 2307 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2308 int16_t b) 2309 { 2310 int16_t res = a + b; 2311 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2312 res = a > 0 ? INT16_MAX : INT16_MIN; 2313 env->vxsat = 0x1; 2314 } 2315 return res; 2316 } 2317 2318 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2319 int32_t b) 2320 { 2321 int32_t res = a + b; 2322 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2323 res = a > 0 ? INT32_MAX : INT32_MIN; 2324 env->vxsat = 0x1; 2325 } 2326 return res; 2327 } 2328 2329 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2330 int64_t b) 2331 { 2332 int64_t res = a + b; 2333 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2334 res = a > 0 ? INT64_MAX : INT64_MIN; 2335 env->vxsat = 0x1; 2336 } 2337 return res; 2338 } 2339 2340 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2341 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2342 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2343 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2344 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2345 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2346 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2347 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2348 2349 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2350 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2351 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2352 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2353 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2354 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2355 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2356 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2357 2358 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2359 uint8_t b) 2360 { 2361 uint8_t res = a - b; 2362 if (res > a) { 2363 res = 0; 2364 env->vxsat = 0x1; 2365 } 2366 return res; 2367 } 2368 2369 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2370 uint16_t b) 2371 { 2372 uint16_t res = a - b; 2373 if (res > a) { 2374 res = 0; 2375 env->vxsat = 0x1; 2376 } 2377 return res; 2378 } 2379 2380 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2381 uint32_t b) 2382 { 2383 uint32_t res = a - b; 2384 if (res > a) { 2385 res = 0; 2386 env->vxsat = 0x1; 2387 } 2388 return res; 2389 } 2390 2391 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2392 uint64_t b) 2393 { 2394 uint64_t res = a - b; 2395 if (res > a) { 2396 res = 0; 2397 env->vxsat = 0x1; 2398 } 2399 return res; 2400 } 2401 2402 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2403 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2404 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2405 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2406 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2407 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2408 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2409 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2410 2411 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2412 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2413 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2414 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2415 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2416 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2417 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2418 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2419 2420 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2421 { 2422 int8_t res = a - b; 2423 if ((res ^ a) & (a ^ b) & INT8_MIN) 
{
        res = a >= 0 ? INT8_MAX : INT8_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
                             int16_t b)
{
    int16_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT16_MIN) {
        res = a >= 0 ? INT16_MAX : INT16_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int32_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT32_MIN) {
        res = a >= 0 ? INT32_MAX : INT32_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT64_MIN) {
        res = a >= 0 ? INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
GEN_VEXT_VV_RM(vssub_vv_b, 1)
GEN_VEXT_VV_RM(vssub_vv_h, 2)
GEN_VEXT_VV_RM(vssub_vv_w, 4)
GEN_VEXT_VV_RM(vssub_vv_d, 8)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
GEN_VEXT_VX_RM(vssub_vx_b, 1)
GEN_VEXT_VX_RM(vssub_vx_h, 2)
GEN_VEXT_VX_RM(vssub_vx_w, 4)
GEN_VEXT_VX_RM(vssub_vx_d, 8)

/* Vector Single-Width Averaging Add and Subtract */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d = extract64(v, shift, 1);
    uint8_t d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    d1 = extract64(v, shift - 1, 1);
    D1 = extract64(v, 0, shift);
    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            D2 = extract64(v, 0, shift - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}

static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int64_t res = (int64_t)a + b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63.
*/ 2526 return ((res >> 1) ^ over) + round; 2527 } 2528 2529 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2530 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2531 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2532 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2533 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2534 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2535 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2536 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2537 2538 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2539 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2540 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2541 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2542 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2543 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2544 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2545 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2546 2547 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2548 uint32_t a, uint32_t b) 2549 { 2550 uint64_t res = (uint64_t)a + b; 2551 uint8_t round = get_round(vxrm, res, 1); 2552 2553 return (res >> 1) + round; 2554 } 2555 2556 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2557 uint64_t a, uint64_t b) 2558 { 2559 uint64_t res = a + b; 2560 uint8_t round = get_round(vxrm, res, 1); 2561 uint64_t over = (uint64_t)(res < a) << 63; 2562 2563 return ((res >> 1) | over) + round; 2564 } 2565 2566 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2567 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2568 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2569 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2570 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2571 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2572 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2573 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2574 2575 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2576 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2577 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2578 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2579 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2580 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2581 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2582 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2583 2584 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, 2585 int32_t b) 2586 { 2587 int64_t res = (int64_t)a - b; 2588 uint8_t round = get_round(vxrm, res, 1); 2589 2590 return (res >> 1) + round; 2591 } 2592 2593 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, 2594 int64_t b) 2595 { 2596 int64_t res = (int64_t)a - b; 2597 uint8_t round = get_round(vxrm, res, 1); 2598 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2599 2600 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2601 return ((res >> 1) ^ over) + round; 2602 } 2603 2604 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2605 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2606 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2607 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2608 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2609 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2610 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2611 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2612 2613 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2614 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2615 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2616 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2617 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2618 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2619 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2620 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2621 2622 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2623 uint32_t a, uint32_t b) 2624 { 2625 int64_t res = (int64_t)a - b; 2626 uint8_t round = get_round(vxrm, res, 1); 2627 2628 return (res >> 1) + round; 2629 } 2630 2631 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2632 uint64_t a, uint64_t b) 2633 { 2634 uint64_t res = (uint64_t)a - b; 2635 uint8_t round = get_round(vxrm, res, 1); 2636 uint64_t over = (uint64_t)(res > a) << 63; 2637 2638 return ((res >> 1) | over) + round; 2639 } 2640 2641 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2642 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2643 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2644 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2645 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2646 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2647 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2648 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2649 2650 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2651 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2652 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2653 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2654 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2655 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2656 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2657 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2658 2659 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2660 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2661 { 2662 uint8_t round; 2663 int16_t res; 2664 2665 res = (int16_t)a * (int16_t)b; 2666 round = get_round(vxrm, res, 7); 2667 res = (res >> 7) + round; 2668 2669 if (res > INT8_MAX) { 2670 env->vxsat = 0x1; 2671 return INT8_MAX; 2672 } else if (res < INT8_MIN) { 2673 env->vxsat = 0x1; 2674 return INT8_MIN; 2675 } else { 2676 return res; 2677 } 2678 } 2679 2680 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2681 { 2682 uint8_t round; 2683 int32_t res; 2684 2685 res = (int32_t)a * (int32_t)b; 2686 round = get_round(vxrm, res, 15); 2687 res = (res >> 15) + round; 2688 2689 if (res > INT16_MAX) { 2690 env->vxsat = 0x1; 2691 return INT16_MAX; 2692 } else if (res < INT16_MIN) { 2693 env->vxsat = 0x1; 2694 return INT16_MIN; 2695 } else { 2696 return res; 2697 } 2698 } 2699 2700 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2701 { 2702 uint8_t round; 2703 int64_t res; 2704 2705 res = (int64_t)a * (int64_t)b; 2706 round = get_round(vxrm, res, 31); 2707 res = (res >> 31) + round; 2708 2709 if (res > INT32_MAX) { 2710 env->vxsat = 0x1; 2711 return INT32_MAX; 2712 } else if (res < INT32_MIN) { 2713 env->vxsat = 0x1; 
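        /*
         * Negative overflow of the scaled product: clamp to the most
         * negative int32, mirroring the positive branch above.
         */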
2714 return INT32_MIN; 2715 } else { 2716 return res; 2717 } 2718 } 2719 2720 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2721 { 2722 uint8_t round; 2723 uint64_t hi_64, lo_64; 2724 int64_t res; 2725 2726 if (a == INT64_MIN && b == INT64_MIN) { 2727 env->vxsat = 1; 2728 return INT64_MAX; 2729 } 2730 2731 muls64(&lo_64, &hi_64, a, b); 2732 round = get_round(vxrm, lo_64, 63); 2733 /* 2734 * Cannot overflow, as there are always 2735 * 2 sign bits after multiply. 2736 */ 2737 res = (hi_64 << 1) | (lo_64 >> 63); 2738 if (round) { 2739 if (res == INT64_MAX) { 2740 env->vxsat = 1; 2741 } else { 2742 res += 1; 2743 } 2744 } 2745 return res; 2746 } 2747 2748 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2749 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2750 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2751 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2752 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2753 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2754 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2755 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2756 2757 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2758 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2759 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2760 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2761 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2762 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2763 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2764 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2765 2766 /* Vector Single-Width Scaling Shift Instructions */ 2767 static inline uint8_t 2768 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2769 { 2770 uint8_t round, shift = b & 0x7; 2771 uint8_t res; 2772 2773 round = get_round(vxrm, a, shift); 2774 res = (a >> shift) + round; 2775 return res; 2776 } 2777 static inline uint16_t 2778 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2779 { 2780 uint8_t round, shift = b & 0xf; 2781 2782 round = get_round(vxrm, a, shift); 2783 return (a >> shift) + round; 2784 } 2785 static inline uint32_t 2786 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2787 { 2788 uint8_t round, shift = b & 0x1f; 2789 2790 round = get_round(vxrm, a, shift); 2791 return (a >> shift) + round; 2792 } 2793 static inline uint64_t 2794 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2795 { 2796 uint8_t round, shift = b & 0x3f; 2797 2798 round = get_round(vxrm, a, shift); 2799 return (a >> shift) + round; 2800 } 2801 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2802 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2803 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2804 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2805 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2806 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2807 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2808 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2809 2810 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2811 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2812 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2813 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2814 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2815 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2816 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2817 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2818 2819 static inline int8_t 2820 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2821 { 2822 uint8_t round, shift = b & 0x7; 2823 2824 round = get_round(vxrm, a, shift); 2825 return (a >> shift) + round; 2826 } 2827 static inline int16_t 2828 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2829 { 2830 uint8_t round, shift = b & 0xf; 2831 2832 round = get_round(vxrm, a, shift); 2833 return (a >> shift) + round; 2834 } 2835 static inline int32_t 2836 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2837 { 2838 uint8_t round, shift = b & 0x1f; 2839 2840 round = get_round(vxrm, a, shift); 2841 return (a >> shift) + round; 2842 } 2843 static inline int64_t 2844 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2845 { 2846 uint8_t round, shift = b & 0x3f; 2847 2848 round = get_round(vxrm, a, shift); 2849 return (a >> shift) + round; 2850 } 2851 2852 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2853 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2854 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2855 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2856 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2857 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2858 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2859 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2860 2861 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2862 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2863 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2864 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2865 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2866 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2867 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2868 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2869 2870 /* Vector Narrowing Fixed-Point Clip Instructions */ 2871 static inline int8_t 2872 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2873 { 2874 uint8_t round, shift = b & 0xf; 2875 int16_t res; 2876 2877 round = get_round(vxrm, a, shift); 2878 res = (a >> shift) + round; 2879 if (res > INT8_MAX) { 2880 env->vxsat = 0x1; 2881 return INT8_MAX; 2882 } else if (res < INT8_MIN) { 2883 env->vxsat = 0x1; 2884 return INT8_MIN; 2885 } else { 2886 return res; 2887 } 2888 } 2889 2890 static inline int16_t 2891 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2892 { 2893 uint8_t round, shift = b & 0x1f; 2894 int32_t res; 2895 2896 round = get_round(vxrm, a, shift); 2897 res = (a >> shift) + round; 2898 if (res > INT16_MAX) { 2899 env->vxsat = 0x1; 2900 return INT16_MAX; 2901 } else if (res < INT16_MIN) { 2902 env->vxsat = 0x1; 2903 return INT16_MIN; 2904 } else { 2905 return res; 2906 } 2907 } 2908 2909 static inline int32_t 2910 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2911 { 2912 uint8_t round, shift = b & 0x3f; 2913 int64_t res; 2914 2915 round = get_round(vxrm, a, shift); 2916 res = (a >> shift) + round; 2917 if (res > INT32_MAX) { 2918 env->vxsat = 0x1; 2919 return INT32_MAX; 2920 } else if (res < INT32_MIN) { 2921 env->vxsat = 0x1; 2922 return INT32_MIN; 2923 } else { 2924 return res; 2925 } 2926 } 2927 2928 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2929 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2930 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2931 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 2932 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 2933 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 2934 2935 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2936 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2937 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2938 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 2939 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 2940 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 2941 2942 static inline uint8_t 2943 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2944 { 2945 
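    /*
     * Narrowing unsigned clip: shift the 2*SEW-wide source right by the
     * low four bits of the shift operand, apply the current fixed-point
     * rounding mode via get_round(), then saturate anything above
     * UINT8_MAX and note it in vxsat.
     */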
uint8_t round, shift = b & 0xf; 2946 uint16_t res; 2947 2948 round = get_round(vxrm, a, shift); 2949 res = (a >> shift) + round; 2950 if (res > UINT8_MAX) { 2951 env->vxsat = 0x1; 2952 return UINT8_MAX; 2953 } else { 2954 return res; 2955 } 2956 } 2957 2958 static inline uint16_t 2959 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2960 { 2961 uint8_t round, shift = b & 0x1f; 2962 uint32_t res; 2963 2964 round = get_round(vxrm, a, shift); 2965 res = (a >> shift) + round; 2966 if (res > UINT16_MAX) { 2967 env->vxsat = 0x1; 2968 return UINT16_MAX; 2969 } else { 2970 return res; 2971 } 2972 } 2973 2974 static inline uint32_t 2975 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2976 { 2977 uint8_t round, shift = b & 0x3f; 2978 uint64_t res; 2979 2980 round = get_round(vxrm, a, shift); 2981 res = (a >> shift) + round; 2982 if (res > UINT32_MAX) { 2983 env->vxsat = 0x1; 2984 return UINT32_MAX; 2985 } else { 2986 return res; 2987 } 2988 } 2989 2990 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2991 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2992 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2993 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 2994 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 2995 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 2996 2997 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2998 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2999 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 3000 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 3001 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 3002 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 3003 3004 /* 3005 * Vector Float Point Arithmetic Instructions 3006 */ 3007 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 3008 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3009 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3010 CPURISCVState *env) \ 3011 { \ 3012 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3013 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3014 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3015 } 3016 3017 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 3018 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3019 void *vs2, CPURISCVState *env, \ 3020 uint32_t desc) \ 3021 { \ 3022 uint32_t vm = vext_vm(desc); \ 3023 uint32_t vl = env->vl; \ 3024 uint32_t total_elems = \ 3025 vext_get_total_elems(env, desc, ESZ); \ 3026 uint32_t vta = vext_vta(desc); \ 3027 uint32_t vma = vext_vma(desc); \ 3028 uint32_t i; \ 3029 \ 3030 for (i = env->vstart; i < vl; i++) { \ 3031 if (!vm && !vext_elem_mask(v0, i)) { \ 3032 /* set masked-off elements to 1s */ \ 3033 vext_set_elems_1s(vd, vma, i * ESZ, \ 3034 (i + 1) * ESZ); \ 3035 continue; \ 3036 } \ 3037 do_##NAME(vd, vs1, vs2, i, env); \ 3038 } \ 3039 env->vstart = 0; \ 3040 /* set tail elements to 1s */ \ 3041 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3042 total_elems * ESZ); \ 3043 } 3044 3045 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3046 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3047 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3048 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 3049 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 3050 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 3051 3052 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3053 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3054 CPURISCVState *env) \ 3055 { \ 3056 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3057 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 3058 } 3059 3060 #define GEN_VEXT_VF(NAME, 
ESZ) \ 3061 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3062 void *vs2, CPURISCVState *env, \ 3063 uint32_t desc) \ 3064 { \ 3065 uint32_t vm = vext_vm(desc); \ 3066 uint32_t vl = env->vl; \ 3067 uint32_t total_elems = \ 3068 vext_get_total_elems(env, desc, ESZ); \ 3069 uint32_t vta = vext_vta(desc); \ 3070 uint32_t vma = vext_vma(desc); \ 3071 uint32_t i; \ 3072 \ 3073 for (i = env->vstart; i < vl; i++) { \ 3074 if (!vm && !vext_elem_mask(v0, i)) { \ 3075 /* set masked-off elements to 1s */ \ 3076 vext_set_elems_1s(vd, vma, i * ESZ, \ 3077 (i + 1) * ESZ); \ 3078 continue; \ 3079 } \ 3080 do_##NAME(vd, s1, vs2, i, env); \ 3081 } \ 3082 env->vstart = 0; \ 3083 /* set tail elements to 1s */ \ 3084 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3085 total_elems * ESZ); \ 3086 } 3087 3088 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3089 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3090 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3091 GEN_VEXT_VF(vfadd_vf_h, 2) 3092 GEN_VEXT_VF(vfadd_vf_w, 4) 3093 GEN_VEXT_VF(vfadd_vf_d, 8) 3094 3095 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3096 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3097 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3098 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 3099 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 3100 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 3101 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3102 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3103 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3104 GEN_VEXT_VF(vfsub_vf_h, 2) 3105 GEN_VEXT_VF(vfsub_vf_w, 4) 3106 GEN_VEXT_VF(vfsub_vf_d, 8) 3107 3108 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3109 { 3110 return float16_sub(b, a, s); 3111 } 3112 3113 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3114 { 3115 return float32_sub(b, a, s); 3116 } 3117 3118 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3119 { 3120 return float64_sub(b, a, s); 3121 } 3122 3123 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3124 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3125 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3126 GEN_VEXT_VF(vfrsub_vf_h, 2) 3127 GEN_VEXT_VF(vfrsub_vf_w, 4) 3128 GEN_VEXT_VF(vfrsub_vf_d, 8) 3129 3130 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3131 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3132 { 3133 return float32_add(float16_to_float32(a, true, s), 3134 float16_to_float32(b, true, s), s); 3135 } 3136 3137 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3138 { 3139 return float64_add(float32_to_float64(a, s), 3140 float32_to_float64(b, s), s); 3141 3142 } 3143 3144 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3145 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3146 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3147 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3148 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3149 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3150 GEN_VEXT_VF(vfwadd_vf_h, 4) 3151 GEN_VEXT_VF(vfwadd_vf_w, 8) 3152 3153 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3154 { 3155 return float32_sub(float16_to_float32(a, true, s), 3156 float16_to_float32(b, true, s), s); 3157 } 3158 3159 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3160 { 3161 return float64_sub(float32_to_float64(a, s), 3162 
float32_to_float64(b, s), s); 3163 3164 } 3165 3166 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3167 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3168 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3169 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3170 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3171 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3172 GEN_VEXT_VF(vfwsub_vf_h, 4) 3173 GEN_VEXT_VF(vfwsub_vf_w, 8) 3174 3175 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3176 { 3177 return float32_add(a, float16_to_float32(b, true, s), s); 3178 } 3179 3180 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3181 { 3182 return float64_add(a, float32_to_float64(b, s), s); 3183 } 3184 3185 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3186 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3187 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3188 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3189 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3190 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3191 GEN_VEXT_VF(vfwadd_wf_h, 4) 3192 GEN_VEXT_VF(vfwadd_wf_w, 8) 3193 3194 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3195 { 3196 return float32_sub(a, float16_to_float32(b, true, s), s); 3197 } 3198 3199 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3200 { 3201 return float64_sub(a, float32_to_float64(b, s), s); 3202 } 3203 3204 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3205 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3206 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3207 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3208 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3209 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3210 GEN_VEXT_VF(vfwsub_wf_h, 4) 3211 GEN_VEXT_VF(vfwsub_wf_w, 8) 3212 3213 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3214 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3215 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3216 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3217 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3218 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3219 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3220 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3221 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3222 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3223 GEN_VEXT_VF(vfmul_vf_h, 2) 3224 GEN_VEXT_VF(vfmul_vf_w, 4) 3225 GEN_VEXT_VF(vfmul_vf_d, 8) 3226 3227 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3228 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3229 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3230 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3231 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3232 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3233 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3234 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3235 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3236 GEN_VEXT_VF(vfdiv_vf_h, 2) 3237 GEN_VEXT_VF(vfdiv_vf_w, 4) 3238 GEN_VEXT_VF(vfdiv_vf_d, 8) 3239 3240 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3241 { 3242 return float16_div(b, a, s); 3243 } 3244 3245 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3246 { 3247 return float32_div(b, a, s); 3248 } 3249 3250 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3251 { 3252 return float64_div(b, a, s); 3253 } 3254 
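/*
 * Illustrative sketch only (not part of the original helpers): adding a
 * reversed-operand scalar op follows the same recipe as the vfrsub/vfrdiv
 * wrappers above.  "float16_rmul" and "vfrmul_vf_h" below are hypothetical
 * names, since RVV defines no reversed multiply; a real addition would also
 * need the usual helper declaration and decode/translate glue elsewhere.
 * The point is that OPFVF2 always passes vs2[i] as the first operand and
 * the scalar as the second, so swapping the operands in a one-line wrapper
 * is all that is needed, and GEN_VEXT_VF then supplies the masked,
 * tail-agnostic element loop.
 */
#if 0   /* example only, kept out of the build */
static uint16_t float16_rmul(uint16_t a, uint16_t b, float_status *s)
{
    return float16_mul(b, a, s);    /* operands swapped, as in float16_rdiv */
}
RVVCALL(OPFVF2, vfrmul_vf_h, OP_UUU_H, H2, H2, float16_rmul)
GEN_VEXT_VF(vfrmul_vf_h, 2)
#endif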
3255 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3256 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3257 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3258 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3259 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3260 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3261 3262 /* Vector Widening Floating-Point Multiply */ 3263 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3264 { 3265 return float32_mul(float16_to_float32(a, true, s), 3266 float16_to_float32(b, true, s), s); 3267 } 3268 3269 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3270 { 3271 return float64_mul(float32_to_float64(a, s), 3272 float32_to_float64(b, s), s); 3273 3274 } 3275 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3276 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3277 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3278 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3279 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3280 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3281 GEN_VEXT_VF(vfwmul_vf_h, 4) 3282 GEN_VEXT_VF(vfwmul_vf_w, 8) 3283 3284 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3285 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3286 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3287 CPURISCVState *env) \ 3288 { \ 3289 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3290 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3291 TD d = *((TD *)vd + HD(i)); \ 3292 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3293 } 3294 3295 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3296 { 3297 return float16_muladd(a, b, d, 0, s); 3298 } 3299 3300 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3301 { 3302 return float32_muladd(a, b, d, 0, s); 3303 } 3304 3305 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3306 { 3307 return float64_muladd(a, b, d, 0, s); 3308 } 3309 3310 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3311 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3312 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3313 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3314 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3315 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3316 3317 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3318 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3319 CPURISCVState *env) \ 3320 { \ 3321 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3322 TD d = *((TD *)vd + HD(i)); \ 3323 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3324 } 3325 3326 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3327 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3328 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3329 GEN_VEXT_VF(vfmacc_vf_h, 2) 3330 GEN_VEXT_VF(vfmacc_vf_w, 4) 3331 GEN_VEXT_VF(vfmacc_vf_d, 8) 3332 3333 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3334 { 3335 return float16_muladd(a, b, d, float_muladd_negate_c | 3336 float_muladd_negate_product, s); 3337 } 3338 3339 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3340 { 3341 return float32_muladd(a, b, d, float_muladd_negate_c | 3342 float_muladd_negate_product, s); 3343 } 3344 3345 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3346 { 3347 return float64_muladd(a, b, d, float_muladd_negate_c | 3348 float_muladd_negate_product, s); 3349 } 3350 3351 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, 
H2, H2, H2, fnmacc16) 3352 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3353 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3354 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3355 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3356 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3357 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3358 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3359 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3360 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3361 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3362 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3363 3364 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3365 { 3366 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3367 } 3368 3369 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3370 { 3371 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3372 } 3373 3374 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3375 { 3376 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3377 } 3378 3379 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3380 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3381 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3382 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3383 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3384 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3385 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3386 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3387 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3388 GEN_VEXT_VF(vfmsac_vf_h, 2) 3389 GEN_VEXT_VF(vfmsac_vf_w, 4) 3390 GEN_VEXT_VF(vfmsac_vf_d, 8) 3391 3392 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3393 { 3394 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3395 } 3396 3397 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3398 { 3399 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3400 } 3401 3402 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3403 { 3404 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3405 } 3406 3407 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3408 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3409 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3410 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3411 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3412 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3413 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3414 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3415 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3416 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3417 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3418 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3419 3420 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3421 { 3422 return float16_muladd(d, b, a, 0, s); 3423 } 3424 3425 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3426 { 3427 return float32_muladd(d, b, a, 0, s); 3428 } 3429 3430 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3431 { 3432 return float64_muladd(d, b, a, 0, s); 3433 } 3434 3435 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3436 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3437 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3438 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) 3439 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3440 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3441 RVVCALL(OPFVF3, vfmadd_vf_h, 
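/*
 * In the scalar (.vf) forms above the x/f-register value arrives as a
 * uint64_t and is narrowed to the element type by the (TX1)(T1)s1 cast in
 * OPFVF3.  The argument order of OP(s2, s1, d) is what distinguishes the
 * two families; for SEW=16, for example:
 *
 *   vfmacc.vf: fmacc16(s2, f, d) = s2 * f + d -> vd[i] = vs2[i] * f[rs1] + vd[i]
 *   vfmadd.vf: fmadd16(s2, f, d) = d * f + s2 -> vd[i] = vd[i] * f[rs1] + vs2[i]
 *
 * i.e. the macc/msac-style helpers use the old vd element as the addend,
 * while the madd/msub-style helpers use it as a multiplicand.
 */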
OP_UUU_H, H2, H2, fmadd16) 3442 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3443 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3444 GEN_VEXT_VF(vfmadd_vf_h, 2) 3445 GEN_VEXT_VF(vfmadd_vf_w, 4) 3446 GEN_VEXT_VF(vfmadd_vf_d, 8) 3447 3448 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3449 { 3450 return float16_muladd(d, b, a, float_muladd_negate_c | 3451 float_muladd_negate_product, s); 3452 } 3453 3454 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3455 { 3456 return float32_muladd(d, b, a, float_muladd_negate_c | 3457 float_muladd_negate_product, s); 3458 } 3459 3460 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3461 { 3462 return float64_muladd(d, b, a, float_muladd_negate_c | 3463 float_muladd_negate_product, s); 3464 } 3465 3466 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3467 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3468 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3469 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3470 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3471 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3472 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3473 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3474 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3475 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3476 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3477 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3478 3479 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3480 { 3481 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3482 } 3483 3484 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3485 { 3486 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3487 } 3488 3489 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3490 { 3491 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3492 } 3493 3494 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3495 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3496 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3497 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3498 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3499 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3500 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3501 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3502 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3503 GEN_VEXT_VF(vfmsub_vf_h, 2) 3504 GEN_VEXT_VF(vfmsub_vf_w, 4) 3505 GEN_VEXT_VF(vfmsub_vf_d, 8) 3506 3507 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3508 { 3509 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3510 } 3511 3512 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3513 { 3514 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3515 } 3516 3517 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3518 { 3519 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3520 } 3521 3522 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3523 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3524 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3525 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3526 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3527 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3528 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3529 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3530 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, 
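/*
 * All of the single-width FMA helpers above are thin wrappers around the
 * softfloat muladd(p, q, r, flags) primitive; the variants differ only in
 * which flags they pass and in whether the old vd element is wired up as
 * the addend r (the fmacc/fmsac-style forms) or as a multiplicand (the
 * fmadd/fmsub-style forms):
 *
 *   no flags                                      :   p * q + r
 *   float_muladd_negate_c                         :   p * q - r
 *   float_muladd_negate_product                   : -(p * q) + r
 *   float_muladd_negate_product | negate_c        : -(p * q) - r
 *
 * Each variant is a single fused operation, i.e. only one rounding at the
 * end.
 */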
H8, fnmsub64)
GEN_VEXT_VF(vfnmsub_vf_h, 2)
GEN_VEXT_VF(vfnmsub_vf_w, 4)
GEN_VEXT_VF(vfnmsub_vf_d, 8)

/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d, 0, s);
}

static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d, 0, s);
}

RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
GEN_VEXT_VF(vfwmacc_vf_h, 4)
GEN_VEXT_VF(vfwmacc_vf_w, 8)

static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(bfloat16_to_float32(a, s),
                          bfloat16_to_float32(b, s), d, 0, s);
}

RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
GEN_VEXT_VF(vfwmaccbf16_vf, 4)

static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}

static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
                          d, float_muladd_negate_c |
                          float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
GEN_VEXT_VF(vfwnmacc_vf_h, 4)
GEN_VEXT_VF(vfwnmacc_vf_w, 8)

static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c, s);
}

static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_c, s);
}

RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
GEN_VEXT_VF(vfwmsac_vf_h, 4)
GEN_VEXT_VF(vfwmsac_vf_w, 8)

static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_product, s);
}

static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d,
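/*
 * Note on the widening forms: the SEW operands are first converted to
 * 2*SEW (every binary16/bfloat16 value is exactly representable in
 * binary32, and every binary32 value in binary64, so the conversions are
 * exact) and the accumulation then uses a single 2*SEW fused multiply-add,
 * so each element sees exactly one rounding.
 */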
float_status *s) 3623 { 3624 return float64_muladd(float32_to_float64(a, s), 3625 float32_to_float64(b, s), d, 3626 float_muladd_negate_product, s); 3627 } 3628 3629 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3630 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3631 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3632 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3633 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3634 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3635 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3636 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3637 3638 /* Vector Floating-Point Square-Root Instruction */ 3639 /* (TD, T2, TX2) */ 3640 #define OP_UU_H uint16_t, uint16_t, uint16_t 3641 #define OP_UU_W uint32_t, uint32_t, uint32_t 3642 #define OP_UU_D uint64_t, uint64_t, uint64_t 3643 3644 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3645 static void do_##NAME(void *vd, void *vs2, int i, \ 3646 CPURISCVState *env) \ 3647 { \ 3648 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3649 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3650 } 3651 3652 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3653 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3654 CPURISCVState *env, uint32_t desc) \ 3655 { \ 3656 uint32_t vm = vext_vm(desc); \ 3657 uint32_t vl = env->vl; \ 3658 uint32_t total_elems = \ 3659 vext_get_total_elems(env, desc, ESZ); \ 3660 uint32_t vta = vext_vta(desc); \ 3661 uint32_t vma = vext_vma(desc); \ 3662 uint32_t i; \ 3663 \ 3664 if (vl == 0) { \ 3665 return; \ 3666 } \ 3667 for (i = env->vstart; i < vl; i++) { \ 3668 if (!vm && !vext_elem_mask(v0, i)) { \ 3669 /* set masked-off elements to 1s */ \ 3670 vext_set_elems_1s(vd, vma, i * ESZ, \ 3671 (i + 1) * ESZ); \ 3672 continue; \ 3673 } \ 3674 do_##NAME(vd, vs2, i, env); \ 3675 } \ 3676 env->vstart = 0; \ 3677 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3678 total_elems * ESZ); \ 3679 } 3680 3681 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3682 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3683 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3684 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3685 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3686 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3687 3688 /* 3689 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3690 * 3691 * Adapted from riscv-v-spec recip.c: 3692 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3693 */ 3694 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3695 { 3696 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3697 uint64_t exp = extract64(f, frac_size, exp_size); 3698 uint64_t frac = extract64(f, 0, frac_size); 3699 3700 const uint8_t lookup_table[] = { 3701 52, 51, 50, 48, 47, 46, 44, 43, 3702 42, 41, 40, 39, 38, 36, 35, 34, 3703 33, 32, 31, 30, 30, 29, 28, 27, 3704 26, 25, 24, 23, 23, 22, 21, 20, 3705 19, 19, 18, 17, 16, 16, 15, 14, 3706 14, 13, 12, 12, 11, 10, 10, 9, 3707 9, 8, 7, 7, 6, 6, 5, 4, 3708 4, 3, 3, 2, 2, 1, 1, 0, 3709 127, 125, 123, 121, 119, 118, 116, 114, 3710 113, 111, 109, 108, 106, 105, 103, 102, 3711 100, 99, 97, 96, 95, 93, 92, 91, 3712 90, 88, 87, 86, 85, 84, 83, 82, 3713 80, 79, 78, 77, 76, 75, 74, 73, 3714 72, 71, 70, 70, 69, 68, 67, 66, 3715 65, 64, 63, 63, 62, 61, 60, 59, 3716 59, 58, 57, 56, 56, 55, 54, 53 3717 }; 3718 const int precision = 7; 3719 3720 if (exp == 0 && frac != 0) { /* subnormal */ 3721 /* Normalize the subnormal. 
*/ 3722 while (extract64(frac, frac_size - 1, 1) == 0) { 3723 exp--; 3724 frac <<= 1; 3725 } 3726 3727 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3728 } 3729 3730 int idx = ((exp & 1) << (precision - 1)) | 3731 (frac >> (frac_size - precision + 1)); 3732 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3733 (frac_size - precision); 3734 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3735 3736 uint64_t val = 0; 3737 val = deposit64(val, 0, frac_size, out_frac); 3738 val = deposit64(val, frac_size, exp_size, out_exp); 3739 val = deposit64(val, frac_size + exp_size, 1, sign); 3740 return val; 3741 } 3742 3743 static float16 frsqrt7_h(float16 f, float_status *s) 3744 { 3745 int exp_size = 5, frac_size = 10; 3746 bool sign = float16_is_neg(f); 3747 3748 /* 3749 * frsqrt7(sNaN) = canonical NaN 3750 * frsqrt7(-inf) = canonical NaN 3751 * frsqrt7(-normal) = canonical NaN 3752 * frsqrt7(-subnormal) = canonical NaN 3753 */ 3754 if (float16_is_signaling_nan(f, s) || 3755 (float16_is_infinity(f) && sign) || 3756 (float16_is_normal(f) && sign) || 3757 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3758 s->float_exception_flags |= float_flag_invalid; 3759 return float16_default_nan(s); 3760 } 3761 3762 /* frsqrt7(qNaN) = canonical NaN */ 3763 if (float16_is_quiet_nan(f, s)) { 3764 return float16_default_nan(s); 3765 } 3766 3767 /* frsqrt7(+-0) = +-inf */ 3768 if (float16_is_zero(f)) { 3769 s->float_exception_flags |= float_flag_divbyzero; 3770 return float16_set_sign(float16_infinity, sign); 3771 } 3772 3773 /* frsqrt7(+inf) = +0 */ 3774 if (float16_is_infinity(f) && !sign) { 3775 return float16_set_sign(float16_zero, sign); 3776 } 3777 3778 /* +normal, +subnormal */ 3779 uint64_t val = frsqrt7(f, exp_size, frac_size); 3780 return make_float16(val); 3781 } 3782 3783 static float32 frsqrt7_s(float32 f, float_status *s) 3784 { 3785 int exp_size = 8, frac_size = 23; 3786 bool sign = float32_is_neg(f); 3787 3788 /* 3789 * frsqrt7(sNaN) = canonical NaN 3790 * frsqrt7(-inf) = canonical NaN 3791 * frsqrt7(-normal) = canonical NaN 3792 * frsqrt7(-subnormal) = canonical NaN 3793 */ 3794 if (float32_is_signaling_nan(f, s) || 3795 (float32_is_infinity(f) && sign) || 3796 (float32_is_normal(f) && sign) || 3797 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3798 s->float_exception_flags |= float_flag_invalid; 3799 return float32_default_nan(s); 3800 } 3801 3802 /* frsqrt7(qNaN) = canonical NaN */ 3803 if (float32_is_quiet_nan(f, s)) { 3804 return float32_default_nan(s); 3805 } 3806 3807 /* frsqrt7(+-0) = +-inf */ 3808 if (float32_is_zero(f)) { 3809 s->float_exception_flags |= float_flag_divbyzero; 3810 return float32_set_sign(float32_infinity, sign); 3811 } 3812 3813 /* frsqrt7(+inf) = +0 */ 3814 if (float32_is_infinity(f) && !sign) { 3815 return float32_set_sign(float32_zero, sign); 3816 } 3817 3818 /* +normal, +subnormal */ 3819 uint64_t val = frsqrt7(f, exp_size, frac_size); 3820 return make_float32(val); 3821 } 3822 3823 static float64 frsqrt7_d(float64 f, float_status *s) 3824 { 3825 int exp_size = 11, frac_size = 52; 3826 bool sign = float64_is_neg(f); 3827 3828 /* 3829 * frsqrt7(sNaN) = canonical NaN 3830 * frsqrt7(-inf) = canonical NaN 3831 * frsqrt7(-normal) = canonical NaN 3832 * frsqrt7(-subnormal) = canonical NaN 3833 */ 3834 if (float64_is_signaling_nan(f, s) || 3835 (float64_is_infinity(f) && sign) || 3836 (float64_is_normal(f) && sign) || 3837 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3838 
s->float_exception_flags |= float_flag_invalid; 3839 return float64_default_nan(s); 3840 } 3841 3842 /* frsqrt7(qNaN) = canonical NaN */ 3843 if (float64_is_quiet_nan(f, s)) { 3844 return float64_default_nan(s); 3845 } 3846 3847 /* frsqrt7(+-0) = +-inf */ 3848 if (float64_is_zero(f)) { 3849 s->float_exception_flags |= float_flag_divbyzero; 3850 return float64_set_sign(float64_infinity, sign); 3851 } 3852 3853 /* frsqrt7(+inf) = +0 */ 3854 if (float64_is_infinity(f) && !sign) { 3855 return float64_set_sign(float64_zero, sign); 3856 } 3857 3858 /* +normal, +subnormal */ 3859 uint64_t val = frsqrt7(f, exp_size, frac_size); 3860 return make_float64(val); 3861 } 3862 3863 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3864 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3865 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3866 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3867 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3868 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3869 3870 /* 3871 * Vector Floating-Point Reciprocal Estimate Instruction 3872 * 3873 * Adapted from riscv-v-spec recip.c: 3874 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3875 */ 3876 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3877 float_status *s) 3878 { 3879 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3880 uint64_t exp = extract64(f, frac_size, exp_size); 3881 uint64_t frac = extract64(f, 0, frac_size); 3882 3883 const uint8_t lookup_table[] = { 3884 127, 125, 123, 121, 119, 117, 116, 114, 3885 112, 110, 109, 107, 105, 104, 102, 100, 3886 99, 97, 96, 94, 93, 91, 90, 88, 3887 87, 85, 84, 83, 81, 80, 79, 77, 3888 76, 75, 74, 72, 71, 70, 69, 68, 3889 66, 65, 64, 63, 62, 61, 60, 59, 3890 58, 57, 56, 55, 54, 53, 52, 51, 3891 50, 49, 48, 47, 46, 45, 44, 43, 3892 42, 41, 40, 40, 39, 38, 37, 36, 3893 35, 35, 34, 33, 32, 31, 31, 30, 3894 29, 28, 28, 27, 26, 25, 25, 24, 3895 23, 23, 22, 21, 21, 20, 19, 19, 3896 18, 17, 17, 16, 15, 15, 14, 14, 3897 13, 12, 12, 11, 11, 10, 9, 9, 3898 8, 8, 7, 7, 6, 5, 5, 4, 3899 4, 3, 3, 2, 2, 1, 1, 0 3900 }; 3901 const int precision = 7; 3902 3903 if (exp == 0 && frac != 0) { /* subnormal */ 3904 /* Normalize the subnormal. */ 3905 while (extract64(frac, frac_size - 1, 1) == 0) { 3906 exp--; 3907 frac <<= 1; 3908 } 3909 3910 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3911 3912 if (exp != 0 && exp != UINT64_MAX) { 3913 /* 3914 * Overflow to inf or max value of same sign, 3915 * depending on sign and rounding mode. 3916 */ 3917 s->float_exception_flags |= (float_flag_inexact | 3918 float_flag_overflow); 3919 3920 if ((s->float_rounding_mode == float_round_to_zero) || 3921 ((s->float_rounding_mode == float_round_down) && !sign) || 3922 ((s->float_rounding_mode == float_round_up) && sign)) { 3923 /* Return greatest/negative finite value. */ 3924 return (sign << (exp_size + frac_size)) | 3925 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3926 } else { 3927 /* Return +-inf. */ 3928 return (sign << (exp_size + frac_size)) | 3929 MAKE_64BIT_MASK(frac_size, exp_size); 3930 } 3931 } 3932 } 3933 3934 int idx = frac >> (frac_size - precision); 3935 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3936 (frac_size - precision); 3937 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3938 3939 if (out_exp == 0 || out_exp == UINT64_MAX) { 3940 /* 3941 * The result is subnormal, but don't raise the underflow exception, 3942 * because there's no additional loss of precision. 
3943 */ 3944 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3945 if (out_exp == UINT64_MAX) { 3946 out_frac >>= 1; 3947 out_exp = 0; 3948 } 3949 } 3950 3951 uint64_t val = 0; 3952 val = deposit64(val, 0, frac_size, out_frac); 3953 val = deposit64(val, frac_size, exp_size, out_exp); 3954 val = deposit64(val, frac_size + exp_size, 1, sign); 3955 return val; 3956 } 3957 3958 static float16 frec7_h(float16 f, float_status *s) 3959 { 3960 int exp_size = 5, frac_size = 10; 3961 bool sign = float16_is_neg(f); 3962 3963 /* frec7(+-inf) = +-0 */ 3964 if (float16_is_infinity(f)) { 3965 return float16_set_sign(float16_zero, sign); 3966 } 3967 3968 /* frec7(+-0) = +-inf */ 3969 if (float16_is_zero(f)) { 3970 s->float_exception_flags |= float_flag_divbyzero; 3971 return float16_set_sign(float16_infinity, sign); 3972 } 3973 3974 /* frec7(sNaN) = canonical NaN */ 3975 if (float16_is_signaling_nan(f, s)) { 3976 s->float_exception_flags |= float_flag_invalid; 3977 return float16_default_nan(s); 3978 } 3979 3980 /* frec7(qNaN) = canonical NaN */ 3981 if (float16_is_quiet_nan(f, s)) { 3982 return float16_default_nan(s); 3983 } 3984 3985 /* +-normal, +-subnormal */ 3986 uint64_t val = frec7(f, exp_size, frac_size, s); 3987 return make_float16(val); 3988 } 3989 3990 static float32 frec7_s(float32 f, float_status *s) 3991 { 3992 int exp_size = 8, frac_size = 23; 3993 bool sign = float32_is_neg(f); 3994 3995 /* frec7(+-inf) = +-0 */ 3996 if (float32_is_infinity(f)) { 3997 return float32_set_sign(float32_zero, sign); 3998 } 3999 4000 /* frec7(+-0) = +-inf */ 4001 if (float32_is_zero(f)) { 4002 s->float_exception_flags |= float_flag_divbyzero; 4003 return float32_set_sign(float32_infinity, sign); 4004 } 4005 4006 /* frec7(sNaN) = canonical NaN */ 4007 if (float32_is_signaling_nan(f, s)) { 4008 s->float_exception_flags |= float_flag_invalid; 4009 return float32_default_nan(s); 4010 } 4011 4012 /* frec7(qNaN) = canonical NaN */ 4013 if (float32_is_quiet_nan(f, s)) { 4014 return float32_default_nan(s); 4015 } 4016 4017 /* +-normal, +-subnormal */ 4018 uint64_t val = frec7(f, exp_size, frac_size, s); 4019 return make_float32(val); 4020 } 4021 4022 static float64 frec7_d(float64 f, float_status *s) 4023 { 4024 int exp_size = 11, frac_size = 52; 4025 bool sign = float64_is_neg(f); 4026 4027 /* frec7(+-inf) = +-0 */ 4028 if (float64_is_infinity(f)) { 4029 return float64_set_sign(float64_zero, sign); 4030 } 4031 4032 /* frec7(+-0) = +-inf */ 4033 if (float64_is_zero(f)) { 4034 s->float_exception_flags |= float_flag_divbyzero; 4035 return float64_set_sign(float64_infinity, sign); 4036 } 4037 4038 /* frec7(sNaN) = canonical NaN */ 4039 if (float64_is_signaling_nan(f, s)) { 4040 s->float_exception_flags |= float_flag_invalid; 4041 return float64_default_nan(s); 4042 } 4043 4044 /* frec7(qNaN) = canonical NaN */ 4045 if (float64_is_quiet_nan(f, s)) { 4046 return float64_default_nan(s); 4047 } 4048 4049 /* +-normal, +-subnormal */ 4050 uint64_t val = frec7(f, exp_size, frac_size, s); 4051 return make_float64(val); 4052 } 4053 4054 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4055 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4056 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4057 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4058 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4059 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4060 4061 /* Vector Floating-Point MIN/MAX Instructions */ 4062 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4063 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
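/*
 * Worked example for the 7-bit estimate helpers above (illustrative only;
 * values derived by hand from frsqrt7() and its lookup table):
 *
 *   frsqrt7_s(4.0f): sign = 0, exp = 129, frac = 0
 *     idx      = ((exp & 1) << 6) | (frac >> 17)   = 64
 *     out_frac = lookup_table[64] << 16            = 127 << 16
 *     out_exp  = (3 * 127 - 1 - exp) / 2           = 125
 *
 * giving (1 + 127/128) * 2^(125 - 127) ~= 0.498 versus the exact
 * 1/sqrt(4.0) = 0.5, i.e. accurate to roughly 2^-7 as the instruction name
 * promises.  frec7() follows the same scheme with its own table, a plain
 * 7-bit fraction index, and out_exp = 2 * 127 - 1 - exp for binary32.
 */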
float32_minimum_number) 4064 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4065 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4066 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4067 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4068 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4069 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4070 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4071 GEN_VEXT_VF(vfmin_vf_h, 2) 4072 GEN_VEXT_VF(vfmin_vf_w, 4) 4073 GEN_VEXT_VF(vfmin_vf_d, 8) 4074 4075 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4076 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4077 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4078 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 4079 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 4080 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 4081 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4082 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4083 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4084 GEN_VEXT_VF(vfmax_vf_h, 2) 4085 GEN_VEXT_VF(vfmax_vf_w, 4) 4086 GEN_VEXT_VF(vfmax_vf_d, 8) 4087 4088 /* Vector Floating-Point Sign-Injection Instructions */ 4089 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4090 { 4091 return deposit64(b, 0, 15, a); 4092 } 4093 4094 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4095 { 4096 return deposit64(b, 0, 31, a); 4097 } 4098 4099 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4100 { 4101 return deposit64(b, 0, 63, a); 4102 } 4103 4104 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4105 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4106 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4107 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 4108 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 4109 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 4110 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4111 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4112 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4113 GEN_VEXT_VF(vfsgnj_vf_h, 2) 4114 GEN_VEXT_VF(vfsgnj_vf_w, 4) 4115 GEN_VEXT_VF(vfsgnj_vf_d, 8) 4116 4117 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4118 { 4119 return deposit64(~b, 0, 15, a); 4120 } 4121 4122 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4123 { 4124 return deposit64(~b, 0, 31, a); 4125 } 4126 4127 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4128 { 4129 return deposit64(~b, 0, 63, a); 4130 } 4131 4132 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4133 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4134 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4135 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 4136 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 4137 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 4138 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4139 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4140 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4141 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4142 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4143 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4144 4145 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4146 { 4147 return deposit64(b ^ a, 0, 15, a); 4148 } 4149 4150 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4151 { 4152 return deposit64(b ^ a, 0, 31, a); 4153 } 4154 4155 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
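/*
 * The sign-injection helpers are pure bit manipulation: deposit64() keeps
 * the sign bit of its first argument and overwrites the lower exponent and
 * fraction bits with those of the magnitude source, so no FP exception can
 * be raised.  For example, with single-precision bit patterns:
 *
 *   fsgnj32 (a = 0x3f800000 (+1.0), b = 0xc0000000 (-2.0)) = 0xbf800000 (-1.0)
 *   fsgnjn32(a = 0x3f800000 (+1.0), b = 0xc0000000 (-2.0)) = 0x3f800000 (+1.0)
 *   fsgnjx32(a, b) takes sign(a) XOR sign(b) with a's magnitude.
 *
 * Since the helpers are invoked as OP(s2, s1), vd gets vs2's magnitude and
 * a sign derived from vs1 (or from f[rs1] for the .vf forms); this is also
 * why the spec defines the vfneg.v and vfabs.v pseudo-instructions as
 * vfsgnjn.vv and vfsgnjx.vv with identical source operands.
 */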
float_status *s) 4156 { 4157 return deposit64(b ^ a, 0, 63, a); 4158 } 4159 4160 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4161 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4162 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4163 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 4164 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 4165 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 4166 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4167 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4168 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4169 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 4170 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 4171 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 4172 4173 /* Vector Floating-Point Compare Instructions */ 4174 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4175 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4176 CPURISCVState *env, uint32_t desc) \ 4177 { \ 4178 uint32_t vm = vext_vm(desc); \ 4179 uint32_t vl = env->vl; \ 4180 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4181 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4182 uint32_t vma = vext_vma(desc); \ 4183 uint32_t i; \ 4184 \ 4185 for (i = env->vstart; i < vl; i++) { \ 4186 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4187 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4188 if (!vm && !vext_elem_mask(v0, i)) { \ 4189 /* set masked-off elements to 1s */ \ 4190 if (vma) { \ 4191 vext_set_elem_mask(vd, i, 1); \ 4192 } \ 4193 continue; \ 4194 } \ 4195 vext_set_elem_mask(vd, i, \ 4196 DO_OP(s2, s1, &env->fp_status)); \ 4197 } \ 4198 env->vstart = 0; \ 4199 /* 4200 * mask destination register are always tail-agnostic 4201 * set tail elements to 1s 4202 */ \ 4203 if (vta_all_1s) { \ 4204 for (; i < total_elems; i++) { \ 4205 vext_set_elem_mask(vd, i, 1); \ 4206 } \ 4207 } \ 4208 } 4209 4210 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4211 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4212 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4213 4214 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4215 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4216 CPURISCVState *env, uint32_t desc) \ 4217 { \ 4218 uint32_t vm = vext_vm(desc); \ 4219 uint32_t vl = env->vl; \ 4220 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4221 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4222 uint32_t vma = vext_vma(desc); \ 4223 uint32_t i; \ 4224 \ 4225 for (i = env->vstart; i < vl; i++) { \ 4226 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4227 if (!vm && !vext_elem_mask(v0, i)) { \ 4228 /* set masked-off elements to 1s */ \ 4229 if (vma) { \ 4230 vext_set_elem_mask(vd, i, 1); \ 4231 } \ 4232 continue; \ 4233 } \ 4234 vext_set_elem_mask(vd, i, \ 4235 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4236 } \ 4237 env->vstart = 0; \ 4238 /* 4239 * mask destination register are always tail-agnostic 4240 * set tail elements to 1s 4241 */ \ 4242 if (vta_all_1s) { \ 4243 for (; i < total_elems; i++) { \ 4244 vext_set_elem_mask(vd, i, 1); \ 4245 } \ 4246 } \ 4247 } 4248 4249 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4250 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4251 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4252 4253 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4254 { 4255 FloatRelation compare = float16_compare_quiet(a, b, s); 4256 return compare != float_relation_equal; 4257 } 4258 4259 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4260 { 4261 FloatRelation compare = 
float32_compare_quiet(a, b, s); 4262 return compare != float_relation_equal; 4263 } 4264 4265 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4266 { 4267 FloatRelation compare = float64_compare_quiet(a, b, s); 4268 return compare != float_relation_equal; 4269 } 4270 4271 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4272 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4273 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4274 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4275 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4276 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4277 4278 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4279 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4280 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4281 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4282 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4283 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4284 4285 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4286 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4287 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4288 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4289 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4290 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4291 4292 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4293 { 4294 FloatRelation compare = float16_compare(a, b, s); 4295 return compare == float_relation_greater; 4296 } 4297 4298 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4299 { 4300 FloatRelation compare = float32_compare(a, b, s); 4301 return compare == float_relation_greater; 4302 } 4303 4304 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4305 { 4306 FloatRelation compare = float64_compare(a, b, s); 4307 return compare == float_relation_greater; 4308 } 4309 4310 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4311 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4312 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4313 4314 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4315 { 4316 FloatRelation compare = float16_compare(a, b, s); 4317 return compare == float_relation_greater || 4318 compare == float_relation_equal; 4319 } 4320 4321 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4322 { 4323 FloatRelation compare = float32_compare(a, b, s); 4324 return compare == float_relation_greater || 4325 compare == float_relation_equal; 4326 } 4327 4328 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4329 { 4330 FloatRelation compare = float64_compare(a, b, s); 4331 return compare == float_relation_greater || 4332 compare == float_relation_equal; 4333 } 4334 4335 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4336 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4337 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4338 4339 /* Vector Floating-Point Classify Instruction */ 4340 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4341 static void do_##NAME(void *vd, void *vs2, int i) \ 4342 { \ 4343 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4344 *((TD *)vd + HD(i)) = OP(s2); \ 4345 } 4346 4347 #define GEN_VEXT_V(NAME, ESZ) \ 4348 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4349 CPURISCVState *env, uint32_t desc) \ 4350 { \ 4351 uint32_t vm = vext_vm(desc); \ 4352 uint32_t vl = env->vl; \ 4353 uint32_t total_elems = \ 4354 vext_get_total_elems(env, desc, ESZ); \ 4355 uint32_t vta = vext_vta(desc); \ 4356 
uint32_t vma = vext_vma(desc); \ 4357 uint32_t i; \ 4358 \ 4359 for (i = env->vstart; i < vl; i++) { \ 4360 if (!vm && !vext_elem_mask(v0, i)) { \ 4361 /* set masked-off elements to 1s */ \ 4362 vext_set_elems_1s(vd, vma, i * ESZ, \ 4363 (i + 1) * ESZ); \ 4364 continue; \ 4365 } \ 4366 do_##NAME(vd, vs2, i); \ 4367 } \ 4368 env->vstart = 0; \ 4369 /* set tail elements to 1s */ \ 4370 vext_set_elems_1s(vd, vta, vl * ESZ, \ 4371 total_elems * ESZ); \ 4372 } 4373 4374 target_ulong fclass_h(uint64_t frs1) 4375 { 4376 float16 f = frs1; 4377 bool sign = float16_is_neg(f); 4378 4379 if (float16_is_infinity(f)) { 4380 return sign ? 1 << 0 : 1 << 7; 4381 } else if (float16_is_zero(f)) { 4382 return sign ? 1 << 3 : 1 << 4; 4383 } else if (float16_is_zero_or_denormal(f)) { 4384 return sign ? 1 << 2 : 1 << 5; 4385 } else if (float16_is_any_nan(f)) { 4386 float_status s = { }; /* for snan_bit_is_one */ 4387 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4388 } else { 4389 return sign ? 1 << 1 : 1 << 6; 4390 } 4391 } 4392 4393 target_ulong fclass_s(uint64_t frs1) 4394 { 4395 float32 f = frs1; 4396 bool sign = float32_is_neg(f); 4397 4398 if (float32_is_infinity(f)) { 4399 return sign ? 1 << 0 : 1 << 7; 4400 } else if (float32_is_zero(f)) { 4401 return sign ? 1 << 3 : 1 << 4; 4402 } else if (float32_is_zero_or_denormal(f)) { 4403 return sign ? 1 << 2 : 1 << 5; 4404 } else if (float32_is_any_nan(f)) { 4405 float_status s = { }; /* for snan_bit_is_one */ 4406 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4407 } else { 4408 return sign ? 1 << 1 : 1 << 6; 4409 } 4410 } 4411 4412 target_ulong fclass_d(uint64_t frs1) 4413 { 4414 float64 f = frs1; 4415 bool sign = float64_is_neg(f); 4416 4417 if (float64_is_infinity(f)) { 4418 return sign ? 1 << 0 : 1 << 7; 4419 } else if (float64_is_zero(f)) { 4420 return sign ? 1 << 3 : 1 << 4; 4421 } else if (float64_is_zero_or_denormal(f)) { 4422 return sign ? 1 << 2 : 1 << 5; 4423 } else if (float64_is_any_nan(f)) { 4424 float_status s = { }; /* for snan_bit_is_one */ 4425 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4426 } else { 4427 return sign ? 1 << 1 : 1 << 6; 4428 } 4429 } 4430 4431 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4432 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4433 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4434 GEN_VEXT_V(vfclass_v_h, 2) 4435 GEN_VEXT_V(vfclass_v_w, 4) 4436 GEN_VEXT_V(vfclass_v_d, 8) 4437 4438 /* Vector Floating-Point Merge Instruction */ 4439 4440 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4441 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4442 CPURISCVState *env, uint32_t desc) \ 4443 { \ 4444 uint32_t vm = vext_vm(desc); \ 4445 uint32_t vl = env->vl; \ 4446 uint32_t esz = sizeof(ETYPE); \ 4447 uint32_t total_elems = \ 4448 vext_get_total_elems(env, desc, esz); \ 4449 uint32_t vta = vext_vta(desc); \ 4450 uint32_t i; \ 4451 \ 4452 for (i = env->vstart; i < vl; i++) { \ 4453 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4454 *((ETYPE *)vd + H(i)) = \ 4455 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4456 } \ 4457 env->vstart = 0; \ 4458 /* set tail elements to 1s */ \ 4459 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4460 } 4461 4462 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4463 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4464 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4465 4466 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4467 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
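 * These single-width conversions keep SEW unchanged and convert each active
 * element using the rounding mode currently in env->fp_status (set up by
 * the translator before the helper runs, as for the scalar conversions).
 * As a reminder of the rounding behaviour: with SEW=32 and
 * round-to-nearest-even, an element holding 3.7f converts to 4 and 2.5f
 * converts to 2, raising the inexact flag in both cases; out-of-range and
 * NaN inputs saturate and raise the invalid flag, as with the scalar
 * fcvt.wu.s family.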
*/ 4468 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4469 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4470 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4471 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4472 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4473 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4474 4475 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4476 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4477 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4478 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4479 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4480 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4481 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4482 4483 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4484 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4485 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4486 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4487 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4488 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4489 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4490 4491 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4492 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4493 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4494 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4495 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4496 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4497 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4498 4499 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4500 /* (TD, T2, TX2) */ 4501 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4502 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4503 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4504 /* 4505 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4506 */ 4507 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4508 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4509 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4510 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4511 4512 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4513 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4514 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4515 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4516 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4517 4518 /* 4519 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. 4520 */ 4521 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4522 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4523 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4524 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4525 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4526 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4527 4528 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4529 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4530 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4531 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4532 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4533 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4534 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4535 4536 /* 4537 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. 
4538 */ 4539 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4540 { 4541 return float16_to_float32(a, true, s); 4542 } 4543 4544 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4545 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4546 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4547 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4548 4549 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32) 4550 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4) 4551 4552 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4553 /* (TD, T2, TX2) */ 4554 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4555 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4556 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4557 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4558 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4559 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4560 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4561 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4562 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4563 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4564 4565 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4566 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4567 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4568 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4569 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4570 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4571 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4572 4573 /* 4574 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. 4575 */ 4576 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4577 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4578 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4579 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4580 4581 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4582 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4583 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4584 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4585 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4586 4587 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4588 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4589 { 4590 return float32_to_float16(a, true, s); 4591 } 4592 4593 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4594 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4595 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4596 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4597 4598 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16) 4599 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2) 4600 4601 /* 4602 * Vector Reduction Operations 4603 */ 4604 /* Vector Single-Width Integer Reduction Instructions */ 4605 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4606 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4607 void *vs2, CPURISCVState *env, \ 4608 uint32_t desc) \ 4609 { \ 4610 uint32_t vm = vext_vm(desc); \ 4611 uint32_t vl = env->vl; \ 4612 uint32_t esz = sizeof(TD); \ 4613 uint32_t vlenb = simd_maxsz(desc); \ 4614 uint32_t vta = vext_vta(desc); \ 4615 uint32_t i; \ 4616 TD s1 = *((TD *)vs1 + HD(0)); \ 4617 \ 4618 for (i = env->vstart; i < vl; i++) { \ 4619 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4620 if (!vm && !vext_elem_mask(v0, i)) { \ 4621 continue; \ 4622 } \ 4623 s1 = OP(s1, (TD)s2); \ 4624 } \ 4625 *((TD *)vd + HD(0)) = s1; \ 4626 env->vstart = 0; \ 4627 /* set tail elements to 1s */ \ 4628 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4629 } 4630 4631 /* vd[0] = sum(vs1[0], vs2[*]) */ 4632 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4633 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4634 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4635 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4636 4637 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4638 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4639 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4640 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4641 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4642 4643 /* vd[0] = max(vs1[0], vs2[*]) */ 4644 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4645 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4646 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4647 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4648 4649 /* vd[0] = minu(vs1[0], vs2[*]) */ 4650 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4651 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4652 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4653 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4654 4655 /* vd[0] = min(vs1[0], vs2[*]) */ 4656 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4657 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4658 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4659 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4660 4661 /* vd[0] = and(vs1[0], vs2[*]) */ 4662 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4663 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4664 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4665 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4666 4667 /* vd[0] = or(vs1[0], vs2[*]) */ 4668 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4669 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4670 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4671 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4672 4673 /* vd[0] = xor(vs1[0], vs2[*]) 
*/ 4674 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4675 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4676 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4677 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4678 4679 /* Vector Widening Integer Reduction Instructions */ 4680 /* signed sum reduction into double-width accumulator */ 4681 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4682 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4683 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4684 4685 /* Unsigned sum reduction into double-width accumulator */ 4686 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4687 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4688 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4689 4690 /* Vector Single-Width Floating-Point Reduction Instructions */ 4691 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4692 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4693 void *vs2, CPURISCVState *env, \ 4694 uint32_t desc) \ 4695 { \ 4696 uint32_t vm = vext_vm(desc); \ 4697 uint32_t vl = env->vl; \ 4698 uint32_t esz = sizeof(TD); \ 4699 uint32_t vlenb = simd_maxsz(desc); \ 4700 uint32_t vta = vext_vta(desc); \ 4701 uint32_t i; \ 4702 TD s1 = *((TD *)vs1 + HD(0)); \ 4703 \ 4704 for (i = env->vstart; i < vl; i++) { \ 4705 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4706 if (!vm && !vext_elem_mask(v0, i)) { \ 4707 continue; \ 4708 } \ 4709 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4710 } \ 4711 *((TD *)vd + HD(0)) = s1; \ 4712 env->vstart = 0; \ 4713 /* set tail elements to 1s */ \ 4714 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4715 } 4716 4717 /* Unordered sum */ 4718 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4719 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4720 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4721 4722 /* Ordered sum */ 4723 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4724 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4725 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4726 4727 /* Maximum value */ 4728 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, 4729 float16_maximum_number) 4730 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, 4731 float32_maximum_number) 4732 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, 4733 float64_maximum_number) 4734 4735 /* Minimum value */ 4736 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, 4737 float16_minimum_number) 4738 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, 4739 float32_minimum_number) 4740 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, 4741 float64_minimum_number) 4742 4743 /* Vector Widening Floating-Point Add Instructions */ 4744 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4745 { 4746 return float32_add(a, float16_to_float32(b, true, s), s); 4747 } 4748 4749 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4750 { 4751 return float64_add(a, float32_to_float64(b, s), s); 4752 } 4753 4754 /* Vector Widening Floating-Point Reduction Instructions */ 4755 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4756 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4757 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4758 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, 
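/*
 * Note that GEN_VEXT_FRED always performs a strictly ordered, left-to-right
 * accumulation starting from vs1[0]:
 *
 *   s1 = vs1[0];
 *   for each active element i in [vstart, vl):
 *       s1 = OP(s1, vs2[i]);
 *   vd[0] = s1;
 *
 * That is exactly what the ordered reductions (vfredosum/vfwredosum)
 * require, and it is also one legal (if not the fastest) implementation of
 * the unordered forms, for which the specification allows any association
 * of the additions.
 */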
fwadd16) 4759 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4760 4761 /* 4762 * Vector Mask Operations 4763 */ 4764 /* Vector Mask-Register Logical Instructions */ 4765 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4766 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4767 void *vs2, CPURISCVState *env, \ 4768 uint32_t desc) \ 4769 { \ 4770 uint32_t vl = env->vl; \ 4771 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4772 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4773 uint32_t i; \ 4774 int a, b; \ 4775 \ 4776 for (i = env->vstart; i < vl; i++) { \ 4777 a = vext_elem_mask(vs1, i); \ 4778 b = vext_elem_mask(vs2, i); \ 4779 vext_set_elem_mask(vd, i, OP(b, a)); \ 4780 } \ 4781 env->vstart = 0; \ 4782 /* 4783 * mask destination register are always tail-agnostic 4784 * set tail elements to 1s 4785 */ \ 4786 if (vta_all_1s) { \ 4787 for (; i < total_elems; i++) { \ 4788 vext_set_elem_mask(vd, i, 1); \ 4789 } \ 4790 } \ 4791 } 4792 4793 #define DO_NAND(N, M) (!(N & M)) 4794 #define DO_ANDNOT(N, M) (N & !M) 4795 #define DO_NOR(N, M) (!(N | M)) 4796 #define DO_ORNOT(N, M) (N | !M) 4797 #define DO_XNOR(N, M) (!(N ^ M)) 4798 4799 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4800 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4801 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4802 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4803 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4804 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4805 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4806 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4807 4808 /* Vector count population in mask vcpop */ 4809 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4810 uint32_t desc) 4811 { 4812 target_ulong cnt = 0; 4813 uint32_t vm = vext_vm(desc); 4814 uint32_t vl = env->vl; 4815 int i; 4816 4817 for (i = env->vstart; i < vl; i++) { 4818 if (vm || vext_elem_mask(v0, i)) { 4819 if (vext_elem_mask(vs2, i)) { 4820 cnt++; 4821 } 4822 } 4823 } 4824 env->vstart = 0; 4825 return cnt; 4826 } 4827 4828 /* vfirst find-first-set mask bit */ 4829 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4830 uint32_t desc) 4831 { 4832 uint32_t vm = vext_vm(desc); 4833 uint32_t vl = env->vl; 4834 int i; 4835 4836 for (i = env->vstart; i < vl; i++) { 4837 if (vm || vext_elem_mask(v0, i)) { 4838 if (vext_elem_mask(vs2, i)) { 4839 return i; 4840 } 4841 } 4842 } 4843 env->vstart = 0; 4844 return -1LL; 4845 } 4846 4847 enum set_mask_type { 4848 ONLY_FIRST = 1, 4849 INCLUDE_FIRST, 4850 BEFORE_FIRST, 4851 }; 4852 4853 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4854 uint32_t desc, enum set_mask_type type) 4855 { 4856 uint32_t vm = vext_vm(desc); 4857 uint32_t vl = env->vl; 4858 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; 4859 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4860 uint32_t vma = vext_vma(desc); 4861 int i; 4862 bool first_mask_bit = false; 4863 4864 for (i = env->vstart; i < vl; i++) { 4865 if (!vm && !vext_elem_mask(v0, i)) { 4866 /* set masked-off elements to 1s */ 4867 if (vma) { 4868 vext_set_elem_mask(vd, i, 1); 4869 } 4870 continue; 4871 } 4872 /* write a zero to all following active elements */ 4873 if (first_mask_bit) { 4874 vext_set_elem_mask(vd, i, 0); 4875 continue; 4876 } 4877 if (vext_elem_mask(vs2, i)) { 4878 first_mask_bit = true; 4879 if (type == BEFORE_FIRST) { 4880 vext_set_elem_mask(vd, i, 0); 4881 } else { 4882 vext_set_elem_mask(vd, i, 1); 4883 } 4884 } else { 4885 if (type == ONLY_FIRST) { 4886 vext_set_elem_mask(vd, i, 0); 4887 } else { 4888 vext_set_elem_mask(vd, i, 1); 4889 } 4890 } 4891 } 4892 env->vstart 
= 0; 4893 /* 4894 * mask destination register are always tail-agnostic 4895 * set tail elements to 1s 4896 */ 4897 if (vta_all_1s) { 4898 for (; i < total_elems; i++) { 4899 vext_set_elem_mask(vd, i, 1); 4900 } 4901 } 4902 } 4903 4904 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4905 uint32_t desc) 4906 { 4907 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4908 } 4909 4910 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4911 uint32_t desc) 4912 { 4913 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4914 } 4915 4916 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4917 uint32_t desc) 4918 { 4919 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4920 } 4921 4922 /* Vector Iota Instruction */ 4923 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4924 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4925 uint32_t desc) \ 4926 { \ 4927 uint32_t vm = vext_vm(desc); \ 4928 uint32_t vl = env->vl; \ 4929 uint32_t esz = sizeof(ETYPE); \ 4930 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4931 uint32_t vta = vext_vta(desc); \ 4932 uint32_t vma = vext_vma(desc); \ 4933 uint32_t sum = 0; \ 4934 int i; \ 4935 \ 4936 for (i = env->vstart; i < vl; i++) { \ 4937 if (!vm && !vext_elem_mask(v0, i)) { \ 4938 /* set masked-off elements to 1s */ \ 4939 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4940 continue; \ 4941 } \ 4942 *((ETYPE *)vd + H(i)) = sum; \ 4943 if (vext_elem_mask(vs2, i)) { \ 4944 sum++; \ 4945 } \ 4946 } \ 4947 env->vstart = 0; \ 4948 /* set tail elements to 1s */ \ 4949 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4950 } 4951 4952 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4953 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4954 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4955 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4956 4957 /* Vector Element Index Instruction */ 4958 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4959 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4960 { \ 4961 uint32_t vm = vext_vm(desc); \ 4962 uint32_t vl = env->vl; \ 4963 uint32_t esz = sizeof(ETYPE); \ 4964 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4965 uint32_t vta = vext_vta(desc); \ 4966 uint32_t vma = vext_vma(desc); \ 4967 int i; \ 4968 \ 4969 for (i = env->vstart; i < vl; i++) { \ 4970 if (!vm && !vext_elem_mask(v0, i)) { \ 4971 /* set masked-off elements to 1s */ \ 4972 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4973 continue; \ 4974 } \ 4975 *((ETYPE *)vd + H(i)) = i; \ 4976 } \ 4977 env->vstart = 0; \ 4978 /* set tail elements to 1s */ \ 4979 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4980 } 4981 4982 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4983 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4984 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4985 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4986 4987 /* 4988 * Vector Permutation Instructions 4989 */ 4990 4991 /* Vector Slide Instructions */ 4992 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4993 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4994 CPURISCVState *env, uint32_t desc) \ 4995 { \ 4996 uint32_t vm = vext_vm(desc); \ 4997 uint32_t vl = env->vl; \ 4998 uint32_t esz = sizeof(ETYPE); \ 4999 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5000 uint32_t vta = vext_vta(desc); \ 5001 uint32_t vma = vext_vma(desc); \ 5002 target_ulong offset = s1, i_min, i; \ 5003 \ 5004 i_min = MAX(env->vstart, offset); \ 5005 for (i = i_min; i < vl; i++) { \ 5006 if (!vm && 
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong i_max, i;                                                \
                                                                          \
    /* copy from vs2 only while the source index i + s1 is below vlmax */ \
    i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));              \
    }                                                                     \
                                                                          \
    /* active elements whose source lies past vlmax are zeroed */         \
    for (i = i_max; i < vl; ++i) {                                        \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        }                                                                 \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDE1UP(BITWIDTH, H)                                   \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1,         \
                                 void *vs2, CPURISCVState *env,           \
                                 uint32_t desc)                           \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == 0) {                                                     \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1UP(8, H1)
GEN_VEXT_VSLIDE1UP(16, H2)
GEN_VEXT_VSLIDE1UP(32, H4)
GEN_VEXT_VSLIDE1UP(64, H8)
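/*
 * Illustrative note (added for clarity): with vl = 4 and x[rs1] = 0xAA,
 * vslide1up.vx produces vd = { 0xAA, vs2[0], vs2[1], vs2[2] } for the
 * active elements.
 */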
#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                 \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1,       \
                                   void *vs2, CPURISCVState *env,         \
                                   uint32_t desc)                         \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)                          \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)

/* Vector Register Gather Instruction */
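/*
 * Illustrative note (added for clarity): vrgather.vv reads an index from
 * vs1[i] and writes vs2[index] (or 0 when index >= VLMAX) to vd[i];
 * vrgatherei16.vv does the same but always takes 16-bit indices, so the
 * index EEW can differ from the data SEW.
 */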
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}
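/*
 * Illustrative note (added for clarity): vrgather.vx splats one source
 * element, e.g. with x[rs1] = 3 every active vd[i] becomes vs2[3]; an
 * out-of-range index (>= VLMAX) yields 0.
 */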

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
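/*
 * Illustrative note (added for clarity): the extension happens in the
 * assignment from the narrow DTYPE element to the wide ETYPE element,
 * e.g. a source byte 0x80 becomes 0xff80 for vsext_vf2_h (signed) and
 * 0x0080 for vzext_vf2_h (unsigned).
 */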