/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
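 *
 * For example, with byte-sized elements on a big-endian host, element 0
 * lives at byte offset 7 of the first 64-bit chunk, which is why
 * H1(0) == (0 ^ 7) == 7 in the big-endian variants below.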
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

static inline uint32_t vext_vta(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA);
}

static inline uint32_t vext_vma(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VMA);
}

static inline uint32_t vext_vta_all_1s(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * Get the total number of elements, including prestart, body and tail
 * elements. Note that when LMUL < 1, the tail includes the elements past
 * VLMAX that are held in the same vector register.
 */
static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
                                            uint32_t esz)
{
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
    return (vlenb << emul) / esz;
}

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks the watchpoint before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check. In user mode, there is no watchpoint support for now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. The guest
 * software can then return here after processing the exception, or never
 * return.
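 *
 * For example, an access that crosses a page boundary is probed in two
 * steps below: first up to the end of the current page, then the
 * remainder on the following page.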
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

/* set agnostic elements to 1s */
static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
                              uint32_t tot)
{
    if (is_agnostic == 0) {
        /* policy undisturbed */
        return;
    }
    if (tot - cnt == 0) {
        return;
    }
    memset(base + cnt, -1, tot - cnt);
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
static void NAME(CPURISCVState *env, abi_ptr addr, \
                 uint32_t idx, void *vd, uintptr_t retaddr) \
{ \
    ETYPE *cur = ((ETYPE *)vd + H(idx)); \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
} \

GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
static void NAME(CPURISCVState *env, abi_ptr addr, \
                 uint32_t idx, void *vd, uintptr_t retaddr) \
{ \
    ETYPE data = *((ETYPE *)vd + H(idx)); \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
    if (nf * max_elems % total_elems != 0) {
        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
        uint32_t registers_used =
            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
                          registers_used * vlenb);
    }
}

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  target_ulong stride, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
                     ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  target_ulong stride, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
                     ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
             uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
    if (nf * max_elems % total_elems != 0) {
        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
        uint32_t registers_used =
            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
                          registers_used * vlenb);
    }
}

/*
 * A masked unit-stride load or store is handled as a special case of the
 * strided operation, with stride = NF * sizeof(ETYPE).
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
                         CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
                     ctzl(sizeof(ETYPE)), GETPC()); \
} \
\
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_us(vd, base, env, desc, LOAD_FN, \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
}

GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
                         CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
                     ctzl(sizeof(ETYPE)), GETPC()); \
} \
\
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_us(vd, base, env, desc, STORE_FN, \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
}

GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 *** unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC());
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC());
}

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
static target_ulong NAME(target_ulong base, \
                         uint32_t idx, void *vs2) \
{ \
    return (base + *((ETYPE *)vs2 + H(idx))); \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
    if (nf * max_elems % total_elems != 0) {
        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
        uint32_t registers_used =
            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
                          registers_used * vlenb);
    }
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
                    STORE_FN, ctzl(sizeof(ETYPE)), \
                    GETPC()); \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
    if (nf * max_elems % total_elems != 0) {
        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
        uint32_t registers_used =
            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
                          registers_used * vlenb);
    }
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
              ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/* Unsigned min/max */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)

/*
 *** load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
    uint32_t max_elems = vlenb >> log2_esz;

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /* load/store rest of elements of current segment pointed to by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
        }
        k++;
    }

    /* load/store elements for rest of segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}

#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME)(void *vd, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
                    ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
void HELPER(NAME)(void *vd, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_whole(vd, base, env, desc, STORE_FN, \
                    ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)

/*
 *** Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...) macro(__VA_ARGS__)

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
{ \
    TX1 s1 = *((T1 *)vs1 + HS1(i)); \
    TX2 s2 = *((T2 *)vs2 + HS2(i)); \
    *((TD *)vd + HD(i)) = OP(s2, s1); \
}
#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivv2_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
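            /*
             * The inactive element has been left undisturbed or filled
             * with 1s according to vma; skip the arithmetic operation.
             */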
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                  void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    do_vext_vv(vd, v0, vs1, vs2, env, desc, \
               do_##NAME, ESZ); \
}

GEN_VEXT_VV(vadd_vv_b, 1)
GEN_VEXT_VV(vadd_vv_h, 2)
GEN_VEXT_VV(vadd_vv_w, 4)
GEN_VEXT_VV(vadd_vv_d, 8)
GEN_VEXT_VV(vsub_vv_b, 1)
GEN_VEXT_VV(vsub_vv_h, 2)
GEN_VEXT_VV(vsub_vv_w, 4)
GEN_VEXT_VV(vsub_vv_d, 8)

typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operand type.
 * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
 */
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
{ \
    TX2 s2 = *((T2 *)vs2 + HS2(i)); \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
}

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivx2_fn fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, s1, vs2, i);
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    do_vext_vx(vd, v0, s1, vs2, env, desc, \
               do_##NAME, ESZ); \
}

GEN_VEXT_VX(vadd_vx_b, 1)
GEN_VEXT_VX(vadd_vx_h, 2)
GEN_VEXT_VX(vadd_vx_w, 4)
GEN_VEXT_VX(vadd_vx_d, 8)
GEN_VEXT_VX(vsub_vx_b, 1)
GEN_VEXT_VX(vsub_vx_h, 2)
GEN_VEXT_VX(vsub_vx_w, 4)
GEN_VEXT_VX(vsub_vx_d, 8)
GEN_VEXT_VX(vrsub_vx_b, 1)
GEN_VEXT_VX(vrsub_vx_h, 2)
GEN_VEXT_VX(vrsub_vx_w, 4)
GEN_VEXT_VX(vrsub_vx_d, 8)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 2)
GEN_VEXT_VV(vwaddu_vv_h, 4)
GEN_VEXT_VV(vwaddu_vv_w, 8)
GEN_VEXT_VV(vwsubu_vv_b, 2)
GEN_VEXT_VV(vwsubu_vv_h, 4)
GEN_VEXT_VV(vwsubu_vv_w, 8)
GEN_VEXT_VV(vwadd_vv_b, 2)
GEN_VEXT_VV(vwadd_vv_h, 4)
GEN_VEXT_VV(vwadd_vv_w, 8)
GEN_VEXT_VV(vwsub_vv_b, 2)
GEN_VEXT_VV(vwsub_vv_h, 4)
GEN_VEXT_VV(vwsub_vv_w, 8)
GEN_VEXT_VV(vwaddu_wv_b, 2)
GEN_VEXT_VV(vwaddu_wv_h, 4)
GEN_VEXT_VV(vwaddu_wv_w, 8)
GEN_VEXT_VV(vwsubu_wv_b, 2)
GEN_VEXT_VV(vwsubu_wv_h, 4)
GEN_VEXT_VV(vwsubu_wv_w, 8)
GEN_VEXT_VV(vwadd_wv_b, 2)
GEN_VEXT_VV(vwadd_wv_h, 4)
GEN_VEXT_VV(vwadd_wv_w, 8)
GEN_VEXT_VV(vwsub_wv_b, 2)
GEN_VEXT_VV(vwsub_wv_h, 4)
GEN_VEXT_VV(vwsub_wv_w, 8)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 2)
GEN_VEXT_VX(vwaddu_vx_h, 4)
GEN_VEXT_VX(vwaddu_vx_w, 8)
GEN_VEXT_VX(vwsubu_vx_b, 2)
GEN_VEXT_VX(vwsubu_vx_h, 4)
GEN_VEXT_VX(vwsubu_vx_w, 8)
GEN_VEXT_VX(vwadd_vx_b, 2)
GEN_VEXT_VX(vwadd_vx_h, 4)
GEN_VEXT_VX(vwadd_vx_w, 8)
GEN_VEXT_VX(vwsub_vx_b, 2)
GEN_VEXT_VX(vwsub_vx_h, 4)
GEN_VEXT_VX(vwsub_vx_w, 8)
GEN_VEXT_VX(vwaddu_wx_b, 2)
GEN_VEXT_VX(vwaddu_wx_h, 4)
GEN_VEXT_VX(vwaddu_wx_w, 8)
GEN_VEXT_VX(vwsubu_wx_b, 2)
GEN_VEXT_VX(vwsubu_wx_h, 4)
GEN_VEXT_VX(vwsubu_wx_w, 8)
GEN_VEXT_VX(vwadd_wx_b, 2)
GEN_VEXT_VX(vwadd_wx_h, 4)
GEN_VEXT_VX(vwadd_wx_w, 8)
GEN_VEXT_VX(vwsub_wx_b, 2)
GEN_VEXT_VX(vwsub_wx_h, 4)
GEN_VEXT_VX(vwsub_wx_w, 8)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = \
        vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t i; \
\
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = vext_elem_mask(v0, i); \
\
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t i; \
\
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = vext_elem_mask(v0, i); \
\
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
                          (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)

#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
    uint32_t i; \
\
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = !vm && vext_elem_mask(v0, i); \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
    } \
    env->vstart = 0; \
    /* the mask destination register is always tail-agnostic */ \
    /* set tail elements to 1s */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
    uint32_t i; \
\
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = !vm && vext_elem_mask(v0, i); \
        vext_set_elem_mask(vd, i, \
                DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
    } \
    env->vstart = 0; \
    /* the mask destination register is always tail-agnostic */ \
    /* set tail elements to 1s */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M) (N << (M))
#define DO_SRL(N, M) (N >> (M))

/* generate the helpers for shift instructions with two vector operands */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(TS1); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t i; \
\
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
        TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* generate the helpers for shift instructions with one vector and one scalar */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(TD); \
    uint32_t total_elems = \
        vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t i; \
\
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
    uint32_t i; \
\
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
    } \
    env->vstart = 0; \
    /* the mask destination register is always tail-agnostic */ \
    /* set tail elements to 1s */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
    uint32_t i; \
\
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        vext_set_elem_mask(vd, i, \
                DO_OP(s2, (ETYPE)(target_long)s1)); \
    } \
    env->vstart = 0; \
    /* the mask destination register is always tail-agnostic */ \
    /* set tail elements to 1s */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1)
GEN_VEXT_VV(vminu_vv_h, 2)
GEN_VEXT_VV(vminu_vv_w, 4)
GEN_VEXT_VV(vminu_vv_d, 8)
GEN_VEXT_VV(vmin_vv_b, 1)
GEN_VEXT_VV(vmin_vv_h, 2)
GEN_VEXT_VV(vmin_vv_w, 4)
GEN_VEXT_VV(vmin_vv_d, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8)
GEN_VEXT_VV(vmax_vv_b, 1)
GEN_VEXT_VV(vmax_vv_h, 2)
GEN_VEXT_VV(vmax_vv_w, 4)
GEN_VEXT_VV(vmax_vv_d, 8)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1)
GEN_VEXT_VX(vminu_vx_h, 2)
GEN_VEXT_VX(vminu_vx_w, 4)
GEN_VEXT_VX(vminu_vx_d, 8)
GEN_VEXT_VX(vmin_vx_b, 1)
GEN_VEXT_VX(vmin_vx_h, 2)
GEN_VEXT_VX(vmin_vx_w, 4)
GEN_VEXT_VX(vmin_vx_d, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8)
GEN_VEXT_VX(vmax_vx_b, 1)
GEN_VEXT_VX(vmax_vx_h, 2)
GEN_VEXT_VX(vmax_vx_w, 4)
GEN_VEXT_VX(vmax_vx_d, 8)

/* Vector Single-Width Integer Multiply Instructions */
#define DO_MUL(N, M) (N * M)
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b, 1)
GEN_VEXT_VV(vmul_vv_h, 2)
GEN_VEXT_VV(vmul_vv_w, 4)
GEN_VEXT_VV(vmul_vv_d, 8)

static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    return (int16_t)s2 * (int16_t)s1 >> 8;
}

static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    return (int32_t)s2 * (int32_t)s1 >> 16;
}

static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    return (int64_t)s2 * (int64_t)s1 >> 32;
}

static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t hi_64, lo_64;

    muls64(&lo_64, &hi_64, s1, s2);
    return hi_64;
}

static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    return (uint16_t)s2 * (uint16_t)s1 >> 8;
}

static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    return (uint32_t)s2 * (uint32_t)s1 >> 16;
}

static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    return (uint64_t)s2 * (uint64_t)s1 >> 32;
}

static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);
    return hi_64;
}

static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    return (int16_t)s2 * (uint16_t)s1 >> 8;
}

static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    return (int32_t)s2 * (uint32_t)s1 >> 16;
}

static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
return (int64_t)s2 * (uint64_t)s1 >> 32; 1639 } 1640 1641 /* 1642 * Let A = signed operand, 1643 * B = unsigned operand 1644 * P = mulu64(A, B), unsigned product 1645 * 1646 * LET X = 2 ** 64 - A, 2's complement of A 1647 * SP = signed product 1648 * THEN 1649 * IF A < 0 1650 * SP = -X * B 1651 * = -(2 ** 64 - A) * B 1652 * = A * B - 2 ** 64 * B 1653 * = P - 2 ** 64 * B 1654 * ELSE 1655 * SP = P 1656 * THEN 1657 * HI_P -= (A < 0 ? B : 0) 1658 */ 1659 1660 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1661 { 1662 uint64_t hi_64, lo_64; 1663 1664 mulu64(&lo_64, &hi_64, s2, s1); 1665 1666 hi_64 -= s2 < 0 ? s1 : 0; 1667 return hi_64; 1668 } 1669 1670 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1671 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1672 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1673 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1674 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1675 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1676 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1677 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1678 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1679 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1680 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1681 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1682 GEN_VEXT_VV(vmulh_vv_b, 1) 1683 GEN_VEXT_VV(vmulh_vv_h, 2) 1684 GEN_VEXT_VV(vmulh_vv_w, 4) 1685 GEN_VEXT_VV(vmulh_vv_d, 8) 1686 GEN_VEXT_VV(vmulhu_vv_b, 1) 1687 GEN_VEXT_VV(vmulhu_vv_h, 2) 1688 GEN_VEXT_VV(vmulhu_vv_w, 4) 1689 GEN_VEXT_VV(vmulhu_vv_d, 8) 1690 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1691 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1692 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1693 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1694 1695 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1696 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1697 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1698 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1699 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1700 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1701 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1702 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1703 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1704 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1705 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1706 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1707 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1708 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1709 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1710 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1711 GEN_VEXT_VX(vmul_vx_b, 1) 1712 GEN_VEXT_VX(vmul_vx_h, 2) 1713 GEN_VEXT_VX(vmul_vx_w, 4) 1714 GEN_VEXT_VX(vmul_vx_d, 8) 1715 GEN_VEXT_VX(vmulh_vx_b, 1) 1716 GEN_VEXT_VX(vmulh_vx_h, 2) 1717 GEN_VEXT_VX(vmulh_vx_w, 4) 1718 GEN_VEXT_VX(vmulh_vx_d, 8) 1719 GEN_VEXT_VX(vmulhu_vx_b, 1) 1720 GEN_VEXT_VX(vmulhu_vx_h, 2) 1721 GEN_VEXT_VX(vmulhu_vx_w, 4) 1722 GEN_VEXT_VX(vmulhu_vx_d, 8) 1723 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1724 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1725 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1726 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1727 1728 /* Vector Integer Divide Instructions */ 1729 #define DO_DIVU(N, M) (unlikely(M == 0) ? 
(__typeof(N))(-1) : N / M) 1730 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1731 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1732 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1733 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1734 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1735 1736 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1737 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1738 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1739 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1740 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1741 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1742 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1743 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1744 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1745 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1746 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1747 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1748 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1749 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1750 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1751 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1752 GEN_VEXT_VV(vdivu_vv_b, 1) 1753 GEN_VEXT_VV(vdivu_vv_h, 2) 1754 GEN_VEXT_VV(vdivu_vv_w, 4) 1755 GEN_VEXT_VV(vdivu_vv_d, 8) 1756 GEN_VEXT_VV(vdiv_vv_b, 1) 1757 GEN_VEXT_VV(vdiv_vv_h, 2) 1758 GEN_VEXT_VV(vdiv_vv_w, 4) 1759 GEN_VEXT_VV(vdiv_vv_d, 8) 1760 GEN_VEXT_VV(vremu_vv_b, 1) 1761 GEN_VEXT_VV(vremu_vv_h, 2) 1762 GEN_VEXT_VV(vremu_vv_w, 4) 1763 GEN_VEXT_VV(vremu_vv_d, 8) 1764 GEN_VEXT_VV(vrem_vv_b, 1) 1765 GEN_VEXT_VV(vrem_vv_h, 2) 1766 GEN_VEXT_VV(vrem_vv_w, 4) 1767 GEN_VEXT_VV(vrem_vv_d, 8) 1768 1769 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1770 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1771 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1772 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1773 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1774 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1775 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1776 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1777 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1778 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1779 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1780 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1781 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1782 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1783 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1784 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1785 GEN_VEXT_VX(vdivu_vx_b, 1) 1786 GEN_VEXT_VX(vdivu_vx_h, 2) 1787 GEN_VEXT_VX(vdivu_vx_w, 4) 1788 GEN_VEXT_VX(vdivu_vx_d, 8) 1789 GEN_VEXT_VX(vdiv_vx_b, 1) 1790 GEN_VEXT_VX(vdiv_vx_h, 2) 1791 GEN_VEXT_VX(vdiv_vx_w, 4) 1792 GEN_VEXT_VX(vdiv_vx_d, 8) 1793 GEN_VEXT_VX(vremu_vx_b, 1) 1794 GEN_VEXT_VX(vremu_vx_h, 2) 1795 GEN_VEXT_VX(vremu_vx_w, 4) 1796 GEN_VEXT_VX(vremu_vx_d, 8) 1797 GEN_VEXT_VX(vrem_vx_b, 1) 1798 GEN_VEXT_VX(vrem_vx_h, 2) 1799 GEN_VEXT_VX(vrem_vx_w, 4) 1800 GEN_VEXT_VX(vrem_vx_d, 8) 1801 1802 /* Vector Widening Integer Multiply Instructions */ 1803 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1804 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1805 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, 
DO_MUL) 1806 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1807 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1808 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1809 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1810 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1811 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1812 GEN_VEXT_VV(vwmul_vv_b, 2) 1813 GEN_VEXT_VV(vwmul_vv_h, 4) 1814 GEN_VEXT_VV(vwmul_vv_w, 8) 1815 GEN_VEXT_VV(vwmulu_vv_b, 2) 1816 GEN_VEXT_VV(vwmulu_vv_h, 4) 1817 GEN_VEXT_VV(vwmulu_vv_w, 8) 1818 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1819 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1820 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1821 1822 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1823 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1824 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1825 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1826 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1827 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1828 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1829 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1830 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1831 GEN_VEXT_VX(vwmul_vx_b, 2) 1832 GEN_VEXT_VX(vwmul_vx_h, 4) 1833 GEN_VEXT_VX(vwmul_vx_w, 8) 1834 GEN_VEXT_VX(vwmulu_vx_b, 2) 1835 GEN_VEXT_VX(vwmulu_vx_h, 4) 1836 GEN_VEXT_VX(vwmulu_vx_w, 8) 1837 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1838 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1839 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1840 1841 /* Vector Single-Width Integer Multiply-Add Instructions */ 1842 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1843 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1844 { \ 1845 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1846 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1847 TD d = *((TD *)vd + HD(i)); \ 1848 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1849 } 1850 1851 #define DO_MACC(N, M, D) (M * N + D) 1852 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1853 #define DO_MADD(N, M, D) (M * D + N) 1854 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1855 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1856 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1857 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1858 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1859 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1860 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1861 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1862 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1863 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1864 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1865 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1866 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1867 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1868 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1869 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1870 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1871 GEN_VEXT_VV(vmacc_vv_b, 1) 1872 GEN_VEXT_VV(vmacc_vv_h, 2) 1873 GEN_VEXT_VV(vmacc_vv_w, 4) 1874 GEN_VEXT_VV(vmacc_vv_d, 8) 1875 GEN_VEXT_VV(vnmsac_vv_b, 1) 1876 GEN_VEXT_VV(vnmsac_vv_h, 2) 1877 GEN_VEXT_VV(vnmsac_vv_w, 4) 1878 GEN_VEXT_VV(vnmsac_vv_d, 8) 1879 GEN_VEXT_VV(vmadd_vv_b, 1) 1880 GEN_VEXT_VV(vmadd_vv_h, 2) 1881 GEN_VEXT_VV(vmadd_vv_w, 4) 1882 GEN_VEXT_VV(vmadd_vv_d, 8) 1883 
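/*
 * Illustrative sketch, not part of the original helpers: roughly what
 * RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) above expands
 * to, assuming OP_SSS_B supplies int8_t for every type parameter (an
 * assumption about a macro defined elsewhere in this file).  The guard
 * macro is hypothetical, so the sketch is never built by default.
 */
#ifdef VEXT_OPIVV3_EXPANSION_SKETCH
static void do_vmacc_vv_b_sketch(void *vd, void *vs1, void *vs2, int i)
{
    int8_t s1 = *((int8_t *)vs1 + H1(i));
    int8_t s2 = *((int8_t *)vs2 + H1(i));
    int8_t d = *((int8_t *)vd + H1(i));
    /* DO_MACC(N, M, D) is (M * N + D), invoked here as OP(s2, s1, d) */
    *((int8_t *)vd + H1(i)) = s1 * s2 + d;
}
#endif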
GEN_VEXT_VV(vnmsub_vv_b, 1) 1884 GEN_VEXT_VV(vnmsub_vv_h, 2) 1885 GEN_VEXT_VV(vnmsub_vv_w, 4) 1886 GEN_VEXT_VV(vnmsub_vv_d, 8) 1887 1888 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1889 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1890 { \ 1891 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1892 TD d = *((TD *)vd + HD(i)); \ 1893 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1894 } 1895 1896 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1897 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1898 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1899 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1900 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1901 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1902 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1903 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1904 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1905 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1906 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1907 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1908 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1909 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1910 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1911 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1912 GEN_VEXT_VX(vmacc_vx_b, 1) 1913 GEN_VEXT_VX(vmacc_vx_h, 2) 1914 GEN_VEXT_VX(vmacc_vx_w, 4) 1915 GEN_VEXT_VX(vmacc_vx_d, 8) 1916 GEN_VEXT_VX(vnmsac_vx_b, 1) 1917 GEN_VEXT_VX(vnmsac_vx_h, 2) 1918 GEN_VEXT_VX(vnmsac_vx_w, 4) 1919 GEN_VEXT_VX(vnmsac_vx_d, 8) 1920 GEN_VEXT_VX(vmadd_vx_b, 1) 1921 GEN_VEXT_VX(vmadd_vx_h, 2) 1922 GEN_VEXT_VX(vmadd_vx_w, 4) 1923 GEN_VEXT_VX(vmadd_vx_d, 8) 1924 GEN_VEXT_VX(vnmsub_vx_b, 1) 1925 GEN_VEXT_VX(vnmsub_vx_h, 2) 1926 GEN_VEXT_VX(vnmsub_vx_w, 4) 1927 GEN_VEXT_VX(vnmsub_vx_d, 8) 1928 1929 /* Vector Widening Integer Multiply-Add Instructions */ 1930 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1931 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1932 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1933 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1934 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1935 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1936 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1937 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1938 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1939 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1940 GEN_VEXT_VV(vwmaccu_vv_h, 4) 1941 GEN_VEXT_VV(vwmaccu_vv_w, 8) 1942 GEN_VEXT_VV(vwmacc_vv_b, 2) 1943 GEN_VEXT_VV(vwmacc_vv_h, 4) 1944 GEN_VEXT_VV(vwmacc_vv_w, 8) 1945 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 1946 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 1947 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 1948 1949 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1950 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1951 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1952 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1953 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1954 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1955 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1956 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1957 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1958 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1959 RVVCALL(OPIVX3, 
vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1960 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1961 GEN_VEXT_VX(vwmaccu_vx_b, 2) 1962 GEN_VEXT_VX(vwmaccu_vx_h, 4) 1963 GEN_VEXT_VX(vwmaccu_vx_w, 8) 1964 GEN_VEXT_VX(vwmacc_vx_b, 2) 1965 GEN_VEXT_VX(vwmacc_vx_h, 4) 1966 GEN_VEXT_VX(vwmacc_vx_w, 8) 1967 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 1968 GEN_VEXT_VX(vwmaccsu_vx_h, 4) 1969 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 1970 GEN_VEXT_VX(vwmaccus_vx_b, 2) 1971 GEN_VEXT_VX(vwmaccus_vx_h, 4) 1972 GEN_VEXT_VX(vwmaccus_vx_w, 8) 1973 1974 /* Vector Integer Merge and Move Instructions */ 1975 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1976 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1977 uint32_t desc) \ 1978 { \ 1979 uint32_t vl = env->vl; \ 1980 uint32_t esz = sizeof(ETYPE); \ 1981 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1982 uint32_t vta = vext_vta(desc); \ 1983 uint32_t i; \ 1984 \ 1985 for (i = env->vstart; i < vl; i++) { \ 1986 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1987 *((ETYPE *)vd + H(i)) = s1; \ 1988 } \ 1989 env->vstart = 0; \ 1990 /* set tail elements to 1s */ \ 1991 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1992 } 1993 1994 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1995 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1996 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1997 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1998 1999 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 2000 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2001 uint32_t desc) \ 2002 { \ 2003 uint32_t vl = env->vl; \ 2004 uint32_t esz = sizeof(ETYPE); \ 2005 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2006 uint32_t vta = vext_vta(desc); \ 2007 uint32_t i; \ 2008 \ 2009 for (i = env->vstart; i < vl; i++) { \ 2010 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2011 } \ 2012 env->vstart = 0; \ 2013 /* set tail elements to 1s */ \ 2014 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2015 } 2016 2017 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2018 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2019 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2020 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2021 2022 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2023 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2024 CPURISCVState *env, uint32_t desc) \ 2025 { \ 2026 uint32_t vl = env->vl; \ 2027 uint32_t esz = sizeof(ETYPE); \ 2028 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2029 uint32_t vta = vext_vta(desc); \ 2030 uint32_t i; \ 2031 \ 2032 for (i = env->vstart; i < vl; i++) { \ 2033 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2034 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2035 } \ 2036 env->vstart = 0; \ 2037 /* set tail elements to 1s */ \ 2038 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2039 } 2040 2041 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2042 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2043 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2044 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2045 2046 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2047 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2048 void *vs2, CPURISCVState *env, uint32_t desc) \ 2049 { \ 2050 uint32_t vl = env->vl; \ 2051 uint32_t esz = sizeof(ETYPE); \ 2052 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2053 uint32_t vta = vext_vta(desc); \ 2054 uint32_t i; \ 2055 \ 2056 for (i = env->vstart; i < vl; i++) { \ 2057 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2058 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 2059 (ETYPE)(target_long)s1); \ 2060 *((ETYPE *)vd + H(i)) = d; \ 2061 } \ 2062 env->vstart = 0; \ 2063 /* set tail elements to 1s */ \ 2064 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2065 } 2066 2067 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2068 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2069 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2070 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2071 2072 /* 2073 *** Vector Fixed-Point Arithmetic Instructions 2074 */ 2075 2076 /* Vector Single-Width Saturating Add and Subtract */ 2077 2078 /* 2079 * As fixed point instructions probably have round mode and saturation, 2080 * define common macros for fixed point here. 2081 */ 2082 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2083 CPURISCVState *env, int vxrm); 2084 2085 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2086 static inline void \ 2087 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2088 CPURISCVState *env, int vxrm) \ 2089 { \ 2090 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2091 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2092 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2093 } 2094 2095 static inline void 2096 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2097 CPURISCVState *env, 2098 uint32_t vl, uint32_t vm, int vxrm, 2099 opivv2_rm_fn *fn) 2100 { 2101 for (uint32_t i = env->vstart; i < vl; i++) { 2102 if (!vm && !vext_elem_mask(v0, i)) { 2103 continue; 2104 } 2105 fn(vd, vs1, vs2, i, env, vxrm); 2106 } 2107 env->vstart = 0; 2108 } 2109 2110 static inline void 2111 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2112 CPURISCVState *env, 2113 uint32_t desc, 2114 opivv2_rm_fn *fn, uint32_t esz) 2115 { 2116 uint32_t vm = vext_vm(desc); 2117 uint32_t vl = env->vl; 2118 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2119 uint32_t vta = vext_vta(desc); 2120 2121 switch (env->vxrm) { 2122 case 0: /* rnu */ 2123 vext_vv_rm_1(vd, v0, vs1, vs2, 2124 env, vl, vm, 0, fn); 2125 break; 2126 case 1: /* rne */ 2127 vext_vv_rm_1(vd, v0, vs1, vs2, 2128 env, vl, vm, 1, fn); 2129 break; 2130 case 2: /* rdn */ 2131 vext_vv_rm_1(vd, v0, vs1, vs2, 2132 env, vl, vm, 2, fn); 2133 break; 2134 default: /* rod */ 2135 vext_vv_rm_1(vd, v0, vs1, vs2, 2136 env, vl, vm, 3, fn); 2137 break; 2138 } 2139 /* set tail elements to 1s */ 2140 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2141 } 2142 2143 /* generate helpers for fixed point instructions with OPIVV format */ 2144 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2145 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2146 CPURISCVState *env, uint32_t desc) \ 2147 { \ 2148 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2149 do_##NAME, ESZ); \ 2150 } 2151 2152 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2153 { 2154 uint8_t res = a + b; 2155 if (res < a) { 2156 res = UINT8_MAX; 2157 env->vxsat = 0x1; 2158 } 2159 return res; 2160 } 2161 2162 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2163 uint16_t b) 2164 { 2165 uint16_t res = a + b; 2166 if (res < a) { 2167 res = UINT16_MAX; 2168 env->vxsat = 0x1; 2169 } 2170 return res; 2171 } 2172 2173 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2174 uint32_t b) 2175 { 2176 uint32_t res = a + b; 2177 if (res < a) { 2178 res = UINT32_MAX; 2179 env->vxsat = 0x1; 2180 } 2181 return res; 2182 } 2183 2184 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2185 uint64_t b) 2186 { 2187 uint64_t res = a + b; 2188 if (res < a) { 
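        /* wrap-around means the unsigned sum overflowed: saturate and flag vxsat */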
2189 res = UINT64_MAX; 2190 env->vxsat = 0x1; 2191 } 2192 return res; 2193 } 2194 2195 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2196 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2197 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2198 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2199 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2200 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2201 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2202 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2203 2204 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2205 CPURISCVState *env, int vxrm); 2206 2207 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2208 static inline void \ 2209 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2210 CPURISCVState *env, int vxrm) \ 2211 { \ 2212 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2213 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2214 } 2215 2216 static inline void 2217 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2218 CPURISCVState *env, 2219 uint32_t vl, uint32_t vm, int vxrm, 2220 opivx2_rm_fn *fn) 2221 { 2222 for (uint32_t i = env->vstart; i < vl; i++) { 2223 if (!vm && !vext_elem_mask(v0, i)) { 2224 continue; 2225 } 2226 fn(vd, s1, vs2, i, env, vxrm); 2227 } 2228 env->vstart = 0; 2229 } 2230 2231 static inline void 2232 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2233 CPURISCVState *env, 2234 uint32_t desc, 2235 opivx2_rm_fn *fn, uint32_t esz) 2236 { 2237 uint32_t vm = vext_vm(desc); 2238 uint32_t vl = env->vl; 2239 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2240 uint32_t vta = vext_vta(desc); 2241 2242 switch (env->vxrm) { 2243 case 0: /* rnu */ 2244 vext_vx_rm_1(vd, v0, s1, vs2, 2245 env, vl, vm, 0, fn); 2246 break; 2247 case 1: /* rne */ 2248 vext_vx_rm_1(vd, v0, s1, vs2, 2249 env, vl, vm, 1, fn); 2250 break; 2251 case 2: /* rdn */ 2252 vext_vx_rm_1(vd, v0, s1, vs2, 2253 env, vl, vm, 2, fn); 2254 break; 2255 default: /* rod */ 2256 vext_vx_rm_1(vd, v0, s1, vs2, 2257 env, vl, vm, 3, fn); 2258 break; 2259 } 2260 /* set tail elements to 1s */ 2261 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2262 } 2263 2264 /* generate helpers for fixed point instructions with OPIVX format */ 2265 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2266 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2267 void *vs2, CPURISCVState *env, uint32_t desc) \ 2268 { \ 2269 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2270 do_##NAME, ESZ); \ 2271 } 2272 2273 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2274 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2275 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2276 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2277 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2278 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2279 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2280 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2281 2282 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2283 { 2284 int8_t res = a + b; 2285 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2286 res = a > 0 ? INT8_MAX : INT8_MIN; 2287 env->vxsat = 0x1; 2288 } 2289 return res; 2290 } 2291 2292 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2293 { 2294 int16_t res = a + b; 2295 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2296 res = a > 0 ? 
INT16_MAX : INT16_MIN; 2297 env->vxsat = 0x1; 2298 } 2299 return res; 2300 } 2301 2302 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2303 { 2304 int32_t res = a + b; 2305 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2306 res = a > 0 ? INT32_MAX : INT32_MIN; 2307 env->vxsat = 0x1; 2308 } 2309 return res; 2310 } 2311 2312 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2313 { 2314 int64_t res = a + b; 2315 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2316 res = a > 0 ? INT64_MAX : INT64_MIN; 2317 env->vxsat = 0x1; 2318 } 2319 return res; 2320 } 2321 2322 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2323 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2324 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2325 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2326 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2327 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2328 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2329 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2330 2331 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2332 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2333 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2334 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2335 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2336 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2337 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2338 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2339 2340 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2341 { 2342 uint8_t res = a - b; 2343 if (res > a) { 2344 res = 0; 2345 env->vxsat = 0x1; 2346 } 2347 return res; 2348 } 2349 2350 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2351 uint16_t b) 2352 { 2353 uint16_t res = a - b; 2354 if (res > a) { 2355 res = 0; 2356 env->vxsat = 0x1; 2357 } 2358 return res; 2359 } 2360 2361 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2362 uint32_t b) 2363 { 2364 uint32_t res = a - b; 2365 if (res > a) { 2366 res = 0; 2367 env->vxsat = 0x1; 2368 } 2369 return res; 2370 } 2371 2372 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2373 uint64_t b) 2374 { 2375 uint64_t res = a - b; 2376 if (res > a) { 2377 res = 0; 2378 env->vxsat = 0x1; 2379 } 2380 return res; 2381 } 2382 2383 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2384 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2385 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2386 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2387 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2388 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2389 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2390 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2391 2392 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2393 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2394 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2395 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2396 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2397 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2398 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2399 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2400 2401 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2402 { 2403 int8_t res = a - b; 2404 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2405 res = a >= 0 ? 
INT8_MAX : INT8_MIN; 2406 env->vxsat = 0x1; 2407 } 2408 return res; 2409 } 2410 2411 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2412 { 2413 int16_t res = a - b; 2414 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2415 res = a >= 0 ? INT16_MAX : INT16_MIN; 2416 env->vxsat = 0x1; 2417 } 2418 return res; 2419 } 2420 2421 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2422 { 2423 int32_t res = a - b; 2424 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2425 res = a >= 0 ? INT32_MAX : INT32_MIN; 2426 env->vxsat = 0x1; 2427 } 2428 return res; 2429 } 2430 2431 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2432 { 2433 int64_t res = a - b; 2434 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2435 res = a >= 0 ? INT64_MAX : INT64_MIN; 2436 env->vxsat = 0x1; 2437 } 2438 return res; 2439 } 2440 2441 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2442 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2443 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2444 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2445 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2446 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2447 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2448 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2449 2450 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2451 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2452 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2453 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2454 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2455 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2456 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2457 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2458 2459 /* Vector Single-Width Averaging Add and Subtract */ 2460 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2461 { 2462 uint8_t d = extract64(v, shift, 1); 2463 uint8_t d1; 2464 uint64_t D1, D2; 2465 2466 if (shift == 0 || shift > 64) { 2467 return 0; 2468 } 2469 2470 d1 = extract64(v, shift - 1, 1); 2471 D1 = extract64(v, 0, shift); 2472 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2473 return d1; 2474 } else if (vxrm == 1) { /* round-to-nearest-even */ 2475 if (shift > 1) { 2476 D2 = extract64(v, 0, shift - 1); 2477 return d1 & ((D2 != 0) | d); 2478 } else { 2479 return d1 & d; 2480 } 2481 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2482 return !d & (D1 != 0); 2483 } 2484 return 0; /* round-down (truncate) */ 2485 } 2486 2487 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2488 { 2489 int64_t res = (int64_t)a + b; 2490 uint8_t round = get_round(vxrm, res, 1); 2491 2492 return (res >> 1) + round; 2493 } 2494 2495 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2496 { 2497 int64_t res = a + b; 2498 uint8_t round = get_round(vxrm, res, 1); 2499 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2500 2501 /* With signed overflow, bit 64 is inverse of bit 63. 
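     * XOR-ing 'over' into the shifted sum flips bit 63 back to the value
     * it would have in the full 65-bit result.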
*/ 2502 return ((res >> 1) ^ over) + round; 2503 } 2504 2505 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2506 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2507 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2508 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2509 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2510 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2511 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2512 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2513 2514 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2515 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2516 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2517 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2518 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2519 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2520 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2521 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2522 2523 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2524 uint32_t a, uint32_t b) 2525 { 2526 uint64_t res = (uint64_t)a + b; 2527 uint8_t round = get_round(vxrm, res, 1); 2528 2529 return (res >> 1) + round; 2530 } 2531 2532 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2533 uint64_t a, uint64_t b) 2534 { 2535 uint64_t res = a + b; 2536 uint8_t round = get_round(vxrm, res, 1); 2537 uint64_t over = (uint64_t)(res < a) << 63; 2538 2539 return ((res >> 1) | over) + round; 2540 } 2541 2542 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2543 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2544 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2545 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2546 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2547 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2548 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2549 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2550 2551 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2552 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2553 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2554 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2555 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2556 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2557 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2558 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2559 2560 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2561 { 2562 int64_t res = (int64_t)a - b; 2563 uint8_t round = get_round(vxrm, res, 1); 2564 2565 return (res >> 1) + round; 2566 } 2567 2568 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2569 { 2570 int64_t res = (int64_t)a - b; 2571 uint8_t round = get_round(vxrm, res, 1); 2572 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2573 2574 /* With signed overflow, bit 64 is inverse of bit 63. 
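     * (for subtraction, overflow is detected with (res ^ a) & (a ^ b),
     * since it can only happen when the operands have opposite signs)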
*/ 2575 return ((res >> 1) ^ over) + round; 2576 } 2577 2578 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2579 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2580 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2581 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2582 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2583 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2584 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2585 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2586 2587 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2588 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2589 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2590 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2591 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2592 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2593 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2594 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2595 2596 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2597 uint32_t a, uint32_t b) 2598 { 2599 int64_t res = (int64_t)a - b; 2600 uint8_t round = get_round(vxrm, res, 1); 2601 2602 return (res >> 1) + round; 2603 } 2604 2605 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2606 uint64_t a, uint64_t b) 2607 { 2608 uint64_t res = (uint64_t)a - b; 2609 uint8_t round = get_round(vxrm, res, 1); 2610 uint64_t over = (uint64_t)(res > a) << 63; 2611 2612 return ((res >> 1) | over) + round; 2613 } 2614 2615 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2616 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2617 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2618 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2619 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2620 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2621 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2622 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2623 2624 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2625 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2626 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2627 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2628 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2629 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2630 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2631 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2632 2633 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2634 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2635 { 2636 uint8_t round; 2637 int16_t res; 2638 2639 res = (int16_t)a * (int16_t)b; 2640 round = get_round(vxrm, res, 7); 2641 res = (res >> 7) + round; 2642 2643 if (res > INT8_MAX) { 2644 env->vxsat = 0x1; 2645 return INT8_MAX; 2646 } else if (res < INT8_MIN) { 2647 env->vxsat = 0x1; 2648 return INT8_MIN; 2649 } else { 2650 return res; 2651 } 2652 } 2653 2654 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2655 { 2656 uint8_t round; 2657 int32_t res; 2658 2659 res = (int32_t)a * (int32_t)b; 2660 round = get_round(vxrm, res, 15); 2661 res = (res >> 15) + round; 2662 2663 if (res > INT16_MAX) { 2664 env->vxsat = 0x1; 2665 return INT16_MAX; 2666 } else if (res < INT16_MIN) { 2667 env->vxsat = 0x1; 2668 return INT16_MIN; 2669 } else { 2670 return res; 2671 } 2672 } 2673 2674 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2675 { 2676 uint8_t round; 2677 int64_t res; 2678 2679 res = (int64_t)a * (int64_t)b; 2680 round = get_round(vxrm, res, 31); 2681 res = (res >> 31) + round; 2682 2683 if (res > INT32_MAX) { 2684 env->vxsat = 0x1; 2685 return INT32_MAX; 2686 } else if (res < INT32_MIN) { 2687 env->vxsat = 0x1; 
2688 return INT32_MIN; 2689 } else { 2690 return res; 2691 } 2692 } 2693 2694 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2695 { 2696 uint8_t round; 2697 uint64_t hi_64, lo_64; 2698 int64_t res; 2699 2700 if (a == INT64_MIN && b == INT64_MIN) { 2701 env->vxsat = 1; 2702 return INT64_MAX; 2703 } 2704 2705 muls64(&lo_64, &hi_64, a, b); 2706 round = get_round(vxrm, lo_64, 63); 2707 /* 2708 * Cannot overflow, as there are always 2709 * 2 sign bits after multiply. 2710 */ 2711 res = (hi_64 << 1) | (lo_64 >> 63); 2712 if (round) { 2713 if (res == INT64_MAX) { 2714 env->vxsat = 1; 2715 } else { 2716 res += 1; 2717 } 2718 } 2719 return res; 2720 } 2721 2722 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2723 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2724 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2725 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2726 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2727 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2728 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2729 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2730 2731 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2732 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2733 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2734 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2735 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2736 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2737 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2738 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2739 2740 /* Vector Single-Width Scaling Shift Instructions */ 2741 static inline uint8_t 2742 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2743 { 2744 uint8_t round, shift = b & 0x7; 2745 uint8_t res; 2746 2747 round = get_round(vxrm, a, shift); 2748 res = (a >> shift) + round; 2749 return res; 2750 } 2751 static inline uint16_t 2752 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2753 { 2754 uint8_t round, shift = b & 0xf; 2755 uint16_t res; 2756 2757 round = get_round(vxrm, a, shift); 2758 res = (a >> shift) + round; 2759 return res; 2760 } 2761 static inline uint32_t 2762 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2763 { 2764 uint8_t round, shift = b & 0x1f; 2765 uint32_t res; 2766 2767 round = get_round(vxrm, a, shift); 2768 res = (a >> shift) + round; 2769 return res; 2770 } 2771 static inline uint64_t 2772 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2773 { 2774 uint8_t round, shift = b & 0x3f; 2775 uint64_t res; 2776 2777 round = get_round(vxrm, a, shift); 2778 res = (a >> shift) + round; 2779 return res; 2780 } 2781 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2782 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2783 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2784 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2785 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2786 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2787 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2788 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2789 2790 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2791 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2792 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2793 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2794 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2795 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2796 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2797 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2798 2799 static inline int8_t 2800 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2801 { 2802 uint8_t round, shift = b & 0x7; 2803 int8_t res; 2804 2805 
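    /*
     * Worked example: a = 0b1011, shift = 2, vxrm = 0 (rnu): get_round()
     * returns bit 1 of a, i.e. 1, so the result is (0b1011 >> 2) + 1 = 3.
     */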
round = get_round(vxrm, a, shift); 2806 res = (a >> shift) + round; 2807 return res; 2808 } 2809 static inline int16_t 2810 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2811 { 2812 uint8_t round, shift = b & 0xf; 2813 int16_t res; 2814 2815 round = get_round(vxrm, a, shift); 2816 res = (a >> shift) + round; 2817 return res; 2818 } 2819 static inline int32_t 2820 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2821 { 2822 uint8_t round, shift = b & 0x1f; 2823 int32_t res; 2824 2825 round = get_round(vxrm, a, shift); 2826 res = (a >> shift) + round; 2827 return res; 2828 } 2829 static inline int64_t 2830 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2831 { 2832 uint8_t round, shift = b & 0x3f; 2833 int64_t res; 2834 2835 round = get_round(vxrm, a, shift); 2836 res = (a >> shift) + round; 2837 return res; 2838 } 2839 2840 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2841 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2842 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2843 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2844 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2845 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2846 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2847 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2848 2849 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2850 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2851 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2852 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2853 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2854 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2855 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2856 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2857 2858 /* Vector Narrowing Fixed-Point Clip Instructions */ 2859 static inline int8_t 2860 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2861 { 2862 uint8_t round, shift = b & 0xf; 2863 int16_t res; 2864 2865 round = get_round(vxrm, a, shift); 2866 res = (a >> shift) + round; 2867 if (res > INT8_MAX) { 2868 env->vxsat = 0x1; 2869 return INT8_MAX; 2870 } else if (res < INT8_MIN) { 2871 env->vxsat = 0x1; 2872 return INT8_MIN; 2873 } else { 2874 return res; 2875 } 2876 } 2877 2878 static inline int16_t 2879 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2880 { 2881 uint8_t round, shift = b & 0x1f; 2882 int32_t res; 2883 2884 round = get_round(vxrm, a, shift); 2885 res = (a >> shift) + round; 2886 if (res > INT16_MAX) { 2887 env->vxsat = 0x1; 2888 return INT16_MAX; 2889 } else if (res < INT16_MIN) { 2890 env->vxsat = 0x1; 2891 return INT16_MIN; 2892 } else { 2893 return res; 2894 } 2895 } 2896 2897 static inline int32_t 2898 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2899 { 2900 uint8_t round, shift = b & 0x3f; 2901 int64_t res; 2902 2903 round = get_round(vxrm, a, shift); 2904 res = (a >> shift) + round; 2905 if (res > INT32_MAX) { 2906 env->vxsat = 0x1; 2907 return INT32_MAX; 2908 } else if (res < INT32_MIN) { 2909 env->vxsat = 0x1; 2910 return INT32_MIN; 2911 } else { 2912 return res; 2913 } 2914 } 2915 2916 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2917 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2918 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2919 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 2920 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 2921 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 2922 2923 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2924 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2925 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, 
vnclip32) 2926 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 2927 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 2928 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 2929 2930 static inline uint8_t 2931 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2932 { 2933 uint8_t round, shift = b & 0xf; 2934 uint16_t res; 2935 2936 round = get_round(vxrm, a, shift); 2937 res = (a >> shift) + round; 2938 if (res > UINT8_MAX) { 2939 env->vxsat = 0x1; 2940 return UINT8_MAX; 2941 } else { 2942 return res; 2943 } 2944 } 2945 2946 static inline uint16_t 2947 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2948 { 2949 uint8_t round, shift = b & 0x1f; 2950 uint32_t res; 2951 2952 round = get_round(vxrm, a, shift); 2953 res = (a >> shift) + round; 2954 if (res > UINT16_MAX) { 2955 env->vxsat = 0x1; 2956 return UINT16_MAX; 2957 } else { 2958 return res; 2959 } 2960 } 2961 2962 static inline uint32_t 2963 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2964 { 2965 uint8_t round, shift = b & 0x3f; 2966 uint64_t res; 2967 2968 round = get_round(vxrm, a, shift); 2969 res = (a >> shift) + round; 2970 if (res > UINT32_MAX) { 2971 env->vxsat = 0x1; 2972 return UINT32_MAX; 2973 } else { 2974 return res; 2975 } 2976 } 2977 2978 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2979 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2980 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2981 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 2982 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 2983 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 2984 2985 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2986 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2987 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 2988 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 2989 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 2990 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 2991 2992 /* 2993 *** Vector Float Point Arithmetic Instructions 2994 */ 2995 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2996 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2997 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2998 CPURISCVState *env) \ 2999 { \ 3000 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3001 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3002 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3003 } 3004 3005 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 3006 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3007 void *vs2, CPURISCVState *env, \ 3008 uint32_t desc) \ 3009 { \ 3010 uint32_t vm = vext_vm(desc); \ 3011 uint32_t vl = env->vl; \ 3012 uint32_t total_elems = \ 3013 vext_get_total_elems(env, desc, ESZ); \ 3014 uint32_t vta = vext_vta(desc); \ 3015 uint32_t i; \ 3016 \ 3017 for (i = env->vstart; i < vl; i++) { \ 3018 if (!vm && !vext_elem_mask(v0, i)) { \ 3019 continue; \ 3020 } \ 3021 do_##NAME(vd, vs1, vs2, i, env); \ 3022 } \ 3023 env->vstart = 0; \ 3024 /* set tail elements to 1s */ \ 3025 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3026 total_elems * ESZ); \ 3027 } 3028 3029 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3030 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3031 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3032 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 3033 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 3034 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 3035 3036 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3037 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3038 CPURISCVState *env) \ 3039 { \ 3040 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3041 *((TD *)vd + HD(i)) = 
OP(s2, (TX1)(T1)s1, &env->fp_status);\ 3042 } 3043 3044 #define GEN_VEXT_VF(NAME, ESZ) \ 3045 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3046 void *vs2, CPURISCVState *env, \ 3047 uint32_t desc) \ 3048 { \ 3049 uint32_t vm = vext_vm(desc); \ 3050 uint32_t vl = env->vl; \ 3051 uint32_t total_elems = \ 3052 vext_get_total_elems(env, desc, ESZ); \ 3053 uint32_t vta = vext_vta(desc); \ 3054 uint32_t i; \ 3055 \ 3056 for (i = env->vstart; i < vl; i++) { \ 3057 if (!vm && !vext_elem_mask(v0, i)) { \ 3058 continue; \ 3059 } \ 3060 do_##NAME(vd, s1, vs2, i, env); \ 3061 } \ 3062 env->vstart = 0; \ 3063 /* set tail elements to 1s */ \ 3064 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3065 total_elems * ESZ); \ 3066 } 3067 3068 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3069 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3070 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3071 GEN_VEXT_VF(vfadd_vf_h, 2) 3072 GEN_VEXT_VF(vfadd_vf_w, 4) 3073 GEN_VEXT_VF(vfadd_vf_d, 8) 3074 3075 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3076 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3077 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3078 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 3079 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 3080 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 3081 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3082 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3083 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3084 GEN_VEXT_VF(vfsub_vf_h, 2) 3085 GEN_VEXT_VF(vfsub_vf_w, 4) 3086 GEN_VEXT_VF(vfsub_vf_d, 8) 3087 3088 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3089 { 3090 return float16_sub(b, a, s); 3091 } 3092 3093 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3094 { 3095 return float32_sub(b, a, s); 3096 } 3097 3098 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3099 { 3100 return float64_sub(b, a, s); 3101 } 3102 3103 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3104 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3105 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3106 GEN_VEXT_VF(vfrsub_vf_h, 2) 3107 GEN_VEXT_VF(vfrsub_vf_w, 4) 3108 GEN_VEXT_VF(vfrsub_vf_d, 8) 3109 3110 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3111 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3112 { 3113 return float32_add(float16_to_float32(a, true, s), 3114 float16_to_float32(b, true, s), s); 3115 } 3116 3117 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3118 { 3119 return float64_add(float32_to_float64(a, s), 3120 float32_to_float64(b, s), s); 3121 3122 } 3123 3124 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3125 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3126 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3127 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3128 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3129 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3130 GEN_VEXT_VF(vfwadd_vf_h, 4) 3131 GEN_VEXT_VF(vfwadd_vf_w, 8) 3132 3133 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3134 { 3135 return float32_sub(float16_to_float32(a, true, s), 3136 float16_to_float32(b, true, s), s); 3137 } 3138 3139 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3140 { 3141 return float64_sub(float32_to_float64(a, s), 3142 float32_to_float64(b, s), s); 3143 3144 } 3145 3146 RVVCALL(OPFVV2, vfwsub_vv_h, 
WOP_UUU_H, H4, H2, H2, vfwsub16) 3147 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3148 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3149 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3150 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3151 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3152 GEN_VEXT_VF(vfwsub_vf_h, 4) 3153 GEN_VEXT_VF(vfwsub_vf_w, 8) 3154 3155 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3156 { 3157 return float32_add(a, float16_to_float32(b, true, s), s); 3158 } 3159 3160 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3161 { 3162 return float64_add(a, float32_to_float64(b, s), s); 3163 } 3164 3165 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3166 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3167 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3168 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3169 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3170 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3171 GEN_VEXT_VF(vfwadd_wf_h, 4) 3172 GEN_VEXT_VF(vfwadd_wf_w, 8) 3173 3174 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3175 { 3176 return float32_sub(a, float16_to_float32(b, true, s), s); 3177 } 3178 3179 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3180 { 3181 return float64_sub(a, float32_to_float64(b, s), s); 3182 } 3183 3184 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3185 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3186 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3187 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3188 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3189 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3190 GEN_VEXT_VF(vfwsub_wf_h, 4) 3191 GEN_VEXT_VF(vfwsub_wf_w, 8) 3192 3193 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3194 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3195 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3196 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3197 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3198 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3199 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3200 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3201 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3202 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3203 GEN_VEXT_VF(vfmul_vf_h, 2) 3204 GEN_VEXT_VF(vfmul_vf_w, 4) 3205 GEN_VEXT_VF(vfmul_vf_d, 8) 3206 3207 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3208 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3209 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3210 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3211 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3212 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3213 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3214 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3215 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3216 GEN_VEXT_VF(vfdiv_vf_h, 2) 3217 GEN_VEXT_VF(vfdiv_vf_w, 4) 3218 GEN_VEXT_VF(vfdiv_vf_d, 8) 3219 3220 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3221 { 3222 return float16_div(b, a, s); 3223 } 3224 3225 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3226 { 3227 return float32_div(b, a, s); 3228 } 3229 3230 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3231 { 3232 return float64_div(b, a, s); 3233 } 3234 3235 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3236 
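/*
 * Illustrative sketch, not part of the original helpers: the per-element
 * semantics of the reverse form, vfrdiv.vf, is vd[i] = f[rs1] / vs2[i],
 * which is why float*_rdiv simply swaps its operands before calling
 * float*_div.  The guard macro is hypothetical, so this is never built.
 */
#ifdef VEXT_RDIV_SEMANTICS_SKETCH
static uint32_t vfrdiv_vf_w_element_ref(uint32_t vs2_i, uint32_t rs1,
                                        float_status *s)
{
    /* scalar / vector element, single precision */
    return float32_div(rs1, vs2_i, s);
}
#endif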
RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3237 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3238 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3239 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3240 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3241 3242 /* Vector Widening Floating-Point Multiply */ 3243 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3244 { 3245 return float32_mul(float16_to_float32(a, true, s), 3246 float16_to_float32(b, true, s), s); 3247 } 3248 3249 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3250 { 3251 return float64_mul(float32_to_float64(a, s), 3252 float32_to_float64(b, s), s); 3253 3254 } 3255 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3256 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3257 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3258 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3259 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3260 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3261 GEN_VEXT_VF(vfwmul_vf_h, 4) 3262 GEN_VEXT_VF(vfwmul_vf_w, 8) 3263 3264 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3265 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3266 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3267 CPURISCVState *env) \ 3268 { \ 3269 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3270 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3271 TD d = *((TD *)vd + HD(i)); \ 3272 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3273 } 3274 3275 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3276 { 3277 return float16_muladd(a, b, d, 0, s); 3278 } 3279 3280 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3281 { 3282 return float32_muladd(a, b, d, 0, s); 3283 } 3284 3285 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3286 { 3287 return float64_muladd(a, b, d, 0, s); 3288 } 3289 3290 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3291 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3292 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3293 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3294 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3295 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3296 3297 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3298 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3299 CPURISCVState *env) \ 3300 { \ 3301 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3302 TD d = *((TD *)vd + HD(i)); \ 3303 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3304 } 3305 3306 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3307 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3308 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3309 GEN_VEXT_VF(vfmacc_vf_h, 2) 3310 GEN_VEXT_VF(vfmacc_vf_w, 4) 3311 GEN_VEXT_VF(vfmacc_vf_d, 8) 3312 3313 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3314 { 3315 return float16_muladd(a, b, d, 3316 float_muladd_negate_c | float_muladd_negate_product, s); 3317 } 3318 3319 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3320 { 3321 return float32_muladd(a, b, d, 3322 float_muladd_negate_c | float_muladd_negate_product, s); 3323 } 3324 3325 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3326 { 3327 return float64_muladd(a, b, d, 3328 float_muladd_negate_c | float_muladd_negate_product, s); 3329 } 3330 3331 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3332 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, 
H4, H4, fnmacc32) 3333 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3334 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3335 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3336 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3337 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3338 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3339 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3340 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3341 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3342 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3343 3344 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3345 { 3346 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3347 } 3348 3349 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3350 { 3351 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3352 } 3353 3354 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3355 { 3356 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3357 } 3358 3359 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3360 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3361 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3362 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3363 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3364 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3365 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3366 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3367 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3368 GEN_VEXT_VF(vfmsac_vf_h, 2) 3369 GEN_VEXT_VF(vfmsac_vf_w, 4) 3370 GEN_VEXT_VF(vfmsac_vf_d, 8) 3371 3372 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3373 { 3374 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3375 } 3376 3377 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3378 { 3379 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3380 } 3381 3382 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3383 { 3384 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3385 } 3386 3387 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3388 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3389 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3390 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3391 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3392 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3393 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3394 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3395 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3396 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3397 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3398 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3399 3400 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3401 { 3402 return float16_muladd(d, b, a, 0, s); 3403 } 3404 3405 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3406 { 3407 return float32_muladd(d, b, a, 0, s); 3408 } 3409 3410 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3411 { 3412 return float64_muladd(d, b, a, 0, s); 3413 } 3414 3415 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3416 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3417 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3418 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) 3419 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3420 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3421 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3422 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, 
fmadd32) 3423 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3424 GEN_VEXT_VF(vfmadd_vf_h, 2) 3425 GEN_VEXT_VF(vfmadd_vf_w, 4) 3426 GEN_VEXT_VF(vfmadd_vf_d, 8) 3427 3428 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3429 { 3430 return float16_muladd(d, b, a, 3431 float_muladd_negate_c | float_muladd_negate_product, s); 3432 } 3433 3434 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3435 { 3436 return float32_muladd(d, b, a, 3437 float_muladd_negate_c | float_muladd_negate_product, s); 3438 } 3439 3440 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3441 { 3442 return float64_muladd(d, b, a, 3443 float_muladd_negate_c | float_muladd_negate_product, s); 3444 } 3445 3446 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3447 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3448 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3449 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3450 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3451 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3452 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3453 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3454 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3455 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3456 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3457 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3458 3459 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3460 { 3461 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3462 } 3463 3464 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3465 { 3466 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3467 } 3468 3469 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3470 { 3471 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3472 } 3473 3474 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3475 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3476 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3477 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3478 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3479 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3480 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3481 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3482 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3483 GEN_VEXT_VF(vfmsub_vf_h, 2) 3484 GEN_VEXT_VF(vfmsub_vf_w, 4) 3485 GEN_VEXT_VF(vfmsub_vf_d, 8) 3486 3487 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3488 { 3489 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3490 } 3491 3492 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3493 { 3494 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3495 } 3496 3497 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3498 { 3499 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3500 } 3501 3502 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3503 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3504 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3505 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3506 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3507 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3508 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3509 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3510 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3511 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3512 GEN_VEXT_VF(vfnmsub_vf_w, 4) 
3513 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3514 3515 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3516 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3517 { 3518 return float32_muladd(float16_to_float32(a, true, s), 3519 float16_to_float32(b, true, s), d, 0, s); 3520 } 3521 3522 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3523 { 3524 return float64_muladd(float32_to_float64(a, s), 3525 float32_to_float64(b, s), d, 0, s); 3526 } 3527 3528 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3529 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3530 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3531 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3532 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3533 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3534 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3535 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3536 3537 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3538 { 3539 return float32_muladd(float16_to_float32(a, true, s), 3540 float16_to_float32(b, true, s), d, 3541 float_muladd_negate_c | float_muladd_negate_product, s); 3542 } 3543 3544 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3545 { 3546 return float64_muladd(float32_to_float64(a, s), 3547 float32_to_float64(b, s), d, 3548 float_muladd_negate_c | float_muladd_negate_product, s); 3549 } 3550 3551 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3552 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3553 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3554 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3555 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3556 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3557 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3558 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3559 3560 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3561 { 3562 return float32_muladd(float16_to_float32(a, true, s), 3563 float16_to_float32(b, true, s), d, 3564 float_muladd_negate_c, s); 3565 } 3566 3567 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3568 { 3569 return float64_muladd(float32_to_float64(a, s), 3570 float32_to_float64(b, s), d, 3571 float_muladd_negate_c, s); 3572 } 3573 3574 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3575 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3576 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3577 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3578 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3579 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3580 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3581 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3582 3583 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3584 { 3585 return float32_muladd(float16_to_float32(a, true, s), 3586 float16_to_float32(b, true, s), d, 3587 float_muladd_negate_product, s); 3588 } 3589 3590 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3591 { 3592 return float64_muladd(float32_to_float64(a, s), 3593 float32_to_float64(b, s), d, 3594 float_muladd_negate_product, s); 3595 } 3596 3597 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3598 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3599 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3600 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3601 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3602 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, 
fwnmsac32) 3603 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3604 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3605 3606 /* Vector Floating-Point Square-Root Instruction */ 3607 /* (TD, T2, TX2) */ 3608 #define OP_UU_H uint16_t, uint16_t, uint16_t 3609 #define OP_UU_W uint32_t, uint32_t, uint32_t 3610 #define OP_UU_D uint64_t, uint64_t, uint64_t 3611 3612 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3613 static void do_##NAME(void *vd, void *vs2, int i, \ 3614 CPURISCVState *env) \ 3615 { \ 3616 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3617 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3618 } 3619 3620 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3621 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3622 CPURISCVState *env, uint32_t desc) \ 3623 { \ 3624 uint32_t vm = vext_vm(desc); \ 3625 uint32_t vl = env->vl; \ 3626 uint32_t total_elems = \ 3627 vext_get_total_elems(env, desc, ESZ); \ 3628 uint32_t vta = vext_vta(desc); \ 3629 uint32_t i; \ 3630 \ 3631 if (vl == 0) { \ 3632 return; \ 3633 } \ 3634 for (i = env->vstart; i < vl; i++) { \ 3635 if (!vm && !vext_elem_mask(v0, i)) { \ 3636 continue; \ 3637 } \ 3638 do_##NAME(vd, vs2, i, env); \ 3639 } \ 3640 env->vstart = 0; \ 3641 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3642 total_elems * ESZ); \ 3643 } 3644 3645 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3646 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3647 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3648 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3649 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3650 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3651 3652 /* 3653 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3654 * 3655 * Adapted from riscv-v-spec recip.c: 3656 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3657 */ 3658 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3659 { 3660 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3661 uint64_t exp = extract64(f, frac_size, exp_size); 3662 uint64_t frac = extract64(f, 0, frac_size); 3663 3664 const uint8_t lookup_table[] = { 3665 52, 51, 50, 48, 47, 46, 44, 43, 3666 42, 41, 40, 39, 38, 36, 35, 34, 3667 33, 32, 31, 30, 30, 29, 28, 27, 3668 26, 25, 24, 23, 23, 22, 21, 20, 3669 19, 19, 18, 17, 16, 16, 15, 14, 3670 14, 13, 12, 12, 11, 10, 10, 9, 3671 9, 8, 7, 7, 6, 6, 5, 4, 3672 4, 3, 3, 2, 2, 1, 1, 0, 3673 127, 125, 123, 121, 119, 118, 116, 114, 3674 113, 111, 109, 108, 106, 105, 103, 102, 3675 100, 99, 97, 96, 95, 93, 92, 91, 3676 90, 88, 87, 86, 85, 84, 83, 82, 3677 80, 79, 78, 77, 76, 75, 74, 73, 3678 72, 71, 70, 70, 69, 68, 67, 66, 3679 65, 64, 63, 63, 62, 61, 60, 59, 3680 59, 58, 57, 56, 56, 55, 54, 53 3681 }; 3682 const int precision = 7; 3683 3684 if (exp == 0 && frac != 0) { /* subnormal */ 3685 /* Normalize the subnormal. 
*/ 3686 while (extract64(frac, frac_size - 1, 1) == 0) { 3687 exp--; 3688 frac <<= 1; 3689 } 3690 3691 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3692 } 3693 3694 int idx = ((exp & 1) << (precision - 1)) | 3695 (frac >> (frac_size - precision + 1)); 3696 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3697 (frac_size - precision); 3698 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3699 3700 uint64_t val = 0; 3701 val = deposit64(val, 0, frac_size, out_frac); 3702 val = deposit64(val, frac_size, exp_size, out_exp); 3703 val = deposit64(val, frac_size + exp_size, 1, sign); 3704 return val; 3705 } 3706 3707 static float16 frsqrt7_h(float16 f, float_status *s) 3708 { 3709 int exp_size = 5, frac_size = 10; 3710 bool sign = float16_is_neg(f); 3711 3712 /* 3713 * frsqrt7(sNaN) = canonical NaN 3714 * frsqrt7(-inf) = canonical NaN 3715 * frsqrt7(-normal) = canonical NaN 3716 * frsqrt7(-subnormal) = canonical NaN 3717 */ 3718 if (float16_is_signaling_nan(f, s) || 3719 (float16_is_infinity(f) && sign) || 3720 (float16_is_normal(f) && sign) || 3721 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3722 s->float_exception_flags |= float_flag_invalid; 3723 return float16_default_nan(s); 3724 } 3725 3726 /* frsqrt7(qNaN) = canonical NaN */ 3727 if (float16_is_quiet_nan(f, s)) { 3728 return float16_default_nan(s); 3729 } 3730 3731 /* frsqrt7(+-0) = +-inf */ 3732 if (float16_is_zero(f)) { 3733 s->float_exception_flags |= float_flag_divbyzero; 3734 return float16_set_sign(float16_infinity, sign); 3735 } 3736 3737 /* frsqrt7(+inf) = +0 */ 3738 if (float16_is_infinity(f) && !sign) { 3739 return float16_set_sign(float16_zero, sign); 3740 } 3741 3742 /* +normal, +subnormal */ 3743 uint64_t val = frsqrt7(f, exp_size, frac_size); 3744 return make_float16(val); 3745 } 3746 3747 static float32 frsqrt7_s(float32 f, float_status *s) 3748 { 3749 int exp_size = 8, frac_size = 23; 3750 bool sign = float32_is_neg(f); 3751 3752 /* 3753 * frsqrt7(sNaN) = canonical NaN 3754 * frsqrt7(-inf) = canonical NaN 3755 * frsqrt7(-normal) = canonical NaN 3756 * frsqrt7(-subnormal) = canonical NaN 3757 */ 3758 if (float32_is_signaling_nan(f, s) || 3759 (float32_is_infinity(f) && sign) || 3760 (float32_is_normal(f) && sign) || 3761 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3762 s->float_exception_flags |= float_flag_invalid; 3763 return float32_default_nan(s); 3764 } 3765 3766 /* frsqrt7(qNaN) = canonical NaN */ 3767 if (float32_is_quiet_nan(f, s)) { 3768 return float32_default_nan(s); 3769 } 3770 3771 /* frsqrt7(+-0) = +-inf */ 3772 if (float32_is_zero(f)) { 3773 s->float_exception_flags |= float_flag_divbyzero; 3774 return float32_set_sign(float32_infinity, sign); 3775 } 3776 3777 /* frsqrt7(+inf) = +0 */ 3778 if (float32_is_infinity(f) && !sign) { 3779 return float32_set_sign(float32_zero, sign); 3780 } 3781 3782 /* +normal, +subnormal */ 3783 uint64_t val = frsqrt7(f, exp_size, frac_size); 3784 return make_float32(val); 3785 } 3786 3787 static float64 frsqrt7_d(float64 f, float_status *s) 3788 { 3789 int exp_size = 11, frac_size = 52; 3790 bool sign = float64_is_neg(f); 3791 3792 /* 3793 * frsqrt7(sNaN) = canonical NaN 3794 * frsqrt7(-inf) = canonical NaN 3795 * frsqrt7(-normal) = canonical NaN 3796 * frsqrt7(-subnormal) = canonical NaN 3797 */ 3798 if (float64_is_signaling_nan(f, s) || 3799 (float64_is_infinity(f) && sign) || 3800 (float64_is_normal(f) && sign) || 3801 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3802 
s->float_exception_flags |= float_flag_invalid; 3803 return float64_default_nan(s); 3804 } 3805 3806 /* frsqrt7(qNaN) = canonical NaN */ 3807 if (float64_is_quiet_nan(f, s)) { 3808 return float64_default_nan(s); 3809 } 3810 3811 /* frsqrt7(+-0) = +-inf */ 3812 if (float64_is_zero(f)) { 3813 s->float_exception_flags |= float_flag_divbyzero; 3814 return float64_set_sign(float64_infinity, sign); 3815 } 3816 3817 /* frsqrt7(+inf) = +0 */ 3818 if (float64_is_infinity(f) && !sign) { 3819 return float64_set_sign(float64_zero, sign); 3820 } 3821 3822 /* +normal, +subnormal */ 3823 uint64_t val = frsqrt7(f, exp_size, frac_size); 3824 return make_float64(val); 3825 } 3826 3827 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3828 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3829 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3830 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3831 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3832 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3833 3834 /* 3835 * Vector Floating-Point Reciprocal Estimate Instruction 3836 * 3837 * Adapted from riscv-v-spec recip.c: 3838 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3839 */ 3840 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3841 float_status *s) 3842 { 3843 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3844 uint64_t exp = extract64(f, frac_size, exp_size); 3845 uint64_t frac = extract64(f, 0, frac_size); 3846 3847 const uint8_t lookup_table[] = { 3848 127, 125, 123, 121, 119, 117, 116, 114, 3849 112, 110, 109, 107, 105, 104, 102, 100, 3850 99, 97, 96, 94, 93, 91, 90, 88, 3851 87, 85, 84, 83, 81, 80, 79, 77, 3852 76, 75, 74, 72, 71, 70, 69, 68, 3853 66, 65, 64, 63, 62, 61, 60, 59, 3854 58, 57, 56, 55, 54, 53, 52, 51, 3855 50, 49, 48, 47, 46, 45, 44, 43, 3856 42, 41, 40, 40, 39, 38, 37, 36, 3857 35, 35, 34, 33, 32, 31, 31, 30, 3858 29, 28, 28, 27, 26, 25, 25, 24, 3859 23, 23, 22, 21, 21, 20, 19, 19, 3860 18, 17, 17, 16, 15, 15, 14, 14, 3861 13, 12, 12, 11, 11, 10, 9, 9, 3862 8, 8, 7, 7, 6, 5, 5, 4, 3863 4, 3, 3, 2, 2, 1, 1, 0 3864 }; 3865 const int precision = 7; 3866 3867 if (exp == 0 && frac != 0) { /* subnormal */ 3868 /* Normalize the subnormal. */ 3869 while (extract64(frac, frac_size - 1, 1) == 0) { 3870 exp--; 3871 frac <<= 1; 3872 } 3873 3874 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3875 3876 if (exp != 0 && exp != UINT64_MAX) { 3877 /* 3878 * Overflow to inf or max value of same sign, 3879 * depending on sign and rounding mode. 3880 */ 3881 s->float_exception_flags |= (float_flag_inexact | 3882 float_flag_overflow); 3883 3884 if ((s->float_rounding_mode == float_round_to_zero) || 3885 ((s->float_rounding_mode == float_round_down) && !sign) || 3886 ((s->float_rounding_mode == float_round_up) && sign)) { 3887 /* Return greatest/negative finite value. */ 3888 return (sign << (exp_size + frac_size)) | 3889 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3890 } else { 3891 /* Return +-inf. */ 3892 return (sign << (exp_size + frac_size)) | 3893 MAKE_64BIT_MASK(frac_size, exp_size); 3894 } 3895 } 3896 } 3897 3898 int idx = frac >> (frac_size - precision); 3899 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3900 (frac_size - precision); 3901 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3902 3903 if (out_exp == 0 || out_exp == UINT64_MAX) { 3904 /* 3905 * The result is subnormal, but don't raise the underflow exception, 3906 * because there's no additional loss of precision. 
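 *
 * Concretely, the code below makes the previously implicit leading one
 * explicit in out_frac, shifts the 7-bit estimate right by one (and once
 * more when out_exp has wrapped to -1), so that the exponent field that
 * gets deposited is zero, i.e. a subnormal encoding.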
3907 */ 3908 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3909 if (out_exp == UINT64_MAX) { 3910 out_frac >>= 1; 3911 out_exp = 0; 3912 } 3913 } 3914 3915 uint64_t val = 0; 3916 val = deposit64(val, 0, frac_size, out_frac); 3917 val = deposit64(val, frac_size, exp_size, out_exp); 3918 val = deposit64(val, frac_size + exp_size, 1, sign); 3919 return val; 3920 } 3921 3922 static float16 frec7_h(float16 f, float_status *s) 3923 { 3924 int exp_size = 5, frac_size = 10; 3925 bool sign = float16_is_neg(f); 3926 3927 /* frec7(+-inf) = +-0 */ 3928 if (float16_is_infinity(f)) { 3929 return float16_set_sign(float16_zero, sign); 3930 } 3931 3932 /* frec7(+-0) = +-inf */ 3933 if (float16_is_zero(f)) { 3934 s->float_exception_flags |= float_flag_divbyzero; 3935 return float16_set_sign(float16_infinity, sign); 3936 } 3937 3938 /* frec7(sNaN) = canonical NaN */ 3939 if (float16_is_signaling_nan(f, s)) { 3940 s->float_exception_flags |= float_flag_invalid; 3941 return float16_default_nan(s); 3942 } 3943 3944 /* frec7(qNaN) = canonical NaN */ 3945 if (float16_is_quiet_nan(f, s)) { 3946 return float16_default_nan(s); 3947 } 3948 3949 /* +-normal, +-subnormal */ 3950 uint64_t val = frec7(f, exp_size, frac_size, s); 3951 return make_float16(val); 3952 } 3953 3954 static float32 frec7_s(float32 f, float_status *s) 3955 { 3956 int exp_size = 8, frac_size = 23; 3957 bool sign = float32_is_neg(f); 3958 3959 /* frec7(+-inf) = +-0 */ 3960 if (float32_is_infinity(f)) { 3961 return float32_set_sign(float32_zero, sign); 3962 } 3963 3964 /* frec7(+-0) = +-inf */ 3965 if (float32_is_zero(f)) { 3966 s->float_exception_flags |= float_flag_divbyzero; 3967 return float32_set_sign(float32_infinity, sign); 3968 } 3969 3970 /* frec7(sNaN) = canonical NaN */ 3971 if (float32_is_signaling_nan(f, s)) { 3972 s->float_exception_flags |= float_flag_invalid; 3973 return float32_default_nan(s); 3974 } 3975 3976 /* frec7(qNaN) = canonical NaN */ 3977 if (float32_is_quiet_nan(f, s)) { 3978 return float32_default_nan(s); 3979 } 3980 3981 /* +-normal, +-subnormal */ 3982 uint64_t val = frec7(f, exp_size, frac_size, s); 3983 return make_float32(val); 3984 } 3985 3986 static float64 frec7_d(float64 f, float_status *s) 3987 { 3988 int exp_size = 11, frac_size = 52; 3989 bool sign = float64_is_neg(f); 3990 3991 /* frec7(+-inf) = +-0 */ 3992 if (float64_is_infinity(f)) { 3993 return float64_set_sign(float64_zero, sign); 3994 } 3995 3996 /* frec7(+-0) = +-inf */ 3997 if (float64_is_zero(f)) { 3998 s->float_exception_flags |= float_flag_divbyzero; 3999 return float64_set_sign(float64_infinity, sign); 4000 } 4001 4002 /* frec7(sNaN) = canonical NaN */ 4003 if (float64_is_signaling_nan(f, s)) { 4004 s->float_exception_flags |= float_flag_invalid; 4005 return float64_default_nan(s); 4006 } 4007 4008 /* frec7(qNaN) = canonical NaN */ 4009 if (float64_is_quiet_nan(f, s)) { 4010 return float64_default_nan(s); 4011 } 4012 4013 /* +-normal, +-subnormal */ 4014 uint64_t val = frec7(f, exp_size, frac_size, s); 4015 return make_float64(val); 4016 } 4017 4018 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4019 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4020 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4021 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4022 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4023 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4024 4025 /* Vector Floating-Point MIN/MAX Instructions */ 4026 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4027 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 4028 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4029 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4030 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4031 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4032 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4033 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4034 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4035 GEN_VEXT_VF(vfmin_vf_h, 2) 4036 GEN_VEXT_VF(vfmin_vf_w, 4) 4037 GEN_VEXT_VF(vfmin_vf_d, 8) 4038 4039 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4040 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4041 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4042 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 4043 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 4044 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 4045 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4046 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4047 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4048 GEN_VEXT_VF(vfmax_vf_h, 2) 4049 GEN_VEXT_VF(vfmax_vf_w, 4) 4050 GEN_VEXT_VF(vfmax_vf_d, 8) 4051 4052 /* Vector Floating-Point Sign-Injection Instructions */ 4053 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4054 { 4055 return deposit64(b, 0, 15, a); 4056 } 4057 4058 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4059 { 4060 return deposit64(b, 0, 31, a); 4061 } 4062 4063 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4064 { 4065 return deposit64(b, 0, 63, a); 4066 } 4067 4068 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4069 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4070 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4071 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 4072 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 4073 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 4074 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4075 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4076 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4077 GEN_VEXT_VF(vfsgnj_vf_h, 2) 4078 GEN_VEXT_VF(vfsgnj_vf_w, 4) 4079 GEN_VEXT_VF(vfsgnj_vf_d, 8) 4080 4081 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4082 { 4083 return deposit64(~b, 0, 15, a); 4084 } 4085 4086 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4087 { 4088 return deposit64(~b, 0, 31, a); 4089 } 4090 4091 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4092 { 4093 return deposit64(~b, 0, 63, a); 4094 } 4095 4096 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4097 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4098 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4099 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 4100 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 4101 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 4102 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4103 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4104 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4105 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4106 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4107 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4108 4109 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4110 { 4111 return deposit64(b ^ a, 0, 15, a); 4112 } 4113 4114 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4115 { 4116 return deposit64(b ^ a, 0, 31, a); 4117 } 4118 4119 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
float_status *s) 4120 { 4121 return deposit64(b ^ a, 0, 63, a); 4122 } 4123 4124 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4125 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4126 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4127 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 4128 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 4129 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 4130 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4131 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4132 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4133 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 4134 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 4135 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 4136 4137 /* Vector Floating-Point Compare Instructions */ 4138 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4139 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4140 CPURISCVState *env, uint32_t desc) \ 4141 { \ 4142 uint32_t vm = vext_vm(desc); \ 4143 uint32_t vl = env->vl; \ 4144 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 4145 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4146 uint32_t i; \ 4147 \ 4148 for (i = env->vstart; i < vl; i++) { \ 4149 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4150 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4151 if (!vm && !vext_elem_mask(v0, i)) { \ 4152 continue; \ 4153 } \ 4154 vext_set_elem_mask(vd, i, \ 4155 DO_OP(s2, s1, &env->fp_status)); \ 4156 } \ 4157 env->vstart = 0; \ 4158 /* mask destination register are always tail-agnostic */ \ 4159 /* set tail elements to 1s */ \ 4160 if (vta_all_1s) { \ 4161 for (; i < total_elems; i++) { \ 4162 vext_set_elem_mask(vd, i, 1); \ 4163 } \ 4164 } \ 4165 } 4166 4167 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4168 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4169 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4170 4171 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4172 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4173 CPURISCVState *env, uint32_t desc) \ 4174 { \ 4175 uint32_t vm = vext_vm(desc); \ 4176 uint32_t vl = env->vl; \ 4177 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 4178 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4179 uint32_t i; \ 4180 \ 4181 for (i = env->vstart; i < vl; i++) { \ 4182 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4183 if (!vm && !vext_elem_mask(v0, i)) { \ 4184 continue; \ 4185 } \ 4186 vext_set_elem_mask(vd, i, \ 4187 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4188 } \ 4189 env->vstart = 0; \ 4190 /* mask destination register are always tail-agnostic */ \ 4191 /* set tail elements to 1s */ \ 4192 if (vta_all_1s) { \ 4193 for (; i < total_elems; i++) { \ 4194 vext_set_elem_mask(vd, i, 1); \ 4195 } \ 4196 } \ 4197 } 4198 4199 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4200 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4201 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4202 4203 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4204 { 4205 FloatRelation compare = float16_compare_quiet(a, b, s); 4206 return compare != float_relation_equal; 4207 } 4208 4209 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4210 { 4211 FloatRelation compare = float32_compare_quiet(a, b, s); 4212 return compare != float_relation_equal; 4213 } 4214 4215 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4216 { 4217 FloatRelation compare = float64_compare_quiet(a, b, s); 4218 return compare != float_relation_equal; 4219 } 4220 4221 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, 
uint16_t, H2, vmfne16) 4222 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4223 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4224 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4225 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4226 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4227 4228 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4229 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4230 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4231 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4232 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4233 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4234 4235 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4236 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4237 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4238 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4239 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4240 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4241 4242 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4243 { 4244 FloatRelation compare = float16_compare(a, b, s); 4245 return compare == float_relation_greater; 4246 } 4247 4248 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4249 { 4250 FloatRelation compare = float32_compare(a, b, s); 4251 return compare == float_relation_greater; 4252 } 4253 4254 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4255 { 4256 FloatRelation compare = float64_compare(a, b, s); 4257 return compare == float_relation_greater; 4258 } 4259 4260 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4261 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4262 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4263 4264 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4265 { 4266 FloatRelation compare = float16_compare(a, b, s); 4267 return compare == float_relation_greater || 4268 compare == float_relation_equal; 4269 } 4270 4271 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4272 { 4273 FloatRelation compare = float32_compare(a, b, s); 4274 return compare == float_relation_greater || 4275 compare == float_relation_equal; 4276 } 4277 4278 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4279 { 4280 FloatRelation compare = float64_compare(a, b, s); 4281 return compare == float_relation_greater || 4282 compare == float_relation_equal; 4283 } 4284 4285 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4286 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4287 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4288 4289 /* Vector Floating-Point Classify Instruction */ 4290 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4291 static void do_##NAME(void *vd, void *vs2, int i) \ 4292 { \ 4293 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4294 *((TD *)vd + HD(i)) = OP(s2); \ 4295 } 4296 4297 #define GEN_VEXT_V(NAME, ESZ) \ 4298 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4299 CPURISCVState *env, uint32_t desc) \ 4300 { \ 4301 uint32_t vm = vext_vm(desc); \ 4302 uint32_t vl = env->vl; \ 4303 uint32_t total_elems = \ 4304 vext_get_total_elems(env, desc, ESZ); \ 4305 uint32_t vta = vext_vta(desc); \ 4306 uint32_t i; \ 4307 \ 4308 for (i = env->vstart; i < vl; i++) { \ 4309 if (!vm && !vext_elem_mask(v0, i)) { \ 4310 continue; \ 4311 } \ 4312 do_##NAME(vd, vs2, i); \ 4313 } \ 4314 env->vstart = 0; \ 4315 /* set tail elements to 1s */ \ 4316 vext_set_elems_1s(vd, vta, vl * ESZ, \ 4317 total_elems * ESZ); \ 4318 } 4319 
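/*
 * Quick reference, readable directly from the fclass_* helpers below:
 * each returns a one-hot mask in the RISC-V FCLASS result encoding:
 *   bit 0: -infinity            bit 5: positive subnormal
 *   bit 1: negative normal      bit 6: positive normal
 *   bit 2: negative subnormal   bit 7: +infinity
 *   bit 3: negative zero        bit 8: signaling NaN
 *   bit 4: positive zero        bit 9: quiet NaN
 * For example, fclass_s(0xff800000) (-inf) returns 1 << 0, and
 * fclass_s(0x7fc00000) (a quiet NaN) returns 1 << 9.
 */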
4320 target_ulong fclass_h(uint64_t frs1) 4321 { 4322 float16 f = frs1; 4323 bool sign = float16_is_neg(f); 4324 4325 if (float16_is_infinity(f)) { 4326 return sign ? 1 << 0 : 1 << 7; 4327 } else if (float16_is_zero(f)) { 4328 return sign ? 1 << 3 : 1 << 4; 4329 } else if (float16_is_zero_or_denormal(f)) { 4330 return sign ? 1 << 2 : 1 << 5; 4331 } else if (float16_is_any_nan(f)) { 4332 float_status s = { }; /* for snan_bit_is_one */ 4333 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4334 } else { 4335 return sign ? 1 << 1 : 1 << 6; 4336 } 4337 } 4338 4339 target_ulong fclass_s(uint64_t frs1) 4340 { 4341 float32 f = frs1; 4342 bool sign = float32_is_neg(f); 4343 4344 if (float32_is_infinity(f)) { 4345 return sign ? 1 << 0 : 1 << 7; 4346 } else if (float32_is_zero(f)) { 4347 return sign ? 1 << 3 : 1 << 4; 4348 } else if (float32_is_zero_or_denormal(f)) { 4349 return sign ? 1 << 2 : 1 << 5; 4350 } else if (float32_is_any_nan(f)) { 4351 float_status s = { }; /* for snan_bit_is_one */ 4352 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4353 } else { 4354 return sign ? 1 << 1 : 1 << 6; 4355 } 4356 } 4357 4358 target_ulong fclass_d(uint64_t frs1) 4359 { 4360 float64 f = frs1; 4361 bool sign = float64_is_neg(f); 4362 4363 if (float64_is_infinity(f)) { 4364 return sign ? 1 << 0 : 1 << 7; 4365 } else if (float64_is_zero(f)) { 4366 return sign ? 1 << 3 : 1 << 4; 4367 } else if (float64_is_zero_or_denormal(f)) { 4368 return sign ? 1 << 2 : 1 << 5; 4369 } else if (float64_is_any_nan(f)) { 4370 float_status s = { }; /* for snan_bit_is_one */ 4371 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4372 } else { 4373 return sign ? 1 << 1 : 1 << 6; 4374 } 4375 } 4376 4377 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4378 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4379 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4380 GEN_VEXT_V(vfclass_v_h, 2) 4381 GEN_VEXT_V(vfclass_v_w, 4) 4382 GEN_VEXT_V(vfclass_v_d, 8) 4383 4384 /* Vector Floating-Point Merge Instruction */ 4385 4386 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4387 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4388 CPURISCVState *env, uint32_t desc) \ 4389 { \ 4390 uint32_t vm = vext_vm(desc); \ 4391 uint32_t vl = env->vl; \ 4392 uint32_t esz = sizeof(ETYPE); \ 4393 uint32_t total_elems = \ 4394 vext_get_total_elems(env, desc, esz); \ 4395 uint32_t vta = vext_vta(desc); \ 4396 uint32_t i; \ 4397 \ 4398 for (i = env->vstart; i < vl; i++) { \ 4399 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4400 *((ETYPE *)vd + H(i)) \ 4401 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4402 } \ 4403 env->vstart = 0; \ 4404 /* set tail elements to 1s */ \ 4405 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4406 } 4407 4408 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4409 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4410 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4411 4412 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4413 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4414 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4415 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4416 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4417 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4418 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4419 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4420 4421 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. 
*/ 4422 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4423 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4424 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4425 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4426 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4427 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4428 4429 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4430 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4431 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4432 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4433 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4434 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4435 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4436 4437 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4438 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4439 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4440 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4441 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4442 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4443 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4444 4445 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4446 /* (TD, T2, TX2) */ 4447 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4448 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4449 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4450 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4451 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4452 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4453 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4454 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4455 4456 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4457 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4458 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4459 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4460 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4461 4462 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4463 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4464 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4465 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4466 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4467 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4468 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4469 4470 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4471 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4472 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4473 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4474 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4475 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4476 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4477 4478 /* 4479 * vfwcvt.f.f.v vd, vs2, vm 4480 * Convert single-width float to double-width float. 
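 *
 * The half-to-single case goes through the vfwcvtffv16() wrapper below; the
 * 'true' it passes to float16_to_float32() selects IEEE half-precision input
 * rather than the alternative half-precision format. Widening binary16 to
 * binary32 is exact, so no rounding occurs here.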
4481 */ 4482 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4483 { 4484 return float16_to_float32(a, true, s); 4485 } 4486 4487 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4488 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4489 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4490 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4491 4492 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4493 /* (TD, T2, TX2) */ 4494 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4495 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4496 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4497 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4498 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4499 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4500 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4501 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4502 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4503 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4504 4505 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4506 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4507 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4508 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4509 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4510 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4511 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4512 4513 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4514 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4515 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4516 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4517 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4518 4519 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4520 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4521 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4522 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4523 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4524 4525 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
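   (The narrowing helper below likewise passes 'true' to float32_to_float16()
   so the result is an IEEE binary16 value; rounding and overflow behaviour
   come from env->fp_status, which the OPFVV1/GEN_VEXT_V_ENV wrappers supply.)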
*/ 4526 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4527 { 4528 return float32_to_float16(a, true, s); 4529 } 4530 4531 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4532 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4533 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4534 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4535 4536 /* 4537 *** Vector Reduction Operations 4538 */ 4539 /* Vector Single-Width Integer Reduction Instructions */ 4540 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4541 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4542 void *vs2, CPURISCVState *env, uint32_t desc) \ 4543 { \ 4544 uint32_t vm = vext_vm(desc); \ 4545 uint32_t vl = env->vl; \ 4546 uint32_t esz = sizeof(TD); \ 4547 uint32_t vlenb = simd_maxsz(desc); \ 4548 uint32_t vta = vext_vta(desc); \ 4549 uint32_t i; \ 4550 TD s1 = *((TD *)vs1 + HD(0)); \ 4551 \ 4552 for (i = env->vstart; i < vl; i++) { \ 4553 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4554 if (!vm && !vext_elem_mask(v0, i)) { \ 4555 continue; \ 4556 } \ 4557 s1 = OP(s1, (TD)s2); \ 4558 } \ 4559 *((TD *)vd + HD(0)) = s1; \ 4560 env->vstart = 0; \ 4561 /* set tail elements to 1s */ \ 4562 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4563 } 4564 4565 /* vd[0] = sum(vs1[0], vs2[*]) */ 4566 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4567 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4568 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4569 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4570 4571 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4572 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4573 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4574 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4575 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4576 4577 /* vd[0] = max(vs1[0], vs2[*]) */ 4578 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4579 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4580 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4581 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4582 4583 /* vd[0] = minu(vs1[0], vs2[*]) */ 4584 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4585 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4586 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4587 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4588 4589 /* vd[0] = min(vs1[0], vs2[*]) */ 4590 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4591 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4592 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4593 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4594 4595 /* vd[0] = and(vs1[0], vs2[*]) */ 4596 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4597 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4598 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4599 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4600 4601 /* vd[0] = or(vs1[0], vs2[*]) */ 4602 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4603 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4604 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4605 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4606 4607 /* vd[0] = xor(vs1[0], vs2[*]) */ 4608 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4609 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, 
DO_XOR) 4610 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4611 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4612 4613 /* Vector Widening Integer Reduction Instructions */ 4614 /* signed sum reduction into double-width accumulator */ 4615 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4616 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4617 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4618 4619 /* Unsigned sum reduction into double-width accumulator */ 4620 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4621 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4622 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4623 4624 /* Vector Single-Width Floating-Point Reduction Instructions */ 4625 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4626 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4627 void *vs2, CPURISCVState *env, \ 4628 uint32_t desc) \ 4629 { \ 4630 uint32_t vm = vext_vm(desc); \ 4631 uint32_t vl = env->vl; \ 4632 uint32_t esz = sizeof(TD); \ 4633 uint32_t vlenb = simd_maxsz(desc); \ 4634 uint32_t vta = vext_vta(desc); \ 4635 uint32_t i; \ 4636 TD s1 = *((TD *)vs1 + HD(0)); \ 4637 \ 4638 for (i = env->vstart; i < vl; i++) { \ 4639 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4640 if (!vm && !vext_elem_mask(v0, i)) { \ 4641 continue; \ 4642 } \ 4643 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4644 } \ 4645 *((TD *)vd + HD(0)) = s1; \ 4646 env->vstart = 0; \ 4647 /* set tail elements to 1s */ \ 4648 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4649 } 4650 4651 /* Unordered sum */ 4652 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4653 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4654 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4655 4656 /* Maximum value */ 4657 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4658 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4659 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4660 4661 /* Minimum value */ 4662 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4663 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4664 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4665 4666 /* Vector Widening Floating-Point Reduction Instructions */ 4667 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4668 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4669 void *vs2, CPURISCVState *env, uint32_t desc) 4670 { 4671 uint32_t vm = vext_vm(desc); 4672 uint32_t vl = env->vl; 4673 uint32_t esz = sizeof(uint32_t); 4674 uint32_t vlenb = simd_maxsz(desc); 4675 uint32_t vta = vext_vta(desc); 4676 uint32_t i; 4677 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4678 4679 for (i = env->vstart; i < vl; i++) { 4680 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4681 if (!vm && !vext_elem_mask(v0, i)) { 4682 continue; 4683 } 4684 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4685 &env->fp_status); 4686 } 4687 *((uint32_t *)vd + H4(0)) = s1; 4688 env->vstart = 0; 4689 /* set tail elements to 1s */ 4690 vext_set_elems_1s(vd, vta, esz, vlenb); 4691 } 4692 4693 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4694 void *vs2, CPURISCVState *env, uint32_t desc) 4695 { 4696 uint32_t vm = vext_vm(desc); 4697 uint32_t vl = env->vl; 4698 uint32_t esz = 
sizeof(uint64_t); 4699 uint32_t vlenb = simd_maxsz(desc); 4700 uint32_t vta = vext_vta(desc); 4701 uint32_t i; 4702 uint64_t s1 = *((uint64_t *)vs1); 4703 4704 for (i = env->vstart; i < vl; i++) { 4705 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4706 if (!vm && !vext_elem_mask(v0, i)) { 4707 continue; 4708 } 4709 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4710 &env->fp_status); 4711 } 4712 *((uint64_t *)vd) = s1; 4713 env->vstart = 0; 4714 /* set tail elements to 1s */ 4715 vext_set_elems_1s(vd, vta, esz, vlenb); 4716 } 4717 4718 /* 4719 *** Vector Mask Operations 4720 */ 4721 /* Vector Mask-Register Logical Instructions */ 4722 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4723 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4724 void *vs2, CPURISCVState *env, \ 4725 uint32_t desc) \ 4726 { \ 4727 uint32_t vl = env->vl; \ 4728 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 4729 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4730 uint32_t i; \ 4731 int a, b; \ 4732 \ 4733 for (i = env->vstart; i < vl; i++) { \ 4734 a = vext_elem_mask(vs1, i); \ 4735 b = vext_elem_mask(vs2, i); \ 4736 vext_set_elem_mask(vd, i, OP(b, a)); \ 4737 } \ 4738 env->vstart = 0; \ 4739 /* mask destination register are always tail- \ 4740 * agnostic \ 4741 */ \ 4742 /* set tail elements to 1s */ \ 4743 if (vta_all_1s) { \ 4744 for (; i < total_elems; i++) { \ 4745 vext_set_elem_mask(vd, i, 1); \ 4746 } \ 4747 } \ 4748 } 4749 4750 #define DO_NAND(N, M) (!(N & M)) 4751 #define DO_ANDNOT(N, M) (N & !M) 4752 #define DO_NOR(N, M) (!(N | M)) 4753 #define DO_ORNOT(N, M) (N | !M) 4754 #define DO_XNOR(N, M) (!(N ^ M)) 4755 4756 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4757 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4758 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4759 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4760 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4761 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4762 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4763 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4764 4765 /* Vector count population in mask vcpop */ 4766 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4767 uint32_t desc) 4768 { 4769 target_ulong cnt = 0; 4770 uint32_t vm = vext_vm(desc); 4771 uint32_t vl = env->vl; 4772 int i; 4773 4774 for (i = env->vstart; i < vl; i++) { 4775 if (vm || vext_elem_mask(v0, i)) { 4776 if (vext_elem_mask(vs2, i)) { 4777 cnt++; 4778 } 4779 } 4780 } 4781 env->vstart = 0; 4782 return cnt; 4783 } 4784 4785 /* vfirst find-first-set mask bit*/ 4786 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4787 uint32_t desc) 4788 { 4789 uint32_t vm = vext_vm(desc); 4790 uint32_t vl = env->vl; 4791 int i; 4792 4793 for (i = env->vstart; i < vl; i++) { 4794 if (vm || vext_elem_mask(v0, i)) { 4795 if (vext_elem_mask(vs2, i)) { 4796 return i; 4797 } 4798 } 4799 } 4800 env->vstart = 0; 4801 return -1LL; 4802 } 4803 4804 enum set_mask_type { 4805 ONLY_FIRST = 1, 4806 INCLUDE_FIRST, 4807 BEFORE_FIRST, 4808 }; 4809 4810 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4811 uint32_t desc, enum set_mask_type type) 4812 { 4813 uint32_t vm = vext_vm(desc); 4814 uint32_t vl = env->vl; 4815 uint32_t total_elems = env_archcpu(env)->cfg.vlen; 4816 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4817 int i; 4818 bool first_mask_bit = false; 4819 4820 for (i = env->vstart; i < vl; i++) { 4821 if (!vm && !vext_elem_mask(v0, i)) { 4822 continue; 4823 } 4824 /* write a zero to all following active elements */ 4825 if (first_mask_bit) { 4826 vext_set_elem_mask(vd, i, 0); 4827 continue; 4828 } 4829 if 
(vext_elem_mask(vs2, i)) { 4830 first_mask_bit = true; 4831 if (type == BEFORE_FIRST) { 4832 vext_set_elem_mask(vd, i, 0); 4833 } else { 4834 vext_set_elem_mask(vd, i, 1); 4835 } 4836 } else { 4837 if (type == ONLY_FIRST) { 4838 vext_set_elem_mask(vd, i, 0); 4839 } else { 4840 vext_set_elem_mask(vd, i, 1); 4841 } 4842 } 4843 } 4844 env->vstart = 0; 4845 /* mask destination register are always tail-agnostic */ 4846 /* set tail elements to 1s */ 4847 if (vta_all_1s) { 4848 for (; i < total_elems; i++) { 4849 vext_set_elem_mask(vd, i, 1); 4850 } 4851 } 4852 } 4853 4854 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4855 uint32_t desc) 4856 { 4857 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4858 } 4859 4860 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4861 uint32_t desc) 4862 { 4863 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4864 } 4865 4866 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4867 uint32_t desc) 4868 { 4869 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4870 } 4871 4872 /* Vector Iota Instruction */ 4873 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4874 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4875 uint32_t desc) \ 4876 { \ 4877 uint32_t vm = vext_vm(desc); \ 4878 uint32_t vl = env->vl; \ 4879 uint32_t esz = sizeof(ETYPE); \ 4880 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4881 uint32_t vta = vext_vta(desc); \ 4882 uint32_t sum = 0; \ 4883 int i; \ 4884 \ 4885 for (i = env->vstart; i < vl; i++) { \ 4886 if (!vm && !vext_elem_mask(v0, i)) { \ 4887 continue; \ 4888 } \ 4889 *((ETYPE *)vd + H(i)) = sum; \ 4890 if (vext_elem_mask(vs2, i)) { \ 4891 sum++; \ 4892 } \ 4893 } \ 4894 env->vstart = 0; \ 4895 /* set tail elements to 1s */ \ 4896 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4897 } 4898 4899 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4900 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4901 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4902 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4903 4904 /* Vector Element Index Instruction */ 4905 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4906 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4907 { \ 4908 uint32_t vm = vext_vm(desc); \ 4909 uint32_t vl = env->vl; \ 4910 uint32_t esz = sizeof(ETYPE); \ 4911 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4912 uint32_t vta = vext_vta(desc); \ 4913 int i; \ 4914 \ 4915 for (i = env->vstart; i < vl; i++) { \ 4916 if (!vm && !vext_elem_mask(v0, i)) { \ 4917 continue; \ 4918 } \ 4919 *((ETYPE *)vd + H(i)) = i; \ 4920 } \ 4921 env->vstart = 0; \ 4922 /* set tail elements to 1s */ \ 4923 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4924 } 4925 4926 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4927 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4928 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4929 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4930 4931 /* 4932 *** Vector Permutation Instructions 4933 */ 4934 4935 /* Vector Slide Instructions */ 4936 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4937 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4938 CPURISCVState *env, uint32_t desc) \ 4939 { \ 4940 uint32_t vm = vext_vm(desc); \ 4941 uint32_t vl = env->vl; \ 4942 uint32_t esz = sizeof(ETYPE); \ 4943 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4944 uint32_t vta = vext_vta(desc); \ 4945 target_ulong offset = s1, i_min, i; \ 4946 \ 4947 i_min = MAX(env->vstart, offset); \ 4948 for (i = i_min; i < vl; i++) { \ 
/*
 *** Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t,  H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    target_ulong i_max, i;                                                \
                                                                          \
    i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));          \
        }                                                                 \
    }                                                                     \
                                                                          \
    for (i = i_max; i < vl; ++i) {                                        \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        }                                                                 \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t,  H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)

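/*
 * Worked example, assuming VLMAX = vl = 6, offset rs1 = 2 and all
 * elements active, with vs2 = {a, b, c, d, e, f}:
 *
 *     vslideup.vx   leaves vd[0..1] unchanged and writes
 *                   vd = {., ., a, b, c, d}
 *     vslidedown.vx reads past VLMAX as zero and writes
 *                   vd = {c, d, e, f, 0, 0}
 */
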
#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                    \
static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1,     \
                                 void *vs2, CPURISCVState *env,           \
                                 uint32_t desc)                           \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        if (i == 0) {                                                     \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIE1UP(8,  H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                     \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);             \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                 \
static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1,   \
                                   void *vs2, CPURISCVState *env,         \
                                   uint32_t desc)                         \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1DOWN(8,  H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                   \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)              \
{                                                                 \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);           \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);         \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)              \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);       \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)

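/*
 * Worked example, assuming vl = 4, all elements active, x[rs1] = s and
 * vs2 = {a, b, c, d}:
 *
 *     vslide1up.vx   -> vd = {s, a, b, c}
 *     vslide1down.vx -> vd = {b, c, d, s}
 *
 * The vfslide1up.vf/vfslide1down.vf helpers above reuse the same
 * integer slide bodies; only the scalar operand comes from an FP
 * register instead of an integer register.
 */
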
/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

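/*
 * Worked example, assuming VLMAX = vl = 4 and all elements active,
 * with vs2 = {a, b, c, d}:
 *
 *     vrgather.vv with vs1 = {3, 0, 0, 9}   -> vd = {d, a, a, 0}
 *                 (index 9 is >= VLMAX, so that element reads as 0)
 *     vcompress.vm with vs1 mask = 0 1 0 1  -> vd = {b, d, ...}
 *                 (only the first two elements of vd are written by
 *                  the loop; the rest are left to tail handling)
 */
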
/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
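
/*
 * Illustrative example: vzext.vf2 and vsext.vf2 read SEW/2-wide source
 * elements and write SEW-wide results, so with SEW = 16 a source byte
 * of 0x80 becomes 0x0080 under vzext.vf2 and 0xff80 under vsext.vf2.
 * The zero or sign extension itself falls out of the unsigned or
 * signed ETYPE/DTYPE pairs passed to GEN_VEXT_INT_EXT above.
 */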