1 /* 2 * RISC-V Vector Extension Helpers for QEMU. 3 * 4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2 or later, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "qemu/host-utils.h" 21 #include "qemu/bitops.h" 22 #include "cpu.h" 23 #include "exec/memop.h" 24 #include "exec/exec-all.h" 25 #include "exec/helper-proto.h" 26 #include "fpu/softfloat.h" 27 #include "tcg/tcg-gvec-desc.h" 28 #include "internals.h" 29 #include <math.h> 30 31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, 32 target_ulong s2) 33 { 34 int vlmax, vl; 35 RISCVCPU *cpu = env_archcpu(env); 36 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL); 37 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW); 38 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); 39 int xlen = riscv_cpu_xlen(env); 40 bool vill = (s2 >> (xlen - 1)) & 0x1; 41 target_ulong reserved = s2 & 42 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT, 43 xlen - 1 - R_VTYPE_RESERVED_SHIFT); 44 45 if (lmul & 4) { 46 /* Fractional LMUL. */ 47 if (lmul == 4 || 48 cpu->cfg.elen >> (8 - lmul) < sew) { 49 vill = true; 50 } 51 } 52 53 if ((sew > cpu->cfg.elen) 54 || vill 55 || (ediv != 0) 56 || (reserved != 0)) { 57 /* only set vill bit. */ 58 env->vill = 1; 59 env->vtype = 0; 60 env->vl = 0; 61 env->vstart = 0; 62 return 0; 63 } 64 65 vlmax = vext_get_vlmax(cpu, s2); 66 if (s1 <= vlmax) { 67 vl = s1; 68 } else { 69 vl = vlmax; 70 } 71 env->vl = vl; 72 env->vtype = s2; 73 env->vstart = 0; 74 env->vill = 0; 75 return vl; 76 } 77 78 /* 79 * Note that vector data is stored in host-endian 64-bit chunks, 80 * so addressing units smaller than that needs a host-endian fixup. 81 */ 82 #if HOST_BIG_ENDIAN 83 #define H1(x) ((x) ^ 7) 84 #define H1_2(x) ((x) ^ 6) 85 #define H1_4(x) ((x) ^ 4) 86 #define H2(x) ((x) ^ 3) 87 #define H4(x) ((x) ^ 1) 88 #define H8(x) ((x)) 89 #else 90 #define H1(x) (x) 91 #define H1_2(x) (x) 92 #define H1_4(x) (x) 93 #define H2(x) (x) 94 #define H4(x) (x) 95 #define H8(x) (x) 96 #endif 97 98 static inline uint32_t vext_nf(uint32_t desc) 99 { 100 return FIELD_EX32(simd_data(desc), VDATA, NF); 101 } 102 103 static inline uint32_t vext_vm(uint32_t desc) 104 { 105 return FIELD_EX32(simd_data(desc), VDATA, VM); 106 } 107 108 /* 109 * Encode LMUL to lmul as following: 110 * LMUL vlmul lmul 111 * 1 000 0 112 * 2 001 1 113 * 4 010 2 114 * 8 011 3 115 * - 100 - 116 * 1/8 101 -3 117 * 1/4 110 -2 118 * 1/2 111 -1 119 */ 120 static inline int32_t vext_lmul(uint32_t desc) 121 { 122 return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); 123 } 124 125 static inline uint32_t vext_vta(uint32_t desc) 126 { 127 return FIELD_EX32(simd_data(desc), VDATA, VTA); 128 } 129 130 static inline uint32_t vext_vta_all_1s(uint32_t desc) 131 { 132 return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); 133 } 134 135 /* 136 * Get the maximum number of elements can be operated. 137 * 138 * log2_esz: log2 of element size in bytes. 
139 */ 140 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) 141 { 142 /* 143 * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits. 144 * so vlen in bytes (vlenb) is encoded as maxsz. 145 */ 146 uint32_t vlenb = simd_maxsz(desc); 147 148 /* Return VLMAX */ 149 int scale = vext_lmul(desc) - log2_esz; 150 return scale < 0 ? vlenb >> -scale : vlenb << scale; 151 } 152 153 /* 154 * Get number of total elements, including prestart, body and tail elements. 155 * Note that when LMUL < 1, the tail includes the elements past VLMAX that 156 * are held in the same vector register. 157 */ 158 static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, 159 uint32_t esz) 160 { 161 uint32_t vlenb = simd_maxsz(desc); 162 uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); 163 int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 : 164 ctzl(esz) - ctzl(sew) + vext_lmul(desc); 165 return (vlenb << emul) / esz; 166 } 167 168 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) 169 { 170 return (addr & env->cur_pmmask) | env->cur_pmbase; 171 } 172 173 /* 174 * This function checks watchpoint before real load operation. 175 * 176 * In softmmu mode, the TLB API probe_access is enough for watchpoint check. 177 * In user mode, there is no watchpoint support now. 178 * 179 * It will trigger an exception if there is no mapping in TLB 180 * and page table walk can't fill the TLB entry. Then the guest 181 * software can return here after process the exception or never return. 182 */ 183 static void probe_pages(CPURISCVState *env, target_ulong addr, 184 target_ulong len, uintptr_t ra, 185 MMUAccessType access_type) 186 { 187 target_ulong pagelen = -(addr | TARGET_PAGE_MASK); 188 target_ulong curlen = MIN(pagelen, len); 189 190 probe_access(env, adjust_addr(env, addr), curlen, access_type, 191 cpu_mmu_index(env, false), ra); 192 if (len > curlen) { 193 addr += curlen; 194 curlen = len - curlen; 195 probe_access(env, adjust_addr(env, addr), curlen, access_type, 196 cpu_mmu_index(env, false), ra); 197 } 198 } 199 200 /* set agnostic elements to 1s */ 201 static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, 202 uint32_t tot) 203 { 204 if (is_agnostic == 0) { 205 /* policy undisturbed */ 206 return; 207 } 208 if (tot - cnt == 0) { 209 return ; 210 } 211 memset(base + cnt, -1, tot - cnt); 212 } 213 214 static inline void vext_set_elem_mask(void *v0, int index, 215 uint8_t value) 216 { 217 int idx = index / 64; 218 int pos = index % 64; 219 uint64_t old = ((uint64_t *)v0)[idx]; 220 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); 221 } 222 223 /* 224 * Earlier designs (pre-0.9) had a varying number of bits 225 * per mask value (MLEN). In the 0.9 design, MLEN=1. 
226 * (Section 4.5) 227 */ 228 static inline int vext_elem_mask(void *v0, int index) 229 { 230 int idx = index / 64; 231 int pos = index % 64; 232 return (((uint64_t *)v0)[idx] >> pos) & 1; 233 } 234 235 /* elements operations for load and store */ 236 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, 237 uint32_t idx, void *vd, uintptr_t retaddr); 238 239 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 240 static void NAME(CPURISCVState *env, abi_ptr addr, \ 241 uint32_t idx, void *vd, uintptr_t retaddr)\ 242 { \ 243 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 244 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 245 } \ 246 247 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) 248 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw) 249 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) 250 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq) 251 252 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 253 static void NAME(CPURISCVState *env, abi_ptr addr, \ 254 uint32_t idx, void *vd, uintptr_t retaddr)\ 255 { \ 256 ETYPE data = *((ETYPE *)vd + H(idx)); \ 257 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 258 } 259 260 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) 261 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) 262 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) 263 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) 264 265 /* 266 *** stride: access vector element from strided memory 267 */ 268 static void 269 vext_ldst_stride(void *vd, void *v0, target_ulong base, 270 target_ulong stride, CPURISCVState *env, 271 uint32_t desc, uint32_t vm, 272 vext_ldst_elem_fn *ldst_elem, 273 uint32_t log2_esz, uintptr_t ra) 274 { 275 uint32_t i, k; 276 uint32_t nf = vext_nf(desc); 277 uint32_t max_elems = vext_max_elems(desc, log2_esz); 278 uint32_t esz = 1 << log2_esz; 279 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 280 uint32_t vta = vext_vta(desc); 281 282 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 283 if (!vm && !vext_elem_mask(v0, i)) { 284 continue; 285 } 286 287 k = 0; 288 while (k < nf) { 289 target_ulong addr = base + stride * i + (k << log2_esz); 290 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 291 k++; 292 } 293 } 294 env->vstart = 0; 295 /* set tail elements to 1s */ 296 for (k = 0; k < nf; ++k) { 297 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, 298 (k * max_elems + max_elems) * esz); 299 } 300 if (nf * max_elems % total_elems != 0) { 301 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 302 uint32_t registers_used = 303 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 304 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 305 registers_used * vlenb); 306 } 307 } 308 309 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 310 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 311 target_ulong stride, CPURISCVState *env, \ 312 uint32_t desc) \ 313 { \ 314 uint32_t vm = vext_vm(desc); \ 315 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 316 ctzl(sizeof(ETYPE)), GETPC()); \ 317 } 318 319 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 320 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 321 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 322 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 323 324 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 325 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 326 target_ulong stride, CPURISCVState *env, \ 327 uint32_t desc) \ 328 { \ 329 uint32_t vm = vext_vm(desc); \ 330 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 331 ctzl(sizeof(ETYPE)), GETPC()); \ 332 } 333 334 
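/*
 * Illustrative sketch only (not compiled, not part of the helpers above):
 * a standalone model of the strided segment access pattern implemented by
 * vext_ldst_stride.  Field k of element i is accessed at
 * base + i * stride + k * esz and lands at register-group offset
 * i + k * max_elems, exactly as in the loop above.  All names below are
 * local to this example and are not QEMU APIs.
 */
#if 0
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Copy one element per active (i, k) pair from a strided guest buffer. */
static void stride_segment_load_model(uint8_t *vd, const uint64_t *v0,
                                      const uint8_t *base, ptrdiff_t stride,
                                      uint32_t vl, uint32_t nf, uint32_t esz,
                                      uint32_t max_elems, bool vm)
{
    for (uint32_t i = 0; i < vl; i++) {
        /* one mask bit per element, as in vext_elem_mask() */
        if (!vm && !((v0[i / 64] >> (i % 64)) & 1)) {
            continue;
        }
        for (uint32_t k = 0; k < nf; k++) {
            /* field k of element i lives at base + i * stride + k * esz */
            const uint8_t *src = base + (ptrdiff_t)i * stride + k * esz;
            /* field k of the destination group starts k * max_elems
             * elements into vd, matching "i + k * max_elems" above */
            uint8_t *dst = vd + (i + k * max_elems) * esz;
            memcpy(dst, src, esz);
        }
    }
}
#endif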
GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 335 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 336 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 337 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 338 339 /* 340 *** unit-stride: access elements stored contiguously in memory 341 */ 342 343 /* unmasked unit-stride load and store operation*/ 344 static void 345 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 346 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, 347 uintptr_t ra) 348 { 349 uint32_t i, k; 350 uint32_t nf = vext_nf(desc); 351 uint32_t max_elems = vext_max_elems(desc, log2_esz); 352 uint32_t esz = 1 << log2_esz; 353 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 354 uint32_t vta = vext_vta(desc); 355 356 /* load bytes from guest memory */ 357 for (i = env->vstart; i < evl; i++, env->vstart++) { 358 k = 0; 359 while (k < nf) { 360 target_ulong addr = base + ((i * nf + k) << log2_esz); 361 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 362 k++; 363 } 364 } 365 env->vstart = 0; 366 /* set tail elements to 1s */ 367 for (k = 0; k < nf; ++k) { 368 vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz, 369 (k * max_elems + max_elems) * esz); 370 } 371 if (nf * max_elems % total_elems != 0) { 372 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 373 uint32_t registers_used = 374 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 375 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 376 registers_used * vlenb); 377 } 378 } 379 380 /* 381 * masked unit-stride load and store operation will be a special case of stride, 382 * stride = NF * sizeof (MTYPE) 383 */ 384 385 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 386 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 387 CPURISCVState *env, uint32_t desc) \ 388 { \ 389 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 390 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 391 ctzl(sizeof(ETYPE)), GETPC()); \ 392 } \ 393 \ 394 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 395 CPURISCVState *env, uint32_t desc) \ 396 { \ 397 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 398 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 399 } 400 401 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 402 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 403 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 404 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 405 406 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 407 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 408 CPURISCVState *env, uint32_t desc) \ 409 { \ 410 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 411 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 412 ctzl(sizeof(ETYPE)), GETPC()); \ 413 } \ 414 \ 415 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 416 CPURISCVState *env, uint32_t desc) \ 417 { \ 418 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 419 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 420 } 421 422 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 423 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 424 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 425 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 426 427 /* 428 *** unit stride mask load and store, EEW = 1 429 */ 430 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 431 CPURISCVState *env, uint32_t desc) 432 { 433 /* evl = ceil(vl/8) */ 434 uint8_t evl = (env->vl + 7) >> 3; 435 vext_ldst_us(vd, base, env, desc, lde_b, 436 0, evl, GETPC()); 437 } 438 439 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 440 CPURISCVState 
*env, uint32_t desc) 441 { 442 /* evl = ceil(vl/8) */ 443 uint8_t evl = (env->vl + 7) >> 3; 444 vext_ldst_us(vd, base, env, desc, ste_b, 445 0, evl, GETPC()); 446 } 447 448 /* 449 *** index: access vector element from indexed memory 450 */ 451 typedef target_ulong vext_get_index_addr(target_ulong base, 452 uint32_t idx, void *vs2); 453 454 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 455 static target_ulong NAME(target_ulong base, \ 456 uint32_t idx, void *vs2) \ 457 { \ 458 return (base + *((ETYPE *)vs2 + H(idx))); \ 459 } 460 461 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 462 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 463 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 464 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 465 466 static inline void 467 vext_ldst_index(void *vd, void *v0, target_ulong base, 468 void *vs2, CPURISCVState *env, uint32_t desc, 469 vext_get_index_addr get_index_addr, 470 vext_ldst_elem_fn *ldst_elem, 471 uint32_t log2_esz, uintptr_t ra) 472 { 473 uint32_t i, k; 474 uint32_t nf = vext_nf(desc); 475 uint32_t vm = vext_vm(desc); 476 uint32_t max_elems = vext_max_elems(desc, log2_esz); 477 uint32_t esz = 1 << log2_esz; 478 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 479 uint32_t vta = vext_vta(desc); 480 481 /* load bytes from guest memory */ 482 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 483 if (!vm && !vext_elem_mask(v0, i)) { 484 continue; 485 } 486 487 k = 0; 488 while (k < nf) { 489 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); 490 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 491 k++; 492 } 493 } 494 env->vstart = 0; 495 /* set tail elements to 1s */ 496 for (k = 0; k < nf; ++k) { 497 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, 498 (k * max_elems + max_elems) * esz); 499 } 500 if (nf * max_elems % total_elems != 0) { 501 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 502 uint32_t registers_used = 503 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 504 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 505 registers_used * vlenb); 506 } 507 } 508 509 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 510 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 511 void *vs2, CPURISCVState *env, uint32_t desc) \ 512 { \ 513 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 514 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ 515 } 516 517 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 518 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 519 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 520 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 521 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 522 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 523 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 524 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 525 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 526 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 527 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 528 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 529 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 530 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 531 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 532 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 533 534 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 535 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 536 void *vs2, CPURISCVState *env, uint32_t desc) \ 537 { \ 538 vext_ldst_index(vd, 
                    v0, base, vs2, env, desc, INDEX_FN,          \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC());                                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
    if (nf * max_elems % total_elems != 0) {
        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
        uint32_t registers_used =
            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
                          registers_used * vlenb);
    }
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong
base, \ 648 CPURISCVState *env, uint32_t desc) \ 649 { \ 650 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \ 651 ctzl(sizeof(ETYPE)), GETPC()); \ 652 } 653 654 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b) 655 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h) 656 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w) 657 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) 658 659 #define DO_SWAP(N, M) (M) 660 #define DO_AND(N, M) (N & M) 661 #define DO_XOR(N, M) (N ^ M) 662 #define DO_OR(N, M) (N | M) 663 #define DO_ADD(N, M) (N + M) 664 665 /* Signed min/max */ 666 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 667 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 668 669 /* Unsigned min/max */ 670 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 671 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 672 673 /* 674 *** load and store whole register instructions 675 */ 676 static void 677 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 678 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra) 679 { 680 uint32_t i, k, off, pos; 681 uint32_t nf = vext_nf(desc); 682 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 683 uint32_t max_elems = vlenb >> log2_esz; 684 685 k = env->vstart / max_elems; 686 off = env->vstart % max_elems; 687 688 if (off) { 689 /* load/store rest of elements of current segment pointed by vstart */ 690 for (pos = off; pos < max_elems; pos++, env->vstart++) { 691 target_ulong addr = base + ((pos + k * max_elems) << log2_esz); 692 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra); 693 } 694 k++; 695 } 696 697 /* load/store elements for rest of segments */ 698 for (; k < nf; k++) { 699 for (i = 0; i < max_elems; i++, env->vstart++) { 700 target_ulong addr = base + ((i + k * max_elems) << log2_esz); 701 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 702 } 703 } 704 705 env->vstart = 0; 706 } 707 708 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 709 void HELPER(NAME)(void *vd, target_ulong base, \ 710 CPURISCVState *env, uint32_t desc) \ 711 { \ 712 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 713 ctzl(sizeof(ETYPE)), GETPC()); \ 714 } 715 716 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 717 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 718 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 719 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 720 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 721 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 722 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 723 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 724 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 725 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 726 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 727 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 728 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 729 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 730 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 731 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 732 733 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 734 void HELPER(NAME)(void *vd, target_ulong base, \ 735 CPURISCVState *env, uint32_t desc) \ 736 { \ 737 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 738 ctzl(sizeof(ETYPE)), GETPC()); \ 739 } 740 741 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 742 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 743 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 744 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 745 746 /* 747 *** Vector Integer Arithmetic Instructions 748 */ 749 750 /* expand macro args before macro */ 751 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 752 753 /* (TD, T1, T2, TX1, TX2) */ 754 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 755 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 756 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 757 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 758 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 759 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 760 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 761 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 762 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 763 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 764 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 765 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 766 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 767 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 768 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 769 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 770 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 771 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 772 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 773 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 774 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 775 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 776 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 777 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 778 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 779 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 780 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 781 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 782 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 783 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 784 785 /* operation of two vector elements */ 786 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 787 788 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 789 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 790 { \ 791 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 792 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 793 *((TD *)vd + HD(i)) = OP(s2, s1); \ 794 } 795 #define DO_SUB(N, M) (N - M) 796 #define DO_RSUB(N, M) (M - N) 797 798 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 799 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 800 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 801 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 802 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 803 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 804 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 805 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 806 807 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 808 CPURISCVState *env, uint32_t desc, 809 opivv2_fn *fn, uint32_t esz) 810 { 811 uint32_t vm = vext_vm(desc); 812 uint32_t vl = env->vl; 813 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 814 uint32_t vta = vext_vta(desc); 815 uint32_t i; 816 817 for (i = env->vstart; i < vl; i++) { 818 if (!vm && !vext_elem_mask(v0, i)) { 819 continue; 820 } 821 fn(vd, vs1, vs2, i); 822 } 823 env->vstart = 0; 824 /* set tail elements to 1s */ 825 vext_set_elems_1s(vd, 
vta, vl * esz, total_elems * esz); 826 } 827 828 /* generate the helpers for OPIVV */ 829 #define GEN_VEXT_VV(NAME, ESZ) \ 830 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 831 void *vs2, CPURISCVState *env, \ 832 uint32_t desc) \ 833 { \ 834 do_vext_vv(vd, v0, vs1, vs2, env, desc, \ 835 do_##NAME, ESZ); \ 836 } 837 838 GEN_VEXT_VV(vadd_vv_b, 1) 839 GEN_VEXT_VV(vadd_vv_h, 2) 840 GEN_VEXT_VV(vadd_vv_w, 4) 841 GEN_VEXT_VV(vadd_vv_d, 8) 842 GEN_VEXT_VV(vsub_vv_b, 1) 843 GEN_VEXT_VV(vsub_vv_h, 2) 844 GEN_VEXT_VV(vsub_vv_w, 4) 845 GEN_VEXT_VV(vsub_vv_d, 8) 846 847 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 848 849 /* 850 * (T1)s1 gives the real operator type. 851 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 852 */ 853 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 854 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 855 { \ 856 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 857 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 858 } 859 860 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 861 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 862 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 863 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 864 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 865 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 866 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 867 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 868 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 869 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 870 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 871 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 872 873 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 874 CPURISCVState *env, uint32_t desc, 875 opivx2_fn fn, uint32_t esz) 876 { 877 uint32_t vm = vext_vm(desc); 878 uint32_t vl = env->vl; 879 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 880 uint32_t vta = vext_vta(desc); 881 uint32_t i; 882 883 for (i = env->vstart; i < vl; i++) { 884 if (!vm && !vext_elem_mask(v0, i)) { 885 continue; 886 } 887 fn(vd, s1, vs2, i); 888 } 889 env->vstart = 0; 890 /* set tail elements to 1s */ 891 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 892 } 893 894 /* generate the helpers for OPIVX */ 895 #define GEN_VEXT_VX(NAME, ESZ) \ 896 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 897 void *vs2, CPURISCVState *env, \ 898 uint32_t desc) \ 899 { \ 900 do_vext_vx(vd, v0, s1, vs2, env, desc, \ 901 do_##NAME, ESZ); \ 902 } 903 904 GEN_VEXT_VX(vadd_vx_b, 1) 905 GEN_VEXT_VX(vadd_vx_h, 2) 906 GEN_VEXT_VX(vadd_vx_w, 4) 907 GEN_VEXT_VX(vadd_vx_d, 8) 908 GEN_VEXT_VX(vsub_vx_b, 1) 909 GEN_VEXT_VX(vsub_vx_h, 2) 910 GEN_VEXT_VX(vsub_vx_w, 4) 911 GEN_VEXT_VX(vsub_vx_d, 8) 912 GEN_VEXT_VX(vrsub_vx_b, 1) 913 GEN_VEXT_VX(vrsub_vx_h, 2) 914 GEN_VEXT_VX(vrsub_vx_w, 4) 915 GEN_VEXT_VX(vrsub_vx_d, 8) 916 917 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 918 { 919 intptr_t oprsz = simd_oprsz(desc); 920 intptr_t i; 921 922 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 923 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 924 } 925 } 926 927 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 928 { 929 intptr_t oprsz = simd_oprsz(desc); 930 intptr_t i; 931 932 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 933 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 934 } 935 } 936 937 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t 
desc) 938 { 939 intptr_t oprsz = simd_oprsz(desc); 940 intptr_t i; 941 942 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 943 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 944 } 945 } 946 947 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 948 { 949 intptr_t oprsz = simd_oprsz(desc); 950 intptr_t i; 951 952 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 953 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 954 } 955 } 956 957 /* Vector Widening Integer Add/Subtract */ 958 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 959 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 960 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 961 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 962 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 963 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 964 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 965 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 966 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 967 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 968 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 969 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 970 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 971 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 972 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 973 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 974 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 975 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 976 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 977 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 978 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 979 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 980 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 981 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 982 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 983 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 984 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 985 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 986 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 987 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 988 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 989 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 990 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 991 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 992 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 993 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 994 GEN_VEXT_VV(vwaddu_vv_b, 2) 995 GEN_VEXT_VV(vwaddu_vv_h, 4) 996 GEN_VEXT_VV(vwaddu_vv_w, 8) 997 GEN_VEXT_VV(vwsubu_vv_b, 2) 998 GEN_VEXT_VV(vwsubu_vv_h, 4) 999 GEN_VEXT_VV(vwsubu_vv_w, 8) 1000 GEN_VEXT_VV(vwadd_vv_b, 2) 1001 GEN_VEXT_VV(vwadd_vv_h, 4) 1002 GEN_VEXT_VV(vwadd_vv_w, 8) 1003 GEN_VEXT_VV(vwsub_vv_b, 2) 1004 GEN_VEXT_VV(vwsub_vv_h, 4) 1005 GEN_VEXT_VV(vwsub_vv_w, 8) 1006 GEN_VEXT_VV(vwaddu_wv_b, 2) 1007 GEN_VEXT_VV(vwaddu_wv_h, 4) 1008 GEN_VEXT_VV(vwaddu_wv_w, 8) 1009 GEN_VEXT_VV(vwsubu_wv_b, 2) 1010 GEN_VEXT_VV(vwsubu_wv_h, 4) 1011 GEN_VEXT_VV(vwsubu_wv_w, 8) 1012 GEN_VEXT_VV(vwadd_wv_b, 2) 1013 GEN_VEXT_VV(vwadd_wv_h, 4) 1014 GEN_VEXT_VV(vwadd_wv_w, 8) 1015 
GEN_VEXT_VV(vwsub_wv_b, 2) 1016 GEN_VEXT_VV(vwsub_wv_h, 4) 1017 GEN_VEXT_VV(vwsub_wv_w, 8) 1018 1019 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 1020 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 1021 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 1022 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 1023 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 1024 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 1025 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 1026 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 1027 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 1028 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 1029 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 1030 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 1031 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 1032 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 1033 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 1034 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 1035 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 1036 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 1037 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 1038 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 1039 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 1040 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 1041 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 1042 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 1043 GEN_VEXT_VX(vwaddu_vx_b, 2) 1044 GEN_VEXT_VX(vwaddu_vx_h, 4) 1045 GEN_VEXT_VX(vwaddu_vx_w, 8) 1046 GEN_VEXT_VX(vwsubu_vx_b, 2) 1047 GEN_VEXT_VX(vwsubu_vx_h, 4) 1048 GEN_VEXT_VX(vwsubu_vx_w, 8) 1049 GEN_VEXT_VX(vwadd_vx_b, 2) 1050 GEN_VEXT_VX(vwadd_vx_h, 4) 1051 GEN_VEXT_VX(vwadd_vx_w, 8) 1052 GEN_VEXT_VX(vwsub_vx_b, 2) 1053 GEN_VEXT_VX(vwsub_vx_h, 4) 1054 GEN_VEXT_VX(vwsub_vx_w, 8) 1055 GEN_VEXT_VX(vwaddu_wx_b, 2) 1056 GEN_VEXT_VX(vwaddu_wx_h, 4) 1057 GEN_VEXT_VX(vwaddu_wx_w, 8) 1058 GEN_VEXT_VX(vwsubu_wx_b, 2) 1059 GEN_VEXT_VX(vwsubu_wx_h, 4) 1060 GEN_VEXT_VX(vwsubu_wx_w, 8) 1061 GEN_VEXT_VX(vwadd_wx_b, 2) 1062 GEN_VEXT_VX(vwadd_wx_h, 4) 1063 GEN_VEXT_VX(vwadd_wx_w, 8) 1064 GEN_VEXT_VX(vwsub_wx_b, 2) 1065 GEN_VEXT_VX(vwsub_wx_h, 4) 1066 GEN_VEXT_VX(vwsub_wx_w, 8) 1067 1068 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 1069 #define DO_VADC(N, M, C) (N + M + C) 1070 #define DO_VSBC(N, M, C) (N - M - C) 1071 1072 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 1073 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1074 CPURISCVState *env, uint32_t desc) \ 1075 { \ 1076 uint32_t vl = env->vl; \ 1077 uint32_t esz = sizeof(ETYPE); \ 1078 uint32_t total_elems = \ 1079 vext_get_total_elems(env, desc, esz); \ 1080 uint32_t vta = vext_vta(desc); \ 1081 uint32_t i; \ 1082 \ 1083 for (i = env->vstart; i < vl; i++) { \ 1084 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1085 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1086 ETYPE carry = vext_elem_mask(v0, i); \ 1087 \ 1088 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 1089 } \ 1090 env->vstart = 0; \ 1091 /* set tail elements to 1s */ \ 1092 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1093 } 1094 1095 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 1096 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 1097 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 1098 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 1099 1100 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, 
DO_VSBC) 1101 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 1102 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 1103 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 1104 1105 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 1106 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1107 CPURISCVState *env, uint32_t desc) \ 1108 { \ 1109 uint32_t vl = env->vl; \ 1110 uint32_t esz = sizeof(ETYPE); \ 1111 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1112 uint32_t vta = vext_vta(desc); \ 1113 uint32_t i; \ 1114 \ 1115 for (i = env->vstart; i < vl; i++) { \ 1116 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1117 ETYPE carry = vext_elem_mask(v0, i); \ 1118 \ 1119 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1120 } \ 1121 env->vstart = 0; \ 1122 /* set tail elements to 1s */ \ 1123 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1124 } 1125 1126 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1127 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1128 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1129 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1130 1131 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1132 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1133 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1134 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1135 1136 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1137 (__typeof(N))(N + M) < N) 1138 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 1139 1140 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1141 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1142 CPURISCVState *env, uint32_t desc) \ 1143 { \ 1144 uint32_t vl = env->vl; \ 1145 uint32_t vm = vext_vm(desc); \ 1146 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1147 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1148 uint32_t i; \ 1149 \ 1150 for (i = env->vstart; i < vl; i++) { \ 1151 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1152 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1153 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1154 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1155 } \ 1156 env->vstart = 0; \ 1157 /* mask destination register are always tail-agnostic */ \ 1158 /* set tail elements to 1s */ \ 1159 if (vta_all_1s) { \ 1160 for (; i < total_elems; i++) { \ 1161 vext_set_elem_mask(vd, i, 1); \ 1162 } \ 1163 } \ 1164 } 1165 1166 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1167 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1168 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1169 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1170 1171 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1172 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1173 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1174 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1175 1176 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1177 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1178 void *vs2, CPURISCVState *env, uint32_t desc) \ 1179 { \ 1180 uint32_t vl = env->vl; \ 1181 uint32_t vm = vext_vm(desc); \ 1182 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1183 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1184 uint32_t i; \ 1185 \ 1186 for (i = env->vstart; i < vl; i++) { \ 1187 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1188 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1189 vext_set_elem_mask(vd, i, \ 1190 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1191 } \ 1192 env->vstart = 0; \ 1193 
/* mask destination register are always tail-agnostic */ \ 1194 /* set tail elements to 1s */ \ 1195 if (vta_all_1s) { \ 1196 for (; i < total_elems; i++) { \ 1197 vext_set_elem_mask(vd, i, 1); \ 1198 } \ 1199 } \ 1200 } 1201 1202 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1203 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1204 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1205 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1206 1207 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1208 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1209 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1210 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1211 1212 /* Vector Bitwise Logical Instructions */ 1213 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1214 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1215 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1216 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1217 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1218 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1219 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1220 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1221 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1222 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1223 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1224 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1225 GEN_VEXT_VV(vand_vv_b, 1) 1226 GEN_VEXT_VV(vand_vv_h, 2) 1227 GEN_VEXT_VV(vand_vv_w, 4) 1228 GEN_VEXT_VV(vand_vv_d, 8) 1229 GEN_VEXT_VV(vor_vv_b, 1) 1230 GEN_VEXT_VV(vor_vv_h, 2) 1231 GEN_VEXT_VV(vor_vv_w, 4) 1232 GEN_VEXT_VV(vor_vv_d, 8) 1233 GEN_VEXT_VV(vxor_vv_b, 1) 1234 GEN_VEXT_VV(vxor_vv_h, 2) 1235 GEN_VEXT_VV(vxor_vv_w, 4) 1236 GEN_VEXT_VV(vxor_vv_d, 8) 1237 1238 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1239 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1240 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1241 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1242 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1243 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1244 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1245 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1246 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1247 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1248 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1249 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1250 GEN_VEXT_VX(vand_vx_b, 1) 1251 GEN_VEXT_VX(vand_vx_h, 2) 1252 GEN_VEXT_VX(vand_vx_w, 4) 1253 GEN_VEXT_VX(vand_vx_d, 8) 1254 GEN_VEXT_VX(vor_vx_b, 1) 1255 GEN_VEXT_VX(vor_vx_h, 2) 1256 GEN_VEXT_VX(vor_vx_w, 4) 1257 GEN_VEXT_VX(vor_vx_d, 8) 1258 GEN_VEXT_VX(vxor_vx_b, 1) 1259 GEN_VEXT_VX(vxor_vx_h, 2) 1260 GEN_VEXT_VX(vxor_vx_w, 4) 1261 GEN_VEXT_VX(vxor_vx_d, 8) 1262 1263 /* Vector Single-Width Bit Shift Instructions */ 1264 #define DO_SLL(N, M) (N << (M)) 1265 #define DO_SRL(N, M) (N >> (M)) 1266 1267 /* generate the helpers for shift instructions with two vector operators */ 1268 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1269 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1270 void *vs2, CPURISCVState *env, uint32_t desc) \ 1271 { \ 1272 uint32_t vm = vext_vm(desc); \ 1273 uint32_t vl = env->vl; \ 1274 uint32_t esz = sizeof(TS1); \ 1275 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1276 uint32_t vta = vext_vta(desc); \ 1277 uint32_t 
i; \ 1278 \ 1279 for (i = env->vstart; i < vl; i++) { \ 1280 if (!vm && !vext_elem_mask(v0, i)) { \ 1281 continue; \ 1282 } \ 1283 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1284 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1285 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1286 } \ 1287 env->vstart = 0; \ 1288 /* set tail elements to 1s */ \ 1289 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1290 } 1291 1292 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1293 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1294 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1295 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1296 1297 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1298 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1299 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1300 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1301 1302 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1303 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1304 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1305 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1306 1307 /* generate the helpers for shift instructions with one vector and one scalar */ 1308 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1309 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1310 void *vs2, CPURISCVState *env, uint32_t desc) \ 1311 { \ 1312 uint32_t vm = vext_vm(desc); \ 1313 uint32_t vl = env->vl; \ 1314 uint32_t esz = sizeof(TD); \ 1315 uint32_t total_elems = \ 1316 vext_get_total_elems(env, desc, esz); \ 1317 uint32_t vta = vext_vta(desc); \ 1318 uint32_t i; \ 1319 \ 1320 for (i = env->vstart; i < vl; i++) { \ 1321 if (!vm && !vext_elem_mask(v0, i)) { \ 1322 continue; \ 1323 } \ 1324 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1325 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1326 } \ 1327 env->vstart = 0; \ 1328 /* set tail elements to 1s */ \ 1329 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ 1330 } 1331 1332 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1333 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1334 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1335 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1336 1337 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1338 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1339 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1340 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1341 1342 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1343 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1344 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1345 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1346 1347 /* Vector Narrowing Integer Right Shift Instructions */ 1348 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1349 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1350 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1351 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1352 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1353 GEN_VEXT_SHIFT_VV(vnsra_wv_w, 
uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1354 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1355 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1356 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1357 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1358 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1359 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1360 1361 /* Vector Integer Comparison Instructions */ 1362 #define DO_MSEQ(N, M) (N == M) 1363 #define DO_MSNE(N, M) (N != M) 1364 #define DO_MSLT(N, M) (N < M) 1365 #define DO_MSLE(N, M) (N <= M) 1366 #define DO_MSGT(N, M) (N > M) 1367 1368 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1369 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1370 CPURISCVState *env, uint32_t desc) \ 1371 { \ 1372 uint32_t vm = vext_vm(desc); \ 1373 uint32_t vl = env->vl; \ 1374 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1375 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1376 uint32_t i; \ 1377 \ 1378 for (i = env->vstart; i < vl; i++) { \ 1379 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1380 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1381 if (!vm && !vext_elem_mask(v0, i)) { \ 1382 continue; \ 1383 } \ 1384 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1385 } \ 1386 env->vstart = 0; \ 1387 /* mask destination register are always tail-agnostic */ \ 1388 /* set tail elements to 1s */ \ 1389 if (vta_all_1s) { \ 1390 for (; i < total_elems; i++) { \ 1391 vext_set_elem_mask(vd, i, 1); \ 1392 } \ 1393 } \ 1394 } 1395 1396 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1397 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1398 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1399 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1400 1401 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1402 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1403 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1404 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1405 1406 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1407 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1408 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1409 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1410 1411 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1412 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1413 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1414 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1415 1416 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1417 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1418 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1419 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1420 1421 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1422 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1423 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1424 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1425 1426 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1427 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1428 CPURISCVState *env, uint32_t desc) \ 1429 { \ 1430 uint32_t vm = vext_vm(desc); \ 1431 uint32_t vl = env->vl; \ 1432 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1433 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1434 uint32_t i; \ 1435 \ 1436 for (i = env->vstart; i < vl; i++) { \ 1437 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1438 if (!vm && !vext_elem_mask(v0, i)) { \ 1439 continue; \ 1440 } \ 1441 vext_set_elem_mask(vd, i, \ 
1442 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1443 } \ 1444 env->vstart = 0; \ 1445 /* mask destination register are always tail-agnostic */ \ 1446 /* set tail elements to 1s */ \ 1447 if (vta_all_1s) { \ 1448 for (; i < total_elems; i++) { \ 1449 vext_set_elem_mask(vd, i, 1); \ 1450 } \ 1451 } \ 1452 } 1453 1454 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1455 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1456 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1457 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1458 1459 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1460 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1461 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1462 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1463 1464 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1465 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1466 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1467 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1468 1469 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1470 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1471 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1472 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1473 1474 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1475 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1476 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1477 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1478 1479 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1480 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1481 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1482 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1483 1484 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1485 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1486 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1487 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1488 1489 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1490 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1491 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1492 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1493 1494 /* Vector Integer Min/Max Instructions */ 1495 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1496 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1497 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1498 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1499 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1500 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1501 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1502 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1503 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1504 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1505 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1506 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1507 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1508 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1509 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1510 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1511 GEN_VEXT_VV(vminu_vv_b, 1) 1512 GEN_VEXT_VV(vminu_vv_h, 2) 1513 GEN_VEXT_VV(vminu_vv_w, 4) 1514 GEN_VEXT_VV(vminu_vv_d, 8) 1515 GEN_VEXT_VV(vmin_vv_b, 1) 1516 GEN_VEXT_VV(vmin_vv_h, 2) 1517 GEN_VEXT_VV(vmin_vv_w, 4) 1518 GEN_VEXT_VV(vmin_vv_d, 8) 1519 GEN_VEXT_VV(vmaxu_vv_b, 1) 1520 GEN_VEXT_VV(vmaxu_vv_h, 2) 1521 GEN_VEXT_VV(vmaxu_vv_w, 4) 1522 GEN_VEXT_VV(vmaxu_vv_d, 
8) 1523 GEN_VEXT_VV(vmax_vv_b, 1) 1524 GEN_VEXT_VV(vmax_vv_h, 2) 1525 GEN_VEXT_VV(vmax_vv_w, 4) 1526 GEN_VEXT_VV(vmax_vv_d, 8) 1527 1528 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1529 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1530 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1531 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1532 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1533 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1534 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1535 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1536 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1537 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1538 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1539 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1540 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1541 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1542 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1543 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1544 GEN_VEXT_VX(vminu_vx_b, 1) 1545 GEN_VEXT_VX(vminu_vx_h, 2) 1546 GEN_VEXT_VX(vminu_vx_w, 4) 1547 GEN_VEXT_VX(vminu_vx_d, 8) 1548 GEN_VEXT_VX(vmin_vx_b, 1) 1549 GEN_VEXT_VX(vmin_vx_h, 2) 1550 GEN_VEXT_VX(vmin_vx_w, 4) 1551 GEN_VEXT_VX(vmin_vx_d, 8) 1552 GEN_VEXT_VX(vmaxu_vx_b, 1) 1553 GEN_VEXT_VX(vmaxu_vx_h, 2) 1554 GEN_VEXT_VX(vmaxu_vx_w, 4) 1555 GEN_VEXT_VX(vmaxu_vx_d, 8) 1556 GEN_VEXT_VX(vmax_vx_b, 1) 1557 GEN_VEXT_VX(vmax_vx_h, 2) 1558 GEN_VEXT_VX(vmax_vx_w, 4) 1559 GEN_VEXT_VX(vmax_vx_d, 8) 1560 1561 /* Vector Single-Width Integer Multiply Instructions */ 1562 #define DO_MUL(N, M) (N * M) 1563 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1564 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1565 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1566 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1567 GEN_VEXT_VV(vmul_vv_b, 1) 1568 GEN_VEXT_VV(vmul_vv_h, 2) 1569 GEN_VEXT_VV(vmul_vv_w, 4) 1570 GEN_VEXT_VV(vmul_vv_d, 8) 1571 1572 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1573 { 1574 return (int16_t)s2 * (int16_t)s1 >> 8; 1575 } 1576 1577 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1578 { 1579 return (int32_t)s2 * (int32_t)s1 >> 16; 1580 } 1581 1582 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1583 { 1584 return (int64_t)s2 * (int64_t)s1 >> 32; 1585 } 1586 1587 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1588 { 1589 uint64_t hi_64, lo_64; 1590 1591 muls64(&lo_64, &hi_64, s1, s2); 1592 return hi_64; 1593 } 1594 1595 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1596 { 1597 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1598 } 1599 1600 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1601 { 1602 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1603 } 1604 1605 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1606 { 1607 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1608 } 1609 1610 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1611 { 1612 uint64_t hi_64, lo_64; 1613 1614 mulu64(&lo_64, &hi_64, s2, s1); 1615 return hi_64; 1616 } 1617 1618 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1619 { 1620 return (int16_t)s2 * (uint16_t)s1 >> 8; 1621 } 1622 1623 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1624 { 1625 return (int32_t)s2 * (uint32_t)s1 >> 16; 1626 } 1627 1628 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1629 { 1630 return (int64_t)s2 * (uint64_t)s1 >> 32; 1631 } 1632 1633 /* 1634 * Let A = signed operand, 1635 * B = unsigned operand 1636 * P = mulu64(A, B), 
unsigned product 1637 * 1638 * LET X = 2 ** 64 - A, 2's complement of A 1639 * SP = signed product 1640 * THEN 1641 * IF A < 0 1642 * SP = -X * B 1643 * = -(2 ** 64 - A) * B 1644 * = A * B - 2 ** 64 * B 1645 * = P - 2 ** 64 * B 1646 * ELSE 1647 * SP = P 1648 * THEN 1649 * HI_P -= (A < 0 ? B : 0) 1650 */ 1651 1652 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1653 { 1654 uint64_t hi_64, lo_64; 1655 1656 mulu64(&lo_64, &hi_64, s2, s1); 1657 1658 hi_64 -= s2 < 0 ? s1 : 0; 1659 return hi_64; 1660 } 1661 1662 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1663 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1664 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1665 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1666 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1667 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1668 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1669 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1670 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1671 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1672 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1673 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1674 GEN_VEXT_VV(vmulh_vv_b, 1) 1675 GEN_VEXT_VV(vmulh_vv_h, 2) 1676 GEN_VEXT_VV(vmulh_vv_w, 4) 1677 GEN_VEXT_VV(vmulh_vv_d, 8) 1678 GEN_VEXT_VV(vmulhu_vv_b, 1) 1679 GEN_VEXT_VV(vmulhu_vv_h, 2) 1680 GEN_VEXT_VV(vmulhu_vv_w, 4) 1681 GEN_VEXT_VV(vmulhu_vv_d, 8) 1682 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1683 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1684 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1685 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1686 1687 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1688 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1689 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1690 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1691 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1692 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1693 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1694 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1695 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1696 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1697 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1698 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1699 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1700 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1701 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1702 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1703 GEN_VEXT_VX(vmul_vx_b, 1) 1704 GEN_VEXT_VX(vmul_vx_h, 2) 1705 GEN_VEXT_VX(vmul_vx_w, 4) 1706 GEN_VEXT_VX(vmul_vx_d, 8) 1707 GEN_VEXT_VX(vmulh_vx_b, 1) 1708 GEN_VEXT_VX(vmulh_vx_h, 2) 1709 GEN_VEXT_VX(vmulh_vx_w, 4) 1710 GEN_VEXT_VX(vmulh_vx_d, 8) 1711 GEN_VEXT_VX(vmulhu_vx_b, 1) 1712 GEN_VEXT_VX(vmulhu_vx_h, 2) 1713 GEN_VEXT_VX(vmulhu_vx_w, 4) 1714 GEN_VEXT_VX(vmulhu_vx_d, 8) 1715 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1716 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1717 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1718 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1719 1720 /* Vector Integer Divide Instructions */ 1721 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1722 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1723 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1724 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
N : N / M) 1725 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1726 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1727 1728 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1729 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1730 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1731 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1732 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1733 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1734 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1735 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1736 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1737 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1738 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1739 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1740 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1741 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1742 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1743 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1744 GEN_VEXT_VV(vdivu_vv_b, 1) 1745 GEN_VEXT_VV(vdivu_vv_h, 2) 1746 GEN_VEXT_VV(vdivu_vv_w, 4) 1747 GEN_VEXT_VV(vdivu_vv_d, 8) 1748 GEN_VEXT_VV(vdiv_vv_b, 1) 1749 GEN_VEXT_VV(vdiv_vv_h, 2) 1750 GEN_VEXT_VV(vdiv_vv_w, 4) 1751 GEN_VEXT_VV(vdiv_vv_d, 8) 1752 GEN_VEXT_VV(vremu_vv_b, 1) 1753 GEN_VEXT_VV(vremu_vv_h, 2) 1754 GEN_VEXT_VV(vremu_vv_w, 4) 1755 GEN_VEXT_VV(vremu_vv_d, 8) 1756 GEN_VEXT_VV(vrem_vv_b, 1) 1757 GEN_VEXT_VV(vrem_vv_h, 2) 1758 GEN_VEXT_VV(vrem_vv_w, 4) 1759 GEN_VEXT_VV(vrem_vv_d, 8) 1760 1761 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1762 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1763 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1764 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1765 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1766 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1767 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1768 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1769 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1770 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1771 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1772 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1773 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1774 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1775 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1776 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1777 GEN_VEXT_VX(vdivu_vx_b, 1) 1778 GEN_VEXT_VX(vdivu_vx_h, 2) 1779 GEN_VEXT_VX(vdivu_vx_w, 4) 1780 GEN_VEXT_VX(vdivu_vx_d, 8) 1781 GEN_VEXT_VX(vdiv_vx_b, 1) 1782 GEN_VEXT_VX(vdiv_vx_h, 2) 1783 GEN_VEXT_VX(vdiv_vx_w, 4) 1784 GEN_VEXT_VX(vdiv_vx_d, 8) 1785 GEN_VEXT_VX(vremu_vx_b, 1) 1786 GEN_VEXT_VX(vremu_vx_h, 2) 1787 GEN_VEXT_VX(vremu_vx_w, 4) 1788 GEN_VEXT_VX(vremu_vx_d, 8) 1789 GEN_VEXT_VX(vrem_vx_b, 1) 1790 GEN_VEXT_VX(vrem_vx_h, 2) 1791 GEN_VEXT_VX(vrem_vx_w, 4) 1792 GEN_VEXT_VX(vrem_vx_d, 8) 1793 1794 /* Vector Widening Integer Multiply Instructions */ 1795 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1796 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1797 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1798 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1799 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1800 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1801 
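/*
 * Illustrative sketch, not used by the helpers in this file: the
 * widening multiplies keep the full 2*SEW-bit product because the
 * WOP_* type tuples promote both SEW-wide operands before DO_MUL is
 * applied.  Assuming WOP_SUS_* widens vs2 as signed and vs1 as
 * unsigned (as defined earlier in this file), the byte-element case
 * of vwmulsu behaves like the reference below.
 */
static inline int16_t vwmulsu_b_ref(int8_t s2, uint8_t s1)
{
    /* e.g. s2 = -128, s1 = 255 gives -32640, which needs all 16 bits */
    return (int16_t)((int16_t)s2 * (uint16_t)s1);
}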
RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1802 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1803 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1804 GEN_VEXT_VV(vwmul_vv_b, 2) 1805 GEN_VEXT_VV(vwmul_vv_h, 4) 1806 GEN_VEXT_VV(vwmul_vv_w, 8) 1807 GEN_VEXT_VV(vwmulu_vv_b, 2) 1808 GEN_VEXT_VV(vwmulu_vv_h, 4) 1809 GEN_VEXT_VV(vwmulu_vv_w, 8) 1810 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1811 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1812 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1813 1814 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1815 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1816 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1817 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1818 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1819 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1820 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1821 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1822 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1823 GEN_VEXT_VX(vwmul_vx_b, 2) 1824 GEN_VEXT_VX(vwmul_vx_h, 4) 1825 GEN_VEXT_VX(vwmul_vx_w, 8) 1826 GEN_VEXT_VX(vwmulu_vx_b, 2) 1827 GEN_VEXT_VX(vwmulu_vx_h, 4) 1828 GEN_VEXT_VX(vwmulu_vx_w, 8) 1829 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1830 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1831 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1832 1833 /* Vector Single-Width Integer Multiply-Add Instructions */ 1834 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1835 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1836 { \ 1837 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1838 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1839 TD d = *((TD *)vd + HD(i)); \ 1840 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1841 } 1842 1843 #define DO_MACC(N, M, D) (M * N + D) 1844 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1845 #define DO_MADD(N, M, D) (M * D + N) 1846 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1847 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1848 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1849 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1850 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1851 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1852 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1853 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1854 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1855 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1856 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1857 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1858 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1859 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1860 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1861 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1862 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1863 GEN_VEXT_VV(vmacc_vv_b, 1) 1864 GEN_VEXT_VV(vmacc_vv_h, 2) 1865 GEN_VEXT_VV(vmacc_vv_w, 4) 1866 GEN_VEXT_VV(vmacc_vv_d, 8) 1867 GEN_VEXT_VV(vnmsac_vv_b, 1) 1868 GEN_VEXT_VV(vnmsac_vv_h, 2) 1869 GEN_VEXT_VV(vnmsac_vv_w, 4) 1870 GEN_VEXT_VV(vnmsac_vv_d, 8) 1871 GEN_VEXT_VV(vmadd_vv_b, 1) 1872 GEN_VEXT_VV(vmadd_vv_h, 2) 1873 GEN_VEXT_VV(vmadd_vv_w, 4) 1874 GEN_VEXT_VV(vmadd_vv_d, 8) 1875 GEN_VEXT_VV(vnmsub_vv_b, 1) 1876 GEN_VEXT_VV(vnmsub_vv_h, 2) 1877 GEN_VEXT_VV(vnmsub_vv_w, 4) 1878 GEN_VEXT_VV(vnmsub_vv_d, 8) 1879 1880 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1881 static void 
do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1882 { \ 1883 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1884 TD d = *((TD *)vd + HD(i)); \ 1885 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1886 } 1887 1888 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1889 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1890 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1891 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1892 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1893 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1894 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1895 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1896 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1897 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1898 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1899 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1900 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1901 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1902 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1903 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1904 GEN_VEXT_VX(vmacc_vx_b, 1) 1905 GEN_VEXT_VX(vmacc_vx_h, 2) 1906 GEN_VEXT_VX(vmacc_vx_w, 4) 1907 GEN_VEXT_VX(vmacc_vx_d, 8) 1908 GEN_VEXT_VX(vnmsac_vx_b, 1) 1909 GEN_VEXT_VX(vnmsac_vx_h, 2) 1910 GEN_VEXT_VX(vnmsac_vx_w, 4) 1911 GEN_VEXT_VX(vnmsac_vx_d, 8) 1912 GEN_VEXT_VX(vmadd_vx_b, 1) 1913 GEN_VEXT_VX(vmadd_vx_h, 2) 1914 GEN_VEXT_VX(vmadd_vx_w, 4) 1915 GEN_VEXT_VX(vmadd_vx_d, 8) 1916 GEN_VEXT_VX(vnmsub_vx_b, 1) 1917 GEN_VEXT_VX(vnmsub_vx_h, 2) 1918 GEN_VEXT_VX(vnmsub_vx_w, 4) 1919 GEN_VEXT_VX(vnmsub_vx_d, 8) 1920 1921 /* Vector Widening Integer Multiply-Add Instructions */ 1922 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1923 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1924 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1925 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1926 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1927 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1928 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1929 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1930 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1931 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1932 GEN_VEXT_VV(vwmaccu_vv_h, 4) 1933 GEN_VEXT_VV(vwmaccu_vv_w, 8) 1934 GEN_VEXT_VV(vwmacc_vv_b, 2) 1935 GEN_VEXT_VV(vwmacc_vv_h, 4) 1936 GEN_VEXT_VV(vwmacc_vv_w, 8) 1937 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 1938 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 1939 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 1940 1941 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1942 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1943 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1944 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1945 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1946 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1947 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1948 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1949 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1950 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1951 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1952 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1953 GEN_VEXT_VX(vwmaccu_vx_b, 2) 1954 GEN_VEXT_VX(vwmaccu_vx_h, 4) 1955 GEN_VEXT_VX(vwmaccu_vx_w, 8) 1956 
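/*
 * Illustrative note, not part of the generated helpers: per the RVV
 * spec, vwmacc/vwmaccu accumulate a signed*signed or unsigned*unsigned
 * widening product, vwmaccsu multiplies signed vs1 by unsigned vs2,
 * and vwmaccus, which only has a vector-scalar form (there is no
 * vwmaccus_vv RVVCALL above), multiplies an unsigned scalar by signed
 * vs2.  A reference for the vwmaccsu byte case, under that assumption:
 */
static inline int16_t vwmaccsu_b_ref(int8_t vs1, uint8_t vs2, int16_t vd)
{
    /* widening signed * unsigned multiply, then accumulate into vd */
    return (int16_t)((int16_t)vs1 * (uint16_t)vs2 + vd);
}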
GEN_VEXT_VX(vwmacc_vx_b, 2) 1957 GEN_VEXT_VX(vwmacc_vx_h, 4) 1958 GEN_VEXT_VX(vwmacc_vx_w, 8) 1959 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 1960 GEN_VEXT_VX(vwmaccsu_vx_h, 4) 1961 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 1962 GEN_VEXT_VX(vwmaccus_vx_b, 2) 1963 GEN_VEXT_VX(vwmaccus_vx_h, 4) 1964 GEN_VEXT_VX(vwmaccus_vx_w, 8) 1965 1966 /* Vector Integer Merge and Move Instructions */ 1967 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1968 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1969 uint32_t desc) \ 1970 { \ 1971 uint32_t vl = env->vl; \ 1972 uint32_t i; \ 1973 \ 1974 for (i = env->vstart; i < vl; i++) { \ 1975 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1976 *((ETYPE *)vd + H(i)) = s1; \ 1977 } \ 1978 env->vstart = 0; \ 1979 } 1980 1981 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1982 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1983 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1984 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1985 1986 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1987 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1988 uint32_t desc) \ 1989 { \ 1990 uint32_t vl = env->vl; \ 1991 uint32_t i; \ 1992 \ 1993 for (i = env->vstart; i < vl; i++) { \ 1994 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1995 } \ 1996 env->vstart = 0; \ 1997 } 1998 1999 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2000 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2001 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2002 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2003 2004 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2005 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2006 CPURISCVState *env, uint32_t desc) \ 2007 { \ 2008 uint32_t vl = env->vl; \ 2009 uint32_t i; \ 2010 \ 2011 for (i = env->vstart; i < vl; i++) { \ 2012 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2013 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2014 } \ 2015 env->vstart = 0; \ 2016 } 2017 2018 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2019 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2020 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2021 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2022 2023 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2024 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2025 void *vs2, CPURISCVState *env, uint32_t desc) \ 2026 { \ 2027 uint32_t vl = env->vl; \ 2028 uint32_t i; \ 2029 \ 2030 for (i = env->vstart; i < vl; i++) { \ 2031 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2032 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 2033 (ETYPE)(target_long)s1); \ 2034 *((ETYPE *)vd + H(i)) = d; \ 2035 } \ 2036 env->vstart = 0; \ 2037 } 2038 2039 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2040 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2041 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2042 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2043 2044 /* 2045 *** Vector Fixed-Point Arithmetic Instructions 2046 */ 2047 2048 /* Vector Single-Width Saturating Add and Subtract */ 2049 2050 /* 2051 * As fixed point instructions probably have round mode and saturation, 2052 * define common macros for fixed point here. 
2053 */ 2054 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2055 CPURISCVState *env, int vxrm); 2056 2057 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2058 static inline void \ 2059 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2060 CPURISCVState *env, int vxrm) \ 2061 { \ 2062 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2063 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2064 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2065 } 2066 2067 static inline void 2068 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2069 CPURISCVState *env, 2070 uint32_t vl, uint32_t vm, int vxrm, 2071 opivv2_rm_fn *fn) 2072 { 2073 for (uint32_t i = env->vstart; i < vl; i++) { 2074 if (!vm && !vext_elem_mask(v0, i)) { 2075 continue; 2076 } 2077 fn(vd, vs1, vs2, i, env, vxrm); 2078 } 2079 env->vstart = 0; 2080 } 2081 2082 static inline void 2083 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2084 CPURISCVState *env, 2085 uint32_t desc, 2086 opivv2_rm_fn *fn) 2087 { 2088 uint32_t vm = vext_vm(desc); 2089 uint32_t vl = env->vl; 2090 2091 switch (env->vxrm) { 2092 case 0: /* rnu */ 2093 vext_vv_rm_1(vd, v0, vs1, vs2, 2094 env, vl, vm, 0, fn); 2095 break; 2096 case 1: /* rne */ 2097 vext_vv_rm_1(vd, v0, vs1, vs2, 2098 env, vl, vm, 1, fn); 2099 break; 2100 case 2: /* rdn */ 2101 vext_vv_rm_1(vd, v0, vs1, vs2, 2102 env, vl, vm, 2, fn); 2103 break; 2104 default: /* rod */ 2105 vext_vv_rm_1(vd, v0, vs1, vs2, 2106 env, vl, vm, 3, fn); 2107 break; 2108 } 2109 } 2110 2111 /* generate helpers for fixed point instructions with OPIVV format */ 2112 #define GEN_VEXT_VV_RM(NAME) \ 2113 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2114 CPURISCVState *env, uint32_t desc) \ 2115 { \ 2116 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2117 do_##NAME); \ 2118 } 2119 2120 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2121 { 2122 uint8_t res = a + b; 2123 if (res < a) { 2124 res = UINT8_MAX; 2125 env->vxsat = 0x1; 2126 } 2127 return res; 2128 } 2129 2130 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2131 uint16_t b) 2132 { 2133 uint16_t res = a + b; 2134 if (res < a) { 2135 res = UINT16_MAX; 2136 env->vxsat = 0x1; 2137 } 2138 return res; 2139 } 2140 2141 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2142 uint32_t b) 2143 { 2144 uint32_t res = a + b; 2145 if (res < a) { 2146 res = UINT32_MAX; 2147 env->vxsat = 0x1; 2148 } 2149 return res; 2150 } 2151 2152 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2153 uint64_t b) 2154 { 2155 uint64_t res = a + b; 2156 if (res < a) { 2157 res = UINT64_MAX; 2158 env->vxsat = 0x1; 2159 } 2160 return res; 2161 } 2162 2163 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2164 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2165 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2166 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2167 GEN_VEXT_VV_RM(vsaddu_vv_b) 2168 GEN_VEXT_VV_RM(vsaddu_vv_h) 2169 GEN_VEXT_VV_RM(vsaddu_vv_w) 2170 GEN_VEXT_VV_RM(vsaddu_vv_d) 2171 2172 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2173 CPURISCVState *env, int vxrm); 2174 2175 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2176 static inline void \ 2177 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2178 CPURISCVState *env, int vxrm) \ 2179 { \ 2180 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2181 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2182 } 2183 2184 
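/*
 * Illustrative sketch, not referenced by the helpers above: the
 * unsigned saturating adds detect overflow with "res < a", since an
 * unsigned wrap-around always yields a result smaller than either
 * operand.  For 8-bit elements, a = 200 and b = 100 wrap to res = 44,
 * so the result saturates to UINT8_MAX and vxsat is set.  (The switch
 * in vext_vv_rm_2() above and vext_vx_rm_2() below passes vxrm as a
 * literal, presumably so the compiler can fold get_round() per
 * rounding mode.)
 */
static inline uint8_t saddu8_ref(uint8_t a, uint8_t b, bool *sat)
{
    uint8_t res = a + b;    /* wraps modulo 256 on overflow */

    if (res < a) {
        *sat = true;
        return UINT8_MAX;
    }
    return res;
}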
static inline void 2185 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2186 CPURISCVState *env, 2187 uint32_t vl, uint32_t vm, int vxrm, 2188 opivx2_rm_fn *fn) 2189 { 2190 for (uint32_t i = env->vstart; i < vl; i++) { 2191 if (!vm && !vext_elem_mask(v0, i)) { 2192 continue; 2193 } 2194 fn(vd, s1, vs2, i, env, vxrm); 2195 } 2196 env->vstart = 0; 2197 } 2198 2199 static inline void 2200 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2201 CPURISCVState *env, 2202 uint32_t desc, 2203 opivx2_rm_fn *fn) 2204 { 2205 uint32_t vm = vext_vm(desc); 2206 uint32_t vl = env->vl; 2207 2208 switch (env->vxrm) { 2209 case 0: /* rnu */ 2210 vext_vx_rm_1(vd, v0, s1, vs2, 2211 env, vl, vm, 0, fn); 2212 break; 2213 case 1: /* rne */ 2214 vext_vx_rm_1(vd, v0, s1, vs2, 2215 env, vl, vm, 1, fn); 2216 break; 2217 case 2: /* rdn */ 2218 vext_vx_rm_1(vd, v0, s1, vs2, 2219 env, vl, vm, 2, fn); 2220 break; 2221 default: /* rod */ 2222 vext_vx_rm_1(vd, v0, s1, vs2, 2223 env, vl, vm, 3, fn); 2224 break; 2225 } 2226 } 2227 2228 /* generate helpers for fixed point instructions with OPIVX format */ 2229 #define GEN_VEXT_VX_RM(NAME) \ 2230 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2231 void *vs2, CPURISCVState *env, uint32_t desc) \ 2232 { \ 2233 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2234 do_##NAME); \ 2235 } 2236 2237 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2238 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2239 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2240 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2241 GEN_VEXT_VX_RM(vsaddu_vx_b) 2242 GEN_VEXT_VX_RM(vsaddu_vx_h) 2243 GEN_VEXT_VX_RM(vsaddu_vx_w) 2244 GEN_VEXT_VX_RM(vsaddu_vx_d) 2245 2246 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2247 { 2248 int8_t res = a + b; 2249 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2250 res = a > 0 ? INT8_MAX : INT8_MIN; 2251 env->vxsat = 0x1; 2252 } 2253 return res; 2254 } 2255 2256 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2257 { 2258 int16_t res = a + b; 2259 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2260 res = a > 0 ? INT16_MAX : INT16_MIN; 2261 env->vxsat = 0x1; 2262 } 2263 return res; 2264 } 2265 2266 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2267 { 2268 int32_t res = a + b; 2269 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2270 res = a > 0 ? INT32_MAX : INT32_MIN; 2271 env->vxsat = 0x1; 2272 } 2273 return res; 2274 } 2275 2276 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2277 { 2278 int64_t res = a + b; 2279 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2280 res = a > 0 ? 
INT64_MAX : INT64_MIN; 2281 env->vxsat = 0x1; 2282 } 2283 return res; 2284 } 2285 2286 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2287 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2288 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2289 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2290 GEN_VEXT_VV_RM(vsadd_vv_b) 2291 GEN_VEXT_VV_RM(vsadd_vv_h) 2292 GEN_VEXT_VV_RM(vsadd_vv_w) 2293 GEN_VEXT_VV_RM(vsadd_vv_d) 2294 2295 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2296 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2297 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2298 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2299 GEN_VEXT_VX_RM(vsadd_vx_b) 2300 GEN_VEXT_VX_RM(vsadd_vx_h) 2301 GEN_VEXT_VX_RM(vsadd_vx_w) 2302 GEN_VEXT_VX_RM(vsadd_vx_d) 2303 2304 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2305 { 2306 uint8_t res = a - b; 2307 if (res > a) { 2308 res = 0; 2309 env->vxsat = 0x1; 2310 } 2311 return res; 2312 } 2313 2314 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2315 uint16_t b) 2316 { 2317 uint16_t res = a - b; 2318 if (res > a) { 2319 res = 0; 2320 env->vxsat = 0x1; 2321 } 2322 return res; 2323 } 2324 2325 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2326 uint32_t b) 2327 { 2328 uint32_t res = a - b; 2329 if (res > a) { 2330 res = 0; 2331 env->vxsat = 0x1; 2332 } 2333 return res; 2334 } 2335 2336 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2337 uint64_t b) 2338 { 2339 uint64_t res = a - b; 2340 if (res > a) { 2341 res = 0; 2342 env->vxsat = 0x1; 2343 } 2344 return res; 2345 } 2346 2347 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2348 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2349 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2350 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2351 GEN_VEXT_VV_RM(vssubu_vv_b) 2352 GEN_VEXT_VV_RM(vssubu_vv_h) 2353 GEN_VEXT_VV_RM(vssubu_vv_w) 2354 GEN_VEXT_VV_RM(vssubu_vv_d) 2355 2356 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2357 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2358 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2359 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2360 GEN_VEXT_VX_RM(vssubu_vx_b) 2361 GEN_VEXT_VX_RM(vssubu_vx_h) 2362 GEN_VEXT_VX_RM(vssubu_vx_w) 2363 GEN_VEXT_VX_RM(vssubu_vx_d) 2364 2365 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2366 { 2367 int8_t res = a - b; 2368 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2369 res = a >= 0 ? INT8_MAX : INT8_MIN; 2370 env->vxsat = 0x1; 2371 } 2372 return res; 2373 } 2374 2375 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2376 { 2377 int16_t res = a - b; 2378 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2379 res = a >= 0 ? INT16_MAX : INT16_MIN; 2380 env->vxsat = 0x1; 2381 } 2382 return res; 2383 } 2384 2385 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2386 { 2387 int32_t res = a - b; 2388 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2389 res = a >= 0 ? INT32_MAX : INT32_MIN; 2390 env->vxsat = 0x1; 2391 } 2392 return res; 2393 } 2394 2395 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2396 { 2397 int64_t res = a - b; 2398 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2399 res = a >= 0 ? 
INT64_MAX : INT64_MIN; 2400 env->vxsat = 0x1; 2401 } 2402 return res; 2403 } 2404 2405 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2406 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2407 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2408 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2409 GEN_VEXT_VV_RM(vssub_vv_b) 2410 GEN_VEXT_VV_RM(vssub_vv_h) 2411 GEN_VEXT_VV_RM(vssub_vv_w) 2412 GEN_VEXT_VV_RM(vssub_vv_d) 2413 2414 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2415 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2416 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2417 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2418 GEN_VEXT_VX_RM(vssub_vx_b) 2419 GEN_VEXT_VX_RM(vssub_vx_h) 2420 GEN_VEXT_VX_RM(vssub_vx_w) 2421 GEN_VEXT_VX_RM(vssub_vx_d) 2422 2423 /* Vector Single-Width Averaging Add and Subtract */ 2424 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2425 { 2426 uint8_t d = extract64(v, shift, 1); 2427 uint8_t d1; 2428 uint64_t D1, D2; 2429 2430 if (shift == 0 || shift > 64) { 2431 return 0; 2432 } 2433 2434 d1 = extract64(v, shift - 1, 1); 2435 D1 = extract64(v, 0, shift); 2436 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2437 return d1; 2438 } else if (vxrm == 1) { /* round-to-nearest-even */ 2439 if (shift > 1) { 2440 D2 = extract64(v, 0, shift - 1); 2441 return d1 & ((D2 != 0) | d); 2442 } else { 2443 return d1 & d; 2444 } 2445 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2446 return !d & (D1 != 0); 2447 } 2448 return 0; /* round-down (truncate) */ 2449 } 2450 2451 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2452 { 2453 int64_t res = (int64_t)a + b; 2454 uint8_t round = get_round(vxrm, res, 1); 2455 2456 return (res >> 1) + round; 2457 } 2458 2459 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2460 { 2461 int64_t res = a + b; 2462 uint8_t round = get_round(vxrm, res, 1); 2463 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2464 2465 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2466 return ((res >> 1) ^ over) + round; 2467 } 2468 2469 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2470 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2471 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2472 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2473 GEN_VEXT_VV_RM(vaadd_vv_b) 2474 GEN_VEXT_VV_RM(vaadd_vv_h) 2475 GEN_VEXT_VV_RM(vaadd_vv_w) 2476 GEN_VEXT_VV_RM(vaadd_vv_d) 2477 2478 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2479 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2480 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2481 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2482 GEN_VEXT_VX_RM(vaadd_vx_b) 2483 GEN_VEXT_VX_RM(vaadd_vx_h) 2484 GEN_VEXT_VX_RM(vaadd_vx_w) 2485 GEN_VEXT_VX_RM(vaadd_vx_d) 2486 2487 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2488 uint32_t a, uint32_t b) 2489 { 2490 uint64_t res = (uint64_t)a + b; 2491 uint8_t round = get_round(vxrm, res, 1); 2492 2493 return (res >> 1) + round; 2494 } 2495 2496 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2497 uint64_t a, uint64_t b) 2498 { 2499 uint64_t res = a + b; 2500 uint8_t round = get_round(vxrm, res, 1); 2501 uint64_t over = (uint64_t)(res < a) << 63; 2502 2503 return ((res >> 1) | over) + round; 2504 } 2505 2506 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2507 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2508 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2509 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2510 GEN_VEXT_VV_RM(vaaddu_vv_b) 2511 GEN_VEXT_VV_RM(vaaddu_vv_h) 2512 GEN_VEXT_VV_RM(vaaddu_vv_w) 2513 GEN_VEXT_VV_RM(vaaddu_vv_d) 2514 2515 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2516 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2517 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2518 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2519 GEN_VEXT_VX_RM(vaaddu_vx_b) 2520 GEN_VEXT_VX_RM(vaaddu_vx_h) 2521 GEN_VEXT_VX_RM(vaaddu_vx_w) 2522 GEN_VEXT_VX_RM(vaaddu_vx_d) 2523 2524 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2525 { 2526 int64_t res = (int64_t)a - b; 2527 uint8_t round = get_round(vxrm, res, 1); 2528 2529 return (res >> 1) + round; 2530 } 2531 2532 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2533 { 2534 int64_t res = (int64_t)a - b; 2535 uint8_t round = get_round(vxrm, res, 1); 2536 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2537 2538 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2539 return ((res >> 1) ^ over) + round; 2540 } 2541 2542 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2543 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2544 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2545 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2546 GEN_VEXT_VV_RM(vasub_vv_b) 2547 GEN_VEXT_VV_RM(vasub_vv_h) 2548 GEN_VEXT_VV_RM(vasub_vv_w) 2549 GEN_VEXT_VV_RM(vasub_vv_d) 2550 2551 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2552 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2553 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2554 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2555 GEN_VEXT_VX_RM(vasub_vx_b) 2556 GEN_VEXT_VX_RM(vasub_vx_h) 2557 GEN_VEXT_VX_RM(vasub_vx_w) 2558 GEN_VEXT_VX_RM(vasub_vx_d) 2559 2560 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2561 uint32_t a, uint32_t b) 2562 { 2563 int64_t res = (int64_t)a - b; 2564 uint8_t round = get_round(vxrm, res, 1); 2565 2566 return (res >> 1) + round; 2567 } 2568 2569 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2570 uint64_t a, uint64_t b) 2571 { 2572 uint64_t res = (uint64_t)a - b; 2573 uint8_t round = get_round(vxrm, res, 1); 2574 uint64_t over = (uint64_t)(res > a) << 63; 2575 2576 return ((res >> 1) | over) + round; 2577 } 2578 2579 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2580 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2581 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2582 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2583 GEN_VEXT_VV_RM(vasubu_vv_b) 2584 GEN_VEXT_VV_RM(vasubu_vv_h) 2585 GEN_VEXT_VV_RM(vasubu_vv_w) 2586 GEN_VEXT_VV_RM(vasubu_vv_d) 2587 2588 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2589 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2590 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2591 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2592 GEN_VEXT_VX_RM(vasubu_vx_b) 2593 GEN_VEXT_VX_RM(vasubu_vx_h) 2594 GEN_VEXT_VX_RM(vasubu_vx_w) 2595 GEN_VEXT_VX_RM(vasubu_vx_d) 2596 2597 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2598 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2599 { 2600 uint8_t round; 2601 int16_t res; 2602 2603 res = (int16_t)a * (int16_t)b; 2604 round = get_round(vxrm, res, 7); 2605 res = (res >> 7) + round; 2606 2607 if (res > INT8_MAX) { 2608 env->vxsat = 0x1; 2609 return INT8_MAX; 2610 } else if (res < INT8_MIN) { 2611 env->vxsat = 0x1; 2612 return INT8_MIN; 2613 } else { 2614 return res; 2615 } 2616 } 2617 2618 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2619 { 2620 uint8_t round; 2621 int32_t res; 2622 2623 res = (int32_t)a * (int32_t)b; 2624 round = get_round(vxrm, res, 15); 2625 res = (res >> 15) + round; 2626 2627 if (res > INT16_MAX) { 2628 env->vxsat = 0x1; 2629 return INT16_MAX; 2630 } else if (res < INT16_MIN) { 2631 env->vxsat = 0x1; 2632 return INT16_MIN; 2633 } else { 2634 return res; 2635 } 2636 } 2637 2638 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2639 { 2640 uint8_t round; 2641 int64_t res; 2642 2643 res = (int64_t)a * (int64_t)b; 2644 round = get_round(vxrm, res, 31); 2645 res = (res >> 31) + round; 2646 2647 if (res > INT32_MAX) { 2648 env->vxsat = 0x1; 2649 return INT32_MAX; 2650 } else if (res < INT32_MIN) { 2651 env->vxsat = 0x1; 2652 return INT32_MIN; 2653 } else { 2654 return 
res; 2655 } 2656 } 2657 2658 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2659 { 2660 uint8_t round; 2661 uint64_t hi_64, lo_64; 2662 int64_t res; 2663 2664 if (a == INT64_MIN && b == INT64_MIN) { 2665 env->vxsat = 1; 2666 return INT64_MAX; 2667 } 2668 2669 muls64(&lo_64, &hi_64, a, b); 2670 round = get_round(vxrm, lo_64, 63); 2671 /* 2672 * Cannot overflow, as there are always 2673 * 2 sign bits after multiply. 2674 */ 2675 res = (hi_64 << 1) | (lo_64 >> 63); 2676 if (round) { 2677 if (res == INT64_MAX) { 2678 env->vxsat = 1; 2679 } else { 2680 res += 1; 2681 } 2682 } 2683 return res; 2684 } 2685 2686 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2687 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2688 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2689 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2690 GEN_VEXT_VV_RM(vsmul_vv_b) 2691 GEN_VEXT_VV_RM(vsmul_vv_h) 2692 GEN_VEXT_VV_RM(vsmul_vv_w) 2693 GEN_VEXT_VV_RM(vsmul_vv_d) 2694 2695 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2696 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2697 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2698 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2699 GEN_VEXT_VX_RM(vsmul_vx_b) 2700 GEN_VEXT_VX_RM(vsmul_vx_h) 2701 GEN_VEXT_VX_RM(vsmul_vx_w) 2702 GEN_VEXT_VX_RM(vsmul_vx_d) 2703 2704 /* Vector Single-Width Scaling Shift Instructions */ 2705 static inline uint8_t 2706 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2707 { 2708 uint8_t round, shift = b & 0x7; 2709 uint8_t res; 2710 2711 round = get_round(vxrm, a, shift); 2712 res = (a >> shift) + round; 2713 return res; 2714 } 2715 static inline uint16_t 2716 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2717 { 2718 uint8_t round, shift = b & 0xf; 2719 uint16_t res; 2720 2721 round = get_round(vxrm, a, shift); 2722 res = (a >> shift) + round; 2723 return res; 2724 } 2725 static inline uint32_t 2726 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2727 { 2728 uint8_t round, shift = b & 0x1f; 2729 uint32_t res; 2730 2731 round = get_round(vxrm, a, shift); 2732 res = (a >> shift) + round; 2733 return res; 2734 } 2735 static inline uint64_t 2736 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2737 { 2738 uint8_t round, shift = b & 0x3f; 2739 uint64_t res; 2740 2741 round = get_round(vxrm, a, shift); 2742 res = (a >> shift) + round; 2743 return res; 2744 } 2745 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2746 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2747 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2748 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2749 GEN_VEXT_VV_RM(vssrl_vv_b) 2750 GEN_VEXT_VV_RM(vssrl_vv_h) 2751 GEN_VEXT_VV_RM(vssrl_vv_w) 2752 GEN_VEXT_VV_RM(vssrl_vv_d) 2753 2754 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2755 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2756 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2757 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2758 GEN_VEXT_VX_RM(vssrl_vx_b) 2759 GEN_VEXT_VX_RM(vssrl_vx_h) 2760 GEN_VEXT_VX_RM(vssrl_vx_w) 2761 GEN_VEXT_VX_RM(vssrl_vx_d) 2762 2763 static inline int8_t 2764 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2765 { 2766 uint8_t round, shift = b & 0x7; 2767 int8_t res; 2768 2769 round = get_round(vxrm, a, shift); 2770 res = (a >> shift) + round; 2771 return res; 2772 } 2773 
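/*
 * Worked example for the scaling shifts, assuming get_round() behaves
 * as defined earlier in this file (illustrative only): shifting
 * a = 0b1011 (11) right by 2 has the exact value 2.75, and the four
 * vxrm rounding modes give
 *   rnu (0): truncate to 2, round bit = 1          -> 3
 *   rne (1): guard bit 1, sticky bits non-zero     -> 3
 *   rdn (2): truncate                              -> 2
 *   rod (3): discarded bits non-zero, jam the LSB  -> 3
 * A minimal reference for the rnu case:
 */
static inline uint8_t vssrl8_rnu_ref(uint8_t a, uint8_t shift)
{
    /* round-to-nearest-up: add the most significant discarded bit */
    uint8_t round = shift ? (a >> (shift - 1)) & 1 : 0;

    return (uint8_t)((a >> shift) + round);
}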
static inline int16_t 2774 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2775 { 2776 uint8_t round, shift = b & 0xf; 2777 int16_t res; 2778 2779 round = get_round(vxrm, a, shift); 2780 res = (a >> shift) + round; 2781 return res; 2782 } 2783 static inline int32_t 2784 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2785 { 2786 uint8_t round, shift = b & 0x1f; 2787 int32_t res; 2788 2789 round = get_round(vxrm, a, shift); 2790 res = (a >> shift) + round; 2791 return res; 2792 } 2793 static inline int64_t 2794 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2795 { 2796 uint8_t round, shift = b & 0x3f; 2797 int64_t res; 2798 2799 round = get_round(vxrm, a, shift); 2800 res = (a >> shift) + round; 2801 return res; 2802 } 2803 2804 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2805 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2806 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2807 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2808 GEN_VEXT_VV_RM(vssra_vv_b) 2809 GEN_VEXT_VV_RM(vssra_vv_h) 2810 GEN_VEXT_VV_RM(vssra_vv_w) 2811 GEN_VEXT_VV_RM(vssra_vv_d) 2812 2813 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2814 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2815 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2816 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2817 GEN_VEXT_VX_RM(vssra_vx_b) 2818 GEN_VEXT_VX_RM(vssra_vx_h) 2819 GEN_VEXT_VX_RM(vssra_vx_w) 2820 GEN_VEXT_VX_RM(vssra_vx_d) 2821 2822 /* Vector Narrowing Fixed-Point Clip Instructions */ 2823 static inline int8_t 2824 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2825 { 2826 uint8_t round, shift = b & 0xf; 2827 int16_t res; 2828 2829 round = get_round(vxrm, a, shift); 2830 res = (a >> shift) + round; 2831 if (res > INT8_MAX) { 2832 env->vxsat = 0x1; 2833 return INT8_MAX; 2834 } else if (res < INT8_MIN) { 2835 env->vxsat = 0x1; 2836 return INT8_MIN; 2837 } else { 2838 return res; 2839 } 2840 } 2841 2842 static inline int16_t 2843 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2844 { 2845 uint8_t round, shift = b & 0x1f; 2846 int32_t res; 2847 2848 round = get_round(vxrm, a, shift); 2849 res = (a >> shift) + round; 2850 if (res > INT16_MAX) { 2851 env->vxsat = 0x1; 2852 return INT16_MAX; 2853 } else if (res < INT16_MIN) { 2854 env->vxsat = 0x1; 2855 return INT16_MIN; 2856 } else { 2857 return res; 2858 } 2859 } 2860 2861 static inline int32_t 2862 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2863 { 2864 uint8_t round, shift = b & 0x3f; 2865 int64_t res; 2866 2867 round = get_round(vxrm, a, shift); 2868 res = (a >> shift) + round; 2869 if (res > INT32_MAX) { 2870 env->vxsat = 0x1; 2871 return INT32_MAX; 2872 } else if (res < INT32_MIN) { 2873 env->vxsat = 0x1; 2874 return INT32_MIN; 2875 } else { 2876 return res; 2877 } 2878 } 2879 2880 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2881 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2882 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2883 GEN_VEXT_VV_RM(vnclip_wv_b) 2884 GEN_VEXT_VV_RM(vnclip_wv_h) 2885 GEN_VEXT_VV_RM(vnclip_wv_w) 2886 2887 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2888 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2889 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2890 GEN_VEXT_VX_RM(vnclip_wx_b) 2891 GEN_VEXT_VX_RM(vnclip_wx_h) 2892 GEN_VEXT_VX_RM(vnclip_wx_w) 2893 2894 static inline 
uint8_t 2895 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2896 { 2897 uint8_t round, shift = b & 0xf; 2898 uint16_t res; 2899 2900 round = get_round(vxrm, a, shift); 2901 res = (a >> shift) + round; 2902 if (res > UINT8_MAX) { 2903 env->vxsat = 0x1; 2904 return UINT8_MAX; 2905 } else { 2906 return res; 2907 } 2908 } 2909 2910 static inline uint16_t 2911 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2912 { 2913 uint8_t round, shift = b & 0x1f; 2914 uint32_t res; 2915 2916 round = get_round(vxrm, a, shift); 2917 res = (a >> shift) + round; 2918 if (res > UINT16_MAX) { 2919 env->vxsat = 0x1; 2920 return UINT16_MAX; 2921 } else { 2922 return res; 2923 } 2924 } 2925 2926 static inline uint32_t 2927 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2928 { 2929 uint8_t round, shift = b & 0x3f; 2930 uint64_t res; 2931 2932 round = get_round(vxrm, a, shift); 2933 res = (a >> shift) + round; 2934 if (res > UINT32_MAX) { 2935 env->vxsat = 0x1; 2936 return UINT32_MAX; 2937 } else { 2938 return res; 2939 } 2940 } 2941 2942 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2943 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2944 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2945 GEN_VEXT_VV_RM(vnclipu_wv_b) 2946 GEN_VEXT_VV_RM(vnclipu_wv_h) 2947 GEN_VEXT_VV_RM(vnclipu_wv_w) 2948 2949 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2950 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2951 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 2952 GEN_VEXT_VX_RM(vnclipu_wx_b) 2953 GEN_VEXT_VX_RM(vnclipu_wx_h) 2954 GEN_VEXT_VX_RM(vnclipu_wx_w) 2955 2956 /* 2957 *** Vector Float Point Arithmetic Instructions 2958 */ 2959 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2960 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2961 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2962 CPURISCVState *env) \ 2963 { \ 2964 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2965 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2966 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2967 } 2968 2969 #define GEN_VEXT_VV_ENV(NAME) \ 2970 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2971 void *vs2, CPURISCVState *env, \ 2972 uint32_t desc) \ 2973 { \ 2974 uint32_t vm = vext_vm(desc); \ 2975 uint32_t vl = env->vl; \ 2976 uint32_t i; \ 2977 \ 2978 for (i = env->vstart; i < vl; i++) { \ 2979 if (!vm && !vext_elem_mask(v0, i)) { \ 2980 continue; \ 2981 } \ 2982 do_##NAME(vd, vs1, vs2, i, env); \ 2983 } \ 2984 env->vstart = 0; \ 2985 } 2986 2987 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2988 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2989 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2990 GEN_VEXT_VV_ENV(vfadd_vv_h) 2991 GEN_VEXT_VV_ENV(vfadd_vv_w) 2992 GEN_VEXT_VV_ENV(vfadd_vv_d) 2993 2994 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2995 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2996 CPURISCVState *env) \ 2997 { \ 2998 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2999 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 3000 } 3001 3002 #define GEN_VEXT_VF(NAME) \ 3003 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3004 void *vs2, CPURISCVState *env, \ 3005 uint32_t desc) \ 3006 { \ 3007 uint32_t vm = vext_vm(desc); \ 3008 uint32_t vl = env->vl; \ 3009 uint32_t i; \ 3010 \ 3011 for (i = env->vstart; i < vl; i++) { \ 3012 if (!vm && !vext_elem_mask(v0, i)) { \ 3013 
continue; \ 3014 } \ 3015 do_##NAME(vd, s1, vs2, i, env); \ 3016 } \ 3017 env->vstart = 0; \ 3018 } 3019 3020 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3021 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3022 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3023 GEN_VEXT_VF(vfadd_vf_h) 3024 GEN_VEXT_VF(vfadd_vf_w) 3025 GEN_VEXT_VF(vfadd_vf_d) 3026 3027 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3028 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3029 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3030 GEN_VEXT_VV_ENV(vfsub_vv_h) 3031 GEN_VEXT_VV_ENV(vfsub_vv_w) 3032 GEN_VEXT_VV_ENV(vfsub_vv_d) 3033 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3034 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3035 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3036 GEN_VEXT_VF(vfsub_vf_h) 3037 GEN_VEXT_VF(vfsub_vf_w) 3038 GEN_VEXT_VF(vfsub_vf_d) 3039 3040 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3041 { 3042 return float16_sub(b, a, s); 3043 } 3044 3045 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3046 { 3047 return float32_sub(b, a, s); 3048 } 3049 3050 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3051 { 3052 return float64_sub(b, a, s); 3053 } 3054 3055 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3056 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3057 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3058 GEN_VEXT_VF(vfrsub_vf_h) 3059 GEN_VEXT_VF(vfrsub_vf_w) 3060 GEN_VEXT_VF(vfrsub_vf_d) 3061 3062 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3063 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3064 { 3065 return float32_add(float16_to_float32(a, true, s), 3066 float16_to_float32(b, true, s), s); 3067 } 3068 3069 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3070 { 3071 return float64_add(float32_to_float64(a, s), 3072 float32_to_float64(b, s), s); 3073 3074 } 3075 3076 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3077 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3078 GEN_VEXT_VV_ENV(vfwadd_vv_h) 3079 GEN_VEXT_VV_ENV(vfwadd_vv_w) 3080 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3081 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3082 GEN_VEXT_VF(vfwadd_vf_h) 3083 GEN_VEXT_VF(vfwadd_vf_w) 3084 3085 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3086 { 3087 return float32_sub(float16_to_float32(a, true, s), 3088 float16_to_float32(b, true, s), s); 3089 } 3090 3091 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3092 { 3093 return float64_sub(float32_to_float64(a, s), 3094 float32_to_float64(b, s), s); 3095 3096 } 3097 3098 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3099 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3100 GEN_VEXT_VV_ENV(vfwsub_vv_h) 3101 GEN_VEXT_VV_ENV(vfwsub_vv_w) 3102 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3103 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3104 GEN_VEXT_VF(vfwsub_vf_h) 3105 GEN_VEXT_VF(vfwsub_vf_w) 3106 3107 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3108 { 3109 return float32_add(a, float16_to_float32(b, true, s), s); 3110 } 3111 3112 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3113 { 3114 return float64_add(a, float32_to_float64(b, s), s); 3115 } 3116 3117 
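/*
 * Illustrative note, not wired into any helper: the .wv/.wf widening
 * forms (vfwaddw16/vfwaddw32 above, vfwsubw* below) differ from the
 * .vv/.vf forms only in that their first source operand is already
 * 2*SEW wide, so only the second operand is converted up.  The "true"
 * argument to float16_to_float32() selects IEEE half-precision
 * semantics.  For SEW=16 the per-element contrast looks like:
 */
static inline uint32_t vfwadd_vv_h_ref(uint16_t a, uint16_t b,
                                       float_status *s)
{
    /* .vv form: both half-precision inputs are widened first */
    return float32_add(float16_to_float32(a, true, s),
                       float16_to_float32(b, true, s), s);
}

static inline uint32_t vfwadd_wv_h_ref(uint32_t a_wide, uint16_t b,
                                       float_status *s)
{
    /* .wv form: the first input is already single precision */
    return float32_add(a_wide, float16_to_float32(b, true, s), s);
}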
RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3118 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3119 GEN_VEXT_VV_ENV(vfwadd_wv_h) 3120 GEN_VEXT_VV_ENV(vfwadd_wv_w) 3121 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3122 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3123 GEN_VEXT_VF(vfwadd_wf_h) 3124 GEN_VEXT_VF(vfwadd_wf_w) 3125 3126 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3127 { 3128 return float32_sub(a, float16_to_float32(b, true, s), s); 3129 } 3130 3131 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3132 { 3133 return float64_sub(a, float32_to_float64(b, s), s); 3134 } 3135 3136 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3137 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3138 GEN_VEXT_VV_ENV(vfwsub_wv_h) 3139 GEN_VEXT_VV_ENV(vfwsub_wv_w) 3140 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3141 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3142 GEN_VEXT_VF(vfwsub_wf_h) 3143 GEN_VEXT_VF(vfwsub_wf_w) 3144 3145 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3146 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3147 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3148 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3149 GEN_VEXT_VV_ENV(vfmul_vv_h) 3150 GEN_VEXT_VV_ENV(vfmul_vv_w) 3151 GEN_VEXT_VV_ENV(vfmul_vv_d) 3152 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3153 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3154 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3155 GEN_VEXT_VF(vfmul_vf_h) 3156 GEN_VEXT_VF(vfmul_vf_w) 3157 GEN_VEXT_VF(vfmul_vf_d) 3158 3159 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3160 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3161 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3162 GEN_VEXT_VV_ENV(vfdiv_vv_h) 3163 GEN_VEXT_VV_ENV(vfdiv_vv_w) 3164 GEN_VEXT_VV_ENV(vfdiv_vv_d) 3165 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3166 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3167 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3168 GEN_VEXT_VF(vfdiv_vf_h) 3169 GEN_VEXT_VF(vfdiv_vf_w) 3170 GEN_VEXT_VF(vfdiv_vf_d) 3171 3172 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3173 { 3174 return float16_div(b, a, s); 3175 } 3176 3177 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3178 { 3179 return float32_div(b, a, s); 3180 } 3181 3182 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3183 { 3184 return float64_div(b, a, s); 3185 } 3186 3187 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3188 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3189 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3190 GEN_VEXT_VF(vfrdiv_vf_h) 3191 GEN_VEXT_VF(vfrdiv_vf_w) 3192 GEN_VEXT_VF(vfrdiv_vf_d) 3193 3194 /* Vector Widening Floating-Point Multiply */ 3195 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3196 { 3197 return float32_mul(float16_to_float32(a, true, s), 3198 float16_to_float32(b, true, s), s); 3199 } 3200 3201 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3202 { 3203 return float64_mul(float32_to_float64(a, s), 3204 float32_to_float64(b, s), s); 3205 3206 } 3207 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3208 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, 
H4, H4, vfwmul32) 3209 GEN_VEXT_VV_ENV(vfwmul_vv_h) 3210 GEN_VEXT_VV_ENV(vfwmul_vv_w) 3211 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3212 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3213 GEN_VEXT_VF(vfwmul_vf_h) 3214 GEN_VEXT_VF(vfwmul_vf_w) 3215 3216 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3217 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3218 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3219 CPURISCVState *env) \ 3220 { \ 3221 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3222 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3223 TD d = *((TD *)vd + HD(i)); \ 3224 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3225 } 3226 3227 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3228 { 3229 return float16_muladd(a, b, d, 0, s); 3230 } 3231 3232 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3233 { 3234 return float32_muladd(a, b, d, 0, s); 3235 } 3236 3237 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3238 { 3239 return float64_muladd(a, b, d, 0, s); 3240 } 3241 3242 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3243 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3244 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3245 GEN_VEXT_VV_ENV(vfmacc_vv_h) 3246 GEN_VEXT_VV_ENV(vfmacc_vv_w) 3247 GEN_VEXT_VV_ENV(vfmacc_vv_d) 3248 3249 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3250 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3251 CPURISCVState *env) \ 3252 { \ 3253 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3254 TD d = *((TD *)vd + HD(i)); \ 3255 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3256 } 3257 3258 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3259 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3260 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3261 GEN_VEXT_VF(vfmacc_vf_h) 3262 GEN_VEXT_VF(vfmacc_vf_w) 3263 GEN_VEXT_VF(vfmacc_vf_d) 3264 3265 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3266 { 3267 return float16_muladd(a, b, d, 3268 float_muladd_negate_c | float_muladd_negate_product, s); 3269 } 3270 3271 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3272 { 3273 return float32_muladd(a, b, d, 3274 float_muladd_negate_c | float_muladd_negate_product, s); 3275 } 3276 3277 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3278 { 3279 return float64_muladd(a, b, d, 3280 float_muladd_negate_c | float_muladd_negate_product, s); 3281 } 3282 3283 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3284 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3285 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3286 GEN_VEXT_VV_ENV(vfnmacc_vv_h) 3287 GEN_VEXT_VV_ENV(vfnmacc_vv_w) 3288 GEN_VEXT_VV_ENV(vfnmacc_vv_d) 3289 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3290 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3291 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3292 GEN_VEXT_VF(vfnmacc_vf_h) 3293 GEN_VEXT_VF(vfnmacc_vf_w) 3294 GEN_VEXT_VF(vfnmacc_vf_d) 3295 3296 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3297 { 3298 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3299 } 3300 3301 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3302 { 3303 return float32_muladd(a, b, d, float_muladd_negate_c, 
s); 3304 } 3305 3306 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3307 { 3308 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3309 } 3310 3311 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3312 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3313 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3314 GEN_VEXT_VV_ENV(vfmsac_vv_h) 3315 GEN_VEXT_VV_ENV(vfmsac_vv_w) 3316 GEN_VEXT_VV_ENV(vfmsac_vv_d) 3317 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3318 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3319 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3320 GEN_VEXT_VF(vfmsac_vf_h) 3321 GEN_VEXT_VF(vfmsac_vf_w) 3322 GEN_VEXT_VF(vfmsac_vf_d) 3323 3324 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3325 { 3326 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3327 } 3328 3329 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3330 { 3331 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3332 } 3333 3334 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3335 { 3336 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3337 } 3338 3339 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3340 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3341 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3342 GEN_VEXT_VV_ENV(vfnmsac_vv_h) 3343 GEN_VEXT_VV_ENV(vfnmsac_vv_w) 3344 GEN_VEXT_VV_ENV(vfnmsac_vv_d) 3345 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3346 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3347 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3348 GEN_VEXT_VF(vfnmsac_vf_h) 3349 GEN_VEXT_VF(vfnmsac_vf_w) 3350 GEN_VEXT_VF(vfnmsac_vf_d) 3351 3352 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3353 { 3354 return float16_muladd(d, b, a, 0, s); 3355 } 3356 3357 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3358 { 3359 return float32_muladd(d, b, a, 0, s); 3360 } 3361 3362 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3363 { 3364 return float64_muladd(d, b, a, 0, s); 3365 } 3366 3367 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3368 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3369 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3370 GEN_VEXT_VV_ENV(vfmadd_vv_h) 3371 GEN_VEXT_VV_ENV(vfmadd_vv_w) 3372 GEN_VEXT_VV_ENV(vfmadd_vv_d) 3373 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3374 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3375 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3376 GEN_VEXT_VF(vfmadd_vf_h) 3377 GEN_VEXT_VF(vfmadd_vf_w) 3378 GEN_VEXT_VF(vfmadd_vf_d) 3379 3380 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3381 { 3382 return float16_muladd(d, b, a, 3383 float_muladd_negate_c | float_muladd_negate_product, s); 3384 } 3385 3386 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3387 { 3388 return float32_muladd(d, b, a, 3389 float_muladd_negate_c | float_muladd_negate_product, s); 3390 } 3391 3392 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3393 { 3394 return float64_muladd(d, b, a, 3395 float_muladd_negate_c | float_muladd_negate_product, s); 3396 } 3397 3398 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3399 
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3400 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3401 GEN_VEXT_VV_ENV(vfnmadd_vv_h) 3402 GEN_VEXT_VV_ENV(vfnmadd_vv_w) 3403 GEN_VEXT_VV_ENV(vfnmadd_vv_d) 3404 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3405 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3406 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3407 GEN_VEXT_VF(vfnmadd_vf_h) 3408 GEN_VEXT_VF(vfnmadd_vf_w) 3409 GEN_VEXT_VF(vfnmadd_vf_d) 3410 3411 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3412 { 3413 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3414 } 3415 3416 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3417 { 3418 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3419 } 3420 3421 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3422 { 3423 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3424 } 3425 3426 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3427 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3428 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3429 GEN_VEXT_VV_ENV(vfmsub_vv_h) 3430 GEN_VEXT_VV_ENV(vfmsub_vv_w) 3431 GEN_VEXT_VV_ENV(vfmsub_vv_d) 3432 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3433 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3434 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3435 GEN_VEXT_VF(vfmsub_vf_h) 3436 GEN_VEXT_VF(vfmsub_vf_w) 3437 GEN_VEXT_VF(vfmsub_vf_d) 3438 3439 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3440 { 3441 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3442 } 3443 3444 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3445 { 3446 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3447 } 3448 3449 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3450 { 3451 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3452 } 3453 3454 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3455 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3456 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3457 GEN_VEXT_VV_ENV(vfnmsub_vv_h) 3458 GEN_VEXT_VV_ENV(vfnmsub_vv_w) 3459 GEN_VEXT_VV_ENV(vfnmsub_vv_d) 3460 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3461 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3462 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3463 GEN_VEXT_VF(vfnmsub_vf_h) 3464 GEN_VEXT_VF(vfnmsub_vf_w) 3465 GEN_VEXT_VF(vfnmsub_vf_d) 3466 3467 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3468 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3469 { 3470 return float32_muladd(float16_to_float32(a, true, s), 3471 float16_to_float32(b, true, s), d, 0, s); 3472 } 3473 3474 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3475 { 3476 return float64_muladd(float32_to_float64(a, s), 3477 float32_to_float64(b, s), d, 0, s); 3478 } 3479 3480 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3481 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3482 GEN_VEXT_VV_ENV(vfwmacc_vv_h) 3483 GEN_VEXT_VV_ENV(vfwmacc_vv_w) 3484 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3485 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3486 GEN_VEXT_VF(vfwmacc_vf_h) 3487 
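/*
 * The widening FMA helpers (fwmacc16/fwmacc32 and the variants below)
 * promote both multiplicands to 2*SEW first; the promotion is exact, so the
 * subsequent fused multiply-add rounds the result only once, in the wider
 * format.
 */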
GEN_VEXT_VF(vfwmacc_vf_w) 3488 3489 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3490 { 3491 return float32_muladd(float16_to_float32(a, true, s), 3492 float16_to_float32(b, true, s), d, 3493 float_muladd_negate_c | float_muladd_negate_product, s); 3494 } 3495 3496 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3497 { 3498 return float64_muladd(float32_to_float64(a, s), 3499 float32_to_float64(b, s), d, 3500 float_muladd_negate_c | float_muladd_negate_product, s); 3501 } 3502 3503 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3504 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3505 GEN_VEXT_VV_ENV(vfwnmacc_vv_h) 3506 GEN_VEXT_VV_ENV(vfwnmacc_vv_w) 3507 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3508 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3509 GEN_VEXT_VF(vfwnmacc_vf_h) 3510 GEN_VEXT_VF(vfwnmacc_vf_w) 3511 3512 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3513 { 3514 return float32_muladd(float16_to_float32(a, true, s), 3515 float16_to_float32(b, true, s), d, 3516 float_muladd_negate_c, s); 3517 } 3518 3519 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3520 { 3521 return float64_muladd(float32_to_float64(a, s), 3522 float32_to_float64(b, s), d, 3523 float_muladd_negate_c, s); 3524 } 3525 3526 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3527 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3528 GEN_VEXT_VV_ENV(vfwmsac_vv_h) 3529 GEN_VEXT_VV_ENV(vfwmsac_vv_w) 3530 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3531 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3532 GEN_VEXT_VF(vfwmsac_vf_h) 3533 GEN_VEXT_VF(vfwmsac_vf_w) 3534 3535 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3536 { 3537 return float32_muladd(float16_to_float32(a, true, s), 3538 float16_to_float32(b, true, s), d, 3539 float_muladd_negate_product, s); 3540 } 3541 3542 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3543 { 3544 return float64_muladd(float32_to_float64(a, s), 3545 float32_to_float64(b, s), d, 3546 float_muladd_negate_product, s); 3547 } 3548 3549 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3550 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3551 GEN_VEXT_VV_ENV(vfwnmsac_vv_h) 3552 GEN_VEXT_VV_ENV(vfwnmsac_vv_w) 3553 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3554 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3555 GEN_VEXT_VF(vfwnmsac_vf_h) 3556 GEN_VEXT_VF(vfwnmsac_vf_w) 3557 3558 /* Vector Floating-Point Square-Root Instruction */ 3559 /* (TD, T2, TX2) */ 3560 #define OP_UU_H uint16_t, uint16_t, uint16_t 3561 #define OP_UU_W uint32_t, uint32_t, uint32_t 3562 #define OP_UU_D uint64_t, uint64_t, uint64_t 3563 3564 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3565 static void do_##NAME(void *vd, void *vs2, int i, \ 3566 CPURISCVState *env) \ 3567 { \ 3568 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3569 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3570 } 3571 3572 #define GEN_VEXT_V_ENV(NAME) \ 3573 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3574 CPURISCVState *env, uint32_t desc) \ 3575 { \ 3576 uint32_t vm = vext_vm(desc); \ 3577 uint32_t vl = env->vl; \ 3578 uint32_t i; \ 3579 \ 3580 if (vl == 0) { \ 3581 return; \ 3582 } \ 3583 for (i = env->vstart; i < vl; i++) { \ 3584 if (!vm && !vext_elem_mask(v0, i)) { \ 
3585 continue; \ 3586 } \ 3587 do_##NAME(vd, vs2, i, env); \ 3588 } \ 3589 env->vstart = 0; \ 3590 } 3591 3592 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3593 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3594 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3595 GEN_VEXT_V_ENV(vfsqrt_v_h) 3596 GEN_VEXT_V_ENV(vfsqrt_v_w) 3597 GEN_VEXT_V_ENV(vfsqrt_v_d) 3598 3599 /* 3600 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3601 * 3602 * Adapted from riscv-v-spec recip.c: 3603 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3604 */ 3605 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3606 { 3607 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3608 uint64_t exp = extract64(f, frac_size, exp_size); 3609 uint64_t frac = extract64(f, 0, frac_size); 3610 3611 const uint8_t lookup_table[] = { 3612 52, 51, 50, 48, 47, 46, 44, 43, 3613 42, 41, 40, 39, 38, 36, 35, 34, 3614 33, 32, 31, 30, 30, 29, 28, 27, 3615 26, 25, 24, 23, 23, 22, 21, 20, 3616 19, 19, 18, 17, 16, 16, 15, 14, 3617 14, 13, 12, 12, 11, 10, 10, 9, 3618 9, 8, 7, 7, 6, 6, 5, 4, 3619 4, 3, 3, 2, 2, 1, 1, 0, 3620 127, 125, 123, 121, 119, 118, 116, 114, 3621 113, 111, 109, 108, 106, 105, 103, 102, 3622 100, 99, 97, 96, 95, 93, 92, 91, 3623 90, 88, 87, 86, 85, 84, 83, 82, 3624 80, 79, 78, 77, 76, 75, 74, 73, 3625 72, 71, 70, 70, 69, 68, 67, 66, 3626 65, 64, 63, 63, 62, 61, 60, 59, 3627 59, 58, 57, 56, 56, 55, 54, 53 3628 }; 3629 const int precision = 7; 3630 3631 if (exp == 0 && frac != 0) { /* subnormal */ 3632 /* Normalize the subnormal. */ 3633 while (extract64(frac, frac_size - 1, 1) == 0) { 3634 exp--; 3635 frac <<= 1; 3636 } 3637 3638 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3639 } 3640 3641 int idx = ((exp & 1) << (precision - 1)) | 3642 (frac >> (frac_size - precision + 1)); 3643 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3644 (frac_size - precision); 3645 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3646 3647 uint64_t val = 0; 3648 val = deposit64(val, 0, frac_size, out_frac); 3649 val = deposit64(val, frac_size, exp_size, out_exp); 3650 val = deposit64(val, frac_size + exp_size, 1, sign); 3651 return val; 3652 } 3653 3654 static float16 frsqrt7_h(float16 f, float_status *s) 3655 { 3656 int exp_size = 5, frac_size = 10; 3657 bool sign = float16_is_neg(f); 3658 3659 /* 3660 * frsqrt7(sNaN) = canonical NaN 3661 * frsqrt7(-inf) = canonical NaN 3662 * frsqrt7(-normal) = canonical NaN 3663 * frsqrt7(-subnormal) = canonical NaN 3664 */ 3665 if (float16_is_signaling_nan(f, s) || 3666 (float16_is_infinity(f) && sign) || 3667 (float16_is_normal(f) && sign) || 3668 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3669 s->float_exception_flags |= float_flag_invalid; 3670 return float16_default_nan(s); 3671 } 3672 3673 /* frsqrt7(qNaN) = canonical NaN */ 3674 if (float16_is_quiet_nan(f, s)) { 3675 return float16_default_nan(s); 3676 } 3677 3678 /* frsqrt7(+-0) = +-inf */ 3679 if (float16_is_zero(f)) { 3680 s->float_exception_flags |= float_flag_divbyzero; 3681 return float16_set_sign(float16_infinity, sign); 3682 } 3683 3684 /* frsqrt7(+inf) = +0 */ 3685 if (float16_is_infinity(f) && !sign) { 3686 return float16_set_sign(float16_zero, sign); 3687 } 3688 3689 /* +normal, +subnormal */ 3690 uint64_t val = frsqrt7(f, exp_size, frac_size); 3691 return make_float16(val); 3692 } 3693 3694 static float32 frsqrt7_s(float32 f, float_status *s) 3695 { 3696 int exp_size = 8, frac_size = 23; 3697 bool 
sign = float32_is_neg(f); 3698 3699 /* 3700 * frsqrt7(sNaN) = canonical NaN 3701 * frsqrt7(-inf) = canonical NaN 3702 * frsqrt7(-normal) = canonical NaN 3703 * frsqrt7(-subnormal) = canonical NaN 3704 */ 3705 if (float32_is_signaling_nan(f, s) || 3706 (float32_is_infinity(f) && sign) || 3707 (float32_is_normal(f) && sign) || 3708 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3709 s->float_exception_flags |= float_flag_invalid; 3710 return float32_default_nan(s); 3711 } 3712 3713 /* frsqrt7(qNaN) = canonical NaN */ 3714 if (float32_is_quiet_nan(f, s)) { 3715 return float32_default_nan(s); 3716 } 3717 3718 /* frsqrt7(+-0) = +-inf */ 3719 if (float32_is_zero(f)) { 3720 s->float_exception_flags |= float_flag_divbyzero; 3721 return float32_set_sign(float32_infinity, sign); 3722 } 3723 3724 /* frsqrt7(+inf) = +0 */ 3725 if (float32_is_infinity(f) && !sign) { 3726 return float32_set_sign(float32_zero, sign); 3727 } 3728 3729 /* +normal, +subnormal */ 3730 uint64_t val = frsqrt7(f, exp_size, frac_size); 3731 return make_float32(val); 3732 } 3733 3734 static float64 frsqrt7_d(float64 f, float_status *s) 3735 { 3736 int exp_size = 11, frac_size = 52; 3737 bool sign = float64_is_neg(f); 3738 3739 /* 3740 * frsqrt7(sNaN) = canonical NaN 3741 * frsqrt7(-inf) = canonical NaN 3742 * frsqrt7(-normal) = canonical NaN 3743 * frsqrt7(-subnormal) = canonical NaN 3744 */ 3745 if (float64_is_signaling_nan(f, s) || 3746 (float64_is_infinity(f) && sign) || 3747 (float64_is_normal(f) && sign) || 3748 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3749 s->float_exception_flags |= float_flag_invalid; 3750 return float64_default_nan(s); 3751 } 3752 3753 /* frsqrt7(qNaN) = canonical NaN */ 3754 if (float64_is_quiet_nan(f, s)) { 3755 return float64_default_nan(s); 3756 } 3757 3758 /* frsqrt7(+-0) = +-inf */ 3759 if (float64_is_zero(f)) { 3760 s->float_exception_flags |= float_flag_divbyzero; 3761 return float64_set_sign(float64_infinity, sign); 3762 } 3763 3764 /* frsqrt7(+inf) = +0 */ 3765 if (float64_is_infinity(f) && !sign) { 3766 return float64_set_sign(float64_zero, sign); 3767 } 3768 3769 /* +normal, +subnormal */ 3770 uint64_t val = frsqrt7(f, exp_size, frac_size); 3771 return make_float64(val); 3772 } 3773 3774 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3775 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3776 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3777 GEN_VEXT_V_ENV(vfrsqrt7_v_h) 3778 GEN_VEXT_V_ENV(vfrsqrt7_v_w) 3779 GEN_VEXT_V_ENV(vfrsqrt7_v_d) 3780 3781 /* 3782 * Vector Floating-Point Reciprocal Estimate Instruction 3783 * 3784 * Adapted from riscv-v-spec recip.c: 3785 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3786 */ 3787 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3788 float_status *s) 3789 { 3790 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3791 uint64_t exp = extract64(f, frac_size, exp_size); 3792 uint64_t frac = extract64(f, 0, frac_size); 3793 3794 const uint8_t lookup_table[] = { 3795 127, 125, 123, 121, 119, 117, 116, 114, 3796 112, 110, 109, 107, 105, 104, 102, 100, 3797 99, 97, 96, 94, 93, 91, 90, 88, 3798 87, 85, 84, 83, 81, 80, 79, 77, 3799 76, 75, 74, 72, 71, 70, 69, 68, 3800 66, 65, 64, 63, 62, 61, 60, 59, 3801 58, 57, 56, 55, 54, 53, 52, 51, 3802 50, 49, 48, 47, 46, 45, 44, 43, 3803 42, 41, 40, 40, 39, 38, 37, 36, 3804 35, 35, 34, 33, 32, 31, 31, 30, 3805 29, 28, 28, 27, 26, 25, 25, 24, 3806 23, 23, 22, 21, 21, 20, 19, 19, 3807 18, 17, 17, 16, 15, 15, 14, 14, 
3808 13, 12, 12, 11, 11, 10, 9, 9, 3809 8, 8, 7, 7, 6, 5, 5, 4, 3810 4, 3, 3, 2, 2, 1, 1, 0 3811 }; 3812 const int precision = 7; 3813 3814 if (exp == 0 && frac != 0) { /* subnormal */ 3815 /* Normalize the subnormal. */ 3816 while (extract64(frac, frac_size - 1, 1) == 0) { 3817 exp--; 3818 frac <<= 1; 3819 } 3820 3821 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3822 3823 if (exp != 0 && exp != UINT64_MAX) { 3824 /* 3825 * Overflow to inf or max value of same sign, 3826 * depending on sign and rounding mode. 3827 */ 3828 s->float_exception_flags |= (float_flag_inexact | 3829 float_flag_overflow); 3830 3831 if ((s->float_rounding_mode == float_round_to_zero) || 3832 ((s->float_rounding_mode == float_round_down) && !sign) || 3833 ((s->float_rounding_mode == float_round_up) && sign)) { 3834 /* Return greatest/negative finite value. */ 3835 return (sign << (exp_size + frac_size)) | 3836 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3837 } else { 3838 /* Return +-inf. */ 3839 return (sign << (exp_size + frac_size)) | 3840 MAKE_64BIT_MASK(frac_size, exp_size); 3841 } 3842 } 3843 } 3844 3845 int idx = frac >> (frac_size - precision); 3846 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3847 (frac_size - precision); 3848 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3849 3850 if (out_exp == 0 || out_exp == UINT64_MAX) { 3851 /* 3852 * The result is subnormal, but don't raise the underflow exception, 3853 * because there's no additional loss of precision. 3854 */ 3855 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3856 if (out_exp == UINT64_MAX) { 3857 out_frac >>= 1; 3858 out_exp = 0; 3859 } 3860 } 3861 3862 uint64_t val = 0; 3863 val = deposit64(val, 0, frac_size, out_frac); 3864 val = deposit64(val, frac_size, exp_size, out_exp); 3865 val = deposit64(val, frac_size + exp_size, 1, sign); 3866 return val; 3867 } 3868 3869 static float16 frec7_h(float16 f, float_status *s) 3870 { 3871 int exp_size = 5, frac_size = 10; 3872 bool sign = float16_is_neg(f); 3873 3874 /* frec7(+-inf) = +-0 */ 3875 if (float16_is_infinity(f)) { 3876 return float16_set_sign(float16_zero, sign); 3877 } 3878 3879 /* frec7(+-0) = +-inf */ 3880 if (float16_is_zero(f)) { 3881 s->float_exception_flags |= float_flag_divbyzero; 3882 return float16_set_sign(float16_infinity, sign); 3883 } 3884 3885 /* frec7(sNaN) = canonical NaN */ 3886 if (float16_is_signaling_nan(f, s)) { 3887 s->float_exception_flags |= float_flag_invalid; 3888 return float16_default_nan(s); 3889 } 3890 3891 /* frec7(qNaN) = canonical NaN */ 3892 if (float16_is_quiet_nan(f, s)) { 3893 return float16_default_nan(s); 3894 } 3895 3896 /* +-normal, +-subnormal */ 3897 uint64_t val = frec7(f, exp_size, frac_size, s); 3898 return make_float16(val); 3899 } 3900 3901 static float32 frec7_s(float32 f, float_status *s) 3902 { 3903 int exp_size = 8, frac_size = 23; 3904 bool sign = float32_is_neg(f); 3905 3906 /* frec7(+-inf) = +-0 */ 3907 if (float32_is_infinity(f)) { 3908 return float32_set_sign(float32_zero, sign); 3909 } 3910 3911 /* frec7(+-0) = +-inf */ 3912 if (float32_is_zero(f)) { 3913 s->float_exception_flags |= float_flag_divbyzero; 3914 return float32_set_sign(float32_infinity, sign); 3915 } 3916 3917 /* frec7(sNaN) = canonical NaN */ 3918 if (float32_is_signaling_nan(f, s)) { 3919 s->float_exception_flags |= float_flag_invalid; 3920 return float32_default_nan(s); 3921 } 3922 3923 /* frec7(qNaN) = canonical NaN */ 3924 if (float32_is_quiet_nan(f, s)) { 3925 return float32_default_nan(s); 3926 } 3927 3928 /* 
+-normal, +-subnormal */ 3929 uint64_t val = frec7(f, exp_size, frac_size, s); 3930 return make_float32(val); 3931 } 3932 3933 static float64 frec7_d(float64 f, float_status *s) 3934 { 3935 int exp_size = 11, frac_size = 52; 3936 bool sign = float64_is_neg(f); 3937 3938 /* frec7(+-inf) = +-0 */ 3939 if (float64_is_infinity(f)) { 3940 return float64_set_sign(float64_zero, sign); 3941 } 3942 3943 /* frec7(+-0) = +-inf */ 3944 if (float64_is_zero(f)) { 3945 s->float_exception_flags |= float_flag_divbyzero; 3946 return float64_set_sign(float64_infinity, sign); 3947 } 3948 3949 /* frec7(sNaN) = canonical NaN */ 3950 if (float64_is_signaling_nan(f, s)) { 3951 s->float_exception_flags |= float_flag_invalid; 3952 return float64_default_nan(s); 3953 } 3954 3955 /* frec7(qNaN) = canonical NaN */ 3956 if (float64_is_quiet_nan(f, s)) { 3957 return float64_default_nan(s); 3958 } 3959 3960 /* +-normal, +-subnormal */ 3961 uint64_t val = frec7(f, exp_size, frac_size, s); 3962 return make_float64(val); 3963 } 3964 3965 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3966 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3967 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3968 GEN_VEXT_V_ENV(vfrec7_v_h) 3969 GEN_VEXT_V_ENV(vfrec7_v_w) 3970 GEN_VEXT_V_ENV(vfrec7_v_d) 3971 3972 /* Vector Floating-Point MIN/MAX Instructions */ 3973 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3974 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) 3975 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3976 GEN_VEXT_VV_ENV(vfmin_vv_h) 3977 GEN_VEXT_VV_ENV(vfmin_vv_w) 3978 GEN_VEXT_VV_ENV(vfmin_vv_d) 3979 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3980 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3981 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3982 GEN_VEXT_VF(vfmin_vf_h) 3983 GEN_VEXT_VF(vfmin_vf_w) 3984 GEN_VEXT_VF(vfmin_vf_d) 3985 3986 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3987 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3988 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3989 GEN_VEXT_VV_ENV(vfmax_vv_h) 3990 GEN_VEXT_VV_ENV(vfmax_vv_w) 3991 GEN_VEXT_VV_ENV(vfmax_vv_d) 3992 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3993 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3994 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3995 GEN_VEXT_VF(vfmax_vf_h) 3996 GEN_VEXT_VF(vfmax_vf_w) 3997 GEN_VEXT_VF(vfmax_vf_d) 3998 3999 /* Vector Floating-Point Sign-Injection Instructions */ 4000 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4001 { 4002 return deposit64(b, 0, 15, a); 4003 } 4004 4005 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4006 { 4007 return deposit64(b, 0, 31, a); 4008 } 4009 4010 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4011 { 4012 return deposit64(b, 0, 63, a); 4013 } 4014 4015 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4016 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4017 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4018 GEN_VEXT_VV_ENV(vfsgnj_vv_h) 4019 GEN_VEXT_VV_ENV(vfsgnj_vv_w) 4020 GEN_VEXT_VV_ENV(vfsgnj_vv_d) 4021 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4022 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4023 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, 
H8, fsgnj64) 4024 GEN_VEXT_VF(vfsgnj_vf_h) 4025 GEN_VEXT_VF(vfsgnj_vf_w) 4026 GEN_VEXT_VF(vfsgnj_vf_d) 4027 4028 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4029 { 4030 return deposit64(~b, 0, 15, a); 4031 } 4032 4033 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4034 { 4035 return deposit64(~b, 0, 31, a); 4036 } 4037 4038 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4039 { 4040 return deposit64(~b, 0, 63, a); 4041 } 4042 4043 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4044 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4045 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4046 GEN_VEXT_VV_ENV(vfsgnjn_vv_h) 4047 GEN_VEXT_VV_ENV(vfsgnjn_vv_w) 4048 GEN_VEXT_VV_ENV(vfsgnjn_vv_d) 4049 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4050 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4051 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4052 GEN_VEXT_VF(vfsgnjn_vf_h) 4053 GEN_VEXT_VF(vfsgnjn_vf_w) 4054 GEN_VEXT_VF(vfsgnjn_vf_d) 4055 4056 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4057 { 4058 return deposit64(b ^ a, 0, 15, a); 4059 } 4060 4061 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4062 { 4063 return deposit64(b ^ a, 0, 31, a); 4064 } 4065 4066 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 4067 { 4068 return deposit64(b ^ a, 0, 63, a); 4069 } 4070 4071 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4072 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4073 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4074 GEN_VEXT_VV_ENV(vfsgnjx_vv_h) 4075 GEN_VEXT_VV_ENV(vfsgnjx_vv_w) 4076 GEN_VEXT_VV_ENV(vfsgnjx_vv_d) 4077 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4078 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4079 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4080 GEN_VEXT_VF(vfsgnjx_vf_h) 4081 GEN_VEXT_VF(vfsgnjx_vf_w) 4082 GEN_VEXT_VF(vfsgnjx_vf_d) 4083 4084 /* Vector Floating-Point Compare Instructions */ 4085 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4086 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4087 CPURISCVState *env, uint32_t desc) \ 4088 { \ 4089 uint32_t vm = vext_vm(desc); \ 4090 uint32_t vl = env->vl; \ 4091 uint32_t i; \ 4092 \ 4093 for (i = env->vstart; i < vl; i++) { \ 4094 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4095 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4096 if (!vm && !vext_elem_mask(v0, i)) { \ 4097 continue; \ 4098 } \ 4099 vext_set_elem_mask(vd, i, \ 4100 DO_OP(s2, s1, &env->fp_status)); \ 4101 } \ 4102 env->vstart = 0; \ 4103 } 4104 4105 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4106 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4107 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4108 4109 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4110 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4111 CPURISCVState *env, uint32_t desc) \ 4112 { \ 4113 uint32_t vm = vext_vm(desc); \ 4114 uint32_t vl = env->vl; \ 4115 uint32_t i; \ 4116 \ 4117 for (i = env->vstart; i < vl; i++) { \ 4118 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4119 if (!vm && !vext_elem_mask(v0, i)) { \ 4120 continue; \ 4121 } \ 4122 vext_set_elem_mask(vd, i, \ 4123 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4124 } \ 4125 env->vstart = 0; \ 4126 } 4127 4128 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4129 
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4130 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4131 4132 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4133 { 4134 FloatRelation compare = float16_compare_quiet(a, b, s); 4135 return compare != float_relation_equal; 4136 } 4137 4138 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4139 { 4140 FloatRelation compare = float32_compare_quiet(a, b, s); 4141 return compare != float_relation_equal; 4142 } 4143 4144 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4145 { 4146 FloatRelation compare = float64_compare_quiet(a, b, s); 4147 return compare != float_relation_equal; 4148 } 4149 4150 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4151 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4152 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4153 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4154 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4155 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4156 4157 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4158 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4159 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4160 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4161 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4162 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4163 4164 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4165 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4166 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4167 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4168 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4169 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4170 4171 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4172 { 4173 FloatRelation compare = float16_compare(a, b, s); 4174 return compare == float_relation_greater; 4175 } 4176 4177 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4178 { 4179 FloatRelation compare = float32_compare(a, b, s); 4180 return compare == float_relation_greater; 4181 } 4182 4183 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4184 { 4185 FloatRelation compare = float64_compare(a, b, s); 4186 return compare == float_relation_greater; 4187 } 4188 4189 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4190 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4191 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4192 4193 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4194 { 4195 FloatRelation compare = float16_compare(a, b, s); 4196 return compare == float_relation_greater || 4197 compare == float_relation_equal; 4198 } 4199 4200 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4201 { 4202 FloatRelation compare = float32_compare(a, b, s); 4203 return compare == float_relation_greater || 4204 compare == float_relation_equal; 4205 } 4206 4207 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4208 { 4209 FloatRelation compare = float64_compare(a, b, s); 4210 return compare == float_relation_greater || 4211 compare == float_relation_equal; 4212 } 4213 4214 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4215 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4216 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4217 4218 /* Vector Floating-Point Classify Instruction */ 4219 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4220 static void do_##NAME(void *vd, void *vs2, int i) \ 
4221 { \ 4222 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4223 *((TD *)vd + HD(i)) = OP(s2); \ 4224 } 4225 4226 #define GEN_VEXT_V(NAME) \ 4227 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4228 CPURISCVState *env, uint32_t desc) \ 4229 { \ 4230 uint32_t vm = vext_vm(desc); \ 4231 uint32_t vl = env->vl; \ 4232 uint32_t i; \ 4233 \ 4234 for (i = env->vstart; i < vl; i++) { \ 4235 if (!vm && !vext_elem_mask(v0, i)) { \ 4236 continue; \ 4237 } \ 4238 do_##NAME(vd, vs2, i); \ 4239 } \ 4240 env->vstart = 0; \ 4241 } 4242 4243 target_ulong fclass_h(uint64_t frs1) 4244 { 4245 float16 f = frs1; 4246 bool sign = float16_is_neg(f); 4247 4248 if (float16_is_infinity(f)) { 4249 return sign ? 1 << 0 : 1 << 7; 4250 } else if (float16_is_zero(f)) { 4251 return sign ? 1 << 3 : 1 << 4; 4252 } else if (float16_is_zero_or_denormal(f)) { 4253 return sign ? 1 << 2 : 1 << 5; 4254 } else if (float16_is_any_nan(f)) { 4255 float_status s = { }; /* for snan_bit_is_one */ 4256 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4257 } else { 4258 return sign ? 1 << 1 : 1 << 6; 4259 } 4260 } 4261 4262 target_ulong fclass_s(uint64_t frs1) 4263 { 4264 float32 f = frs1; 4265 bool sign = float32_is_neg(f); 4266 4267 if (float32_is_infinity(f)) { 4268 return sign ? 1 << 0 : 1 << 7; 4269 } else if (float32_is_zero(f)) { 4270 return sign ? 1 << 3 : 1 << 4; 4271 } else if (float32_is_zero_or_denormal(f)) { 4272 return sign ? 1 << 2 : 1 << 5; 4273 } else if (float32_is_any_nan(f)) { 4274 float_status s = { }; /* for snan_bit_is_one */ 4275 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4276 } else { 4277 return sign ? 1 << 1 : 1 << 6; 4278 } 4279 } 4280 4281 target_ulong fclass_d(uint64_t frs1) 4282 { 4283 float64 f = frs1; 4284 bool sign = float64_is_neg(f); 4285 4286 if (float64_is_infinity(f)) { 4287 return sign ? 1 << 0 : 1 << 7; 4288 } else if (float64_is_zero(f)) { 4289 return sign ? 1 << 3 : 1 << 4; 4290 } else if (float64_is_zero_or_denormal(f)) { 4291 return sign ? 1 << 2 : 1 << 5; 4292 } else if (float64_is_any_nan(f)) { 4293 float_status s = { }; /* for snan_bit_is_one */ 4294 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4295 } else { 4296 return sign ? 1 << 1 : 1 << 6; 4297 } 4298 } 4299 4300 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4301 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4302 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4303 GEN_VEXT_V(vfclass_v_h) 4304 GEN_VEXT_V(vfclass_v_w) 4305 GEN_VEXT_V(vfclass_v_d) 4306 4307 /* Vector Floating-Point Merge Instruction */ 4308 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4309 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4310 CPURISCVState *env, uint32_t desc) \ 4311 { \ 4312 uint32_t vm = vext_vm(desc); \ 4313 uint32_t vl = env->vl; \ 4314 uint32_t i; \ 4315 \ 4316 for (i = env->vstart; i < vl; i++) { \ 4317 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4318 *((ETYPE *)vd + H(i)) \ 4319 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4320 } \ 4321 env->vstart = 0; \ 4322 } 4323 4324 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4325 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4326 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4327 4328 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4329 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
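 * All of the single-width conversions below call the softfloat routines
 * with &env->fp_status, so they round according to the rounding mode
 * currently held there (set up from frm before the helper runs), and the
 * softfloat code raises the invalid flag for out-of-range inputs.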
*/ 4330 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4331 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4332 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4333 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) 4334 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) 4335 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) 4336 4337 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4338 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4339 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4340 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4341 GEN_VEXT_V_ENV(vfcvt_x_f_v_h) 4342 GEN_VEXT_V_ENV(vfcvt_x_f_v_w) 4343 GEN_VEXT_V_ENV(vfcvt_x_f_v_d) 4344 4345 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4346 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4347 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4348 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4349 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) 4350 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) 4351 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) 4352 4353 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4354 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4355 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4356 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4357 GEN_VEXT_V_ENV(vfcvt_f_x_v_h) 4358 GEN_VEXT_V_ENV(vfcvt_f_x_v_w) 4359 GEN_VEXT_V_ENV(vfcvt_f_x_v_d) 4360 4361 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4362 /* (TD, T2, TX2) */ 4363 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4364 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4365 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4366 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4367 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4368 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4369 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) 4370 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) 4371 4372 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4373 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4374 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4375 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) 4376 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) 4377 4378 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4379 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4380 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4381 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4382 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) 4383 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) 4384 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) 4385 4386 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4387 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4388 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4389 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4390 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) 4391 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) 4392 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) 4393 4394 /* 4395 * vfwcvt.f.f.v vd, vs2, vm 4396 * Convert single-width float to double-width float. 
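 *
 * The 'true' argument passed to float16_to_float32() below selects the
 * IEEE half-precision interpretation of the source, rather than the ARM
 * alternative half-precision format without infinities/NaNs.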
4397 */ 4398 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4399 { 4400 return float16_to_float32(a, true, s); 4401 } 4402 4403 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4404 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4405 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) 4406 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) 4407 4408 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4409 /* (TD, T2, TX2) */ 4410 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4411 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4412 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4413 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4414 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4415 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4416 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4417 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) 4418 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) 4419 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) 4420 4421 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4422 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4423 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4424 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4425 GEN_VEXT_V_ENV(vfncvt_x_f_w_b) 4426 GEN_VEXT_V_ENV(vfncvt_x_f_w_h) 4427 GEN_VEXT_V_ENV(vfncvt_x_f_w_w) 4428 4429 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4430 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4431 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4432 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) 4433 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) 4434 4435 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4436 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4437 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4438 GEN_VEXT_V_ENV(vfncvt_f_x_w_h) 4439 GEN_VEXT_V_ENV(vfncvt_f_x_w_w) 4440 4441 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4442 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4443 { 4444 return float32_to_float16(a, true, s); 4445 } 4446 4447 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4448 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4449 GEN_VEXT_V_ENV(vfncvt_f_f_w_h) 4450 GEN_VEXT_V_ENV(vfncvt_f_f_w_w) 4451 4452 /* 4453 *** Vector Reduction Operations 4454 */ 4455 /* Vector Single-Width Integer Reduction Instructions */ 4456 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4457 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4458 void *vs2, CPURISCVState *env, uint32_t desc) \ 4459 { \ 4460 uint32_t vm = vext_vm(desc); \ 4461 uint32_t vl = env->vl; \ 4462 uint32_t i; \ 4463 TD s1 = *((TD *)vs1 + HD(0)); \ 4464 \ 4465 for (i = env->vstart; i < vl; i++) { \ 4466 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4467 if (!vm && !vext_elem_mask(v0, i)) { \ 4468 continue; \ 4469 } \ 4470 s1 = OP(s1, (TD)s2); \ 4471 } \ 4472 *((TD *)vd + HD(0)) = s1; \ 4473 env->vstart = 0; \ 4474 } 4475 4476 /* vd[0] = sum(vs1[0], vs2[*]) */ 4477 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4478 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4479 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4480 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4481 4482 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4483 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4484 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4485 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4486 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4487 4488 /* vd[0] = max(vs1[0], vs2[*]) */ 4489 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4490 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4491 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4492 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4493 4494 /* vd[0] = minu(vs1[0], vs2[*]) */ 4495 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4496 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4497 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4498 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4499 4500 /* vd[0] = min(vs1[0], vs2[*]) */ 4501 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4502 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4503 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4504 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4505 4506 /* vd[0] = and(vs1[0], vs2[*]) */ 4507 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4508 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4509 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4510 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4511 4512 /* vd[0] = or(vs1[0], vs2[*]) */ 4513 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4514 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4515 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4516 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4517 4518 /* vd[0] = xor(vs1[0], vs2[*]) */ 4519 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4520 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4521 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4522 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4523 4524 /* Vector Widening Integer Reduction Instructions */ 4525 
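/*
 * The widening forms reuse GEN_VEXT_RED: TS2 is the single-width source
 * type and TD the double-width accumulator type, so the (TD)s2 cast in the
 * macro performs the sign- or zero-extension before each addition.  Only
 * element 0 of vd is written back.
 */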
/* signed sum reduction into double-width accumulator */ 4526 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4527 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4528 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4529 4530 /* Unsigned sum reduction into double-width accumulator */ 4531 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4532 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4533 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4534 4535 /* Vector Single-Width Floating-Point Reduction Instructions */ 4536 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4537 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4538 void *vs2, CPURISCVState *env, \ 4539 uint32_t desc) \ 4540 { \ 4541 uint32_t vm = vext_vm(desc); \ 4542 uint32_t vl = env->vl; \ 4543 uint32_t i; \ 4544 TD s1 = *((TD *)vs1 + HD(0)); \ 4545 \ 4546 for (i = env->vstart; i < vl; i++) { \ 4547 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4548 if (!vm && !vext_elem_mask(v0, i)) { \ 4549 continue; \ 4550 } \ 4551 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4552 } \ 4553 *((TD *)vd + HD(0)) = s1; \ 4554 env->vstart = 0; \ 4555 } 4556 4557 /* Unordered sum */ 4558 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4559 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4560 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4561 4562 /* Maximum value */ 4563 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4564 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4565 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4566 4567 /* Minimum value */ 4568 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4569 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4570 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4571 4572 /* Vector Widening Floating-Point Reduction Instructions */ 4573 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4574 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4575 void *vs2, CPURISCVState *env, uint32_t desc) 4576 { 4577 uint32_t vm = vext_vm(desc); 4578 uint32_t vl = env->vl; 4579 uint32_t i; 4580 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4581 4582 for (i = env->vstart; i < vl; i++) { 4583 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4584 if (!vm && !vext_elem_mask(v0, i)) { 4585 continue; 4586 } 4587 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4588 &env->fp_status); 4589 } 4590 *((uint32_t *)vd + H4(0)) = s1; 4591 env->vstart = 0; 4592 } 4593 4594 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4595 void *vs2, CPURISCVState *env, uint32_t desc) 4596 { 4597 uint32_t vm = vext_vm(desc); 4598 uint32_t vl = env->vl; 4599 uint32_t i; 4600 uint64_t s1 = *((uint64_t *)vs1); 4601 4602 for (i = env->vstart; i < vl; i++) { 4603 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4604 if (!vm && !vext_elem_mask(v0, i)) { 4605 continue; 4606 } 4607 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4608 &env->fp_status); 4609 } 4610 *((uint64_t *)vd) = s1; 4611 env->vstart = 0; 4612 } 4613 4614 /* 4615 *** Vector Mask Operations 4616 */ 4617 /* Vector Mask-Register Logical Instructions */ 4618 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4619 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4620 void *vs2, CPURISCVState *env, \ 4621 uint32_t desc) \ 
4622 { \ 4623 uint32_t vl = env->vl; \ 4624 uint32_t i; \ 4625 int a, b; \ 4626 \ 4627 for (i = env->vstart; i < vl; i++) { \ 4628 a = vext_elem_mask(vs1, i); \ 4629 b = vext_elem_mask(vs2, i); \ 4630 vext_set_elem_mask(vd, i, OP(b, a)); \ 4631 } \ 4632 env->vstart = 0; \ 4633 } 4634 4635 #define DO_NAND(N, M) (!(N & M)) 4636 #define DO_ANDNOT(N, M) (N & !M) 4637 #define DO_NOR(N, M) (!(N | M)) 4638 #define DO_ORNOT(N, M) (N | !M) 4639 #define DO_XNOR(N, M) (!(N ^ M)) 4640 4641 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4642 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4643 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4644 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4645 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4646 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4647 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4648 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4649 4650 /* Vector count population in mask vcpop */ 4651 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4652 uint32_t desc) 4653 { 4654 target_ulong cnt = 0; 4655 uint32_t vm = vext_vm(desc); 4656 uint32_t vl = env->vl; 4657 int i; 4658 4659 for (i = env->vstart; i < vl; i++) { 4660 if (vm || vext_elem_mask(v0, i)) { 4661 if (vext_elem_mask(vs2, i)) { 4662 cnt++; 4663 } 4664 } 4665 } 4666 env->vstart = 0; 4667 return cnt; 4668 } 4669 4670 /* vfirst find-first-set mask bit*/ 4671 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4672 uint32_t desc) 4673 { 4674 uint32_t vm = vext_vm(desc); 4675 uint32_t vl = env->vl; 4676 int i; 4677 4678 for (i = env->vstart; i < vl; i++) { 4679 if (vm || vext_elem_mask(v0, i)) { 4680 if (vext_elem_mask(vs2, i)) { 4681 return i; 4682 } 4683 } 4684 } 4685 env->vstart = 0; 4686 return -1LL; 4687 } 4688 4689 enum set_mask_type { 4690 ONLY_FIRST = 1, 4691 INCLUDE_FIRST, 4692 BEFORE_FIRST, 4693 }; 4694 4695 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4696 uint32_t desc, enum set_mask_type type) 4697 { 4698 uint32_t vm = vext_vm(desc); 4699 uint32_t vl = env->vl; 4700 int i; 4701 bool first_mask_bit = false; 4702 4703 for (i = env->vstart; i < vl; i++) { 4704 if (!vm && !vext_elem_mask(v0, i)) { 4705 continue; 4706 } 4707 /* write a zero to all following active elements */ 4708 if (first_mask_bit) { 4709 vext_set_elem_mask(vd, i, 0); 4710 continue; 4711 } 4712 if (vext_elem_mask(vs2, i)) { 4713 first_mask_bit = true; 4714 if (type == BEFORE_FIRST) { 4715 vext_set_elem_mask(vd, i, 0); 4716 } else { 4717 vext_set_elem_mask(vd, i, 1); 4718 } 4719 } else { 4720 if (type == ONLY_FIRST) { 4721 vext_set_elem_mask(vd, i, 0); 4722 } else { 4723 vext_set_elem_mask(vd, i, 1); 4724 } 4725 } 4726 } 4727 env->vstart = 0; 4728 } 4729 4730 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4731 uint32_t desc) 4732 { 4733 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4734 } 4735 4736 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4737 uint32_t desc) 4738 { 4739 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4740 } 4741 4742 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4743 uint32_t desc) 4744 { 4745 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4746 } 4747 4748 /* Vector Iota Instruction */ 4749 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4750 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4751 uint32_t desc) \ 4752 { \ 4753 uint32_t vm = vext_vm(desc); \ 4754 uint32_t vl = env->vl; \ 4755 uint32_t sum = 0; \ 4756 int i; \ 4757 \ 4758 for (i = env->vstart; i < vl; i++) { \ 4759 if (!vm && !vext_elem_mask(v0, i)) { \ 4760 
continue; \ 4761 } \ 4762 *((ETYPE *)vd + H(i)) = sum; \ 4763 if (vext_elem_mask(vs2, i)) { \ 4764 sum++; \ 4765 } \ 4766 } \ 4767 env->vstart = 0; \ 4768 } 4769 4770 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4771 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4772 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4773 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4774 4775 /* Vector Element Index Instruction */ 4776 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4777 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4778 { \ 4779 uint32_t vm = vext_vm(desc); \ 4780 uint32_t vl = env->vl; \ 4781 int i; \ 4782 \ 4783 for (i = env->vstart; i < vl; i++) { \ 4784 if (!vm && !vext_elem_mask(v0, i)) { \ 4785 continue; \ 4786 } \ 4787 *((ETYPE *)vd + H(i)) = i; \ 4788 } \ 4789 env->vstart = 0; \ 4790 } 4791 4792 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4793 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4794 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4795 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4796 4797 /* 4798 *** Vector Permutation Instructions 4799 */ 4800 4801 /* Vector Slide Instructions */ 4802 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4803 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4804 CPURISCVState *env, uint32_t desc) \ 4805 { \ 4806 uint32_t vm = vext_vm(desc); \ 4807 uint32_t vl = env->vl; \ 4808 target_ulong offset = s1, i_min, i; \ 4809 \ 4810 i_min = MAX(env->vstart, offset); \ 4811 for (i = i_min; i < vl; i++) { \ 4812 if (!vm && !vext_elem_mask(v0, i)) { \ 4813 continue; \ 4814 } \ 4815 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4816 } \ 4817 } 4818 4819 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4820 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4821 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4822 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4823 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4824 4825 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4826 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4827 CPURISCVState *env, uint32_t desc) \ 4828 { \ 4829 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4830 uint32_t vm = vext_vm(desc); \ 4831 uint32_t vl = env->vl; \ 4832 target_ulong i_max, i; \ 4833 \ 4834 i_max = MAX(MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl), env->vstart); \ 4835 for (i = env->vstart; i < i_max; ++i) { \ 4836 if (vm || vext_elem_mask(v0, i)) { \ 4837 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4838 } \ 4839 } \ 4840 \ 4841 for (i = i_max; i < vl; ++i) { \ 4842 if (vm || vext_elem_mask(v0, i)) { \ 4843 *((ETYPE *)vd + H(i)) = 0; \ 4844 } \ 4845 } \ 4846 \ 4847 env->vstart = 0; \ 4848 } 4849 4850 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4851 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4852 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4853 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4854 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4855 4856 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ 4857 static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ 4858 void *vs2, CPURISCVState *env, uint32_t desc) \ 4859 { \ 4860 typedef uint##BITWIDTH##_t ETYPE; \ 4861 uint32_t vm = vext_vm(desc); \ 4862 uint32_t vl = env->vl; \ 4863 uint32_t i; \ 4864 \ 4865 for (i = env->vstart; i < vl; i++) { \ 4866 if (!vm && !vext_elem_mask(v0, i)) { \ 4867 continue; \ 4868 } \ 4869 if (i == 0) { \ 4870 *((ETYPE *)vd + H(i)) = s1; \ 4871 } else { \ 4872 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4873 } \ 4874 } \ 4875 env->vstart = 0; \ 4876 } 4877 4878 GEN_VEXT_VSLIE1UP(8, H1) 4879 GEN_VEXT_VSLIE1UP(16, H2) 4880 GEN_VEXT_VSLIE1UP(32, H4) 4881 GEN_VEXT_VSLIE1UP(64, H8) 4882 4883 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ 4884 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4885 CPURISCVState *env, uint32_t desc) \ 4886 { \ 4887 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4888 } 4889 4890 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4891 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4892 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4893 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4894 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4895 4896 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \ 4897 static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ 4898 void *vs2, CPURISCVState *env, uint32_t desc) \ 4899 { \ 4900 typedef uint##BITWIDTH##_t ETYPE; \ 4901 uint32_t vm = vext_vm(desc); \ 4902 uint32_t vl = env->vl; \ 4903 uint32_t i; \ 4904 \ 4905 for (i = env->vstart; i < vl; i++) { \ 4906 if (!vm && !vext_elem_mask(v0, i)) { \ 4907 continue; \ 4908 } \ 4909 if (i == vl - 1) { \ 4910 *((ETYPE *)vd + H(i)) = s1; \ 4911 } else { \ 4912 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4913 } \ 4914 } \ 4915 env->vstart = 0; \ 4916 } 4917 4918 GEN_VEXT_VSLIDE1DOWN(8, H1) 4919 GEN_VEXT_VSLIDE1DOWN(16, H2) 4920 GEN_VEXT_VSLIDE1DOWN(32, H4) 4921 GEN_VEXT_VSLIDE1DOWN(64, H8) 4922 4923 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ 4924 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4925 CPURISCVState *env, uint32_t desc) \ 4926 { \ 4927 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4928 } 4929 4930 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4931 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4932 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4933 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4934 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4935 4936 /* Vector Floating-Point Slide Instructions */ 4937 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \ 4938 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4939 CPURISCVState *env, uint32_t desc) \ 4940 { \ 4941 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4942 } 4943 4944 /* 
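 * The floating-point slide1 helpers below simply reuse the integer
 * vslide1up/vslide1down bodies: f[rs1] is inserted as a raw bit pattern,
 * so no FP arithmetic is performed and no exception flags are raised.
 *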
vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */ 4945 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4946 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4947 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4948 4949 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \ 4950 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4951 CPURISCVState *env, uint32_t desc) \ 4952 { \ 4953 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4954 } 4955 4956 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4957 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4958 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4959 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4960 4961 /* Vector Register Gather Instruction */ 4962 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4963 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4964 CPURISCVState *env, uint32_t desc) \ 4965 { \ 4966 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4967 uint32_t vm = vext_vm(desc); \ 4968 uint32_t vl = env->vl; \ 4969 uint64_t index; \ 4970 uint32_t i; \ 4971 \ 4972 for (i = env->vstart; i < vl; i++) { \ 4973 if (!vm && !vext_elem_mask(v0, i)) { \ 4974 continue; \ 4975 } \ 4976 index = *((TS1 *)vs1 + HS1(i)); \ 4977 if (index >= vlmax) { \ 4978 *((TS2 *)vd + HS2(i)) = 0; \ 4979 } else { \ 4980 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4981 } \ 4982 } \ 4983 env->vstart = 0; \ 4984 } 4985 4986 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 4987 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4988 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4989 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4990 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4991 4992 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4993 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4994 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4995 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4996 4997 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4998 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4999 CPURISCVState *env, uint32_t desc) \ 5000 { \ 5001 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 5002 uint32_t vm = vext_vm(desc); \ 5003 uint32_t vl = env->vl; \ 5004 uint64_t index = s1; \ 5005 uint32_t i; \ 5006 \ 5007 for (i = env->vstart; i < vl; i++) { \ 5008 if (!vm && !vext_elem_mask(v0, i)) { \ 5009 continue; \ 5010 } \ 5011 if (index >= vlmax) { \ 5012 *((ETYPE *)vd + H(i)) = 0; \ 5013 } else { \ 5014 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 5015 } \ 5016 } \ 5017 env->vstart = 0; \ 5018 } 5019 5020 /* vd[i] = (x[rs1] >= VLMAX) ? 
0 : vs2[rs1] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
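/*
 * Note: the vzext_vfN / vsext_vfN helpers above read source elements of
 * width SEW/2, SEW/4 or SEW/8 (DTYPE) and rely on the implicit C integer
 * conversion in the assignment to zero- or sign-extend them to the
 * destination ETYPE, e.g. vsext_vf4_w widens int8_t elements to int32_t.
 */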