1 /* 2 * RISC-V Vector Extension Helpers for QEMU. 3 * 4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2 or later, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "qemu/host-utils.h" 21 #include "qemu/bitops.h" 22 #include "cpu.h" 23 #include "exec/memop.h" 24 #include "exec/exec-all.h" 25 #include "exec/helper-proto.h" 26 #include "fpu/softfloat.h" 27 #include "tcg/tcg-gvec-desc.h" 28 #include "internals.h" 29 #include <math.h> 30 31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, 32 target_ulong s2) 33 { 34 int vlmax, vl; 35 RISCVCPU *cpu = env_archcpu(env); 36 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL); 37 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW); 38 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); 39 int xlen = riscv_cpu_xlen(env); 40 bool vill = (s2 >> (xlen - 1)) & 0x1; 41 target_ulong reserved = s2 & 42 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT, 43 xlen - 1 - R_VTYPE_RESERVED_SHIFT); 44 45 if (lmul & 4) { 46 /* Fractional LMUL. */ 47 if (lmul == 4 || 48 cpu->cfg.elen >> (8 - lmul) < sew) { 49 vill = true; 50 } 51 } 52 53 if ((sew > cpu->cfg.elen) 54 || vill 55 || (ediv != 0) 56 || (reserved != 0)) { 57 /* only set vill bit. 
*/ 58 env->vill = 1; 59 env->vtype = 0; 60 env->vl = 0; 61 env->vstart = 0; 62 return 0; 63 } 64 65 vlmax = vext_get_vlmax(cpu, s2); 66 if (s1 <= vlmax) { 67 vl = s1; 68 } else { 69 vl = vlmax; 70 } 71 env->vl = vl; 72 env->vtype = s2; 73 env->vstart = 0; 74 env->vill = 0; 75 return vl; 76 } 77 78 /* 79 * Note that vector data is stored in host-endian 64-bit chunks, 80 * so addressing units smaller than that needs a host-endian fixup. 81 */ 82 #if HOST_BIG_ENDIAN 83 #define H1(x) ((x) ^ 7) 84 #define H1_2(x) ((x) ^ 6) 85 #define H1_4(x) ((x) ^ 4) 86 #define H2(x) ((x) ^ 3) 87 #define H4(x) ((x) ^ 1) 88 #define H8(x) ((x)) 89 #else 90 #define H1(x) (x) 91 #define H1_2(x) (x) 92 #define H1_4(x) (x) 93 #define H2(x) (x) 94 #define H4(x) (x) 95 #define H8(x) (x) 96 #endif 97 98 static inline uint32_t vext_nf(uint32_t desc) 99 { 100 return FIELD_EX32(simd_data(desc), VDATA, NF); 101 } 102 103 static inline uint32_t vext_vm(uint32_t desc) 104 { 105 return FIELD_EX32(simd_data(desc), VDATA, VM); 106 } 107 108 /* 109 * Encode LMUL to lmul as following: 110 * LMUL vlmul lmul 111 * 1 000 0 112 * 2 001 1 113 * 4 010 2 114 * 8 011 3 115 * - 100 - 116 * 1/8 101 -3 117 * 1/4 110 -2 118 * 1/2 111 -1 119 */ 120 static inline int32_t vext_lmul(uint32_t desc) 121 { 122 return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); 123 } 124 125 static inline uint32_t vext_vta(uint32_t desc) 126 { 127 return FIELD_EX32(simd_data(desc), VDATA, VTA); 128 } 129 130 static inline uint32_t vext_vta_all_1s(uint32_t desc) 131 { 132 return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); 133 } 134 135 /* 136 * Get the maximum number of elements can be operated. 137 * 138 * log2_esz: log2 of element size in bytes. 139 */ 140 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) 141 { 142 /* 143 * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits. 144 * so vlen in bytes (vlenb) is encoded as maxsz. 
145 */ 146 uint32_t vlenb = simd_maxsz(desc); 147 148 /* Return VLMAX */ 149 int scale = vext_lmul(desc) - log2_esz; 150 return scale < 0 ? vlenb >> -scale : vlenb << scale; 151 } 152 153 /* 154 * Get number of total elements, including prestart, body and tail elements. 155 * Note that when LMUL < 1, the tail includes the elements past VLMAX that 156 * are held in the same vector register. 157 */ 158 static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, 159 uint32_t esz) 160 { 161 uint32_t vlenb = simd_maxsz(desc); 162 uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); 163 int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 : 164 ctzl(esz) - ctzl(sew) + vext_lmul(desc); 165 return (vlenb << emul) / esz; 166 } 167 168 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) 169 { 170 return (addr & env->cur_pmmask) | env->cur_pmbase; 171 } 172 173 /* 174 * This function checks watchpoint before real load operation. 175 * 176 * In softmmu mode, the TLB API probe_access is enough for watchpoint check. 177 * In user mode, there is no watchpoint support now. 178 * 179 * It will trigger an exception if there is no mapping in TLB 180 * and page table walk can't fill the TLB entry. Then the guest 181 * software can return here after process the exception or never return. 
182 */ 183 static void probe_pages(CPURISCVState *env, target_ulong addr, 184 target_ulong len, uintptr_t ra, 185 MMUAccessType access_type) 186 { 187 target_ulong pagelen = -(addr | TARGET_PAGE_MASK); 188 target_ulong curlen = MIN(pagelen, len); 189 190 probe_access(env, adjust_addr(env, addr), curlen, access_type, 191 cpu_mmu_index(env, false), ra); 192 if (len > curlen) { 193 addr += curlen; 194 curlen = len - curlen; 195 probe_access(env, adjust_addr(env, addr), curlen, access_type, 196 cpu_mmu_index(env, false), ra); 197 } 198 } 199 200 /* set agnostic elements to 1s */ 201 static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, 202 uint32_t tot) 203 { 204 if (is_agnostic == 0) { 205 /* policy undisturbed */ 206 return; 207 } 208 if (tot - cnt == 0) { 209 return ; 210 } 211 memset(base + cnt, -1, tot - cnt); 212 } 213 214 static inline void vext_set_elem_mask(void *v0, int index, 215 uint8_t value) 216 { 217 int idx = index / 64; 218 int pos = index % 64; 219 uint64_t old = ((uint64_t *)v0)[idx]; 220 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); 221 } 222 223 /* 224 * Earlier designs (pre-0.9) had a varying number of bits 225 * per mask value (MLEN). In the 0.9 design, MLEN=1. 
226 * (Section 4.5) 227 */ 228 static inline int vext_elem_mask(void *v0, int index) 229 { 230 int idx = index / 64; 231 int pos = index % 64; 232 return (((uint64_t *)v0)[idx] >> pos) & 1; 233 } 234 235 /* elements operations for load and store */ 236 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, 237 uint32_t idx, void *vd, uintptr_t retaddr); 238 239 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 240 static void NAME(CPURISCVState *env, abi_ptr addr, \ 241 uint32_t idx, void *vd, uintptr_t retaddr)\ 242 { \ 243 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 244 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 245 } \ 246 247 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) 248 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw) 249 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) 250 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq) 251 252 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 253 static void NAME(CPURISCVState *env, abi_ptr addr, \ 254 uint32_t idx, void *vd, uintptr_t retaddr)\ 255 { \ 256 ETYPE data = *((ETYPE *)vd + H(idx)); \ 257 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 258 } 259 260 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) 261 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) 262 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) 263 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) 264 265 /* 266 *** stride: access vector element from strided memory 267 */ 268 static void 269 vext_ldst_stride(void *vd, void *v0, target_ulong base, 270 target_ulong stride, CPURISCVState *env, 271 uint32_t desc, uint32_t vm, 272 vext_ldst_elem_fn *ldst_elem, 273 uint32_t log2_esz, uintptr_t ra) 274 { 275 uint32_t i, k; 276 uint32_t nf = vext_nf(desc); 277 uint32_t max_elems = vext_max_elems(desc, log2_esz); 278 uint32_t esz = 1 << log2_esz; 279 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 280 uint32_t vta = vext_vta(desc); 281 282 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 283 if (!vm && !vext_elem_mask(v0, i)) { 284 continue; 285 } 286 287 k = 0; 288 
while (k < nf) { 289 target_ulong addr = base + stride * i + (k << log2_esz); 290 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 291 k++; 292 } 293 } 294 env->vstart = 0; 295 /* set tail elements to 1s */ 296 for (k = 0; k < nf; ++k) { 297 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, 298 (k * max_elems + max_elems) * esz); 299 } 300 if (nf * max_elems % total_elems != 0) { 301 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 302 uint32_t registers_used = 303 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 304 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 305 registers_used * vlenb); 306 } 307 } 308 309 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 310 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 311 target_ulong stride, CPURISCVState *env, \ 312 uint32_t desc) \ 313 { \ 314 uint32_t vm = vext_vm(desc); \ 315 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 316 ctzl(sizeof(ETYPE)), GETPC()); \ 317 } 318 319 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 320 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 321 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 322 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 323 324 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 325 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 326 target_ulong stride, CPURISCVState *env, \ 327 uint32_t desc) \ 328 { \ 329 uint32_t vm = vext_vm(desc); \ 330 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 331 ctzl(sizeof(ETYPE)), GETPC()); \ 332 } 333 334 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 335 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 336 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 337 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 338 339 /* 340 *** unit-stride: access elements stored contiguously in memory 341 */ 342 343 /* unmasked unit-stride load and store operation*/ 344 static void 345 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 346 vext_ldst_elem_fn 
*ldst_elem, uint32_t log2_esz, uint32_t evl, 347 uintptr_t ra) 348 { 349 uint32_t i, k; 350 uint32_t nf = vext_nf(desc); 351 uint32_t max_elems = vext_max_elems(desc, log2_esz); 352 uint32_t esz = 1 << log2_esz; 353 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 354 uint32_t vta = vext_vta(desc); 355 356 /* load bytes from guest memory */ 357 for (i = env->vstart; i < evl; i++, env->vstart++) { 358 k = 0; 359 while (k < nf) { 360 target_ulong addr = base + ((i * nf + k) << log2_esz); 361 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 362 k++; 363 } 364 } 365 env->vstart = 0; 366 /* set tail elements to 1s */ 367 for (k = 0; k < nf; ++k) { 368 vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz, 369 (k * max_elems + max_elems) * esz); 370 } 371 if (nf * max_elems % total_elems != 0) { 372 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 373 uint32_t registers_used = 374 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 375 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 376 registers_used * vlenb); 377 } 378 } 379 380 /* 381 * masked unit-stride load and store operation will be a special case of stride, 382 * stride = NF * sizeof (MTYPE) 383 */ 384 385 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 386 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 387 CPURISCVState *env, uint32_t desc) \ 388 { \ 389 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 390 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 391 ctzl(sizeof(ETYPE)), GETPC()); \ 392 } \ 393 \ 394 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 395 CPURISCVState *env, uint32_t desc) \ 396 { \ 397 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 398 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 399 } 400 401 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 402 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 403 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 404 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 405 406 #define GEN_VEXT_ST_US(NAME, ETYPE, 
STORE_FN) \ 407 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 408 CPURISCVState *env, uint32_t desc) \ 409 { \ 410 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 411 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 412 ctzl(sizeof(ETYPE)), GETPC()); \ 413 } \ 414 \ 415 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 416 CPURISCVState *env, uint32_t desc) \ 417 { \ 418 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 419 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 420 } 421 422 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 423 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 424 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 425 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 426 427 /* 428 *** unit stride mask load and store, EEW = 1 429 */ 430 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 431 CPURISCVState *env, uint32_t desc) 432 { 433 /* evl = ceil(vl/8) */ 434 uint8_t evl = (env->vl + 7) >> 3; 435 vext_ldst_us(vd, base, env, desc, lde_b, 436 0, evl, GETPC()); 437 } 438 439 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 440 CPURISCVState *env, uint32_t desc) 441 { 442 /* evl = ceil(vl/8) */ 443 uint8_t evl = (env->vl + 7) >> 3; 444 vext_ldst_us(vd, base, env, desc, ste_b, 445 0, evl, GETPC()); 446 } 447 448 /* 449 *** index: access vector element from indexed memory 450 */ 451 typedef target_ulong vext_get_index_addr(target_ulong base, 452 uint32_t idx, void *vs2); 453 454 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 455 static target_ulong NAME(target_ulong base, \ 456 uint32_t idx, void *vs2) \ 457 { \ 458 return (base + *((ETYPE *)vs2 + H(idx))); \ 459 } 460 461 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 462 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 463 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 464 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 465 466 static inline void 467 vext_ldst_index(void *vd, void *v0, target_ulong base, 468 void *vs2, CPURISCVState *env, uint32_t desc, 469 vext_get_index_addr 
get_index_addr, 470 vext_ldst_elem_fn *ldst_elem, 471 uint32_t log2_esz, uintptr_t ra) 472 { 473 uint32_t i, k; 474 uint32_t nf = vext_nf(desc); 475 uint32_t vm = vext_vm(desc); 476 uint32_t max_elems = vext_max_elems(desc, log2_esz); 477 uint32_t esz = 1 << log2_esz; 478 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 479 uint32_t vta = vext_vta(desc); 480 481 /* load bytes from guest memory */ 482 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 483 if (!vm && !vext_elem_mask(v0, i)) { 484 continue; 485 } 486 487 k = 0; 488 while (k < nf) { 489 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); 490 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 491 k++; 492 } 493 } 494 env->vstart = 0; 495 /* set tail elements to 1s */ 496 for (k = 0; k < nf; ++k) { 497 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, 498 (k * max_elems + max_elems) * esz); 499 } 500 if (nf * max_elems % total_elems != 0) { 501 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 502 uint32_t registers_used = 503 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 504 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 505 registers_used * vlenb); 506 } 507 } 508 509 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 510 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 511 void *vs2, CPURISCVState *env, uint32_t desc) \ 512 { \ 513 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 514 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ 515 } 516 517 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 518 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 519 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 520 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 521 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 522 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 523 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 524 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 525 
GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 526 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 527 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 528 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 529 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 530 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 531 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 532 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 533 534 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 535 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 536 void *vs2, CPURISCVState *env, uint32_t desc) \ 537 { \ 538 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 539 STORE_FN, ctzl(sizeof(ETYPE)), \ 540 GETPC()); \ 541 } 542 543 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b) 544 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h) 545 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w) 546 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d) 547 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b) 548 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h) 549 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w) 550 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d) 551 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b) 552 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h) 553 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w) 554 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d) 555 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b) 556 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h) 557 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w) 558 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d) 559 560 /* 561 *** unit-stride fault-only-fisrt load instructions 562 */ 563 static inline void 564 vext_ldff(void *vd, void *v0, target_ulong base, 565 CPURISCVState *env, uint32_t desc, 566 vext_ldst_elem_fn *ldst_elem, 567 uint32_t log2_esz, uintptr_t ra) 568 { 569 void *host; 570 
uint32_t i, k, vl = 0; 571 uint32_t nf = vext_nf(desc); 572 uint32_t vm = vext_vm(desc); 573 uint32_t max_elems = vext_max_elems(desc, log2_esz); 574 uint32_t esz = 1 << log2_esz; 575 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 576 uint32_t vta = vext_vta(desc); 577 target_ulong addr, offset, remain; 578 579 /* probe every access*/ 580 for (i = env->vstart; i < env->vl; i++) { 581 if (!vm && !vext_elem_mask(v0, i)) { 582 continue; 583 } 584 addr = adjust_addr(env, base + i * (nf << log2_esz)); 585 if (i == 0) { 586 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD); 587 } else { 588 /* if it triggers an exception, no need to check watchpoint */ 589 remain = nf << log2_esz; 590 while (remain > 0) { 591 offset = -(addr | TARGET_PAGE_MASK); 592 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, 593 cpu_mmu_index(env, false)); 594 if (host) { 595 #ifdef CONFIG_USER_ONLY 596 if (page_check_range(addr, offset, PAGE_READ) < 0) { 597 vl = i; 598 goto ProbeSuccess; 599 } 600 #else 601 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD); 602 #endif 603 } else { 604 vl = i; 605 goto ProbeSuccess; 606 } 607 if (remain <= offset) { 608 break; 609 } 610 remain -= offset; 611 addr = adjust_addr(env, addr + offset); 612 } 613 } 614 } 615 ProbeSuccess: 616 /* load bytes from guest memory */ 617 if (vl != 0) { 618 env->vl = vl; 619 } 620 for (i = env->vstart; i < env->vl; i++) { 621 k = 0; 622 if (!vm && !vext_elem_mask(v0, i)) { 623 continue; 624 } 625 while (k < nf) { 626 target_ulong addr = base + ((i * nf + k) << log2_esz); 627 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 628 k++; 629 } 630 } 631 env->vstart = 0; 632 /* set tail elements to 1s */ 633 for (k = 0; k < nf; ++k) { 634 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, 635 (k * max_elems + max_elems) * esz); 636 } 637 if (nf * max_elems % total_elems != 0) { 638 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 639 uint32_t registers_used = 640 ((nf * max_elems) * 
esz + (vlenb - 1)) / vlenb; 641 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 642 registers_used * vlenb); 643 } 644 } 645 646 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ 647 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 648 CPURISCVState *env, uint32_t desc) \ 649 { \ 650 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \ 651 ctzl(sizeof(ETYPE)), GETPC()); \ 652 } 653 654 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b) 655 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h) 656 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w) 657 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) 658 659 #define DO_SWAP(N, M) (M) 660 #define DO_AND(N, M) (N & M) 661 #define DO_XOR(N, M) (N ^ M) 662 #define DO_OR(N, M) (N | M) 663 #define DO_ADD(N, M) (N + M) 664 665 /* Signed min/max */ 666 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 667 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 668 669 /* Unsigned min/max */ 670 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 671 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 672 673 /* 674 *** load and store whole register instructions 675 */ 676 static void 677 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 678 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra) 679 { 680 uint32_t i, k, off, pos; 681 uint32_t nf = vext_nf(desc); 682 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 683 uint32_t max_elems = vlenb >> log2_esz; 684 685 k = env->vstart / max_elems; 686 off = env->vstart % max_elems; 687 688 if (off) { 689 /* load/store rest of elements of current segment pointed by vstart */ 690 for (pos = off; pos < max_elems; pos++, env->vstart++) { 691 target_ulong addr = base + ((pos + k * max_elems) << log2_esz); 692 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra); 693 } 694 k++; 695 } 696 697 /* load/store elements for rest of segments */ 698 for (; k < nf; k++) { 699 for (i = 0; i < max_elems; i++, env->vstart++) { 700 target_ulong addr = base + ((i + k * max_elems) << log2_esz); 701 
ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 702 } 703 } 704 705 env->vstart = 0; 706 } 707 708 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 709 void HELPER(NAME)(void *vd, target_ulong base, \ 710 CPURISCVState *env, uint32_t desc) \ 711 { \ 712 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 713 ctzl(sizeof(ETYPE)), GETPC()); \ 714 } 715 716 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 717 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 718 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 719 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 720 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 721 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 722 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 723 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 724 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 725 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 726 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 727 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 728 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 729 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 730 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 731 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 732 733 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 734 void HELPER(NAME)(void *vd, target_ulong base, \ 735 CPURISCVState *env, uint32_t desc) \ 736 { \ 737 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 738 ctzl(sizeof(ETYPE)), GETPC()); \ 739 } 740 741 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 742 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 743 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 744 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 745 746 /* 747 *** Vector Integer Arithmetic Instructions 748 */ 749 750 /* expand macro args before macro */ 751 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 752 753 /* (TD, T1, T2, TX1, TX2) */ 754 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 755 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 756 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 757 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 758 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 759 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 760 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 761 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 762 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 763 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 764 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 765 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 766 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 767 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 768 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 769 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 770 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 771 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 772 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 773 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 774 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 775 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 776 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 777 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 778 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 779 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 780 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 781 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 782 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 783 
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 784 785 /* operation of two vector elements */ 786 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 787 788 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 789 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 790 { \ 791 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 792 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 793 *((TD *)vd + HD(i)) = OP(s2, s1); \ 794 } 795 #define DO_SUB(N, M) (N - M) 796 #define DO_RSUB(N, M) (M - N) 797 798 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 799 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 800 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 801 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 802 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 803 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 804 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 805 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 806 807 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 808 CPURISCVState *env, uint32_t desc, 809 opivv2_fn *fn, uint32_t esz) 810 { 811 uint32_t vm = vext_vm(desc); 812 uint32_t vl = env->vl; 813 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 814 uint32_t vta = vext_vta(desc); 815 uint32_t i; 816 817 for (i = env->vstart; i < vl; i++) { 818 if (!vm && !vext_elem_mask(v0, i)) { 819 continue; 820 } 821 fn(vd, vs1, vs2, i); 822 } 823 env->vstart = 0; 824 /* set tail elements to 1s */ 825 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 826 } 827 828 /* generate the helpers for OPIVV */ 829 #define GEN_VEXT_VV(NAME, ESZ) \ 830 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 831 void *vs2, CPURISCVState *env, \ 832 uint32_t desc) \ 833 { \ 834 do_vext_vv(vd, v0, vs1, vs2, env, desc, \ 835 do_##NAME, ESZ); \ 836 } 837 838 GEN_VEXT_VV(vadd_vv_b, 1) 839 GEN_VEXT_VV(vadd_vv_h, 2) 840 GEN_VEXT_VV(vadd_vv_w, 4) 841 GEN_VEXT_VV(vadd_vv_d, 8) 842 
GEN_VEXT_VV(vsub_vv_b, 1) 843 GEN_VEXT_VV(vsub_vv_h, 2) 844 GEN_VEXT_VV(vsub_vv_w, 4) 845 GEN_VEXT_VV(vsub_vv_d, 8) 846 847 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 848 849 /* 850 * (T1)s1 gives the real operator type. 851 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 852 */ 853 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 854 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 855 { \ 856 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 857 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 858 } 859 860 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 861 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 862 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 863 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 864 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 865 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 866 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 867 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 868 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 869 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 870 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 871 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 872 873 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 874 CPURISCVState *env, uint32_t desc, 875 opivx2_fn fn, uint32_t esz) 876 { 877 uint32_t vm = vext_vm(desc); 878 uint32_t vl = env->vl; 879 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 880 uint32_t vta = vext_vta(desc); 881 uint32_t i; 882 883 for (i = env->vstart; i < vl; i++) { 884 if (!vm && !vext_elem_mask(v0, i)) { 885 continue; 886 } 887 fn(vd, s1, vs2, i); 888 } 889 env->vstart = 0; 890 /* set tail elements to 1s */ 891 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 892 } 893 894 /* generate the helpers for OPIVX */ 895 #define GEN_VEXT_VX(NAME, ESZ) \ 896 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 897 void 
*vs2, CPURISCVState *env, \ 898 uint32_t desc) \ 899 { \ 900 do_vext_vx(vd, v0, s1, vs2, env, desc, \ 901 do_##NAME, ESZ); \ 902 } 903 904 GEN_VEXT_VX(vadd_vx_b, 1) 905 GEN_VEXT_VX(vadd_vx_h, 2) 906 GEN_VEXT_VX(vadd_vx_w, 4) 907 GEN_VEXT_VX(vadd_vx_d, 8) 908 GEN_VEXT_VX(vsub_vx_b, 1) 909 GEN_VEXT_VX(vsub_vx_h, 2) 910 GEN_VEXT_VX(vsub_vx_w, 4) 911 GEN_VEXT_VX(vsub_vx_d, 8) 912 GEN_VEXT_VX(vrsub_vx_b, 1) 913 GEN_VEXT_VX(vrsub_vx_h, 2) 914 GEN_VEXT_VX(vrsub_vx_w, 4) 915 GEN_VEXT_VX(vrsub_vx_d, 8) 916 917 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 918 { 919 intptr_t oprsz = simd_oprsz(desc); 920 intptr_t i; 921 922 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 923 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 924 } 925 } 926 927 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 928 { 929 intptr_t oprsz = simd_oprsz(desc); 930 intptr_t i; 931 932 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 933 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 934 } 935 } 936 937 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 938 { 939 intptr_t oprsz = simd_oprsz(desc); 940 intptr_t i; 941 942 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 943 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 944 } 945 } 946 947 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 948 { 949 intptr_t oprsz = simd_oprsz(desc); 950 intptr_t i; 951 952 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 953 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 954 } 955 } 956 957 /* Vector Widening Integer Add/Subtract */ 958 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 959 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 960 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 961 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 962 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 963 #define WOP_SSS_W int64_t, int32_t, 
int32_t, int64_t, int64_t 964 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 965 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 966 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 967 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 968 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 969 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 970 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 971 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 972 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 973 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 974 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 975 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 976 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 977 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 978 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 979 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 980 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 981 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 982 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 983 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 984 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 985 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 986 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 987 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 988 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 989 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 990 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 991 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 992 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 993 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 994 
/*
 * Helpers of the widening vector-vector add/subtract instructions.  The
 * second argument is the size in bytes of the widened destination element.
 */
GEN_VEXT_VV(vwaddu_vv_b, 2)
GEN_VEXT_VV(vwaddu_vv_h, 4)
GEN_VEXT_VV(vwaddu_vv_w, 8)
GEN_VEXT_VV(vwsubu_vv_b, 2)
GEN_VEXT_VV(vwsubu_vv_h, 4)
GEN_VEXT_VV(vwsubu_vv_w, 8)
GEN_VEXT_VV(vwadd_vv_b, 2)
GEN_VEXT_VV(vwadd_vv_h, 4)
GEN_VEXT_VV(vwadd_vv_w, 8)
GEN_VEXT_VV(vwsub_vv_b, 2)
GEN_VEXT_VV(vwsub_vv_h, 4)
GEN_VEXT_VV(vwsub_vv_w, 8)
GEN_VEXT_VV(vwaddu_wv_b, 2)
GEN_VEXT_VV(vwaddu_wv_h, 4)
GEN_VEXT_VV(vwaddu_wv_w, 8)
GEN_VEXT_VV(vwsubu_wv_b, 2)
GEN_VEXT_VV(vwsubu_wv_h, 4)
GEN_VEXT_VV(vwsubu_wv_w, 8)
GEN_VEXT_VV(vwadd_wv_b, 2)
GEN_VEXT_VV(vwadd_wv_h, 4)
GEN_VEXT_VV(vwadd_wv_w, 8)
GEN_VEXT_VV(vwsub_wv_b, 2)
GEN_VEXT_VV(vwsub_wv_h, 4)
GEN_VEXT_VV(vwsub_wv_w, 8)

/* Per-element workers of the widening vector-scalar add/subtract. */
RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
/*
 * .wx forms: vs2 holds widened elements (T2 of the WOP_Wxxx_x tuples), so
 * its index fixup HS2 has to match the widened width, like HD does.
 */
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 1040 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 1041 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 1042 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 1043 GEN_VEXT_VX(vwaddu_vx_b, 2) 1044 GEN_VEXT_VX(vwaddu_vx_h, 4) 1045 GEN_VEXT_VX(vwaddu_vx_w, 8) 1046 GEN_VEXT_VX(vwsubu_vx_b, 2) 1047 GEN_VEXT_VX(vwsubu_vx_h, 4) 1048 GEN_VEXT_VX(vwsubu_vx_w, 8) 1049 GEN_VEXT_VX(vwadd_vx_b, 2) 1050 GEN_VEXT_VX(vwadd_vx_h, 4) 1051 GEN_VEXT_VX(vwadd_vx_w, 8) 1052 GEN_VEXT_VX(vwsub_vx_b, 2) 1053 GEN_VEXT_VX(vwsub_vx_h, 4) 1054 GEN_VEXT_VX(vwsub_vx_w, 8) 1055 GEN_VEXT_VX(vwaddu_wx_b, 2) 1056 GEN_VEXT_VX(vwaddu_wx_h, 4) 1057 GEN_VEXT_VX(vwaddu_wx_w, 8) 1058 GEN_VEXT_VX(vwsubu_wx_b, 2) 1059 GEN_VEXT_VX(vwsubu_wx_h, 4) 1060 GEN_VEXT_VX(vwsubu_wx_w, 8) 1061 GEN_VEXT_VX(vwadd_wx_b, 2) 1062 GEN_VEXT_VX(vwadd_wx_h, 4) 1063 GEN_VEXT_VX(vwadd_wx_w, 8) 1064 GEN_VEXT_VX(vwsub_wx_b, 2) 1065 GEN_VEXT_VX(vwsub_wx_h, 4) 1066 GEN_VEXT_VX(vwsub_wx_w, 8) 1067 1068 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 1069 #define DO_VADC(N, M, C) (N + M + C) 1070 #define DO_VSBC(N, M, C) (N - M - C) 1071 1072 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 1073 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1074 CPURISCVState *env, uint32_t desc) \ 1075 { \ 1076 uint32_t vl = env->vl; \ 1077 uint32_t esz = sizeof(ETYPE); \ 1078 uint32_t total_elems = \ 1079 vext_get_total_elems(env, desc, esz); \ 1080 uint32_t vta = vext_vta(desc); \ 1081 uint32_t i; \ 1082 \ 1083 for (i = env->vstart; i < vl; i++) { \ 1084 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1085 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1086 ETYPE carry = vext_elem_mask(v0, i); \ 1087 \ 1088 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 1089 } \ 1090 env->vstart = 0; \ 1091 /* set tail elements to 1s */ \ 1092 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1093 } 1094 1095 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, 
DO_VADC) 1096 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 1097 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 1098 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 1099 1100 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 1101 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 1102 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 1103 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 1104 1105 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 1106 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1107 CPURISCVState *env, uint32_t desc) \ 1108 { \ 1109 uint32_t vl = env->vl; \ 1110 uint32_t esz = sizeof(ETYPE); \ 1111 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1112 uint32_t vta = vext_vta(desc); \ 1113 uint32_t i; \ 1114 \ 1115 for (i = env->vstart; i < vl; i++) { \ 1116 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1117 ETYPE carry = vext_elem_mask(v0, i); \ 1118 \ 1119 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1120 } \ 1121 env->vstart = 0; \ 1122 /* set tail elements to 1s */ \ 1123 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1124 } 1125 1126 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1127 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1128 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1129 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1130 1131 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1132 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1133 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1134 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1135 1136 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1137 (__typeof(N))(N + M) < N) 1138 #define DO_MSBC(N, M, C) (C ? 
N <= M : N < M) 1139 1140 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1141 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1142 CPURISCVState *env, uint32_t desc) \ 1143 { \ 1144 uint32_t vl = env->vl; \ 1145 uint32_t vm = vext_vm(desc); \ 1146 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1147 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1148 uint32_t i; \ 1149 \ 1150 for (i = env->vstart; i < vl; i++) { \ 1151 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1152 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1153 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1154 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1155 } \ 1156 env->vstart = 0; \ 1157 /* mask destination register are always tail-agnostic */ \ 1158 /* set tail elements to 1s */ \ 1159 if (vta_all_1s) { \ 1160 for (; i < total_elems; i++) { \ 1161 vext_set_elem_mask(vd, i, 1); \ 1162 } \ 1163 } \ 1164 } 1165 1166 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1167 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1168 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1169 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1170 1171 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1172 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1173 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1174 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1175 1176 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1177 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1178 void *vs2, CPURISCVState *env, uint32_t desc) \ 1179 { \ 1180 uint32_t vl = env->vl; \ 1181 uint32_t vm = vext_vm(desc); \ 1182 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1183 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1184 uint32_t i; \ 1185 \ 1186 for (i = env->vstart; i < vl; i++) { \ 1187 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1188 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1189 vext_set_elem_mask(vd, i, \ 1190 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1191 } \ 
1192 env->vstart = 0; \ 1193 /* mask destination register are always tail-agnostic */ \ 1194 /* set tail elements to 1s */ \ 1195 if (vta_all_1s) { \ 1196 for (; i < total_elems; i++) { \ 1197 vext_set_elem_mask(vd, i, 1); \ 1198 } \ 1199 } \ 1200 } 1201 1202 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1203 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1204 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1205 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1206 1207 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1208 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1209 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1210 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1211 1212 /* Vector Bitwise Logical Instructions */ 1213 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1214 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1215 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1216 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1217 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1218 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1219 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1220 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1221 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1222 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1223 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1224 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1225 GEN_VEXT_VV(vand_vv_b, 1) 1226 GEN_VEXT_VV(vand_vv_h, 2) 1227 GEN_VEXT_VV(vand_vv_w, 4) 1228 GEN_VEXT_VV(vand_vv_d, 8) 1229 GEN_VEXT_VV(vor_vv_b, 1) 1230 GEN_VEXT_VV(vor_vv_h, 2) 1231 GEN_VEXT_VV(vor_vv_w, 4) 1232 GEN_VEXT_VV(vor_vv_d, 8) 1233 GEN_VEXT_VV(vxor_vv_b, 1) 1234 GEN_VEXT_VV(vxor_vv_h, 2) 1235 GEN_VEXT_VV(vxor_vv_w, 4) 1236 GEN_VEXT_VV(vxor_vv_d, 8) 1237 1238 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1239 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, 
H2, H2, DO_AND) 1240 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1241 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1242 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1243 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1244 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1245 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1246 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1247 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1248 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1249 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1250 GEN_VEXT_VX(vand_vx_b, 1) 1251 GEN_VEXT_VX(vand_vx_h, 2) 1252 GEN_VEXT_VX(vand_vx_w, 4) 1253 GEN_VEXT_VX(vand_vx_d, 8) 1254 GEN_VEXT_VX(vor_vx_b, 1) 1255 GEN_VEXT_VX(vor_vx_h, 2) 1256 GEN_VEXT_VX(vor_vx_w, 4) 1257 GEN_VEXT_VX(vor_vx_d, 8) 1258 GEN_VEXT_VX(vxor_vx_b, 1) 1259 GEN_VEXT_VX(vxor_vx_h, 2) 1260 GEN_VEXT_VX(vxor_vx_w, 4) 1261 GEN_VEXT_VX(vxor_vx_d, 8) 1262 1263 /* Vector Single-Width Bit Shift Instructions */ 1264 #define DO_SLL(N, M) (N << (M)) 1265 #define DO_SRL(N, M) (N >> (M)) 1266 1267 /* generate the helpers for shift instructions with two vector operators */ 1268 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1269 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1270 void *vs2, CPURISCVState *env, uint32_t desc) \ 1271 { \ 1272 uint32_t vm = vext_vm(desc); \ 1273 uint32_t vl = env->vl; \ 1274 uint32_t esz = sizeof(TS1); \ 1275 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1276 uint32_t vta = vext_vta(desc); \ 1277 uint32_t i; \ 1278 \ 1279 for (i = env->vstart; i < vl; i++) { \ 1280 if (!vm && !vext_elem_mask(v0, i)) { \ 1281 continue; \ 1282 } \ 1283 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1284 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1285 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1286 } \ 1287 env->vstart = 0; \ 1288 /* set tail elements to 1s */ \ 1289 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1290 } 1291 1292 
GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1293 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1294 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1295 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1296 1297 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1298 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1299 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1300 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1301 1302 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1303 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1304 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1305 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1306 1307 /* generate the helpers for shift instructions with one vector and one scalar */ 1308 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1309 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1310 void *vs2, CPURISCVState *env, uint32_t desc) \ 1311 { \ 1312 uint32_t vm = vext_vm(desc); \ 1313 uint32_t vl = env->vl; \ 1314 uint32_t esz = sizeof(TD); \ 1315 uint32_t total_elems = \ 1316 vext_get_total_elems(env, desc, esz); \ 1317 uint32_t vta = vext_vta(desc); \ 1318 uint32_t i; \ 1319 \ 1320 for (i = env->vstart; i < vl; i++) { \ 1321 if (!vm && !vext_elem_mask(v0, i)) { \ 1322 continue; \ 1323 } \ 1324 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1325 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1326 } \ 1327 env->vstart = 0; \ 1328 /* set tail elements to 1s */ \ 1329 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ 1330 } 1331 1332 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1333 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1334 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1335 
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1336 1337 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1338 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1339 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1340 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1341 1342 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1343 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1344 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1345 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1346 1347 /* Vector Narrowing Integer Right Shift Instructions */ 1348 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1349 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1350 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1351 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1352 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1353 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1354 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1355 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1356 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1357 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1358 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1359 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1360 1361 /* Vector Integer Comparison Instructions */ 1362 #define DO_MSEQ(N, M) (N == M) 1363 #define DO_MSNE(N, M) (N != M) 1364 #define DO_MSLT(N, M) (N < M) 1365 #define DO_MSLE(N, M) (N <= M) 1366 #define DO_MSGT(N, M) (N > M) 1367 1368 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1369 void HELPER(NAME)(void *vd, void *v0, void *vs1, void 
*vs2, \ 1370 CPURISCVState *env, uint32_t desc) \ 1371 { \ 1372 uint32_t vm = vext_vm(desc); \ 1373 uint32_t vl = env->vl; \ 1374 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1375 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1376 uint32_t i; \ 1377 \ 1378 for (i = env->vstart; i < vl; i++) { \ 1379 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1380 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1381 if (!vm && !vext_elem_mask(v0, i)) { \ 1382 continue; \ 1383 } \ 1384 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1385 } \ 1386 env->vstart = 0; \ 1387 /* mask destination register are always tail-agnostic */ \ 1388 /* set tail elements to 1s */ \ 1389 if (vta_all_1s) { \ 1390 for (; i < total_elems; i++) { \ 1391 vext_set_elem_mask(vd, i, 1); \ 1392 } \ 1393 } \ 1394 } 1395 1396 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1397 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1398 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1399 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1400 1401 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1402 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1403 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1404 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1405 1406 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1407 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1408 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1409 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1410 1411 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1412 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1413 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1414 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1415 1416 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1417 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1418 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1419 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1420 1421 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1422 
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1423 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1424 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1425 1426 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1427 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1428 CPURISCVState *env, uint32_t desc) \ 1429 { \ 1430 uint32_t vm = vext_vm(desc); \ 1431 uint32_t vl = env->vl; \ 1432 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1433 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1434 uint32_t i; \ 1435 \ 1436 for (i = env->vstart; i < vl; i++) { \ 1437 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1438 if (!vm && !vext_elem_mask(v0, i)) { \ 1439 continue; \ 1440 } \ 1441 vext_set_elem_mask(vd, i, \ 1442 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1443 } \ 1444 env->vstart = 0; \ 1445 /* mask destination register are always tail-agnostic */ \ 1446 /* set tail elements to 1s */ \ 1447 if (vta_all_1s) { \ 1448 for (; i < total_elems; i++) { \ 1449 vext_set_elem_mask(vd, i, 1); \ 1450 } \ 1451 } \ 1452 } 1453 1454 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1455 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1456 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1457 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1458 1459 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1460 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1461 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1462 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1463 1464 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1465 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1466 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1467 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1468 1469 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1470 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1471 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1472 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1473 1474 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, 
DO_MSLE) 1475 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1476 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1477 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1478 1479 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1480 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1481 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1482 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1483 1484 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1485 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1486 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1487 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1488 1489 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1490 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1491 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1492 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1493 1494 /* Vector Integer Min/Max Instructions */ 1495 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1496 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1497 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1498 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1499 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1500 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1501 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1502 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1503 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1504 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1505 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1506 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1507 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1508 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1509 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1510 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1511 GEN_VEXT_VV(vminu_vv_b, 1) 1512 GEN_VEXT_VV(vminu_vv_h, 2) 1513 
GEN_VEXT_VV(vminu_vv_w, 4)
GEN_VEXT_VV(vminu_vv_d, 8)
GEN_VEXT_VV(vmin_vv_b, 1)
GEN_VEXT_VV(vmin_vv_h, 2)
GEN_VEXT_VV(vmin_vv_w, 4)
GEN_VEXT_VV(vmin_vv_d, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8)
GEN_VEXT_VV(vmax_vv_b, 1)
GEN_VEXT_VV(vmax_vv_h, 2)
GEN_VEXT_VV(vmax_vv_w, 4)
GEN_VEXT_VV(vmax_vv_d, 8)

/* Vector-scalar min/max: per-element workers, then the helpers. */
RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1)
GEN_VEXT_VX(vminu_vx_h, 2)
GEN_VEXT_VX(vminu_vx_w, 4)
GEN_VEXT_VX(vminu_vx_d, 8)
GEN_VEXT_VX(vmin_vx_b, 1)
GEN_VEXT_VX(vmin_vx_h, 2)
GEN_VEXT_VX(vmin_vx_w, 4)
GEN_VEXT_VX(vmin_vx_d, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8)
GEN_VEXT_VX(vmax_vx_b, 1)
GEN_VEXT_VX(vmax_vx_h, 2)
GEN_VEXT_VX(vmax_vx_w, 4)
GEN_VEXT_VX(vmax_vx_d, 8)

/* Vector Single-Width Integer Multiply Instructions */
/* Arguments are parenthesized so that expression operands expand safely. */
#define DO_MUL(N, M) ((N) * (M))
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1564 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1565 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1566 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1567 GEN_VEXT_VV(vmul_vv_b, 1) 1568 GEN_VEXT_VV(vmul_vv_h, 2) 1569 GEN_VEXT_VV(vmul_vv_w, 4) 1570 GEN_VEXT_VV(vmul_vv_d, 8) 1571 1572 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1573 { 1574 return (int16_t)s2 * (int16_t)s1 >> 8; 1575 } 1576 1577 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1578 { 1579 return (int32_t)s2 * (int32_t)s1 >> 16; 1580 } 1581 1582 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1583 { 1584 return (int64_t)s2 * (int64_t)s1 >> 32; 1585 } 1586 1587 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1588 { 1589 uint64_t hi_64, lo_64; 1590 1591 muls64(&lo_64, &hi_64, s1, s2); 1592 return hi_64; 1593 } 1594 1595 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1596 { 1597 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1598 } 1599 1600 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1601 { 1602 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1603 } 1604 1605 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1606 { 1607 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1608 } 1609 1610 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1611 { 1612 uint64_t hi_64, lo_64; 1613 1614 mulu64(&lo_64, &hi_64, s2, s1); 1615 return hi_64; 1616 } 1617 1618 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1619 { 1620 return (int16_t)s2 * (uint16_t)s1 >> 8; 1621 } 1622 1623 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1624 { 1625 return (int32_t)s2 * (uint32_t)s1 >> 16; 1626 } 1627 1628 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1629 { 1630 return (int64_t)s2 * (uint64_t)s1 >> 32; 1631 } 1632 1633 /* 1634 * Let A = signed operand, 1635 * B = unsigned operand 1636 * P = mulu64(A, B), unsigned product 1637 * 1638 * LET X = 2 ** 64 - A, 2's complement of A 1639 * SP = signed product 1640 * THEN 1641 * IF A < 0 
1642 * SP = -X * B 1643 * = -(2 ** 64 - A) * B 1644 * = A * B - 2 ** 64 * B 1645 * = P - 2 ** 64 * B 1646 * ELSE 1647 * SP = P 1648 * THEN 1649 * HI_P -= (A < 0 ? B : 0) 1650 */ 1651 1652 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1653 { 1654 uint64_t hi_64, lo_64; 1655 1656 mulu64(&lo_64, &hi_64, s2, s1); 1657 1658 hi_64 -= s2 < 0 ? s1 : 0; 1659 return hi_64; 1660 } 1661 1662 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1663 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1664 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1665 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1666 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1667 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1668 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1669 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1670 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1671 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1672 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1673 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1674 GEN_VEXT_VV(vmulh_vv_b, 1) 1675 GEN_VEXT_VV(vmulh_vv_h, 2) 1676 GEN_VEXT_VV(vmulh_vv_w, 4) 1677 GEN_VEXT_VV(vmulh_vv_d, 8) 1678 GEN_VEXT_VV(vmulhu_vv_b, 1) 1679 GEN_VEXT_VV(vmulhu_vv_h, 2) 1680 GEN_VEXT_VV(vmulhu_vv_w, 4) 1681 GEN_VEXT_VV(vmulhu_vv_d, 8) 1682 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1683 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1684 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1685 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1686 1687 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1688 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1689 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1690 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1691 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1692 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1693 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, 
H4, H4, do_mulh_w) 1694 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1695 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1696 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1697 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1698 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1699 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1700 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1701 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1702 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1703 GEN_VEXT_VX(vmul_vx_b, 1) 1704 GEN_VEXT_VX(vmul_vx_h, 2) 1705 GEN_VEXT_VX(vmul_vx_w, 4) 1706 GEN_VEXT_VX(vmul_vx_d, 8) 1707 GEN_VEXT_VX(vmulh_vx_b, 1) 1708 GEN_VEXT_VX(vmulh_vx_h, 2) 1709 GEN_VEXT_VX(vmulh_vx_w, 4) 1710 GEN_VEXT_VX(vmulh_vx_d, 8) 1711 GEN_VEXT_VX(vmulhu_vx_b, 1) 1712 GEN_VEXT_VX(vmulhu_vx_h, 2) 1713 GEN_VEXT_VX(vmulhu_vx_w, 4) 1714 GEN_VEXT_VX(vmulhu_vx_d, 8) 1715 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1716 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1717 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1718 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1719 1720 /* Vector Integer Divide Instructions */ 1721 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1722 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1723 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1724 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1725 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1726 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1727 1728 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1729 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1730 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1731 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1732 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1733 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1734 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1735 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1736 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1737 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1738 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1739 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1740 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1741 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1742 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1743 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1744 GEN_VEXT_VV(vdivu_vv_b, 1) 1745 GEN_VEXT_VV(vdivu_vv_h, 2) 1746 GEN_VEXT_VV(vdivu_vv_w, 4) 1747 GEN_VEXT_VV(vdivu_vv_d, 8) 1748 GEN_VEXT_VV(vdiv_vv_b, 1) 1749 GEN_VEXT_VV(vdiv_vv_h, 2) 1750 GEN_VEXT_VV(vdiv_vv_w, 4) 1751 GEN_VEXT_VV(vdiv_vv_d, 8) 1752 GEN_VEXT_VV(vremu_vv_b, 1) 1753 GEN_VEXT_VV(vremu_vv_h, 2) 1754 GEN_VEXT_VV(vremu_vv_w, 4) 1755 GEN_VEXT_VV(vremu_vv_d, 8) 1756 GEN_VEXT_VV(vrem_vv_b, 1) 1757 GEN_VEXT_VV(vrem_vv_h, 2) 1758 GEN_VEXT_VV(vrem_vv_w, 4) 1759 GEN_VEXT_VV(vrem_vv_d, 8) 1760 1761 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1762 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1763 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1764 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1765 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1766 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1767 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1768 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, 
DO_DIV) 1769 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1770 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1771 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1772 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1773 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1774 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1775 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1776 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1777 GEN_VEXT_VX(vdivu_vx_b, 1) 1778 GEN_VEXT_VX(vdivu_vx_h, 2) 1779 GEN_VEXT_VX(vdivu_vx_w, 4) 1780 GEN_VEXT_VX(vdivu_vx_d, 8) 1781 GEN_VEXT_VX(vdiv_vx_b, 1) 1782 GEN_VEXT_VX(vdiv_vx_h, 2) 1783 GEN_VEXT_VX(vdiv_vx_w, 4) 1784 GEN_VEXT_VX(vdiv_vx_d, 8) 1785 GEN_VEXT_VX(vremu_vx_b, 1) 1786 GEN_VEXT_VX(vremu_vx_h, 2) 1787 GEN_VEXT_VX(vremu_vx_w, 4) 1788 GEN_VEXT_VX(vremu_vx_d, 8) 1789 GEN_VEXT_VX(vrem_vx_b, 1) 1790 GEN_VEXT_VX(vrem_vx_h, 2) 1791 GEN_VEXT_VX(vrem_vx_w, 4) 1792 GEN_VEXT_VX(vrem_vx_d, 8) 1793 1794 /* Vector Widening Integer Multiply Instructions */ 1795 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1796 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1797 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1798 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1799 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1800 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1801 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1802 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1803 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1804 GEN_VEXT_VV(vwmul_vv_b, 2) 1805 GEN_VEXT_VV(vwmul_vv_h, 4) 1806 GEN_VEXT_VV(vwmul_vv_w, 8) 1807 GEN_VEXT_VV(vwmulu_vv_b, 2) 1808 GEN_VEXT_VV(vwmulu_vv_h, 4) 1809 GEN_VEXT_VV(vwmulu_vv_w, 8) 1810 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1811 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1812 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1813 1814 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1815 
RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1816 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1817 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1818 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1819 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1820 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1821 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1822 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1823 GEN_VEXT_VX(vwmul_vx_b, 2) 1824 GEN_VEXT_VX(vwmul_vx_h, 4) 1825 GEN_VEXT_VX(vwmul_vx_w, 8) 1826 GEN_VEXT_VX(vwmulu_vx_b, 2) 1827 GEN_VEXT_VX(vwmulu_vx_h, 4) 1828 GEN_VEXT_VX(vwmulu_vx_w, 8) 1829 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1830 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1831 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1832 1833 /* Vector Single-Width Integer Multiply-Add Instructions */ 1834 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1835 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1836 { \ 1837 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1838 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1839 TD d = *((TD *)vd + HD(i)); \ 1840 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1841 } 1842 1843 #define DO_MACC(N, M, D) (M * N + D) 1844 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1845 #define DO_MADD(N, M, D) (M * D + N) 1846 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1847 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1848 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1849 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1850 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1851 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1852 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1853 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1854 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1855 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1856 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1857 
RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1858 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1859 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1860 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1861 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1862 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1863 GEN_VEXT_VV(vmacc_vv_b, 1) 1864 GEN_VEXT_VV(vmacc_vv_h, 2) 1865 GEN_VEXT_VV(vmacc_vv_w, 4) 1866 GEN_VEXT_VV(vmacc_vv_d, 8) 1867 GEN_VEXT_VV(vnmsac_vv_b, 1) 1868 GEN_VEXT_VV(vnmsac_vv_h, 2) 1869 GEN_VEXT_VV(vnmsac_vv_w, 4) 1870 GEN_VEXT_VV(vnmsac_vv_d, 8) 1871 GEN_VEXT_VV(vmadd_vv_b, 1) 1872 GEN_VEXT_VV(vmadd_vv_h, 2) 1873 GEN_VEXT_VV(vmadd_vv_w, 4) 1874 GEN_VEXT_VV(vmadd_vv_d, 8) 1875 GEN_VEXT_VV(vnmsub_vv_b, 1) 1876 GEN_VEXT_VV(vnmsub_vv_h, 2) 1877 GEN_VEXT_VV(vnmsub_vv_w, 4) 1878 GEN_VEXT_VV(vnmsub_vv_d, 8) 1879 1880 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1881 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1882 { \ 1883 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1884 TD d = *((TD *)vd + HD(i)); \ 1885 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1886 } 1887 1888 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1889 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1890 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1891 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1892 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1893 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1894 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1895 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1896 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1897 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1898 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1899 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1900 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1901 
/* Remaining vnmsub.vx workers (the byte variant is instantiated above). */
RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
/* Entry points for the single-width vector-scalar multiply-add family. */
GEN_VEXT_VX(vmacc_vx_b, 1)
GEN_VEXT_VX(vmacc_vx_h, 2)
GEN_VEXT_VX(vmacc_vx_w, 4)
GEN_VEXT_VX(vmacc_vx_d, 8)
GEN_VEXT_VX(vnmsac_vx_b, 1)
GEN_VEXT_VX(vnmsac_vx_h, 2)
GEN_VEXT_VX(vnmsac_vx_w, 4)
GEN_VEXT_VX(vnmsac_vx_d, 8)
GEN_VEXT_VX(vmadd_vx_b, 1)
GEN_VEXT_VX(vmadd_vx_h, 2)
GEN_VEXT_VX(vmadd_vx_w, 4)
GEN_VEXT_VX(vmadd_vx_d, 8)
GEN_VEXT_VX(vnmsub_vx_b, 1)
GEN_VEXT_VX(vnmsub_vx_h, 2)
GEN_VEXT_VX(vnmsub_vx_w, 4)
GEN_VEXT_VX(vnmsub_vx_d, 8)

/*
 * Vector Widening Integer Multiply-Add Instructions
 *
 * All variants reuse DO_MACC; they differ only in the WOP_* type tuple
 * (NOTE(review): defined outside this section -- presumably selecting the
 * signedness of the operands) and use a destination index macro one width
 * wider than the sources.  The GEN_VEXT_* size argument is the destination
 * element size.
 */
RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
GEN_VEXT_VV(vwmaccu_vv_b, 2)
GEN_VEXT_VV(vwmaccu_vv_h, 4)
GEN_VEXT_VV(vwmaccu_vv_w, 8)
GEN_VEXT_VV(vwmacc_vv_b, 2)
GEN_VEXT_VV(vwmacc_vv_h, 4)
GEN_VEXT_VV(vwmacc_vv_w, 8)
GEN_VEXT_VV(vwmaccsu_vv_b, 2)
GEN_VEXT_VV(vwmaccsu_vv_h, 4)
GEN_VEXT_VV(vwmaccsu_vv_w, 8)

/* Vector-scalar forms; the list continues past this point. */
RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1947 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1948 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1949 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1950 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1951 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1952 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1953 GEN_VEXT_VX(vwmaccu_vx_b, 2) 1954 GEN_VEXT_VX(vwmaccu_vx_h, 4) 1955 GEN_VEXT_VX(vwmaccu_vx_w, 8) 1956 GEN_VEXT_VX(vwmacc_vx_b, 2) 1957 GEN_VEXT_VX(vwmacc_vx_h, 4) 1958 GEN_VEXT_VX(vwmacc_vx_w, 8) 1959 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 1960 GEN_VEXT_VX(vwmaccsu_vx_h, 4) 1961 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 1962 GEN_VEXT_VX(vwmaccus_vx_b, 2) 1963 GEN_VEXT_VX(vwmaccus_vx_h, 4) 1964 GEN_VEXT_VX(vwmaccus_vx_w, 8) 1965 1966 /* Vector Integer Merge and Move Instructions */ 1967 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1968 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1969 uint32_t desc) \ 1970 { \ 1971 uint32_t vl = env->vl; \ 1972 uint32_t esz = sizeof(ETYPE); \ 1973 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1974 uint32_t vta = vext_vta(desc); \ 1975 uint32_t i; \ 1976 \ 1977 for (i = env->vstart; i < vl; i++) { \ 1978 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1979 *((ETYPE *)vd + H(i)) = s1; \ 1980 } \ 1981 env->vstart = 0; \ 1982 /* set tail elements to 1s */ \ 1983 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1984 } 1985 1986 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1987 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1988 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1989 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1990 1991 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1992 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1993 uint32_t desc) \ 1994 { \ 1995 uint32_t vl = env->vl; \ 1996 uint32_t esz = sizeof(ETYPE); \ 1997 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1998 uint32_t 
vta = vext_vta(desc); \ 1999 uint32_t i; \ 2000 \ 2001 for (i = env->vstart; i < vl; i++) { \ 2002 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2003 } \ 2004 env->vstart = 0; \ 2005 /* set tail elements to 1s */ \ 2006 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2007 } 2008 2009 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2010 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2011 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2012 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2013 2014 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2015 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2016 CPURISCVState *env, uint32_t desc) \ 2017 { \ 2018 uint32_t vl = env->vl; \ 2019 uint32_t esz = sizeof(ETYPE); \ 2020 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2021 uint32_t vta = vext_vta(desc); \ 2022 uint32_t i; \ 2023 \ 2024 for (i = env->vstart; i < vl; i++) { \ 2025 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2026 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2027 } \ 2028 env->vstart = 0; \ 2029 /* set tail elements to 1s */ \ 2030 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2031 } 2032 2033 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2034 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2035 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2036 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2037 2038 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2039 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2040 void *vs2, CPURISCVState *env, uint32_t desc) \ 2041 { \ 2042 uint32_t vl = env->vl; \ 2043 uint32_t esz = sizeof(ETYPE); \ 2044 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2045 uint32_t vta = vext_vta(desc); \ 2046 uint32_t i; \ 2047 \ 2048 for (i = env->vstart; i < vl; i++) { \ 2049 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2050 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 2051 (ETYPE)(target_long)s1); \ 2052 *((ETYPE *)vd + H(i)) = d; \ 2053 } \ 2054 env->vstart = 0; \ 2055 /* set tail elements to 1s */ \ 2056 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2057 } 2058 2059 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2060 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2061 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2062 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2063 2064 /* 2065 *** Vector Fixed-Point Arithmetic Instructions 2066 */ 2067 2068 /* Vector Single-Width Saturating Add and Subtract */ 2069 2070 /* 2071 * As fixed point instructions probably have round mode and saturation, 2072 * define common macros for fixed point here. 2073 */ 2074 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2075 CPURISCVState *env, int vxrm); 2076 2077 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2078 static inline void \ 2079 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2080 CPURISCVState *env, int vxrm) \ 2081 { \ 2082 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2083 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2084 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2085 } 2086 2087 static inline void 2088 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2089 CPURISCVState *env, 2090 uint32_t vl, uint32_t vm, int vxrm, 2091 opivv2_rm_fn *fn) 2092 { 2093 for (uint32_t i = env->vstart; i < vl; i++) { 2094 if (!vm && !vext_elem_mask(v0, i)) { 2095 continue; 2096 } 2097 fn(vd, vs1, vs2, i, env, vxrm); 2098 } 2099 env->vstart = 0; 2100 } 2101 2102 static inline void 2103 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2104 CPURISCVState *env, 2105 uint32_t desc, 2106 opivv2_rm_fn *fn, uint32_t esz) 2107 { 2108 uint32_t vm = vext_vm(desc); 2109 uint32_t vl = env->vl; 2110 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2111 uint32_t vta = vext_vta(desc); 2112 2113 switch (env->vxrm) { 2114 case 0: /* rnu */ 2115 vext_vv_rm_1(vd, v0, vs1, vs2, 2116 env, vl, vm, 0, fn); 2117 break; 
2118 case 1: /* rne */ 2119 vext_vv_rm_1(vd, v0, vs1, vs2, 2120 env, vl, vm, 1, fn); 2121 break; 2122 case 2: /* rdn */ 2123 vext_vv_rm_1(vd, v0, vs1, vs2, 2124 env, vl, vm, 2, fn); 2125 break; 2126 default: /* rod */ 2127 vext_vv_rm_1(vd, v0, vs1, vs2, 2128 env, vl, vm, 3, fn); 2129 break; 2130 } 2131 /* set tail elements to 1s */ 2132 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2133 } 2134 2135 /* generate helpers for fixed point instructions with OPIVV format */ 2136 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2137 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2138 CPURISCVState *env, uint32_t desc) \ 2139 { \ 2140 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2141 do_##NAME, ESZ); \ 2142 } 2143 2144 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2145 { 2146 uint8_t res = a + b; 2147 if (res < a) { 2148 res = UINT8_MAX; 2149 env->vxsat = 0x1; 2150 } 2151 return res; 2152 } 2153 2154 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2155 uint16_t b) 2156 { 2157 uint16_t res = a + b; 2158 if (res < a) { 2159 res = UINT16_MAX; 2160 env->vxsat = 0x1; 2161 } 2162 return res; 2163 } 2164 2165 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2166 uint32_t b) 2167 { 2168 uint32_t res = a + b; 2169 if (res < a) { 2170 res = UINT32_MAX; 2171 env->vxsat = 0x1; 2172 } 2173 return res; 2174 } 2175 2176 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2177 uint64_t b) 2178 { 2179 uint64_t res = a + b; 2180 if (res < a) { 2181 res = UINT64_MAX; 2182 env->vxsat = 0x1; 2183 } 2184 return res; 2185 } 2186 2187 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2188 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2189 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2190 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2191 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2192 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2193 
GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2194 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2195 2196 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2197 CPURISCVState *env, int vxrm); 2198 2199 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2200 static inline void \ 2201 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2202 CPURISCVState *env, int vxrm) \ 2203 { \ 2204 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2205 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2206 } 2207 2208 static inline void 2209 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2210 CPURISCVState *env, 2211 uint32_t vl, uint32_t vm, int vxrm, 2212 opivx2_rm_fn *fn) 2213 { 2214 for (uint32_t i = env->vstart; i < vl; i++) { 2215 if (!vm && !vext_elem_mask(v0, i)) { 2216 continue; 2217 } 2218 fn(vd, s1, vs2, i, env, vxrm); 2219 } 2220 env->vstart = 0; 2221 } 2222 2223 static inline void 2224 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2225 CPURISCVState *env, 2226 uint32_t desc, 2227 opivx2_rm_fn *fn, uint32_t esz) 2228 { 2229 uint32_t vm = vext_vm(desc); 2230 uint32_t vl = env->vl; 2231 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2232 uint32_t vta = vext_vta(desc); 2233 2234 switch (env->vxrm) { 2235 case 0: /* rnu */ 2236 vext_vx_rm_1(vd, v0, s1, vs2, 2237 env, vl, vm, 0, fn); 2238 break; 2239 case 1: /* rne */ 2240 vext_vx_rm_1(vd, v0, s1, vs2, 2241 env, vl, vm, 1, fn); 2242 break; 2243 case 2: /* rdn */ 2244 vext_vx_rm_1(vd, v0, s1, vs2, 2245 env, vl, vm, 2, fn); 2246 break; 2247 default: /* rod */ 2248 vext_vx_rm_1(vd, v0, s1, vs2, 2249 env, vl, vm, 3, fn); 2250 break; 2251 } 2252 /* set tail elements to 1s */ 2253 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2254 } 2255 2256 /* generate helpers for fixed point instructions with OPIVX format */ 2257 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2258 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2259 void *vs2, CPURISCVState *env, uint32_t desc) \ 2260 { \ 2261 
vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2262 do_##NAME, ESZ); \ 2263 } 2264 2265 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2266 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2267 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2268 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2269 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2270 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2271 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2272 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2273 2274 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2275 { 2276 int8_t res = a + b; 2277 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2278 res = a > 0 ? INT8_MAX : INT8_MIN; 2279 env->vxsat = 0x1; 2280 } 2281 return res; 2282 } 2283 2284 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2285 { 2286 int16_t res = a + b; 2287 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2288 res = a > 0 ? INT16_MAX : INT16_MIN; 2289 env->vxsat = 0x1; 2290 } 2291 return res; 2292 } 2293 2294 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2295 { 2296 int32_t res = a + b; 2297 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2298 res = a > 0 ? INT32_MAX : INT32_MIN; 2299 env->vxsat = 0x1; 2300 } 2301 return res; 2302 } 2303 2304 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2305 { 2306 int64_t res = a + b; 2307 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2308 res = a > 0 ? 
INT64_MAX : INT64_MIN; 2309 env->vxsat = 0x1; 2310 } 2311 return res; 2312 } 2313 2314 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2315 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2316 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2317 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2318 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2319 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2320 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2321 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2322 2323 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2324 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2325 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2326 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2327 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2328 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2329 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2330 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2331 2332 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2333 { 2334 uint8_t res = a - b; 2335 if (res > a) { 2336 res = 0; 2337 env->vxsat = 0x1; 2338 } 2339 return res; 2340 } 2341 2342 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2343 uint16_t b) 2344 { 2345 uint16_t res = a - b; 2346 if (res > a) { 2347 res = 0; 2348 env->vxsat = 0x1; 2349 } 2350 return res; 2351 } 2352 2353 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2354 uint32_t b) 2355 { 2356 uint32_t res = a - b; 2357 if (res > a) { 2358 res = 0; 2359 env->vxsat = 0x1; 2360 } 2361 return res; 2362 } 2363 2364 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2365 uint64_t b) 2366 { 2367 uint64_t res = a - b; 2368 if (res > a) { 2369 res = 0; 2370 env->vxsat = 0x1; 2371 } 2372 return res; 2373 } 2374 2375 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2376 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2377 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2378 RVVCALL(OPIVV2_RM, vssubu_vv_d, 
OP_UUU_D, H8, H8, H8, ssubu64) 2379 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2380 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2381 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2382 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2383 2384 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2385 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2386 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2387 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2388 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2389 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2390 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2391 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2392 2393 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2394 { 2395 int8_t res = a - b; 2396 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2397 res = a >= 0 ? INT8_MAX : INT8_MIN; 2398 env->vxsat = 0x1; 2399 } 2400 return res; 2401 } 2402 2403 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2404 { 2405 int16_t res = a - b; 2406 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2407 res = a >= 0 ? INT16_MAX : INT16_MIN; 2408 env->vxsat = 0x1; 2409 } 2410 return res; 2411 } 2412 2413 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2414 { 2415 int32_t res = a - b; 2416 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2417 res = a >= 0 ? INT32_MAX : INT32_MIN; 2418 env->vxsat = 0x1; 2419 } 2420 return res; 2421 } 2422 2423 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2424 { 2425 int64_t res = a - b; 2426 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2427 res = a >= 0 ? 
INT64_MAX : INT64_MIN; 2428 env->vxsat = 0x1; 2429 } 2430 return res; 2431 } 2432 2433 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2434 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2435 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2436 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2437 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2438 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2439 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2440 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2441 2442 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2443 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2444 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2445 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2446 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2447 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2448 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2449 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2450 2451 /* Vector Single-Width Averaging Add and Subtract */ 2452 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2453 { 2454 uint8_t d = extract64(v, shift, 1); 2455 uint8_t d1; 2456 uint64_t D1, D2; 2457 2458 if (shift == 0 || shift > 64) { 2459 return 0; 2460 } 2461 2462 d1 = extract64(v, shift - 1, 1); 2463 D1 = extract64(v, 0, shift); 2464 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2465 return d1; 2466 } else if (vxrm == 1) { /* round-to-nearest-even */ 2467 if (shift > 1) { 2468 D2 = extract64(v, 0, shift - 1); 2469 return d1 & ((D2 != 0) | d); 2470 } else { 2471 return d1 & d; 2472 } 2473 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2474 return !d & (D1 != 0); 2475 } 2476 return 0; /* round-down (truncate) */ 2477 } 2478 2479 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2480 { 2481 int64_t res = (int64_t)a + b; 2482 uint8_t round = get_round(vxrm, res, 1); 2483 2484 return (res >> 1) + round; 2485 } 2486 2487 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2488 { 
2489 int64_t res = a + b; 2490 uint8_t round = get_round(vxrm, res, 1); 2491 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2492 2493 /* With signed overflow, bit 64 is inverse of bit 63. */ 2494 return ((res >> 1) ^ over) + round; 2495 } 2496 2497 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2498 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2499 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2500 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2501 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2502 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2503 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2504 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2505 2506 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2507 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2508 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2509 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2510 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2511 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2512 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2513 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2514 2515 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2516 uint32_t a, uint32_t b) 2517 { 2518 uint64_t res = (uint64_t)a + b; 2519 uint8_t round = get_round(vxrm, res, 1); 2520 2521 return (res >> 1) + round; 2522 } 2523 2524 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2525 uint64_t a, uint64_t b) 2526 { 2527 uint64_t res = a + b; 2528 uint8_t round = get_round(vxrm, res, 1); 2529 uint64_t over = (uint64_t)(res < a) << 63; 2530 2531 return ((res >> 1) | over) + round; 2532 } 2533 2534 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2535 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2536 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2537 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2538 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2539 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2540 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2541 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2542 2543 
RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2544 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2545 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2546 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2547 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2548 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2549 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2550 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2551 2552 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2553 { 2554 int64_t res = (int64_t)a - b; 2555 uint8_t round = get_round(vxrm, res, 1); 2556 2557 return (res >> 1) + round; 2558 } 2559 2560 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2561 { 2562 int64_t res = (int64_t)a - b; 2563 uint8_t round = get_round(vxrm, res, 1); 2564 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2565 2566 /* With signed overflow, bit 64 is inverse of bit 63. */ 2567 return ((res >> 1) ^ over) + round; 2568 } 2569 2570 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2571 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2572 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2573 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2574 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2575 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2576 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2577 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2578 2579 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2580 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2581 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2582 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2583 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2584 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2585 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2586 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2587 2588 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2589 uint32_t a, uint32_t b) 2590 { 2591 int64_t res = (int64_t)a - b; 2592 uint8_t round = get_round(vxrm, res, 1); 2593 2594 return (res >> 1) + round; 2595 } 
2596 2597 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2598 uint64_t a, uint64_t b) 2599 { 2600 uint64_t res = (uint64_t)a - b; 2601 uint8_t round = get_round(vxrm, res, 1); 2602 uint64_t over = (uint64_t)(res > a) << 63; 2603 2604 return ((res >> 1) | over) + round; 2605 } 2606 2607 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2608 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2609 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2610 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2611 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2612 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2613 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2614 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2615 2616 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2617 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2618 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2619 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2620 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2621 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2622 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2623 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2624 2625 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2626 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2627 { 2628 uint8_t round; 2629 int16_t res; 2630 2631 res = (int16_t)a * (int16_t)b; 2632 round = get_round(vxrm, res, 7); 2633 res = (res >> 7) + round; 2634 2635 if (res > INT8_MAX) { 2636 env->vxsat = 0x1; 2637 return INT8_MAX; 2638 } else if (res < INT8_MIN) { 2639 env->vxsat = 0x1; 2640 return INT8_MIN; 2641 } else { 2642 return res; 2643 } 2644 } 2645 2646 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2647 { 2648 uint8_t round; 2649 int32_t res; 2650 2651 res = (int32_t)a * (int32_t)b; 2652 round = get_round(vxrm, res, 15); 2653 res = (res >> 15) + round; 2654 2655 if (res > INT16_MAX) { 2656 env->vxsat = 0x1; 2657 return INT16_MAX; 2658 } else if (res < INT16_MIN) { 
2659 env->vxsat = 0x1; 2660 return INT16_MIN; 2661 } else { 2662 return res; 2663 } 2664 } 2665 2666 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2667 { 2668 uint8_t round; 2669 int64_t res; 2670 2671 res = (int64_t)a * (int64_t)b; 2672 round = get_round(vxrm, res, 31); 2673 res = (res >> 31) + round; 2674 2675 if (res > INT32_MAX) { 2676 env->vxsat = 0x1; 2677 return INT32_MAX; 2678 } else if (res < INT32_MIN) { 2679 env->vxsat = 0x1; 2680 return INT32_MIN; 2681 } else { 2682 return res; 2683 } 2684 } 2685 2686 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2687 { 2688 uint8_t round; 2689 uint64_t hi_64, lo_64; 2690 int64_t res; 2691 2692 if (a == INT64_MIN && b == INT64_MIN) { 2693 env->vxsat = 1; 2694 return INT64_MAX; 2695 } 2696 2697 muls64(&lo_64, &hi_64, a, b); 2698 round = get_round(vxrm, lo_64, 63); 2699 /* 2700 * Cannot overflow, as there are always 2701 * 2 sign bits after multiply. 2702 */ 2703 res = (hi_64 << 1) | (lo_64 >> 63); 2704 if (round) { 2705 if (res == INT64_MAX) { 2706 env->vxsat = 1; 2707 } else { 2708 res += 1; 2709 } 2710 } 2711 return res; 2712 } 2713 2714 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2715 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2716 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2717 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2718 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2719 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2720 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2721 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2722 2723 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2724 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2725 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2726 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2727 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2728 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2729 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2730 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2731 2732 /* Vector Single-Width Scaling 
Shift Instructions */ 2733 static inline uint8_t 2734 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2735 { 2736 uint8_t round, shift = b & 0x7; 2737 uint8_t res; 2738 2739 round = get_round(vxrm, a, shift); 2740 res = (a >> shift) + round; 2741 return res; 2742 } 2743 static inline uint16_t 2744 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2745 { 2746 uint8_t round, shift = b & 0xf; 2747 uint16_t res; 2748 2749 round = get_round(vxrm, a, shift); 2750 res = (a >> shift) + round; 2751 return res; 2752 } 2753 static inline uint32_t 2754 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2755 { 2756 uint8_t round, shift = b & 0x1f; 2757 uint32_t res; 2758 2759 round = get_round(vxrm, a, shift); 2760 res = (a >> shift) + round; 2761 return res; 2762 } 2763 static inline uint64_t 2764 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2765 { 2766 uint8_t round, shift = b & 0x3f; 2767 uint64_t res; 2768 2769 round = get_round(vxrm, a, shift); 2770 res = (a >> shift) + round; 2771 return res; 2772 } 2773 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2774 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2775 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2776 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2777 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2778 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2779 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2780 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2781 2782 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2783 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2784 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2785 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2786 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2787 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2788 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2789 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2790 2791 static inline int8_t 2792 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2793 { 2794 uint8_t round, shift = b & 0x7; 
2795 int8_t res; 2796 2797 round = get_round(vxrm, a, shift); 2798 res = (a >> shift) + round; 2799 return res; 2800 } 2801 static inline int16_t 2802 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2803 { 2804 uint8_t round, shift = b & 0xf; 2805 int16_t res; 2806 2807 round = get_round(vxrm, a, shift); 2808 res = (a >> shift) + round; 2809 return res; 2810 } 2811 static inline int32_t 2812 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2813 { 2814 uint8_t round, shift = b & 0x1f; 2815 int32_t res; 2816 2817 round = get_round(vxrm, a, shift); 2818 res = (a >> shift) + round; 2819 return res; 2820 } 2821 static inline int64_t 2822 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2823 { 2824 uint8_t round, shift = b & 0x3f; 2825 int64_t res; 2826 2827 round = get_round(vxrm, a, shift); 2828 res = (a >> shift) + round; 2829 return res; 2830 } 2831 2832 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2833 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2834 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2835 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2836 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2837 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2838 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2839 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2840 2841 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2842 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2843 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2844 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2845 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2846 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2847 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2848 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2849 2850 /* Vector Narrowing Fixed-Point Clip Instructions */ 2851 static inline int8_t 2852 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2853 { 2854 uint8_t round, shift = b & 0xf; 2855 int16_t res; 2856 2857 round = get_round(vxrm, a, shift); 2858 res = (a >> shift) + round; 2859 if 
(res > INT8_MAX) { 2860 env->vxsat = 0x1; 2861 return INT8_MAX; 2862 } else if (res < INT8_MIN) { 2863 env->vxsat = 0x1; 2864 return INT8_MIN; 2865 } else { 2866 return res; 2867 } 2868 } 2869 2870 static inline int16_t 2871 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2872 { 2873 uint8_t round, shift = b & 0x1f; 2874 int32_t res; 2875 2876 round = get_round(vxrm, a, shift); 2877 res = (a >> shift) + round; 2878 if (res > INT16_MAX) { 2879 env->vxsat = 0x1; 2880 return INT16_MAX; 2881 } else if (res < INT16_MIN) { 2882 env->vxsat = 0x1; 2883 return INT16_MIN; 2884 } else { 2885 return res; 2886 } 2887 } 2888 2889 static inline int32_t 2890 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2891 { 2892 uint8_t round, shift = b & 0x3f; 2893 int64_t res; 2894 2895 round = get_round(vxrm, a, shift); 2896 res = (a >> shift) + round; 2897 if (res > INT32_MAX) { 2898 env->vxsat = 0x1; 2899 return INT32_MAX; 2900 } else if (res < INT32_MIN) { 2901 env->vxsat = 0x1; 2902 return INT32_MIN; 2903 } else { 2904 return res; 2905 } 2906 } 2907 2908 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2909 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2910 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2911 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 2912 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 2913 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 2914 2915 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2916 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2917 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2918 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 2919 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 2920 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 2921 2922 static inline uint8_t 2923 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2924 { 2925 uint8_t round, shift = b & 0xf; 2926 uint16_t res; 2927 2928 round = get_round(vxrm, a, shift); 2929 res = (a >> shift) + round; 2930 if (res > UINT8_MAX) { 2931 env->vxsat = 0x1; 2932 
return UINT8_MAX; 2933 } else { 2934 return res; 2935 } 2936 } 2937 2938 static inline uint16_t 2939 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2940 { 2941 uint8_t round, shift = b & 0x1f; 2942 uint32_t res; 2943 2944 round = get_round(vxrm, a, shift); 2945 res = (a >> shift) + round; 2946 if (res > UINT16_MAX) { 2947 env->vxsat = 0x1; 2948 return UINT16_MAX; 2949 } else { 2950 return res; 2951 } 2952 } 2953 2954 static inline uint32_t 2955 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2956 { 2957 uint8_t round, shift = b & 0x3f; 2958 uint64_t res; 2959 2960 round = get_round(vxrm, a, shift); 2961 res = (a >> shift) + round; 2962 if (res > UINT32_MAX) { 2963 env->vxsat = 0x1; 2964 return UINT32_MAX; 2965 } else { 2966 return res; 2967 } 2968 } 2969 2970 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2971 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2972 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2973 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 2974 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 2975 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 2976 2977 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2978 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2979 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 2980 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 2981 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 2982 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 2983 2984 /* 2985 *** Vector Float Point Arithmetic Instructions 2986 */ 2987 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2988 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2989 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2990 CPURISCVState *env) \ 2991 { \ 2992 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2993 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2994 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2995 } 2996 2997 #define GEN_VEXT_VV_ENV(NAME) \ 2998 void HELPER(NAME)(void *vd, void *v0, void *vs1, 
\ 2999 void *vs2, CPURISCVState *env, \ 3000 uint32_t desc) \ 3001 { \ 3002 uint32_t vm = vext_vm(desc); \ 3003 uint32_t vl = env->vl; \ 3004 uint32_t i; \ 3005 \ 3006 for (i = env->vstart; i < vl; i++) { \ 3007 if (!vm && !vext_elem_mask(v0, i)) { \ 3008 continue; \ 3009 } \ 3010 do_##NAME(vd, vs1, vs2, i, env); \ 3011 } \ 3012 env->vstart = 0; \ 3013 } 3014 3015 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3016 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3017 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3018 GEN_VEXT_VV_ENV(vfadd_vv_h) 3019 GEN_VEXT_VV_ENV(vfadd_vv_w) 3020 GEN_VEXT_VV_ENV(vfadd_vv_d) 3021 3022 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3023 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3024 CPURISCVState *env) \ 3025 { \ 3026 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3027 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 3028 } 3029 3030 #define GEN_VEXT_VF(NAME) \ 3031 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3032 void *vs2, CPURISCVState *env, \ 3033 uint32_t desc) \ 3034 { \ 3035 uint32_t vm = vext_vm(desc); \ 3036 uint32_t vl = env->vl; \ 3037 uint32_t i; \ 3038 \ 3039 for (i = env->vstart; i < vl; i++) { \ 3040 if (!vm && !vext_elem_mask(v0, i)) { \ 3041 continue; \ 3042 } \ 3043 do_##NAME(vd, s1, vs2, i, env); \ 3044 } \ 3045 env->vstart = 0; \ 3046 } 3047 3048 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3049 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3050 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3051 GEN_VEXT_VF(vfadd_vf_h) 3052 GEN_VEXT_VF(vfadd_vf_w) 3053 GEN_VEXT_VF(vfadd_vf_d) 3054 3055 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3056 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3057 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3058 GEN_VEXT_VV_ENV(vfsub_vv_h) 3059 GEN_VEXT_VV_ENV(vfsub_vv_w) 3060 GEN_VEXT_VV_ENV(vfsub_vv_d) 3061 
RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3062 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3063 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3064 GEN_VEXT_VF(vfsub_vf_h) 3065 GEN_VEXT_VF(vfsub_vf_w) 3066 GEN_VEXT_VF(vfsub_vf_d) 3067 3068 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3069 { 3070 return float16_sub(b, a, s); 3071 } 3072 3073 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3074 { 3075 return float32_sub(b, a, s); 3076 } 3077 3078 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3079 { 3080 return float64_sub(b, a, s); 3081 } 3082 3083 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3084 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3085 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3086 GEN_VEXT_VF(vfrsub_vf_h) 3087 GEN_VEXT_VF(vfrsub_vf_w) 3088 GEN_VEXT_VF(vfrsub_vf_d) 3089 3090 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3091 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3092 { 3093 return float32_add(float16_to_float32(a, true, s), 3094 float16_to_float32(b, true, s), s); 3095 } 3096 3097 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3098 { 3099 return float64_add(float32_to_float64(a, s), 3100 float32_to_float64(b, s), s); 3101 3102 } 3103 3104 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3105 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3106 GEN_VEXT_VV_ENV(vfwadd_vv_h) 3107 GEN_VEXT_VV_ENV(vfwadd_vv_w) 3108 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3109 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3110 GEN_VEXT_VF(vfwadd_vf_h) 3111 GEN_VEXT_VF(vfwadd_vf_w) 3112 3113 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3114 { 3115 return float32_sub(float16_to_float32(a, true, s), 3116 float16_to_float32(b, true, s), s); 3117 } 3118 3119 static uint64_t 
vfwsub32(uint32_t a, uint32_t b, float_status *s) 3120 { 3121 return float64_sub(float32_to_float64(a, s), 3122 float32_to_float64(b, s), s); 3123 3124 } 3125 3126 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3127 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3128 GEN_VEXT_VV_ENV(vfwsub_vv_h) 3129 GEN_VEXT_VV_ENV(vfwsub_vv_w) 3130 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3131 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3132 GEN_VEXT_VF(vfwsub_vf_h) 3133 GEN_VEXT_VF(vfwsub_vf_w) 3134 3135 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3136 { 3137 return float32_add(a, float16_to_float32(b, true, s), s); 3138 } 3139 3140 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3141 { 3142 return float64_add(a, float32_to_float64(b, s), s); 3143 } 3144 3145 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3146 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3147 GEN_VEXT_VV_ENV(vfwadd_wv_h) 3148 GEN_VEXT_VV_ENV(vfwadd_wv_w) 3149 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3150 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3151 GEN_VEXT_VF(vfwadd_wf_h) 3152 GEN_VEXT_VF(vfwadd_wf_w) 3153 3154 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3155 { 3156 return float32_sub(a, float16_to_float32(b, true, s), s); 3157 } 3158 3159 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3160 { 3161 return float64_sub(a, float32_to_float64(b, s), s); 3162 } 3163 3164 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3165 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3166 GEN_VEXT_VV_ENV(vfwsub_wv_h) 3167 GEN_VEXT_VV_ENV(vfwsub_wv_w) 3168 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3169 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3170 GEN_VEXT_VF(vfwsub_wf_h) 3171 GEN_VEXT_VF(vfwsub_wf_w) 3172 3173 /* Vector Single-Width 
Floating-Point Multiply/Divide Instructions */ 3174 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3175 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3176 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3177 GEN_VEXT_VV_ENV(vfmul_vv_h) 3178 GEN_VEXT_VV_ENV(vfmul_vv_w) 3179 GEN_VEXT_VV_ENV(vfmul_vv_d) 3180 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3181 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3182 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3183 GEN_VEXT_VF(vfmul_vf_h) 3184 GEN_VEXT_VF(vfmul_vf_w) 3185 GEN_VEXT_VF(vfmul_vf_d) 3186 3187 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3188 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3189 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3190 GEN_VEXT_VV_ENV(vfdiv_vv_h) 3191 GEN_VEXT_VV_ENV(vfdiv_vv_w) 3192 GEN_VEXT_VV_ENV(vfdiv_vv_d) 3193 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3194 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3195 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3196 GEN_VEXT_VF(vfdiv_vf_h) 3197 GEN_VEXT_VF(vfdiv_vf_w) 3198 GEN_VEXT_VF(vfdiv_vf_d) 3199 3200 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3201 { 3202 return float16_div(b, a, s); 3203 } 3204 3205 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3206 { 3207 return float32_div(b, a, s); 3208 } 3209 3210 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3211 { 3212 return float64_div(b, a, s); 3213 } 3214 3215 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3216 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3217 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3218 GEN_VEXT_VF(vfrdiv_vf_h) 3219 GEN_VEXT_VF(vfrdiv_vf_w) 3220 GEN_VEXT_VF(vfrdiv_vf_d) 3221 3222 /* Vector Widening Floating-Point Multiply */ 3223 static uint32_t vfwmul16(uint16_t a, uint16_t 
b, float_status *s) 3224 { 3225 return float32_mul(float16_to_float32(a, true, s), 3226 float16_to_float32(b, true, s), s); 3227 } 3228 3229 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3230 { 3231 return float64_mul(float32_to_float64(a, s), 3232 float32_to_float64(b, s), s); 3233 3234 } 3235 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3236 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3237 GEN_VEXT_VV_ENV(vfwmul_vv_h) 3238 GEN_VEXT_VV_ENV(vfwmul_vv_w) 3239 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3240 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3241 GEN_VEXT_VF(vfwmul_vf_h) 3242 GEN_VEXT_VF(vfwmul_vf_w) 3243 3244 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3245 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3246 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3247 CPURISCVState *env) \ 3248 { \ 3249 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3250 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3251 TD d = *((TD *)vd + HD(i)); \ 3252 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3253 } 3254 3255 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3256 { 3257 return float16_muladd(a, b, d, 0, s); 3258 } 3259 3260 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3261 { 3262 return float32_muladd(a, b, d, 0, s); 3263 } 3264 3265 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3266 { 3267 return float64_muladd(a, b, d, 0, s); 3268 } 3269 3270 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3271 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3272 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3273 GEN_VEXT_VV_ENV(vfmacc_vv_h) 3274 GEN_VEXT_VV_ENV(vfmacc_vv_w) 3275 GEN_VEXT_VV_ENV(vfmacc_vv_d) 3276 3277 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3278 static void do_##NAME(void *vd, uint64_t s1, void *vs2, 
int i, \ 3279 CPURISCVState *env) \ 3280 { \ 3281 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3282 TD d = *((TD *)vd + HD(i)); \ 3283 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3284 } 3285 3286 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3287 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3288 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3289 GEN_VEXT_VF(vfmacc_vf_h) 3290 GEN_VEXT_VF(vfmacc_vf_w) 3291 GEN_VEXT_VF(vfmacc_vf_d) 3292 3293 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3294 { 3295 return float16_muladd(a, b, d, 3296 float_muladd_negate_c | float_muladd_negate_product, s); 3297 } 3298 3299 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3300 { 3301 return float32_muladd(a, b, d, 3302 float_muladd_negate_c | float_muladd_negate_product, s); 3303 } 3304 3305 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3306 { 3307 return float64_muladd(a, b, d, 3308 float_muladd_negate_c | float_muladd_negate_product, s); 3309 } 3310 3311 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3312 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3313 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3314 GEN_VEXT_VV_ENV(vfnmacc_vv_h) 3315 GEN_VEXT_VV_ENV(vfnmacc_vv_w) 3316 GEN_VEXT_VV_ENV(vfnmacc_vv_d) 3317 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3318 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3319 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3320 GEN_VEXT_VF(vfnmacc_vf_h) 3321 GEN_VEXT_VF(vfnmacc_vf_w) 3322 GEN_VEXT_VF(vfnmacc_vf_d) 3323 3324 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3325 { 3326 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3327 } 3328 3329 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3330 { 3331 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3332 } 3333 
3334 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3335 { 3336 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3337 } 3338 3339 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3340 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3341 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3342 GEN_VEXT_VV_ENV(vfmsac_vv_h) 3343 GEN_VEXT_VV_ENV(vfmsac_vv_w) 3344 GEN_VEXT_VV_ENV(vfmsac_vv_d) 3345 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3346 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3347 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3348 GEN_VEXT_VF(vfmsac_vf_h) 3349 GEN_VEXT_VF(vfmsac_vf_w) 3350 GEN_VEXT_VF(vfmsac_vf_d) 3351 3352 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3353 { 3354 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3355 } 3356 3357 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3358 { 3359 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3360 } 3361 3362 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3363 { 3364 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3365 } 3366 3367 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3368 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3369 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3370 GEN_VEXT_VV_ENV(vfnmsac_vv_h) 3371 GEN_VEXT_VV_ENV(vfnmsac_vv_w) 3372 GEN_VEXT_VV_ENV(vfnmsac_vv_d) 3373 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3374 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3375 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3376 GEN_VEXT_VF(vfnmsac_vf_h) 3377 GEN_VEXT_VF(vfnmsac_vf_w) 3378 GEN_VEXT_VF(vfnmsac_vf_d) 3379 3380 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3381 { 3382 return float16_muladd(d, b, a, 0, s); 3383 } 3384 3385 
static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3386 { 3387 return float32_muladd(d, b, a, 0, s); 3388 } 3389 3390 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3391 { 3392 return float64_muladd(d, b, a, 0, s); 3393 } 3394 3395 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3396 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3397 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3398 GEN_VEXT_VV_ENV(vfmadd_vv_h) 3399 GEN_VEXT_VV_ENV(vfmadd_vv_w) 3400 GEN_VEXT_VV_ENV(vfmadd_vv_d) 3401 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3402 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3403 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3404 GEN_VEXT_VF(vfmadd_vf_h) 3405 GEN_VEXT_VF(vfmadd_vf_w) 3406 GEN_VEXT_VF(vfmadd_vf_d) 3407 3408 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3409 { 3410 return float16_muladd(d, b, a, 3411 float_muladd_negate_c | float_muladd_negate_product, s); 3412 } 3413 3414 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3415 { 3416 return float32_muladd(d, b, a, 3417 float_muladd_negate_c | float_muladd_negate_product, s); 3418 } 3419 3420 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3421 { 3422 return float64_muladd(d, b, a, 3423 float_muladd_negate_c | float_muladd_negate_product, s); 3424 } 3425 3426 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3427 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3428 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3429 GEN_VEXT_VV_ENV(vfnmadd_vv_h) 3430 GEN_VEXT_VV_ENV(vfnmadd_vv_w) 3431 GEN_VEXT_VV_ENV(vfnmadd_vv_d) 3432 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3433 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3434 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3435 GEN_VEXT_VF(vfnmadd_vf_h) 3436 
GEN_VEXT_VF(vfnmadd_vf_w) 3437 GEN_VEXT_VF(vfnmadd_vf_d) 3438 3439 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3440 { 3441 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3442 } 3443 3444 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3445 { 3446 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3447 } 3448 3449 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3450 { 3451 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3452 } 3453 3454 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3455 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3456 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3457 GEN_VEXT_VV_ENV(vfmsub_vv_h) 3458 GEN_VEXT_VV_ENV(vfmsub_vv_w) 3459 GEN_VEXT_VV_ENV(vfmsub_vv_d) 3460 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3461 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3462 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3463 GEN_VEXT_VF(vfmsub_vf_h) 3464 GEN_VEXT_VF(vfmsub_vf_w) 3465 GEN_VEXT_VF(vfmsub_vf_d) 3466 3467 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3468 { 3469 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3470 } 3471 3472 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3473 { 3474 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3475 } 3476 3477 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3478 { 3479 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3480 } 3481 3482 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3483 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3484 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3485 GEN_VEXT_VV_ENV(vfnmsub_vv_h) 3486 GEN_VEXT_VV_ENV(vfnmsub_vv_w) 3487 GEN_VEXT_VV_ENV(vfnmsub_vv_d) 3488 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, 
H2, fnmsub16) 3489 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3490 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3491 GEN_VEXT_VF(vfnmsub_vf_h) 3492 GEN_VEXT_VF(vfnmsub_vf_w) 3493 GEN_VEXT_VF(vfnmsub_vf_d) 3494 3495 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3496 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3497 { 3498 return float32_muladd(float16_to_float32(a, true, s), 3499 float16_to_float32(b, true, s), d, 0, s); 3500 } 3501 3502 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3503 { 3504 return float64_muladd(float32_to_float64(a, s), 3505 float32_to_float64(b, s), d, 0, s); 3506 } 3507 3508 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3509 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3510 GEN_VEXT_VV_ENV(vfwmacc_vv_h) 3511 GEN_VEXT_VV_ENV(vfwmacc_vv_w) 3512 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3513 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3514 GEN_VEXT_VF(vfwmacc_vf_h) 3515 GEN_VEXT_VF(vfwmacc_vf_w) 3516 3517 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3518 { 3519 return float32_muladd(float16_to_float32(a, true, s), 3520 float16_to_float32(b, true, s), d, 3521 float_muladd_negate_c | float_muladd_negate_product, s); 3522 } 3523 3524 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3525 { 3526 return float64_muladd(float32_to_float64(a, s), 3527 float32_to_float64(b, s), d, 3528 float_muladd_negate_c | float_muladd_negate_product, s); 3529 } 3530 3531 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3532 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3533 GEN_VEXT_VV_ENV(vfwnmacc_vv_h) 3534 GEN_VEXT_VV_ENV(vfwnmacc_vv_w) 3535 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3536 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3537 
GEN_VEXT_VF(vfwnmacc_vf_h) 3538 GEN_VEXT_VF(vfwnmacc_vf_w) 3539 3540 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3541 { 3542 return float32_muladd(float16_to_float32(a, true, s), 3543 float16_to_float32(b, true, s), d, 3544 float_muladd_negate_c, s); 3545 } 3546 3547 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3548 { 3549 return float64_muladd(float32_to_float64(a, s), 3550 float32_to_float64(b, s), d, 3551 float_muladd_negate_c, s); 3552 } 3553 3554 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3555 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3556 GEN_VEXT_VV_ENV(vfwmsac_vv_h) 3557 GEN_VEXT_VV_ENV(vfwmsac_vv_w) 3558 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3559 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3560 GEN_VEXT_VF(vfwmsac_vf_h) 3561 GEN_VEXT_VF(vfwmsac_vf_w) 3562 3563 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3564 { 3565 return float32_muladd(float16_to_float32(a, true, s), 3566 float16_to_float32(b, true, s), d, 3567 float_muladd_negate_product, s); 3568 } 3569 3570 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3571 { 3572 return float64_muladd(float32_to_float64(a, s), 3573 float32_to_float64(b, s), d, 3574 float_muladd_negate_product, s); 3575 } 3576 3577 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3578 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3579 GEN_VEXT_VV_ENV(vfwnmsac_vv_h) 3580 GEN_VEXT_VV_ENV(vfwnmsac_vv_w) 3581 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3582 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3583 GEN_VEXT_VF(vfwnmsac_vf_h) 3584 GEN_VEXT_VF(vfwnmsac_vf_w) 3585 3586 /* Vector Floating-Point Square-Root Instruction */ 3587 /* (TD, T2, TX2) */ 3588 #define OP_UU_H uint16_t, uint16_t, uint16_t 3589 #define OP_UU_W uint32_t, uint32_t, uint32_t 3590 #define 
OP_UU_D uint64_t, uint64_t, uint64_t 3591 3592 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3593 static void do_##NAME(void *vd, void *vs2, int i, \ 3594 CPURISCVState *env) \ 3595 { \ 3596 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3597 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3598 } 3599 3600 #define GEN_VEXT_V_ENV(NAME) \ 3601 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3602 CPURISCVState *env, uint32_t desc) \ 3603 { \ 3604 uint32_t vm = vext_vm(desc); \ 3605 uint32_t vl = env->vl; \ 3606 uint32_t i; \ 3607 \ 3608 if (vl == 0) { \ 3609 return; \ 3610 } \ 3611 for (i = env->vstart; i < vl; i++) { \ 3612 if (!vm && !vext_elem_mask(v0, i)) { \ 3613 continue; \ 3614 } \ 3615 do_##NAME(vd, vs2, i, env); \ 3616 } \ 3617 env->vstart = 0; \ 3618 } 3619 3620 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3621 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3622 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3623 GEN_VEXT_V_ENV(vfsqrt_v_h) 3624 GEN_VEXT_V_ENV(vfsqrt_v_w) 3625 GEN_VEXT_V_ENV(vfsqrt_v_d) 3626 3627 /* 3628 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3629 * 3630 * Adapted from riscv-v-spec recip.c: 3631 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3632 */ 3633 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3634 { 3635 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3636 uint64_t exp = extract64(f, frac_size, exp_size); 3637 uint64_t frac = extract64(f, 0, frac_size); 3638 3639 const uint8_t lookup_table[] = { 3640 52, 51, 50, 48, 47, 46, 44, 43, 3641 42, 41, 40, 39, 38, 36, 35, 34, 3642 33, 32, 31, 30, 30, 29, 28, 27, 3643 26, 25, 24, 23, 23, 22, 21, 20, 3644 19, 19, 18, 17, 16, 16, 15, 14, 3645 14, 13, 12, 12, 11, 10, 10, 9, 3646 9, 8, 7, 7, 6, 6, 5, 4, 3647 4, 3, 3, 2, 2, 1, 1, 0, 3648 127, 125, 123, 121, 119, 118, 116, 114, 3649 113, 111, 109, 108, 106, 105, 103, 102, 3650 100, 99, 97, 96, 95, 93, 92, 91, 3651 90, 88, 87, 86, 85, 84, 83, 82, 3652 
80, 79, 78, 77, 76, 75, 74, 73, 3653 72, 71, 70, 70, 69, 68, 67, 66, 3654 65, 64, 63, 63, 62, 61, 60, 59, 3655 59, 58, 57, 56, 56, 55, 54, 53 3656 }; 3657 const int precision = 7; 3658 3659 if (exp == 0 && frac != 0) { /* subnormal */ 3660 /* Normalize the subnormal. */ 3661 while (extract64(frac, frac_size - 1, 1) == 0) { 3662 exp--; 3663 frac <<= 1; 3664 } 3665 3666 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3667 } 3668 3669 int idx = ((exp & 1) << (precision - 1)) | 3670 (frac >> (frac_size - precision + 1)); 3671 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3672 (frac_size - precision); 3673 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3674 3675 uint64_t val = 0; 3676 val = deposit64(val, 0, frac_size, out_frac); 3677 val = deposit64(val, frac_size, exp_size, out_exp); 3678 val = deposit64(val, frac_size + exp_size, 1, sign); 3679 return val; 3680 } 3681 3682 static float16 frsqrt7_h(float16 f, float_status *s) 3683 { 3684 int exp_size = 5, frac_size = 10; 3685 bool sign = float16_is_neg(f); 3686 3687 /* 3688 * frsqrt7(sNaN) = canonical NaN 3689 * frsqrt7(-inf) = canonical NaN 3690 * frsqrt7(-normal) = canonical NaN 3691 * frsqrt7(-subnormal) = canonical NaN 3692 */ 3693 if (float16_is_signaling_nan(f, s) || 3694 (float16_is_infinity(f) && sign) || 3695 (float16_is_normal(f) && sign) || 3696 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3697 s->float_exception_flags |= float_flag_invalid; 3698 return float16_default_nan(s); 3699 } 3700 3701 /* frsqrt7(qNaN) = canonical NaN */ 3702 if (float16_is_quiet_nan(f, s)) { 3703 return float16_default_nan(s); 3704 } 3705 3706 /* frsqrt7(+-0) = +-inf */ 3707 if (float16_is_zero(f)) { 3708 s->float_exception_flags |= float_flag_divbyzero; 3709 return float16_set_sign(float16_infinity, sign); 3710 } 3711 3712 /* frsqrt7(+inf) = +0 */ 3713 if (float16_is_infinity(f) && !sign) { 3714 return float16_set_sign(float16_zero, sign); 3715 } 3716 3717 /* +normal, 
+subnormal */ 3718 uint64_t val = frsqrt7(f, exp_size, frac_size); 3719 return make_float16(val); 3720 } 3721 3722 static float32 frsqrt7_s(float32 f, float_status *s) 3723 { 3724 int exp_size = 8, frac_size = 23; 3725 bool sign = float32_is_neg(f); 3726 3727 /* 3728 * frsqrt7(sNaN) = canonical NaN 3729 * frsqrt7(-inf) = canonical NaN 3730 * frsqrt7(-normal) = canonical NaN 3731 * frsqrt7(-subnormal) = canonical NaN 3732 */ 3733 if (float32_is_signaling_nan(f, s) || 3734 (float32_is_infinity(f) && sign) || 3735 (float32_is_normal(f) && sign) || 3736 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3737 s->float_exception_flags |= float_flag_invalid; 3738 return float32_default_nan(s); 3739 } 3740 3741 /* frsqrt7(qNaN) = canonical NaN */ 3742 if (float32_is_quiet_nan(f, s)) { 3743 return float32_default_nan(s); 3744 } 3745 3746 /* frsqrt7(+-0) = +-inf */ 3747 if (float32_is_zero(f)) { 3748 s->float_exception_flags |= float_flag_divbyzero; 3749 return float32_set_sign(float32_infinity, sign); 3750 } 3751 3752 /* frsqrt7(+inf) = +0 */ 3753 if (float32_is_infinity(f) && !sign) { 3754 return float32_set_sign(float32_zero, sign); 3755 } 3756 3757 /* +normal, +subnormal */ 3758 uint64_t val = frsqrt7(f, exp_size, frac_size); 3759 return make_float32(val); 3760 } 3761 3762 static float64 frsqrt7_d(float64 f, float_status *s) 3763 { 3764 int exp_size = 11, frac_size = 52; 3765 bool sign = float64_is_neg(f); 3766 3767 /* 3768 * frsqrt7(sNaN) = canonical NaN 3769 * frsqrt7(-inf) = canonical NaN 3770 * frsqrt7(-normal) = canonical NaN 3771 * frsqrt7(-subnormal) = canonical NaN 3772 */ 3773 if (float64_is_signaling_nan(f, s) || 3774 (float64_is_infinity(f) && sign) || 3775 (float64_is_normal(f) && sign) || 3776 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3777 s->float_exception_flags |= float_flag_invalid; 3778 return float64_default_nan(s); 3779 } 3780 3781 /* frsqrt7(qNaN) = canonical NaN */ 3782 if (float64_is_quiet_nan(f, s)) { 3783 
return float64_default_nan(s); 3784 } 3785 3786 /* frsqrt7(+-0) = +-inf */ 3787 if (float64_is_zero(f)) { 3788 s->float_exception_flags |= float_flag_divbyzero; 3789 return float64_set_sign(float64_infinity, sign); 3790 } 3791 3792 /* frsqrt7(+inf) = +0 */ 3793 if (float64_is_infinity(f) && !sign) { 3794 return float64_set_sign(float64_zero, sign); 3795 } 3796 3797 /* +normal, +subnormal */ 3798 uint64_t val = frsqrt7(f, exp_size, frac_size); 3799 return make_float64(val); 3800 } 3801 3802 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3803 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3804 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3805 GEN_VEXT_V_ENV(vfrsqrt7_v_h) 3806 GEN_VEXT_V_ENV(vfrsqrt7_v_w) 3807 GEN_VEXT_V_ENV(vfrsqrt7_v_d) 3808 3809 /* 3810 * Vector Floating-Point Reciprocal Estimate Instruction 3811 * 3812 * Adapted from riscv-v-spec recip.c: 3813 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3814 */ 3815 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3816 float_status *s) 3817 { 3818 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3819 uint64_t exp = extract64(f, frac_size, exp_size); 3820 uint64_t frac = extract64(f, 0, frac_size); 3821 3822 const uint8_t lookup_table[] = { 3823 127, 125, 123, 121, 119, 117, 116, 114, 3824 112, 110, 109, 107, 105, 104, 102, 100, 3825 99, 97, 96, 94, 93, 91, 90, 88, 3826 87, 85, 84, 83, 81, 80, 79, 77, 3827 76, 75, 74, 72, 71, 70, 69, 68, 3828 66, 65, 64, 63, 62, 61, 60, 59, 3829 58, 57, 56, 55, 54, 53, 52, 51, 3830 50, 49, 48, 47, 46, 45, 44, 43, 3831 42, 41, 40, 40, 39, 38, 37, 36, 3832 35, 35, 34, 33, 32, 31, 31, 30, 3833 29, 28, 28, 27, 26, 25, 25, 24, 3834 23, 23, 22, 21, 21, 20, 19, 19, 3835 18, 17, 17, 16, 15, 15, 14, 14, 3836 13, 12, 12, 11, 11, 10, 9, 9, 3837 8, 8, 7, 7, 6, 5, 5, 4, 3838 4, 3, 3, 2, 2, 1, 1, 0 3839 }; 3840 const int precision = 7; 3841 3842 if (exp == 0 && frac != 0) { /* subnormal */ 3843 /* Normalize the subnormal. 
*/ 3844 while (extract64(frac, frac_size - 1, 1) == 0) { 3845 exp--; 3846 frac <<= 1; 3847 } 3848 3849 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3850 3851 if (exp != 0 && exp != UINT64_MAX) { 3852 /* 3853 * Overflow to inf or max value of same sign, 3854 * depending on sign and rounding mode. 3855 */ 3856 s->float_exception_flags |= (float_flag_inexact | 3857 float_flag_overflow); 3858 3859 if ((s->float_rounding_mode == float_round_to_zero) || 3860 ((s->float_rounding_mode == float_round_down) && !sign) || 3861 ((s->float_rounding_mode == float_round_up) && sign)) { 3862 /* Return greatest/negative finite value. */ 3863 return (sign << (exp_size + frac_size)) | 3864 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3865 } else { 3866 /* Return +-inf. */ 3867 return (sign << (exp_size + frac_size)) | 3868 MAKE_64BIT_MASK(frac_size, exp_size); 3869 } 3870 } 3871 } 3872 3873 int idx = frac >> (frac_size - precision); 3874 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3875 (frac_size - precision); 3876 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3877 3878 if (out_exp == 0 || out_exp == UINT64_MAX) { 3879 /* 3880 * The result is subnormal, but don't raise the underflow exception, 3881 * because there's no additional loss of precision. 
3882 */ 3883 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3884 if (out_exp == UINT64_MAX) { 3885 out_frac >>= 1; 3886 out_exp = 0; 3887 } 3888 } 3889 3890 uint64_t val = 0; 3891 val = deposit64(val, 0, frac_size, out_frac); 3892 val = deposit64(val, frac_size, exp_size, out_exp); 3893 val = deposit64(val, frac_size + exp_size, 1, sign); 3894 return val; 3895 } 3896 3897 static float16 frec7_h(float16 f, float_status *s) 3898 { 3899 int exp_size = 5, frac_size = 10; 3900 bool sign = float16_is_neg(f); 3901 3902 /* frec7(+-inf) = +-0 */ 3903 if (float16_is_infinity(f)) { 3904 return float16_set_sign(float16_zero, sign); 3905 } 3906 3907 /* frec7(+-0) = +-inf */ 3908 if (float16_is_zero(f)) { 3909 s->float_exception_flags |= float_flag_divbyzero; 3910 return float16_set_sign(float16_infinity, sign); 3911 } 3912 3913 /* frec7(sNaN) = canonical NaN */ 3914 if (float16_is_signaling_nan(f, s)) { 3915 s->float_exception_flags |= float_flag_invalid; 3916 return float16_default_nan(s); 3917 } 3918 3919 /* frec7(qNaN) = canonical NaN */ 3920 if (float16_is_quiet_nan(f, s)) { 3921 return float16_default_nan(s); 3922 } 3923 3924 /* +-normal, +-subnormal */ 3925 uint64_t val = frec7(f, exp_size, frac_size, s); 3926 return make_float16(val); 3927 } 3928 3929 static float32 frec7_s(float32 f, float_status *s) 3930 { 3931 int exp_size = 8, frac_size = 23; 3932 bool sign = float32_is_neg(f); 3933 3934 /* frec7(+-inf) = +-0 */ 3935 if (float32_is_infinity(f)) { 3936 return float32_set_sign(float32_zero, sign); 3937 } 3938 3939 /* frec7(+-0) = +-inf */ 3940 if (float32_is_zero(f)) { 3941 s->float_exception_flags |= float_flag_divbyzero; 3942 return float32_set_sign(float32_infinity, sign); 3943 } 3944 3945 /* frec7(sNaN) = canonical NaN */ 3946 if (float32_is_signaling_nan(f, s)) { 3947 s->float_exception_flags |= float_flag_invalid; 3948 return float32_default_nan(s); 3949 } 3950 3951 /* frec7(qNaN) = canonical NaN */ 3952 if (float32_is_quiet_nan(f, s)) { 3953 
return float32_default_nan(s); 3954 } 3955 3956 /* +-normal, +-subnormal */ 3957 uint64_t val = frec7(f, exp_size, frac_size, s); 3958 return make_float32(val); 3959 } 3960 3961 static float64 frec7_d(float64 f, float_status *s) 3962 { 3963 int exp_size = 11, frac_size = 52; 3964 bool sign = float64_is_neg(f); 3965 3966 /* frec7(+-inf) = +-0 */ 3967 if (float64_is_infinity(f)) { 3968 return float64_set_sign(float64_zero, sign); 3969 } 3970 3971 /* frec7(+-0) = +-inf */ 3972 if (float64_is_zero(f)) { 3973 s->float_exception_flags |= float_flag_divbyzero; 3974 return float64_set_sign(float64_infinity, sign); 3975 } 3976 3977 /* frec7(sNaN) = canonical NaN */ 3978 if (float64_is_signaling_nan(f, s)) { 3979 s->float_exception_flags |= float_flag_invalid; 3980 return float64_default_nan(s); 3981 } 3982 3983 /* frec7(qNaN) = canonical NaN */ 3984 if (float64_is_quiet_nan(f, s)) { 3985 return float64_default_nan(s); 3986 } 3987 3988 /* +-normal, +-subnormal */ 3989 uint64_t val = frec7(f, exp_size, frac_size, s); 3990 return make_float64(val); 3991 } 3992 3993 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3994 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3995 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3996 GEN_VEXT_V_ENV(vfrec7_v_h) 3997 GEN_VEXT_V_ENV(vfrec7_v_w) 3998 GEN_VEXT_V_ENV(vfrec7_v_d) 3999 4000 /* Vector Floating-Point MIN/MAX Instructions */ 4001 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4002 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) 4003 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4004 GEN_VEXT_VV_ENV(vfmin_vv_h) 4005 GEN_VEXT_VV_ENV(vfmin_vv_w) 4006 GEN_VEXT_VV_ENV(vfmin_vv_d) 4007 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4008 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4009 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4010 GEN_VEXT_VF(vfmin_vf_h) 4011 
GEN_VEXT_VF(vfmin_vf_w) 4012 GEN_VEXT_VF(vfmin_vf_d) 4013 4014 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4015 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4016 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4017 GEN_VEXT_VV_ENV(vfmax_vv_h) 4018 GEN_VEXT_VV_ENV(vfmax_vv_w) 4019 GEN_VEXT_VV_ENV(vfmax_vv_d) 4020 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4021 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4022 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4023 GEN_VEXT_VF(vfmax_vf_h) 4024 GEN_VEXT_VF(vfmax_vf_w) 4025 GEN_VEXT_VF(vfmax_vf_d) 4026 4027 /* Vector Floating-Point Sign-Injection Instructions */ 4028 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4029 { 4030 return deposit64(b, 0, 15, a); 4031 } 4032 4033 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4034 { 4035 return deposit64(b, 0, 31, a); 4036 } 4037 4038 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4039 { 4040 return deposit64(b, 0, 63, a); 4041 } 4042 4043 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4044 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4045 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4046 GEN_VEXT_VV_ENV(vfsgnj_vv_h) 4047 GEN_VEXT_VV_ENV(vfsgnj_vv_w) 4048 GEN_VEXT_VV_ENV(vfsgnj_vv_d) 4049 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4050 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4051 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4052 GEN_VEXT_VF(vfsgnj_vf_h) 4053 GEN_VEXT_VF(vfsgnj_vf_w) 4054 GEN_VEXT_VF(vfsgnj_vf_d) 4055 4056 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4057 { 4058 return deposit64(~b, 0, 15, a); 4059 } 4060 4061 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4062 { 4063 return deposit64(~b, 0, 31, a); 4064 } 4065 4066 static uint64_t 
fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4067 { 4068 return deposit64(~b, 0, 63, a); 4069 } 4070 4071 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4072 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4073 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4074 GEN_VEXT_VV_ENV(vfsgnjn_vv_h) 4075 GEN_VEXT_VV_ENV(vfsgnjn_vv_w) 4076 GEN_VEXT_VV_ENV(vfsgnjn_vv_d) 4077 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4078 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4079 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4080 GEN_VEXT_VF(vfsgnjn_vf_h) 4081 GEN_VEXT_VF(vfsgnjn_vf_w) 4082 GEN_VEXT_VF(vfsgnjn_vf_d) 4083 4084 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4085 { 4086 return deposit64(b ^ a, 0, 15, a); 4087 } 4088 4089 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4090 { 4091 return deposit64(b ^ a, 0, 31, a); 4092 } 4093 4094 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 4095 { 4096 return deposit64(b ^ a, 0, 63, a); 4097 } 4098 4099 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4100 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4101 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4102 GEN_VEXT_VV_ENV(vfsgnjx_vv_h) 4103 GEN_VEXT_VV_ENV(vfsgnjx_vv_w) 4104 GEN_VEXT_VV_ENV(vfsgnjx_vv_d) 4105 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4106 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4107 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4108 GEN_VEXT_VF(vfsgnjx_vf_h) 4109 GEN_VEXT_VF(vfsgnjx_vf_w) 4110 GEN_VEXT_VF(vfsgnjx_vf_d) 4111 4112 /* Vector Floating-Point Compare Instructions */ 4113 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4114 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4115 CPURISCVState *env, uint32_t desc) \ 4116 { \ 4117 uint32_t vm = vext_vm(desc); \ 4118 uint32_t vl = env->vl; \ 4119 uint32_t i; \ 
4120 \ 4121 for (i = env->vstart; i < vl; i++) { \ 4122 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4123 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4124 if (!vm && !vext_elem_mask(v0, i)) { \ 4125 continue; \ 4126 } \ 4127 vext_set_elem_mask(vd, i, \ 4128 DO_OP(s2, s1, &env->fp_status)); \ 4129 } \ 4130 env->vstart = 0; \ 4131 } 4132 4133 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4134 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4135 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4136 4137 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4138 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4139 CPURISCVState *env, uint32_t desc) \ 4140 { \ 4141 uint32_t vm = vext_vm(desc); \ 4142 uint32_t vl = env->vl; \ 4143 uint32_t i; \ 4144 \ 4145 for (i = env->vstart; i < vl; i++) { \ 4146 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4147 if (!vm && !vext_elem_mask(v0, i)) { \ 4148 continue; \ 4149 } \ 4150 vext_set_elem_mask(vd, i, \ 4151 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4152 } \ 4153 env->vstart = 0; \ 4154 } 4155 4156 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4157 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4158 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4159 4160 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4161 { 4162 FloatRelation compare = float16_compare_quiet(a, b, s); 4163 return compare != float_relation_equal; 4164 } 4165 4166 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4167 { 4168 FloatRelation compare = float32_compare_quiet(a, b, s); 4169 return compare != float_relation_equal; 4170 } 4171 4172 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4173 { 4174 FloatRelation compare = float64_compare_quiet(a, b, s); 4175 return compare != float_relation_equal; 4176 } 4177 4178 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4179 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4180 
GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4181 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4182 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4183 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4184 4185 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4186 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4187 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4188 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4189 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4190 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4191 4192 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4193 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4194 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4195 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4196 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4197 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4198 4199 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4200 { 4201 FloatRelation compare = float16_compare(a, b, s); 4202 return compare == float_relation_greater; 4203 } 4204 4205 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4206 { 4207 FloatRelation compare = float32_compare(a, b, s); 4208 return compare == float_relation_greater; 4209 } 4210 4211 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4212 { 4213 FloatRelation compare = float64_compare(a, b, s); 4214 return compare == float_relation_greater; 4215 } 4216 4217 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4218 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4219 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4220 4221 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4222 { 4223 FloatRelation compare = float16_compare(a, b, s); 4224 return compare == float_relation_greater || 4225 compare == float_relation_equal; 4226 } 4227 4228 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 
4229 { 4230 FloatRelation compare = float32_compare(a, b, s); 4231 return compare == float_relation_greater || 4232 compare == float_relation_equal; 4233 } 4234 4235 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4236 { 4237 FloatRelation compare = float64_compare(a, b, s); 4238 return compare == float_relation_greater || 4239 compare == float_relation_equal; 4240 } 4241 4242 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4243 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4244 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4245 4246 /* Vector Floating-Point Classify Instruction */ 4247 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4248 static void do_##NAME(void *vd, void *vs2, int i) \ 4249 { \ 4250 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4251 *((TD *)vd + HD(i)) = OP(s2); \ 4252 } 4253 4254 #define GEN_VEXT_V(NAME) \ 4255 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4256 CPURISCVState *env, uint32_t desc) \ 4257 { \ 4258 uint32_t vm = vext_vm(desc); \ 4259 uint32_t vl = env->vl; \ 4260 uint32_t i; \ 4261 \ 4262 for (i = env->vstart; i < vl; i++) { \ 4263 if (!vm && !vext_elem_mask(v0, i)) { \ 4264 continue; \ 4265 } \ 4266 do_##NAME(vd, vs2, i); \ 4267 } \ 4268 env->vstart = 0; \ 4269 } 4270 4271 target_ulong fclass_h(uint64_t frs1) 4272 { 4273 float16 f = frs1; 4274 bool sign = float16_is_neg(f); 4275 4276 if (float16_is_infinity(f)) { 4277 return sign ? 1 << 0 : 1 << 7; 4278 } else if (float16_is_zero(f)) { 4279 return sign ? 1 << 3 : 1 << 4; 4280 } else if (float16_is_zero_or_denormal(f)) { 4281 return sign ? 1 << 2 : 1 << 5; 4282 } else if (float16_is_any_nan(f)) { 4283 float_status s = { }; /* for snan_bit_is_one */ 4284 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4285 } else { 4286 return sign ? 1 << 1 : 1 << 6; 4287 } 4288 } 4289 4290 target_ulong fclass_s(uint64_t frs1) 4291 { 4292 float32 f = frs1; 4293 bool sign = float32_is_neg(f); 4294 4295 if (float32_is_infinity(f)) { 4296 return sign ? 
1 << 0 : 1 << 7; 4297 } else if (float32_is_zero(f)) { 4298 return sign ? 1 << 3 : 1 << 4; 4299 } else if (float32_is_zero_or_denormal(f)) { 4300 return sign ? 1 << 2 : 1 << 5; 4301 } else if (float32_is_any_nan(f)) { 4302 float_status s = { }; /* for snan_bit_is_one */ 4303 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4304 } else { 4305 return sign ? 1 << 1 : 1 << 6; 4306 } 4307 } 4308 4309 target_ulong fclass_d(uint64_t frs1) 4310 { 4311 float64 f = frs1; 4312 bool sign = float64_is_neg(f); 4313 4314 if (float64_is_infinity(f)) { 4315 return sign ? 1 << 0 : 1 << 7; 4316 } else if (float64_is_zero(f)) { 4317 return sign ? 1 << 3 : 1 << 4; 4318 } else if (float64_is_zero_or_denormal(f)) { 4319 return sign ? 1 << 2 : 1 << 5; 4320 } else if (float64_is_any_nan(f)) { 4321 float_status s = { }; /* for snan_bit_is_one */ 4322 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4323 } else { 4324 return sign ? 1 << 1 : 1 << 6; 4325 } 4326 } 4327 4328 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4329 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4330 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4331 GEN_VEXT_V(vfclass_v_h) 4332 GEN_VEXT_V(vfclass_v_w) 4333 GEN_VEXT_V(vfclass_v_d) 4334 4335 /* Vector Floating-Point Merge Instruction */ 4336 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4337 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4338 CPURISCVState *env, uint32_t desc) \ 4339 { \ 4340 uint32_t vm = vext_vm(desc); \ 4341 uint32_t vl = env->vl; \ 4342 uint32_t i; \ 4343 \ 4344 for (i = env->vstart; i < vl; i++) { \ 4345 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4346 *((ETYPE *)vd + H(i)) \ 4347 = (!vm && !vext_elem_mask(v0, i) ? 
s2 : s1); \ 4348 } \ 4349 env->vstart = 0; \ 4350 } 4351 4352 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4353 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4354 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4355 4356 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4357 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4358 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4359 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4360 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4361 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) 4362 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) 4363 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) 4364 4365 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4366 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4367 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4368 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4369 GEN_VEXT_V_ENV(vfcvt_x_f_v_h) 4370 GEN_VEXT_V_ENV(vfcvt_x_f_v_w) 4371 GEN_VEXT_V_ENV(vfcvt_x_f_v_d) 4372 4373 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4374 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4375 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4376 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4377 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) 4378 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) 4379 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) 4380 4381 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. 
*/ 4382 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4383 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4384 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4385 GEN_VEXT_V_ENV(vfcvt_f_x_v_h) 4386 GEN_VEXT_V_ENV(vfcvt_f_x_v_w) 4387 GEN_VEXT_V_ENV(vfcvt_f_x_v_d) 4388 4389 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4390 /* (TD, T2, TX2) */ 4391 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4392 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4393 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4394 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4395 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4396 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4397 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) 4398 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) 4399 4400 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4401 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4402 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4403 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) 4404 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) 4405 4406 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4407 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4408 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4409 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4410 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) 4411 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) 4412 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) 4413 4414 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. 
*/ 4415 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4416 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4417 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4418 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) 4419 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) 4420 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) 4421 4422 /* 4423 * vfwcvt.f.f.v vd, vs2, vm 4424 * Convert single-width float to double-width float. 4425 */ 4426 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4427 { 4428 return float16_to_float32(a, true, s); 4429 } 4430 4431 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4432 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4433 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) 4434 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) 4435 4436 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4437 /* (TD, T2, TX2) */ 4438 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4439 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4440 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4441 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4442 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4443 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4444 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4445 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) 4446 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) 4447 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) 4448 4449 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. 
*/ 4450 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4451 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4452 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4453 GEN_VEXT_V_ENV(vfncvt_x_f_w_b) 4454 GEN_VEXT_V_ENV(vfncvt_x_f_w_h) 4455 GEN_VEXT_V_ENV(vfncvt_x_f_w_w) 4456 4457 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4458 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4459 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4460 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) 4461 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) 4462 4463 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4464 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4465 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4466 GEN_VEXT_V_ENV(vfncvt_f_x_w_h) 4467 GEN_VEXT_V_ENV(vfncvt_f_x_w_w) 4468 4469 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4470 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4471 { 4472 return float32_to_float16(a, true, s); 4473 } 4474 4475 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4476 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4477 GEN_VEXT_V_ENV(vfncvt_f_f_w_h) 4478 GEN_VEXT_V_ENV(vfncvt_f_f_w_w) 4479 4480 /* 4481 *** Vector Reduction Operations 4482 */ 4483 /* Vector Single-Width Integer Reduction Instructions */ 4484 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4485 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4486 void *vs2, CPURISCVState *env, uint32_t desc) \ 4487 { \ 4488 uint32_t vm = vext_vm(desc); \ 4489 uint32_t vl = env->vl; \ 4490 uint32_t i; \ 4491 TD s1 = *((TD *)vs1 + HD(0)); \ 4492 \ 4493 for (i = env->vstart; i < vl; i++) { \ 4494 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4495 if (!vm && !vext_elem_mask(v0, i)) { \ 4496 continue; \ 4497 } \ 4498 s1 = OP(s1, (TD)s2); \ 4499 } \ 4500 *((TD *)vd + HD(0)) = s1; \ 4501 env->vstart = 0; \ 4502 } 4503 4504 /* vd[0] = sum(vs1[0], vs2[*]) */ 4505 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4506 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4507 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4508 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4509 4510 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4511 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4512 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4513 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4514 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4515 4516 /* vd[0] = max(vs1[0], vs2[*]) */ 4517 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4518 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4519 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4520 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4521 4522 /* vd[0] = minu(vs1[0], vs2[*]) */ 
4523 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4524 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4525 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4526 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4527 4528 /* vd[0] = min(vs1[0], vs2[*]) */ 4529 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4530 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4531 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4532 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4533 4534 /* vd[0] = and(vs1[0], vs2[*]) */ 4535 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4536 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4537 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4538 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4539 4540 /* vd[0] = or(vs1[0], vs2[*]) */ 4541 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4542 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4543 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4544 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4545 4546 /* vd[0] = xor(vs1[0], vs2[*]) */ 4547 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4548 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4549 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4550 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4551 4552 /* Vector Widening Integer Reduction Instructions */ 4553 /* signed sum reduction into double-width accumulator */ 4554 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4555 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4556 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4557 4558 /* Unsigned sum reduction into double-width accumulator */ 4559 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4560 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, 
uint16_t, H4, H2, DO_ADD) 4561 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4562 4563 /* Vector Single-Width Floating-Point Reduction Instructions */ 4564 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4565 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4566 void *vs2, CPURISCVState *env, \ 4567 uint32_t desc) \ 4568 { \ 4569 uint32_t vm = vext_vm(desc); \ 4570 uint32_t vl = env->vl; \ 4571 uint32_t i; \ 4572 TD s1 = *((TD *)vs1 + HD(0)); \ 4573 \ 4574 for (i = env->vstart; i < vl; i++) { \ 4575 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4576 if (!vm && !vext_elem_mask(v0, i)) { \ 4577 continue; \ 4578 } \ 4579 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4580 } \ 4581 *((TD *)vd + HD(0)) = s1; \ 4582 env->vstart = 0; \ 4583 } 4584 4585 /* Unordered sum */ 4586 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4587 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4588 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4589 4590 /* Maximum value */ 4591 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4592 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4593 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4594 4595 /* Minimum value */ 4596 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4597 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4598 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4599 4600 /* Vector Widening Floating-Point Reduction Instructions */ 4601 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4602 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4603 void *vs2, CPURISCVState *env, uint32_t desc) 4604 { 4605 uint32_t vm = vext_vm(desc); 4606 uint32_t vl = env->vl; 4607 uint32_t i; 4608 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4609 4610 for (i = env->vstart; i < 
vl; i++) { 4611 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4612 if (!vm && !vext_elem_mask(v0, i)) { 4613 continue; 4614 } 4615 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4616 &env->fp_status); 4617 } 4618 *((uint32_t *)vd + H4(0)) = s1; 4619 env->vstart = 0; 4620 } 4621 4622 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4623 void *vs2, CPURISCVState *env, uint32_t desc) 4624 { 4625 uint32_t vm = vext_vm(desc); 4626 uint32_t vl = env->vl; 4627 uint32_t i; 4628 uint64_t s1 = *((uint64_t *)vs1); 4629 4630 for (i = env->vstart; i < vl; i++) { 4631 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4632 if (!vm && !vext_elem_mask(v0, i)) { 4633 continue; 4634 } 4635 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4636 &env->fp_status); 4637 } 4638 *((uint64_t *)vd) = s1; 4639 env->vstart = 0; 4640 } 4641 4642 /* 4643 *** Vector Mask Operations 4644 */ 4645 /* Vector Mask-Register Logical Instructions */ 4646 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4647 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4648 void *vs2, CPURISCVState *env, \ 4649 uint32_t desc) \ 4650 { \ 4651 uint32_t vl = env->vl; \ 4652 uint32_t i; \ 4653 int a, b; \ 4654 \ 4655 for (i = env->vstart; i < vl; i++) { \ 4656 a = vext_elem_mask(vs1, i); \ 4657 b = vext_elem_mask(vs2, i); \ 4658 vext_set_elem_mask(vd, i, OP(b, a)); \ 4659 } \ 4660 env->vstart = 0; \ 4661 } 4662 4663 #define DO_NAND(N, M) (!(N & M)) 4664 #define DO_ANDNOT(N, M) (N & !M) 4665 #define DO_NOR(N, M) (!(N | M)) 4666 #define DO_ORNOT(N, M) (N | !M) 4667 #define DO_XNOR(N, M) (!(N ^ M)) 4668 4669 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4670 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4671 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4672 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4673 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4674 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4675 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4676 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4677 4678 /* Vector count population in mask vcpop */ 4679 target_ulong 
HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4680 uint32_t desc) 4681 { 4682 target_ulong cnt = 0; 4683 uint32_t vm = vext_vm(desc); 4684 uint32_t vl = env->vl; 4685 int i; 4686 4687 for (i = env->vstart; i < vl; i++) { 4688 if (vm || vext_elem_mask(v0, i)) { 4689 if (vext_elem_mask(vs2, i)) { 4690 cnt++; 4691 } 4692 } 4693 } 4694 env->vstart = 0; 4695 return cnt; 4696 } 4697 4698 /* vfirst find-first-set mask bit*/ 4699 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4700 uint32_t desc) 4701 { 4702 uint32_t vm = vext_vm(desc); 4703 uint32_t vl = env->vl; 4704 int i; 4705 4706 for (i = env->vstart; i < vl; i++) { 4707 if (vm || vext_elem_mask(v0, i)) { 4708 if (vext_elem_mask(vs2, i)) { 4709 return i; 4710 } 4711 } 4712 } 4713 env->vstart = 0; 4714 return -1LL; 4715 } 4716 4717 enum set_mask_type { 4718 ONLY_FIRST = 1, 4719 INCLUDE_FIRST, 4720 BEFORE_FIRST, 4721 }; 4722 4723 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4724 uint32_t desc, enum set_mask_type type) 4725 { 4726 uint32_t vm = vext_vm(desc); 4727 uint32_t vl = env->vl; 4728 int i; 4729 bool first_mask_bit = false; 4730 4731 for (i = env->vstart; i < vl; i++) { 4732 if (!vm && !vext_elem_mask(v0, i)) { 4733 continue; 4734 } 4735 /* write a zero to all following active elements */ 4736 if (first_mask_bit) { 4737 vext_set_elem_mask(vd, i, 0); 4738 continue; 4739 } 4740 if (vext_elem_mask(vs2, i)) { 4741 first_mask_bit = true; 4742 if (type == BEFORE_FIRST) { 4743 vext_set_elem_mask(vd, i, 0); 4744 } else { 4745 vext_set_elem_mask(vd, i, 1); 4746 } 4747 } else { 4748 if (type == ONLY_FIRST) { 4749 vext_set_elem_mask(vd, i, 0); 4750 } else { 4751 vext_set_elem_mask(vd, i, 1); 4752 } 4753 } 4754 } 4755 env->vstart = 0; 4756 } 4757 4758 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4759 uint32_t desc) 4760 { 4761 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4762 } 4763 4764 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, 
CPURISCVState *env, 4765 uint32_t desc) 4766 { 4767 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4768 } 4769 4770 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4771 uint32_t desc) 4772 { 4773 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4774 } 4775 4776 /* Vector Iota Instruction */ 4777 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4778 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4779 uint32_t desc) \ 4780 { \ 4781 uint32_t vm = vext_vm(desc); \ 4782 uint32_t vl = env->vl; \ 4783 uint32_t sum = 0; \ 4784 int i; \ 4785 \ 4786 for (i = env->vstart; i < vl; i++) { \ 4787 if (!vm && !vext_elem_mask(v0, i)) { \ 4788 continue; \ 4789 } \ 4790 *((ETYPE *)vd + H(i)) = sum; \ 4791 if (vext_elem_mask(vs2, i)) { \ 4792 sum++; \ 4793 } \ 4794 } \ 4795 env->vstart = 0; \ 4796 } 4797 4798 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4799 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4800 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4801 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4802 4803 /* Vector Element Index Instruction */ 4804 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4805 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4806 { \ 4807 uint32_t vm = vext_vm(desc); \ 4808 uint32_t vl = env->vl; \ 4809 int i; \ 4810 \ 4811 for (i = env->vstart; i < vl; i++) { \ 4812 if (!vm && !vext_elem_mask(v0, i)) { \ 4813 continue; \ 4814 } \ 4815 *((ETYPE *)vd + H(i)) = i; \ 4816 } \ 4817 env->vstart = 0; \ 4818 } 4819 4820 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4821 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4822 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4823 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4824 4825 /* 4826 *** Vector Permutation Instructions 4827 */ 4828 4829 /* Vector Slide Instructions */ 4830 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4831 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4832 CPURISCVState *env, uint32_t desc) \ 4833 { \ 4834 uint32_t vm = vext_vm(desc); \ 4835 uint32_t vl = env->vl; 
\ 4836 target_ulong offset = s1, i_min, i; \ 4837 \ 4838 i_min = MAX(env->vstart, offset); \ 4839 for (i = i_min; i < vl; i++) { \ 4840 if (!vm && !vext_elem_mask(v0, i)) { \ 4841 continue; \ 4842 } \ 4843 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4844 } \ 4845 } 4846 4847 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4848 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4849 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4850 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4851 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4852 4853 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4854 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4855 CPURISCVState *env, uint32_t desc) \ 4856 { \ 4857 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4858 uint32_t vm = vext_vm(desc); \ 4859 uint32_t vl = env->vl; \ 4860 target_ulong i_max, i; \ 4861 \ 4862 i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \ 4863 for (i = env->vstart; i < i_max; ++i) { \ 4864 if (vm || vext_elem_mask(v0, i)) { \ 4865 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4866 } \ 4867 } \ 4868 \ 4869 for (i = i_max; i < vl; ++i) { \ 4870 if (vm || vext_elem_mask(v0, i)) { \ 4871 *((ETYPE *)vd + H(i)) = 0; \ 4872 } \ 4873 } \ 4874 \ 4875 env->vstart = 0; \ 4876 } 4877 4878 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4879 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4880 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4881 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4882 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4883 4884 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ 4885 static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ 4886 void *vs2, CPURISCVState *env, uint32_t desc) \ 4887 { \ 4888 typedef uint##BITWIDTH##_t ETYPE; \ 4889 uint32_t vm = vext_vm(desc); \ 4890 uint32_t vl = env->vl; \ 4891 uint32_t i; \ 4892 \ 4893 for (i = env->vstart; i < vl; i++) 
{ \ 4894 if (!vm && !vext_elem_mask(v0, i)) { \ 4895 continue; \ 4896 } \ 4897 if (i == 0) { \ 4898 *((ETYPE *)vd + H(i)) = s1; \ 4899 } else { \ 4900 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4901 } \ 4902 } \ 4903 env->vstart = 0; \ 4904 } 4905 4906 GEN_VEXT_VSLIE1UP(8, H1) 4907 GEN_VEXT_VSLIE1UP(16, H2) 4908 GEN_VEXT_VSLIE1UP(32, H4) 4909 GEN_VEXT_VSLIE1UP(64, H8) 4910 4911 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ 4912 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4913 CPURISCVState *env, uint32_t desc) \ 4914 { \ 4915 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4916 } 4917 4918 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4919 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4920 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4921 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4922 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4923 4924 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \ 4925 static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ 4926 void *vs2, CPURISCVState *env, uint32_t desc) \ 4927 { \ 4928 typedef uint##BITWIDTH##_t ETYPE; \ 4929 uint32_t vm = vext_vm(desc); \ 4930 uint32_t vl = env->vl; \ 4931 uint32_t i; \ 4932 \ 4933 for (i = env->vstart; i < vl; i++) { \ 4934 if (!vm && !vext_elem_mask(v0, i)) { \ 4935 continue; \ 4936 } \ 4937 if (i == vl - 1) { \ 4938 *((ETYPE *)vd + H(i)) = s1; \ 4939 } else { \ 4940 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4941 } \ 4942 } \ 4943 env->vstart = 0; \ 4944 } 4945 4946 GEN_VEXT_VSLIDE1DOWN(8, H1) 4947 GEN_VEXT_VSLIDE1DOWN(16, H2) 4948 GEN_VEXT_VSLIDE1DOWN(32, H4) 4949 GEN_VEXT_VSLIDE1DOWN(64, H8) 4950 4951 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ 4952 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4953 CPURISCVState *env, uint32_t desc) \ 4954 { \ 4955 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4956 } 4957 4958 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4959 
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4960 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4961 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4962 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4963 4964 /* Vector Floating-Point Slide Instructions */ 4965 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \ 4966 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4967 CPURISCVState *env, uint32_t desc) \ 4968 { \ 4969 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4970 } 4971 4972 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */ 4973 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4974 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4975 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4976 4977 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \ 4978 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4979 CPURISCVState *env, uint32_t desc) \ 4980 { \ 4981 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4982 } 4983 4984 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4985 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4986 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4987 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4988 4989 /* Vector Register Gather Instruction */ 4990 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4991 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4992 CPURISCVState *env, uint32_t desc) \ 4993 { \ 4994 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4995 uint32_t vm = vext_vm(desc); \ 4996 uint32_t vl = env->vl; \ 4997 uint64_t index; \ 4998 uint32_t i; \ 4999 \ 5000 for (i = env->vstart; i < vl; i++) { \ 5001 if (!vm && !vext_elem_mask(v0, i)) { \ 5002 continue; \ 5003 } \ 5004 index = *((TS1 *)vs1 + HS1(i)); \ 5005 if (index >= vlmax) { \ 5006 *((TS2 *)vd + HS2(i)) = 0; \ 5007 } else { \ 5008 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 5009 } \ 5010 } \ 5011 env->vstart = 0; \ 5012 } 5013 5014 /* vd[i] = (vs1[i] >= 
VLMAX) ? 0 : vs2[vs1[i]]; */ 5015 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 5016 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 5017 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 5018 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 5019 5020 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 5021 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 5022 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 5023 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 5024 5025 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 5026 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 5027 CPURISCVState *env, uint32_t desc) \ 5028 { \ 5029 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 5030 uint32_t vm = vext_vm(desc); \ 5031 uint32_t vl = env->vl; \ 5032 uint64_t index = s1; \ 5033 uint32_t i; \ 5034 \ 5035 for (i = env->vstart; i < vl; i++) { \ 5036 if (!vm && !vext_elem_mask(v0, i)) { \ 5037 continue; \ 5038 } \ 5039 if (index >= vlmax) { \ 5040 *((ETYPE *)vd + H(i)) = 0; \ 5041 } else { \ 5042 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 5043 } \ 5044 } \ 5045 env->vstart = 0; \ 5046 } 5047 5048 /* vd[i] = (x[rs1] >= VLMAX) ? 
0 : vs2[rs1] */ 5049 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 5050 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 5051 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 5052 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 5053 5054 /* Vector Compress Instruction */ 5055 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 5056 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 5057 CPURISCVState *env, uint32_t desc) \ 5058 { \ 5059 uint32_t vl = env->vl; \ 5060 uint32_t num = 0, i; \ 5061 \ 5062 for (i = env->vstart; i < vl; i++) { \ 5063 if (!vext_elem_mask(vs1, i)) { \ 5064 continue; \ 5065 } \ 5066 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 5067 num++; \ 5068 } \ 5069 env->vstart = 0; \ 5070 } 5071 5072 /* Compress into vd elements of vs2 where vs1 is enabled */ 5073 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 5074 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 5075 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 5076 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 5077 5078 /* Vector Whole Register Move */ 5079 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) 5080 { 5081 /* EEW = SEW */ 5082 uint32_t maxsz = simd_maxsz(desc); 5083 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); 5084 uint32_t startb = env->vstart * sewb; 5085 uint32_t i = startb; 5086 5087 memcpy((uint8_t *)vd + H1(i), 5088 (uint8_t *)vs2 + H1(i), 5089 maxsz - startb); 5090 5091 env->vstart = 0; 5092 } 5093 5094 /* Vector Integer Extension */ 5095 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \ 5096 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 5097 CPURISCVState *env, uint32_t desc) \ 5098 { \ 5099 uint32_t vl = env->vl; \ 5100 uint32_t vm = vext_vm(desc); \ 5101 uint32_t i; \ 5102 \ 5103 for (i = env->vstart; i < vl; i++) { \ 5104 if (!vm && !vext_elem_mask(v0, i)) { \ 5105 continue; \ 5106 } \ 5107 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ 5108 } \ 5109 env->vstart = 0; \ 5110 } 5111 
5112 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) 5113 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2) 5114 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4) 5115 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1) 5116 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2) 5117 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1) 5118 5119 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1) 5120 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2) 5121 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) 5122 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) 5123 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) 5124 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) 5125