/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL - check LMUL * VLEN >= SEW */
        if (lmul == 4 ||
            cpu->cfg.vlen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
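 *
 * For example, with byte elements on a big-endian host, logical element 0
 * is the least-significant byte of the first 64-bit chunk, i.e. it lives at
 * host byte offset H1(0) = 0 ^ 7 = 7, while on a little-endian host
 * H1(0) = 0 and no fixup is needed.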
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

static inline uint32_t vext_vta(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA);
}

static inline uint32_t vext_vma(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VMA);
}

static inline uint32_t vext_vta_all_1s(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * Get the total number of elements, including prestart, body and tail
 * elements. Note that when LMUL < 1, the tail includes the elements past
 * VLMAX that are held in the same vector register.
 */
static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
                                            uint32_t esz)
{
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
    return (vlenb << emul) / esz;
}

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & ~env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
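 *
 * For illustration, assuming 4 KiB target pages: a 16-byte access starting
 * at addr = 0xffc yields pagelen = -(addr | TARGET_PAGE_MASK) = 4, so the
 * first probe covers the 4 bytes up to the page boundary and a second probe
 * covers the remaining 12 bytes starting at 0x1000.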
185 */ 186 static void probe_pages(CPURISCVState *env, target_ulong addr, 187 target_ulong len, uintptr_t ra, 188 MMUAccessType access_type) 189 { 190 target_ulong pagelen = -(addr | TARGET_PAGE_MASK); 191 target_ulong curlen = MIN(pagelen, len); 192 193 probe_access(env, adjust_addr(env, addr), curlen, access_type, 194 cpu_mmu_index(env, false), ra); 195 if (len > curlen) { 196 addr += curlen; 197 curlen = len - curlen; 198 probe_access(env, adjust_addr(env, addr), curlen, access_type, 199 cpu_mmu_index(env, false), ra); 200 } 201 } 202 203 /* set agnostic elements to 1s */ 204 static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, 205 uint32_t tot) 206 { 207 if (is_agnostic == 0) { 208 /* policy undisturbed */ 209 return; 210 } 211 if (tot - cnt == 0) { 212 return; 213 } 214 memset(base + cnt, -1, tot - cnt); 215 } 216 217 static inline void vext_set_elem_mask(void *v0, int index, 218 uint8_t value) 219 { 220 int idx = index / 64; 221 int pos = index % 64; 222 uint64_t old = ((uint64_t *)v0)[idx]; 223 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); 224 } 225 226 /* 227 * Earlier designs (pre-0.9) had a varying number of bits 228 * per mask value (MLEN). In the 0.9 design, MLEN=1. 229 * (Section 4.5) 230 */ 231 static inline int vext_elem_mask(void *v0, int index) 232 { 233 int idx = index / 64; 234 int pos = index % 64; 235 return (((uint64_t *)v0)[idx] >> pos) & 1; 236 } 237 238 /* elements operations for load and store */ 239 typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr, 240 uint32_t idx, void *vd, uintptr_t retaddr); 241 242 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 243 static void NAME(CPURISCVState *env, abi_ptr addr, \ 244 uint32_t idx, void *vd, uintptr_t retaddr)\ 245 { \ 246 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 247 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 248 } \ 249 250 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) 251 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw) 252 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) 253 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq) 254 255 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 256 static void NAME(CPURISCVState *env, abi_ptr addr, \ 257 uint32_t idx, void *vd, uintptr_t retaddr)\ 258 { \ 259 ETYPE data = *((ETYPE *)vd + H(idx)); \ 260 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 261 } 262 263 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) 264 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) 265 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) 266 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) 267 268 static void vext_set_tail_elems_1s(target_ulong vl, void *vd, 269 uint32_t desc, uint32_t nf, 270 uint32_t esz, uint32_t max_elems) 271 { 272 uint32_t vta = vext_vta(desc); 273 int k; 274 275 if (vta == 0) { 276 return; 277 } 278 279 for (k = 0; k < nf; ++k) { 280 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz, 281 (k * max_elems + max_elems) * esz); 282 } 283 } 284 285 /* 286 * stride: access vector element from strided memory 287 */ 288 static void 289 vext_ldst_stride(void *vd, void *v0, target_ulong base, 290 target_ulong stride, CPURISCVState *env, 291 uint32_t desc, uint32_t vm, 292 vext_ldst_elem_fn *ldst_elem, 293 uint32_t log2_esz, uintptr_t ra) 294 { 295 uint32_t i, k; 296 uint32_t nf = vext_nf(desc); 297 uint32_t max_elems = vext_max_elems(desc, log2_esz); 298 uint32_t esz = 1 << log2_esz; 299 uint32_t vma = vext_vma(desc); 300 301 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 302 k = 0; 303 while (k < nf) { 304 if (!vm && !vext_elem_mask(v0, i)) { 305 /* set masked-off 
elements to 1s */ 306 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 307 (i + k * max_elems + 1) * esz); 308 k++; 309 continue; 310 } 311 target_ulong addr = base + stride * i + (k << log2_esz); 312 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 313 k++; 314 } 315 } 316 env->vstart = 0; 317 318 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 319 } 320 321 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 322 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 323 target_ulong stride, CPURISCVState *env, \ 324 uint32_t desc) \ 325 { \ 326 uint32_t vm = vext_vm(desc); \ 327 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 328 ctzl(sizeof(ETYPE)), GETPC()); \ 329 } 330 331 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 332 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 333 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 334 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 335 336 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 337 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 338 target_ulong stride, CPURISCVState *env, \ 339 uint32_t desc) \ 340 { \ 341 uint32_t vm = vext_vm(desc); \ 342 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 343 ctzl(sizeof(ETYPE)), GETPC()); \ 344 } 345 346 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 347 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 348 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 349 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 350 351 /* 352 * unit-stride: access elements stored contiguously in memory 353 */ 354 355 /* unmasked unit-stride load and store operation */ 356 static void 357 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 358 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, 359 uintptr_t ra) 360 { 361 uint32_t i, k; 362 uint32_t nf = vext_nf(desc); 363 uint32_t max_elems = vext_max_elems(desc, log2_esz); 364 uint32_t esz = 1 << log2_esz; 365 366 /* load bytes from guest memory */ 367 for (i = env->vstart; i < evl; i++, env->vstart++) { 368 k = 0; 369 while (k < nf) { 370 target_ulong addr = base + ((i * nf + k) << log2_esz); 371 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 372 k++; 373 } 374 } 375 env->vstart = 0; 376 377 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); 378 } 379 380 /* 381 * masked unit-stride load and store operation will be a special case of 382 * stride, stride = NF * sizeof (ETYPE) 383 */ 384 385 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 386 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 387 CPURISCVState *env, uint32_t desc) \ 388 { \ 389 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 390 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 391 ctzl(sizeof(ETYPE)), GETPC()); \ 392 } \ 393 \ 394 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 395 CPURISCVState *env, uint32_t desc) \ 396 { \ 397 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 398 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 399 } 400 401 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 402 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 403 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 404 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 405 406 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 407 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 408 CPURISCVState *env, uint32_t desc) \ 409 { \ 410 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 411 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 412 ctzl(sizeof(ETYPE)), 
GETPC()); \ 413 } \ 414 \ 415 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 416 CPURISCVState *env, uint32_t desc) \ 417 { \ 418 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 419 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 420 } 421 422 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 423 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 424 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 425 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 426 427 /* 428 * unit stride mask load and store, EEW = 1 429 */ 430 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 431 CPURISCVState *env, uint32_t desc) 432 { 433 /* evl = ceil(vl/8) */ 434 uint8_t evl = (env->vl + 7) >> 3; 435 vext_ldst_us(vd, base, env, desc, lde_b, 436 0, evl, GETPC()); 437 } 438 439 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 440 CPURISCVState *env, uint32_t desc) 441 { 442 /* evl = ceil(vl/8) */ 443 uint8_t evl = (env->vl + 7) >> 3; 444 vext_ldst_us(vd, base, env, desc, ste_b, 445 0, evl, GETPC()); 446 } 447 448 /* 449 * index: access vector element from indexed memory 450 */ 451 typedef target_ulong vext_get_index_addr(target_ulong base, 452 uint32_t idx, void *vs2); 453 454 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 455 static target_ulong NAME(target_ulong base, \ 456 uint32_t idx, void *vs2) \ 457 { \ 458 return (base + *((ETYPE *)vs2 + H(idx))); \ 459 } 460 461 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 462 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 463 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 464 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 465 466 static inline void 467 vext_ldst_index(void *vd, void *v0, target_ulong base, 468 void *vs2, CPURISCVState *env, uint32_t desc, 469 vext_get_index_addr get_index_addr, 470 vext_ldst_elem_fn *ldst_elem, 471 uint32_t log2_esz, uintptr_t ra) 472 { 473 uint32_t i, k; 474 uint32_t nf = vext_nf(desc); 475 uint32_t vm = vext_vm(desc); 476 uint32_t max_elems = vext_max_elems(desc, log2_esz); 477 uint32_t esz = 1 << log2_esz; 478 uint32_t vma = vext_vma(desc); 479 480 /* load bytes from guest memory */ 481 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 482 k = 0; 483 while (k < nf) { 484 if (!vm && !vext_elem_mask(v0, i)) { 485 /* set masked-off elements to 1s */ 486 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 487 (i + k * max_elems + 1) * esz); 488 k++; 489 continue; 490 } 491 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); 492 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 493 k++; 494 } 495 } 496 env->vstart = 0; 497 498 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 499 } 500 501 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 502 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 503 void *vs2, CPURISCVState *env, uint32_t desc) \ 504 { \ 505 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 506 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ 507 } 508 509 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 510 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 511 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 512 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 513 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 514 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 515 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 516 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 517 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 518 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 519 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                 \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,       \
                    STORE_FN, ctzl(sizeof(ETYPE)),                \
                    GETPC());                                     \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 * unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ)) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }

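    /*
     * All elements up to the (possibly fault-reduced) vl have been
     * processed, so vstart can be cleared before the tail is set.
     */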
env->vstart = 0; 627 628 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 629 } 630 631 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ 632 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 633 CPURISCVState *env, uint32_t desc) \ 634 { \ 635 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \ 636 ctzl(sizeof(ETYPE)), GETPC()); \ 637 } 638 639 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b) 640 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h) 641 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w) 642 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) 643 644 #define DO_SWAP(N, M) (M) 645 #define DO_AND(N, M) (N & M) 646 #define DO_XOR(N, M) (N ^ M) 647 #define DO_OR(N, M) (N | M) 648 #define DO_ADD(N, M) (N + M) 649 650 /* Signed min/max */ 651 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 652 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 653 654 /* 655 * load and store whole register instructions 656 */ 657 static void 658 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 659 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra) 660 { 661 uint32_t i, k, off, pos; 662 uint32_t nf = vext_nf(desc); 663 uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3; 664 uint32_t max_elems = vlenb >> log2_esz; 665 666 k = env->vstart / max_elems; 667 off = env->vstart % max_elems; 668 669 if (off) { 670 /* load/store rest of elements of current segment pointed by vstart */ 671 for (pos = off; pos < max_elems; pos++, env->vstart++) { 672 target_ulong addr = base + ((pos + k * max_elems) << log2_esz); 673 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, 674 ra); 675 } 676 k++; 677 } 678 679 /* load/store elements for rest of segments */ 680 for (; k < nf; k++) { 681 for (i = 0; i < max_elems; i++, env->vstart++) { 682 target_ulong addr = base + ((i + k * max_elems) << log2_esz); 683 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 684 } 685 } 686 687 env->vstart = 0; 688 } 689 690 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 691 void HELPER(NAME)(void *vd, target_ulong base, \ 692 CPURISCVState *env, uint32_t desc) \ 693 { \ 694 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 695 ctzl(sizeof(ETYPE)), GETPC()); \ 696 } 697 698 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 699 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 700 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 701 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 702 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 703 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 704 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 705 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 706 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 707 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 708 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 709 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 710 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 711 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 712 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 713 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 714 715 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 716 void HELPER(NAME)(void *vd, target_ulong base, \ 717 CPURISCVState *env, uint32_t desc) \ 718 { \ 719 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 720 ctzl(sizeof(ETYPE)), GETPC()); \ 721 } 722 723 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 724 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 725 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 726 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 727 728 /* 729 * Vector Integer Arithmetic Instructions 730 */ 731 732 /* expand macro args before macro */ 733 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 734 735 /* (TD, T1, T2, TX1, TX2) */ 736 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 737 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 738 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 739 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 740 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 741 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 742 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 743 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 744 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 745 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 746 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 747 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 748 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 749 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 750 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 751 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 752 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 753 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 754 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 755 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 756 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 757 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 758 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 759 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 760 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 761 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 762 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 763 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 764 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 765 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 766 767 /* operation of two vector elements */ 768 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 769 770 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 771 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 772 { \ 773 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 774 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 775 *((TD *)vd + HD(i)) = OP(s2, s1); \ 776 } 777 #define DO_SUB(N, M) (N - M) 778 #define DO_RSUB(N, M) (M - N) 779 780 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 781 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 782 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 783 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 784 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 785 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 786 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 787 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 788 789 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 790 CPURISCVState *env, uint32_t desc, 791 opivv2_fn *fn, uint32_t esz) 792 { 793 uint32_t vm = vext_vm(desc); 794 uint32_t vl = env->vl; 795 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 796 uint32_t vta = vext_vta(desc); 797 uint32_t vma = vext_vma(desc); 798 uint32_t i; 799 800 for (i = env->vstart; i < vl; i++) { 801 if (!vm && !vext_elem_mask(v0, i)) { 802 /* set masked-off elements to 1s */ 803 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 804 
continue; 805 } 806 fn(vd, vs1, vs2, i); 807 } 808 env->vstart = 0; 809 /* set tail elements to 1s */ 810 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 811 } 812 813 /* generate the helpers for OPIVV */ 814 #define GEN_VEXT_VV(NAME, ESZ) \ 815 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 816 void *vs2, CPURISCVState *env, \ 817 uint32_t desc) \ 818 { \ 819 do_vext_vv(vd, v0, vs1, vs2, env, desc, \ 820 do_##NAME, ESZ); \ 821 } 822 823 GEN_VEXT_VV(vadd_vv_b, 1) 824 GEN_VEXT_VV(vadd_vv_h, 2) 825 GEN_VEXT_VV(vadd_vv_w, 4) 826 GEN_VEXT_VV(vadd_vv_d, 8) 827 GEN_VEXT_VV(vsub_vv_b, 1) 828 GEN_VEXT_VV(vsub_vv_h, 2) 829 GEN_VEXT_VV(vsub_vv_w, 4) 830 GEN_VEXT_VV(vsub_vv_d, 8) 831 832 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 833 834 /* 835 * (T1)s1 gives the real operator type. 836 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 837 */ 838 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 839 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 840 { \ 841 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 842 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 843 } 844 845 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 846 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 847 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 848 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 849 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 850 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 851 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 852 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 853 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 854 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 855 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 856 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 857 858 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 859 CPURISCVState *env, uint32_t desc, 860 opivx2_fn fn, uint32_t esz) 861 { 862 uint32_t vm = vext_vm(desc); 863 uint32_t vl = env->vl; 864 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 865 uint32_t vta = vext_vta(desc); 866 uint32_t vma = vext_vma(desc); 867 uint32_t i; 868 869 for (i = env->vstart; i < vl; i++) { 870 if (!vm && !vext_elem_mask(v0, i)) { 871 /* set masked-off elements to 1s */ 872 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 873 continue; 874 } 875 fn(vd, s1, vs2, i); 876 } 877 env->vstart = 0; 878 /* set tail elements to 1s */ 879 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 880 } 881 882 /* generate the helpers for OPIVX */ 883 #define GEN_VEXT_VX(NAME, ESZ) \ 884 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 885 void *vs2, CPURISCVState *env, \ 886 uint32_t desc) \ 887 { \ 888 do_vext_vx(vd, v0, s1, vs2, env, desc, \ 889 do_##NAME, ESZ); \ 890 } 891 892 GEN_VEXT_VX(vadd_vx_b, 1) 893 GEN_VEXT_VX(vadd_vx_h, 2) 894 GEN_VEXT_VX(vadd_vx_w, 4) 895 GEN_VEXT_VX(vadd_vx_d, 8) 896 GEN_VEXT_VX(vsub_vx_b, 1) 897 GEN_VEXT_VX(vsub_vx_h, 2) 898 GEN_VEXT_VX(vsub_vx_w, 4) 899 GEN_VEXT_VX(vsub_vx_d, 8) 900 GEN_VEXT_VX(vrsub_vx_b, 1) 901 GEN_VEXT_VX(vrsub_vx_h, 2) 902 GEN_VEXT_VX(vrsub_vx_w, 4) 903 GEN_VEXT_VX(vrsub_vx_d, 8) 904 905 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 906 { 907 intptr_t oprsz = simd_oprsz(desc); 908 intptr_t i; 909 910 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 911 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 912 } 913 } 914 915 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 916 { 
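    /* reverse subtract with a scalar operand: d[i] = b - a[i] per 16-bit lane */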
917 intptr_t oprsz = simd_oprsz(desc); 918 intptr_t i; 919 920 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 921 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 922 } 923 } 924 925 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 926 { 927 intptr_t oprsz = simd_oprsz(desc); 928 intptr_t i; 929 930 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 931 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 932 } 933 } 934 935 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 936 { 937 intptr_t oprsz = simd_oprsz(desc); 938 intptr_t i; 939 940 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 941 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 942 } 943 } 944 945 /* Vector Widening Integer Add/Subtract */ 946 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 947 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 948 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 949 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 950 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 951 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 952 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 953 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 954 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 955 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 956 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 957 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 958 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 959 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 960 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 961 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 962 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 963 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 964 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 965 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 966 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 967 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 968 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 969 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 970 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 971 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 972 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 973 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 974 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 975 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 976 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 977 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 978 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 979 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 980 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 981 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 982 GEN_VEXT_VV(vwaddu_vv_b, 2) 983 GEN_VEXT_VV(vwaddu_vv_h, 4) 984 GEN_VEXT_VV(vwaddu_vv_w, 8) 985 GEN_VEXT_VV(vwsubu_vv_b, 2) 986 GEN_VEXT_VV(vwsubu_vv_h, 4) 987 GEN_VEXT_VV(vwsubu_vv_w, 8) 988 GEN_VEXT_VV(vwadd_vv_b, 2) 989 GEN_VEXT_VV(vwadd_vv_h, 4) 990 GEN_VEXT_VV(vwadd_vv_w, 8) 991 GEN_VEXT_VV(vwsub_vv_b, 2) 992 GEN_VEXT_VV(vwsub_vv_h, 4) 993 GEN_VEXT_VV(vwsub_vv_w, 8) 994 GEN_VEXT_VV(vwaddu_wv_b, 2) 995 
GEN_VEXT_VV(vwaddu_wv_h, 4) 996 GEN_VEXT_VV(vwaddu_wv_w, 8) 997 GEN_VEXT_VV(vwsubu_wv_b, 2) 998 GEN_VEXT_VV(vwsubu_wv_h, 4) 999 GEN_VEXT_VV(vwsubu_wv_w, 8) 1000 GEN_VEXT_VV(vwadd_wv_b, 2) 1001 GEN_VEXT_VV(vwadd_wv_h, 4) 1002 GEN_VEXT_VV(vwadd_wv_w, 8) 1003 GEN_VEXT_VV(vwsub_wv_b, 2) 1004 GEN_VEXT_VV(vwsub_wv_h, 4) 1005 GEN_VEXT_VV(vwsub_wv_w, 8) 1006 1007 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 1008 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 1009 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 1010 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 1011 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 1012 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 1013 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 1014 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 1015 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 1016 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 1017 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 1018 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 1019 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 1020 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 1021 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 1022 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 1023 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 1024 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 1025 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 1026 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 1027 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 1028 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 1029 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 1030 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 1031 GEN_VEXT_VX(vwaddu_vx_b, 2) 1032 GEN_VEXT_VX(vwaddu_vx_h, 4) 1033 GEN_VEXT_VX(vwaddu_vx_w, 8) 1034 GEN_VEXT_VX(vwsubu_vx_b, 2) 1035 GEN_VEXT_VX(vwsubu_vx_h, 4) 1036 GEN_VEXT_VX(vwsubu_vx_w, 8) 1037 GEN_VEXT_VX(vwadd_vx_b, 2) 1038 GEN_VEXT_VX(vwadd_vx_h, 4) 1039 GEN_VEXT_VX(vwadd_vx_w, 8) 1040 GEN_VEXT_VX(vwsub_vx_b, 2) 1041 GEN_VEXT_VX(vwsub_vx_h, 4) 1042 GEN_VEXT_VX(vwsub_vx_w, 8) 1043 GEN_VEXT_VX(vwaddu_wx_b, 2) 1044 GEN_VEXT_VX(vwaddu_wx_h, 4) 1045 GEN_VEXT_VX(vwaddu_wx_w, 8) 1046 GEN_VEXT_VX(vwsubu_wx_b, 2) 1047 GEN_VEXT_VX(vwsubu_wx_h, 4) 1048 GEN_VEXT_VX(vwsubu_wx_w, 8) 1049 GEN_VEXT_VX(vwadd_wx_b, 2) 1050 GEN_VEXT_VX(vwadd_wx_h, 4) 1051 GEN_VEXT_VX(vwadd_wx_w, 8) 1052 GEN_VEXT_VX(vwsub_wx_b, 2) 1053 GEN_VEXT_VX(vwsub_wx_h, 4) 1054 GEN_VEXT_VX(vwsub_wx_w, 8) 1055 1056 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 1057 #define DO_VADC(N, M, C) (N + M + C) 1058 #define DO_VSBC(N, M, C) (N - M - C) 1059 1060 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 1061 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1062 CPURISCVState *env, uint32_t desc) \ 1063 { \ 1064 uint32_t vl = env->vl; \ 1065 uint32_t esz = sizeof(ETYPE); \ 1066 uint32_t total_elems = \ 1067 vext_get_total_elems(env, desc, esz); \ 1068 uint32_t vta = vext_vta(desc); \ 1069 uint32_t i; \ 1070 \ 1071 for (i = env->vstart; i < vl; i++) { \ 1072 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1073 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1074 ETYPE carry = vext_elem_mask(v0, i); \ 1075 \ 1076 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 1077 } \ 1078 env->vstart = 0; \ 1079 /* set tail elements to 1s */ \ 1080 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1081 } 1082 1083 
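/*
 * For illustration: GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
 * below produces HELPER(vadc_vvm_b), which computes for each body element
 *     vd[i] = (uint8_t)(vs2[i] + vs1[i] + carry), carry = mask bit i of v0
 * e.g. vs2[i] = 0xff, vs1[i] = 0x01, carry = 1  ->  vd[i] = 0x01 (wraps).
 * No carry-out is produced here; the vmadc/vmsbc helpers further down
 * compute that into a mask destination.
 */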
GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 1084 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 1085 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 1086 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 1087 1088 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 1089 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 1090 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 1091 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 1092 1093 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 1094 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1095 CPURISCVState *env, uint32_t desc) \ 1096 { \ 1097 uint32_t vl = env->vl; \ 1098 uint32_t esz = sizeof(ETYPE); \ 1099 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1100 uint32_t vta = vext_vta(desc); \ 1101 uint32_t i; \ 1102 \ 1103 for (i = env->vstart; i < vl; i++) { \ 1104 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1105 ETYPE carry = vext_elem_mask(v0, i); \ 1106 \ 1107 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1108 } \ 1109 env->vstart = 0; \ 1110 /* set tail elements to 1s */ \ 1111 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1112 } 1113 1114 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1115 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1116 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1117 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1118 1119 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1120 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1121 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1122 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1123 1124 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1125 (__typeof(N))(N + M) < N) 1126 #define DO_MSBC(N, M, C) (C ? 
N <= M : N < M) 1127 1128 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1129 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1130 CPURISCVState *env, uint32_t desc) \ 1131 { \ 1132 uint32_t vl = env->vl; \ 1133 uint32_t vm = vext_vm(desc); \ 1134 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 1135 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1136 uint32_t i; \ 1137 \ 1138 for (i = env->vstart; i < vl; i++) { \ 1139 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1140 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1141 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1142 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1143 } \ 1144 env->vstart = 0; \ 1145 /* 1146 * mask destination register are always tail-agnostic 1147 * set tail elements to 1s 1148 */ \ 1149 if (vta_all_1s) { \ 1150 for (; i < total_elems; i++) { \ 1151 vext_set_elem_mask(vd, i, 1); \ 1152 } \ 1153 } \ 1154 } 1155 1156 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1157 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1158 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1159 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1160 1161 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1162 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1163 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1164 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1165 1166 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1167 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1168 void *vs2, CPURISCVState *env, uint32_t desc) \ 1169 { \ 1170 uint32_t vl = env->vl; \ 1171 uint32_t vm = vext_vm(desc); \ 1172 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 1173 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1174 uint32_t i; \ 1175 \ 1176 for (i = env->vstart; i < vl; i++) { \ 1177 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1178 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1179 vext_set_elem_mask(vd, i, \ 1180 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1181 } \ 1182 env->vstart = 0; \ 1183 /* 1184 * mask destination register are always tail-agnostic 1185 * set tail elements to 1s 1186 */ \ 1187 if (vta_all_1s) { \ 1188 for (; i < total_elems; i++) { \ 1189 vext_set_elem_mask(vd, i, 1); \ 1190 } \ 1191 } \ 1192 } 1193 1194 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1195 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1196 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1197 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1198 1199 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1200 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1201 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1202 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1203 1204 /* Vector Bitwise Logical Instructions */ 1205 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1206 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1207 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1208 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1209 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1210 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1211 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1212 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1213 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1214 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1215 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1216 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1217 
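/*
 * A rough sketch of how the pieces fit together:
 * RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) above expands to
 *
 *     static void do_vand_vv_b(void *vd, void *vs1, void *vs2, int i)
 *     {
 *         int8_t s1 = *((int8_t *)vs1 + H1(i));
 *         int8_t s2 = *((int8_t *)vs2 + H1(i));
 *         *((int8_t *)vd + H1(i)) = (s2 & s1);
 *     }
 *
 * and GEN_VEXT_VV(vand_vv_b, 1) below wraps it in the generic do_vext_vv()
 * loop, which applies masking, vstart/vl handling and the tail policy.
 */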
GEN_VEXT_VV(vand_vv_b, 1) 1218 GEN_VEXT_VV(vand_vv_h, 2) 1219 GEN_VEXT_VV(vand_vv_w, 4) 1220 GEN_VEXT_VV(vand_vv_d, 8) 1221 GEN_VEXT_VV(vor_vv_b, 1) 1222 GEN_VEXT_VV(vor_vv_h, 2) 1223 GEN_VEXT_VV(vor_vv_w, 4) 1224 GEN_VEXT_VV(vor_vv_d, 8) 1225 GEN_VEXT_VV(vxor_vv_b, 1) 1226 GEN_VEXT_VV(vxor_vv_h, 2) 1227 GEN_VEXT_VV(vxor_vv_w, 4) 1228 GEN_VEXT_VV(vxor_vv_d, 8) 1229 1230 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1231 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1232 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1233 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1234 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1235 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1236 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1237 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1238 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1239 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1240 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1241 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1242 GEN_VEXT_VX(vand_vx_b, 1) 1243 GEN_VEXT_VX(vand_vx_h, 2) 1244 GEN_VEXT_VX(vand_vx_w, 4) 1245 GEN_VEXT_VX(vand_vx_d, 8) 1246 GEN_VEXT_VX(vor_vx_b, 1) 1247 GEN_VEXT_VX(vor_vx_h, 2) 1248 GEN_VEXT_VX(vor_vx_w, 4) 1249 GEN_VEXT_VX(vor_vx_d, 8) 1250 GEN_VEXT_VX(vxor_vx_b, 1) 1251 GEN_VEXT_VX(vxor_vx_h, 2) 1252 GEN_VEXT_VX(vxor_vx_w, 4) 1253 GEN_VEXT_VX(vxor_vx_d, 8) 1254 1255 /* Vector Single-Width Bit Shift Instructions */ 1256 #define DO_SLL(N, M) (N << (M)) 1257 #define DO_SRL(N, M) (N >> (M)) 1258 1259 /* generate the helpers for shift instructions with two vector operators */ 1260 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1261 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1262 void *vs2, CPURISCVState *env, uint32_t desc) \ 1263 { \ 1264 uint32_t vm = vext_vm(desc); \ 1265 uint32_t vl = env->vl; \ 1266 uint32_t esz = sizeof(TS1); \ 1267 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1268 uint32_t vta = vext_vta(desc); \ 1269 uint32_t vma = vext_vma(desc); \ 1270 uint32_t i; \ 1271 \ 1272 for (i = env->vstart; i < vl; i++) { \ 1273 if (!vm && !vext_elem_mask(v0, i)) { \ 1274 /* set masked-off elements to 1s */ \ 1275 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 1276 continue; \ 1277 } \ 1278 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1279 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1280 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1281 } \ 1282 env->vstart = 0; \ 1283 /* set tail elements to 1s */ \ 1284 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1285 } 1286 1287 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1288 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1289 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1290 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1291 1292 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1293 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1294 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1295 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1296 1297 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1298 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1299 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1300 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1301 1302 /* 1303 * generate the helpers for shift 
instructions with one vector and one scalar 1304 */ 1305 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1306 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1307 void *vs2, CPURISCVState *env, \ 1308 uint32_t desc) \ 1309 { \ 1310 uint32_t vm = vext_vm(desc); \ 1311 uint32_t vl = env->vl; \ 1312 uint32_t esz = sizeof(TD); \ 1313 uint32_t total_elems = \ 1314 vext_get_total_elems(env, desc, esz); \ 1315 uint32_t vta = vext_vta(desc); \ 1316 uint32_t vma = vext_vma(desc); \ 1317 uint32_t i; \ 1318 \ 1319 for (i = env->vstart; i < vl; i++) { \ 1320 if (!vm && !vext_elem_mask(v0, i)) { \ 1321 /* set masked-off elements to 1s */ \ 1322 vext_set_elems_1s(vd, vma, i * esz, \ 1323 (i + 1) * esz); \ 1324 continue; \ 1325 } \ 1326 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1327 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1328 } \ 1329 env->vstart = 0; \ 1330 /* set tail elements to 1s */ \ 1331 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ 1332 } 1333 1334 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1335 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1336 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1337 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1338 1339 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1340 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1341 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1342 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1343 1344 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1345 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1346 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1347 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1348 1349 /* Vector Narrowing Integer Right Shift Instructions */ 1350 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1351 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1352 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1353 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1354 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1355 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1356 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1357 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1358 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1359 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1360 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1361 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1362 1363 /* Vector Integer Comparison Instructions */ 1364 #define DO_MSEQ(N, M) (N == M) 1365 #define DO_MSNE(N, M) (N != M) 1366 #define DO_MSLT(N, M) (N < M) 1367 #define DO_MSLE(N, M) (N <= M) 1368 #define DO_MSGT(N, M) (N > M) 1369 1370 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1371 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1372 CPURISCVState *env, uint32_t desc) \ 1373 { \ 1374 uint32_t vm = vext_vm(desc); \ 1375 uint32_t vl = env->vl; \ 1376 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 1377 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1378 uint32_t vma = vext_vma(desc); \ 1379 uint32_t i; \ 1380 \ 1381 for (i = env->vstart; i < vl; i++) { \ 1382 
ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1383 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1384 if (!vm && !vext_elem_mask(v0, i)) { \ 1385 /* set masked-off elements to 1s */ \ 1386 if (vma) { \ 1387 vext_set_elem_mask(vd, i, 1); \ 1388 } \ 1389 continue; \ 1390 } \ 1391 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1392 } \ 1393 env->vstart = 0; \ 1394 /* 1395 * mask destination register are always tail-agnostic 1396 * set tail elements to 1s 1397 */ \ 1398 if (vta_all_1s) { \ 1399 for (; i < total_elems; i++) { \ 1400 vext_set_elem_mask(vd, i, 1); \ 1401 } \ 1402 } \ 1403 } 1404 1405 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1406 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1407 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1408 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1409 1410 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1411 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1412 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1413 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1414 1415 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1416 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1417 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1418 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1419 1420 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1421 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1422 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1423 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1424 1425 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1426 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1427 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1428 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1429 1430 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1431 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1432 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1433 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1434 1435 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1436 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1437 CPURISCVState *env, uint32_t desc) \ 1438 { \ 1439 uint32_t vm = vext_vm(desc); \ 1440 uint32_t vl = env->vl; \ 1441 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 1442 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1443 uint32_t vma = vext_vma(desc); \ 1444 uint32_t i; \ 1445 \ 1446 for (i = env->vstart; i < vl; i++) { \ 1447 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1448 if (!vm && !vext_elem_mask(v0, i)) { \ 1449 /* set masked-off elements to 1s */ \ 1450 if (vma) { \ 1451 vext_set_elem_mask(vd, i, 1); \ 1452 } \ 1453 continue; \ 1454 } \ 1455 vext_set_elem_mask(vd, i, \ 1456 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1457 } \ 1458 env->vstart = 0; \ 1459 /* 1460 * mask destination register are always tail-agnostic 1461 * set tail elements to 1s 1462 */ \ 1463 if (vta_all_1s) { \ 1464 for (; i < total_elems; i++) { \ 1465 vext_set_elem_mask(vd, i, 1); \ 1466 } \ 1467 } \ 1468 } 1469 1470 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1471 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1472 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1473 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1474 1475 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1476 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1477 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1478 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1479 1480 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1481 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1482 
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1483 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1484 1485 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1486 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1487 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1488 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1489 1490 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1491 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1492 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1493 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1494 1495 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1496 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1497 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1498 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1499 1500 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1501 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1502 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1503 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1504 1505 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1506 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1507 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1508 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1509 1510 /* Vector Integer Min/Max Instructions */ 1511 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1512 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1513 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1514 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1515 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1516 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1517 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1518 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1519 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1520 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1521 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1522 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1523 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1524 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1525 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1526 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1527 GEN_VEXT_VV(vminu_vv_b, 1) 1528 GEN_VEXT_VV(vminu_vv_h, 2) 1529 GEN_VEXT_VV(vminu_vv_w, 4) 1530 GEN_VEXT_VV(vminu_vv_d, 8) 1531 GEN_VEXT_VV(vmin_vv_b, 1) 1532 GEN_VEXT_VV(vmin_vv_h, 2) 1533 GEN_VEXT_VV(vmin_vv_w, 4) 1534 GEN_VEXT_VV(vmin_vv_d, 8) 1535 GEN_VEXT_VV(vmaxu_vv_b, 1) 1536 GEN_VEXT_VV(vmaxu_vv_h, 2) 1537 GEN_VEXT_VV(vmaxu_vv_w, 4) 1538 GEN_VEXT_VV(vmaxu_vv_d, 8) 1539 GEN_VEXT_VV(vmax_vv_b, 1) 1540 GEN_VEXT_VV(vmax_vv_h, 2) 1541 GEN_VEXT_VV(vmax_vv_w, 4) 1542 GEN_VEXT_VV(vmax_vv_d, 8) 1543 1544 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1545 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1546 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1547 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1548 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1549 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1550 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1551 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1552 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1553 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1554 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1555 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1556 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, 
H1, H1, DO_MAX) 1557 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1558 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1559 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1560 GEN_VEXT_VX(vminu_vx_b, 1) 1561 GEN_VEXT_VX(vminu_vx_h, 2) 1562 GEN_VEXT_VX(vminu_vx_w, 4) 1563 GEN_VEXT_VX(vminu_vx_d, 8) 1564 GEN_VEXT_VX(vmin_vx_b, 1) 1565 GEN_VEXT_VX(vmin_vx_h, 2) 1566 GEN_VEXT_VX(vmin_vx_w, 4) 1567 GEN_VEXT_VX(vmin_vx_d, 8) 1568 GEN_VEXT_VX(vmaxu_vx_b, 1) 1569 GEN_VEXT_VX(vmaxu_vx_h, 2) 1570 GEN_VEXT_VX(vmaxu_vx_w, 4) 1571 GEN_VEXT_VX(vmaxu_vx_d, 8) 1572 GEN_VEXT_VX(vmax_vx_b, 1) 1573 GEN_VEXT_VX(vmax_vx_h, 2) 1574 GEN_VEXT_VX(vmax_vx_w, 4) 1575 GEN_VEXT_VX(vmax_vx_d, 8) 1576 1577 /* Vector Single-Width Integer Multiply Instructions */ 1578 #define DO_MUL(N, M) (N * M) 1579 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1580 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1581 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1582 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1583 GEN_VEXT_VV(vmul_vv_b, 1) 1584 GEN_VEXT_VV(vmul_vv_h, 2) 1585 GEN_VEXT_VV(vmul_vv_w, 4) 1586 GEN_VEXT_VV(vmul_vv_d, 8) 1587 1588 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1589 { 1590 return (int16_t)s2 * (int16_t)s1 >> 8; 1591 } 1592 1593 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1594 { 1595 return (int32_t)s2 * (int32_t)s1 >> 16; 1596 } 1597 1598 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1599 { 1600 return (int64_t)s2 * (int64_t)s1 >> 32; 1601 } 1602 1603 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1604 { 1605 uint64_t hi_64, lo_64; 1606 1607 muls64(&lo_64, &hi_64, s1, s2); 1608 return hi_64; 1609 } 1610 1611 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1612 { 1613 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1614 } 1615 1616 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1617 { 1618 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1619 } 1620 1621 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1622 { 1623 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1624 } 1625 1626 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1627 { 1628 uint64_t hi_64, lo_64; 1629 1630 mulu64(&lo_64, &hi_64, s2, s1); 1631 return hi_64; 1632 } 1633 1634 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1635 { 1636 return (int16_t)s2 * (uint16_t)s1 >> 8; 1637 } 1638 1639 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1640 { 1641 return (int32_t)s2 * (uint32_t)s1 >> 16; 1642 } 1643 1644 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1645 { 1646 return (int64_t)s2 * (uint64_t)s1 >> 32; 1647 } 1648 1649 /* 1650 * Let A = signed operand, 1651 * B = unsigned operand 1652 * P = mulu64(A, B), unsigned product 1653 * 1654 * LET X = 2 ** 64 - A, 2's complement of A 1655 * SP = signed product 1656 * THEN 1657 * IF A < 0 1658 * SP = -X * B 1659 * = -(2 ** 64 - A) * B 1660 * = A * B - 2 ** 64 * B 1661 * = P - 2 ** 64 * B 1662 * ELSE 1663 * SP = P 1664 * THEN 1665 * HI_P -= (A < 0 ? B : 0) 1666 */ 1667 1668 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1669 { 1670 uint64_t hi_64, lo_64; 1671 1672 mulu64(&lo_64, &hi_64, s2, s1); 1673 1674 hi_64 -= s2 < 0 ? 
s1 : 0; 1675 return hi_64; 1676 } 1677 1678 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1679 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1680 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1681 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1682 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1683 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1684 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1685 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1686 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1687 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1688 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1689 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1690 GEN_VEXT_VV(vmulh_vv_b, 1) 1691 GEN_VEXT_VV(vmulh_vv_h, 2) 1692 GEN_VEXT_VV(vmulh_vv_w, 4) 1693 GEN_VEXT_VV(vmulh_vv_d, 8) 1694 GEN_VEXT_VV(vmulhu_vv_b, 1) 1695 GEN_VEXT_VV(vmulhu_vv_h, 2) 1696 GEN_VEXT_VV(vmulhu_vv_w, 4) 1697 GEN_VEXT_VV(vmulhu_vv_d, 8) 1698 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1699 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1700 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1701 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1702 1703 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1704 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1705 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1706 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1707 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1708 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1709 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1710 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1711 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1712 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1713 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1714 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1715 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1716 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1717 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1718 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1719 GEN_VEXT_VX(vmul_vx_b, 1) 1720 GEN_VEXT_VX(vmul_vx_h, 2) 1721 GEN_VEXT_VX(vmul_vx_w, 4) 1722 GEN_VEXT_VX(vmul_vx_d, 8) 1723 GEN_VEXT_VX(vmulh_vx_b, 1) 1724 GEN_VEXT_VX(vmulh_vx_h, 2) 1725 GEN_VEXT_VX(vmulh_vx_w, 4) 1726 GEN_VEXT_VX(vmulh_vx_d, 8) 1727 GEN_VEXT_VX(vmulhu_vx_b, 1) 1728 GEN_VEXT_VX(vmulhu_vx_h, 2) 1729 GEN_VEXT_VX(vmulhu_vx_w, 4) 1730 GEN_VEXT_VX(vmulhu_vx_d, 8) 1731 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1732 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1733 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1734 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1735 1736 /* Vector Integer Divide Instructions */ 1737 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1738 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1739 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ 1740 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1741 #define DO_REM(N, M) (unlikely(M == 0) ? N : \ 1742 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1743 1744 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1745 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1746 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1747 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1748 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1749 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1750 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1751 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1752 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1753 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1754 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1755 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1756 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1757 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1758 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1759 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1760 GEN_VEXT_VV(vdivu_vv_b, 1) 1761 GEN_VEXT_VV(vdivu_vv_h, 2) 1762 GEN_VEXT_VV(vdivu_vv_w, 4) 1763 GEN_VEXT_VV(vdivu_vv_d, 8) 1764 GEN_VEXT_VV(vdiv_vv_b, 1) 1765 GEN_VEXT_VV(vdiv_vv_h, 2) 1766 GEN_VEXT_VV(vdiv_vv_w, 4) 1767 GEN_VEXT_VV(vdiv_vv_d, 8) 1768 GEN_VEXT_VV(vremu_vv_b, 1) 1769 GEN_VEXT_VV(vremu_vv_h, 2) 1770 GEN_VEXT_VV(vremu_vv_w, 4) 1771 GEN_VEXT_VV(vremu_vv_d, 8) 1772 GEN_VEXT_VV(vrem_vv_b, 1) 1773 GEN_VEXT_VV(vrem_vv_h, 2) 1774 GEN_VEXT_VV(vrem_vv_w, 4) 1775 GEN_VEXT_VV(vrem_vv_d, 8) 1776 1777 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1778 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1779 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1780 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1781 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1782 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1783 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1784 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1785 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1786 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1787 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1788 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1789 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1790 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1791 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1792 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1793 GEN_VEXT_VX(vdivu_vx_b, 1) 1794 GEN_VEXT_VX(vdivu_vx_h, 2) 1795 GEN_VEXT_VX(vdivu_vx_w, 4) 1796 GEN_VEXT_VX(vdivu_vx_d, 8) 1797 GEN_VEXT_VX(vdiv_vx_b, 1) 1798 GEN_VEXT_VX(vdiv_vx_h, 2) 1799 GEN_VEXT_VX(vdiv_vx_w, 4) 1800 GEN_VEXT_VX(vdiv_vx_d, 8) 1801 GEN_VEXT_VX(vremu_vx_b, 1) 1802 GEN_VEXT_VX(vremu_vx_h, 2) 1803 GEN_VEXT_VX(vremu_vx_w, 4) 1804 GEN_VEXT_VX(vremu_vx_d, 8) 1805 GEN_VEXT_VX(vrem_vx_b, 1) 1806 GEN_VEXT_VX(vrem_vx_h, 2) 1807 GEN_VEXT_VX(vrem_vx_w, 4) 1808 GEN_VEXT_VX(vrem_vx_d, 8) 1809 1810 /* Vector Widening Integer Multiply Instructions */ 1811 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1812 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1813 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1814 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1815 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1816 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1817 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1818 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, 
DO_MUL) 1819 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1820 GEN_VEXT_VV(vwmul_vv_b, 2) 1821 GEN_VEXT_VV(vwmul_vv_h, 4) 1822 GEN_VEXT_VV(vwmul_vv_w, 8) 1823 GEN_VEXT_VV(vwmulu_vv_b, 2) 1824 GEN_VEXT_VV(vwmulu_vv_h, 4) 1825 GEN_VEXT_VV(vwmulu_vv_w, 8) 1826 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1827 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1828 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1829 1830 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1831 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1832 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1833 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1834 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1835 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1836 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1837 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1838 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1839 GEN_VEXT_VX(vwmul_vx_b, 2) 1840 GEN_VEXT_VX(vwmul_vx_h, 4) 1841 GEN_VEXT_VX(vwmul_vx_w, 8) 1842 GEN_VEXT_VX(vwmulu_vx_b, 2) 1843 GEN_VEXT_VX(vwmulu_vx_h, 4) 1844 GEN_VEXT_VX(vwmulu_vx_w, 8) 1845 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1846 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1847 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1848 1849 /* Vector Single-Width Integer Multiply-Add Instructions */ 1850 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1851 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1852 { \ 1853 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1854 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1855 TD d = *((TD *)vd + HD(i)); \ 1856 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1857 } 1858 1859 #define DO_MACC(N, M, D) (M * N + D) 1860 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1861 #define DO_MADD(N, M, D) (M * D + N) 1862 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1863 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1864 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1865 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1866 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1867 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1868 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1869 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1870 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1871 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1872 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1873 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1874 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1875 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1876 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1877 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1878 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1879 GEN_VEXT_VV(vmacc_vv_b, 1) 1880 GEN_VEXT_VV(vmacc_vv_h, 2) 1881 GEN_VEXT_VV(vmacc_vv_w, 4) 1882 GEN_VEXT_VV(vmacc_vv_d, 8) 1883 GEN_VEXT_VV(vnmsac_vv_b, 1) 1884 GEN_VEXT_VV(vnmsac_vv_h, 2) 1885 GEN_VEXT_VV(vnmsac_vv_w, 4) 1886 GEN_VEXT_VV(vnmsac_vv_d, 8) 1887 GEN_VEXT_VV(vmadd_vv_b, 1) 1888 GEN_VEXT_VV(vmadd_vv_h, 2) 1889 GEN_VEXT_VV(vmadd_vv_w, 4) 1890 GEN_VEXT_VV(vmadd_vv_d, 8) 1891 GEN_VEXT_VV(vnmsub_vv_b, 1) 1892 GEN_VEXT_VV(vnmsub_vv_h, 2) 1893 GEN_VEXT_VV(vnmsub_vv_w, 4) 1894 GEN_VEXT_VV(vnmsub_vv_d, 8) 1895 1896 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1897 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1898 { \ 1899 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1900 TD d = *((TD *)vd 
+ HD(i)); \ 1901 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1902 } 1903 1904 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1905 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1906 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1907 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1908 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1909 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1910 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1911 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1912 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1913 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1914 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1915 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1916 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1917 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1918 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1919 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1920 GEN_VEXT_VX(vmacc_vx_b, 1) 1921 GEN_VEXT_VX(vmacc_vx_h, 2) 1922 GEN_VEXT_VX(vmacc_vx_w, 4) 1923 GEN_VEXT_VX(vmacc_vx_d, 8) 1924 GEN_VEXT_VX(vnmsac_vx_b, 1) 1925 GEN_VEXT_VX(vnmsac_vx_h, 2) 1926 GEN_VEXT_VX(vnmsac_vx_w, 4) 1927 GEN_VEXT_VX(vnmsac_vx_d, 8) 1928 GEN_VEXT_VX(vmadd_vx_b, 1) 1929 GEN_VEXT_VX(vmadd_vx_h, 2) 1930 GEN_VEXT_VX(vmadd_vx_w, 4) 1931 GEN_VEXT_VX(vmadd_vx_d, 8) 1932 GEN_VEXT_VX(vnmsub_vx_b, 1) 1933 GEN_VEXT_VX(vnmsub_vx_h, 2) 1934 GEN_VEXT_VX(vnmsub_vx_w, 4) 1935 GEN_VEXT_VX(vnmsub_vx_d, 8) 1936 1937 /* Vector Widening Integer Multiply-Add Instructions */ 1938 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1939 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1940 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1941 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1942 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1943 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1944 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1945 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1946 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1947 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1948 GEN_VEXT_VV(vwmaccu_vv_h, 4) 1949 GEN_VEXT_VV(vwmaccu_vv_w, 8) 1950 GEN_VEXT_VV(vwmacc_vv_b, 2) 1951 GEN_VEXT_VV(vwmacc_vv_h, 4) 1952 GEN_VEXT_VV(vwmacc_vv_w, 8) 1953 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 1954 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 1955 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 1956 1957 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1958 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1959 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1960 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1961 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1962 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1963 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1964 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1965 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1966 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1967 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1968 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1969 GEN_VEXT_VX(vwmaccu_vx_b, 2) 1970 GEN_VEXT_VX(vwmaccu_vx_h, 4) 1971 GEN_VEXT_VX(vwmaccu_vx_w, 8) 1972 GEN_VEXT_VX(vwmacc_vx_b, 2) 1973 GEN_VEXT_VX(vwmacc_vx_h, 4) 1974 GEN_VEXT_VX(vwmacc_vx_w, 8) 1975 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 1976 
GEN_VEXT_VX(vwmaccsu_vx_h, 4) 1977 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 1978 GEN_VEXT_VX(vwmaccus_vx_b, 2) 1979 GEN_VEXT_VX(vwmaccus_vx_h, 4) 1980 GEN_VEXT_VX(vwmaccus_vx_w, 8) 1981 1982 /* Vector Integer Merge and Move Instructions */ 1983 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1984 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1985 uint32_t desc) \ 1986 { \ 1987 uint32_t vl = env->vl; \ 1988 uint32_t esz = sizeof(ETYPE); \ 1989 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1990 uint32_t vta = vext_vta(desc); \ 1991 uint32_t i; \ 1992 \ 1993 for (i = env->vstart; i < vl; i++) { \ 1994 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1995 *((ETYPE *)vd + H(i)) = s1; \ 1996 } \ 1997 env->vstart = 0; \ 1998 /* set tail elements to 1s */ \ 1999 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2000 } 2001 2002 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 2003 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 2004 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 2005 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 2006 2007 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 2008 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2009 uint32_t desc) \ 2010 { \ 2011 uint32_t vl = env->vl; \ 2012 uint32_t esz = sizeof(ETYPE); \ 2013 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2014 uint32_t vta = vext_vta(desc); \ 2015 uint32_t i; \ 2016 \ 2017 for (i = env->vstart; i < vl; i++) { \ 2018 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2019 } \ 2020 env->vstart = 0; \ 2021 /* set tail elements to 1s */ \ 2022 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2023 } 2024 2025 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2026 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2027 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2028 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2029 2030 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2031 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2032 CPURISCVState *env, uint32_t desc) \ 2033 { \ 2034 uint32_t vl = env->vl; \ 2035 uint32_t esz = sizeof(ETYPE); \ 2036 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2037 uint32_t vta = vext_vta(desc); \ 2038 uint32_t i; \ 2039 \ 2040 for (i = env->vstart; i < vl; i++) { \ 2041 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2042 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2043 } \ 2044 env->vstart = 0; \ 2045 /* set tail elements to 1s */ \ 2046 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2047 } 2048 2049 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2050 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2051 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2052 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2053 2054 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2055 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2056 void *vs2, CPURISCVState *env, uint32_t desc) \ 2057 { \ 2058 uint32_t vl = env->vl; \ 2059 uint32_t esz = sizeof(ETYPE); \ 2060 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2061 uint32_t vta = vext_vta(desc); \ 2062 uint32_t i; \ 2063 \ 2064 for (i = env->vstart; i < vl; i++) { \ 2065 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2066 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 2067 (ETYPE)(target_long)s1); \ 2068 *((ETYPE *)vd + H(i)) = d; \ 2069 } \ 2070 env->vstart = 0; \ 2071 /* set tail elements to 1s */ \ 2072 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2073 } 2074 2075 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2076 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2077 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2078 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2079 2080 /* 2081 * Vector Fixed-Point Arithmetic Instructions 2082 */ 2083 2084 /* Vector Single-Width Saturating Add and Subtract */ 2085 2086 /* 2087 * As fixed point instructions probably have round mode and saturation, 2088 * define common macros for fixed point here. 2089 */ 2090 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2091 CPURISCVState *env, int vxrm); 2092 2093 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2094 static inline void \ 2095 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2096 CPURISCVState *env, int vxrm) \ 2097 { \ 2098 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2099 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2100 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2101 } 2102 2103 static inline void 2104 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2105 CPURISCVState *env, 2106 uint32_t vl, uint32_t vm, int vxrm, 2107 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 2108 { 2109 for (uint32_t i = env->vstart; i < vl; i++) { 2110 if (!vm && !vext_elem_mask(v0, i)) { 2111 /* set masked-off elements to 1s */ 2112 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2113 continue; 2114 } 2115 fn(vd, vs1, vs2, i, env, vxrm); 2116 } 2117 env->vstart = 0; 2118 } 2119 2120 static inline void 2121 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2122 CPURISCVState *env, 2123 uint32_t desc, 2124 opivv2_rm_fn *fn, uint32_t esz) 2125 { 2126 uint32_t vm = vext_vm(desc); 2127 uint32_t vl = env->vl; 2128 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2129 uint32_t vta = vext_vta(desc); 2130 uint32_t vma = vext_vma(desc); 2131 2132 switch (env->vxrm) { 2133 case 0: /* rnu */ 2134 vext_vv_rm_1(vd, v0, vs1, vs2, 2135 env, vl, vm, 0, fn, vma, esz); 2136 break; 2137 case 1: /* rne */ 2138 vext_vv_rm_1(vd, v0, vs1, vs2, 2139 env, vl, vm, 1, fn, vma, esz); 2140 break; 2141 case 2: /* rdn */ 2142 vext_vv_rm_1(vd, v0, vs1, vs2, 2143 env, vl, vm, 2, fn, vma, esz); 2144 break; 2145 default: /* rod */ 2146 vext_vv_rm_1(vd, v0, vs1, vs2, 2147 env, vl, vm, 3, fn, vma, esz); 2148 break; 2149 } 2150 /* set tail elements to 1s */ 2151 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2152 } 2153 2154 /* generate helpers for fixed point instructions with OPIVV format */ 2155 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2156 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2157 CPURISCVState *env, uint32_t desc) \ 2158 { \ 2159 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2160 do_##NAME, ESZ); \ 2161 } 2162 2163 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, 2164 uint8_t b) 2165 { 2166 uint8_t res = a + b; 2167 if (res < a) { 2168 res = UINT8_MAX; 2169 env->vxsat = 0x1; 2170 } 2171 return res; 2172 } 2173 2174 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2175 uint16_t b) 2176 { 2177 uint16_t res = a + b; 2178 if (res < a) { 2179 res = UINT16_MAX; 2180 env->vxsat = 0x1; 2181 } 2182 return res; 2183 } 2184 2185 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2186 uint32_t b) 2187 { 2188 uint32_t res = a + b; 2189 if (res < a) { 2190 res = UINT32_MAX; 
2191 env->vxsat = 0x1; 2192 } 2193 return res; 2194 } 2195 2196 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2197 uint64_t b) 2198 { 2199 uint64_t res = a + b; 2200 if (res < a) { 2201 res = UINT64_MAX; 2202 env->vxsat = 0x1; 2203 } 2204 return res; 2205 } 2206 2207 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2208 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2209 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2210 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2211 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2212 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2213 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2214 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2215 2216 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2217 CPURISCVState *env, int vxrm); 2218 2219 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2220 static inline void \ 2221 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2222 CPURISCVState *env, int vxrm) \ 2223 { \ 2224 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2225 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2226 } 2227 2228 static inline void 2229 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2230 CPURISCVState *env, 2231 uint32_t vl, uint32_t vm, int vxrm, 2232 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2233 { 2234 for (uint32_t i = env->vstart; i < vl; i++) { 2235 if (!vm && !vext_elem_mask(v0, i)) { 2236 /* set masked-off elements to 1s */ 2237 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2238 continue; 2239 } 2240 fn(vd, s1, vs2, i, env, vxrm); 2241 } 2242 env->vstart = 0; 2243 } 2244 2245 static inline void 2246 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2247 CPURISCVState *env, 2248 uint32_t desc, 2249 opivx2_rm_fn *fn, uint32_t esz) 2250 { 2251 uint32_t vm = vext_vm(desc); 2252 uint32_t vl = env->vl; 2253 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2254 uint32_t vta = vext_vta(desc); 2255 uint32_t vma = vext_vma(desc); 2256 2257 switch (env->vxrm) { 2258 case 0: /* rnu */ 2259 vext_vx_rm_1(vd, v0, s1, vs2, 2260 env, vl, vm, 0, fn, vma, esz); 2261 break; 2262 case 1: /* rne */ 2263 vext_vx_rm_1(vd, v0, s1, vs2, 2264 env, vl, vm, 1, fn, vma, esz); 2265 break; 2266 case 2: /* rdn */ 2267 vext_vx_rm_1(vd, v0, s1, vs2, 2268 env, vl, vm, 2, fn, vma, esz); 2269 break; 2270 default: /* rod */ 2271 vext_vx_rm_1(vd, v0, s1, vs2, 2272 env, vl, vm, 3, fn, vma, esz); 2273 break; 2274 } 2275 /* set tail elements to 1s */ 2276 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2277 } 2278 2279 /* generate helpers for fixed point instructions with OPIVX format */ 2280 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2281 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2282 void *vs2, CPURISCVState *env, \ 2283 uint32_t desc) \ 2284 { \ 2285 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2286 do_##NAME, ESZ); \ 2287 } 2288 2289 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2290 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2291 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2292 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2293 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2294 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2295 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2296 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2297 2298 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2299 { 2300 int8_t res = a + b; 2301 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2302 res = a > 0 ? 
INT8_MAX : INT8_MIN; 2303 env->vxsat = 0x1; 2304 } 2305 return res; 2306 } 2307 2308 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2309 int16_t b) 2310 { 2311 int16_t res = a + b; 2312 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2313 res = a > 0 ? INT16_MAX : INT16_MIN; 2314 env->vxsat = 0x1; 2315 } 2316 return res; 2317 } 2318 2319 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2320 int32_t b) 2321 { 2322 int32_t res = a + b; 2323 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2324 res = a > 0 ? INT32_MAX : INT32_MIN; 2325 env->vxsat = 0x1; 2326 } 2327 return res; 2328 } 2329 2330 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2331 int64_t b) 2332 { 2333 int64_t res = a + b; 2334 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2335 res = a > 0 ? INT64_MAX : INT64_MIN; 2336 env->vxsat = 0x1; 2337 } 2338 return res; 2339 } 2340 2341 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2342 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2343 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2344 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2345 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2346 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2347 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2348 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2349 2350 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2351 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2352 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2353 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2354 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2355 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2356 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2357 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2358 2359 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2360 uint8_t b) 2361 { 2362 uint8_t res = a - b; 2363 if (res > a) { 2364 res = 0; 2365 env->vxsat = 0x1; 2366 } 2367 return res; 2368 } 2369 2370 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2371 uint16_t b) 2372 { 2373 uint16_t res = a - b; 2374 if (res > a) { 2375 res = 0; 2376 env->vxsat = 0x1; 2377 } 2378 return res; 2379 } 2380 2381 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2382 uint32_t b) 2383 { 2384 uint32_t res = a - b; 2385 if (res > a) { 2386 res = 0; 2387 env->vxsat = 0x1; 2388 } 2389 return res; 2390 } 2391 2392 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2393 uint64_t b) 2394 { 2395 uint64_t res = a - b; 2396 if (res > a) { 2397 res = 0; 2398 env->vxsat = 0x1; 2399 } 2400 return res; 2401 } 2402 2403 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2404 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2405 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2406 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2407 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2408 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2409 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2410 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2411 2412 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2413 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2414 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2415 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2416 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2417 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2418 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2419 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2420 2421 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2422 { 2423 int8_t res = a - b; 2424 if ((res ^ a) & (a ^ b) & INT8_MIN) 
{
        res = a >= 0 ? INT8_MAX : INT8_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
                             int16_t b)
{
    int16_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT16_MIN) {
        res = a >= 0 ? INT16_MAX : INT16_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int32_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT32_MIN) {
        res = a >= 0 ? INT32_MAX : INT32_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT64_MIN) {
        res = a >= 0 ? INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
GEN_VEXT_VV_RM(vssub_vv_b, 1)
GEN_VEXT_VV_RM(vssub_vv_h, 2)
GEN_VEXT_VV_RM(vssub_vv_w, 4)
GEN_VEXT_VV_RM(vssub_vv_d, 8)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
GEN_VEXT_VX_RM(vssub_vx_b, 1)
GEN_VEXT_VX_RM(vssub_vx_h, 2)
GEN_VEXT_VX_RM(vssub_vx_w, 4)
GEN_VEXT_VX_RM(vssub_vx_d, 8)

/* Vector Single-Width Averaging Add and Subtract */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d = extract64(v, shift, 1);
    uint8_t d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    d1 = extract64(v, shift - 1, 1);
    D1 = extract64(v, 0, shift);
    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            D2 = extract64(v, 0, shift - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}

static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int64_t res = (int64_t)a + b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63.
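     * For example, INT64_MAX + INT64_MAX wraps res to -2, so res >> 1
     * alone would yield -1; xor-ing with 'over' (bit 63 set when the
     * operand/result signs prove overflow) flips that back to INT64_MAX,
     * the correct halved sum before rounding.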
*/ 2527 return ((res >> 1) ^ over) + round; 2528 } 2529 2530 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2531 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2532 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2533 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2534 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2535 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2536 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2537 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2538 2539 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2540 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2541 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2542 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2543 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2544 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2545 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2546 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2547 2548 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2549 uint32_t a, uint32_t b) 2550 { 2551 uint64_t res = (uint64_t)a + b; 2552 uint8_t round = get_round(vxrm, res, 1); 2553 2554 return (res >> 1) + round; 2555 } 2556 2557 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2558 uint64_t a, uint64_t b) 2559 { 2560 uint64_t res = a + b; 2561 uint8_t round = get_round(vxrm, res, 1); 2562 uint64_t over = (uint64_t)(res < a) << 63; 2563 2564 return ((res >> 1) | over) + round; 2565 } 2566 2567 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2568 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2569 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2570 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2571 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2572 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2573 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2574 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2575 2576 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2577 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2578 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2579 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2580 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2581 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2582 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2583 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2584 2585 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, 2586 int32_t b) 2587 { 2588 int64_t res = (int64_t)a - b; 2589 uint8_t round = get_round(vxrm, res, 1); 2590 2591 return (res >> 1) + round; 2592 } 2593 2594 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, 2595 int64_t b) 2596 { 2597 int64_t res = (int64_t)a - b; 2598 uint8_t round = get_round(vxrm, res, 1); 2599 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2600 2601 /* With signed overflow, bit 64 is inverse of bit 63. 
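     * For example, INT64_MIN - 1 wraps res to INT64_MAX, so res >> 1
     * alone would be a large positive value; xor-ing with 'over'
     * restores the correct negative half-difference before rounding.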
*/ 2602 return ((res >> 1) ^ over) + round; 2603 } 2604 2605 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2606 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2607 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2608 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2609 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2610 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2611 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2612 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2613 2614 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2615 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2616 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2617 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2618 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2619 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2620 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2621 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2622 2623 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2624 uint32_t a, uint32_t b) 2625 { 2626 int64_t res = (int64_t)a - b; 2627 uint8_t round = get_round(vxrm, res, 1); 2628 2629 return (res >> 1) + round; 2630 } 2631 2632 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2633 uint64_t a, uint64_t b) 2634 { 2635 uint64_t res = (uint64_t)a - b; 2636 uint8_t round = get_round(vxrm, res, 1); 2637 uint64_t over = (uint64_t)(res > a) << 63; 2638 2639 return ((res >> 1) | over) + round; 2640 } 2641 2642 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2643 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2644 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2645 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2646 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2647 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2648 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2649 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2650 2651 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2652 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2653 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2654 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2655 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2656 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2657 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2658 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2659 2660 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2661 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2662 { 2663 uint8_t round; 2664 int16_t res; 2665 2666 res = (int16_t)a * (int16_t)b; 2667 round = get_round(vxrm, res, 7); 2668 res = (res >> 7) + round; 2669 2670 if (res > INT8_MAX) { 2671 env->vxsat = 0x1; 2672 return INT8_MAX; 2673 } else if (res < INT8_MIN) { 2674 env->vxsat = 0x1; 2675 return INT8_MIN; 2676 } else { 2677 return res; 2678 } 2679 } 2680 2681 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2682 { 2683 uint8_t round; 2684 int32_t res; 2685 2686 res = (int32_t)a * (int32_t)b; 2687 round = get_round(vxrm, res, 15); 2688 res = (res >> 15) + round; 2689 2690 if (res > INT16_MAX) { 2691 env->vxsat = 0x1; 2692 return INT16_MAX; 2693 } else if (res < INT16_MIN) { 2694 env->vxsat = 0x1; 2695 return INT16_MIN; 2696 } else { 2697 return res; 2698 } 2699 } 2700 2701 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2702 { 2703 uint8_t round; 2704 int64_t res; 2705 2706 res = (int64_t)a * (int64_t)b; 2707 round = get_round(vxrm, res, 31); 2708 res = (res >> 31) + round; 2709 2710 if (res > INT32_MAX) { 2711 env->vxsat = 0x1; 2712 return INT32_MAX; 2713 } else if (res < INT32_MIN) { 2714 env->vxsat = 0x1; 
2715 return INT32_MIN; 2716 } else { 2717 return res; 2718 } 2719 } 2720 2721 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2722 { 2723 uint8_t round; 2724 uint64_t hi_64, lo_64; 2725 int64_t res; 2726 2727 if (a == INT64_MIN && b == INT64_MIN) { 2728 env->vxsat = 1; 2729 return INT64_MAX; 2730 } 2731 2732 muls64(&lo_64, &hi_64, a, b); 2733 round = get_round(vxrm, lo_64, 63); 2734 /* 2735 * Cannot overflow, as there are always 2736 * 2 sign bits after multiply. 2737 */ 2738 res = (hi_64 << 1) | (lo_64 >> 63); 2739 if (round) { 2740 if (res == INT64_MAX) { 2741 env->vxsat = 1; 2742 } else { 2743 res += 1; 2744 } 2745 } 2746 return res; 2747 } 2748 2749 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2750 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2751 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2752 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2753 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2754 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2755 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2756 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2757 2758 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2759 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2760 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2761 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2762 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2763 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2764 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2765 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2766 2767 /* Vector Single-Width Scaling Shift Instructions */ 2768 static inline uint8_t 2769 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2770 { 2771 uint8_t round, shift = b & 0x7; 2772 uint8_t res; 2773 2774 round = get_round(vxrm, a, shift); 2775 res = (a >> shift) + round; 2776 return res; 2777 } 2778 static inline uint16_t 2779 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2780 { 2781 uint8_t round, shift = b & 0xf; 2782 2783 round = get_round(vxrm, a, shift); 2784 return (a >> shift) + round; 2785 } 2786 static inline uint32_t 2787 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2788 { 2789 uint8_t round, shift = b & 0x1f; 2790 2791 round = get_round(vxrm, a, shift); 2792 return (a >> shift) + round; 2793 } 2794 static inline uint64_t 2795 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2796 { 2797 uint8_t round, shift = b & 0x3f; 2798 2799 round = get_round(vxrm, a, shift); 2800 return (a >> shift) + round; 2801 } 2802 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2803 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2804 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2805 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2806 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2807 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2808 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2809 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2810 2811 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2812 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2813 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2814 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2815 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2816 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2817 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2818 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2819 2820 static inline int8_t 2821 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2822 { 2823 uint8_t round, shift = b & 0x7; 2824 2825 round = get_round(vxrm, a, shift); 2826 return (a >> shift) + round; 2827 } 2828 static inline int16_t 2829 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2830 { 2831 uint8_t round, shift = b & 0xf; 2832 2833 round = get_round(vxrm, a, shift); 2834 return (a >> shift) + round; 2835 } 2836 static inline int32_t 2837 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2838 { 2839 uint8_t round, shift = b & 0x1f; 2840 2841 round = get_round(vxrm, a, shift); 2842 return (a >> shift) + round; 2843 } 2844 static inline int64_t 2845 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2846 { 2847 uint8_t round, shift = b & 0x3f; 2848 2849 round = get_round(vxrm, a, shift); 2850 return (a >> shift) + round; 2851 } 2852 2853 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2854 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2855 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2856 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2857 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2858 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2859 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2860 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2861 2862 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2863 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2864 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2865 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2866 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2867 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2868 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2869 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2870 2871 /* Vector Narrowing Fixed-Point Clip Instructions */ 2872 static inline int8_t 2873 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2874 { 2875 uint8_t round, shift = b & 0xf; 2876 int16_t res; 2877 2878 round = get_round(vxrm, a, shift); 2879 res = (a >> shift) + round; 2880 if (res > INT8_MAX) { 2881 env->vxsat = 0x1; 2882 return INT8_MAX; 2883 } else if (res < INT8_MIN) { 2884 env->vxsat = 0x1; 2885 return INT8_MIN; 2886 } else { 2887 return res; 2888 } 2889 } 2890 2891 static inline int16_t 2892 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2893 { 2894 uint8_t round, shift = b & 0x1f; 2895 int32_t res; 2896 2897 round = get_round(vxrm, a, shift); 2898 res = (a >> shift) + round; 2899 if (res > INT16_MAX) { 2900 env->vxsat = 0x1; 2901 return INT16_MAX; 2902 } else if (res < INT16_MIN) { 2903 env->vxsat = 0x1; 2904 return INT16_MIN; 2905 } else { 2906 return res; 2907 } 2908 } 2909 2910 static inline int32_t 2911 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2912 { 2913 uint8_t round, shift = b & 0x3f; 2914 int64_t res; 2915 2916 round = get_round(vxrm, a, shift); 2917 res = (a >> shift) + round; 2918 if (res > INT32_MAX) { 2919 env->vxsat = 0x1; 2920 return INT32_MAX; 2921 } else if (res < INT32_MIN) { 2922 env->vxsat = 0x1; 2923 return INT32_MIN; 2924 } else { 2925 return res; 2926 } 2927 } 2928 2929 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2930 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2931 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2932 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 2933 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 2934 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 2935 2936 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2937 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2938 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2939 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 2940 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 2941 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 2942 2943 static inline uint8_t 2944 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2945 { 2946 
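    /*
     * Narrowing clip, unsigned: shift the 2*SEW-wide input right by the
     * low four bits of b, round the shifted-out bits per vxrm via
     * get_round(), then saturate to UINT8_MAX, setting vxsat on clip.
     */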
    uint8_t round, shift = b & 0xf;
    uint16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT8_MAX) {
        env->vxsat = 0x1;
        return UINT8_MAX;
    } else {
        return res;
    }
}

static inline uint16_t
vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
{
    uint8_t round, shift = b & 0x1f;
    uint32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT16_MAX) {
        env->vxsat = 0x1;
        return UINT16_MAX;
    } else {
        return res;
    }
}

static inline uint32_t
vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x3f;
    uint64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT32_MAX) {
        env->vxsat = 0x1;
        return UINT32_MAX;
    } else {
        return res;
    }
}

RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
GEN_VEXT_VV_RM(vnclipu_wv_w, 4)

RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
GEN_VEXT_VX_RM(vnclipu_wx_w, 4)

/*
 * Vector Floating-Point Arithmetic Instructions
 */
/* Vector Single-Width Floating-Point Add/Subtract Instructions */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
}

#define GEN_VEXT_VV_ENV(NAME, ESZ)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems =                                \
        vext_get_total_elems(env, desc, ESZ);             \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t vma = vext_vma(desc);                        \
    uint32_t i;                                           \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            /* set masked-off elements to 1s */           \
            vext_set_elems_1s(vd, vma, i * ESZ,           \
                              (i + 1) * ESZ);             \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, vs1, vs2, i, env);                  \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, vl * ESZ,                  \
                      total_elems * ESZ);                 \
}

RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8)

#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}

#define GEN_VEXT_VF(NAME,
ESZ) \ 3062 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3063 void *vs2, CPURISCVState *env, \ 3064 uint32_t desc) \ 3065 { \ 3066 uint32_t vm = vext_vm(desc); \ 3067 uint32_t vl = env->vl; \ 3068 uint32_t total_elems = \ 3069 vext_get_total_elems(env, desc, ESZ); \ 3070 uint32_t vta = vext_vta(desc); \ 3071 uint32_t vma = vext_vma(desc); \ 3072 uint32_t i; \ 3073 \ 3074 for (i = env->vstart; i < vl; i++) { \ 3075 if (!vm && !vext_elem_mask(v0, i)) { \ 3076 /* set masked-off elements to 1s */ \ 3077 vext_set_elems_1s(vd, vma, i * ESZ, \ 3078 (i + 1) * ESZ); \ 3079 continue; \ 3080 } \ 3081 do_##NAME(vd, s1, vs2, i, env); \ 3082 } \ 3083 env->vstart = 0; \ 3084 /* set tail elements to 1s */ \ 3085 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3086 total_elems * ESZ); \ 3087 } 3088 3089 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3090 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3091 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3092 GEN_VEXT_VF(vfadd_vf_h, 2) 3093 GEN_VEXT_VF(vfadd_vf_w, 4) 3094 GEN_VEXT_VF(vfadd_vf_d, 8) 3095 3096 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3097 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3098 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3099 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 3100 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 3101 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 3102 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3103 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3104 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3105 GEN_VEXT_VF(vfsub_vf_h, 2) 3106 GEN_VEXT_VF(vfsub_vf_w, 4) 3107 GEN_VEXT_VF(vfsub_vf_d, 8) 3108 3109 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3110 { 3111 return float16_sub(b, a, s); 3112 } 3113 3114 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3115 { 3116 return float32_sub(b, a, s); 3117 } 3118 3119 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3120 { 3121 return float64_sub(b, a, s); 3122 } 3123 3124 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3125 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3126 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3127 GEN_VEXT_VF(vfrsub_vf_h, 2) 3128 GEN_VEXT_VF(vfrsub_vf_w, 4) 3129 GEN_VEXT_VF(vfrsub_vf_d, 8) 3130 3131 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3132 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3133 { 3134 return float32_add(float16_to_float32(a, true, s), 3135 float16_to_float32(b, true, s), s); 3136 } 3137 3138 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3139 { 3140 return float64_add(float32_to_float64(a, s), 3141 float32_to_float64(b, s), s); 3142 3143 } 3144 3145 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3146 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3147 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3148 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3149 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3150 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3151 GEN_VEXT_VF(vfwadd_vf_h, 4) 3152 GEN_VEXT_VF(vfwadd_vf_w, 8) 3153 3154 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3155 { 3156 return float32_sub(float16_to_float32(a, true, s), 3157 float16_to_float32(b, true, s), s); 3158 } 3159 3160 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3161 { 3162 return float64_sub(float32_to_float64(a, s), 3163 
float32_to_float64(b, s), s); 3164 3165 } 3166 3167 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3168 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3169 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3170 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3171 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3172 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3173 GEN_VEXT_VF(vfwsub_vf_h, 4) 3174 GEN_VEXT_VF(vfwsub_vf_w, 8) 3175 3176 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3177 { 3178 return float32_add(a, float16_to_float32(b, true, s), s); 3179 } 3180 3181 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3182 { 3183 return float64_add(a, float32_to_float64(b, s), s); 3184 } 3185 3186 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3187 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3188 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3189 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3190 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3191 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3192 GEN_VEXT_VF(vfwadd_wf_h, 4) 3193 GEN_VEXT_VF(vfwadd_wf_w, 8) 3194 3195 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3196 { 3197 return float32_sub(a, float16_to_float32(b, true, s), s); 3198 } 3199 3200 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3201 { 3202 return float64_sub(a, float32_to_float64(b, s), s); 3203 } 3204 3205 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3206 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3207 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3208 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3209 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3210 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3211 GEN_VEXT_VF(vfwsub_wf_h, 4) 3212 GEN_VEXT_VF(vfwsub_wf_w, 8) 3213 3214 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3215 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3216 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3217 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3218 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3219 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3220 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3221 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3222 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3223 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3224 GEN_VEXT_VF(vfmul_vf_h, 2) 3225 GEN_VEXT_VF(vfmul_vf_w, 4) 3226 GEN_VEXT_VF(vfmul_vf_d, 8) 3227 3228 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3229 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3230 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3231 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3232 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3233 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3234 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3235 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3236 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3237 GEN_VEXT_VF(vfdiv_vf_h, 2) 3238 GEN_VEXT_VF(vfdiv_vf_w, 4) 3239 GEN_VEXT_VF(vfdiv_vf_d, 8) 3240 3241 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3242 { 3243 return float16_div(b, a, s); 3244 } 3245 3246 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3247 { 3248 return float32_div(b, a, s); 3249 } 3250 3251 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3252 { 3253 return float64_div(b, a, s); 3254 } 3255 
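/*
 * Note: the *_rsub and *_rdiv helpers above simply swap their operands,
 * so for vfrdiv.vf the scalar becomes the dividend.  For instance, with
 * vs2[i] = 4.0 and a scalar operand of 1.0, vfdiv.vf stores
 * 4.0 / 1.0 = 4.0, while vfrdiv.vf stores
 * float64_rdiv(4.0, 1.0) = 1.0 / 4.0 = 0.25.
 */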
3256 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3257 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3258 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3259 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3260 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3261 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3262 3263 /* Vector Widening Floating-Point Multiply */ 3264 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3265 { 3266 return float32_mul(float16_to_float32(a, true, s), 3267 float16_to_float32(b, true, s), s); 3268 } 3269 3270 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3271 { 3272 return float64_mul(float32_to_float64(a, s), 3273 float32_to_float64(b, s), s); 3274 3275 } 3276 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3277 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3278 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3279 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3280 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3281 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3282 GEN_VEXT_VF(vfwmul_vf_h, 4) 3283 GEN_VEXT_VF(vfwmul_vf_w, 8) 3284 3285 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3286 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3287 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3288 CPURISCVState *env) \ 3289 { \ 3290 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3291 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3292 TD d = *((TD *)vd + HD(i)); \ 3293 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3294 } 3295 3296 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3297 { 3298 return float16_muladd(a, b, d, 0, s); 3299 } 3300 3301 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3302 { 3303 return float32_muladd(a, b, d, 0, s); 3304 } 3305 3306 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3307 { 3308 return float64_muladd(a, b, d, 0, s); 3309 } 3310 3311 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3312 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3313 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3314 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3315 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3316 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3317 3318 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3319 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3320 CPURISCVState *env) \ 3321 { \ 3322 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3323 TD d = *((TD *)vd + HD(i)); \ 3324 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3325 } 3326 3327 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3328 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3329 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3330 GEN_VEXT_VF(vfmacc_vf_h, 2) 3331 GEN_VEXT_VF(vfmacc_vf_w, 4) 3332 GEN_VEXT_VF(vfmacc_vf_d, 8) 3333 3334 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3335 { 3336 return float16_muladd(a, b, d, float_muladd_negate_c | 3337 float_muladd_negate_product, s); 3338 } 3339 3340 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3341 { 3342 return float32_muladd(a, b, d, float_muladd_negate_c | 3343 float_muladd_negate_product, s); 3344 } 3345 3346 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3347 { 3348 return float64_muladd(a, b, d, float_muladd_negate_c | 3349 float_muladd_negate_product, s); 3350 } 3351 3352 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, 
H2, H2, H2, fnmacc16) 3353 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3354 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3355 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3356 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3357 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3358 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3359 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3360 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3361 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3362 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3363 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3364 3365 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3366 { 3367 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3368 } 3369 3370 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3371 { 3372 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3373 } 3374 3375 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3376 { 3377 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3378 } 3379 3380 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3381 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3382 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3383 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3384 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3385 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3386 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3387 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3388 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3389 GEN_VEXT_VF(vfmsac_vf_h, 2) 3390 GEN_VEXT_VF(vfmsac_vf_w, 4) 3391 GEN_VEXT_VF(vfmsac_vf_d, 8) 3392 3393 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3394 { 3395 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3396 } 3397 3398 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3399 { 3400 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3401 } 3402 3403 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3404 { 3405 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3406 } 3407 3408 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3409 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3410 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3411 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3412 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3413 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3414 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3415 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3416 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3417 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3418 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3419 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3420 3421 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3422 { 3423 return float16_muladd(d, b, a, 0, s); 3424 } 3425 3426 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3427 { 3428 return float32_muladd(d, b, a, 0, s); 3429 } 3430 3431 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3432 { 3433 return float64_muladd(d, b, a, 0, s); 3434 } 3435 3436 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3437 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3438 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3439 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) 3440 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3441 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3442 RVVCALL(OPFVF3, vfmadd_vf_h, 
OP_UUU_H, H2, H2, fmadd16) 3443 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3444 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3445 GEN_VEXT_VF(vfmadd_vf_h, 2) 3446 GEN_VEXT_VF(vfmadd_vf_w, 4) 3447 GEN_VEXT_VF(vfmadd_vf_d, 8) 3448 3449 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3450 { 3451 return float16_muladd(d, b, a, float_muladd_negate_c | 3452 float_muladd_negate_product, s); 3453 } 3454 3455 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3456 { 3457 return float32_muladd(d, b, a, float_muladd_negate_c | 3458 float_muladd_negate_product, s); 3459 } 3460 3461 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3462 { 3463 return float64_muladd(d, b, a, float_muladd_negate_c | 3464 float_muladd_negate_product, s); 3465 } 3466 3467 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3468 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3469 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3470 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3471 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3472 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3473 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3474 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3475 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3476 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3477 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3478 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3479 3480 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3481 { 3482 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3483 } 3484 3485 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3486 { 3487 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3488 } 3489 3490 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3491 { 3492 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3493 } 3494 3495 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3496 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3497 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3498 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3499 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3500 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3501 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3502 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3503 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3504 GEN_VEXT_VF(vfmsub_vf_h, 2) 3505 GEN_VEXT_VF(vfmsub_vf_w, 4) 3506 GEN_VEXT_VF(vfmsub_vf_d, 8) 3507 3508 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3509 { 3510 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3511 } 3512 3513 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3514 { 3515 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3516 } 3517 3518 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3519 { 3520 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3521 } 3522 3523 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3524 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3525 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3526 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3527 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3528 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3529 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3530 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3531 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, 
H8, fnmsub64)
3532 GEN_VEXT_VF(vfnmsub_vf_h, 2)
3533 GEN_VEXT_VF(vfnmsub_vf_w, 4)
3534 GEN_VEXT_VF(vfnmsub_vf_d, 8)
3535
3536 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3537 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3538 {
3539     return float32_muladd(float16_to_float32(a, true, s),
3540                           float16_to_float32(b, true, s), d, 0, s);
3541 }
3542
3543 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3544 {
3545     return float64_muladd(float32_to_float64(a, s),
3546                           float32_to_float64(b, s), d, 0, s);
3547 }
3548
3549 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3550 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3551 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3552 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
3553 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3554 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3555 GEN_VEXT_VF(vfwmacc_vf_h, 4)
3556 GEN_VEXT_VF(vfwmacc_vf_w, 8)
3557
3558 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3559 {
3560     return float32_muladd(bfloat16_to_float32(a, s),
3561                           bfloat16_to_float32(b, s), d, 0, s);
3562 }
3563
3564 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
3565 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
3566 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
3567 GEN_VEXT_VF(vfwmaccbf16_vf, 4)
3568
3569 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3570 {
3571     return float32_muladd(float16_to_float32(a, true, s),
3572                           float16_to_float32(b, true, s), d,
3573                           float_muladd_negate_c | float_muladd_negate_product,
3574                           s);
3575 }
3576
3577 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3578 {
3579     return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
3580                           d, float_muladd_negate_c |
3581                           float_muladd_negate_product, s);
3582 }
3583
3584 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3585 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3586 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3587 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
3588 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3589 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3590 GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3591 GEN_VEXT_VF(vfwnmacc_vf_w, 8)
3592
3593 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3594 {
3595     return float32_muladd(float16_to_float32(a, true, s),
3596                           float16_to_float32(b, true, s), d,
3597                           float_muladd_negate_c, s);
3598 }
3599
3600 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3601 {
3602     return float64_muladd(float32_to_float64(a, s),
3603                           float32_to_float64(b, s), d,
3604                           float_muladd_negate_c, s);
3605 }
3606
3607 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3608 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3609 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3610 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
3611 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3612 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3613 GEN_VEXT_VF(vfwmsac_vf_h, 4)
3614 GEN_VEXT_VF(vfwmsac_vf_w, 8)
3615
3616 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3617 {
3618     return float32_muladd(float16_to_float32(a, true, s),
3619                           float16_to_float32(b, true, s), d,
3620                           float_muladd_negate_product, s);
3621 }
3622
3623 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d,
float_status *s) 3624 { 3625 return float64_muladd(float32_to_float64(a, s), 3626 float32_to_float64(b, s), d, 3627 float_muladd_negate_product, s); 3628 } 3629 3630 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3631 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3632 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3633 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3634 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3635 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3636 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3637 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3638 3639 /* Vector Floating-Point Square-Root Instruction */ 3640 /* (TD, T2, TX2) */ 3641 #define OP_UU_H uint16_t, uint16_t, uint16_t 3642 #define OP_UU_W uint32_t, uint32_t, uint32_t 3643 #define OP_UU_D uint64_t, uint64_t, uint64_t 3644 3645 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3646 static void do_##NAME(void *vd, void *vs2, int i, \ 3647 CPURISCVState *env) \ 3648 { \ 3649 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3650 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3651 } 3652 3653 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3654 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3655 CPURISCVState *env, uint32_t desc) \ 3656 { \ 3657 uint32_t vm = vext_vm(desc); \ 3658 uint32_t vl = env->vl; \ 3659 uint32_t total_elems = \ 3660 vext_get_total_elems(env, desc, ESZ); \ 3661 uint32_t vta = vext_vta(desc); \ 3662 uint32_t vma = vext_vma(desc); \ 3663 uint32_t i; \ 3664 \ 3665 if (vl == 0) { \ 3666 return; \ 3667 } \ 3668 for (i = env->vstart; i < vl; i++) { \ 3669 if (!vm && !vext_elem_mask(v0, i)) { \ 3670 /* set masked-off elements to 1s */ \ 3671 vext_set_elems_1s(vd, vma, i * ESZ, \ 3672 (i + 1) * ESZ); \ 3673 continue; \ 3674 } \ 3675 do_##NAME(vd, vs2, i, env); \ 3676 } \ 3677 env->vstart = 0; \ 3678 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3679 total_elems * ESZ); \ 3680 } 3681 3682 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3683 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3684 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3685 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3686 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3687 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3688 3689 /* 3690 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3691 * 3692 * Adapted from riscv-v-spec recip.c: 3693 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3694 */ 3695 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3696 { 3697 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3698 uint64_t exp = extract64(f, frac_size, exp_size); 3699 uint64_t frac = extract64(f, 0, frac_size); 3700 3701 const uint8_t lookup_table[] = { 3702 52, 51, 50, 48, 47, 46, 44, 43, 3703 42, 41, 40, 39, 38, 36, 35, 34, 3704 33, 32, 31, 30, 30, 29, 28, 27, 3705 26, 25, 24, 23, 23, 22, 21, 20, 3706 19, 19, 18, 17, 16, 16, 15, 14, 3707 14, 13, 12, 12, 11, 10, 10, 9, 3708 9, 8, 7, 7, 6, 6, 5, 4, 3709 4, 3, 3, 2, 2, 1, 1, 0, 3710 127, 125, 123, 121, 119, 118, 116, 114, 3711 113, 111, 109, 108, 106, 105, 103, 102, 3712 100, 99, 97, 96, 95, 93, 92, 91, 3713 90, 88, 87, 86, 85, 84, 83, 82, 3714 80, 79, 78, 77, 76, 75, 74, 73, 3715 72, 71, 70, 70, 69, 68, 67, 66, 3716 65, 64, 63, 63, 62, 61, 60, 59, 3717 59, 58, 57, 56, 56, 55, 54, 53 3718 }; 3719 const int precision = 7; 3720 3721 if (exp == 0 && frac != 0) { /* subnormal */ 3722 /* Normalize the subnormal. 
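 * Shift the fraction left until its most-significant stored bit is set,
 * decrementing exp once per shift (it wraps below zero as an unsigned
 * value, which the biased out_exp computation below absorbs), then shift
 * once more so the leading one becomes implicit again before the table
 * lookup.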
*/ 3723 while (extract64(frac, frac_size - 1, 1) == 0) { 3724 exp--; 3725 frac <<= 1; 3726 } 3727 3728 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3729 } 3730 3731 int idx = ((exp & 1) << (precision - 1)) | 3732 (frac >> (frac_size - precision + 1)); 3733 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3734 (frac_size - precision); 3735 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3736 3737 uint64_t val = 0; 3738 val = deposit64(val, 0, frac_size, out_frac); 3739 val = deposit64(val, frac_size, exp_size, out_exp); 3740 val = deposit64(val, frac_size + exp_size, 1, sign); 3741 return val; 3742 } 3743 3744 static float16 frsqrt7_h(float16 f, float_status *s) 3745 { 3746 int exp_size = 5, frac_size = 10; 3747 bool sign = float16_is_neg(f); 3748 3749 /* 3750 * frsqrt7(sNaN) = canonical NaN 3751 * frsqrt7(-inf) = canonical NaN 3752 * frsqrt7(-normal) = canonical NaN 3753 * frsqrt7(-subnormal) = canonical NaN 3754 */ 3755 if (float16_is_signaling_nan(f, s) || 3756 (float16_is_infinity(f) && sign) || 3757 (float16_is_normal(f) && sign) || 3758 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3759 s->float_exception_flags |= float_flag_invalid; 3760 return float16_default_nan(s); 3761 } 3762 3763 /* frsqrt7(qNaN) = canonical NaN */ 3764 if (float16_is_quiet_nan(f, s)) { 3765 return float16_default_nan(s); 3766 } 3767 3768 /* frsqrt7(+-0) = +-inf */ 3769 if (float16_is_zero(f)) { 3770 s->float_exception_flags |= float_flag_divbyzero; 3771 return float16_set_sign(float16_infinity, sign); 3772 } 3773 3774 /* frsqrt7(+inf) = +0 */ 3775 if (float16_is_infinity(f) && !sign) { 3776 return float16_set_sign(float16_zero, sign); 3777 } 3778 3779 /* +normal, +subnormal */ 3780 uint64_t val = frsqrt7(f, exp_size, frac_size); 3781 return make_float16(val); 3782 } 3783 3784 static float32 frsqrt7_s(float32 f, float_status *s) 3785 { 3786 int exp_size = 8, frac_size = 23; 3787 bool sign = float32_is_neg(f); 3788 3789 /* 3790 * frsqrt7(sNaN) = canonical NaN 3791 * frsqrt7(-inf) = canonical NaN 3792 * frsqrt7(-normal) = canonical NaN 3793 * frsqrt7(-subnormal) = canonical NaN 3794 */ 3795 if (float32_is_signaling_nan(f, s) || 3796 (float32_is_infinity(f) && sign) || 3797 (float32_is_normal(f) && sign) || 3798 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3799 s->float_exception_flags |= float_flag_invalid; 3800 return float32_default_nan(s); 3801 } 3802 3803 /* frsqrt7(qNaN) = canonical NaN */ 3804 if (float32_is_quiet_nan(f, s)) { 3805 return float32_default_nan(s); 3806 } 3807 3808 /* frsqrt7(+-0) = +-inf */ 3809 if (float32_is_zero(f)) { 3810 s->float_exception_flags |= float_flag_divbyzero; 3811 return float32_set_sign(float32_infinity, sign); 3812 } 3813 3814 /* frsqrt7(+inf) = +0 */ 3815 if (float32_is_infinity(f) && !sign) { 3816 return float32_set_sign(float32_zero, sign); 3817 } 3818 3819 /* +normal, +subnormal */ 3820 uint64_t val = frsqrt7(f, exp_size, frac_size); 3821 return make_float32(val); 3822 } 3823 3824 static float64 frsqrt7_d(float64 f, float_status *s) 3825 { 3826 int exp_size = 11, frac_size = 52; 3827 bool sign = float64_is_neg(f); 3828 3829 /* 3830 * frsqrt7(sNaN) = canonical NaN 3831 * frsqrt7(-inf) = canonical NaN 3832 * frsqrt7(-normal) = canonical NaN 3833 * frsqrt7(-subnormal) = canonical NaN 3834 */ 3835 if (float64_is_signaling_nan(f, s) || 3836 (float64_is_infinity(f) && sign) || 3837 (float64_is_normal(f) && sign) || 3838 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3839 
s->float_exception_flags |= float_flag_invalid; 3840 return float64_default_nan(s); 3841 } 3842 3843 /* frsqrt7(qNaN) = canonical NaN */ 3844 if (float64_is_quiet_nan(f, s)) { 3845 return float64_default_nan(s); 3846 } 3847 3848 /* frsqrt7(+-0) = +-inf */ 3849 if (float64_is_zero(f)) { 3850 s->float_exception_flags |= float_flag_divbyzero; 3851 return float64_set_sign(float64_infinity, sign); 3852 } 3853 3854 /* frsqrt7(+inf) = +0 */ 3855 if (float64_is_infinity(f) && !sign) { 3856 return float64_set_sign(float64_zero, sign); 3857 } 3858 3859 /* +normal, +subnormal */ 3860 uint64_t val = frsqrt7(f, exp_size, frac_size); 3861 return make_float64(val); 3862 } 3863 3864 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3865 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3866 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3867 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3868 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3869 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3870 3871 /* 3872 * Vector Floating-Point Reciprocal Estimate Instruction 3873 * 3874 * Adapted from riscv-v-spec recip.c: 3875 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3876 */ 3877 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3878 float_status *s) 3879 { 3880 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3881 uint64_t exp = extract64(f, frac_size, exp_size); 3882 uint64_t frac = extract64(f, 0, frac_size); 3883 3884 const uint8_t lookup_table[] = { 3885 127, 125, 123, 121, 119, 117, 116, 114, 3886 112, 110, 109, 107, 105, 104, 102, 100, 3887 99, 97, 96, 94, 93, 91, 90, 88, 3888 87, 85, 84, 83, 81, 80, 79, 77, 3889 76, 75, 74, 72, 71, 70, 69, 68, 3890 66, 65, 64, 63, 62, 61, 60, 59, 3891 58, 57, 56, 55, 54, 53, 52, 51, 3892 50, 49, 48, 47, 46, 45, 44, 43, 3893 42, 41, 40, 40, 39, 38, 37, 36, 3894 35, 35, 34, 33, 32, 31, 31, 30, 3895 29, 28, 28, 27, 26, 25, 25, 24, 3896 23, 23, 22, 21, 21, 20, 19, 19, 3897 18, 17, 17, 16, 15, 15, 14, 14, 3898 13, 12, 12, 11, 11, 10, 9, 9, 3899 8, 8, 7, 7, 6, 5, 5, 4, 3900 4, 3, 3, 2, 2, 1, 1, 0 3901 }; 3902 const int precision = 7; 3903 3904 if (exp == 0 && frac != 0) { /* subnormal */ 3905 /* Normalize the subnormal. */ 3906 while (extract64(frac, frac_size - 1, 1) == 0) { 3907 exp--; 3908 frac <<= 1; 3909 } 3910 3911 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3912 3913 if (exp != 0 && exp != UINT64_MAX) { 3914 /* 3915 * Overflow to inf or max value of same sign, 3916 * depending on sign and rounding mode. 3917 */ 3918 s->float_exception_flags |= (float_flag_inexact | 3919 float_flag_overflow); 3920 3921 if ((s->float_rounding_mode == float_round_to_zero) || 3922 ((s->float_rounding_mode == float_round_down) && !sign) || 3923 ((s->float_rounding_mode == float_round_up) && sign)) { 3924 /* Return greatest/negative finite value. */ 3925 return (sign << (exp_size + frac_size)) | 3926 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3927 } else { 3928 /* Return +-inf. */ 3929 return (sign << (exp_size + frac_size)) | 3930 MAKE_64BIT_MASK(frac_size, exp_size); 3931 } 3932 } 3933 } 3934 3935 int idx = frac >> (frac_size - precision); 3936 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3937 (frac_size - precision); 3938 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3939 3940 if (out_exp == 0 || out_exp == UINT64_MAX) { 3941 /* 3942 * The result is subnormal, but don't raise the underflow exception, 3943 * because there's no additional loss of precision. 
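 * Denormalize the estimate instead: make the leading one explicit in the
 * stored fraction and shift it right by one position (or by two when
 * out_exp is -1), then encode the value with a zero exponent field.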
3944 */ 3945 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3946 if (out_exp == UINT64_MAX) { 3947 out_frac >>= 1; 3948 out_exp = 0; 3949 } 3950 } 3951 3952 uint64_t val = 0; 3953 val = deposit64(val, 0, frac_size, out_frac); 3954 val = deposit64(val, frac_size, exp_size, out_exp); 3955 val = deposit64(val, frac_size + exp_size, 1, sign); 3956 return val; 3957 } 3958 3959 static float16 frec7_h(float16 f, float_status *s) 3960 { 3961 int exp_size = 5, frac_size = 10; 3962 bool sign = float16_is_neg(f); 3963 3964 /* frec7(+-inf) = +-0 */ 3965 if (float16_is_infinity(f)) { 3966 return float16_set_sign(float16_zero, sign); 3967 } 3968 3969 /* frec7(+-0) = +-inf */ 3970 if (float16_is_zero(f)) { 3971 s->float_exception_flags |= float_flag_divbyzero; 3972 return float16_set_sign(float16_infinity, sign); 3973 } 3974 3975 /* frec7(sNaN) = canonical NaN */ 3976 if (float16_is_signaling_nan(f, s)) { 3977 s->float_exception_flags |= float_flag_invalid; 3978 return float16_default_nan(s); 3979 } 3980 3981 /* frec7(qNaN) = canonical NaN */ 3982 if (float16_is_quiet_nan(f, s)) { 3983 return float16_default_nan(s); 3984 } 3985 3986 /* +-normal, +-subnormal */ 3987 uint64_t val = frec7(f, exp_size, frac_size, s); 3988 return make_float16(val); 3989 } 3990 3991 static float32 frec7_s(float32 f, float_status *s) 3992 { 3993 int exp_size = 8, frac_size = 23; 3994 bool sign = float32_is_neg(f); 3995 3996 /* frec7(+-inf) = +-0 */ 3997 if (float32_is_infinity(f)) { 3998 return float32_set_sign(float32_zero, sign); 3999 } 4000 4001 /* frec7(+-0) = +-inf */ 4002 if (float32_is_zero(f)) { 4003 s->float_exception_flags |= float_flag_divbyzero; 4004 return float32_set_sign(float32_infinity, sign); 4005 } 4006 4007 /* frec7(sNaN) = canonical NaN */ 4008 if (float32_is_signaling_nan(f, s)) { 4009 s->float_exception_flags |= float_flag_invalid; 4010 return float32_default_nan(s); 4011 } 4012 4013 /* frec7(qNaN) = canonical NaN */ 4014 if (float32_is_quiet_nan(f, s)) { 4015 return float32_default_nan(s); 4016 } 4017 4018 /* +-normal, +-subnormal */ 4019 uint64_t val = frec7(f, exp_size, frac_size, s); 4020 return make_float32(val); 4021 } 4022 4023 static float64 frec7_d(float64 f, float_status *s) 4024 { 4025 int exp_size = 11, frac_size = 52; 4026 bool sign = float64_is_neg(f); 4027 4028 /* frec7(+-inf) = +-0 */ 4029 if (float64_is_infinity(f)) { 4030 return float64_set_sign(float64_zero, sign); 4031 } 4032 4033 /* frec7(+-0) = +-inf */ 4034 if (float64_is_zero(f)) { 4035 s->float_exception_flags |= float_flag_divbyzero; 4036 return float64_set_sign(float64_infinity, sign); 4037 } 4038 4039 /* frec7(sNaN) = canonical NaN */ 4040 if (float64_is_signaling_nan(f, s)) { 4041 s->float_exception_flags |= float_flag_invalid; 4042 return float64_default_nan(s); 4043 } 4044 4045 /* frec7(qNaN) = canonical NaN */ 4046 if (float64_is_quiet_nan(f, s)) { 4047 return float64_default_nan(s); 4048 } 4049 4050 /* +-normal, +-subnormal */ 4051 uint64_t val = frec7(f, exp_size, frac_size, s); 4052 return make_float64(val); 4053 } 4054 4055 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4056 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4057 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4058 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4059 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4060 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4061 4062 /* Vector Floating-Point MIN/MAX Instructions */ 4063 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4064 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 4065 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4066 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4067 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4068 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4069 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4070 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4071 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4072 GEN_VEXT_VF(vfmin_vf_h, 2) 4073 GEN_VEXT_VF(vfmin_vf_w, 4) 4074 GEN_VEXT_VF(vfmin_vf_d, 8) 4075 4076 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4077 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4078 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4079 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 4080 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 4081 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 4082 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4083 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4084 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4085 GEN_VEXT_VF(vfmax_vf_h, 2) 4086 GEN_VEXT_VF(vfmax_vf_w, 4) 4087 GEN_VEXT_VF(vfmax_vf_d, 8) 4088 4089 /* Vector Floating-Point Sign-Injection Instructions */ 4090 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4091 { 4092 return deposit64(b, 0, 15, a); 4093 } 4094 4095 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4096 { 4097 return deposit64(b, 0, 31, a); 4098 } 4099 4100 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4101 { 4102 return deposit64(b, 0, 63, a); 4103 } 4104 4105 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4106 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4107 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4108 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 4109 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 4110 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 4111 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4112 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4113 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4114 GEN_VEXT_VF(vfsgnj_vf_h, 2) 4115 GEN_VEXT_VF(vfsgnj_vf_w, 4) 4116 GEN_VEXT_VF(vfsgnj_vf_d, 8) 4117 4118 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4119 { 4120 return deposit64(~b, 0, 15, a); 4121 } 4122 4123 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4124 { 4125 return deposit64(~b, 0, 31, a); 4126 } 4127 4128 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4129 { 4130 return deposit64(~b, 0, 63, a); 4131 } 4132 4133 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4134 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4135 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4136 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 4137 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 4138 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 4139 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4140 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4141 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4142 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4143 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4144 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4145 4146 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4147 { 4148 return deposit64(b ^ a, 0, 15, a); 4149 } 4150 4151 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4152 { 4153 return deposit64(b ^ a, 0, 31, a); 4154 } 4155 4156 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
float_status *s) 4157 { 4158 return deposit64(b ^ a, 0, 63, a); 4159 } 4160 4161 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4162 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4163 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4164 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 4165 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 4166 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 4167 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4168 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4169 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4170 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 4171 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 4172 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 4173 4174 /* Vector Floating-Point Compare Instructions */ 4175 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4176 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4177 CPURISCVState *env, uint32_t desc) \ 4178 { \ 4179 uint32_t vm = vext_vm(desc); \ 4180 uint32_t vl = env->vl; \ 4181 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4182 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4183 uint32_t vma = vext_vma(desc); \ 4184 uint32_t i; \ 4185 \ 4186 for (i = env->vstart; i < vl; i++) { \ 4187 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4188 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4189 if (!vm && !vext_elem_mask(v0, i)) { \ 4190 /* set masked-off elements to 1s */ \ 4191 if (vma) { \ 4192 vext_set_elem_mask(vd, i, 1); \ 4193 } \ 4194 continue; \ 4195 } \ 4196 vext_set_elem_mask(vd, i, \ 4197 DO_OP(s2, s1, &env->fp_status)); \ 4198 } \ 4199 env->vstart = 0; \ 4200 /* 4201 * mask destination register are always tail-agnostic 4202 * set tail elements to 1s 4203 */ \ 4204 if (vta_all_1s) { \ 4205 for (; i < total_elems; i++) { \ 4206 vext_set_elem_mask(vd, i, 1); \ 4207 } \ 4208 } \ 4209 } 4210 4211 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4212 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4213 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4214 4215 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4216 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4217 CPURISCVState *env, uint32_t desc) \ 4218 { \ 4219 uint32_t vm = vext_vm(desc); \ 4220 uint32_t vl = env->vl; \ 4221 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4222 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4223 uint32_t vma = vext_vma(desc); \ 4224 uint32_t i; \ 4225 \ 4226 for (i = env->vstart; i < vl; i++) { \ 4227 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4228 if (!vm && !vext_elem_mask(v0, i)) { \ 4229 /* set masked-off elements to 1s */ \ 4230 if (vma) { \ 4231 vext_set_elem_mask(vd, i, 1); \ 4232 } \ 4233 continue; \ 4234 } \ 4235 vext_set_elem_mask(vd, i, \ 4236 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4237 } \ 4238 env->vstart = 0; \ 4239 /* 4240 * mask destination register are always tail-agnostic 4241 * set tail elements to 1s 4242 */ \ 4243 if (vta_all_1s) { \ 4244 for (; i < total_elems; i++) { \ 4245 vext_set_elem_mask(vd, i, 1); \ 4246 } \ 4247 } \ 4248 } 4249 4250 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4251 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4252 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4253 4254 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4255 { 4256 FloatRelation compare = float16_compare_quiet(a, b, s); 4257 return compare != float_relation_equal; 4258 } 4259 4260 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4261 { 4262 FloatRelation compare = 
float32_compare_quiet(a, b, s); 4263 return compare != float_relation_equal; 4264 } 4265 4266 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4267 { 4268 FloatRelation compare = float64_compare_quiet(a, b, s); 4269 return compare != float_relation_equal; 4270 } 4271 4272 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4273 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4274 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4275 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4276 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4277 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4278 4279 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4280 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4281 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4282 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4283 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4284 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4285 4286 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4287 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4288 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4289 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4290 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4291 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4292 4293 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4294 { 4295 FloatRelation compare = float16_compare(a, b, s); 4296 return compare == float_relation_greater; 4297 } 4298 4299 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4300 { 4301 FloatRelation compare = float32_compare(a, b, s); 4302 return compare == float_relation_greater; 4303 } 4304 4305 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4306 { 4307 FloatRelation compare = float64_compare(a, b, s); 4308 return compare == float_relation_greater; 4309 } 4310 4311 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4312 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4313 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4314 4315 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4316 { 4317 FloatRelation compare = float16_compare(a, b, s); 4318 return compare == float_relation_greater || 4319 compare == float_relation_equal; 4320 } 4321 4322 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4323 { 4324 FloatRelation compare = float32_compare(a, b, s); 4325 return compare == float_relation_greater || 4326 compare == float_relation_equal; 4327 } 4328 4329 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4330 { 4331 FloatRelation compare = float64_compare(a, b, s); 4332 return compare == float_relation_greater || 4333 compare == float_relation_equal; 4334 } 4335 4336 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4337 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4338 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4339 4340 /* Vector Floating-Point Classify Instruction */ 4341 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4342 static void do_##NAME(void *vd, void *vs2, int i) \ 4343 { \ 4344 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4345 *((TD *)vd + HD(i)) = OP(s2); \ 4346 } 4347 4348 #define GEN_VEXT_V(NAME, ESZ) \ 4349 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4350 CPURISCVState *env, uint32_t desc) \ 4351 { \ 4352 uint32_t vm = vext_vm(desc); \ 4353 uint32_t vl = env->vl; \ 4354 uint32_t total_elems = \ 4355 vext_get_total_elems(env, desc, ESZ); \ 4356 uint32_t vta = vext_vta(desc); \ 4357 
uint32_t vma = vext_vma(desc); \ 4358 uint32_t i; \ 4359 \ 4360 for (i = env->vstart; i < vl; i++) { \ 4361 if (!vm && !vext_elem_mask(v0, i)) { \ 4362 /* set masked-off elements to 1s */ \ 4363 vext_set_elems_1s(vd, vma, i * ESZ, \ 4364 (i + 1) * ESZ); \ 4365 continue; \ 4366 } \ 4367 do_##NAME(vd, vs2, i); \ 4368 } \ 4369 env->vstart = 0; \ 4370 /* set tail elements to 1s */ \ 4371 vext_set_elems_1s(vd, vta, vl * ESZ, \ 4372 total_elems * ESZ); \ 4373 } 4374 4375 target_ulong fclass_h(uint64_t frs1) 4376 { 4377 float16 f = frs1; 4378 bool sign = float16_is_neg(f); 4379 4380 if (float16_is_infinity(f)) { 4381 return sign ? 1 << 0 : 1 << 7; 4382 } else if (float16_is_zero(f)) { 4383 return sign ? 1 << 3 : 1 << 4; 4384 } else if (float16_is_zero_or_denormal(f)) { 4385 return sign ? 1 << 2 : 1 << 5; 4386 } else if (float16_is_any_nan(f)) { 4387 float_status s = { }; /* for snan_bit_is_one */ 4388 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4389 } else { 4390 return sign ? 1 << 1 : 1 << 6; 4391 } 4392 } 4393 4394 target_ulong fclass_s(uint64_t frs1) 4395 { 4396 float32 f = frs1; 4397 bool sign = float32_is_neg(f); 4398 4399 if (float32_is_infinity(f)) { 4400 return sign ? 1 << 0 : 1 << 7; 4401 } else if (float32_is_zero(f)) { 4402 return sign ? 1 << 3 : 1 << 4; 4403 } else if (float32_is_zero_or_denormal(f)) { 4404 return sign ? 1 << 2 : 1 << 5; 4405 } else if (float32_is_any_nan(f)) { 4406 float_status s = { }; /* for snan_bit_is_one */ 4407 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4408 } else { 4409 return sign ? 1 << 1 : 1 << 6; 4410 } 4411 } 4412 4413 target_ulong fclass_d(uint64_t frs1) 4414 { 4415 float64 f = frs1; 4416 bool sign = float64_is_neg(f); 4417 4418 if (float64_is_infinity(f)) { 4419 return sign ? 1 << 0 : 1 << 7; 4420 } else if (float64_is_zero(f)) { 4421 return sign ? 1 << 3 : 1 << 4; 4422 } else if (float64_is_zero_or_denormal(f)) { 4423 return sign ? 1 << 2 : 1 << 5; 4424 } else if (float64_is_any_nan(f)) { 4425 float_status s = { }; /* for snan_bit_is_one */ 4426 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4427 } else { 4428 return sign ? 1 << 1 : 1 << 6; 4429 } 4430 } 4431 4432 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4433 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4434 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4435 GEN_VEXT_V(vfclass_v_h, 2) 4436 GEN_VEXT_V(vfclass_v_w, 4) 4437 GEN_VEXT_V(vfclass_v_d, 8) 4438 4439 /* Vector Floating-Point Merge Instruction */ 4440 4441 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4442 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4443 CPURISCVState *env, uint32_t desc) \ 4444 { \ 4445 uint32_t vm = vext_vm(desc); \ 4446 uint32_t vl = env->vl; \ 4447 uint32_t esz = sizeof(ETYPE); \ 4448 uint32_t total_elems = \ 4449 vext_get_total_elems(env, desc, esz); \ 4450 uint32_t vta = vext_vta(desc); \ 4451 uint32_t i; \ 4452 \ 4453 for (i = env->vstart; i < vl; i++) { \ 4454 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4455 *((ETYPE *)vd + H(i)) = \ 4456 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4457 } \ 4458 env->vstart = 0; \ 4459 /* set tail elements to 1s */ \ 4460 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4461 } 4462 4463 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4464 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4465 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4466 4467 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4468 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
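 * These conversions take their rounding mode from env->fp_status, which
 * the translator programs (from frm, or a fixed mode for the static
 * variants) before the helper runs; invalid/inexact conditions accumulate
 * there and are reported through fflags.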
*/ 4469 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4470 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4471 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4472 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4473 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4474 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4475 4476 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4477 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4478 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4479 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4480 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4481 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4482 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4483 4484 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4485 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4486 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4487 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4488 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4489 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4490 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4491 4492 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4493 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4494 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4495 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4496 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4497 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4498 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4499 4500 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4501 /* (TD, T2, TX2) */ 4502 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4503 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4504 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4505 /* 4506 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4507 */ 4508 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4509 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4510 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4511 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4512 4513 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4514 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4515 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4516 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4517 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4518 4519 /* 4520 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. 4521 */ 4522 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4523 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4524 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4525 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4526 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4527 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4528 4529 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4530 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4531 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4532 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4533 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4534 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4535 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4536 4537 /* 4538 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. 
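 * float16_to_float32() takes an extra 'ieee' flag on top of the value and
 * the float_status pointer, so a two-argument wrapper is needed below to
 * match the OPFVV1 operation signature; the flag is set so the input is
 * treated as IEEE half precision.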
4539 */ 4540 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4541 { 4542 return float16_to_float32(a, true, s); 4543 } 4544 4545 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4546 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4547 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4548 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4549 4550 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32) 4551 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4) 4552 4553 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4554 /* (TD, T2, TX2) */ 4555 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4556 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4557 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4558 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4559 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4560 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4561 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4562 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4563 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4564 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4565 4566 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4567 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4568 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4569 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4570 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4571 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4572 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4573 4574 /* 4575 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. 4576 */ 4577 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4578 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4579 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4580 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4581 4582 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4583 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4584 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4585 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4586 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4587 4588 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4589 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4590 { 4591 return float32_to_float16(a, true, s); 4592 } 4593 4594 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4595 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4596 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4597 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4598 4599 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16) 4600 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2) 4601 4602 /* 4603 * Vector Reduction Operations 4604 */ 4605 /* Vector Single-Width Integer Reduction Instructions */ 4606 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4607 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4608 void *vs2, CPURISCVState *env, \ 4609 uint32_t desc) \ 4610 { \ 4611 uint32_t vm = vext_vm(desc); \ 4612 uint32_t vl = env->vl; \ 4613 uint32_t esz = sizeof(TD); \ 4614 uint32_t vlenb = simd_maxsz(desc); \ 4615 uint32_t vta = vext_vta(desc); \ 4616 uint32_t i; \ 4617 TD s1 = *((TD *)vs1 + HD(0)); \ 4618 \ 4619 for (i = env->vstart; i < vl; i++) { \ 4620 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4621 if (!vm && !vext_elem_mask(v0, i)) { \ 4622 continue; \ 4623 } \ 4624 s1 = OP(s1, (TD)s2); \ 4625 } \ 4626 *((TD *)vd + HD(0)) = s1; \ 4627 env->vstart = 0; \ 4628 /* set tail elements to 1s */ \ 4629 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4630 } 4631 4632 /* vd[0] = sum(vs1[0], vs2[*]) */ 4633 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4634 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4635 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4636 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4637 4638 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4639 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4640 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4641 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4642 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4643 4644 /* vd[0] = max(vs1[0], vs2[*]) */ 4645 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4646 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4647 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4648 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4649 4650 /* vd[0] = minu(vs1[0], vs2[*]) */ 4651 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4652 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4653 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4654 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4655 4656 /* vd[0] = min(vs1[0], vs2[*]) */ 4657 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4658 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4659 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4660 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4661 4662 /* vd[0] = and(vs1[0], vs2[*]) */ 4663 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4664 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4665 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4666 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4667 4668 /* vd[0] = or(vs1[0], vs2[*]) */ 4669 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4670 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4671 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4672 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4673 4674 /* vd[0] = xor(vs1[0], vs2[*]) 
*/ 4675 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4676 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4677 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4678 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4679 4680 /* Vector Widening Integer Reduction Instructions */ 4681 /* signed sum reduction into double-width accumulator */ 4682 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4683 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4684 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4685 4686 /* Unsigned sum reduction into double-width accumulator */ 4687 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4688 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4689 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4690 4691 /* Vector Single-Width Floating-Point Reduction Instructions */ 4692 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4693 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4694 void *vs2, CPURISCVState *env, \ 4695 uint32_t desc) \ 4696 { \ 4697 uint32_t vm = vext_vm(desc); \ 4698 uint32_t vl = env->vl; \ 4699 uint32_t esz = sizeof(TD); \ 4700 uint32_t vlenb = simd_maxsz(desc); \ 4701 uint32_t vta = vext_vta(desc); \ 4702 uint32_t i; \ 4703 TD s1 = *((TD *)vs1 + HD(0)); \ 4704 \ 4705 for (i = env->vstart; i < vl; i++) { \ 4706 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4707 if (!vm && !vext_elem_mask(v0, i)) { \ 4708 continue; \ 4709 } \ 4710 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4711 } \ 4712 *((TD *)vd + HD(0)) = s1; \ 4713 env->vstart = 0; \ 4714 /* set tail elements to 1s */ \ 4715 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4716 } 4717 4718 /* Unordered sum */ 4719 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4720 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4721 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4722 4723 /* Ordered sum */ 4724 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4725 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4726 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4727 4728 /* Maximum value */ 4729 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, 4730 float16_maximum_number) 4731 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, 4732 float32_maximum_number) 4733 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, 4734 float64_maximum_number) 4735 4736 /* Minimum value */ 4737 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, 4738 float16_minimum_number) 4739 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, 4740 float32_minimum_number) 4741 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, 4742 float64_minimum_number) 4743 4744 /* Vector Widening Floating-Point Add Instructions */ 4745 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4746 { 4747 return float32_add(a, float16_to_float32(b, true, s), s); 4748 } 4749 4750 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4751 { 4752 return float64_add(a, float32_to_float64(b, s), s); 4753 } 4754 4755 /* Vector Widening Floating-Point Reduction Instructions */ 4756 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4757 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4758 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4759 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, 
fwadd16) 4760 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4761 4762 /* 4763 * Vector Mask Operations 4764 */ 4765 /* Vector Mask-Register Logical Instructions */ 4766 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4767 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4768 void *vs2, CPURISCVState *env, \ 4769 uint32_t desc) \ 4770 { \ 4771 uint32_t vl = env->vl; \ 4772 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4773 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4774 uint32_t i; \ 4775 int a, b; \ 4776 \ 4777 for (i = env->vstart; i < vl; i++) { \ 4778 a = vext_elem_mask(vs1, i); \ 4779 b = vext_elem_mask(vs2, i); \ 4780 vext_set_elem_mask(vd, i, OP(b, a)); \ 4781 } \ 4782 env->vstart = 0; \ 4783 /* 4784 * mask destination register are always tail-agnostic 4785 * set tail elements to 1s 4786 */ \ 4787 if (vta_all_1s) { \ 4788 for (; i < total_elems; i++) { \ 4789 vext_set_elem_mask(vd, i, 1); \ 4790 } \ 4791 } \ 4792 } 4793 4794 #define DO_NAND(N, M) (!(N & M)) 4795 #define DO_ANDNOT(N, M) (N & !M) 4796 #define DO_NOR(N, M) (!(N | M)) 4797 #define DO_ORNOT(N, M) (N | !M) 4798 #define DO_XNOR(N, M) (!(N ^ M)) 4799 4800 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4801 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4802 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4803 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4804 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4805 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4806 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4807 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4808 4809 /* Vector count population in mask vcpop */ 4810 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4811 uint32_t desc) 4812 { 4813 target_ulong cnt = 0; 4814 uint32_t vm = vext_vm(desc); 4815 uint32_t vl = env->vl; 4816 int i; 4817 4818 for (i = env->vstart; i < vl; i++) { 4819 if (vm || vext_elem_mask(v0, i)) { 4820 if (vext_elem_mask(vs2, i)) { 4821 cnt++; 4822 } 4823 } 4824 } 4825 env->vstart = 0; 4826 return cnt; 4827 } 4828 4829 /* vfirst find-first-set mask bit */ 4830 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4831 uint32_t desc) 4832 { 4833 uint32_t vm = vext_vm(desc); 4834 uint32_t vl = env->vl; 4835 int i; 4836 4837 for (i = env->vstart; i < vl; i++) { 4838 if (vm || vext_elem_mask(v0, i)) { 4839 if (vext_elem_mask(vs2, i)) { 4840 return i; 4841 } 4842 } 4843 } 4844 env->vstart = 0; 4845 return -1LL; 4846 } 4847 4848 enum set_mask_type { 4849 ONLY_FIRST = 1, 4850 INCLUDE_FIRST, 4851 BEFORE_FIRST, 4852 }; 4853 4854 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4855 uint32_t desc, enum set_mask_type type) 4856 { 4857 uint32_t vm = vext_vm(desc); 4858 uint32_t vl = env->vl; 4859 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; 4860 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4861 uint32_t vma = vext_vma(desc); 4862 int i; 4863 bool first_mask_bit = false; 4864 4865 for (i = env->vstart; i < vl; i++) { 4866 if (!vm && !vext_elem_mask(v0, i)) { 4867 /* set masked-off elements to 1s */ 4868 if (vma) { 4869 vext_set_elem_mask(vd, i, 1); 4870 } 4871 continue; 4872 } 4873 /* write a zero to all following active elements */ 4874 if (first_mask_bit) { 4875 vext_set_elem_mask(vd, i, 0); 4876 continue; 4877 } 4878 if (vext_elem_mask(vs2, i)) { 4879 first_mask_bit = true; 4880 if (type == BEFORE_FIRST) { 4881 vext_set_elem_mask(vd, i, 0); 4882 } else { 4883 vext_set_elem_mask(vd, i, 1); 4884 } 4885 } else { 4886 if (type == ONLY_FIRST) { 4887 vext_set_elem_mask(vd, i, 0); 4888 } else { 4889 vext_set_elem_mask(vd, i, 1); 4890 } 4891 } 4892 } 4893 env->vstart 
= 0; 4894 /* 4895 * mask destination register are always tail-agnostic 4896 * set tail elements to 1s 4897 */ 4898 if (vta_all_1s) { 4899 for (; i < total_elems; i++) { 4900 vext_set_elem_mask(vd, i, 1); 4901 } 4902 } 4903 } 4904 4905 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4906 uint32_t desc) 4907 { 4908 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4909 } 4910 4911 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4912 uint32_t desc) 4913 { 4914 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4915 } 4916 4917 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4918 uint32_t desc) 4919 { 4920 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4921 } 4922 4923 /* Vector Iota Instruction */ 4924 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4925 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4926 uint32_t desc) \ 4927 { \ 4928 uint32_t vm = vext_vm(desc); \ 4929 uint32_t vl = env->vl; \ 4930 uint32_t esz = sizeof(ETYPE); \ 4931 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4932 uint32_t vta = vext_vta(desc); \ 4933 uint32_t vma = vext_vma(desc); \ 4934 uint32_t sum = 0; \ 4935 int i; \ 4936 \ 4937 for (i = env->vstart; i < vl; i++) { \ 4938 if (!vm && !vext_elem_mask(v0, i)) { \ 4939 /* set masked-off elements to 1s */ \ 4940 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4941 continue; \ 4942 } \ 4943 *((ETYPE *)vd + H(i)) = sum; \ 4944 if (vext_elem_mask(vs2, i)) { \ 4945 sum++; \ 4946 } \ 4947 } \ 4948 env->vstart = 0; \ 4949 /* set tail elements to 1s */ \ 4950 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4951 } 4952 4953 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4954 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4955 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4956 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4957 4958 /* Vector Element Index Instruction */ 4959 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4960 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4961 { \ 4962 uint32_t vm = vext_vm(desc); \ 4963 uint32_t vl = env->vl; \ 4964 uint32_t esz = sizeof(ETYPE); \ 4965 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4966 uint32_t vta = vext_vta(desc); \ 4967 uint32_t vma = vext_vma(desc); \ 4968 int i; \ 4969 \ 4970 for (i = env->vstart; i < vl; i++) { \ 4971 if (!vm && !vext_elem_mask(v0, i)) { \ 4972 /* set masked-off elements to 1s */ \ 4973 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4974 continue; \ 4975 } \ 4976 *((ETYPE *)vd + H(i)) = i; \ 4977 } \ 4978 env->vstart = 0; \ 4979 /* set tail elements to 1s */ \ 4980 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4981 } 4982 4983 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4984 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4985 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4986 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4987 4988 /* 4989 * Vector Permutation Instructions 4990 */ 4991 4992 /* Vector Slide Instructions */ 4993 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4994 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4995 CPURISCVState *env, uint32_t desc) \ 4996 { \ 4997 uint32_t vm = vext_vm(desc); \ 4998 uint32_t vl = env->vl; \ 4999 uint32_t esz = sizeof(ETYPE); \ 5000 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5001 uint32_t vta = vext_vta(desc); \ 5002 uint32_t vma = vext_vma(desc); \ 5003 target_ulong offset = s1, i_min, i; \ 5004 \ 5005 i_min = MAX(env->vstart, offset); \ 5006 for (i = i_min; i < vl; i++) { \ 5007 if (!vm && 

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    target_ulong i_max, i; \
    \
    i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \
    for (i = env->vstart; i < i_max; ++i) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
    } \
    \
    for (i = i_max; i < vl; ++i) { \
        if (vm || vext_elem_mask(v0, i)) { \
            *((ETYPE *)vd + H(i)) = 0; \
        } \
    } \
    \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
                                 void *vs2, CPURISCVState *env, \
                                 uint32_t desc) \
{ \
    typedef uint##BITWIDTH##_t ETYPE; \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        if (i == 0) { \
            *((ETYPE *)vd + H(i)) = s1; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
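
/*
 * A minimal sketch of the unmasked vslide1up.vx behaviour produced by the
 * generators above: the scalar goes into element 0 and every other body
 * element comes from the previous source element.  example_slide1up() and
 * its plain int types are illustrative assumptions only.
 *
 * vs2 = {10, 11, 12, 13}, s1 = 99, vl = 4  ->  vd = {99, 10, 11, 12}
 */
static G_GNUC_UNUSED void example_slide1up(const int *vs2, int *vd,
                                           int vl, int s1)
{
    for (int i = 0; i < vl; i++) {
        /* element 0 takes the scalar, element i takes vs2[i - 1] */
        vd[i] = (i == 0) ? s1 : vs2[i - 1];
    }
}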

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
                                   void *vs2, CPURISCVState *env, \
                                   uint32_t desc) \
{ \
    typedef uint##BITWIDTH##_t ETYPE; \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        if (i == vl - 1) { \
            *((ETYPE *)vd + H(i)) = s1; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(TS2); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint64_t index; \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        index = *((TS1 *)vs1 + HS1(i)); \
        if (index >= vlmax) { \
            *((TS2 *)vd + HS2(i)) = 0; \
        } else { \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
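
/*
 * A minimal sketch of the unmasked vrgather.vv indexing done by the
 * helper above: each destination element is fetched through the index in
 * vs1, and any index at or beyond VLMAX yields 0.  example_vrgather() and
 * the unsigned/int element types are illustrative assumptions.
 *
 * vs2 = {10, 11, 12, 13}, vs1 = {3, 0, 7, 1}, vlmax = 4
 *   -> vd = {13, 10, 0, 11}
 */
static G_GNUC_UNUSED void example_vrgather(const unsigned *vs1, const int *vs2,
                                           int *vd, int vl, unsigned vlmax)
{
    for (int i = 0; i < vl; i++) {
        unsigned index = vs1[i];

        /* out-of-range indices read as zero rather than trapping */
        vd[i] = (index >= vlmax) ? 0 : vs2[index];
    }
}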

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint64_t index = s1; \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        if (index >= vlmax) { \
            *((ETYPE *)vd + H(i)) = 0; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
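
/*
 * A minimal sketch of the unmasked vrgather.vx case handled above: the
 * single scalar index selects one source element (or 0 when out of range)
 * and that value is written to every body element, i.e. a broadcast of
 * vs2[x[rs1]].  example_vrgather_vx() and its types are illustrative
 * assumptions only.
 *
 * vs2 = {10, 11, 12, 13}, s1 = 2, vlmax = 4, vl = 4  ->  vd = {12, 12, 12, 12}
 */
static G_GNUC_UNUSED void example_vrgather_vx(const int *vs2, int *vd, int vl,
                                              uint64_t s1, unsigned vlmax)
{
    int value = (s1 >= vlmax) ? 0 : vs2[s1];

    for (int i = 0; i < vl; i++) {
        vd[i] = value;
    }
}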

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t num = 0, i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vext_elem_mask(vs1, i)) { \
            continue; \
        } \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
        num++; \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
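
/*
 * A minimal sketch of why the GEN_VEXT_INT_EXT instantiations above give
 * zero- or sign-extension purely through the C types they are expanded
 * with: the widening happens in the plain element assignment, so DTYPE's
 * signedness decides the result.  example_int_ext() and its g_assert()
 * checks are illustrative assumptions, not part of the helpers.
 */
static G_GNUC_UNUSED void example_int_ext(void)
{
    uint8_t u = 0x80;       /* 128 as an unsigned byte */
    int8_t s = -128;        /* the same bit pattern, signed */

    uint16_t zext = u;      /* vzext.vf2 style widening: 0x0080 */
    int16_t sext = s;       /* vsext.vf2 style widening: 0xff80 */

    g_assert(zext == 0x0080);
    g_assert(sext == -128);
}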