/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

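/*
 * Worked example (illustrative values, not mandated by the code above):
 * with VLEN = 128, vsew = 2 (SEW = 32) and vlmul = 1 (LMUL = 2),
 * VLMAX = LMUL * VLEN / SEW = 2 * 128 / 32 = 8, so a requested AVL of
 * s1 = 10 is clamped to vl = 8.
 */
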
/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

static inline uint32_t vext_vta(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA);
}

static inline uint32_t vext_vma(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VMA);
}

static inline uint32_t vext_vta_all_1s(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * Get the number of total elements, including prestart, body and tail
 * elements.  Note that when LMUL < 1, the tail includes the elements past
 * VLMAX that are held in the same vector register.
 */
static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
                                            uint32_t esz)
{
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
    return (vlenb << emul) / esz;
}

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

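/*
 * Illustrative numbers (assumed, not fixed by the code above): with
 * VLEN = 128 bits, vlenb = 16.  For SEW = 16 (log2_esz = 1) and LMUL = 4
 * (lmul = 2), scale = 2 - 1 = 1 and VLMAX = 16 << 1 = 32 elements.
 */
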
/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check.  In user mode, there is no watchpoint support for now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry.  Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

/* set agnostic elements to 1s */
static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
                              uint32_t tot)
{
    if (is_agnostic == 0) {
        /* policy undisturbed */
        return;
    }
    if (tot - cnt == 0) {
        return;
    }
    memset(base + cnt, -1, tot - cnt);
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* elements operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

static void vext_set_tail_elems_1s(CPURISCVState *env, target_ulong vl,
                                   void *vd, uint32_t desc, uint32_t nf,
                                   uint32_t esz, uint32_t max_elems)
{
    uint32_t total_elems, vlenb, registers_used;
    uint32_t vta = vext_vta(desc);
    int k;

    if (vta == 0) {
        return;
    }

    total_elems = vext_get_total_elems(env, desc, esz);
    vlenb = riscv_cpu_cfg(env)->vlen >> 3;

    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }

    if (nf * max_elems % total_elems != 0) {
        registers_used = ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
                          registers_used * vlenb);
    }
}

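/*
 * Illustrative case (numbers assumed): a two-field segment load with
 * nf = 2, max_elems = 8, vl = 3 and esz = 4 sets bytes [12, 32) of the
 * first field and bytes [44, 64) of the second field to all-1s when the
 * tail-agnostic policy is selected.
 */
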
/*
 * stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

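/*
 * Illustrative address pattern (values assumed): vlse32.v with stride = 12
 * reads element i from base + 12 * i; a segment variant with nf = 2 reads
 * field k of element i from base + stride * i + k * 4.
 */
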
/*
 * unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
             uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, evl, vd, desc, nf, esz, max_elems);
}

/*
 * masked unit-stride load and store operations are a special case of
 * strided operations, with stride = NF * sizeof(ETYPE)
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 * unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC());
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC());
}

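/*
 * e.g. (illustrative): with vl = 17, evl = (17 + 7) >> 3 = 3, so vlm.v and
 * vsm.v transfer exactly 3 mask bytes regardless of SEW and LMUL.
 */
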
/*
 * index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());      \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC());                                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

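/*
 * Illustrative (values assumed): for vluxei16.v with SEW = 32, element i is
 * accessed at base + (uint16_t)vs2[i]; field k of a segment form adds k * 4.
 */
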
/*
 * unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env, env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/* Unsigned min/max */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)

/*
 * load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3;
    uint32_t max_elems = vlenb >> log2_esz;

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /* load/store rest of elements of current segment pointed by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd,
                      ra);
        }
        k++;
    }

    /* load/store elements for rest of segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}

#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
                    ctzl(sizeof(ETYPE)), GETPC());   \
}

GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

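/*
 * Illustrative (vlenb assumed to be 16): vl2re32.v transfers nf * vlenb =
 * 32 bytes, i.e. 8 32-bit elements, regardless of the current vl; only a
 * non-zero vstart skips the already-completed leading elements.
 */
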
#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
                    ctzl(sizeof(ETYPE)), GETPC());   \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)

/*
 * Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
}
#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

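/*
 * For reference (rough expansion, not generated text):
 * RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) below produces
 *
 *   static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *   {
 *       int8_t s1 = *((int8_t *)vs1 + H1(i));
 *       int8_t s2 = *((int8_t *)vs2 + H1(i));
 *       *((int8_t *)vd + H1(i)) = s2 + s1;
 *   }
 */
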
RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivv2_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME, ESZ)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1,  \
                  void *vs2, CPURISCVState *env,  \
                  uint32_t desc)                  \
{                                                 \
    do_vext_vv(vd, v0, vs1, vs2, env, desc,       \
               do_##NAME, ESZ);                   \
}

GEN_VEXT_VV(vadd_vv_b, 1)
GEN_VEXT_VV(vadd_vv_h, 2)
GEN_VEXT_VV(vadd_vv_w, 4)
GEN_VEXT_VV(vadd_vv_d, 8)
GEN_VEXT_VV(vsub_vv_b, 1)
GEN_VEXT_VV(vsub_vv_h, 2)
GEN_VEXT_VV(vsub_vv_w, 4)
GEN_VEXT_VV(vsub_vv_d, 8)

typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operand type.
 * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
 */
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
}

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivx2_fn fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
            continue;
        }
        fn(vd, s1, vs2, i);
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME, ESZ)                          \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,  \
                  void *vs2, CPURISCVState *env,        \
                  uint32_t desc)                        \
{                                                       \
    do_vext_vx(vd, v0, s1, vs2, env, desc,              \
               do_##NAME, ESZ);                         \
}

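/*
 * Note on the (TX1)(T1)s1 cast above (illustrative values): for
 * do_vrsub_vx_b with s1 = 0x1ff, the scalar first narrows to (int8_t)-1,
 * so each element gets -1 - s2.
 */
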
GEN_VEXT_VX(vadd_vx_b, 1)
GEN_VEXT_VX(vadd_vx_h, 2)
GEN_VEXT_VX(vadd_vx_w, 4)
GEN_VEXT_VX(vadd_vx_d, 8)
GEN_VEXT_VX(vsub_vx_b, 1)
GEN_VEXT_VX(vsub_vx_h, 2)
GEN_VEXT_VX(vsub_vx_w, 4)
GEN_VEXT_VX(vsub_vx_d, 8)
GEN_VEXT_VX(vrsub_vx_b, 1)
GEN_VEXT_VX(vrsub_vx_h, 2)
GEN_VEXT_VX(vrsub_vx_w, 4)
GEN_VEXT_VX(vrsub_vx_d, 8)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t

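/*
 * Reading the tuples above (for orientation): WOP_WUUU_B is (TD, T1, T2,
 * TX1, TX2) = (uint16_t, uint8_t, uint16_t, uint16_t, uint16_t), i.e. the
 * .wv/.wx forms where vs2 is already 2*SEW wide and only the other operand
 * is widened before the operation.
 */
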
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 2)
GEN_VEXT_VV(vwaddu_vv_h, 4)
GEN_VEXT_VV(vwaddu_vv_w, 8)
GEN_VEXT_VV(vwsubu_vv_b, 2)
GEN_VEXT_VV(vwsubu_vv_h, 4)
GEN_VEXT_VV(vwsubu_vv_w, 8)
GEN_VEXT_VV(vwadd_vv_b, 2)
GEN_VEXT_VV(vwadd_vv_h, 4)
GEN_VEXT_VV(vwadd_vv_w, 8)
GEN_VEXT_VV(vwsub_vv_b, 2)
GEN_VEXT_VV(vwsub_vv_h, 4)
GEN_VEXT_VV(vwsub_vv_w, 8)
GEN_VEXT_VV(vwaddu_wv_b, 2)
GEN_VEXT_VV(vwaddu_wv_h, 4)
GEN_VEXT_VV(vwaddu_wv_w, 8)
GEN_VEXT_VV(vwsubu_wv_b, 2)
GEN_VEXT_VV(vwsubu_wv_h, 4)
GEN_VEXT_VV(vwsubu_wv_w, 8)
GEN_VEXT_VV(vwadd_wv_b, 2)
GEN_VEXT_VV(vwadd_wv_h, 4)
GEN_VEXT_VV(vwadd_wv_w, 8)
GEN_VEXT_VV(vwsub_wv_b, 2)
GEN_VEXT_VV(vwsub_wv_h, 4)
GEN_VEXT_VV(vwsub_wv_w, 8)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 2)
GEN_VEXT_VX(vwaddu_vx_h, 4)
GEN_VEXT_VX(vwaddu_vx_w, 8)
GEN_VEXT_VX(vwsubu_vx_b, 2)
GEN_VEXT_VX(vwsubu_vx_h, 4)
GEN_VEXT_VX(vwsubu_vx_w, 8)
GEN_VEXT_VX(vwadd_vx_b, 2)
GEN_VEXT_VX(vwadd_vx_h, 4)
GEN_VEXT_VX(vwadd_vx_w, 8)
GEN_VEXT_VX(vwsub_vx_b, 2)
GEN_VEXT_VX(vwsub_vx_h, 4)
GEN_VEXT_VX(vwsub_vx_w, 8)
GEN_VEXT_VX(vwaddu_wx_b, 2)
GEN_VEXT_VX(vwaddu_wx_h, 4)
GEN_VEXT_VX(vwaddu_wx_w, 8)
GEN_VEXT_VX(vwsubu_wx_b, 2)
GEN_VEXT_VX(vwsubu_wx_h, 4)
GEN_VEXT_VX(vwsubu_wx_w, 8)
GEN_VEXT_VX(vwadd_wx_b, 2)
GEN_VEXT_VX(vwadd_wx_h, 4)
GEN_VEXT_VX(vwadd_wx_w, 8)
GEN_VEXT_VX(vwsub_wx_b, 2)
GEN_VEXT_VX(vwsub_wx_h, 4)
GEN_VEXT_VX(vwsub_wx_w, 8)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

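/*
 * e.g. (uint8_t, illustrative): DO_VADC(0xff, 0x00, 1) wraps to 0x00; the
 * carry-out itself is produced separately by the vmadc/vmsbc forms below.
 */
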
#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t esz = sizeof(ETYPE);                             \
    uint32_t total_elems =                                    \
        vext_get_total_elems(env, desc, esz);                 \
    uint32_t vta = vext_vta(desc);                            \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
    /* set tail elements to 1s */                             \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t esz = sizeof(ETYPE);                                        \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t i;                                                          \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
    /* set tail elements to 1s */                                        \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
                          (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)

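/*
 * Worked example (illustrative, uint8_t): DO_MADC(200, 100, 0) evaluates
 * (uint8_t)(200 + 100) = 44 < 200, i.e. carry-out = 1; DO_MSBC(5, 9, 0) is
 * 5 < 9, i.e. a borrow is generated.
 */
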
#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,           \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    uint32_t vl = env->vl;                                       \
    uint32_t vm = vext_vm(desc);                                 \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;             \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                 \
    uint32_t i;                                                  \
                                                                 \
    for (i = env->vstart; i < vl; i++) {                         \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                       \
        ETYPE carry = !vm && vext_elem_mask(v0, i);              \
        vext_set_elem_mask(vd, i,                                \
                DO_OP(s2, (ETYPE)(target_long)s1, carry));       \
    }                                                            \
    env->vstart = 0;                                             \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                          \
    if (vta_all_1s) {                                            \
        for (; i < total_elems; i++) {                           \
            vext_set_elem_mask(vd, i, 1);                        \
        }                                                        \
    }                                                            \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M)  (N << (M))
#define DO_SRL(N, M)  (N >> (M))

/* generate the helpers for shift instructions with two vector operands */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                     \
                  void *vs2, CPURISCVState *env, uint32_t desc)      \
{                                                                    \
    uint32_t vm = vext_vm(desc);                                     \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(TS1);                                      \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t vma = vext_vma(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        if (!vm && !vext_elem_mask(v0, i)) {                         \
            /* set masked-off elements to 1s */                      \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);      \
            continue;                                                \
        }                                                            \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                             \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                             \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                   \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)

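/*
 * Note (illustrative): the shift amount is truncated to log2(SEW) bits by
 * MASK, so for the SEW = 8 variant of vsll.vv a per-element shift value of
 * 9 shifts by 9 & 0x7 = 1.
 */
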
/*
 * generate the helpers for shift instructions with one vector and one scalar
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
                  void *vs2, CPURISCVState *env,            \
                  uint32_t desc)                            \
{                                                           \
    uint32_t vm = vext_vm(desc);                            \
    uint32_t vl = env->vl;                                  \
    uint32_t esz = sizeof(TD);                              \
    uint32_t total_elems =                                  \
        vext_get_total_elems(env, desc, esz);               \
    uint32_t vta = vext_vta(desc);                          \
    uint32_t vma = vext_vma(desc);                          \
    uint32_t i;                                             \
                                                            \
    for (i = env->vstart; i < vl; i++) {                    \
        if (!vm && !vext_elem_mask(v0, i)) {                \
            /* set masked-off elements to 1s */             \
            vext_set_elems_1s(vd, vma, i * esz,             \
                              (i + 1) * esz);               \
            continue;                                       \
        }                                                   \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
    }                                                       \
    env->vstart = 0;                                        \
    /* set tail elements to 1s */                           \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

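/*
 * Note (for orientation): the narrowing shifts reuse GEN_VEXT_SHIFT_VV/VX
 * with a 2*SEW-wide TS2 source and a SEW-wide destination, so the shift
 * amount mask is 2*SEW - 1 (e.g. 0xf for the byte variant) and the result
 * is truncated to SEW bits on store.
 */
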
/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;                \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            if (vma) {                                              \
                vext_set_elem_mask(vd, i, 1);                       \
            }                                                       \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                DO_OP(s2, (ETYPE)(target_long)s1));                 \
    }                                                               \
    env->vstart = 0;                                                \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                             \
    if (vta_all_1s) {                                               \
        for (; i < total_elems; i++) {                              \
            vext_set_elem_mask(vd, i, 1);                           \
        }                                                           \
    }                                                               \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1)
GEN_VEXT_VV(vminu_vv_h, 2)
GEN_VEXT_VV(vminu_vv_w, 4)
GEN_VEXT_VV(vminu_vv_d, 8)
GEN_VEXT_VV(vmin_vv_b, 1)
GEN_VEXT_VV(vmin_vv_h, 2)
GEN_VEXT_VV(vmin_vv_w, 4)
GEN_VEXT_VV(vmin_vv_d, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8)
GEN_VEXT_VV(vmax_vv_b, 1)
GEN_VEXT_VV(vmax_vv_h, 2)
GEN_VEXT_VV(vmax_vv_w, 4)
GEN_VEXT_VV(vmax_vv_d, 8)

H1, H1, DO_MAX) 1570 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1571 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1572 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1573 GEN_VEXT_VX(vminu_vx_b, 1) 1574 GEN_VEXT_VX(vminu_vx_h, 2) 1575 GEN_VEXT_VX(vminu_vx_w, 4) 1576 GEN_VEXT_VX(vminu_vx_d, 8) 1577 GEN_VEXT_VX(vmin_vx_b, 1) 1578 GEN_VEXT_VX(vmin_vx_h, 2) 1579 GEN_VEXT_VX(vmin_vx_w, 4) 1580 GEN_VEXT_VX(vmin_vx_d, 8) 1581 GEN_VEXT_VX(vmaxu_vx_b, 1) 1582 GEN_VEXT_VX(vmaxu_vx_h, 2) 1583 GEN_VEXT_VX(vmaxu_vx_w, 4) 1584 GEN_VEXT_VX(vmaxu_vx_d, 8) 1585 GEN_VEXT_VX(vmax_vx_b, 1) 1586 GEN_VEXT_VX(vmax_vx_h, 2) 1587 GEN_VEXT_VX(vmax_vx_w, 4) 1588 GEN_VEXT_VX(vmax_vx_d, 8) 1589 1590 /* Vector Single-Width Integer Multiply Instructions */ 1591 #define DO_MUL(N, M) (N * M) 1592 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1593 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1594 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1595 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1596 GEN_VEXT_VV(vmul_vv_b, 1) 1597 GEN_VEXT_VV(vmul_vv_h, 2) 1598 GEN_VEXT_VV(vmul_vv_w, 4) 1599 GEN_VEXT_VV(vmul_vv_d, 8) 1600 1601 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1602 { 1603 return (int16_t)s2 * (int16_t)s1 >> 8; 1604 } 1605 1606 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1607 { 1608 return (int32_t)s2 * (int32_t)s1 >> 16; 1609 } 1610 1611 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1612 { 1613 return (int64_t)s2 * (int64_t)s1 >> 32; 1614 } 1615 1616 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1617 { 1618 uint64_t hi_64, lo_64; 1619 1620 muls64(&lo_64, &hi_64, s1, s2); 1621 return hi_64; 1622 } 1623 1624 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1625 { 1626 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1627 } 1628 1629 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1630 { 1631 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1632 } 1633 1634 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1635 { 1636 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1637 } 1638 1639 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1640 { 1641 uint64_t hi_64, lo_64; 1642 1643 mulu64(&lo_64, &hi_64, s2, s1); 1644 return hi_64; 1645 } 1646 1647 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1648 { 1649 return (int16_t)s2 * (uint16_t)s1 >> 8; 1650 } 1651 1652 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1653 { 1654 return (int32_t)s2 * (uint32_t)s1 >> 16; 1655 } 1656 1657 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1658 { 1659 return (int64_t)s2 * (uint64_t)s1 >> 32; 1660 } 1661 1662 /* 1663 * Let A = signed operand, 1664 * B = unsigned operand 1665 * P = mulu64(A, B), unsigned product 1666 * 1667 * LET X = 2 ** 64 - A, 2's complement of A 1668 * SP = signed product 1669 * THEN 1670 * IF A < 0 1671 * SP = -X * B 1672 * = -(2 ** 64 - A) * B 1673 * = A * B - 2 ** 64 * B 1674 * = P - 2 ** 64 * B 1675 * ELSE 1676 * SP = P 1677 * THEN 1678 * HI_P -= (A < 0 ? B : 0) 1679 */ 1680 1681 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1682 { 1683 uint64_t hi_64, lo_64; 1684 1685 mulu64(&lo_64, &hi_64, s2, s1); 1686 1687 hi_64 -= s2 < 0 ? 
s1 : 0; 1688 return hi_64; 1689 } 1690 1691 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1692 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1693 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1694 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1695 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1696 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1697 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1698 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1699 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1700 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1701 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1702 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1703 GEN_VEXT_VV(vmulh_vv_b, 1) 1704 GEN_VEXT_VV(vmulh_vv_h, 2) 1705 GEN_VEXT_VV(vmulh_vv_w, 4) 1706 GEN_VEXT_VV(vmulh_vv_d, 8) 1707 GEN_VEXT_VV(vmulhu_vv_b, 1) 1708 GEN_VEXT_VV(vmulhu_vv_h, 2) 1709 GEN_VEXT_VV(vmulhu_vv_w, 4) 1710 GEN_VEXT_VV(vmulhu_vv_d, 8) 1711 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1712 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1713 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1714 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1715 1716 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1717 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1718 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1719 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1720 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1721 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1722 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1723 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1724 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1725 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1726 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1727 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1728 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1729 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1730 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1731 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1732 GEN_VEXT_VX(vmul_vx_b, 1) 1733 GEN_VEXT_VX(vmul_vx_h, 2) 1734 GEN_VEXT_VX(vmul_vx_w, 4) 1735 GEN_VEXT_VX(vmul_vx_d, 8) 1736 GEN_VEXT_VX(vmulh_vx_b, 1) 1737 GEN_VEXT_VX(vmulh_vx_h, 2) 1738 GEN_VEXT_VX(vmulh_vx_w, 4) 1739 GEN_VEXT_VX(vmulh_vx_d, 8) 1740 GEN_VEXT_VX(vmulhu_vx_b, 1) 1741 GEN_VEXT_VX(vmulhu_vx_h, 2) 1742 GEN_VEXT_VX(vmulhu_vx_w, 4) 1743 GEN_VEXT_VX(vmulhu_vx_d, 8) 1744 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1745 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1746 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1747 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1748 1749 /* Vector Integer Divide Instructions */ 1750 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1751 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1752 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ 1753 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1754 #define DO_REM(N, M) (unlikely(M == 0) ? N : \ 1755 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1756 1757 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1758 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1759 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1760 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1761 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1762 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1763 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1764 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1765 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1766 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1767 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1768 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1769 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1770 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1771 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1772 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1773 GEN_VEXT_VV(vdivu_vv_b, 1) 1774 GEN_VEXT_VV(vdivu_vv_h, 2) 1775 GEN_VEXT_VV(vdivu_vv_w, 4) 1776 GEN_VEXT_VV(vdivu_vv_d, 8) 1777 GEN_VEXT_VV(vdiv_vv_b, 1) 1778 GEN_VEXT_VV(vdiv_vv_h, 2) 1779 GEN_VEXT_VV(vdiv_vv_w, 4) 1780 GEN_VEXT_VV(vdiv_vv_d, 8) 1781 GEN_VEXT_VV(vremu_vv_b, 1) 1782 GEN_VEXT_VV(vremu_vv_h, 2) 1783 GEN_VEXT_VV(vremu_vv_w, 4) 1784 GEN_VEXT_VV(vremu_vv_d, 8) 1785 GEN_VEXT_VV(vrem_vv_b, 1) 1786 GEN_VEXT_VV(vrem_vv_h, 2) 1787 GEN_VEXT_VV(vrem_vv_w, 4) 1788 GEN_VEXT_VV(vrem_vv_d, 8) 1789 1790 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1791 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1792 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1793 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1794 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1795 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1796 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1797 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1798 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1799 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1800 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1801 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1802 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1803 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1804 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1805 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1806 GEN_VEXT_VX(vdivu_vx_b, 1) 1807 GEN_VEXT_VX(vdivu_vx_h, 2) 1808 GEN_VEXT_VX(vdivu_vx_w, 4) 1809 GEN_VEXT_VX(vdivu_vx_d, 8) 1810 GEN_VEXT_VX(vdiv_vx_b, 1) 1811 GEN_VEXT_VX(vdiv_vx_h, 2) 1812 GEN_VEXT_VX(vdiv_vx_w, 4) 1813 GEN_VEXT_VX(vdiv_vx_d, 8) 1814 GEN_VEXT_VX(vremu_vx_b, 1) 1815 GEN_VEXT_VX(vremu_vx_h, 2) 1816 GEN_VEXT_VX(vremu_vx_w, 4) 1817 GEN_VEXT_VX(vremu_vx_d, 8) 1818 GEN_VEXT_VX(vrem_vx_b, 1) 1819 GEN_VEXT_VX(vrem_vx_h, 2) 1820 GEN_VEXT_VX(vrem_vx_w, 4) 1821 GEN_VEXT_VX(vrem_vx_d, 8) 1822 1823 /* Vector Widening Integer Multiply Instructions */ 1824 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1825 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1826 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1827 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1828 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1829 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1830 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1831 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, 
DO_MUL) 1832 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1833 GEN_VEXT_VV(vwmul_vv_b, 2) 1834 GEN_VEXT_VV(vwmul_vv_h, 4) 1835 GEN_VEXT_VV(vwmul_vv_w, 8) 1836 GEN_VEXT_VV(vwmulu_vv_b, 2) 1837 GEN_VEXT_VV(vwmulu_vv_h, 4) 1838 GEN_VEXT_VV(vwmulu_vv_w, 8) 1839 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1840 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1841 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1842 1843 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1844 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1845 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1846 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1847 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1848 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1849 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1850 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1851 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1852 GEN_VEXT_VX(vwmul_vx_b, 2) 1853 GEN_VEXT_VX(vwmul_vx_h, 4) 1854 GEN_VEXT_VX(vwmul_vx_w, 8) 1855 GEN_VEXT_VX(vwmulu_vx_b, 2) 1856 GEN_VEXT_VX(vwmulu_vx_h, 4) 1857 GEN_VEXT_VX(vwmulu_vx_w, 8) 1858 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1859 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1860 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1861 1862 /* Vector Single-Width Integer Multiply-Add Instructions */ 1863 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1864 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1865 { \ 1866 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1867 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1868 TD d = *((TD *)vd + HD(i)); \ 1869 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1870 } 1871 1872 #define DO_MACC(N, M, D) (M * N + D) 1873 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1874 #define DO_MADD(N, M, D) (M * D + N) 1875 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1876 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1877 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1878 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1879 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1880 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1881 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1882 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1883 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1884 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1885 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1886 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1887 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1888 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1889 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1890 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1891 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1892 GEN_VEXT_VV(vmacc_vv_b, 1) 1893 GEN_VEXT_VV(vmacc_vv_h, 2) 1894 GEN_VEXT_VV(vmacc_vv_w, 4) 1895 GEN_VEXT_VV(vmacc_vv_d, 8) 1896 GEN_VEXT_VV(vnmsac_vv_b, 1) 1897 GEN_VEXT_VV(vnmsac_vv_h, 2) 1898 GEN_VEXT_VV(vnmsac_vv_w, 4) 1899 GEN_VEXT_VV(vnmsac_vv_d, 8) 1900 GEN_VEXT_VV(vmadd_vv_b, 1) 1901 GEN_VEXT_VV(vmadd_vv_h, 2) 1902 GEN_VEXT_VV(vmadd_vv_w, 4) 1903 GEN_VEXT_VV(vmadd_vv_d, 8) 1904 GEN_VEXT_VV(vnmsub_vv_b, 1) 1905 GEN_VEXT_VV(vnmsub_vv_h, 2) 1906 GEN_VEXT_VV(vnmsub_vv_w, 4) 1907 GEN_VEXT_VV(vnmsub_vv_d, 8) 1908 1909 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1910 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1911 { \ 1912 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1913 TD d = *((TD *)vd 
+ HD(i)); \ 1914 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1915 } 1916 1917 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1918 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1919 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1920 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1921 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1922 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1923 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1924 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1925 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1926 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1927 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1928 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1929 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1930 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1931 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1932 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1933 GEN_VEXT_VX(vmacc_vx_b, 1) 1934 GEN_VEXT_VX(vmacc_vx_h, 2) 1935 GEN_VEXT_VX(vmacc_vx_w, 4) 1936 GEN_VEXT_VX(vmacc_vx_d, 8) 1937 GEN_VEXT_VX(vnmsac_vx_b, 1) 1938 GEN_VEXT_VX(vnmsac_vx_h, 2) 1939 GEN_VEXT_VX(vnmsac_vx_w, 4) 1940 GEN_VEXT_VX(vnmsac_vx_d, 8) 1941 GEN_VEXT_VX(vmadd_vx_b, 1) 1942 GEN_VEXT_VX(vmadd_vx_h, 2) 1943 GEN_VEXT_VX(vmadd_vx_w, 4) 1944 GEN_VEXT_VX(vmadd_vx_d, 8) 1945 GEN_VEXT_VX(vnmsub_vx_b, 1) 1946 GEN_VEXT_VX(vnmsub_vx_h, 2) 1947 GEN_VEXT_VX(vnmsub_vx_w, 4) 1948 GEN_VEXT_VX(vnmsub_vx_d, 8) 1949 1950 /* Vector Widening Integer Multiply-Add Instructions */ 1951 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1952 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1953 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1954 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1955 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1956 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1957 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1958 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1959 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1960 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1961 GEN_VEXT_VV(vwmaccu_vv_h, 4) 1962 GEN_VEXT_VV(vwmaccu_vv_w, 8) 1963 GEN_VEXT_VV(vwmacc_vv_b, 2) 1964 GEN_VEXT_VV(vwmacc_vv_h, 4) 1965 GEN_VEXT_VV(vwmacc_vv_w, 8) 1966 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 1967 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 1968 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 1969 1970 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1971 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1972 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1973 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1974 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1975 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1976 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1977 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1978 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1979 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1980 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1981 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1982 GEN_VEXT_VX(vwmaccu_vx_b, 2) 1983 GEN_VEXT_VX(vwmaccu_vx_h, 4) 1984 GEN_VEXT_VX(vwmaccu_vx_w, 8) 1985 GEN_VEXT_VX(vwmacc_vx_b, 2) 1986 GEN_VEXT_VX(vwmacc_vx_h, 4) 1987 GEN_VEXT_VX(vwmacc_vx_w, 8) 1988 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 1989 
GEN_VEXT_VX(vwmaccsu_vx_h, 4) 1990 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 1991 GEN_VEXT_VX(vwmaccus_vx_b, 2) 1992 GEN_VEXT_VX(vwmaccus_vx_h, 4) 1993 GEN_VEXT_VX(vwmaccus_vx_w, 8) 1994 1995 /* Vector Integer Merge and Move Instructions */ 1996 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1997 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1998 uint32_t desc) \ 1999 { \ 2000 uint32_t vl = env->vl; \ 2001 uint32_t esz = sizeof(ETYPE); \ 2002 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2003 uint32_t vta = vext_vta(desc); \ 2004 uint32_t i; \ 2005 \ 2006 for (i = env->vstart; i < vl; i++) { \ 2007 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 2008 *((ETYPE *)vd + H(i)) = s1; \ 2009 } \ 2010 env->vstart = 0; \ 2011 /* set tail elements to 1s */ \ 2012 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2013 } 2014 2015 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 2016 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 2017 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 2018 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 2019 2020 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 2021 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2022 uint32_t desc) \ 2023 { \ 2024 uint32_t vl = env->vl; \ 2025 uint32_t esz = sizeof(ETYPE); \ 2026 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2027 uint32_t vta = vext_vta(desc); \ 2028 uint32_t i; \ 2029 \ 2030 for (i = env->vstart; i < vl; i++) { \ 2031 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2032 } \ 2033 env->vstart = 0; \ 2034 /* set tail elements to 1s */ \ 2035 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2036 } 2037 2038 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2039 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2040 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2041 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2042 2043 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2044 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2045 CPURISCVState *env, uint32_t desc) \ 2046 { \ 2047 uint32_t vl = env->vl; \ 2048 uint32_t esz = sizeof(ETYPE); \ 2049 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2050 uint32_t vta = vext_vta(desc); \ 2051 uint32_t i; \ 2052 \ 2053 for (i = env->vstart; i < vl; i++) { \ 2054 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2055 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2056 } \ 2057 env->vstart = 0; \ 2058 /* set tail elements to 1s */ \ 2059 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2060 } 2061 2062 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2063 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2064 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2065 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2066 2067 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2068 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2069 void *vs2, CPURISCVState *env, uint32_t desc) \ 2070 { \ 2071 uint32_t vl = env->vl; \ 2072 uint32_t esz = sizeof(ETYPE); \ 2073 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2074 uint32_t vta = vext_vta(desc); \ 2075 uint32_t i; \ 2076 \ 2077 for (i = env->vstart; i < vl; i++) { \ 2078 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2079 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 2080 (ETYPE)(target_long)s1); \ 2081 *((ETYPE *)vd + H(i)) = d; \ 2082 } \ 2083 env->vstart = 0; \ 2084 /* set tail elements to 1s */ \ 2085 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2086 } 2087 2088 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2089 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2090 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2091 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2092 2093 /* 2094 * Vector Fixed-Point Arithmetic Instructions 2095 */ 2096 2097 /* Vector Single-Width Saturating Add and Subtract */ 2098 2099 /* 2100 * As fixed point instructions probably have round mode and saturation, 2101 * define common macros for fixed point here. 2102 */ 2103 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2104 CPURISCVState *env, int vxrm); 2105 2106 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2107 static inline void \ 2108 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2109 CPURISCVState *env, int vxrm) \ 2110 { \ 2111 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2112 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2113 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2114 } 2115 2116 static inline void 2117 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2118 CPURISCVState *env, 2119 uint32_t vl, uint32_t vm, int vxrm, 2120 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 2121 { 2122 for (uint32_t i = env->vstart; i < vl; i++) { 2123 if (!vm && !vext_elem_mask(v0, i)) { 2124 /* set masked-off elements to 1s */ 2125 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2126 continue; 2127 } 2128 fn(vd, vs1, vs2, i, env, vxrm); 2129 } 2130 env->vstart = 0; 2131 } 2132 2133 static inline void 2134 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2135 CPURISCVState *env, 2136 uint32_t desc, 2137 opivv2_rm_fn *fn, uint32_t esz) 2138 { 2139 uint32_t vm = vext_vm(desc); 2140 uint32_t vl = env->vl; 2141 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2142 uint32_t vta = vext_vta(desc); 2143 uint32_t vma = vext_vma(desc); 2144 2145 switch (env->vxrm) { 2146 case 0: /* rnu */ 2147 vext_vv_rm_1(vd, v0, vs1, vs2, 2148 env, vl, vm, 0, fn, vma, esz); 2149 break; 2150 case 1: /* rne */ 2151 vext_vv_rm_1(vd, v0, vs1, vs2, 2152 env, vl, vm, 1, fn, vma, esz); 2153 break; 2154 case 2: /* rdn */ 2155 vext_vv_rm_1(vd, v0, vs1, vs2, 2156 env, vl, vm, 2, fn, vma, esz); 2157 break; 2158 default: /* rod */ 2159 vext_vv_rm_1(vd, v0, vs1, vs2, 2160 env, vl, vm, 3, fn, vma, esz); 2161 break; 2162 } 2163 /* set tail elements to 1s */ 2164 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2165 } 2166 2167 /* generate helpers for fixed point instructions with OPIVV format */ 2168 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2169 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2170 CPURISCVState *env, uint32_t desc) \ 2171 { \ 2172 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2173 do_##NAME, ESZ); \ 2174 } 2175 2176 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, 2177 uint8_t b) 2178 { 2179 uint8_t res = a + b; 2180 if (res < a) { 2181 res = UINT8_MAX; 2182 env->vxsat = 0x1; 2183 } 2184 return res; 2185 } 2186 2187 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2188 uint16_t b) 2189 { 2190 uint16_t res = a + b; 2191 if (res < a) { 2192 res = UINT16_MAX; 2193 env->vxsat = 0x1; 2194 } 2195 return res; 2196 } 2197 2198 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2199 uint32_t b) 2200 { 2201 uint32_t res = a + b; 2202 if (res < a) { 2203 res = UINT32_MAX; 
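        /* the unsigned sum wrapped around (res < a): saturate to UINT32_MAX and flag it in vxsat */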
2204 env->vxsat = 0x1; 2205 } 2206 return res; 2207 } 2208 2209 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2210 uint64_t b) 2211 { 2212 uint64_t res = a + b; 2213 if (res < a) { 2214 res = UINT64_MAX; 2215 env->vxsat = 0x1; 2216 } 2217 return res; 2218 } 2219 2220 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2221 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2222 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2223 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2224 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2225 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2226 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2227 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2228 2229 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2230 CPURISCVState *env, int vxrm); 2231 2232 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2233 static inline void \ 2234 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2235 CPURISCVState *env, int vxrm) \ 2236 { \ 2237 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2238 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2239 } 2240 2241 static inline void 2242 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2243 CPURISCVState *env, 2244 uint32_t vl, uint32_t vm, int vxrm, 2245 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2246 { 2247 for (uint32_t i = env->vstart; i < vl; i++) { 2248 if (!vm && !vext_elem_mask(v0, i)) { 2249 /* set masked-off elements to 1s */ 2250 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2251 continue; 2252 } 2253 fn(vd, s1, vs2, i, env, vxrm); 2254 } 2255 env->vstart = 0; 2256 } 2257 2258 static inline void 2259 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2260 CPURISCVState *env, 2261 uint32_t desc, 2262 opivx2_rm_fn *fn, uint32_t esz) 2263 { 2264 uint32_t vm = vext_vm(desc); 2265 uint32_t vl = env->vl; 2266 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2267 uint32_t vta = vext_vta(desc); 2268 uint32_t vma = vext_vma(desc); 2269 2270 switch (env->vxrm) { 2271 case 0: /* rnu */ 2272 vext_vx_rm_1(vd, v0, s1, vs2, 2273 env, vl, vm, 0, fn, vma, esz); 2274 break; 2275 case 1: /* rne */ 2276 vext_vx_rm_1(vd, v0, s1, vs2, 2277 env, vl, vm, 1, fn, vma, esz); 2278 break; 2279 case 2: /* rdn */ 2280 vext_vx_rm_1(vd, v0, s1, vs2, 2281 env, vl, vm, 2, fn, vma, esz); 2282 break; 2283 default: /* rod */ 2284 vext_vx_rm_1(vd, v0, s1, vs2, 2285 env, vl, vm, 3, fn, vma, esz); 2286 break; 2287 } 2288 /* set tail elements to 1s */ 2289 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2290 } 2291 2292 /* generate helpers for fixed point instructions with OPIVX format */ 2293 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2294 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2295 void *vs2, CPURISCVState *env, \ 2296 uint32_t desc) \ 2297 { \ 2298 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2299 do_##NAME, ESZ); \ 2300 } 2301 2302 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2303 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2304 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2305 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2306 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2307 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2308 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2309 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2310 2311 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2312 { 2313 int8_t res = a + b; 2314 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2315 res = a > 0 ? 
INT8_MAX : INT8_MIN; 2316 env->vxsat = 0x1; 2317 } 2318 return res; 2319 } 2320 2321 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2322 int16_t b) 2323 { 2324 int16_t res = a + b; 2325 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2326 res = a > 0 ? INT16_MAX : INT16_MIN; 2327 env->vxsat = 0x1; 2328 } 2329 return res; 2330 } 2331 2332 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2333 int32_t b) 2334 { 2335 int32_t res = a + b; 2336 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2337 res = a > 0 ? INT32_MAX : INT32_MIN; 2338 env->vxsat = 0x1; 2339 } 2340 return res; 2341 } 2342 2343 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2344 int64_t b) 2345 { 2346 int64_t res = a + b; 2347 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2348 res = a > 0 ? INT64_MAX : INT64_MIN; 2349 env->vxsat = 0x1; 2350 } 2351 return res; 2352 } 2353 2354 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2355 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2356 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2357 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2358 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2359 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2360 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2361 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2362 2363 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2364 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2365 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2366 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2367 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2368 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2369 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2370 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2371 2372 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2373 uint8_t b) 2374 { 2375 uint8_t res = a - b; 2376 if (res > a) { 2377 res = 0; 2378 env->vxsat = 0x1; 2379 } 2380 return res; 2381 } 2382 2383 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2384 uint16_t b) 2385 { 2386 uint16_t res = a - b; 2387 if (res > a) { 2388 res = 0; 2389 env->vxsat = 0x1; 2390 } 2391 return res; 2392 } 2393 2394 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2395 uint32_t b) 2396 { 2397 uint32_t res = a - b; 2398 if (res > a) { 2399 res = 0; 2400 env->vxsat = 0x1; 2401 } 2402 return res; 2403 } 2404 2405 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2406 uint64_t b) 2407 { 2408 uint64_t res = a - b; 2409 if (res > a) { 2410 res = 0; 2411 env->vxsat = 0x1; 2412 } 2413 return res; 2414 } 2415 2416 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2417 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2418 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2419 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2420 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2421 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2422 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2423 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2424 2425 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2426 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2427 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2428 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2429 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2430 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2431 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2432 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2433 2434 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2435 { 2436 int8_t res = a - b; 2437 if ((res ^ a) & (a ^ b) & INT8_MIN) 
{ 2438 res = a >= 0 ? INT8_MAX : INT8_MIN; 2439 env->vxsat = 0x1; 2440 } 2441 return res; 2442 } 2443 2444 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, 2445 int16_t b) 2446 { 2447 int16_t res = a - b; 2448 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2449 res = a >= 0 ? INT16_MAX : INT16_MIN; 2450 env->vxsat = 0x1; 2451 } 2452 return res; 2453 } 2454 2455 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, 2456 int32_t b) 2457 { 2458 int32_t res = a - b; 2459 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2460 res = a >= 0 ? INT32_MAX : INT32_MIN; 2461 env->vxsat = 0x1; 2462 } 2463 return res; 2464 } 2465 2466 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, 2467 int64_t b) 2468 { 2469 int64_t res = a - b; 2470 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2471 res = a >= 0 ? INT64_MAX : INT64_MIN; 2472 env->vxsat = 0x1; 2473 } 2474 return res; 2475 } 2476 2477 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2478 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2479 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2480 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2481 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2482 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2483 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2484 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2485 2486 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2487 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2488 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2489 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2490 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2491 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2492 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2493 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2494 2495 /* Vector Single-Width Averaging Add and Subtract */ 2496 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2497 { 2498 uint8_t d = extract64(v, shift, 1); 2499 uint8_t d1; 2500 uint64_t D1, D2; 2501 2502 if (shift == 0 || shift > 64) { 2503 return 0; 2504 } 2505 2506 d1 = extract64(v, shift - 1, 1); 2507 D1 = extract64(v, 0, shift); 2508 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2509 return d1; 2510 } else if (vxrm == 1) { /* round-to-nearest-even */ 2511 if (shift > 1) { 2512 D2 = extract64(v, 0, shift - 1); 2513 return d1 & ((D2 != 0) | d); 2514 } else { 2515 return d1 & d; 2516 } 2517 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2518 return !d & (D1 != 0); 2519 } 2520 return 0; /* round-down (truncate) */ 2521 } 2522 2523 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, 2524 int32_t b) 2525 { 2526 int64_t res = (int64_t)a + b; 2527 uint8_t round = get_round(vxrm, res, 1); 2528 2529 return (res >> 1) + round; 2530 } 2531 2532 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, 2533 int64_t b) 2534 { 2535 int64_t res = a + b; 2536 uint8_t round = get_round(vxrm, res, 1); 2537 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2538 2539 /* With signed overflow, bit 64 is inverse of bit 63. 
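 * For example, a = b = INT64_MAX under vxrm = rnu: res wraps to -2 and over
 * is set, so ((res >> 1) ^ over) restores INT64_MAX, the true value of
 * (a + b) >> 1, and the rounding increment from get_round() is 0.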
*/ 2540 return ((res >> 1) ^ over) + round; 2541 } 2542 2543 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2544 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2545 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2546 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2547 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2548 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2549 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2550 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2551 2552 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2553 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2554 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2555 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2556 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2557 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2558 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2559 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2560 2561 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2562 uint32_t a, uint32_t b) 2563 { 2564 uint64_t res = (uint64_t)a + b; 2565 uint8_t round = get_round(vxrm, res, 1); 2566 2567 return (res >> 1) + round; 2568 } 2569 2570 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2571 uint64_t a, uint64_t b) 2572 { 2573 uint64_t res = a + b; 2574 uint8_t round = get_round(vxrm, res, 1); 2575 uint64_t over = (uint64_t)(res < a) << 63; 2576 2577 return ((res >> 1) | over) + round; 2578 } 2579 2580 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2581 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2582 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2583 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2584 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2585 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2586 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2587 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2588 2589 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2590 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2591 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2592 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2593 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2594 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2595 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2596 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2597 2598 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, 2599 int32_t b) 2600 { 2601 int64_t res = (int64_t)a - b; 2602 uint8_t round = get_round(vxrm, res, 1); 2603 2604 return (res >> 1) + round; 2605 } 2606 2607 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, 2608 int64_t b) 2609 { 2610 int64_t res = (int64_t)a - b; 2611 uint8_t round = get_round(vxrm, res, 1); 2612 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2613 2614 /* With signed overflow, bit 64 is inverse of bit 63. 
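 * For example, a = INT64_MIN, b = 1: res wraps to INT64_MAX and over is set,
 * so xor-ing with over flips bit 63 back and the halved difference keeps its
 * correct negative sign before the rounding increment is applied.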
*/ 2615 return ((res >> 1) ^ over) + round; 2616 } 2617 2618 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2619 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2620 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2621 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2622 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2623 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2624 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2625 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2626 2627 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2628 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2629 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2630 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2631 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2632 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2633 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2634 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2635 2636 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2637 uint32_t a, uint32_t b) 2638 { 2639 int64_t res = (int64_t)a - b; 2640 uint8_t round = get_round(vxrm, res, 1); 2641 2642 return (res >> 1) + round; 2643 } 2644 2645 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2646 uint64_t a, uint64_t b) 2647 { 2648 uint64_t res = (uint64_t)a - b; 2649 uint8_t round = get_round(vxrm, res, 1); 2650 uint64_t over = (uint64_t)(res > a) << 63; 2651 2652 return ((res >> 1) | over) + round; 2653 } 2654 2655 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2656 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2657 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2658 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2659 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2660 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2661 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2662 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2663 2664 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2665 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2666 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2667 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2668 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2669 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2670 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2671 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2672 2673 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2674 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2675 { 2676 uint8_t round; 2677 int16_t res; 2678 2679 res = (int16_t)a * (int16_t)b; 2680 round = get_round(vxrm, res, 7); 2681 res = (res >> 7) + round; 2682 2683 if (res > INT8_MAX) { 2684 env->vxsat = 0x1; 2685 return INT8_MAX; 2686 } else if (res < INT8_MIN) { 2687 env->vxsat = 0x1; 2688 return INT8_MIN; 2689 } else { 2690 return res; 2691 } 2692 } 2693 2694 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2695 { 2696 uint8_t round; 2697 int32_t res; 2698 2699 res = (int32_t)a * (int32_t)b; 2700 round = get_round(vxrm, res, 15); 2701 res = (res >> 15) + round; 2702 2703 if (res > INT16_MAX) { 2704 env->vxsat = 0x1; 2705 return INT16_MAX; 2706 } else if (res < INT16_MIN) { 2707 env->vxsat = 0x1; 2708 return INT16_MIN; 2709 } else { 2710 return res; 2711 } 2712 } 2713 2714 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2715 { 2716 uint8_t round; 2717 int64_t res; 2718 2719 res = (int64_t)a * (int64_t)b; 2720 round = get_round(vxrm, res, 31); 2721 res = (res >> 31) + round; 2722 2723 if (res > INT32_MAX) { 2724 env->vxsat = 0x1; 2725 return INT32_MAX; 2726 } else if (res < INT32_MIN) { 2727 env->vxsat = 0x1; 
2728 return INT32_MIN; 2729 } else { 2730 return res; 2731 } 2732 } 2733 2734 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2735 { 2736 uint8_t round; 2737 uint64_t hi_64, lo_64; 2738 int64_t res; 2739 2740 if (a == INT64_MIN && b == INT64_MIN) { 2741 env->vxsat = 1; 2742 return INT64_MAX; 2743 } 2744 2745 muls64(&lo_64, &hi_64, a, b); 2746 round = get_round(vxrm, lo_64, 63); 2747 /* 2748 * Cannot overflow, as there are always 2749 * 2 sign bits after multiply. 2750 */ 2751 res = (hi_64 << 1) | (lo_64 >> 63); 2752 if (round) { 2753 if (res == INT64_MAX) { 2754 env->vxsat = 1; 2755 } else { 2756 res += 1; 2757 } 2758 } 2759 return res; 2760 } 2761 2762 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2763 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2764 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2765 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2766 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2767 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2768 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2769 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2770 2771 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2772 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2773 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2774 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2775 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2776 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2777 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2778 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2779 2780 /* Vector Single-Width Scaling Shift Instructions */ 2781 static inline uint8_t 2782 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2783 { 2784 uint8_t round, shift = b & 0x7; 2785 uint8_t res; 2786 2787 round = get_round(vxrm, a, shift); 2788 res = (a >> shift) + round; 2789 return res; 2790 } 2791 static inline uint16_t 2792 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2793 { 2794 uint8_t round, shift = b & 0xf; 2795 2796 round = get_round(vxrm, a, shift); 2797 return (a >> shift) + round; 2798 } 2799 static inline uint32_t 2800 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2801 { 2802 uint8_t round, shift = b & 0x1f; 2803 2804 round = get_round(vxrm, a, shift); 2805 return (a >> shift) + round; 2806 } 2807 static inline uint64_t 2808 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2809 { 2810 uint8_t round, shift = b & 0x3f; 2811 2812 round = get_round(vxrm, a, shift); 2813 return (a >> shift) + round; 2814 } 2815 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2816 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2817 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2818 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2819 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2820 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2821 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2822 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2823 2824 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2825 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2826 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2827 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2828 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2829 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2830 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2831 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2832 2833 static inline int8_t 2834 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2835 { 2836 uint8_t round, shift = b & 0x7; 2837 2838 round = get_round(vxrm, a, shift); 2839 return (a >> shift) + round; 2840 } 2841 static inline int16_t 2842 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2843 { 2844 uint8_t round, shift = b & 0xf; 2845 2846 round = get_round(vxrm, a, shift); 2847 return (a >> shift) + round; 2848 } 2849 static inline int32_t 2850 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2851 { 2852 uint8_t round, shift = b & 0x1f; 2853 2854 round = get_round(vxrm, a, shift); 2855 return (a >> shift) + round; 2856 } 2857 static inline int64_t 2858 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2859 { 2860 uint8_t round, shift = b & 0x3f; 2861 2862 round = get_round(vxrm, a, shift); 2863 return (a >> shift) + round; 2864 } 2865 2866 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2867 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2868 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2869 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2870 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2871 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2872 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2873 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2874 2875 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2876 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2877 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2878 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2879 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2880 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2881 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2882 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2883 2884 /* Vector Narrowing Fixed-Point Clip Instructions */ 2885 static inline int8_t 2886 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2887 { 2888 uint8_t round, shift = b & 0xf; 2889 int16_t res; 2890 2891 round = get_round(vxrm, a, shift); 2892 res = (a >> shift) + round; 2893 if (res > INT8_MAX) { 2894 env->vxsat = 0x1; 2895 return INT8_MAX; 2896 } else if (res < INT8_MIN) { 2897 env->vxsat = 0x1; 2898 return INT8_MIN; 2899 } else { 2900 return res; 2901 } 2902 } 2903 2904 static inline int16_t 2905 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2906 { 2907 uint8_t round, shift = b & 0x1f; 2908 int32_t res; 2909 2910 round = get_round(vxrm, a, shift); 2911 res = (a >> shift) + round; 2912 if (res > INT16_MAX) { 2913 env->vxsat = 0x1; 2914 return INT16_MAX; 2915 } else if (res < INT16_MIN) { 2916 env->vxsat = 0x1; 2917 return INT16_MIN; 2918 } else { 2919 return res; 2920 } 2921 } 2922 2923 static inline int32_t 2924 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2925 { 2926 uint8_t round, shift = b & 0x3f; 2927 int64_t res; 2928 2929 round = get_round(vxrm, a, shift); 2930 res = (a >> shift) + round; 2931 if (res > INT32_MAX) { 2932 env->vxsat = 0x1; 2933 return INT32_MAX; 2934 } else if (res < INT32_MIN) { 2935 env->vxsat = 0x1; 2936 return INT32_MIN; 2937 } else { 2938 return res; 2939 } 2940 } 2941 2942 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2943 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2944 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2945 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 2946 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 2947 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 2948 2949 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2950 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2951 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2952 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 2953 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 2954 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 2955 2956 static inline uint8_t 2957 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2958 { 2959 
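    /*
     * Unsigned narrowing clip: shift the 2*SEW-wide source right by the low
     * 4 bits of b, add the rounding increment selected by vxrm, then
     * saturate to UINT8_MAX (setting vxsat) when the result does not fit in
     * 8 bits.  E.g. a = 0x1ff, b = 1, vxrm = rnu: (0x1ff >> 1) + 1 = 0x100,
     * which clips to 0xff with vxsat = 1.
     */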
uint8_t round, shift = b & 0xf; 2960 uint16_t res; 2961 2962 round = get_round(vxrm, a, shift); 2963 res = (a >> shift) + round; 2964 if (res > UINT8_MAX) { 2965 env->vxsat = 0x1; 2966 return UINT8_MAX; 2967 } else { 2968 return res; 2969 } 2970 } 2971 2972 static inline uint16_t 2973 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2974 { 2975 uint8_t round, shift = b & 0x1f; 2976 uint32_t res; 2977 2978 round = get_round(vxrm, a, shift); 2979 res = (a >> shift) + round; 2980 if (res > UINT16_MAX) { 2981 env->vxsat = 0x1; 2982 return UINT16_MAX; 2983 } else { 2984 return res; 2985 } 2986 } 2987 2988 static inline uint32_t 2989 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2990 { 2991 uint8_t round, shift = b & 0x3f; 2992 uint64_t res; 2993 2994 round = get_round(vxrm, a, shift); 2995 res = (a >> shift) + round; 2996 if (res > UINT32_MAX) { 2997 env->vxsat = 0x1; 2998 return UINT32_MAX; 2999 } else { 3000 return res; 3001 } 3002 } 3003 3004 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 3005 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 3006 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 3007 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 3008 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 3009 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 3010 3011 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 3012 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 3013 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 3014 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 3015 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 3016 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 3017 3018 /* 3019 * Vector Float Point Arithmetic Instructions 3020 */ 3021 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 3022 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3023 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3024 CPURISCVState *env) \ 3025 { \ 3026 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3027 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3028 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3029 } 3030 3031 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 3032 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3033 void *vs2, CPURISCVState *env, \ 3034 uint32_t desc) \ 3035 { \ 3036 uint32_t vm = vext_vm(desc); \ 3037 uint32_t vl = env->vl; \ 3038 uint32_t total_elems = \ 3039 vext_get_total_elems(env, desc, ESZ); \ 3040 uint32_t vta = vext_vta(desc); \ 3041 uint32_t vma = vext_vma(desc); \ 3042 uint32_t i; \ 3043 \ 3044 for (i = env->vstart; i < vl; i++) { \ 3045 if (!vm && !vext_elem_mask(v0, i)) { \ 3046 /* set masked-off elements to 1s */ \ 3047 vext_set_elems_1s(vd, vma, i * ESZ, \ 3048 (i + 1) * ESZ); \ 3049 continue; \ 3050 } \ 3051 do_##NAME(vd, vs1, vs2, i, env); \ 3052 } \ 3053 env->vstart = 0; \ 3054 /* set tail elements to 1s */ \ 3055 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3056 total_elems * ESZ); \ 3057 } 3058 3059 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3060 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3061 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3062 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 3063 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 3064 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 3065 3066 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3067 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3068 CPURISCVState *env) \ 3069 { \ 3070 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3071 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 3072 } 3073 3074 #define GEN_VEXT_VF(NAME, 
ESZ) \ 3075 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3076 void *vs2, CPURISCVState *env, \ 3077 uint32_t desc) \ 3078 { \ 3079 uint32_t vm = vext_vm(desc); \ 3080 uint32_t vl = env->vl; \ 3081 uint32_t total_elems = \ 3082 vext_get_total_elems(env, desc, ESZ); \ 3083 uint32_t vta = vext_vta(desc); \ 3084 uint32_t vma = vext_vma(desc); \ 3085 uint32_t i; \ 3086 \ 3087 for (i = env->vstart; i < vl; i++) { \ 3088 if (!vm && !vext_elem_mask(v0, i)) { \ 3089 /* set masked-off elements to 1s */ \ 3090 vext_set_elems_1s(vd, vma, i * ESZ, \ 3091 (i + 1) * ESZ); \ 3092 continue; \ 3093 } \ 3094 do_##NAME(vd, s1, vs2, i, env); \ 3095 } \ 3096 env->vstart = 0; \ 3097 /* set tail elements to 1s */ \ 3098 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3099 total_elems * ESZ); \ 3100 } 3101 3102 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3103 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3104 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3105 GEN_VEXT_VF(vfadd_vf_h, 2) 3106 GEN_VEXT_VF(vfadd_vf_w, 4) 3107 GEN_VEXT_VF(vfadd_vf_d, 8) 3108 3109 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3110 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3111 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3112 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 3113 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 3114 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 3115 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3116 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3117 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3118 GEN_VEXT_VF(vfsub_vf_h, 2) 3119 GEN_VEXT_VF(vfsub_vf_w, 4) 3120 GEN_VEXT_VF(vfsub_vf_d, 8) 3121 3122 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3123 { 3124 return float16_sub(b, a, s); 3125 } 3126 3127 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3128 { 3129 return float32_sub(b, a, s); 3130 } 3131 3132 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3133 { 3134 return float64_sub(b, a, s); 3135 } 3136 3137 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3138 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3139 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3140 GEN_VEXT_VF(vfrsub_vf_h, 2) 3141 GEN_VEXT_VF(vfrsub_vf_w, 4) 3142 GEN_VEXT_VF(vfrsub_vf_d, 8) 3143 3144 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3145 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3146 { 3147 return float32_add(float16_to_float32(a, true, s), 3148 float16_to_float32(b, true, s), s); 3149 } 3150 3151 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3152 { 3153 return float64_add(float32_to_float64(a, s), 3154 float32_to_float64(b, s), s); 3155 3156 } 3157 3158 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3159 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3160 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3161 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3162 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3163 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3164 GEN_VEXT_VF(vfwadd_vf_h, 4) 3165 GEN_VEXT_VF(vfwadd_vf_w, 8) 3166 3167 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3168 { 3169 return float32_sub(float16_to_float32(a, true, s), 3170 float16_to_float32(b, true, s), s); 3171 } 3172 3173 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3174 { 3175 return float64_sub(float32_to_float64(a, s), 3176 
float32_to_float64(b, s), s); 3177 3178 } 3179 3180 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3181 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3182 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3183 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3184 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3185 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3186 GEN_VEXT_VF(vfwsub_vf_h, 4) 3187 GEN_VEXT_VF(vfwsub_vf_w, 8) 3188 3189 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3190 { 3191 return float32_add(a, float16_to_float32(b, true, s), s); 3192 } 3193 3194 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3195 { 3196 return float64_add(a, float32_to_float64(b, s), s); 3197 } 3198 3199 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3200 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3201 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3202 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3203 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3204 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3205 GEN_VEXT_VF(vfwadd_wf_h, 4) 3206 GEN_VEXT_VF(vfwadd_wf_w, 8) 3207 3208 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3209 { 3210 return float32_sub(a, float16_to_float32(b, true, s), s); 3211 } 3212 3213 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3214 { 3215 return float64_sub(a, float32_to_float64(b, s), s); 3216 } 3217 3218 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3219 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3220 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3221 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3222 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3223 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3224 GEN_VEXT_VF(vfwsub_wf_h, 4) 3225 GEN_VEXT_VF(vfwsub_wf_w, 8) 3226 3227 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3228 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3229 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3230 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3231 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3232 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3233 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3234 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3235 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3236 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3237 GEN_VEXT_VF(vfmul_vf_h, 2) 3238 GEN_VEXT_VF(vfmul_vf_w, 4) 3239 GEN_VEXT_VF(vfmul_vf_d, 8) 3240 3241 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3242 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3243 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3244 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3245 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3246 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3247 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3248 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3249 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3250 GEN_VEXT_VF(vfdiv_vf_h, 2) 3251 GEN_VEXT_VF(vfdiv_vf_w, 4) 3252 GEN_VEXT_VF(vfdiv_vf_d, 8) 3253 3254 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3255 { 3256 return float16_div(b, a, s); 3257 } 3258 3259 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3260 { 3261 return float32_div(b, a, s); 3262 } 3263 3264 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3265 { 3266 return float64_div(b, a, s); 3267 } 3268 
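/*
 * The float*_rsub/float*_rdiv wrappers above only swap their operands: the
 * OPFVF2 plumbing always invokes OP(vs2[i], f[rs1]), so reversing the
 * arguments in the wrapper is what turns "vs2[i] / f[rs1]" (vfdiv.vf) into
 * "f[rs1] / vs2[i]" (vfrdiv.vf).  As a rough sketch, assuming RVVCALL and
 * OP_UUU_H expand as they do elsewhere in this file, the generated
 * per-element helper behaves like:
 *
 *   do_vfrdiv_vf_h(vd, s1, vs2, i, env)
 *     => ((uint16_t *)vd)[H2(i)] =
 *            float16_rdiv(((uint16_t *)vs2)[H2(i)], (uint16_t)s1,
 *                         &env->fp_status)
 *     => float16_div(s1, vs2[i], &env->fp_status)
 */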
3269 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3270 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3271 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3272 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3273 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3274 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3275 3276 /* Vector Widening Floating-Point Multiply */ 3277 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3278 { 3279 return float32_mul(float16_to_float32(a, true, s), 3280 float16_to_float32(b, true, s), s); 3281 } 3282 3283 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3284 { 3285 return float64_mul(float32_to_float64(a, s), 3286 float32_to_float64(b, s), s); 3287 3288 } 3289 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3290 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3291 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3292 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3293 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3294 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3295 GEN_VEXT_VF(vfwmul_vf_h, 4) 3296 GEN_VEXT_VF(vfwmul_vf_w, 8) 3297 3298 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3299 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3300 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3301 CPURISCVState *env) \ 3302 { \ 3303 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3304 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3305 TD d = *((TD *)vd + HD(i)); \ 3306 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3307 } 3308 3309 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3310 { 3311 return float16_muladd(a, b, d, 0, s); 3312 } 3313 3314 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3315 { 3316 return float32_muladd(a, b, d, 0, s); 3317 } 3318 3319 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3320 { 3321 return float64_muladd(a, b, d, 0, s); 3322 } 3323 3324 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3325 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3326 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3327 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3328 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3329 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3330 3331 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3332 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3333 CPURISCVState *env) \ 3334 { \ 3335 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3336 TD d = *((TD *)vd + HD(i)); \ 3337 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3338 } 3339 3340 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3341 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3342 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3343 GEN_VEXT_VF(vfmacc_vf_h, 2) 3344 GEN_VEXT_VF(vfmacc_vf_w, 4) 3345 GEN_VEXT_VF(vfmacc_vf_d, 8) 3346 3347 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3348 { 3349 return float16_muladd(a, b, d, float_muladd_negate_c | 3350 float_muladd_negate_product, s); 3351 } 3352 3353 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3354 { 3355 return float32_muladd(a, b, d, float_muladd_negate_c | 3356 float_muladd_negate_product, s); 3357 } 3358 3359 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3360 { 3361 return float64_muladd(a, b, d, float_muladd_negate_c | 3362 float_muladd_negate_product, s); 3363 } 3364 3365 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, 
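/*
 * For the *macc/*msac group, vd is the addend and vs1 (or f[rs1]) and
 * vs2 are the multiplicands, so each helper is one fused softfloat
 * muladd.  The flag combinations used by this group map as follows
 * (negate_product negates a*b, negate_c negates the addend, both before
 * the single rounding step):
 *
 *     vfmacc   vd =  (vs1 * vs2) + vd    flags: 0
 *     vfnmacc  vd = -(vs1 * vs2) - vd    flags: negate_product | negate_c
 *     vfmsac   vd =  (vs1 * vs2) - vd    flags: negate_c
 *     vfnmsac  vd = -(vs1 * vs2) + vd    flags: negate_product
 */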
H2, H2, H2, fnmacc16) 3366 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3367 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3368 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3369 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3370 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3371 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3372 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3373 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3374 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3375 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3376 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3377 3378 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3379 { 3380 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3381 } 3382 3383 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3384 { 3385 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3386 } 3387 3388 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3389 { 3390 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3391 } 3392 3393 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3394 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3395 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3396 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3397 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3398 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3399 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3400 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3401 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3402 GEN_VEXT_VF(vfmsac_vf_h, 2) 3403 GEN_VEXT_VF(vfmsac_vf_w, 4) 3404 GEN_VEXT_VF(vfmsac_vf_d, 8) 3405 3406 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3407 { 3408 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3409 } 3410 3411 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3412 { 3413 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3414 } 3415 3416 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3417 { 3418 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3419 } 3420 3421 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3422 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3423 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3424 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3425 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3426 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3427 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3428 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3429 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3430 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3431 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3432 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3433 3434 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3435 { 3436 return float16_muladd(d, b, a, 0, s); 3437 } 3438 3439 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3440 { 3441 return float32_muladd(d, b, a, 0, s); 3442 } 3443 3444 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3445 { 3446 return float64_muladd(d, b, a, 0, s); 3447 } 3448 3449 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3450 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3451 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3452 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) 3453 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3454 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3455 RVVCALL(OPFVF3, vfmadd_vf_h, 
OP_UUU_H, H2, H2, fmadd16) 3456 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3457 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3458 GEN_VEXT_VF(vfmadd_vf_h, 2) 3459 GEN_VEXT_VF(vfmadd_vf_w, 4) 3460 GEN_VEXT_VF(vfmadd_vf_d, 8) 3461 3462 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3463 { 3464 return float16_muladd(d, b, a, float_muladd_negate_c | 3465 float_muladd_negate_product, s); 3466 } 3467 3468 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3469 { 3470 return float32_muladd(d, b, a, float_muladd_negate_c | 3471 float_muladd_negate_product, s); 3472 } 3473 3474 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3475 { 3476 return float64_muladd(d, b, a, float_muladd_negate_c | 3477 float_muladd_negate_product, s); 3478 } 3479 3480 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3481 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3482 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3483 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3484 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3485 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3486 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3487 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3488 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3489 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3490 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3491 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3492 3493 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3494 { 3495 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3496 } 3497 3498 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3499 { 3500 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3501 } 3502 3503 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3504 { 3505 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3506 } 3507 3508 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3509 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3510 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3511 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3512 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3513 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3514 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3515 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3516 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3517 GEN_VEXT_VF(vfmsub_vf_h, 2) 3518 GEN_VEXT_VF(vfmsub_vf_w, 4) 3519 GEN_VEXT_VF(vfmsub_vf_d, 8) 3520 3521 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3522 { 3523 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3524 } 3525 3526 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3527 { 3528 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3529 } 3530 3531 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3532 { 3533 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3534 } 3535 3536 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3537 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3538 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3539 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3540 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3541 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3542 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3543 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3544 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, 
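/*
 * The *madd/*msub group differs from *macc/*msac only in which register
 * is multiplied: vd and vs1 (or f[rs1]) form the product and vs2 is the
 * addend, hence the helpers call float*_muladd(d, b, a, ...):
 *
 *     vfmadd   vd =  (vd * vs1) + vs2    flags: 0
 *     vfnmadd  vd = -(vd * vs1) - vs2    flags: negate_product | negate_c
 *     vfmsub   vd =  (vd * vs1) - vs2    flags: negate_c
 *     vfnmsub  vd = -(vd * vs1) + vs2    flags: negate_product
 */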
H8, fnmsub64) 3545 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3546 GEN_VEXT_VF(vfnmsub_vf_w, 4) 3547 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3548 3549 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3550 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3551 { 3552 return float32_muladd(float16_to_float32(a, true, s), 3553 float16_to_float32(b, true, s), d, 0, s); 3554 } 3555 3556 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3557 { 3558 return float64_muladd(float32_to_float64(a, s), 3559 float32_to_float64(b, s), d, 0, s); 3560 } 3561 3562 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3563 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3564 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3565 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3566 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3567 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3568 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3569 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3570 3571 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3572 { 3573 return float32_muladd(float16_to_float32(a, true, s), 3574 float16_to_float32(b, true, s), d, 3575 float_muladd_negate_c | float_muladd_negate_product, 3576 s); 3577 } 3578 3579 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3580 { 3581 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), 3582 d, float_muladd_negate_c | 3583 float_muladd_negate_product, s); 3584 } 3585 3586 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3587 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3588 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3589 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3590 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3591 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3592 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3593 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3594 3595 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3596 { 3597 return float32_muladd(float16_to_float32(a, true, s), 3598 float16_to_float32(b, true, s), d, 3599 float_muladd_negate_c, s); 3600 } 3601 3602 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3603 { 3604 return float64_muladd(float32_to_float64(a, s), 3605 float32_to_float64(b, s), d, 3606 float_muladd_negate_c, s); 3607 } 3608 3609 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3610 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3611 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3612 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3613 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3614 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3615 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3616 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3617 3618 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3619 { 3620 return float32_muladd(float16_to_float32(a, true, s), 3621 float16_to_float32(b, true, s), d, 3622 float_muladd_negate_product, s); 3623 } 3624 3625 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3626 { 3627 return float64_muladd(float32_to_float64(a, s), 3628 float32_to_float64(b, s), d, 3629 float_muladd_negate_product, s); 3630 } 3631 3632 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3633 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3634 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3635 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3636 RVVCALL(OPFVF3, vfwnmsac_vf_h, 
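/*
 * Note on the widening FMA helpers: the SEW-wide multiplicands are
 * promoted to 2*SEW first (every binary16 value is exactly representable
 * in binary32, and every binary32 value in binary64, so these conversions
 * never round), then a single fused muladd is done against the 2*SEW
 * accumulator, so the result is rounded only once.  The 'true' passed to
 * float16_to_float32() selects the IEEE half-precision format rather
 * than the ARM alternative half-precision format.
 */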
WOP_UUU_H, H4, H2, fwnmsac16) 3637 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3638 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3639 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3640 3641 /* Vector Floating-Point Square-Root Instruction */ 3642 /* (TD, T2, TX2) */ 3643 #define OP_UU_H uint16_t, uint16_t, uint16_t 3644 #define OP_UU_W uint32_t, uint32_t, uint32_t 3645 #define OP_UU_D uint64_t, uint64_t, uint64_t 3646 3647 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3648 static void do_##NAME(void *vd, void *vs2, int i, \ 3649 CPURISCVState *env) \ 3650 { \ 3651 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3652 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3653 } 3654 3655 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3656 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3657 CPURISCVState *env, uint32_t desc) \ 3658 { \ 3659 uint32_t vm = vext_vm(desc); \ 3660 uint32_t vl = env->vl; \ 3661 uint32_t total_elems = \ 3662 vext_get_total_elems(env, desc, ESZ); \ 3663 uint32_t vta = vext_vta(desc); \ 3664 uint32_t vma = vext_vma(desc); \ 3665 uint32_t i; \ 3666 \ 3667 if (vl == 0) { \ 3668 return; \ 3669 } \ 3670 for (i = env->vstart; i < vl; i++) { \ 3671 if (!vm && !vext_elem_mask(v0, i)) { \ 3672 /* set masked-off elements to 1s */ \ 3673 vext_set_elems_1s(vd, vma, i * ESZ, \ 3674 (i + 1) * ESZ); \ 3675 continue; \ 3676 } \ 3677 do_##NAME(vd, vs2, i, env); \ 3678 } \ 3679 env->vstart = 0; \ 3680 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3681 total_elems * ESZ); \ 3682 } 3683 3684 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3685 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3686 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3687 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3688 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3689 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3690 3691 /* 3692 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3693 * 3694 * Adapted from riscv-v-spec recip.c: 3695 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3696 */ 3697 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3698 { 3699 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3700 uint64_t exp = extract64(f, frac_size, exp_size); 3701 uint64_t frac = extract64(f, 0, frac_size); 3702 3703 const uint8_t lookup_table[] = { 3704 52, 51, 50, 48, 47, 46, 44, 43, 3705 42, 41, 40, 39, 38, 36, 35, 34, 3706 33, 32, 31, 30, 30, 29, 28, 27, 3707 26, 25, 24, 23, 23, 22, 21, 20, 3708 19, 19, 18, 17, 16, 16, 15, 14, 3709 14, 13, 12, 12, 11, 10, 10, 9, 3710 9, 8, 7, 7, 6, 6, 5, 4, 3711 4, 3, 3, 2, 2, 1, 1, 0, 3712 127, 125, 123, 121, 119, 118, 116, 114, 3713 113, 111, 109, 108, 106, 105, 103, 102, 3714 100, 99, 97, 96, 95, 93, 92, 91, 3715 90, 88, 87, 86, 85, 84, 83, 82, 3716 80, 79, 78, 77, 76, 75, 74, 73, 3717 72, 71, 70, 70, 69, 68, 67, 66, 3718 65, 64, 63, 63, 62, 61, 60, 59, 3719 59, 58, 57, 56, 56, 55, 54, 53 3720 }; 3721 const int precision = 7; 3722 3723 if (exp == 0 && frac != 0) { /* subnormal */ 3724 /* Normalize the subnormal. 
*/ 3725 while (extract64(frac, frac_size - 1, 1) == 0) { 3726 exp--; 3727 frac <<= 1; 3728 } 3729 3730 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3731 } 3732 3733 int idx = ((exp & 1) << (precision - 1)) | 3734 (frac >> (frac_size - precision + 1)); 3735 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3736 (frac_size - precision); 3737 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3738 3739 uint64_t val = 0; 3740 val = deposit64(val, 0, frac_size, out_frac); 3741 val = deposit64(val, frac_size, exp_size, out_exp); 3742 val = deposit64(val, frac_size + exp_size, 1, sign); 3743 return val; 3744 } 3745 3746 static float16 frsqrt7_h(float16 f, float_status *s) 3747 { 3748 int exp_size = 5, frac_size = 10; 3749 bool sign = float16_is_neg(f); 3750 3751 /* 3752 * frsqrt7(sNaN) = canonical NaN 3753 * frsqrt7(-inf) = canonical NaN 3754 * frsqrt7(-normal) = canonical NaN 3755 * frsqrt7(-subnormal) = canonical NaN 3756 */ 3757 if (float16_is_signaling_nan(f, s) || 3758 (float16_is_infinity(f) && sign) || 3759 (float16_is_normal(f) && sign) || 3760 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3761 s->float_exception_flags |= float_flag_invalid; 3762 return float16_default_nan(s); 3763 } 3764 3765 /* frsqrt7(qNaN) = canonical NaN */ 3766 if (float16_is_quiet_nan(f, s)) { 3767 return float16_default_nan(s); 3768 } 3769 3770 /* frsqrt7(+-0) = +-inf */ 3771 if (float16_is_zero(f)) { 3772 s->float_exception_flags |= float_flag_divbyzero; 3773 return float16_set_sign(float16_infinity, sign); 3774 } 3775 3776 /* frsqrt7(+inf) = +0 */ 3777 if (float16_is_infinity(f) && !sign) { 3778 return float16_set_sign(float16_zero, sign); 3779 } 3780 3781 /* +normal, +subnormal */ 3782 uint64_t val = frsqrt7(f, exp_size, frac_size); 3783 return make_float16(val); 3784 } 3785 3786 static float32 frsqrt7_s(float32 f, float_status *s) 3787 { 3788 int exp_size = 8, frac_size = 23; 3789 bool sign = float32_is_neg(f); 3790 3791 /* 3792 * frsqrt7(sNaN) = canonical NaN 3793 * frsqrt7(-inf) = canonical NaN 3794 * frsqrt7(-normal) = canonical NaN 3795 * frsqrt7(-subnormal) = canonical NaN 3796 */ 3797 if (float32_is_signaling_nan(f, s) || 3798 (float32_is_infinity(f) && sign) || 3799 (float32_is_normal(f) && sign) || 3800 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3801 s->float_exception_flags |= float_flag_invalid; 3802 return float32_default_nan(s); 3803 } 3804 3805 /* frsqrt7(qNaN) = canonical NaN */ 3806 if (float32_is_quiet_nan(f, s)) { 3807 return float32_default_nan(s); 3808 } 3809 3810 /* frsqrt7(+-0) = +-inf */ 3811 if (float32_is_zero(f)) { 3812 s->float_exception_flags |= float_flag_divbyzero; 3813 return float32_set_sign(float32_infinity, sign); 3814 } 3815 3816 /* frsqrt7(+inf) = +0 */ 3817 if (float32_is_infinity(f) && !sign) { 3818 return float32_set_sign(float32_zero, sign); 3819 } 3820 3821 /* +normal, +subnormal */ 3822 uint64_t val = frsqrt7(f, exp_size, frac_size); 3823 return make_float32(val); 3824 } 3825 3826 static float64 frsqrt7_d(float64 f, float_status *s) 3827 { 3828 int exp_size = 11, frac_size = 52; 3829 bool sign = float64_is_neg(f); 3830 3831 /* 3832 * frsqrt7(sNaN) = canonical NaN 3833 * frsqrt7(-inf) = canonical NaN 3834 * frsqrt7(-normal) = canonical NaN 3835 * frsqrt7(-subnormal) = canonical NaN 3836 */ 3837 if (float64_is_signaling_nan(f, s) || 3838 (float64_is_infinity(f) && sign) || 3839 (float64_is_normal(f) && sign) || 3840 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3841 
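        /*
         * No real square root exists for a negative non-zero input or a
         * signaling NaN: raise the invalid flag and return the canonical
         * NaN, as in frsqrt7_h/frsqrt7_s above.
         */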
s->float_exception_flags |= float_flag_invalid; 3842 return float64_default_nan(s); 3843 } 3844 3845 /* frsqrt7(qNaN) = canonical NaN */ 3846 if (float64_is_quiet_nan(f, s)) { 3847 return float64_default_nan(s); 3848 } 3849 3850 /* frsqrt7(+-0) = +-inf */ 3851 if (float64_is_zero(f)) { 3852 s->float_exception_flags |= float_flag_divbyzero; 3853 return float64_set_sign(float64_infinity, sign); 3854 } 3855 3856 /* frsqrt7(+inf) = +0 */ 3857 if (float64_is_infinity(f) && !sign) { 3858 return float64_set_sign(float64_zero, sign); 3859 } 3860 3861 /* +normal, +subnormal */ 3862 uint64_t val = frsqrt7(f, exp_size, frac_size); 3863 return make_float64(val); 3864 } 3865 3866 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3867 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3868 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3869 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3870 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3871 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3872 3873 /* 3874 * Vector Floating-Point Reciprocal Estimate Instruction 3875 * 3876 * Adapted from riscv-v-spec recip.c: 3877 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3878 */ 3879 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3880 float_status *s) 3881 { 3882 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3883 uint64_t exp = extract64(f, frac_size, exp_size); 3884 uint64_t frac = extract64(f, 0, frac_size); 3885 3886 const uint8_t lookup_table[] = { 3887 127, 125, 123, 121, 119, 117, 116, 114, 3888 112, 110, 109, 107, 105, 104, 102, 100, 3889 99, 97, 96, 94, 93, 91, 90, 88, 3890 87, 85, 84, 83, 81, 80, 79, 77, 3891 76, 75, 74, 72, 71, 70, 69, 68, 3892 66, 65, 64, 63, 62, 61, 60, 59, 3893 58, 57, 56, 55, 54, 53, 52, 51, 3894 50, 49, 48, 47, 46, 45, 44, 43, 3895 42, 41, 40, 40, 39, 38, 37, 36, 3896 35, 35, 34, 33, 32, 31, 31, 30, 3897 29, 28, 28, 27, 26, 25, 25, 24, 3898 23, 23, 22, 21, 21, 20, 19, 19, 3899 18, 17, 17, 16, 15, 15, 14, 14, 3900 13, 12, 12, 11, 11, 10, 9, 9, 3901 8, 8, 7, 7, 6, 5, 5, 4, 3902 4, 3, 3, 2, 2, 1, 1, 0 3903 }; 3904 const int precision = 7; 3905 3906 if (exp == 0 && frac != 0) { /* subnormal */ 3907 /* Normalize the subnormal. */ 3908 while (extract64(frac, frac_size - 1, 1) == 0) { 3909 exp--; 3910 frac <<= 1; 3911 } 3912 3913 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3914 3915 if (exp != 0 && exp != UINT64_MAX) { 3916 /* 3917 * Overflow to inf or max value of same sign, 3918 * depending on sign and rounding mode. 3919 */ 3920 s->float_exception_flags |= (float_flag_inexact | 3921 float_flag_overflow); 3922 3923 if ((s->float_rounding_mode == float_round_to_zero) || 3924 ((s->float_rounding_mode == float_round_down) && !sign) || 3925 ((s->float_rounding_mode == float_round_up) && sign)) { 3926 /* Return greatest/negative finite value. */ 3927 return (sign << (exp_size + frac_size)) | 3928 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3929 } else { 3930 /* Return +-inf. */ 3931 return (sign << (exp_size + frac_size)) | 3932 MAKE_64BIT_MASK(frac_size, exp_size); 3933 } 3934 } 3935 } 3936 3937 int idx = frac >> (frac_size - precision); 3938 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3939 (frac_size - precision); 3940 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3941 3942 if (out_exp == 0 || out_exp == UINT64_MAX) { 3943 /* 3944 * The result is subnormal, but don't raise the underflow exception, 3945 * because there's no additional loss of precision. 
3946 */ 3947 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3948 if (out_exp == UINT64_MAX) { 3949 out_frac >>= 1; 3950 out_exp = 0; 3951 } 3952 } 3953 3954 uint64_t val = 0; 3955 val = deposit64(val, 0, frac_size, out_frac); 3956 val = deposit64(val, frac_size, exp_size, out_exp); 3957 val = deposit64(val, frac_size + exp_size, 1, sign); 3958 return val; 3959 } 3960 3961 static float16 frec7_h(float16 f, float_status *s) 3962 { 3963 int exp_size = 5, frac_size = 10; 3964 bool sign = float16_is_neg(f); 3965 3966 /* frec7(+-inf) = +-0 */ 3967 if (float16_is_infinity(f)) { 3968 return float16_set_sign(float16_zero, sign); 3969 } 3970 3971 /* frec7(+-0) = +-inf */ 3972 if (float16_is_zero(f)) { 3973 s->float_exception_flags |= float_flag_divbyzero; 3974 return float16_set_sign(float16_infinity, sign); 3975 } 3976 3977 /* frec7(sNaN) = canonical NaN */ 3978 if (float16_is_signaling_nan(f, s)) { 3979 s->float_exception_flags |= float_flag_invalid; 3980 return float16_default_nan(s); 3981 } 3982 3983 /* frec7(qNaN) = canonical NaN */ 3984 if (float16_is_quiet_nan(f, s)) { 3985 return float16_default_nan(s); 3986 } 3987 3988 /* +-normal, +-subnormal */ 3989 uint64_t val = frec7(f, exp_size, frac_size, s); 3990 return make_float16(val); 3991 } 3992 3993 static float32 frec7_s(float32 f, float_status *s) 3994 { 3995 int exp_size = 8, frac_size = 23; 3996 bool sign = float32_is_neg(f); 3997 3998 /* frec7(+-inf) = +-0 */ 3999 if (float32_is_infinity(f)) { 4000 return float32_set_sign(float32_zero, sign); 4001 } 4002 4003 /* frec7(+-0) = +-inf */ 4004 if (float32_is_zero(f)) { 4005 s->float_exception_flags |= float_flag_divbyzero; 4006 return float32_set_sign(float32_infinity, sign); 4007 } 4008 4009 /* frec7(sNaN) = canonical NaN */ 4010 if (float32_is_signaling_nan(f, s)) { 4011 s->float_exception_flags |= float_flag_invalid; 4012 return float32_default_nan(s); 4013 } 4014 4015 /* frec7(qNaN) = canonical NaN */ 4016 if (float32_is_quiet_nan(f, s)) { 4017 return float32_default_nan(s); 4018 } 4019 4020 /* +-normal, +-subnormal */ 4021 uint64_t val = frec7(f, exp_size, frac_size, s); 4022 return make_float32(val); 4023 } 4024 4025 static float64 frec7_d(float64 f, float_status *s) 4026 { 4027 int exp_size = 11, frac_size = 52; 4028 bool sign = float64_is_neg(f); 4029 4030 /* frec7(+-inf) = +-0 */ 4031 if (float64_is_infinity(f)) { 4032 return float64_set_sign(float64_zero, sign); 4033 } 4034 4035 /* frec7(+-0) = +-inf */ 4036 if (float64_is_zero(f)) { 4037 s->float_exception_flags |= float_flag_divbyzero; 4038 return float64_set_sign(float64_infinity, sign); 4039 } 4040 4041 /* frec7(sNaN) = canonical NaN */ 4042 if (float64_is_signaling_nan(f, s)) { 4043 s->float_exception_flags |= float_flag_invalid; 4044 return float64_default_nan(s); 4045 } 4046 4047 /* frec7(qNaN) = canonical NaN */ 4048 if (float64_is_quiet_nan(f, s)) { 4049 return float64_default_nan(s); 4050 } 4051 4052 /* +-normal, +-subnormal */ 4053 uint64_t val = frec7(f, exp_size, frac_size, s); 4054 return make_float64(val); 4055 } 4056 4057 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4058 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4059 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4060 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4061 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4062 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4063 4064 /* Vector Floating-Point MIN/MAX Instructions */ 4065 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4066 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
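/*
 * vfmin/vfmax use the *_minimum_number/*_maximum_number operations,
 * i.e. IEEE 754-2019 minimumNumber/maximumNumber semantics as required
 * by the vector spec: if exactly one operand is a NaN, the numerical
 * operand is returned, and only signaling NaNs raise the invalid flag.
 */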
float32_minimum_number) 4067 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4068 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4069 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4070 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4071 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4072 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4073 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4074 GEN_VEXT_VF(vfmin_vf_h, 2) 4075 GEN_VEXT_VF(vfmin_vf_w, 4) 4076 GEN_VEXT_VF(vfmin_vf_d, 8) 4077 4078 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4079 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4080 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4081 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 4082 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 4083 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 4084 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4085 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4086 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4087 GEN_VEXT_VF(vfmax_vf_h, 2) 4088 GEN_VEXT_VF(vfmax_vf_w, 4) 4089 GEN_VEXT_VF(vfmax_vf_d, 8) 4090 4091 /* Vector Floating-Point Sign-Injection Instructions */ 4092 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4093 { 4094 return deposit64(b, 0, 15, a); 4095 } 4096 4097 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4098 { 4099 return deposit64(b, 0, 31, a); 4100 } 4101 4102 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4103 { 4104 return deposit64(b, 0, 63, a); 4105 } 4106 4107 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4108 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4109 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4110 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 4111 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 4112 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 4113 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4114 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4115 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4116 GEN_VEXT_VF(vfsgnj_vf_h, 2) 4117 GEN_VEXT_VF(vfsgnj_vf_w, 4) 4118 GEN_VEXT_VF(vfsgnj_vf_d, 8) 4119 4120 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4121 { 4122 return deposit64(~b, 0, 15, a); 4123 } 4124 4125 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4126 { 4127 return deposit64(~b, 0, 31, a); 4128 } 4129 4130 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4131 { 4132 return deposit64(~b, 0, 63, a); 4133 } 4134 4135 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4136 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4137 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4138 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 4139 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 4140 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 4141 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4142 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4143 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4144 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4145 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4146 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4147 4148 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4149 { 4150 return deposit64(b ^ a, 0, 15, a); 4151 } 4152 4153 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4154 { 4155 return deposit64(b ^ a, 0, 31, a); 4156 } 4157 4158 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
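/*
 * Note on the sign-injection helpers: OPFVV2/OPFVF2 pass the vs2 element
 * as 'a' and the vs1 element (or f[rs1]) as 'b', so deposit64(b, 0, 15, a)
 * keeps the sign bit of b and takes the remaining 15 (31, 63) bits from
 * a, i.e. the magnitude of vs2[i] with the sign of vs1[i].  fsgnjn
 * inverts that sign and fsgnjx XORs the two signs.  The float_status
 * argument is unused; it only keeps the signature RVVCALL-compatible.
 */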
float_status *s) 4159 { 4160 return deposit64(b ^ a, 0, 63, a); 4161 } 4162 4163 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4164 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4165 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4166 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 4167 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 4168 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 4169 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4170 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4171 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4172 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 4173 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 4174 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 4175 4176 /* Vector Floating-Point Compare Instructions */ 4177 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4178 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4179 CPURISCVState *env, uint32_t desc) \ 4180 { \ 4181 uint32_t vm = vext_vm(desc); \ 4182 uint32_t vl = env->vl; \ 4183 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4184 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4185 uint32_t vma = vext_vma(desc); \ 4186 uint32_t i; \ 4187 \ 4188 for (i = env->vstart; i < vl; i++) { \ 4189 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4190 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4191 if (!vm && !vext_elem_mask(v0, i)) { \ 4192 /* set masked-off elements to 1s */ \ 4193 if (vma) { \ 4194 vext_set_elem_mask(vd, i, 1); \ 4195 } \ 4196 continue; \ 4197 } \ 4198 vext_set_elem_mask(vd, i, \ 4199 DO_OP(s2, s1, &env->fp_status)); \ 4200 } \ 4201 env->vstart = 0; \ 4202 /* 4203 * mask destination register are always tail-agnostic 4204 * set tail elements to 1s 4205 */ \ 4206 if (vta_all_1s) { \ 4207 for (; i < total_elems; i++) { \ 4208 vext_set_elem_mask(vd, i, 1); \ 4209 } \ 4210 } \ 4211 } 4212 4213 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4214 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4215 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4216 4217 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4218 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4219 CPURISCVState *env, uint32_t desc) \ 4220 { \ 4221 uint32_t vm = vext_vm(desc); \ 4222 uint32_t vl = env->vl; \ 4223 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4224 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4225 uint32_t vma = vext_vma(desc); \ 4226 uint32_t i; \ 4227 \ 4228 for (i = env->vstart; i < vl; i++) { \ 4229 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4230 if (!vm && !vext_elem_mask(v0, i)) { \ 4231 /* set masked-off elements to 1s */ \ 4232 if (vma) { \ 4233 vext_set_elem_mask(vd, i, 1); \ 4234 } \ 4235 continue; \ 4236 } \ 4237 vext_set_elem_mask(vd, i, \ 4238 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4239 } \ 4240 env->vstart = 0; \ 4241 /* 4242 * mask destination register are always tail-agnostic 4243 * set tail elements to 1s 4244 */ \ 4245 if (vta_all_1s) { \ 4246 for (; i < total_elems; i++) { \ 4247 vext_set_elem_mask(vd, i, 1); \ 4248 } \ 4249 } \ 4250 } 4251 4252 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4253 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4254 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4255 4256 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4257 { 4258 FloatRelation compare = float16_compare_quiet(a, b, s); 4259 return compare != float_relation_equal; 4260 } 4261 4262 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4263 { 4264 FloatRelation compare = 
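        /*
         * The quiet compare does not raise invalid for quiet NaNs, and an
         * unordered result (either operand NaN) makes "not equal" come out
         * true, which is what vmfne requires.
         */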
float32_compare_quiet(a, b, s); 4265 return compare != float_relation_equal; 4266 } 4267 4268 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4269 { 4270 FloatRelation compare = float64_compare_quiet(a, b, s); 4271 return compare != float_relation_equal; 4272 } 4273 4274 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4275 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4276 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4277 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4278 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4279 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4280 4281 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4282 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4283 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4284 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4285 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4286 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4287 4288 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4289 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4290 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4291 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4292 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4293 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4294 4295 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4296 { 4297 FloatRelation compare = float16_compare(a, b, s); 4298 return compare == float_relation_greater; 4299 } 4300 4301 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4302 { 4303 FloatRelation compare = float32_compare(a, b, s); 4304 return compare == float_relation_greater; 4305 } 4306 4307 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4308 { 4309 FloatRelation compare = float64_compare(a, b, s); 4310 return compare == float_relation_greater; 4311 } 4312 4313 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4314 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4315 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4316 4317 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4318 { 4319 FloatRelation compare = float16_compare(a, b, s); 4320 return compare == float_relation_greater || 4321 compare == float_relation_equal; 4322 } 4323 4324 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4325 { 4326 FloatRelation compare = float32_compare(a, b, s); 4327 return compare == float_relation_greater || 4328 compare == float_relation_equal; 4329 } 4330 4331 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4332 { 4333 FloatRelation compare = float64_compare(a, b, s); 4334 return compare == float_relation_greater || 4335 compare == float_relation_equal; 4336 } 4337 4338 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4339 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4340 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4341 4342 /* Vector Floating-Point Classify Instruction */ 4343 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4344 static void do_##NAME(void *vd, void *vs2, int i) \ 4345 { \ 4346 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4347 *((TD *)vd + HD(i)) = OP(s2); \ 4348 } 4349 4350 #define GEN_VEXT_V(NAME, ESZ) \ 4351 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4352 CPURISCVState *env, uint32_t desc) \ 4353 { \ 4354 uint32_t vm = vext_vm(desc); \ 4355 uint32_t vl = env->vl; \ 4356 uint32_t total_elems = \ 4357 vext_get_total_elems(env, desc, ESZ); \ 4358 uint32_t vta = vext_vta(desc); \ 4359 
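        /*
         * vta/vma are the tail-/mask-agnostic policy flags recorded in
         * the descriptor by the translator.  When a flag is set, this
         * implementation overwrites the affected destination elements
         * with all 1s (which the spec permits for agnostic destinations);
         * when it is clear, vext_set_elems_1s() is expected to leave the
         * destination bytes untouched, i.e. undisturbed.
         */ \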
uint32_t vma = vext_vma(desc); \ 4360 uint32_t i; \ 4361 \ 4362 for (i = env->vstart; i < vl; i++) { \ 4363 if (!vm && !vext_elem_mask(v0, i)) { \ 4364 /* set masked-off elements to 1s */ \ 4365 vext_set_elems_1s(vd, vma, i * ESZ, \ 4366 (i + 1) * ESZ); \ 4367 continue; \ 4368 } \ 4369 do_##NAME(vd, vs2, i); \ 4370 } \ 4371 env->vstart = 0; \ 4372 /* set tail elements to 1s */ \ 4373 vext_set_elems_1s(vd, vta, vl * ESZ, \ 4374 total_elems * ESZ); \ 4375 } 4376 4377 target_ulong fclass_h(uint64_t frs1) 4378 { 4379 float16 f = frs1; 4380 bool sign = float16_is_neg(f); 4381 4382 if (float16_is_infinity(f)) { 4383 return sign ? 1 << 0 : 1 << 7; 4384 } else if (float16_is_zero(f)) { 4385 return sign ? 1 << 3 : 1 << 4; 4386 } else if (float16_is_zero_or_denormal(f)) { 4387 return sign ? 1 << 2 : 1 << 5; 4388 } else if (float16_is_any_nan(f)) { 4389 float_status s = { }; /* for snan_bit_is_one */ 4390 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4391 } else { 4392 return sign ? 1 << 1 : 1 << 6; 4393 } 4394 } 4395 4396 target_ulong fclass_s(uint64_t frs1) 4397 { 4398 float32 f = frs1; 4399 bool sign = float32_is_neg(f); 4400 4401 if (float32_is_infinity(f)) { 4402 return sign ? 1 << 0 : 1 << 7; 4403 } else if (float32_is_zero(f)) { 4404 return sign ? 1 << 3 : 1 << 4; 4405 } else if (float32_is_zero_or_denormal(f)) { 4406 return sign ? 1 << 2 : 1 << 5; 4407 } else if (float32_is_any_nan(f)) { 4408 float_status s = { }; /* for snan_bit_is_one */ 4409 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4410 } else { 4411 return sign ? 1 << 1 : 1 << 6; 4412 } 4413 } 4414 4415 target_ulong fclass_d(uint64_t frs1) 4416 { 4417 float64 f = frs1; 4418 bool sign = float64_is_neg(f); 4419 4420 if (float64_is_infinity(f)) { 4421 return sign ? 1 << 0 : 1 << 7; 4422 } else if (float64_is_zero(f)) { 4423 return sign ? 1 << 3 : 1 << 4; 4424 } else if (float64_is_zero_or_denormal(f)) { 4425 return sign ? 1 << 2 : 1 << 5; 4426 } else if (float64_is_any_nan(f)) { 4427 float_status s = { }; /* for snan_bit_is_one */ 4428 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4429 } else { 4430 return sign ? 1 << 1 : 1 << 6; 4431 } 4432 } 4433 4434 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4435 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4436 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4437 GEN_VEXT_V(vfclass_v_h, 2) 4438 GEN_VEXT_V(vfclass_v_w, 4) 4439 GEN_VEXT_V(vfclass_v_d, 8) 4440 4441 /* Vector Floating-Point Merge Instruction */ 4442 4443 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4444 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4445 CPURISCVState *env, uint32_t desc) \ 4446 { \ 4447 uint32_t vm = vext_vm(desc); \ 4448 uint32_t vl = env->vl; \ 4449 uint32_t esz = sizeof(ETYPE); \ 4450 uint32_t total_elems = \ 4451 vext_get_total_elems(env, desc, esz); \ 4452 uint32_t vta = vext_vta(desc); \ 4453 uint32_t i; \ 4454 \ 4455 for (i = env->vstart; i < vl; i++) { \ 4456 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4457 *((ETYPE *)vd + H(i)) = \ 4458 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4459 } \ 4460 env->vstart = 0; \ 4461 /* set tail elements to 1s */ \ 4462 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4463 } 4464 4465 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4466 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4467 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4468 4469 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4470 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
*/ 4471 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4472 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4473 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4474 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4475 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4476 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4477 4478 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4479 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4480 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4481 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4482 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4483 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4484 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4485 4486 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4487 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4488 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4489 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4490 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4491 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4492 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4493 4494 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4495 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4496 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4497 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4498 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4499 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4500 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4501 4502 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4503 /* (TD, T2, TX2) */ 4504 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4505 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4506 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4507 /* 4508 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4509 */ 4510 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4511 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4512 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4513 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4514 4515 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4516 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4517 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4518 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4519 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4520 4521 /* 4522 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. 4523 */ 4524 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4525 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4526 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4527 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4528 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4529 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4530 4531 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4532 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4533 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4534 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4535 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4536 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4537 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4538 4539 /* 4540 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. 
4541 */ 4542 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4543 { 4544 return float16_to_float32(a, true, s); 4545 } 4546 4547 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4548 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4549 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4550 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4551 4552 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4553 /* (TD, T2, TX2) */ 4554 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4555 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4556 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4557 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4558 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4559 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4560 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4561 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4562 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4563 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4564 4565 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4566 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4567 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4568 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4569 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4570 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4571 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4572 4573 /* 4574 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. 4575 */ 4576 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4577 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4578 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4579 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4580 4581 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4582 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4583 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4584 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4585 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4586 4587 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4588 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4589 { 4590 return float32_to_float16(a, true, s); 4591 } 4592 4593 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4594 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4595 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4596 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4597 4598 /* 4599 * Vector Reduction Operations 4600 */ 4601 /* Vector Single-Width Integer Reduction Instructions */ 4602 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4603 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4604 void *vs2, CPURISCVState *env, \ 4605 uint32_t desc) \ 4606 { \ 4607 uint32_t vm = vext_vm(desc); \ 4608 uint32_t vl = env->vl; \ 4609 uint32_t esz = sizeof(TD); \ 4610 uint32_t vlenb = simd_maxsz(desc); \ 4611 uint32_t vta = vext_vta(desc); \ 4612 uint32_t i; \ 4613 TD s1 = *((TD *)vs1 + HD(0)); \ 4614 \ 4615 for (i = env->vstart; i < vl; i++) { \ 4616 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4617 if (!vm && !vext_elem_mask(v0, i)) { \ 4618 continue; \ 4619 } \ 4620 s1 = OP(s1, (TD)s2); \ 4621 } \ 4622 *((TD *)vd + HD(0)) = s1; \ 4623 env->vstart = 0; \ 4624 /* set tail elements to 1s */ \ 4625 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4626 } 4627 4628 /* vd[0] = sum(vs1[0], vs2[*]) */ 4629 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4630 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4631 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4632 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4633 4634 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4635 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4636 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4637 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4638 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4639 4640 /* vd[0] = max(vs1[0], vs2[*]) */ 4641 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4642 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4643 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4644 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4645 4646 /* vd[0] = minu(vs1[0], vs2[*]) */ 4647 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4648 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4649 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4650 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4651 4652 /* vd[0] = min(vs1[0], vs2[*]) */ 4653 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4654 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4655 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4656 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4657 4658 /* vd[0] = and(vs1[0], vs2[*]) */ 4659 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4660 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4661 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4662 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4663 4664 /* vd[0] = or(vs1[0], vs2[*]) */ 4665 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4666 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4667 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4668 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4669 4670 /* vd[0] = xor(vs1[0], vs2[*]) */ 4671 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4672 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, 
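/*
 * Each GEN_VEXT_RED expansion folds the active elements of vs2 into a
 * scalar seeded from vs1[0] and writes the result to vd[0] only;
 * vd[1..] are tail elements, handled by vext_set_elems_1s(vd, vta, esz,
 * vlenb).  For illustration: with vl = 4, all elements active,
 * vs1[0] = 10 and vs2 = {1, 2, 3, 4}, vredsum.vs yields vd[0] = 20.
 * The widening reductions below reuse the same macro with TD twice as
 * wide as TS2; the (TD)s2 cast performs the sign or zero extension.
 */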
DO_XOR) 4673 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4674 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4675 4676 /* Vector Widening Integer Reduction Instructions */ 4677 /* signed sum reduction into double-width accumulator */ 4678 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4679 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4680 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4681 4682 /* Unsigned sum reduction into double-width accumulator */ 4683 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4684 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4685 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4686 4687 /* Vector Single-Width Floating-Point Reduction Instructions */ 4688 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4689 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4690 void *vs2, CPURISCVState *env, \ 4691 uint32_t desc) \ 4692 { \ 4693 uint32_t vm = vext_vm(desc); \ 4694 uint32_t vl = env->vl; \ 4695 uint32_t esz = sizeof(TD); \ 4696 uint32_t vlenb = simd_maxsz(desc); \ 4697 uint32_t vta = vext_vta(desc); \ 4698 uint32_t i; \ 4699 TD s1 = *((TD *)vs1 + HD(0)); \ 4700 \ 4701 for (i = env->vstart; i < vl; i++) { \ 4702 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4703 if (!vm && !vext_elem_mask(v0, i)) { \ 4704 continue; \ 4705 } \ 4706 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4707 } \ 4708 *((TD *)vd + HD(0)) = s1; \ 4709 env->vstart = 0; \ 4710 /* set tail elements to 1s */ \ 4711 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4712 } 4713 4714 /* Unordered sum */ 4715 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4716 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4717 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4718 4719 /* Ordered sum */ 4720 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4721 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4722 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4723 4724 /* Maximum value */ 4725 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, 4726 float16_maximum_number) 4727 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, 4728 float32_maximum_number) 4729 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, 4730 float64_maximum_number) 4731 4732 /* Minimum value */ 4733 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, 4734 float16_minimum_number) 4735 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, 4736 float32_minimum_number) 4737 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, 4738 float64_minimum_number) 4739 4740 /* Vector Widening Floating-Point Add Instructions */ 4741 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4742 { 4743 return float32_add(a, float16_to_float32(b, true, s), s); 4744 } 4745 4746 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4747 { 4748 return float64_add(a, float32_to_float64(b, s), s); 4749 } 4750 4751 /* Vector Widening Floating-Point Reduction Instructions */ 4752 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4753 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4754 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4755 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4756 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4757 4758 /* 4759 * Vector Mask Operations 4760 
*/ 4761 /* Vector Mask-Register Logical Instructions */ 4762 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4763 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4764 void *vs2, CPURISCVState *env, \ 4765 uint32_t desc) \ 4766 { \ 4767 uint32_t vl = env->vl; \ 4768 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4769 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4770 uint32_t i; \ 4771 int a, b; \ 4772 \ 4773 for (i = env->vstart; i < vl; i++) { \ 4774 a = vext_elem_mask(vs1, i); \ 4775 b = vext_elem_mask(vs2, i); \ 4776 vext_set_elem_mask(vd, i, OP(b, a)); \ 4777 } \ 4778 env->vstart = 0; \ 4779 /* 4780 * mask destination register are always tail-agnostic 4781 * set tail elements to 1s 4782 */ \ 4783 if (vta_all_1s) { \ 4784 for (; i < total_elems; i++) { \ 4785 vext_set_elem_mask(vd, i, 1); \ 4786 } \ 4787 } \ 4788 } 4789 4790 #define DO_NAND(N, M) (!(N & M)) 4791 #define DO_ANDNOT(N, M) (N & !M) 4792 #define DO_NOR(N, M) (!(N | M)) 4793 #define DO_ORNOT(N, M) (N | !M) 4794 #define DO_XNOR(N, M) (!(N ^ M)) 4795 4796 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4797 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4798 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4799 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4800 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4801 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4802 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4803 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4804 4805 /* Vector count population in mask vcpop */ 4806 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4807 uint32_t desc) 4808 { 4809 target_ulong cnt = 0; 4810 uint32_t vm = vext_vm(desc); 4811 uint32_t vl = env->vl; 4812 int i; 4813 4814 for (i = env->vstart; i < vl; i++) { 4815 if (vm || vext_elem_mask(v0, i)) { 4816 if (vext_elem_mask(vs2, i)) { 4817 cnt++; 4818 } 4819 } 4820 } 4821 env->vstart = 0; 4822 return cnt; 4823 } 4824 4825 /* vfirst find-first-set mask bit */ 4826 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4827 uint32_t desc) 4828 { 4829 uint32_t vm = vext_vm(desc); 4830 uint32_t vl = env->vl; 4831 int i; 4832 4833 for (i = env->vstart; i < vl; i++) { 4834 if (vm || vext_elem_mask(v0, i)) { 4835 if (vext_elem_mask(vs2, i)) { 4836 return i; 4837 } 4838 } 4839 } 4840 env->vstart = 0; 4841 return -1LL; 4842 } 4843 4844 enum set_mask_type { 4845 ONLY_FIRST = 1, 4846 INCLUDE_FIRST, 4847 BEFORE_FIRST, 4848 }; 4849 4850 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4851 uint32_t desc, enum set_mask_type type) 4852 { 4853 uint32_t vm = vext_vm(desc); 4854 uint32_t vl = env->vl; 4855 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; 4856 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4857 uint32_t vma = vext_vma(desc); 4858 int i; 4859 bool first_mask_bit = false; 4860 4861 for (i = env->vstart; i < vl; i++) { 4862 if (!vm && !vext_elem_mask(v0, i)) { 4863 /* set masked-off elements to 1s */ 4864 if (vma) { 4865 vext_set_elem_mask(vd, i, 1); 4866 } 4867 continue; 4868 } 4869 /* write a zero to all following active elements */ 4870 if (first_mask_bit) { 4871 vext_set_elem_mask(vd, i, 0); 4872 continue; 4873 } 4874 if (vext_elem_mask(vs2, i)) { 4875 first_mask_bit = true; 4876 if (type == BEFORE_FIRST) { 4877 vext_set_elem_mask(vd, i, 0); 4878 } else { 4879 vext_set_elem_mask(vd, i, 1); 4880 } 4881 } else { 4882 if (type == ONLY_FIRST) { 4883 vext_set_elem_mask(vd, i, 0); 4884 } else { 4885 vext_set_elem_mask(vd, i, 1); 4886 } 4887 } 4888 } 4889 env->vstart = 0; 4890 /* 4891 * mask destination register are always tail-agnostic 4892 * set tail elements to 1s 4893 */ 4894 if (vta_all_1s) 
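    /*
     * For illustration, with all elements active and vs2 holding the
     * mask bits 0,0,1,0,1 (element order), vmsetm() produces:
     *
     *     vmsbf.m (BEFORE_FIRST):  1 1 0 0 0
     *     vmsif.m (INCLUDE_FIRST): 1 1 1 0 0
     *     vmsof.m (ONLY_FIRST):    0 0 1 0 0
     *
     * i.e. everything strictly before, up to and including, or only at
     * the first set mask bit of vs2.
     */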

enum set_mask_type {
    ONLY_FIRST = 1,
    INCLUDE_FIRST,
    BEFORE_FIRST,
};

static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = riscv_cpu_cfg(env)->vlen;
    uint32_t vta_all_1s = vext_vta_all_1s(desc);
    uint32_t vma = vext_vma(desc);
    int i;
    bool first_mask_bit = false;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            if (vma) {
                vext_set_elem_mask(vd, i, 1);
            }
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, i, 0);
            } else {
                vext_set_elem_mask(vd, i, 1);
            }
        }
    }
    env->vstart = 0;
    /*
     * mask destination registers are always tail-agnostic;
     * set tail elements to 1s
     */
    if (vta_all_1s) {
        for (; i < total_elems; i++) {
            vext_set_elem_mask(vd, i, 1);
        }
    }
}

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}

/* Vector Iota Instruction */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
                  uint32_t desc)                                          \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t sum = 0;                                                     \
    int i;                                                                \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = sum;                                      \
        if (vext_elem_mask(vs2, i)) {                                     \
            sum++;                                                        \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    int i;                                                                \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
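
/*
 * Worked example (hypothetical values): with vl = 8, no masking, and
 * vs2 mask bits { 1, 0, 0, 1, 0, 1, 1, 0 }, viota.m writes the running
 * count of set mask bits strictly before each element:
 *
 *   viota.m vd, vs2  ->  vd = { 0, 1, 1, 1, 2, 2, 3, 4 }
 *
 * vid.v writes the element index itself, so with the same vl:
 *
 *   vid.v vd         ->  vd = { 0, 1, 2, 3, 4, 5, 6, 7 }
 */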

/*
 * Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong i_max, i;                                                \
                                                                          \
    i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart);       \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));              \
    }                                                                     \
                                                                          \
    for (i = i_max; i < vl; ++i) {                                        \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        }                                                                 \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
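
/*
 * Worked example (hypothetical values): with vl = vlmax = 6, no masking,
 * OFFSET = x[rs1] = 2 and vs2 = { a, b, c, d, e, f }:
 *
 *   vslideup.vx   vd, vs2, rs1  ->  vd = { -, -, a, b, c, d }
 *   vslidedown.vx vd, vs2, rs1  ->  vd = { c, d, e, f, 0, 0 }
 *
 * "-" marks destination elements below OFFSET, which vslideup leaves
 * undisturbed; vslidedown writes 0 to active elements that would read
 * past vlmax.
 */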

#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                    \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1,         \
                                 void *vs2, CPURISCVState *env,           \
                                 uint32_t desc)                           \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == 0) {                                                     \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                 \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1,       \
                                   void *vs2, CPURISCVState *env,         \
                                   uint32_t desc)                         \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)                          \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
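
/*
 * Worked example (hypothetical values): with vl = 4, no masking,
 * x[rs1] = S and vs2 = { a, b, c, d }:
 *
 *   vslide1up.vx   vd, vs2, rs1  ->  vd = { S, a, b, c }
 *   vslide1down.vx vd, vs2, rs1  ->  vd = { b, c, d, S }
 *
 * The vfslide1up.vf/vfslide1down.vf wrappers reuse the same helpers:
 * the scalar comes from f[rs1] instead of x[rs1], is passed in as a raw
 * 64-bit pattern, and is truncated to the element width on store.
 */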

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
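
/*
 * Worked example (hypothetical values): with vl = vlmax = 4, no masking,
 * vs2 = { a, b, c, d } and index vector vs1 = { 2, 0, 7, 1 }:
 *
 *   vrgather.vv vd, vs2, vs1  ->  vd = { c, a, 0, b }
 *
 * Out-of-range indices (here 7 >= vlmax) select 0. vrgather.vx uses the
 * single index x[rs1] for every destination element, and vrgatherei16.vv
 * always reads 16-bit indices from vs1 regardless of SEW.
 */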

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
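
/*
 * Worked example (hypothetical values): with vl = 6, source mask
 * vs1 = { 0, 1, 1, 0, 0, 1 } and vs2 = { a, b, c, d, e, f },
 * vcompress.vm packs the selected elements into the low-numbered
 * elements of vd:
 *
 *   vcompress.vm vd, vs2, vs1  ->  vd = { b, c, f, ... }
 *
 * Only the first three elements are written; the remaining body elements
 * keep their previous value and the tail is handled by vext_set_elems_1s().
 * The vzext.vf[248]/vsext.vf[248] helpers widen each narrow source element
 * (EEW = SEW/2, SEW/4 or SEW/8) to SEW; zero- or sign-extension falls out
 * of the DTYPE -> ETYPE assignment in GEN_VEXT_INT_EXT.
 */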