1 /* 2 * RISC-V Vector Extension Helpers for QEMU. 3 * 4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2 or later, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "qemu/host-utils.h" 21 #include "qemu/bitops.h" 22 #include "cpu.h" 23 #include "exec/memop.h" 24 #include "exec/exec-all.h" 25 #include "accel/tcg/cpu-ldst.h" 26 #include "accel/tcg/probe.h" 27 #include "exec/page-protection.h" 28 #include "exec/helper-proto.h" 29 #include "exec/tlb-flags.h" 30 #include "exec/target_page.h" 31 #include "exec/tswap.h" 32 #include "fpu/softfloat.h" 33 #include "tcg/tcg-gvec-desc.h" 34 #include "internals.h" 35 #include "vector_internals.h" 36 #include <math.h> 37 38 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, 39 target_ulong s2) 40 { 41 int vlmax, vl; 42 RISCVCPU *cpu = env_archcpu(env); 43 uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL); 44 uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW); 45 uint16_t sew = 8 << vsew; 46 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); 47 int xlen = riscv_cpu_xlen(env); 48 bool vill = (s2 >> (xlen - 1)) & 0x1; 49 target_ulong reserved = s2 & 50 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT, 51 xlen - 1 - R_VTYPE_RESERVED_SHIFT); 52 uint16_t vlen = cpu->cfg.vlenb << 3; 53 int8_t lmul; 54 55 if (vlmul & 4) { 56 /* 57 * Fractional LMUL, check: 58 * 59 * VLEN * LMUL >= SEW 60 * VLEN >> (8 - lmul) >= sew 61 * (vlenb << 3) >> (8 - lmul) >= sew 62 */ 63 if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) { 64 vill = true; 65 } 66 } 67 68 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) { 69 /* only set vill bit. */ 70 env->vill = 1; 71 env->vtype = 0; 72 env->vl = 0; 73 env->vstart = 0; 74 return 0; 75 } 76 77 /* lmul encoded as in DisasContext::lmul */ 78 lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3); 79 vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul); 80 if (s1 <= vlmax) { 81 vl = s1; 82 } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) { 83 vl = (s1 + 1) >> 1; 84 } else { 85 vl = vlmax; 86 } 87 env->vl = vl; 88 env->vtype = s2; 89 env->vstart = 0; 90 env->vill = 0; 91 return vl; 92 } 93 94 /* 95 * Get the maximum number of elements can be operated. 96 * 97 * log2_esz: log2 of element size in bytes. 98 */ 99 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) 100 { 101 /* 102 * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits. 103 * so vlen in bytes (vlenb) is encoded as maxsz. 104 */ 105 uint32_t vlenb = simd_maxsz(desc); 106 107 /* Return VLMAX */ 108 int scale = vext_lmul(desc) - log2_esz; 109 return scale < 0 ? vlenb >> -scale : vlenb << scale; 110 } 111 112 /* 113 * This function checks watchpoint before real load operation. 114 * 115 * In system mode, the TLB API probe_access is enough for watchpoint check. 116 * In user mode, there is no watchpoint support now. 
117 * 118 * It will trigger an exception if there is no mapping in TLB 119 * and page table walk can't fill the TLB entry. Then the guest 120 * software can return here after process the exception or never return. 121 */ 122 static void probe_pages(CPURISCVState *env, target_ulong addr, 123 target_ulong len, uintptr_t ra, 124 MMUAccessType access_type) 125 { 126 target_ulong pagelen = -(addr | TARGET_PAGE_MASK); 127 target_ulong curlen = MIN(pagelen, len); 128 int mmu_index = riscv_env_mmu_index(env, false); 129 130 probe_access(env, adjust_addr(env, addr), curlen, access_type, 131 mmu_index, ra); 132 if (len > curlen) { 133 addr += curlen; 134 curlen = len - curlen; 135 probe_access(env, adjust_addr(env, addr), curlen, access_type, 136 mmu_index, ra); 137 } 138 } 139 140 static inline void vext_set_elem_mask(void *v0, int index, 141 uint8_t value) 142 { 143 int idx = index / 64; 144 int pos = index % 64; 145 uint64_t old = ((uint64_t *)v0)[idx]; 146 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); 147 } 148 149 /* elements operations for load and store */ 150 typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr, 151 uint32_t idx, void *vd, uintptr_t retaddr); 152 typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host); 153 154 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 155 static inline QEMU_ALWAYS_INLINE \ 156 void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \ 157 uint32_t idx, void *vd, uintptr_t retaddr) \ 158 { \ 159 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 160 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 161 } \ 162 \ 163 static inline QEMU_ALWAYS_INLINE \ 164 void NAME##_host(void *vd, uint32_t idx, void *host) \ 165 { \ 166 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 167 *cur = (ETYPE)LDSUF##_p(host); \ 168 } 169 170 GEN_VEXT_LD_ELEM(lde_b, uint8_t, H1, ldub) 171 GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw) 172 GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl) 173 GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq) 174 175 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 176 static inline QEMU_ALWAYS_INLINE \ 177 void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \ 178 uint32_t idx, void *vd, uintptr_t retaddr) \ 179 { \ 180 ETYPE data = *((ETYPE *)vd + H(idx)); \ 181 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 182 } \ 183 \ 184 static inline QEMU_ALWAYS_INLINE \ 185 void NAME##_host(void *vd, uint32_t idx, void *host) \ 186 { \ 187 ETYPE data = *((ETYPE *)vd + H(idx)); \ 188 STSUF##_p(host, data); \ 189 } 190 191 GEN_VEXT_ST_ELEM(ste_b, uint8_t, H1, stb) 192 GEN_VEXT_ST_ELEM(ste_h, uint16_t, H2, stw) 193 GEN_VEXT_ST_ELEM(ste_w, uint32_t, H4, stl) 194 GEN_VEXT_ST_ELEM(ste_d, uint64_t, H8, stq) 195 196 static inline QEMU_ALWAYS_INLINE void 197 vext_continuous_ldst_tlb(CPURISCVState *env, vext_ldst_elem_fn_tlb *ldst_tlb, 198 void *vd, uint32_t evl, target_ulong addr, 199 uint32_t reg_start, uintptr_t ra, uint32_t esz, 200 bool is_load) 201 { 202 uint32_t i; 203 for (i = env->vstart; i < evl; env->vstart = ++i, addr += esz) { 204 ldst_tlb(env, adjust_addr(env, addr), i, vd, ra); 205 } 206 } 207 208 static inline QEMU_ALWAYS_INLINE void 209 vext_continuous_ldst_host(CPURISCVState *env, vext_ldst_elem_fn_host *ldst_host, 210 void *vd, uint32_t evl, uint32_t reg_start, void *host, 211 uint32_t esz, bool is_load) 212 { 213 #if HOST_BIG_ENDIAN 214 for (; reg_start < evl; reg_start++, host += esz) { 215 ldst_host(vd, reg_start, host); 216 } 217 #else 218 if (esz == 1) { 219 uint32_t byte_offset = reg_start * esz; 220 uint32_t size = (evl - reg_start) * 
esz; 221 222 if (is_load) { 223 memcpy(vd + byte_offset, host, size); 224 } else { 225 memcpy(host, vd + byte_offset, size); 226 } 227 } else { 228 for (; reg_start < evl; reg_start++, host += esz) { 229 ldst_host(vd, reg_start, host); 230 } 231 } 232 #endif 233 } 234 235 static void vext_set_tail_elems_1s(target_ulong vl, void *vd, 236 uint32_t desc, uint32_t nf, 237 uint32_t esz, uint32_t max_elems) 238 { 239 uint32_t vta = vext_vta(desc); 240 int k; 241 242 if (vta == 0) { 243 return; 244 } 245 246 for (k = 0; k < nf; ++k) { 247 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz, 248 (k * max_elems + max_elems) * esz); 249 } 250 } 251 252 /* 253 * stride: access vector element from strided memory 254 */ 255 static void 256 vext_ldst_stride(void *vd, void *v0, target_ulong base, target_ulong stride, 257 CPURISCVState *env, uint32_t desc, uint32_t vm, 258 vext_ldst_elem_fn_tlb *ldst_elem, uint32_t log2_esz, 259 uintptr_t ra) 260 { 261 uint32_t i, k; 262 uint32_t nf = vext_nf(desc); 263 uint32_t max_elems = vext_max_elems(desc, log2_esz); 264 uint32_t esz = 1 << log2_esz; 265 uint32_t vma = vext_vma(desc); 266 267 VSTART_CHECK_EARLY_EXIT(env, env->vl); 268 269 for (i = env->vstart; i < env->vl; env->vstart = ++i) { 270 k = 0; 271 while (k < nf) { 272 if (!vm && !vext_elem_mask(v0, i)) { 273 /* set masked-off elements to 1s */ 274 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 275 (i + k * max_elems + 1) * esz); 276 k++; 277 continue; 278 } 279 target_ulong addr = base + stride * i + (k << log2_esz); 280 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 281 k++; 282 } 283 } 284 env->vstart = 0; 285 286 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 287 } 288 289 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 290 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 291 target_ulong stride, CPURISCVState *env, \ 292 uint32_t desc) \ 293 { \ 294 uint32_t vm = vext_vm(desc); \ 295 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 296 ctzl(sizeof(ETYPE)), GETPC()); \ 297 } 298 299 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b_tlb) 300 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h_tlb) 301 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w_tlb) 302 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d_tlb) 303 304 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 305 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 306 target_ulong stride, CPURISCVState *env, \ 307 uint32_t desc) \ 308 { \ 309 uint32_t vm = vext_vm(desc); \ 310 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 311 ctzl(sizeof(ETYPE)), GETPC()); \ 312 } 313 314 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b_tlb) 315 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h_tlb) 316 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w_tlb) 317 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb) 318 319 /* 320 * unit-stride: access elements stored contiguously in memory 321 */ 322 323 /* unmasked unit-stride load and store operation */ 324 static inline QEMU_ALWAYS_INLINE void 325 vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr, 326 uint32_t elems, uint32_t nf, uint32_t max_elems, 327 uint32_t log2_esz, bool is_load, int mmu_index, 328 vext_ldst_elem_fn_tlb *ldst_tlb, 329 vext_ldst_elem_fn_host *ldst_host, uintptr_t ra) 330 { 331 void *host; 332 int i, k, flags; 333 uint32_t esz = 1 << log2_esz; 334 uint32_t size = (elems * nf) << log2_esz; 335 uint32_t evl = env->vstart + elems; 336 MMUAccessType access_type = is_load ? 
MMU_DATA_LOAD : MMU_DATA_STORE; 337 338 /* Check page permission/pmp/watchpoint/etc. */ 339 flags = probe_access_flags(env, adjust_addr(env, addr), size, access_type, 340 mmu_index, true, &host, ra); 341 342 if (flags == 0) { 343 if (nf == 1) { 344 vext_continuous_ldst_host(env, ldst_host, vd, evl, env->vstart, 345 host, esz, is_load); 346 } else { 347 for (i = env->vstart; i < evl; ++i) { 348 k = 0; 349 while (k < nf) { 350 ldst_host(vd, i + k * max_elems, host); 351 host += esz; 352 k++; 353 } 354 } 355 } 356 env->vstart += elems; 357 } else { 358 if (nf == 1) { 359 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, 360 ra, esz, is_load); 361 } else { 362 /* load bytes from guest memory */ 363 for (i = env->vstart; i < evl; env->vstart = ++i) { 364 k = 0; 365 while (k < nf) { 366 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems, 367 vd, ra); 368 addr += esz; 369 k++; 370 } 371 } 372 } 373 } 374 } 375 376 static inline QEMU_ALWAYS_INLINE void 377 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 378 vext_ldst_elem_fn_tlb *ldst_tlb, 379 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, 380 uint32_t evl, uintptr_t ra, bool is_load) 381 { 382 uint32_t k; 383 target_ulong page_split, elems, addr; 384 uint32_t nf = vext_nf(desc); 385 uint32_t max_elems = vext_max_elems(desc, log2_esz); 386 uint32_t esz = 1 << log2_esz; 387 uint32_t msize = nf * esz; 388 int mmu_index = riscv_env_mmu_index(env, false); 389 390 VSTART_CHECK_EARLY_EXIT(env, evl); 391 392 #if defined(CONFIG_USER_ONLY) 393 /* 394 * For data sizes <= 6 bytes we get better performance by simply calling 395 * vext_continuous_ldst_tlb 396 */ 397 if (nf == 1 && (evl << log2_esz) <= 6) { 398 addr = base + (env->vstart << log2_esz); 399 vext_continuous_ldst_tlb(env, ldst_tlb, vd, evl, addr, env->vstart, ra, 400 esz, is_load); 401 402 env->vstart = 0; 403 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); 404 return; 405 } 406 #endif 407 408 /* Calculate the page range of first page */ 409 addr = base + ((env->vstart * nf) << log2_esz); 410 page_split = -(addr | TARGET_PAGE_MASK); 411 /* Get number of elements */ 412 elems = page_split / msize; 413 if (unlikely(env->vstart + elems >= evl)) { 414 elems = evl - env->vstart; 415 } 416 417 /* Load/store elements in the first page */ 418 if (likely(elems)) { 419 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz, 420 is_load, mmu_index, ldst_tlb, ldst_host, ra); 421 } 422 423 /* Load/store elements in the second page */ 424 if (unlikely(env->vstart < evl)) { 425 /* Cross page element */ 426 if (unlikely(page_split % msize)) { 427 for (k = 0; k < nf; k++) { 428 addr = base + ((env->vstart * nf + k) << log2_esz); 429 ldst_tlb(env, adjust_addr(env, addr), 430 env->vstart + k * max_elems, vd, ra); 431 } 432 env->vstart++; 433 } 434 435 addr = base + ((env->vstart * nf) << log2_esz); 436 /* Get number of elements of second page */ 437 elems = evl - env->vstart; 438 439 /* Load/store elements in the second page */ 440 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, log2_esz, 441 is_load, mmu_index, ldst_tlb, ldst_host, ra); 442 } 443 444 env->vstart = 0; 445 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); 446 } 447 448 /* 449 * masked unit-stride load and store operation will be a special case of 450 * stride, stride = NF * sizeof (ETYPE) 451 */ 452 453 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \ 454 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 455 CPURISCVState 
*env, uint32_t desc) \ 456 { \ 457 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 458 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \ 459 LOAD_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \ 460 } \ 461 \ 462 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 463 CPURISCVState *env, uint32_t desc) \ 464 { \ 465 vext_ldst_us(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \ 466 ctzl(sizeof(ETYPE)), env->vl, GETPC(), true); \ 467 } 468 469 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b_tlb, lde_b_host) 470 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h_tlb, lde_h_host) 471 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w_tlb, lde_w_host) 472 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d_tlb, lde_d_host) 473 474 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \ 475 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 476 CPURISCVState *env, uint32_t desc) \ 477 { \ 478 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 479 vext_ldst_stride(vd, v0, base, stride, env, desc, false, \ 480 STORE_FN_TLB, ctzl(sizeof(ETYPE)), GETPC()); \ 481 } \ 482 \ 483 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 484 CPURISCVState *env, uint32_t desc) \ 485 { \ 486 vext_ldst_us(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \ 487 ctzl(sizeof(ETYPE)), env->vl, GETPC(), false); \ 488 } 489 490 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b_tlb, ste_b_host) 491 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h_tlb, ste_h_host) 492 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w_tlb, ste_w_host) 493 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d_tlb, ste_d_host) 494 495 /* 496 * unit stride mask load and store, EEW = 1 497 */ 498 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 499 CPURISCVState *env, uint32_t desc) 500 { 501 /* evl = ceil(vl/8) */ 502 uint8_t evl = (env->vl + 7) >> 3; 503 vext_ldst_us(vd, base, env, desc, lde_b_tlb, lde_b_host, 504 0, evl, GETPC(), true); 505 } 506 507 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 508 CPURISCVState *env, uint32_t desc) 509 { 510 /* evl = ceil(vl/8) */ 511 uint8_t evl = (env->vl + 7) >> 3; 512 vext_ldst_us(vd, base, env, desc, ste_b_tlb, ste_b_host, 513 0, evl, GETPC(), false); 514 } 515 516 /* 517 * index: access vector element from indexed memory 518 */ 519 typedef target_ulong vext_get_index_addr(target_ulong base, 520 uint32_t idx, void *vs2); 521 522 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 523 static target_ulong NAME(target_ulong base, \ 524 uint32_t idx, void *vs2) \ 525 { \ 526 return (base + *((ETYPE *)vs2 + H(idx))); \ 527 } 528 529 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 530 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 531 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 532 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 533 534 static inline void 535 vext_ldst_index(void *vd, void *v0, target_ulong base, 536 void *vs2, CPURISCVState *env, uint32_t desc, 537 vext_get_index_addr get_index_addr, 538 vext_ldst_elem_fn_tlb *ldst_elem, 539 uint32_t log2_esz, uintptr_t ra) 540 { 541 uint32_t i, k; 542 uint32_t nf = vext_nf(desc); 543 uint32_t vm = vext_vm(desc); 544 uint32_t max_elems = vext_max_elems(desc, log2_esz); 545 uint32_t esz = 1 << log2_esz; 546 uint32_t vma = vext_vma(desc); 547 548 VSTART_CHECK_EARLY_EXIT(env, env->vl); 549 550 /* load bytes from guest memory */ 551 for (i = env->vstart; i < env->vl; env->vstart = ++i) { 552 k = 0; 553 while (k < nf) { 554 if (!vm && !vext_elem_mask(v0, i)) { 555 /* set masked-off elements to 1s */ 556 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 557 (i + k * max_elems + 1) * 
esz); 558 k++; 559 continue; 560 } 561 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); 562 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 563 k++; 564 } 565 } 566 env->vstart = 0; 567 568 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 569 } 570 571 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 572 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 573 void *vs2, CPURISCVState *env, uint32_t desc) \ 574 { \ 575 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 576 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ 577 } 578 579 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b_tlb) 580 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h_tlb) 581 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w_tlb) 582 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d_tlb) 583 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b_tlb) 584 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h_tlb) 585 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w_tlb) 586 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d_tlb) 587 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b_tlb) 588 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h_tlb) 589 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w_tlb) 590 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d_tlb) 591 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b_tlb) 592 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h_tlb) 593 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w_tlb) 594 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d_tlb) 595 596 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 597 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 598 void *vs2, CPURISCVState *env, uint32_t desc) \ 599 { \ 600 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 601 STORE_FN, ctzl(sizeof(ETYPE)), \ 602 GETPC()); \ 603 } 604 605 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b_tlb) 606 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h_tlb) 607 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w_tlb) 608 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d_tlb) 609 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b_tlb) 610 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h_tlb) 611 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w_tlb) 612 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d_tlb) 613 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b_tlb) 614 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h_tlb) 615 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w_tlb) 616 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d_tlb) 617 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b_tlb) 618 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h_tlb) 619 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w_tlb) 620 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d_tlb) 621 622 /* 623 * unit-stride fault-only-fisrt load instructions 624 */ 625 static inline void 626 vext_ldff(void *vd, void *v0, target_ulong base, CPURISCVState *env, 627 uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb, 628 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, uintptr_t ra) 629 { 630 uint32_t i, k, vl = 0; 631 uint32_t nf = vext_nf(desc); 632 uint32_t vm = vext_vm(desc); 633 uint32_t max_elems = vext_max_elems(desc, log2_esz); 634 uint32_t esz = 1 << log2_esz; 635 uint32_t msize = nf * esz; 636 uint32_t vma = vext_vma(desc); 637 target_ulong addr, addr_probe, addr_i, offset, remain, page_split, elems; 638 int mmu_index = 
riscv_env_mmu_index(env, false); 639 int flags; 640 void *host; 641 642 VSTART_CHECK_EARLY_EXIT(env, env->vl); 643 644 addr = base + ((env->vstart * nf) << log2_esz); 645 page_split = -(addr | TARGET_PAGE_MASK); 646 /* Get number of elements */ 647 elems = page_split / msize; 648 if (unlikely(env->vstart + elems >= env->vl)) { 649 elems = env->vl - env->vstart; 650 } 651 652 /* Check page permission/pmp/watchpoint/etc. */ 653 flags = probe_access_flags(env, adjust_addr(env, addr), elems * msize, 654 MMU_DATA_LOAD, mmu_index, true, &host, ra); 655 656 /* If we are crossing a page check also the second page. */ 657 if (env->vl > elems) { 658 addr_probe = addr + (elems << log2_esz); 659 flags |= probe_access_flags(env, adjust_addr(env, addr_probe), 660 elems * msize, MMU_DATA_LOAD, mmu_index, 661 true, &host, ra); 662 } 663 664 if (flags & ~TLB_WATCHPOINT) { 665 /* probe every access */ 666 for (i = env->vstart; i < env->vl; i++) { 667 if (!vm && !vext_elem_mask(v0, i)) { 668 continue; 669 } 670 addr_i = adjust_addr(env, base + i * (nf << log2_esz)); 671 if (i == 0) { 672 /* Allow fault on first element. */ 673 probe_pages(env, addr_i, nf << log2_esz, ra, MMU_DATA_LOAD); 674 } else { 675 remain = nf << log2_esz; 676 while (remain > 0) { 677 offset = -(addr_i | TARGET_PAGE_MASK); 678 679 /* Probe nonfault on subsequent elements. */ 680 flags = probe_access_flags(env, addr_i, offset, 681 MMU_DATA_LOAD, mmu_index, true, 682 &host, 0); 683 684 /* 685 * Stop if invalid (unmapped) or mmio (transaction may 686 * fail). Do not stop if watchpoint, as the spec says that 687 * first-fault should continue to access the same 688 * elements regardless of any watchpoint. 689 */ 690 if (flags & ~TLB_WATCHPOINT) { 691 vl = i; 692 goto ProbeSuccess; 693 } 694 if (remain <= offset) { 695 break; 696 } 697 remain -= offset; 698 addr_i = adjust_addr(env, addr_i + offset); 699 } 700 } 701 } 702 } 703 ProbeSuccess: 704 /* load bytes from guest memory */ 705 if (vl != 0) { 706 env->vl = vl; 707 } 708 709 if (env->vstart < env->vl) { 710 if (vm) { 711 /* Load/store elements in the first page */ 712 if (likely(elems)) { 713 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, 714 log2_esz, true, mmu_index, ldst_tlb, 715 ldst_host, ra); 716 } 717 718 /* Load/store elements in the second page */ 719 if (unlikely(env->vstart < env->vl)) { 720 /* Cross page element */ 721 if (unlikely(page_split % msize)) { 722 for (k = 0; k < nf; k++) { 723 addr = base + ((env->vstart * nf + k) << log2_esz); 724 ldst_tlb(env, adjust_addr(env, addr), 725 env->vstart + k * max_elems, vd, ra); 726 } 727 env->vstart++; 728 } 729 730 addr = base + ((env->vstart * nf) << log2_esz); 731 /* Get number of elements of second page */ 732 elems = env->vl - env->vstart; 733 734 /* Load/store elements in the second page */ 735 vext_page_ldst_us(env, vd, addr, elems, nf, max_elems, 736 log2_esz, true, mmu_index, ldst_tlb, 737 ldst_host, ra); 738 } 739 } else { 740 for (i = env->vstart; i < env->vl; i++) { 741 k = 0; 742 while (k < nf) { 743 if (!vext_elem_mask(v0, i)) { 744 /* set masked-off elements to 1s */ 745 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 746 (i + k * max_elems + 1) * esz); 747 k++; 748 continue; 749 } 750 addr = base + ((i * nf + k) << log2_esz); 751 ldst_tlb(env, adjust_addr(env, addr), i + k * max_elems, 752 vd, ra); 753 k++; 754 } 755 } 756 } 757 } 758 env->vstart = 0; 759 760 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 761 } 762 763 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \ 764 
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 765 CPURISCVState *env, uint32_t desc) \ 766 { \ 767 vext_ldff(vd, v0, base, env, desc, LOAD_FN_TLB, \ 768 LOAD_FN_HOST, ctzl(sizeof(ETYPE)), GETPC()); \ 769 } 770 771 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b_tlb, lde_b_host) 772 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h_tlb, lde_h_host) 773 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w_tlb, lde_w_host) 774 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host) 775 776 #define DO_SWAP(N, M) (M) 777 #define DO_AND(N, M) (N & M) 778 #define DO_XOR(N, M) (N ^ M) 779 #define DO_OR(N, M) (N | M) 780 #define DO_ADD(N, M) (N + M) 781 782 /* Signed min/max */ 783 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 784 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 785 786 /* 787 * load and store whole register instructions 788 */ 789 static inline QEMU_ALWAYS_INLINE void 790 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 791 vext_ldst_elem_fn_tlb *ldst_tlb, 792 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, 793 uintptr_t ra, bool is_load) 794 { 795 target_ulong page_split, elems, addr; 796 uint32_t nf = vext_nf(desc); 797 uint32_t vlenb = riscv_cpu_cfg(env)->vlenb; 798 uint32_t max_elems = vlenb >> log2_esz; 799 uint32_t evl = nf * max_elems; 800 uint32_t esz = 1 << log2_esz; 801 int mmu_index = riscv_env_mmu_index(env, false); 802 803 /* Calculate the page range of first page */ 804 addr = base + (env->vstart << log2_esz); 805 page_split = -(addr | TARGET_PAGE_MASK); 806 /* Get number of elements */ 807 elems = page_split / esz; 808 if (unlikely(env->vstart + elems >= evl)) { 809 elems = evl - env->vstart; 810 } 811 812 /* Load/store elements in the first page */ 813 if (likely(elems)) { 814 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz, 815 is_load, mmu_index, ldst_tlb, ldst_host, ra); 816 } 817 818 /* Load/store elements in the second page */ 819 if (unlikely(env->vstart < evl)) { 820 /* Cross page element */ 821 if (unlikely(page_split % esz)) { 822 addr = base + (env->vstart << log2_esz); 823 ldst_tlb(env, adjust_addr(env, addr), env->vstart, vd, ra); 824 env->vstart++; 825 } 826 827 addr = base + (env->vstart << log2_esz); 828 /* Get number of elements of second page */ 829 elems = evl - env->vstart; 830 831 /* Load/store elements in the second page */ 832 vext_page_ldst_us(env, vd, addr, elems, 1, max_elems, log2_esz, 833 is_load, mmu_index, ldst_tlb, ldst_host, ra); 834 } 835 836 env->vstart = 0; 837 } 838 839 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN_TLB, LOAD_FN_HOST) \ 840 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \ 841 uint32_t desc) \ 842 { \ 843 vext_ldst_whole(vd, base, env, desc, LOAD_FN_TLB, LOAD_FN_HOST, \ 844 ctzl(sizeof(ETYPE)), GETPC(), true); \ 845 } 846 847 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b_tlb, lde_b_host) 848 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h_tlb, lde_h_host) 849 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w_tlb, lde_w_host) 850 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d_tlb, lde_d_host) 851 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b_tlb, lde_b_host) 852 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h_tlb, lde_h_host) 853 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w_tlb, lde_w_host) 854 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d_tlb, lde_d_host) 855 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b_tlb, lde_b_host) 856 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h_tlb, lde_h_host) 857 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w_tlb, lde_w_host) 858 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, 
lde_d_tlb, lde_d_host) 859 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b_tlb, lde_b_host) 860 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h_tlb, lde_h_host) 861 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w_tlb, lde_w_host) 862 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d_tlb, lde_d_host) 863 864 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN_TLB, STORE_FN_HOST) \ 865 void HELPER(NAME)(void *vd, target_ulong base, CPURISCVState *env, \ 866 uint32_t desc) \ 867 { \ 868 vext_ldst_whole(vd, base, env, desc, STORE_FN_TLB, STORE_FN_HOST, \ 869 ctzl(sizeof(ETYPE)), GETPC(), false); \ 870 } 871 872 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b_tlb, ste_b_host) 873 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b_tlb, ste_b_host) 874 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b_tlb, ste_b_host) 875 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b_tlb, ste_b_host) 876 877 /* 878 * Vector Integer Arithmetic Instructions 879 */ 880 881 /* (TD, T1, T2, TX1, TX2) */ 882 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 883 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 884 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 885 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 886 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 887 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 888 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 889 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 890 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 891 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 892 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 893 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 894 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 895 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 896 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 897 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 898 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 899 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 900 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 901 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 902 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 903 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 904 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 905 906 #define DO_SUB(N, M) (N - M) 907 #define DO_RSUB(N, M) (M - N) 908 909 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 910 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 911 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 912 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 913 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 914 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 915 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 916 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 917 918 GEN_VEXT_VV(vadd_vv_b, 1) 919 GEN_VEXT_VV(vadd_vv_h, 2) 920 GEN_VEXT_VV(vadd_vv_w, 4) 921 GEN_VEXT_VV(vadd_vv_d, 8) 922 GEN_VEXT_VV(vsub_vv_b, 1) 923 GEN_VEXT_VV(vsub_vv_h, 2) 924 GEN_VEXT_VV(vsub_vv_w, 4) 925 GEN_VEXT_VV(vsub_vv_d, 8) 926 927 928 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 929 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 930 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 931 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 932 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 
933 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 934 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 935 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 936 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 937 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 938 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 939 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 940 941 GEN_VEXT_VX(vadd_vx_b, 1) 942 GEN_VEXT_VX(vadd_vx_h, 2) 943 GEN_VEXT_VX(vadd_vx_w, 4) 944 GEN_VEXT_VX(vadd_vx_d, 8) 945 GEN_VEXT_VX(vsub_vx_b, 1) 946 GEN_VEXT_VX(vsub_vx_h, 2) 947 GEN_VEXT_VX(vsub_vx_w, 4) 948 GEN_VEXT_VX(vsub_vx_d, 8) 949 GEN_VEXT_VX(vrsub_vx_b, 1) 950 GEN_VEXT_VX(vrsub_vx_h, 2) 951 GEN_VEXT_VX(vrsub_vx_w, 4) 952 GEN_VEXT_VX(vrsub_vx_d, 8) 953 954 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 955 { 956 intptr_t oprsz = simd_oprsz(desc); 957 intptr_t i; 958 959 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 960 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 961 } 962 } 963 964 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 965 { 966 intptr_t oprsz = simd_oprsz(desc); 967 intptr_t i; 968 969 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 970 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 971 } 972 } 973 974 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 975 { 976 intptr_t oprsz = simd_oprsz(desc); 977 intptr_t i; 978 979 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 980 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 981 } 982 } 983 984 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 985 { 986 intptr_t oprsz = simd_oprsz(desc); 987 intptr_t i; 988 989 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 990 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 991 } 992 } 993 994 /* Vector Widening Integer Add/Subtract */ 995 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 996 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 997 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 998 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 999 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 1000 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 1001 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 1002 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 1003 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 1004 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 1005 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 1006 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 1007 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 1008 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 1009 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 1010 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 1011 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 1012 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 1013 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 1014 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 1015 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 1016 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 1017 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 1018 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 1019 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, 
DO_ADD) 1020 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 1021 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 1022 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 1023 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 1024 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 1025 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 1026 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 1027 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 1028 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 1029 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 1030 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 1031 GEN_VEXT_VV(vwaddu_vv_b, 2) 1032 GEN_VEXT_VV(vwaddu_vv_h, 4) 1033 GEN_VEXT_VV(vwaddu_vv_w, 8) 1034 GEN_VEXT_VV(vwsubu_vv_b, 2) 1035 GEN_VEXT_VV(vwsubu_vv_h, 4) 1036 GEN_VEXT_VV(vwsubu_vv_w, 8) 1037 GEN_VEXT_VV(vwadd_vv_b, 2) 1038 GEN_VEXT_VV(vwadd_vv_h, 4) 1039 GEN_VEXT_VV(vwadd_vv_w, 8) 1040 GEN_VEXT_VV(vwsub_vv_b, 2) 1041 GEN_VEXT_VV(vwsub_vv_h, 4) 1042 GEN_VEXT_VV(vwsub_vv_w, 8) 1043 GEN_VEXT_VV(vwaddu_wv_b, 2) 1044 GEN_VEXT_VV(vwaddu_wv_h, 4) 1045 GEN_VEXT_VV(vwaddu_wv_w, 8) 1046 GEN_VEXT_VV(vwsubu_wv_b, 2) 1047 GEN_VEXT_VV(vwsubu_wv_h, 4) 1048 GEN_VEXT_VV(vwsubu_wv_w, 8) 1049 GEN_VEXT_VV(vwadd_wv_b, 2) 1050 GEN_VEXT_VV(vwadd_wv_h, 4) 1051 GEN_VEXT_VV(vwadd_wv_w, 8) 1052 GEN_VEXT_VV(vwsub_wv_b, 2) 1053 GEN_VEXT_VV(vwsub_wv_h, 4) 1054 GEN_VEXT_VV(vwsub_wv_w, 8) 1055 1056 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 1057 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 1058 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 1059 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 1060 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 1061 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 1062 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 1063 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 1064 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 1065 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 1066 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 1067 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 1068 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 1069 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 1070 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 1071 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 1072 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 1073 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 1074 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 1075 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 1076 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 1077 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 1078 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 1079 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 1080 GEN_VEXT_VX(vwaddu_vx_b, 2) 1081 GEN_VEXT_VX(vwaddu_vx_h, 4) 1082 GEN_VEXT_VX(vwaddu_vx_w, 8) 1083 GEN_VEXT_VX(vwsubu_vx_b, 2) 1084 GEN_VEXT_VX(vwsubu_vx_h, 4) 1085 GEN_VEXT_VX(vwsubu_vx_w, 8) 1086 GEN_VEXT_VX(vwadd_vx_b, 2) 1087 GEN_VEXT_VX(vwadd_vx_h, 4) 1088 GEN_VEXT_VX(vwadd_vx_w, 8) 1089 GEN_VEXT_VX(vwsub_vx_b, 2) 1090 GEN_VEXT_VX(vwsub_vx_h, 4) 1091 GEN_VEXT_VX(vwsub_vx_w, 8) 1092 GEN_VEXT_VX(vwaddu_wx_b, 2) 1093 GEN_VEXT_VX(vwaddu_wx_h, 4) 1094 GEN_VEXT_VX(vwaddu_wx_w, 8) 1095 GEN_VEXT_VX(vwsubu_wx_b, 2) 1096 GEN_VEXT_VX(vwsubu_wx_h, 4) 1097 
GEN_VEXT_VX(vwsubu_wx_w, 8) 1098 GEN_VEXT_VX(vwadd_wx_b, 2) 1099 GEN_VEXT_VX(vwadd_wx_h, 4) 1100 GEN_VEXT_VX(vwadd_wx_w, 8) 1101 GEN_VEXT_VX(vwsub_wx_b, 2) 1102 GEN_VEXT_VX(vwsub_wx_h, 4) 1103 GEN_VEXT_VX(vwsub_wx_w, 8) 1104 1105 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 1106 #define DO_VADC(N, M, C) (N + M + C) 1107 #define DO_VSBC(N, M, C) (N - M - C) 1108 1109 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 1110 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1111 CPURISCVState *env, uint32_t desc) \ 1112 { \ 1113 uint32_t vl = env->vl; \ 1114 uint32_t esz = sizeof(ETYPE); \ 1115 uint32_t total_elems = \ 1116 vext_get_total_elems(env, desc, esz); \ 1117 uint32_t vta = vext_vta(desc); \ 1118 uint32_t i; \ 1119 \ 1120 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1121 \ 1122 for (i = env->vstart; i < vl; i++) { \ 1123 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1124 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1125 ETYPE carry = vext_elem_mask(v0, i); \ 1126 \ 1127 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 1128 } \ 1129 env->vstart = 0; \ 1130 /* set tail elements to 1s */ \ 1131 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1132 } 1133 1134 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 1135 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 1136 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 1137 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 1138 1139 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 1140 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 1141 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 1142 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 1143 1144 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 1145 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1146 CPURISCVState *env, uint32_t desc) \ 1147 { \ 1148 uint32_t vl = env->vl; \ 1149 uint32_t esz = sizeof(ETYPE); \ 1150 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1151 uint32_t vta = vext_vta(desc); \ 1152 uint32_t i; \ 1153 \ 1154 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1155 \ 1156 for (i = env->vstart; i < vl; i++) { \ 1157 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1158 ETYPE carry = vext_elem_mask(v0, i); \ 1159 \ 1160 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1161 } \ 1162 env->vstart = 0; \ 1163 /* set tail elements to 1s */ \ 1164 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1165 } 1166 1167 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1168 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1169 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1170 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1171 1172 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1173 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1174 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1175 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1176 1177 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1178 (__typeof(N))(N + M) < N) 1179 #define DO_MSBC(N, M, C) (C ? 
N <= M : N < M) 1180 1181 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1182 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1183 CPURISCVState *env, uint32_t desc) \ 1184 { \ 1185 uint32_t vl = env->vl; \ 1186 uint32_t vm = vext_vm(desc); \ 1187 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 1188 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1189 uint32_t i; \ 1190 \ 1191 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1192 \ 1193 for (i = env->vstart; i < vl; i++) { \ 1194 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1195 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1196 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1197 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1198 } \ 1199 env->vstart = 0; \ 1200 /* 1201 * mask destination register are always tail-agnostic 1202 * set tail elements to 1s 1203 */ \ 1204 if (vta_all_1s) { \ 1205 for (; i < total_elems; i++) { \ 1206 vext_set_elem_mask(vd, i, 1); \ 1207 } \ 1208 } \ 1209 } 1210 1211 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1212 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1213 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1214 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1215 1216 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1217 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1218 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1219 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1220 1221 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1222 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1223 void *vs2, CPURISCVState *env, uint32_t desc) \ 1224 { \ 1225 uint32_t vl = env->vl; \ 1226 uint32_t vm = vext_vm(desc); \ 1227 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 1228 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1229 uint32_t i; \ 1230 \ 1231 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1232 \ 1233 for (i = env->vstart; i < vl; i++) { \ 1234 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1235 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1236 vext_set_elem_mask(vd, i, \ 1237 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1238 } \ 1239 env->vstart = 0; \ 1240 /* 1241 * mask destination register are always tail-agnostic 1242 * set tail elements to 1s 1243 */ \ 1244 if (vta_all_1s) { \ 1245 for (; i < total_elems; i++) { \ 1246 vext_set_elem_mask(vd, i, 1); \ 1247 } \ 1248 } \ 1249 } 1250 1251 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1252 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1253 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1254 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1255 1256 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1257 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1258 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1259 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1260 1261 /* Vector Bitwise Logical Instructions */ 1262 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1263 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1264 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1265 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1266 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1267 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1268 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1269 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1270 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1271 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1272 RVVCALL(OPIVV2, vxor_vv_w, 
OP_SSS_W, H4, H4, H4, DO_XOR) 1273 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1274 GEN_VEXT_VV(vand_vv_b, 1) 1275 GEN_VEXT_VV(vand_vv_h, 2) 1276 GEN_VEXT_VV(vand_vv_w, 4) 1277 GEN_VEXT_VV(vand_vv_d, 8) 1278 GEN_VEXT_VV(vor_vv_b, 1) 1279 GEN_VEXT_VV(vor_vv_h, 2) 1280 GEN_VEXT_VV(vor_vv_w, 4) 1281 GEN_VEXT_VV(vor_vv_d, 8) 1282 GEN_VEXT_VV(vxor_vv_b, 1) 1283 GEN_VEXT_VV(vxor_vv_h, 2) 1284 GEN_VEXT_VV(vxor_vv_w, 4) 1285 GEN_VEXT_VV(vxor_vv_d, 8) 1286 1287 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1288 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1289 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1290 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1291 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1292 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1293 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1294 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1295 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1296 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1297 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1298 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1299 GEN_VEXT_VX(vand_vx_b, 1) 1300 GEN_VEXT_VX(vand_vx_h, 2) 1301 GEN_VEXT_VX(vand_vx_w, 4) 1302 GEN_VEXT_VX(vand_vx_d, 8) 1303 GEN_VEXT_VX(vor_vx_b, 1) 1304 GEN_VEXT_VX(vor_vx_h, 2) 1305 GEN_VEXT_VX(vor_vx_w, 4) 1306 GEN_VEXT_VX(vor_vx_d, 8) 1307 GEN_VEXT_VX(vxor_vx_b, 1) 1308 GEN_VEXT_VX(vxor_vx_h, 2) 1309 GEN_VEXT_VX(vxor_vx_w, 4) 1310 GEN_VEXT_VX(vxor_vx_d, 8) 1311 1312 /* Vector Single-Width Bit Shift Instructions */ 1313 #define DO_SLL(N, M) (N << (M)) 1314 #define DO_SRL(N, M) (N >> (M)) 1315 1316 /* generate the helpers for shift instructions with two vector operators */ 1317 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1318 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1319 void *vs2, CPURISCVState *env, uint32_t desc) \ 1320 { \ 1321 uint32_t vm = vext_vm(desc); \ 1322 uint32_t vl = env->vl; \ 1323 uint32_t esz = sizeof(TS1); \ 1324 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1325 uint32_t vta = vext_vta(desc); \ 1326 uint32_t vma = vext_vma(desc); \ 1327 uint32_t i; \ 1328 \ 1329 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1330 \ 1331 for (i = env->vstart; i < vl; i++) { \ 1332 if (!vm && !vext_elem_mask(v0, i)) { \ 1333 /* set masked-off elements to 1s */ \ 1334 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 1335 continue; \ 1336 } \ 1337 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1338 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1339 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1340 } \ 1341 env->vstart = 0; \ 1342 /* set tail elements to 1s */ \ 1343 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1344 } 1345 1346 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1347 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1348 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1349 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1350 1351 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1352 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1353 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1354 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1355 1356 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1357 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1358 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, 
DO_SRL, 0x1f) 1359 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1360 1361 /* 1362 * generate the helpers for shift instructions with one vector and one scalar 1363 */ 1364 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1365 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1366 void *vs2, CPURISCVState *env, \ 1367 uint32_t desc) \ 1368 { \ 1369 uint32_t vm = vext_vm(desc); \ 1370 uint32_t vl = env->vl; \ 1371 uint32_t esz = sizeof(TD); \ 1372 uint32_t total_elems = \ 1373 vext_get_total_elems(env, desc, esz); \ 1374 uint32_t vta = vext_vta(desc); \ 1375 uint32_t vma = vext_vma(desc); \ 1376 uint32_t i; \ 1377 \ 1378 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1379 \ 1380 for (i = env->vstart; i < vl; i++) { \ 1381 if (!vm && !vext_elem_mask(v0, i)) { \ 1382 /* set masked-off elements to 1s */ \ 1383 vext_set_elems_1s(vd, vma, i * esz, \ 1384 (i + 1) * esz); \ 1385 continue; \ 1386 } \ 1387 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1388 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1389 } \ 1390 env->vstart = 0; \ 1391 /* set tail elements to 1s */ \ 1392 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ 1393 } 1394 1395 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1396 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1397 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1398 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1399 1400 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1401 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1402 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1403 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1404 1405 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1406 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1407 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1408 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1409 1410 /* Vector Narrowing Integer Right Shift Instructions */ 1411 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1412 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1413 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1414 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1415 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1416 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1417 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1418 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1419 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1420 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1421 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1422 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1423 1424 /* Vector Integer Comparison Instructions */ 1425 #define DO_MSEQ(N, M) (N == M) 1426 #define DO_MSNE(N, M) (N != M) 1427 #define DO_MSLT(N, M) (N < M) 1428 #define DO_MSLE(N, M) (N <= M) 1429 #define DO_MSGT(N, M) (N > M) 1430 1431 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1432 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1433 CPURISCVState *env, uint32_t desc) \ 1434 { \ 1435 uint32_t vm = vext_vm(desc); \ 1436 uint32_t vl = env->vl; \ 1437 uint32_t total_elems = 
riscv_cpu_cfg(env)->vlenb << 3; \ 1438 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1439 uint32_t vma = vext_vma(desc); \ 1440 uint32_t i; \ 1441 \ 1442 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1443 \ 1444 for (i = env->vstart; i < vl; i++) { \ 1445 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1446 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1447 if (!vm && !vext_elem_mask(v0, i)) { \ 1448 /* set masked-off elements to 1s */ \ 1449 if (vma) { \ 1450 vext_set_elem_mask(vd, i, 1); \ 1451 } \ 1452 continue; \ 1453 } \ 1454 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1455 } \ 1456 env->vstart = 0; \ 1457 /* 1458 * mask destination register are always tail-agnostic 1459 * set tail elements to 1s 1460 */ \ 1461 if (vta_all_1s) { \ 1462 for (; i < total_elems; i++) { \ 1463 vext_set_elem_mask(vd, i, 1); \ 1464 } \ 1465 } \ 1466 } 1467 1468 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1469 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1470 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1471 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1472 1473 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1474 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1475 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1476 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1477 1478 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1479 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1480 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1481 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1482 1483 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1484 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1485 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1486 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1487 1488 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1489 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1490 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1491 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1492 1493 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1494 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1495 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1496 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1497 1498 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1499 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1500 CPURISCVState *env, uint32_t desc) \ 1501 { \ 1502 uint32_t vm = vext_vm(desc); \ 1503 uint32_t vl = env->vl; \ 1504 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 1505 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1506 uint32_t vma = vext_vma(desc); \ 1507 uint32_t i; \ 1508 \ 1509 VSTART_CHECK_EARLY_EXIT(env, vl); \ 1510 \ 1511 for (i = env->vstart; i < vl; i++) { \ 1512 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1513 if (!vm && !vext_elem_mask(v0, i)) { \ 1514 /* set masked-off elements to 1s */ \ 1515 if (vma) { \ 1516 vext_set_elem_mask(vd, i, 1); \ 1517 } \ 1518 continue; \ 1519 } \ 1520 vext_set_elem_mask(vd, i, \ 1521 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1522 } \ 1523 env->vstart = 0; \ 1524 /* 1525 * mask destination register are always tail-agnostic 1526 * set tail elements to 1s 1527 */ \ 1528 if (vta_all_1s) { \ 1529 for (; i < total_elems; i++) { \ 1530 vext_set_elem_mask(vd, i, 1); \ 1531 } \ 1532 } \ 1533 } 1534 1535 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1536 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1537 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1538 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1539 1540 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 
1541 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1542 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1543 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1544 1545 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1546 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1547 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1548 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1549 1550 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1551 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1552 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1553 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1554 1555 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1556 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1557 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1558 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1559 1560 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1561 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1562 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1563 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1564 1565 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1566 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1567 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1568 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1569 1570 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1571 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1572 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1573 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1574 1575 /* Vector Integer Min/Max Instructions */ 1576 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1577 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1578 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1579 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1580 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1581 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1582 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1583 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1584 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1585 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1586 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1587 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1588 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1589 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1590 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1591 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1592 GEN_VEXT_VV(vminu_vv_b, 1) 1593 GEN_VEXT_VV(vminu_vv_h, 2) 1594 GEN_VEXT_VV(vminu_vv_w, 4) 1595 GEN_VEXT_VV(vminu_vv_d, 8) 1596 GEN_VEXT_VV(vmin_vv_b, 1) 1597 GEN_VEXT_VV(vmin_vv_h, 2) 1598 GEN_VEXT_VV(vmin_vv_w, 4) 1599 GEN_VEXT_VV(vmin_vv_d, 8) 1600 GEN_VEXT_VV(vmaxu_vv_b, 1) 1601 GEN_VEXT_VV(vmaxu_vv_h, 2) 1602 GEN_VEXT_VV(vmaxu_vv_w, 4) 1603 GEN_VEXT_VV(vmaxu_vv_d, 8) 1604 GEN_VEXT_VV(vmax_vv_b, 1) 1605 GEN_VEXT_VV(vmax_vv_h, 2) 1606 GEN_VEXT_VV(vmax_vv_w, 4) 1607 GEN_VEXT_VV(vmax_vv_d, 8) 1608 1609 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1610 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1611 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1612 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1613 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1614 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1615 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1616 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, 
H8, DO_MIN) 1617 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1618 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1619 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1620 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1621 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1622 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1623 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1624 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1625 GEN_VEXT_VX(vminu_vx_b, 1) 1626 GEN_VEXT_VX(vminu_vx_h, 2) 1627 GEN_VEXT_VX(vminu_vx_w, 4) 1628 GEN_VEXT_VX(vminu_vx_d, 8) 1629 GEN_VEXT_VX(vmin_vx_b, 1) 1630 GEN_VEXT_VX(vmin_vx_h, 2) 1631 GEN_VEXT_VX(vmin_vx_w, 4) 1632 GEN_VEXT_VX(vmin_vx_d, 8) 1633 GEN_VEXT_VX(vmaxu_vx_b, 1) 1634 GEN_VEXT_VX(vmaxu_vx_h, 2) 1635 GEN_VEXT_VX(vmaxu_vx_w, 4) 1636 GEN_VEXT_VX(vmaxu_vx_d, 8) 1637 GEN_VEXT_VX(vmax_vx_b, 1) 1638 GEN_VEXT_VX(vmax_vx_h, 2) 1639 GEN_VEXT_VX(vmax_vx_w, 4) 1640 GEN_VEXT_VX(vmax_vx_d, 8) 1641 1642 /* Vector Single-Width Integer Multiply Instructions */ 1643 #define DO_MUL(N, M) (N * M) 1644 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1645 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1646 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1647 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1648 GEN_VEXT_VV(vmul_vv_b, 1) 1649 GEN_VEXT_VV(vmul_vv_h, 2) 1650 GEN_VEXT_VV(vmul_vv_w, 4) 1651 GEN_VEXT_VV(vmul_vv_d, 8) 1652 1653 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1654 { 1655 return (int16_t)s2 * (int16_t)s1 >> 8; 1656 } 1657 1658 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1659 { 1660 return (int32_t)s2 * (int32_t)s1 >> 16; 1661 } 1662 1663 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1664 { 1665 return (int64_t)s2 * (int64_t)s1 >> 32; 1666 } 1667 1668 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1669 { 1670 uint64_t hi_64, lo_64; 1671 1672 muls64(&lo_64, &hi_64, s1, s2); 1673 return hi_64; 1674 } 1675 1676 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1677 { 1678 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1679 } 1680 1681 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1682 { 1683 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1684 } 1685 1686 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1687 { 1688 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1689 } 1690 1691 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1692 { 1693 uint64_t hi_64, lo_64; 1694 1695 mulu64(&lo_64, &hi_64, s2, s1); 1696 return hi_64; 1697 } 1698 1699 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1700 { 1701 return (int16_t)s2 * (uint16_t)s1 >> 8; 1702 } 1703 1704 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1705 { 1706 return (int32_t)s2 * (uint32_t)s1 >> 16; 1707 } 1708 1709 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1710 { 1711 return (int64_t)s2 * (uint64_t)s1 >> 32; 1712 } 1713 1714 /* 1715 * Let A = signed operand (s2), 1716 * B = unsigned operand (s1), 1717 * P = mulu64(A, B), the 128-bit product of the raw bit patterns, 1718 * SP = A * B, the desired signed x unsigned product. 1719 * 1720 * IF A >= 0 1721 * SP = P 1722 * IF A < 0 1723 * mulu64 sees A as the unsigned value A + 2 ** 64, so 1724 * P = (A + 2 ** 64) * B 1725 * = SP + 2 ** 64 * B 1726 * SP = P - 2 ** 64 * B 1727 * and the 2 ** 64 * B term only touches the upper 64 bits. 1728 * 1729 * THEN 1730 * HI_P -= (A < 0 ? B : 0) 1731 */ 1732 1733 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1734 { 1735 uint64_t hi_64, lo_64; 1736 1737 mulu64(&lo_64, &hi_64, s2, s1); 1738 1739 hi_64 -= s2 < 0 ?
s1 : 0; 1740 return hi_64; 1741 } 1742 1743 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1744 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1745 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1746 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1747 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1748 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1749 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1750 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1751 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1752 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1753 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1754 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1755 GEN_VEXT_VV(vmulh_vv_b, 1) 1756 GEN_VEXT_VV(vmulh_vv_h, 2) 1757 GEN_VEXT_VV(vmulh_vv_w, 4) 1758 GEN_VEXT_VV(vmulh_vv_d, 8) 1759 GEN_VEXT_VV(vmulhu_vv_b, 1) 1760 GEN_VEXT_VV(vmulhu_vv_h, 2) 1761 GEN_VEXT_VV(vmulhu_vv_w, 4) 1762 GEN_VEXT_VV(vmulhu_vv_d, 8) 1763 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1764 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1765 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1766 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1767 1768 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1769 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1770 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1771 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1772 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1773 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1774 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1775 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1776 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1777 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1778 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1779 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1780 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1781 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1782 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1783 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1784 GEN_VEXT_VX(vmul_vx_b, 1) 1785 GEN_VEXT_VX(vmul_vx_h, 2) 1786 GEN_VEXT_VX(vmul_vx_w, 4) 1787 GEN_VEXT_VX(vmul_vx_d, 8) 1788 GEN_VEXT_VX(vmulh_vx_b, 1) 1789 GEN_VEXT_VX(vmulh_vx_h, 2) 1790 GEN_VEXT_VX(vmulh_vx_w, 4) 1791 GEN_VEXT_VX(vmulh_vx_d, 8) 1792 GEN_VEXT_VX(vmulhu_vx_b, 1) 1793 GEN_VEXT_VX(vmulhu_vx_h, 2) 1794 GEN_VEXT_VX(vmulhu_vx_w, 4) 1795 GEN_VEXT_VX(vmulhu_vx_d, 8) 1796 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1797 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1798 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1799 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1800 1801 /* Vector Integer Divide Instructions */ 1802 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1803 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1804 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ 1805 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1806 #define DO_REM(N, M) (unlikely(M == 0) ? N : \ 1807 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1808 1809 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1810 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1811 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1812 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1813 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1814 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1815 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1816 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1817 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1818 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1819 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1820 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1821 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1822 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1823 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1824 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1825 GEN_VEXT_VV(vdivu_vv_b, 1) 1826 GEN_VEXT_VV(vdivu_vv_h, 2) 1827 GEN_VEXT_VV(vdivu_vv_w, 4) 1828 GEN_VEXT_VV(vdivu_vv_d, 8) 1829 GEN_VEXT_VV(vdiv_vv_b, 1) 1830 GEN_VEXT_VV(vdiv_vv_h, 2) 1831 GEN_VEXT_VV(vdiv_vv_w, 4) 1832 GEN_VEXT_VV(vdiv_vv_d, 8) 1833 GEN_VEXT_VV(vremu_vv_b, 1) 1834 GEN_VEXT_VV(vremu_vv_h, 2) 1835 GEN_VEXT_VV(vremu_vv_w, 4) 1836 GEN_VEXT_VV(vremu_vv_d, 8) 1837 GEN_VEXT_VV(vrem_vv_b, 1) 1838 GEN_VEXT_VV(vrem_vv_h, 2) 1839 GEN_VEXT_VV(vrem_vv_w, 4) 1840 GEN_VEXT_VV(vrem_vv_d, 8) 1841 1842 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1843 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1844 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1845 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1846 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1847 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1848 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1849 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1850 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1851 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1852 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1853 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1854 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1855 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1856 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1857 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1858 GEN_VEXT_VX(vdivu_vx_b, 1) 1859 GEN_VEXT_VX(vdivu_vx_h, 2) 1860 GEN_VEXT_VX(vdivu_vx_w, 4) 1861 GEN_VEXT_VX(vdivu_vx_d, 8) 1862 GEN_VEXT_VX(vdiv_vx_b, 1) 1863 GEN_VEXT_VX(vdiv_vx_h, 2) 1864 GEN_VEXT_VX(vdiv_vx_w, 4) 1865 GEN_VEXT_VX(vdiv_vx_d, 8) 1866 GEN_VEXT_VX(vremu_vx_b, 1) 1867 GEN_VEXT_VX(vremu_vx_h, 2) 1868 GEN_VEXT_VX(vremu_vx_w, 4) 1869 GEN_VEXT_VX(vremu_vx_d, 8) 1870 GEN_VEXT_VX(vrem_vx_b, 1) 1871 GEN_VEXT_VX(vrem_vx_h, 2) 1872 GEN_VEXT_VX(vrem_vx_w, 4) 1873 GEN_VEXT_VX(vrem_vx_d, 8) 1874 1875 /* Vector Widening Integer Multiply Instructions */ 1876 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1877 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1878 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1879 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1880 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1881 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1882 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1883 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, 
DO_MUL) 1884 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1885 GEN_VEXT_VV(vwmul_vv_b, 2) 1886 GEN_VEXT_VV(vwmul_vv_h, 4) 1887 GEN_VEXT_VV(vwmul_vv_w, 8) 1888 GEN_VEXT_VV(vwmulu_vv_b, 2) 1889 GEN_VEXT_VV(vwmulu_vv_h, 4) 1890 GEN_VEXT_VV(vwmulu_vv_w, 8) 1891 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1892 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1893 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1894 1895 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1896 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1897 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1898 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1899 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1900 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1901 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1902 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1903 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1904 GEN_VEXT_VX(vwmul_vx_b, 2) 1905 GEN_VEXT_VX(vwmul_vx_h, 4) 1906 GEN_VEXT_VX(vwmul_vx_w, 8) 1907 GEN_VEXT_VX(vwmulu_vx_b, 2) 1908 GEN_VEXT_VX(vwmulu_vx_h, 4) 1909 GEN_VEXT_VX(vwmulu_vx_w, 8) 1910 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1911 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1912 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1913 1914 /* Vector Single-Width Integer Multiply-Add Instructions */ 1915 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1916 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1917 { \ 1918 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1919 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1920 TD d = *((TD *)vd + HD(i)); \ 1921 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1922 } 1923 1924 #define DO_MACC(N, M, D) (M * N + D) 1925 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1926 #define DO_MADD(N, M, D) (M * D + N) 1927 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1928 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1929 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1930 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1931 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1932 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1933 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1934 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1935 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1936 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1937 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1938 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1939 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1940 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1941 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1942 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1943 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1944 GEN_VEXT_VV(vmacc_vv_b, 1) 1945 GEN_VEXT_VV(vmacc_vv_h, 2) 1946 GEN_VEXT_VV(vmacc_vv_w, 4) 1947 GEN_VEXT_VV(vmacc_vv_d, 8) 1948 GEN_VEXT_VV(vnmsac_vv_b, 1) 1949 GEN_VEXT_VV(vnmsac_vv_h, 2) 1950 GEN_VEXT_VV(vnmsac_vv_w, 4) 1951 GEN_VEXT_VV(vnmsac_vv_d, 8) 1952 GEN_VEXT_VV(vmadd_vv_b, 1) 1953 GEN_VEXT_VV(vmadd_vv_h, 2) 1954 GEN_VEXT_VV(vmadd_vv_w, 4) 1955 GEN_VEXT_VV(vmadd_vv_d, 8) 1956 GEN_VEXT_VV(vnmsub_vv_b, 1) 1957 GEN_VEXT_VV(vnmsub_vv_h, 2) 1958 GEN_VEXT_VV(vnmsub_vv_w, 4) 1959 GEN_VEXT_VV(vnmsub_vv_d, 8) 1960 1961 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1962 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1963 { \ 1964 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1965 TD d = *((TD *)vd 
+ HD(i)); \ 1966 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1967 } 1968 1969 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1970 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1971 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1972 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1973 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1974 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1975 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1976 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1977 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1978 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1979 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1980 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1981 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1982 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1983 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1984 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1985 GEN_VEXT_VX(vmacc_vx_b, 1) 1986 GEN_VEXT_VX(vmacc_vx_h, 2) 1987 GEN_VEXT_VX(vmacc_vx_w, 4) 1988 GEN_VEXT_VX(vmacc_vx_d, 8) 1989 GEN_VEXT_VX(vnmsac_vx_b, 1) 1990 GEN_VEXT_VX(vnmsac_vx_h, 2) 1991 GEN_VEXT_VX(vnmsac_vx_w, 4) 1992 GEN_VEXT_VX(vnmsac_vx_d, 8) 1993 GEN_VEXT_VX(vmadd_vx_b, 1) 1994 GEN_VEXT_VX(vmadd_vx_h, 2) 1995 GEN_VEXT_VX(vmadd_vx_w, 4) 1996 GEN_VEXT_VX(vmadd_vx_d, 8) 1997 GEN_VEXT_VX(vnmsub_vx_b, 1) 1998 GEN_VEXT_VX(vnmsub_vx_h, 2) 1999 GEN_VEXT_VX(vnmsub_vx_w, 4) 2000 GEN_VEXT_VX(vnmsub_vx_d, 8) 2001 2002 /* Vector Widening Integer Multiply-Add Instructions */ 2003 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 2004 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 2005 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 2006 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 2007 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 2008 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 2009 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 2010 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 2011 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 2012 GEN_VEXT_VV(vwmaccu_vv_b, 2) 2013 GEN_VEXT_VV(vwmaccu_vv_h, 4) 2014 GEN_VEXT_VV(vwmaccu_vv_w, 8) 2015 GEN_VEXT_VV(vwmacc_vv_b, 2) 2016 GEN_VEXT_VV(vwmacc_vv_h, 4) 2017 GEN_VEXT_VV(vwmacc_vv_w, 8) 2018 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 2019 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 2020 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 2021 2022 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 2023 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 2024 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 2025 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 2026 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 2027 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 2028 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 2029 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 2030 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 2031 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 2032 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 2033 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 2034 GEN_VEXT_VX(vwmaccu_vx_b, 2) 2035 GEN_VEXT_VX(vwmaccu_vx_h, 4) 2036 GEN_VEXT_VX(vwmaccu_vx_w, 8) 2037 GEN_VEXT_VX(vwmacc_vx_b, 2) 2038 GEN_VEXT_VX(vwmacc_vx_h, 4) 2039 GEN_VEXT_VX(vwmacc_vx_w, 8) 2040 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 2041 
GEN_VEXT_VX(vwmaccsu_vx_h, 4) 2042 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 2043 GEN_VEXT_VX(vwmaccus_vx_b, 2) 2044 GEN_VEXT_VX(vwmaccus_vx_h, 4) 2045 GEN_VEXT_VX(vwmaccus_vx_w, 8) 2046 2047 /* Vector Integer Merge and Move Instructions */ 2048 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 2049 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 2050 uint32_t desc) \ 2051 { \ 2052 uint32_t vl = env->vl; \ 2053 uint32_t esz = sizeof(ETYPE); \ 2054 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2055 uint32_t vta = vext_vta(desc); \ 2056 uint32_t i; \ 2057 \ 2058 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2059 \ 2060 for (i = env->vstart; i < vl; i++) { \ 2061 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 2062 *((ETYPE *)vd + H(i)) = s1; \ 2063 } \ 2064 env->vstart = 0; \ 2065 /* set tail elements to 1s */ \ 2066 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2067 } 2068 2069 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 2070 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 2071 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 2072 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 2073 2074 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 2075 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2076 uint32_t desc) \ 2077 { \ 2078 uint32_t vl = env->vl; \ 2079 uint32_t esz = sizeof(ETYPE); \ 2080 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2081 uint32_t vta = vext_vta(desc); \ 2082 uint32_t i; \ 2083 \ 2084 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2085 \ 2086 for (i = env->vstart; i < vl; i++) { \ 2087 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2088 } \ 2089 env->vstart = 0; \ 2090 /* set tail elements to 1s */ \ 2091 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2092 } 2093 2094 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 2095 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 2096 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 2097 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 2098 2099 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 2100 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2101 CPURISCVState *env, uint32_t desc) \ 2102 { \ 2103 uint32_t vl = env->vl; \ 2104 uint32_t esz = sizeof(ETYPE); \ 2105 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2106 uint32_t vta = vext_vta(desc); \ 2107 uint32_t i; \ 2108 \ 2109 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2110 \ 2111 for (i = env->vstart; i < vl; i++) { \ 2112 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 2113 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2114 } \ 2115 env->vstart = 0; \ 2116 /* set tail elements to 1s */ \ 2117 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2118 } 2119 2120 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 2121 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 2122 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 2123 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 2124 2125 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 2126 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2127 void *vs2, CPURISCVState *env, uint32_t desc) \ 2128 { \ 2129 uint32_t vl = env->vl; \ 2130 uint32_t esz = sizeof(ETYPE); \ 2131 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 2132 uint32_t vta = vext_vta(desc); \ 2133 uint32_t i; \ 2134 \ 2135 VSTART_CHECK_EARLY_EXIT(env, vl); \ 2136 \ 2137 for (i = env->vstart; i < vl; i++) { \ 2138 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2139 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 2140 (ETYPE)(target_long)s1); \ 2141 *((ETYPE *)vd + H(i)) = d; \ 2142 } \ 2143 env->vstart = 0; \ 2144 /* set tail elements to 1s */ \ 2145 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 2146 } 2147 2148 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2149 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2150 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2151 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2152 2153 /* 2154 * Vector Fixed-Point Arithmetic Instructions 2155 */ 2156 2157 /* Vector Single-Width Saturating Add and Subtract */ 2158 2159 /* 2160 * As fixed point instructions probably have round mode and saturation, 2161 * define common macros for fixed point here. 2162 */ 2163 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2164 CPURISCVState *env, int vxrm); 2165 2166 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2167 static inline void \ 2168 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2169 CPURISCVState *env, int vxrm) \ 2170 { \ 2171 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2172 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2173 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2174 } 2175 2176 static inline void 2177 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2178 CPURISCVState *env, 2179 uint32_t vl, uint32_t vm, int vxrm, 2180 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 2181 { 2182 for (uint32_t i = env->vstart; i < vl; i++) { 2183 if (!vm && !vext_elem_mask(v0, i)) { 2184 /* set masked-off elements to 1s */ 2185 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2186 continue; 2187 } 2188 fn(vd, vs1, vs2, i, env, vxrm); 2189 } 2190 env->vstart = 0; 2191 } 2192 2193 static inline void 2194 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2195 CPURISCVState *env, 2196 uint32_t desc, 2197 opivv2_rm_fn *fn, uint32_t esz) 2198 { 2199 uint32_t vm = vext_vm(desc); 2200 uint32_t vl = env->vl; 2201 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2202 uint32_t vta = vext_vta(desc); 2203 uint32_t vma = vext_vma(desc); 2204 2205 VSTART_CHECK_EARLY_EXIT(env, vl); 2206 2207 switch (env->vxrm) { 2208 case 0: /* rnu */ 2209 vext_vv_rm_1(vd, v0, vs1, vs2, 2210 env, vl, vm, 0, fn, vma, esz); 2211 break; 2212 case 1: /* rne */ 2213 vext_vv_rm_1(vd, v0, vs1, vs2, 2214 env, vl, vm, 1, fn, vma, esz); 2215 break; 2216 case 2: /* rdn */ 2217 vext_vv_rm_1(vd, v0, vs1, vs2, 2218 env, vl, vm, 2, fn, vma, esz); 2219 break; 2220 default: /* rod */ 2221 vext_vv_rm_1(vd, v0, vs1, vs2, 2222 env, vl, vm, 3, fn, vma, esz); 2223 break; 2224 } 2225 /* set tail elements to 1s */ 2226 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2227 } 2228 2229 /* generate helpers for fixed point instructions with OPIVV format */ 2230 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2231 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2232 CPURISCVState *env, uint32_t desc) \ 2233 { \ 2234 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2235 do_##NAME, ESZ); \ 2236 } 2237 2238 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, 2239 uint8_t b) 2240 { 2241 uint8_t res = a + b; 2242 if (res < a) { 2243 res = UINT8_MAX; 2244 env->vxsat = 0x1; 2245 } 2246 return res; 2247 } 2248 2249 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2250 uint16_t b) 2251 { 2252 uint16_t res = a + b; 2253 if (res < a) { 2254 res = UINT16_MAX; 2255 env->vxsat = 0x1; 2256 } 2257 return res; 2258 } 2259 2260 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2261 uint32_t b) 2262 { 2263 uint32_t res = a + b; 
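/*
 * Unsigned saturating-add overflow check, as in saddu8/saddu16 above:
 * the sum wraps around iff res < a (e.g. a = 0xffffffff, b = 2 wraps
 * to res = 1), in which case the result clamps to UINT32_MAX and
 * vxsat is set.
 */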
2264 if (res < a) { 2265 res = UINT32_MAX; 2266 env->vxsat = 0x1; 2267 } 2268 return res; 2269 } 2270 2271 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2272 uint64_t b) 2273 { 2274 uint64_t res = a + b; 2275 if (res < a) { 2276 res = UINT64_MAX; 2277 env->vxsat = 0x1; 2278 } 2279 return res; 2280 } 2281 2282 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2283 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2284 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2285 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2286 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2287 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2288 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2289 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2290 2291 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2292 CPURISCVState *env, int vxrm); 2293 2294 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2295 static inline void \ 2296 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2297 CPURISCVState *env, int vxrm) \ 2298 { \ 2299 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2300 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2301 } 2302 2303 static inline void 2304 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2305 CPURISCVState *env, 2306 uint32_t vl, uint32_t vm, int vxrm, 2307 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2308 { 2309 for (uint32_t i = env->vstart; i < vl; i++) { 2310 if (!vm && !vext_elem_mask(v0, i)) { 2311 /* set masked-off elements to 1s */ 2312 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2313 continue; 2314 } 2315 fn(vd, s1, vs2, i, env, vxrm); 2316 } 2317 env->vstart = 0; 2318 } 2319 2320 static inline void 2321 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2322 CPURISCVState *env, 2323 uint32_t desc, 2324 opivx2_rm_fn *fn, uint32_t esz) 2325 { 2326 uint32_t vm = vext_vm(desc); 2327 uint32_t vl = env->vl; 2328 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2329 uint32_t vta = vext_vta(desc); 2330 uint32_t vma = vext_vma(desc); 2331 2332 VSTART_CHECK_EARLY_EXIT(env, vl); 2333 2334 switch (env->vxrm) { 2335 case 0: /* rnu */ 2336 vext_vx_rm_1(vd, v0, s1, vs2, 2337 env, vl, vm, 0, fn, vma, esz); 2338 break; 2339 case 1: /* rne */ 2340 vext_vx_rm_1(vd, v0, s1, vs2, 2341 env, vl, vm, 1, fn, vma, esz); 2342 break; 2343 case 2: /* rdn */ 2344 vext_vx_rm_1(vd, v0, s1, vs2, 2345 env, vl, vm, 2, fn, vma, esz); 2346 break; 2347 default: /* rod */ 2348 vext_vx_rm_1(vd, v0, s1, vs2, 2349 env, vl, vm, 3, fn, vma, esz); 2350 break; 2351 } 2352 /* set tail elements to 1s */ 2353 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2354 } 2355 2356 /* generate helpers for fixed point instructions with OPIVX format */ 2357 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2358 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2359 void *vs2, CPURISCVState *env, \ 2360 uint32_t desc) \ 2361 { \ 2362 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2363 do_##NAME, ESZ); \ 2364 } 2365 2366 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2367 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2368 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2369 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2370 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2371 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2372 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2373 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2374 2375 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2376 { 2377 int8_t res = a + b; 2378 if ((res ^ a) & (res ^ 
b) & INT8_MIN) { 2379 res = a > 0 ? INT8_MAX : INT8_MIN; 2380 env->vxsat = 0x1; 2381 } 2382 return res; 2383 } 2384 2385 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2386 int16_t b) 2387 { 2388 int16_t res = a + b; 2389 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2390 res = a > 0 ? INT16_MAX : INT16_MIN; 2391 env->vxsat = 0x1; 2392 } 2393 return res; 2394 } 2395 2396 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2397 int32_t b) 2398 { 2399 int32_t res = a + b; 2400 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2401 res = a > 0 ? INT32_MAX : INT32_MIN; 2402 env->vxsat = 0x1; 2403 } 2404 return res; 2405 } 2406 2407 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2408 int64_t b) 2409 { 2410 int64_t res = a + b; 2411 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2412 res = a > 0 ? INT64_MAX : INT64_MIN; 2413 env->vxsat = 0x1; 2414 } 2415 return res; 2416 } 2417 2418 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2419 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2420 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2421 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2422 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2423 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2424 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2425 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2426 2427 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2428 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2429 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2430 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2431 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2432 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2433 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2434 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2435 2436 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2437 uint8_t b) 2438 { 2439 uint8_t res = a - b; 2440 if (res > a) { 2441 res = 0; 2442 env->vxsat = 0x1; 2443 } 2444 return res; 2445 } 2446 2447 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2448 uint16_t b) 2449 { 2450 uint16_t res = a - b; 2451 if (res > a) { 2452 res = 0; 2453 env->vxsat = 0x1; 2454 } 2455 return res; 2456 } 2457 2458 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2459 uint32_t b) 2460 { 2461 uint32_t res = a - b; 2462 if (res > a) { 2463 res = 0; 2464 env->vxsat = 0x1; 2465 } 2466 return res; 2467 } 2468 2469 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2470 uint64_t b) 2471 { 2472 uint64_t res = a - b; 2473 if (res > a) { 2474 res = 0; 2475 env->vxsat = 0x1; 2476 } 2477 return res; 2478 } 2479 2480 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2481 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2482 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2483 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2484 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2485 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2486 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2487 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2488 2489 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2490 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2491 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2492 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2493 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2494 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2495 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2496 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2497 2498 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2499 { 2500 int8_t res = a - b; 2501 
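/*
 * Signed saturating-subtract overflow check: overflow is only possible
 * when a and b have opposite signs, and then the result's sign differs
 * from a's; both conditions show up in the sign bit of
 * (res ^ a) & (a ^ b). E.g. a = -128, b = 1 wraps to res = 127, so the
 * result clamps to INT8_MIN and vxsat is set.
 */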
if ((res ^ a) & (a ^ b) & INT8_MIN) { 2502 res = a >= 0 ? INT8_MAX : INT8_MIN; 2503 env->vxsat = 0x1; 2504 } 2505 return res; 2506 } 2507 2508 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, 2509 int16_t b) 2510 { 2511 int16_t res = a - b; 2512 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2513 res = a >= 0 ? INT16_MAX : INT16_MIN; 2514 env->vxsat = 0x1; 2515 } 2516 return res; 2517 } 2518 2519 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, 2520 int32_t b) 2521 { 2522 int32_t res = a - b; 2523 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2524 res = a >= 0 ? INT32_MAX : INT32_MIN; 2525 env->vxsat = 0x1; 2526 } 2527 return res; 2528 } 2529 2530 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, 2531 int64_t b) 2532 { 2533 int64_t res = a - b; 2534 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2535 res = a >= 0 ? INT64_MAX : INT64_MIN; 2536 env->vxsat = 0x1; 2537 } 2538 return res; 2539 } 2540 2541 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2542 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2543 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2544 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2545 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2546 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2547 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2548 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2549 2550 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2551 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2552 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2553 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2554 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2555 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2556 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2557 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2558 2559 /* Vector Single-Width Averaging Add and Subtract */ 2560 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2561 { 2562 uint8_t d = extract64(v, shift, 1); 2563 uint8_t d1; 2564 uint64_t D1, D2; 2565 2566 if (shift == 0 || shift > 64) { 2567 return 0; 2568 } 2569 2570 d1 = extract64(v, shift - 1, 1); 2571 D1 = extract64(v, 0, shift); 2572 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2573 return d1; 2574 } else if (vxrm == 1) { /* round-to-nearest-even */ 2575 if (shift > 1) { 2576 D2 = extract64(v, 0, shift - 1); 2577 return d1 & ((D2 != 0) | d); 2578 } else { 2579 return d1 & d; 2580 } 2581 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2582 return !d & (D1 != 0); 2583 } 2584 return 0; /* round-down (truncate) */ 2585 } 2586 2587 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, 2588 int32_t b) 2589 { 2590 int64_t res = (int64_t)a + b; 2591 uint8_t round = get_round(vxrm, res, 1); 2592 2593 return (res >> 1) + round; 2594 } 2595 2596 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, 2597 int64_t b) 2598 { 2599 int64_t res = a + b; 2600 uint8_t round = get_round(vxrm, res, 1); 2601 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2602 2603 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2604 return ((res >> 1) ^ over) + round; 2605 } 2606 2607 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2608 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2609 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2610 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2611 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2612 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2613 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2614 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2615 2616 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2617 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2618 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2619 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2620 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2621 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2622 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2623 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2624 2625 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2626 uint32_t a, uint32_t b) 2627 { 2628 uint64_t res = (uint64_t)a + b; 2629 uint8_t round = get_round(vxrm, res, 1); 2630 2631 return (res >> 1) + round; 2632 } 2633 2634 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2635 uint64_t a, uint64_t b) 2636 { 2637 uint64_t res = a + b; 2638 uint8_t round = get_round(vxrm, res, 1); 2639 uint64_t over = (uint64_t)(res < a) << 63; 2640 2641 return ((res >> 1) | over) + round; 2642 } 2643 2644 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2645 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2646 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2647 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2648 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2649 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2650 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2651 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2652 2653 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2654 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2655 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2656 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2657 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2658 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2659 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2660 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2661 2662 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, 2663 int32_t b) 2664 { 2665 int64_t res = (int64_t)a - b; 2666 uint8_t round = get_round(vxrm, res, 1); 2667 2668 return (res >> 1) + round; 2669 } 2670 2671 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, 2672 int64_t b) 2673 { 2674 int64_t res = (int64_t)a - b; 2675 uint8_t round = get_round(vxrm, res, 1); 2676 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2677 2678 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2679 return ((res >> 1) ^ over) + round; 2680 } 2681 2682 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2683 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2684 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2685 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2686 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2687 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2688 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2689 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2690 2691 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2692 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2693 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2694 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2695 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2696 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2697 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2698 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2699 2700 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2701 uint32_t a, uint32_t b) 2702 { 2703 int64_t res = (int64_t)a - b; 2704 uint8_t round = get_round(vxrm, res, 1); 2705 2706 return (res >> 1) + round; 2707 } 2708 2709 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2710 uint64_t a, uint64_t b) 2711 { 2712 uint64_t res = (uint64_t)a - b; 2713 uint8_t round = get_round(vxrm, res, 1); 2714 uint64_t over = (uint64_t)(res > a) << 63; 2715 2716 return ((res >> 1) | over) + round; 2717 } 2718 2719 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2720 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2721 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2722 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2723 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2724 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2725 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2726 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2727 2728 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2729 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2730 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2731 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2732 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2733 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2734 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2735 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2736 2737 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2738 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2739 { 2740 uint8_t round; 2741 int16_t res; 2742 2743 res = (int16_t)a * (int16_t)b; 2744 round = get_round(vxrm, res, 7); 2745 res = (res >> 7) + round; 2746 2747 if (res > INT8_MAX) { 2748 env->vxsat = 0x1; 2749 return INT8_MAX; 2750 } else if (res < INT8_MIN) { 2751 env->vxsat = 0x1; 2752 return INT8_MIN; 2753 } else { 2754 return res; 2755 } 2756 } 2757 2758 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2759 { 2760 uint8_t round; 2761 int32_t res; 2762 2763 res = (int32_t)a * (int32_t)b; 2764 round = get_round(vxrm, res, 15); 2765 res = (res >> 15) + round; 2766 2767 if (res > INT16_MAX) { 2768 env->vxsat = 0x1; 2769 return INT16_MAX; 2770 } else if (res < INT16_MIN) { 2771 env->vxsat = 0x1; 2772 return INT16_MIN; 2773 } else { 2774 return res; 2775 } 2776 } 2777 2778 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2779 { 2780 uint8_t round; 2781 int64_t res; 2782 2783 res = (int64_t)a * (int64_t)b; 2784 round = get_round(vxrm, res, 31); 2785 res = (res >> 31) + round; 2786 2787 if (res > INT32_MAX) { 2788 env->vxsat = 0x1; 2789 return INT32_MAX; 2790 } else if (res < INT32_MIN) { 2791 env->vxsat = 0x1; 
2792 return INT32_MIN; 2793 } else { 2794 return res; 2795 } 2796 } 2797 2798 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2799 { 2800 uint8_t round; 2801 uint64_t hi_64, lo_64; 2802 int64_t res; 2803 2804 if (a == INT64_MIN && b == INT64_MIN) { 2805 env->vxsat = 1; 2806 return INT64_MAX; 2807 } 2808 2809 muls64(&lo_64, &hi_64, a, b); 2810 round = get_round(vxrm, lo_64, 63); 2811 /* 2812 * Cannot overflow, as there are always 2813 * 2 sign bits after multiply. 2814 */ 2815 res = (hi_64 << 1) | (lo_64 >> 63); 2816 if (round) { 2817 if (res == INT64_MAX) { 2818 env->vxsat = 1; 2819 } else { 2820 res += 1; 2821 } 2822 } 2823 return res; 2824 } 2825 2826 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2827 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2828 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2829 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2830 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2831 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2832 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2833 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2834 2835 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2836 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2837 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2838 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2839 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2840 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2841 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2842 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2843 2844 /* Vector Single-Width Scaling Shift Instructions */ 2845 static inline uint8_t 2846 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2847 { 2848 uint8_t round, shift = b & 0x7; 2849 uint8_t res; 2850 2851 round = get_round(vxrm, a, shift); 2852 res = (a >> shift) + round; 2853 return res; 2854 } 2855 static inline uint16_t 2856 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2857 { 2858 uint8_t round, shift = b & 0xf; 2859 2860 round = get_round(vxrm, a, shift); 2861 return (a >> shift) + round; 2862 } 2863 static inline uint32_t 2864 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2865 { 2866 uint8_t round, shift = b & 0x1f; 2867 2868 round = get_round(vxrm, a, shift); 2869 return (a >> shift) + round; 2870 } 2871 static inline uint64_t 2872 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2873 { 2874 uint8_t round, shift = b & 0x3f; 2875 2876 round = get_round(vxrm, a, shift); 2877 return (a >> shift) + round; 2878 } 2879 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2880 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2881 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2882 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2883 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2884 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2885 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2886 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2887 2888 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2889 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2890 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2891 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2892 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2893 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2894 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2895 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2896 2897 static inline int8_t 2898 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2899 { 2900 uint8_t round, shift = b & 0x7; 2901 2902 round = get_round(vxrm, a, shift); 2903 return (a >> shift) + round; 2904 } 2905 static inline int16_t 2906 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2907 { 2908 uint8_t round, shift = b & 0xf; 2909 2910 round = get_round(vxrm, a, shift); 2911 return (a >> shift) + round; 2912 } 2913 static inline int32_t 2914 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2915 { 2916 uint8_t round, shift = b & 0x1f; 2917 2918 round = get_round(vxrm, a, shift); 2919 return (a >> shift) + round; 2920 } 2921 static inline int64_t 2922 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2923 { 2924 uint8_t round, shift = b & 0x3f; 2925 2926 round = get_round(vxrm, a, shift); 2927 return (a >> shift) + round; 2928 } 2929 2930 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2931 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2932 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2933 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2934 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2935 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2936 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2937 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2938 2939 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2940 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2941 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2942 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2943 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2944 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2945 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2946 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2947 2948 /* Vector Narrowing Fixed-Point Clip Instructions */ 2949 static inline int8_t 2950 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2951 { 2952 uint8_t round, shift = b & 0xf; 2953 int16_t res; 2954 2955 round = get_round(vxrm, a, shift); 2956 res = (a >> shift) + round; 2957 if (res > INT8_MAX) { 2958 env->vxsat = 0x1; 2959 return INT8_MAX; 2960 } else if (res < INT8_MIN) { 2961 env->vxsat = 0x1; 2962 return INT8_MIN; 2963 } else { 2964 return res; 2965 } 2966 } 2967 2968 static inline int16_t 2969 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2970 { 2971 uint8_t round, shift = b & 0x1f; 2972 int32_t res; 2973 2974 round = get_round(vxrm, a, shift); 2975 res = (a >> shift) + round; 2976 if (res > INT16_MAX) { 2977 env->vxsat = 0x1; 2978 return INT16_MAX; 2979 } else if (res < INT16_MIN) { 2980 env->vxsat = 0x1; 2981 return INT16_MIN; 2982 } else { 2983 return res; 2984 } 2985 } 2986 2987 static inline int32_t 2988 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2989 { 2990 uint8_t round, shift = b & 0x3f; 2991 int64_t res; 2992 2993 round = get_round(vxrm, a, shift); 2994 res = (a >> shift) + round; 2995 if (res > INT32_MAX) { 2996 env->vxsat = 0x1; 2997 return INT32_MAX; 2998 } else if (res < INT32_MIN) { 2999 env->vxsat = 0x1; 3000 return INT32_MIN; 3001 } else { 3002 return res; 3003 } 3004 } 3005 3006 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 3007 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 3008 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 3009 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 3010 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 3011 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 3012 3013 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 3014 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 3015 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 3016 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 3017 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 3018 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 3019 3020 static inline uint8_t 3021 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 3022 { 3023 
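/*
 * Unsigned narrowing clip: shift the 16-bit source right by the low
 * four bits of b, add the rounding increment selected by vxrm via
 * get_round(), then saturate to the 8-bit range, setting vxsat when
 * the value is clipped.
 */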
uint8_t round, shift = b & 0xf; 3024 uint16_t res; 3025 3026 round = get_round(vxrm, a, shift); 3027 res = (a >> shift) + round; 3028 if (res > UINT8_MAX) { 3029 env->vxsat = 0x1; 3030 return UINT8_MAX; 3031 } else { 3032 return res; 3033 } 3034 } 3035 3036 static inline uint16_t 3037 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 3038 { 3039 uint8_t round, shift = b & 0x1f; 3040 uint32_t res; 3041 3042 round = get_round(vxrm, a, shift); 3043 res = (a >> shift) + round; 3044 if (res > UINT16_MAX) { 3045 env->vxsat = 0x1; 3046 return UINT16_MAX; 3047 } else { 3048 return res; 3049 } 3050 } 3051 3052 static inline uint32_t 3053 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 3054 { 3055 uint8_t round, shift = b & 0x3f; 3056 uint64_t res; 3057 3058 round = get_round(vxrm, a, shift); 3059 res = (a >> shift) + round; 3060 if (res > UINT32_MAX) { 3061 env->vxsat = 0x1; 3062 return UINT32_MAX; 3063 } else { 3064 return res; 3065 } 3066 } 3067 3068 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 3069 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 3070 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 3071 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 3072 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 3073 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 3074 3075 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 3076 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 3077 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 3078 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 3079 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 3080 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 3081 3082 /* 3083 * Vector Float Point Arithmetic Instructions 3084 */ 3085 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 3086 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3087 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3088 CPURISCVState *env) \ 3089 { \ 3090 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3091 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3092 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3093 } 3094 3095 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 3096 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3097 void *vs2, CPURISCVState *env, \ 3098 uint32_t desc) \ 3099 { \ 3100 uint32_t vm = vext_vm(desc); \ 3101 uint32_t vl = env->vl; \ 3102 uint32_t total_elems = \ 3103 vext_get_total_elems(env, desc, ESZ); \ 3104 uint32_t vta = vext_vta(desc); \ 3105 uint32_t vma = vext_vma(desc); \ 3106 uint32_t i; \ 3107 \ 3108 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3109 \ 3110 for (i = env->vstart; i < vl; i++) { \ 3111 if (!vm && !vext_elem_mask(v0, i)) { \ 3112 /* set masked-off elements to 1s */ \ 3113 vext_set_elems_1s(vd, vma, i * ESZ, \ 3114 (i + 1) * ESZ); \ 3115 continue; \ 3116 } \ 3117 do_##NAME(vd, vs1, vs2, i, env); \ 3118 } \ 3119 env->vstart = 0; \ 3120 /* set tail elements to 1s */ \ 3121 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3122 total_elems * ESZ); \ 3123 } 3124 3125 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3126 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3127 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3128 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 3129 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 3130 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 3131 3132 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3133 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3134 CPURISCVState *env) \ 3135 { \ 3136 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3137 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, 
&env->fp_status);\ 3138 } 3139 3140 #define GEN_VEXT_VF(NAME, ESZ) \ 3141 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3142 void *vs2, CPURISCVState *env, \ 3143 uint32_t desc) \ 3144 { \ 3145 uint32_t vm = vext_vm(desc); \ 3146 uint32_t vl = env->vl; \ 3147 uint32_t total_elems = \ 3148 vext_get_total_elems(env, desc, ESZ); \ 3149 uint32_t vta = vext_vta(desc); \ 3150 uint32_t vma = vext_vma(desc); \ 3151 uint32_t i; \ 3152 \ 3153 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3154 \ 3155 for (i = env->vstart; i < vl; i++) { \ 3156 if (!vm && !vext_elem_mask(v0, i)) { \ 3157 /* set masked-off elements to 1s */ \ 3158 vext_set_elems_1s(vd, vma, i * ESZ, \ 3159 (i + 1) * ESZ); \ 3160 continue; \ 3161 } \ 3162 do_##NAME(vd, s1, vs2, i, env); \ 3163 } \ 3164 env->vstart = 0; \ 3165 /* set tail elements to 1s */ \ 3166 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3167 total_elems * ESZ); \ 3168 } 3169 3170 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3171 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3172 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3173 GEN_VEXT_VF(vfadd_vf_h, 2) 3174 GEN_VEXT_VF(vfadd_vf_w, 4) 3175 GEN_VEXT_VF(vfadd_vf_d, 8) 3176 3177 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3178 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3179 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3180 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 3181 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 3182 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 3183 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3184 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3185 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3186 GEN_VEXT_VF(vfsub_vf_h, 2) 3187 GEN_VEXT_VF(vfsub_vf_w, 4) 3188 GEN_VEXT_VF(vfsub_vf_d, 8) 3189 3190 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3191 { 3192 return float16_sub(b, a, s); 3193 } 3194 3195 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3196 { 3197 return float32_sub(b, a, s); 3198 } 3199 3200 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3201 { 3202 return float64_sub(b, a, s); 3203 } 3204 3205 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3206 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3207 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3208 GEN_VEXT_VF(vfrsub_vf_h, 2) 3209 GEN_VEXT_VF(vfrsub_vf_w, 4) 3210 GEN_VEXT_VF(vfrsub_vf_d, 8) 3211 3212 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3213 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3214 { 3215 return float32_add(float16_to_float32(a, true, s), 3216 float16_to_float32(b, true, s), s); 3217 } 3218 3219 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3220 { 3221 return float64_add(float32_to_float64(a, s), 3222 float32_to_float64(b, s), s); 3223 3224 } 3225 3226 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3227 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3228 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3229 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3230 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3231 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3232 GEN_VEXT_VF(vfwadd_vf_h, 4) 3233 GEN_VEXT_VF(vfwadd_vf_w, 8) 3234 3235 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3236 { 3237 return float32_sub(float16_to_float32(a, true, s), 3238 float16_to_float32(b, true, s), s); 3239 } 3240 3241 static uint64_t vfwsub32(uint32_t a, 
uint32_t b, float_status *s) 3242 { 3243 return float64_sub(float32_to_float64(a, s), 3244 float32_to_float64(b, s), s); 3245 3246 } 3247 3248 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3249 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3250 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3251 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3252 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3253 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3254 GEN_VEXT_VF(vfwsub_vf_h, 4) 3255 GEN_VEXT_VF(vfwsub_vf_w, 8) 3256 3257 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3258 { 3259 return float32_add(a, float16_to_float32(b, true, s), s); 3260 } 3261 3262 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3263 { 3264 return float64_add(a, float32_to_float64(b, s), s); 3265 } 3266 3267 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3268 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3269 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3270 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3271 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3272 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3273 GEN_VEXT_VF(vfwadd_wf_h, 4) 3274 GEN_VEXT_VF(vfwadd_wf_w, 8) 3275 3276 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3277 { 3278 return float32_sub(a, float16_to_float32(b, true, s), s); 3279 } 3280 3281 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3282 { 3283 return float64_sub(a, float32_to_float64(b, s), s); 3284 } 3285 3286 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3287 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3288 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3289 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3290 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3291 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3292 GEN_VEXT_VF(vfwsub_wf_h, 4) 3293 GEN_VEXT_VF(vfwsub_wf_w, 8) 3294 3295 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3296 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3297 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3298 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3299 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3300 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3301 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3302 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3303 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3304 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3305 GEN_VEXT_VF(vfmul_vf_h, 2) 3306 GEN_VEXT_VF(vfmul_vf_w, 4) 3307 GEN_VEXT_VF(vfmul_vf_d, 8) 3308 3309 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3310 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3311 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3312 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3313 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3314 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3315 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3316 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3317 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3318 GEN_VEXT_VF(vfdiv_vf_h, 2) 3319 GEN_VEXT_VF(vfdiv_vf_w, 4) 3320 GEN_VEXT_VF(vfdiv_vf_d, 8) 3321 3322 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3323 { 3324 return float16_div(b, a, s); 3325 } 3326 3327 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3328 { 3329 return float32_div(b, a, s); 3330 } 3331 3332 static uint64_t 
float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3333 { 3334 return float64_div(b, a, s); 3335 } 3336 3337 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3338 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3339 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3340 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3341 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3342 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3343 3344 /* Vector Widening Floating-Point Multiply */ 3345 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3346 { 3347 return float32_mul(float16_to_float32(a, true, s), 3348 float16_to_float32(b, true, s), s); 3349 } 3350 3351 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3352 { 3353 return float64_mul(float32_to_float64(a, s), 3354 float32_to_float64(b, s), s); 3355 3356 } 3357 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3358 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3359 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3360 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3361 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3362 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3363 GEN_VEXT_VF(vfwmul_vf_h, 4) 3364 GEN_VEXT_VF(vfwmul_vf_w, 8) 3365 3366 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3367 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3368 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3369 CPURISCVState *env) \ 3370 { \ 3371 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3372 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3373 TD d = *((TD *)vd + HD(i)); \ 3374 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3375 } 3376 3377 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3378 { 3379 return float16_muladd(a, b, d, 0, s); 3380 } 3381 3382 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3383 { 3384 return float32_muladd(a, b, d, 0, s); 3385 } 3386 3387 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3388 { 3389 return float64_muladd(a, b, d, 0, s); 3390 } 3391 3392 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3393 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3394 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3395 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3396 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3397 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3398 3399 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3400 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3401 CPURISCVState *env) \ 3402 { \ 3403 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3404 TD d = *((TD *)vd + HD(i)); \ 3405 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3406 } 3407 3408 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3409 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3410 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3411 GEN_VEXT_VF(vfmacc_vf_h, 2) 3412 GEN_VEXT_VF(vfmacc_vf_w, 4) 3413 GEN_VEXT_VF(vfmacc_vf_d, 8) 3414 3415 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3416 { 3417 return float16_muladd(a, b, d, float_muladd_negate_c | 3418 float_muladd_negate_product, s); 3419 } 3420 3421 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3422 { 3423 return float32_muladd(a, b, d, float_muladd_negate_c | 3424 float_muladd_negate_product, s); 3425 } 3426 3427 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3428 { 3429 return float64_muladd(a, b, d, 
float_muladd_negate_c | 3430 float_muladd_negate_product, s); 3431 } 3432 3433 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3434 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3435 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3436 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3437 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3438 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3439 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3440 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3441 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3442 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3443 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3444 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3445 3446 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3447 { 3448 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3449 } 3450 3451 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3452 { 3453 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3454 } 3455 3456 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3457 { 3458 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3459 } 3460 3461 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3462 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3463 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3464 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3465 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3466 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3467 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3468 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3469 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3470 GEN_VEXT_VF(vfmsac_vf_h, 2) 3471 GEN_VEXT_VF(vfmsac_vf_w, 4) 3472 GEN_VEXT_VF(vfmsac_vf_d, 8) 3473 3474 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3475 { 3476 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3477 } 3478 3479 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3480 { 3481 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3482 } 3483 3484 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3485 { 3486 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3487 } 3488 3489 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3490 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3491 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3492 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3493 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3494 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3495 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3496 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3497 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3498 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3499 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3500 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3501 3502 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3503 { 3504 return float16_muladd(d, b, a, 0, s); 3505 } 3506 3507 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3508 { 3509 return float32_muladd(d, b, a, 0, s); 3510 } 3511 3512 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3513 { 3514 return float64_muladd(d, b, a, 0, s); 3515 } 3516 3517 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3518 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3519 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3520 GEN_VEXT_VV_ENV(vfmadd_vv_h, 
2) 3521 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3522 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3523 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3524 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3525 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3526 GEN_VEXT_VF(vfmadd_vf_h, 2) 3527 GEN_VEXT_VF(vfmadd_vf_w, 4) 3528 GEN_VEXT_VF(vfmadd_vf_d, 8) 3529 3530 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3531 { 3532 return float16_muladd(d, b, a, float_muladd_negate_c | 3533 float_muladd_negate_product, s); 3534 } 3535 3536 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3537 { 3538 return float32_muladd(d, b, a, float_muladd_negate_c | 3539 float_muladd_negate_product, s); 3540 } 3541 3542 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3543 { 3544 return float64_muladd(d, b, a, float_muladd_negate_c | 3545 float_muladd_negate_product, s); 3546 } 3547 3548 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3549 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3550 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3551 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3552 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3553 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3554 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3555 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3556 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3557 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3558 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3559 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3560 3561 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3562 { 3563 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3564 } 3565 3566 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3567 { 3568 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3569 } 3570 3571 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3572 { 3573 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3574 } 3575 3576 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3577 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3578 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3579 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3580 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3581 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3582 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3583 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3584 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3585 GEN_VEXT_VF(vfmsub_vf_h, 2) 3586 GEN_VEXT_VF(vfmsub_vf_w, 4) 3587 GEN_VEXT_VF(vfmsub_vf_d, 8) 3588 3589 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3590 { 3591 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3592 } 3593 3594 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3595 { 3596 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3597 } 3598 3599 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3600 { 3601 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3602 } 3603 3604 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3605 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3606 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3607 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3608 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3609 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3610 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 
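/*
 * Note on operand order for the single-width FMA helpers above: OPFVV3 and
 * OPFVF3 invoke OP(s2, s1, d), so the f[n]macc/f[n]msac helpers compute
 * float*_muladd(a, b, d, ...) = +/-(vs1 * vs2) +/- vd, while the
 * f[n]madd/f[n]msub helpers pass the accumulator as the first multiplicand,
 * float*_muladd(d, b, a, ...) = +/-(vs1 * vd) +/- vs2, which is the
 * vd = +/-(vs1 * vd) +/- vs2 form these instructions require.
 */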
3611 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3612 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3613 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3614 GEN_VEXT_VF(vfnmsub_vf_w, 4) 3615 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3616 3617 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3618 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3619 { 3620 return float32_muladd(float16_to_float32(a, true, s), 3621 float16_to_float32(b, true, s), d, 0, s); 3622 } 3623 3624 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3625 { 3626 return float64_muladd(float32_to_float64(a, s), 3627 float32_to_float64(b, s), d, 0, s); 3628 } 3629 3630 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3631 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3632 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3633 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3634 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3635 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3636 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3637 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3638 3639 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3640 { 3641 return float32_muladd(bfloat16_to_float32(a, s), 3642 bfloat16_to_float32(b, s), d, 0, s); 3643 } 3644 3645 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16) 3646 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4) 3647 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16) 3648 GEN_VEXT_VF(vfwmaccbf16_vf, 4) 3649 3650 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3651 { 3652 return float32_muladd(float16_to_float32(a, true, s), 3653 float16_to_float32(b, true, s), d, 3654 float_muladd_negate_c | float_muladd_negate_product, 3655 s); 3656 } 3657 3658 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3659 { 3660 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), 3661 d, float_muladd_negate_c | 3662 float_muladd_negate_product, s); 3663 } 3664 3665 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3666 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3667 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3668 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3669 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3670 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3671 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3672 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3673 3674 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3675 { 3676 return float32_muladd(float16_to_float32(a, true, s), 3677 float16_to_float32(b, true, s), d, 3678 float_muladd_negate_c, s); 3679 } 3680 3681 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3682 { 3683 return float64_muladd(float32_to_float64(a, s), 3684 float32_to_float64(b, s), d, 3685 float_muladd_negate_c, s); 3686 } 3687 3688 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3689 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3690 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3691 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3692 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3693 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3694 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3695 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3696 3697 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3698 { 3699 return float32_muladd(float16_to_float32(a, true, s), 3700 float16_to_float32(b, true, s), d, 3701 
float_muladd_negate_product, s); 3702 } 3703 3704 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3705 { 3706 return float64_muladd(float32_to_float64(a, s), 3707 float32_to_float64(b, s), d, 3708 float_muladd_negate_product, s); 3709 } 3710 3711 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3712 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3713 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3714 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3715 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3716 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3717 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3718 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3719 3720 /* Vector Floating-Point Square-Root Instruction */ 3721 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3722 static void do_##NAME(void *vd, void *vs2, int i, \ 3723 CPURISCVState *env) \ 3724 { \ 3725 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3726 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3727 } 3728 3729 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3730 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3731 CPURISCVState *env, uint32_t desc) \ 3732 { \ 3733 uint32_t vm = vext_vm(desc); \ 3734 uint32_t vl = env->vl; \ 3735 uint32_t total_elems = \ 3736 vext_get_total_elems(env, desc, ESZ); \ 3737 uint32_t vta = vext_vta(desc); \ 3738 uint32_t vma = vext_vma(desc); \ 3739 uint32_t i; \ 3740 \ 3741 VSTART_CHECK_EARLY_EXIT(env, vl); \ 3742 \ 3743 if (vl == 0) { \ 3744 return; \ 3745 } \ 3746 for (i = env->vstart; i < vl; i++) { \ 3747 if (!vm && !vext_elem_mask(v0, i)) { \ 3748 /* set masked-off elements to 1s */ \ 3749 vext_set_elems_1s(vd, vma, i * ESZ, \ 3750 (i + 1) * ESZ); \ 3751 continue; \ 3752 } \ 3753 do_##NAME(vd, vs2, i, env); \ 3754 } \ 3755 env->vstart = 0; \ 3756 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3757 total_elems * ESZ); \ 3758 } 3759 3760 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3761 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3762 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3763 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3764 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3765 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3766 3767 /* 3768 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3769 * 3770 * Adapted from riscv-v-spec recip.c: 3771 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3772 */ 3773 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3774 { 3775 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3776 uint64_t exp = extract64(f, frac_size, exp_size); 3777 uint64_t frac = extract64(f, 0, frac_size); 3778 3779 const uint8_t lookup_table[] = { 3780 52, 51, 50, 48, 47, 46, 44, 43, 3781 42, 41, 40, 39, 38, 36, 35, 34, 3782 33, 32, 31, 30, 30, 29, 28, 27, 3783 26, 25, 24, 23, 23, 22, 21, 20, 3784 19, 19, 18, 17, 16, 16, 15, 14, 3785 14, 13, 12, 12, 11, 10, 10, 9, 3786 9, 8, 7, 7, 6, 6, 5, 4, 3787 4, 3, 3, 2, 2, 1, 1, 0, 3788 127, 125, 123, 121, 119, 118, 116, 114, 3789 113, 111, 109, 108, 106, 105, 103, 102, 3790 100, 99, 97, 96, 95, 93, 92, 91, 3791 90, 88, 87, 86, 85, 84, 83, 82, 3792 80, 79, 78, 77, 76, 75, 74, 73, 3793 72, 71, 70, 70, 69, 68, 67, 66, 3794 65, 64, 63, 63, 62, 61, 60, 59, 3795 59, 58, 57, 56, 56, 55, 54, 53 3796 }; 3797 const int precision = 7; 3798 3799 if (exp == 0 && frac != 0) { /* subnormal */ 3800 /* Normalize the subnormal. 
*/ 3801 while (extract64(frac, frac_size - 1, 1) == 0) { 3802 exp--; 3803 frac <<= 1; 3804 } 3805 3806 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3807 } 3808 3809 int idx = ((exp & 1) << (precision - 1)) | 3810 (frac >> (frac_size - precision + 1)); 3811 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3812 (frac_size - precision); 3813 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3814 3815 uint64_t val = 0; 3816 val = deposit64(val, 0, frac_size, out_frac); 3817 val = deposit64(val, frac_size, exp_size, out_exp); 3818 val = deposit64(val, frac_size + exp_size, 1, sign); 3819 return val; 3820 } 3821 3822 static float16 frsqrt7_h(float16 f, float_status *s) 3823 { 3824 int exp_size = 5, frac_size = 10; 3825 bool sign = float16_is_neg(f); 3826 3827 /* 3828 * frsqrt7(sNaN) = canonical NaN 3829 * frsqrt7(-inf) = canonical NaN 3830 * frsqrt7(-normal) = canonical NaN 3831 * frsqrt7(-subnormal) = canonical NaN 3832 */ 3833 if (float16_is_signaling_nan(f, s) || 3834 (float16_is_infinity(f) && sign) || 3835 (float16_is_normal(f) && sign) || 3836 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3837 s->float_exception_flags |= float_flag_invalid; 3838 return float16_default_nan(s); 3839 } 3840 3841 /* frsqrt7(qNaN) = canonical NaN */ 3842 if (float16_is_quiet_nan(f, s)) { 3843 return float16_default_nan(s); 3844 } 3845 3846 /* frsqrt7(+-0) = +-inf */ 3847 if (float16_is_zero(f)) { 3848 s->float_exception_flags |= float_flag_divbyzero; 3849 return float16_set_sign(float16_infinity, sign); 3850 } 3851 3852 /* frsqrt7(+inf) = +0 */ 3853 if (float16_is_infinity(f) && !sign) { 3854 return float16_set_sign(float16_zero, sign); 3855 } 3856 3857 /* +normal, +subnormal */ 3858 uint64_t val = frsqrt7(f, exp_size, frac_size); 3859 return make_float16(val); 3860 } 3861 3862 static float32 frsqrt7_s(float32 f, float_status *s) 3863 { 3864 int exp_size = 8, frac_size = 23; 3865 bool sign = float32_is_neg(f); 3866 3867 /* 3868 * frsqrt7(sNaN) = canonical NaN 3869 * frsqrt7(-inf) = canonical NaN 3870 * frsqrt7(-normal) = canonical NaN 3871 * frsqrt7(-subnormal) = canonical NaN 3872 */ 3873 if (float32_is_signaling_nan(f, s) || 3874 (float32_is_infinity(f) && sign) || 3875 (float32_is_normal(f) && sign) || 3876 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3877 s->float_exception_flags |= float_flag_invalid; 3878 return float32_default_nan(s); 3879 } 3880 3881 /* frsqrt7(qNaN) = canonical NaN */ 3882 if (float32_is_quiet_nan(f, s)) { 3883 return float32_default_nan(s); 3884 } 3885 3886 /* frsqrt7(+-0) = +-inf */ 3887 if (float32_is_zero(f)) { 3888 s->float_exception_flags |= float_flag_divbyzero; 3889 return float32_set_sign(float32_infinity, sign); 3890 } 3891 3892 /* frsqrt7(+inf) = +0 */ 3893 if (float32_is_infinity(f) && !sign) { 3894 return float32_set_sign(float32_zero, sign); 3895 } 3896 3897 /* +normal, +subnormal */ 3898 uint64_t val = frsqrt7(f, exp_size, frac_size); 3899 return make_float32(val); 3900 } 3901 3902 static float64 frsqrt7_d(float64 f, float_status *s) 3903 { 3904 int exp_size = 11, frac_size = 52; 3905 bool sign = float64_is_neg(f); 3906 3907 /* 3908 * frsqrt7(sNaN) = canonical NaN 3909 * frsqrt7(-inf) = canonical NaN 3910 * frsqrt7(-normal) = canonical NaN 3911 * frsqrt7(-subnormal) = canonical NaN 3912 */ 3913 if (float64_is_signaling_nan(f, s) || 3914 (float64_is_infinity(f) && sign) || 3915 (float64_is_normal(f) && sign) || 3916 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3917 
s->float_exception_flags |= float_flag_invalid; 3918 return float64_default_nan(s); 3919 } 3920 3921 /* frsqrt7(qNaN) = canonical NaN */ 3922 if (float64_is_quiet_nan(f, s)) { 3923 return float64_default_nan(s); 3924 } 3925 3926 /* frsqrt7(+-0) = +-inf */ 3927 if (float64_is_zero(f)) { 3928 s->float_exception_flags |= float_flag_divbyzero; 3929 return float64_set_sign(float64_infinity, sign); 3930 } 3931 3932 /* frsqrt7(+inf) = +0 */ 3933 if (float64_is_infinity(f) && !sign) { 3934 return float64_set_sign(float64_zero, sign); 3935 } 3936 3937 /* +normal, +subnormal */ 3938 uint64_t val = frsqrt7(f, exp_size, frac_size); 3939 return make_float64(val); 3940 } 3941 3942 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3943 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3944 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3945 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3946 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3947 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3948 3949 /* 3950 * Vector Floating-Point Reciprocal Estimate Instruction 3951 * 3952 * Adapted from riscv-v-spec recip.c: 3953 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3954 */ 3955 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3956 float_status *s) 3957 { 3958 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3959 uint64_t exp = extract64(f, frac_size, exp_size); 3960 uint64_t frac = extract64(f, 0, frac_size); 3961 3962 const uint8_t lookup_table[] = { 3963 127, 125, 123, 121, 119, 117, 116, 114, 3964 112, 110, 109, 107, 105, 104, 102, 100, 3965 99, 97, 96, 94, 93, 91, 90, 88, 3966 87, 85, 84, 83, 81, 80, 79, 77, 3967 76, 75, 74, 72, 71, 70, 69, 68, 3968 66, 65, 64, 63, 62, 61, 60, 59, 3969 58, 57, 56, 55, 54, 53, 52, 51, 3970 50, 49, 48, 47, 46, 45, 44, 43, 3971 42, 41, 40, 40, 39, 38, 37, 36, 3972 35, 35, 34, 33, 32, 31, 31, 30, 3973 29, 28, 28, 27, 26, 25, 25, 24, 3974 23, 23, 22, 21, 21, 20, 19, 19, 3975 18, 17, 17, 16, 15, 15, 14, 14, 3976 13, 12, 12, 11, 11, 10, 9, 9, 3977 8, 8, 7, 7, 6, 5, 5, 4, 3978 4, 3, 3, 2, 2, 1, 1, 0 3979 }; 3980 const int precision = 7; 3981 3982 if (exp == 0 && frac != 0) { /* subnormal */ 3983 /* Normalize the subnormal. */ 3984 while (extract64(frac, frac_size - 1, 1) == 0) { 3985 exp--; 3986 frac <<= 1; 3987 } 3988 3989 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3990 3991 if (exp != 0 && exp != UINT64_MAX) { 3992 /* 3993 * Overflow to inf or max value of same sign, 3994 * depending on sign and rounding mode. 3995 */ 3996 s->float_exception_flags |= (float_flag_inexact | 3997 float_flag_overflow); 3998 3999 if ((s->float_rounding_mode == float_round_to_zero) || 4000 ((s->float_rounding_mode == float_round_down) && !sign) || 4001 ((s->float_rounding_mode == float_round_up) && sign)) { 4002 /* Return greatest/negative finite value. */ 4003 return (sign << (exp_size + frac_size)) | 4004 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 4005 } else { 4006 /* Return +-inf. */ 4007 return (sign << (exp_size + frac_size)) | 4008 MAKE_64BIT_MASK(frac_size, exp_size); 4009 } 4010 } 4011 } 4012 4013 int idx = frac >> (frac_size - precision); 4014 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 4015 (frac_size - precision); 4016 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 4017 4018 if (out_exp == 0 || out_exp == UINT64_MAX) { 4019 /* 4020 * The result is subnormal, but don't raise the underflow exception, 4021 * because there's no additional loss of precision. 
4022 */ 4023 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 4024 if (out_exp == UINT64_MAX) { 4025 out_frac >>= 1; 4026 out_exp = 0; 4027 } 4028 } 4029 4030 uint64_t val = 0; 4031 val = deposit64(val, 0, frac_size, out_frac); 4032 val = deposit64(val, frac_size, exp_size, out_exp); 4033 val = deposit64(val, frac_size + exp_size, 1, sign); 4034 return val; 4035 } 4036 4037 static float16 frec7_h(float16 f, float_status *s) 4038 { 4039 int exp_size = 5, frac_size = 10; 4040 bool sign = float16_is_neg(f); 4041 4042 /* frec7(+-inf) = +-0 */ 4043 if (float16_is_infinity(f)) { 4044 return float16_set_sign(float16_zero, sign); 4045 } 4046 4047 /* frec7(+-0) = +-inf */ 4048 if (float16_is_zero(f)) { 4049 s->float_exception_flags |= float_flag_divbyzero; 4050 return float16_set_sign(float16_infinity, sign); 4051 } 4052 4053 /* frec7(sNaN) = canonical NaN */ 4054 if (float16_is_signaling_nan(f, s)) { 4055 s->float_exception_flags |= float_flag_invalid; 4056 return float16_default_nan(s); 4057 } 4058 4059 /* frec7(qNaN) = canonical NaN */ 4060 if (float16_is_quiet_nan(f, s)) { 4061 return float16_default_nan(s); 4062 } 4063 4064 /* +-normal, +-subnormal */ 4065 uint64_t val = frec7(f, exp_size, frac_size, s); 4066 return make_float16(val); 4067 } 4068 4069 static float32 frec7_s(float32 f, float_status *s) 4070 { 4071 int exp_size = 8, frac_size = 23; 4072 bool sign = float32_is_neg(f); 4073 4074 /* frec7(+-inf) = +-0 */ 4075 if (float32_is_infinity(f)) { 4076 return float32_set_sign(float32_zero, sign); 4077 } 4078 4079 /* frec7(+-0) = +-inf */ 4080 if (float32_is_zero(f)) { 4081 s->float_exception_flags |= float_flag_divbyzero; 4082 return float32_set_sign(float32_infinity, sign); 4083 } 4084 4085 /* frec7(sNaN) = canonical NaN */ 4086 if (float32_is_signaling_nan(f, s)) { 4087 s->float_exception_flags |= float_flag_invalid; 4088 return float32_default_nan(s); 4089 } 4090 4091 /* frec7(qNaN) = canonical NaN */ 4092 if (float32_is_quiet_nan(f, s)) { 4093 return float32_default_nan(s); 4094 } 4095 4096 /* +-normal, +-subnormal */ 4097 uint64_t val = frec7(f, exp_size, frac_size, s); 4098 return make_float32(val); 4099 } 4100 4101 static float64 frec7_d(float64 f, float_status *s) 4102 { 4103 int exp_size = 11, frac_size = 52; 4104 bool sign = float64_is_neg(f); 4105 4106 /* frec7(+-inf) = +-0 */ 4107 if (float64_is_infinity(f)) { 4108 return float64_set_sign(float64_zero, sign); 4109 } 4110 4111 /* frec7(+-0) = +-inf */ 4112 if (float64_is_zero(f)) { 4113 s->float_exception_flags |= float_flag_divbyzero; 4114 return float64_set_sign(float64_infinity, sign); 4115 } 4116 4117 /* frec7(sNaN) = canonical NaN */ 4118 if (float64_is_signaling_nan(f, s)) { 4119 s->float_exception_flags |= float_flag_invalid; 4120 return float64_default_nan(s); 4121 } 4122 4123 /* frec7(qNaN) = canonical NaN */ 4124 if (float64_is_quiet_nan(f, s)) { 4125 return float64_default_nan(s); 4126 } 4127 4128 /* +-normal, +-subnormal */ 4129 uint64_t val = frec7(f, exp_size, frac_size, s); 4130 return make_float64(val); 4131 } 4132 4133 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 4134 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 4135 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 4136 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 4137 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 4138 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 4139 4140 /* Vector Floating-Point MIN/MAX Instructions */ 4141 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 4142 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 4143 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 4144 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 4145 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 4146 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 4147 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 4148 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 4149 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 4150 GEN_VEXT_VF(vfmin_vf_h, 2) 4151 GEN_VEXT_VF(vfmin_vf_w, 4) 4152 GEN_VEXT_VF(vfmin_vf_d, 8) 4153 4154 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 4155 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 4156 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 4157 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 4158 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 4159 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 4160 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 4161 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 4162 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 4163 GEN_VEXT_VF(vfmax_vf_h, 2) 4164 GEN_VEXT_VF(vfmax_vf_w, 4) 4165 GEN_VEXT_VF(vfmax_vf_d, 8) 4166 4167 /* Vector Floating-Point Sign-Injection Instructions */ 4168 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 4169 { 4170 return deposit64(b, 0, 15, a); 4171 } 4172 4173 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 4174 { 4175 return deposit64(b, 0, 31, a); 4176 } 4177 4178 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 4179 { 4180 return deposit64(b, 0, 63, a); 4181 } 4182 4183 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 4184 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 4185 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 4186 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 4187 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 4188 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 4189 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 4190 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 4191 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 4192 GEN_VEXT_VF(vfsgnj_vf_h, 2) 4193 GEN_VEXT_VF(vfsgnj_vf_w, 4) 4194 GEN_VEXT_VF(vfsgnj_vf_d, 8) 4195 4196 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4197 { 4198 return deposit64(~b, 0, 15, a); 4199 } 4200 4201 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4202 { 4203 return deposit64(~b, 0, 31, a); 4204 } 4205 4206 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4207 { 4208 return deposit64(~b, 0, 63, a); 4209 } 4210 4211 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4212 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4213 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4214 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 4215 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 4216 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 4217 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4218 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4219 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4220 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4221 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4222 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4223 4224 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4225 { 4226 return deposit64(b ^ a, 0, 15, a); 4227 } 4228 4229 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4230 { 4231 return deposit64(b ^ a, 0, 31, a); 4232 } 4233 4234 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
float_status *s)
{
    return deposit64(b ^ a, 0, 63, a);
}

RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
GEN_VEXT_VF(vfsgnjx_vf_h, 2)
GEN_VEXT_VF(vfsgnjx_vf_w, 4)
GEN_VEXT_VF(vfsgnjx_vf_d, 8)

/* Vector Floating-Point Compare Instructions */
#define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP)            \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env, vl);                         \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i,                             \
                           DO_OP(s2, s1, &env->fp_status));   \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination registers are always tail-agnostic
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)

#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,       \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    VSTART_CHECK_EARLY_EXIT(env, vl);                               \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            if (vma) {                                              \
                vext_set_elem_mask(vd, i, 1);                       \
            }                                                       \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                           DO_OP(s2, (ETYPE)s1, &env->fp_status));  \
    }                                                               \
    env->vstart = 0;                                                \
    /*
     * mask destination registers are always tail-agnostic
     * set tail elements to 1s
     */                                                             \
    if (vta_all_1s) {                                               \
        for (; i < total_elems; i++) {                              \
            vext_set_elem_mask(vd, i, 1);                           \
        }                                                           \
    }                                                               \
}

GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)

static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

static
bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4343 { 4344 FloatRelation compare = float32_compare_quiet(a, b, s); 4345 return compare != float_relation_equal; 4346 } 4347 4348 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4349 { 4350 FloatRelation compare = float64_compare_quiet(a, b, s); 4351 return compare != float_relation_equal; 4352 } 4353 4354 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4355 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4356 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4357 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4358 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4359 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4360 4361 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4362 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4363 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4364 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4365 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4366 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4367 4368 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4369 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4370 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4371 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4372 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4373 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4374 4375 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4376 { 4377 FloatRelation compare = float16_compare(a, b, s); 4378 return compare == float_relation_greater; 4379 } 4380 4381 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4382 { 4383 FloatRelation compare = float32_compare(a, b, s); 4384 return compare == float_relation_greater; 4385 } 4386 4387 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4388 { 4389 FloatRelation compare = float64_compare(a, b, s); 4390 return compare == float_relation_greater; 4391 } 4392 4393 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4394 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4395 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4396 4397 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4398 { 4399 FloatRelation compare = float16_compare(a, b, s); 4400 return compare == float_relation_greater || 4401 compare == float_relation_equal; 4402 } 4403 4404 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4405 { 4406 FloatRelation compare = float32_compare(a, b, s); 4407 return compare == float_relation_greater || 4408 compare == float_relation_equal; 4409 } 4410 4411 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4412 { 4413 FloatRelation compare = float64_compare(a, b, s); 4414 return compare == float_relation_greater || 4415 compare == float_relation_equal; 4416 } 4417 4418 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4419 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4420 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4421 4422 /* Vector Floating-Point Classify Instruction */ 4423 target_ulong fclass_h(uint64_t frs1) 4424 { 4425 float16 f = frs1; 4426 bool sign = float16_is_neg(f); 4427 4428 if (float16_is_infinity(f)) { 4429 return sign ? 1 << 0 : 1 << 7; 4430 } else if (float16_is_zero(f)) { 4431 return sign ? 1 << 3 : 1 << 4; 4432 } else if (float16_is_zero_or_denormal(f)) { 4433 return sign ? 
1 << 2 : 1 << 5; 4434 } else if (float16_is_any_nan(f)) { 4435 float_status s = { }; /* for snan_bit_is_one */ 4436 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4437 } else { 4438 return sign ? 1 << 1 : 1 << 6; 4439 } 4440 } 4441 4442 target_ulong fclass_s(uint64_t frs1) 4443 { 4444 float32 f = frs1; 4445 bool sign = float32_is_neg(f); 4446 4447 if (float32_is_infinity(f)) { 4448 return sign ? 1 << 0 : 1 << 7; 4449 } else if (float32_is_zero(f)) { 4450 return sign ? 1 << 3 : 1 << 4; 4451 } else if (float32_is_zero_or_denormal(f)) { 4452 return sign ? 1 << 2 : 1 << 5; 4453 } else if (float32_is_any_nan(f)) { 4454 float_status s = { }; /* for snan_bit_is_one */ 4455 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4456 } else { 4457 return sign ? 1 << 1 : 1 << 6; 4458 } 4459 } 4460 4461 target_ulong fclass_d(uint64_t frs1) 4462 { 4463 float64 f = frs1; 4464 bool sign = float64_is_neg(f); 4465 4466 if (float64_is_infinity(f)) { 4467 return sign ? 1 << 0 : 1 << 7; 4468 } else if (float64_is_zero(f)) { 4469 return sign ? 1 << 3 : 1 << 4; 4470 } else if (float64_is_zero_or_denormal(f)) { 4471 return sign ? 1 << 2 : 1 << 5; 4472 } else if (float64_is_any_nan(f)) { 4473 float_status s = { }; /* for snan_bit_is_one */ 4474 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4475 } else { 4476 return sign ? 1 << 1 : 1 << 6; 4477 } 4478 } 4479 4480 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4481 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4482 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4483 GEN_VEXT_V(vfclass_v_h, 2) 4484 GEN_VEXT_V(vfclass_v_w, 4) 4485 GEN_VEXT_V(vfclass_v_d, 8) 4486 4487 /* Vector Floating-Point Merge Instruction */ 4488 4489 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4490 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4491 CPURISCVState *env, uint32_t desc) \ 4492 { \ 4493 uint32_t vm = vext_vm(desc); \ 4494 uint32_t vl = env->vl; \ 4495 uint32_t esz = sizeof(ETYPE); \ 4496 uint32_t total_elems = \ 4497 vext_get_total_elems(env, desc, esz); \ 4498 uint32_t vta = vext_vta(desc); \ 4499 uint32_t i; \ 4500 \ 4501 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4502 \ 4503 for (i = env->vstart; i < vl; i++) { \ 4504 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4505 *((ETYPE *)vd + H(i)) = \ 4506 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4507 } \ 4508 env->vstart = 0; \ 4509 /* set tail elements to 1s */ \ 4510 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4511 } 4512 4513 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4514 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4515 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4516 4517 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4518 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4519 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4520 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4521 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4522 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4523 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4524 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4525 4526 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. 
*/ 4527 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4528 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4529 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4530 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4531 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4532 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4533 4534 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4535 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4536 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4537 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4538 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4539 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4540 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4541 4542 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4543 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4544 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4545 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4546 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4547 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4548 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4549 4550 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4551 /* (TD, T2, TX2) */ 4552 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4553 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4554 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4555 /* 4556 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4557 */ 4558 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4559 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4560 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4561 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4562 4563 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4564 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4565 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4566 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4567 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4568 4569 /* 4570 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. 4571 */ 4572 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4573 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4574 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4575 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4576 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4577 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4578 4579 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4580 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4581 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4582 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4583 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4584 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4585 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4586 4587 /* 4588 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. 
 */
static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
{
    return float16_to_float32(a, true, s);
}

RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)

RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)

/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
#define NOP_UU_B uint8_t, uint16_t, uint32_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)

/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)

/*
 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float.
 */
RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)

/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)

/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float.
*/ 4639 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4640 { 4641 return float32_to_float16(a, true, s); 4642 } 4643 4644 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4645 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4646 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4647 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4648 4649 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16) 4650 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2) 4651 4652 /* 4653 * Vector Reduction Operations 4654 */ 4655 /* Vector Single-Width Integer Reduction Instructions */ 4656 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4657 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4658 void *vs2, CPURISCVState *env, \ 4659 uint32_t desc) \ 4660 { \ 4661 uint32_t vm = vext_vm(desc); \ 4662 uint32_t vl = env->vl; \ 4663 uint32_t esz = sizeof(TD); \ 4664 uint32_t vlenb = simd_maxsz(desc); \ 4665 uint32_t vta = vext_vta(desc); \ 4666 uint32_t i; \ 4667 TD s1 = *((TD *)vs1 + HD(0)); \ 4668 \ 4669 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4670 \ 4671 for (i = env->vstart; i < vl; i++) { \ 4672 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4673 if (!vm && !vext_elem_mask(v0, i)) { \ 4674 continue; \ 4675 } \ 4676 s1 = OP(s1, (TD)s2); \ 4677 } \ 4678 if (vl > 0) { \ 4679 *((TD *)vd + HD(0)) = s1; \ 4680 } \ 4681 env->vstart = 0; \ 4682 /* set tail elements to 1s */ \ 4683 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4684 } 4685 4686 /* vd[0] = sum(vs1[0], vs2[*]) */ 4687 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4688 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4689 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4690 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4691 4692 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4693 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4694 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4695 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4696 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4697 4698 /* vd[0] = max(vs1[0], vs2[*]) */ 4699 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4700 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4701 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4702 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4703 4704 /* vd[0] = minu(vs1[0], vs2[*]) */ 4705 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4706 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4707 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4708 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4709 4710 /* vd[0] = min(vs1[0], vs2[*]) */ 4711 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4712 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4713 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4714 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4715 4716 /* vd[0] = and(vs1[0], vs2[*]) */ 4717 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4718 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4719 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4720 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4721 4722 /* vd[0] = or(vs1[0], vs2[*]) */ 4723 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4724 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4725 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4726 
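/*
 * All GEN_VEXT_RED expansions share the same shape: the accumulator starts
 * as vs1[0], inactive elements of vs2 are simply skipped (they contribute
 * no identity value), and only vd[0] is written back when vl > 0; the rest
 * of vd up to vlenb is treated as tail and set to all 1s when vta is set.
 */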
GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4727 4728 /* vd[0] = xor(vs1[0], vs2[*]) */ 4729 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4730 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4731 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4732 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4733 4734 /* Vector Widening Integer Reduction Instructions */ 4735 /* signed sum reduction into double-width accumulator */ 4736 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4737 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4738 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4739 4740 /* Unsigned sum reduction into double-width accumulator */ 4741 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4742 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4743 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4744 4745 /* Vector Single-Width Floating-Point Reduction Instructions */ 4746 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4747 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4748 void *vs2, CPURISCVState *env, \ 4749 uint32_t desc) \ 4750 { \ 4751 uint32_t vm = vext_vm(desc); \ 4752 uint32_t vl = env->vl; \ 4753 uint32_t esz = sizeof(TD); \ 4754 uint32_t vlenb = simd_maxsz(desc); \ 4755 uint32_t vta = vext_vta(desc); \ 4756 uint32_t i; \ 4757 TD s1 = *((TD *)vs1 + HD(0)); \ 4758 \ 4759 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4760 \ 4761 for (i = env->vstart; i < vl; i++) { \ 4762 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4763 if (!vm && !vext_elem_mask(v0, i)) { \ 4764 continue; \ 4765 } \ 4766 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4767 } \ 4768 if (vl > 0) { \ 4769 *((TD *)vd + HD(0)) = s1; \ 4770 } \ 4771 env->vstart = 0; \ 4772 /* set tail elements to 1s */ \ 4773 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4774 } 4775 4776 /* Unordered sum */ 4777 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4778 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4779 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4780 4781 /* Ordered sum */ 4782 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4783 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4784 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4785 4786 /* Maximum value */ 4787 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, 4788 float16_maximum_number) 4789 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, 4790 float32_maximum_number) 4791 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, 4792 float64_maximum_number) 4793 4794 /* Minimum value */ 4795 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, 4796 float16_minimum_number) 4797 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, 4798 float32_minimum_number) 4799 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, 4800 float64_minimum_number) 4801 4802 /* Vector Widening Floating-Point Add Instructions */ 4803 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4804 { 4805 return float32_add(a, float16_to_float32(b, true, s), s); 4806 } 4807 4808 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4809 { 4810 return float64_add(a, float32_to_float64(b, s), s); 4811 } 4812 4813 /* Vector Widening Floating-Point Reduction Instructions */ 4814 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4815 GEN_VEXT_FRED(vfwredusum_vs_h, 
uint32_t, uint16_t, H4, H2, fwadd16) 4816 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4817 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4818 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4819 4820 /* 4821 * Vector Mask Operations 4822 */ 4823 /* Vector Mask-Register Logical Instructions */ 4824 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4825 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4826 void *vs2, CPURISCVState *env, \ 4827 uint32_t desc) \ 4828 { \ 4829 uint32_t vl = env->vl; \ 4830 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\ 4831 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4832 uint32_t i; \ 4833 int a, b; \ 4834 \ 4835 VSTART_CHECK_EARLY_EXIT(env, vl); \ 4836 \ 4837 for (i = env->vstart; i < vl; i++) { \ 4838 a = vext_elem_mask(vs1, i); \ 4839 b = vext_elem_mask(vs2, i); \ 4840 vext_set_elem_mask(vd, i, OP(b, a)); \ 4841 } \ 4842 env->vstart = 0; \ 4843 /* 4844 * mask destination register are always tail-agnostic 4845 * set tail elements to 1s 4846 */ \ 4847 if (vta_all_1s) { \ 4848 for (; i < total_elems; i++) { \ 4849 vext_set_elem_mask(vd, i, 1); \ 4850 } \ 4851 } \ 4852 } 4853 4854 #define DO_NAND(N, M) (!(N & M)) 4855 #define DO_ANDNOT(N, M) (N & !M) 4856 #define DO_NOR(N, M) (!(N | M)) 4857 #define DO_ORNOT(N, M) (N | !M) 4858 #define DO_XNOR(N, M) (!(N ^ M)) 4859 4860 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4861 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4862 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4863 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4864 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4865 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4866 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4867 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4868 4869 /* Vector count population in mask vcpop */ 4870 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4871 uint32_t desc) 4872 { 4873 target_ulong cnt = 0; 4874 uint32_t vm = vext_vm(desc); 4875 uint32_t vl = env->vl; 4876 int i; 4877 4878 for (i = env->vstart; i < vl; i++) { 4879 if (vm || vext_elem_mask(v0, i)) { 4880 if (vext_elem_mask(vs2, i)) { 4881 cnt++; 4882 } 4883 } 4884 } 4885 env->vstart = 0; 4886 return cnt; 4887 } 4888 4889 /* vfirst find-first-set mask bit */ 4890 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4891 uint32_t desc) 4892 { 4893 uint32_t vm = vext_vm(desc); 4894 uint32_t vl = env->vl; 4895 int i; 4896 4897 for (i = env->vstart; i < vl; i++) { 4898 if (vm || vext_elem_mask(v0, i)) { 4899 if (vext_elem_mask(vs2, i)) { 4900 return i; 4901 } 4902 } 4903 } 4904 env->vstart = 0; 4905 return -1LL; 4906 } 4907 4908 enum set_mask_type { 4909 ONLY_FIRST = 1, 4910 INCLUDE_FIRST, 4911 BEFORE_FIRST, 4912 }; 4913 4914 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4915 uint32_t desc, enum set_mask_type type) 4916 { 4917 uint32_t vm = vext_vm(desc); 4918 uint32_t vl = env->vl; 4919 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; 4920 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4921 uint32_t vma = vext_vma(desc); 4922 int i; 4923 bool first_mask_bit = false; 4924 4925 VSTART_CHECK_EARLY_EXIT(env, vl); 4926 4927 for (i = env->vstart; i < vl; i++) { 4928 if (!vm && !vext_elem_mask(v0, i)) { 4929 /* set masked-off elements to 1s */ 4930 if (vma) { 4931 vext_set_elem_mask(vd, i, 1); 4932 } 4933 continue; 4934 } 4935 /* write a zero to all following active elements */ 4936 if (first_mask_bit) { 4937 vext_set_elem_mask(vd, i, 0); 4938 continue; 4939 } 4940 if (vext_elem_mask(vs2, i)) { 4941 first_mask_bit = true; 4942 if (type 
void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}

/* Vector Iota Instruction */
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H)                                  \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
                  uint32_t desc)                                          \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t sum = 0;                                                     \
    int i;                                                                \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = sum;                                      \
        if (vext_elem_mask(vs2, i)) {                                     \
            sum++;                                                        \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    int i;                                                                \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
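/*
 * Worked example (illustrative only): with all elements active and the
 * source mask vs2 = {1, 0, 1, 0, 1} (element 0 first), viota.m writes the
 * running count of set mask bits *before* each element, i.e.
 * vd = {0, 1, 1, 2, 2}, while vid.v, which has no vector source, simply
 * writes vd = {0, 1, 2, 3, 4}.
 */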
/*
 * Vector Permutation Instructions
 */

/* Vector Slide Instructions */
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong offset = s1, i_min, i;                                   \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    i_min = MAX(env->vstart, offset);                                     \
    for (i = i_min; i < vl; i++) {                                        \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    target_ulong i_max, i_min, i;                                         \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl);                         \
    i_max = MAX(i_min, env->vstart);                                      \
    for (i = env->vstart; i < i_max; ++i) {                               \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1));              \
    }                                                                     \
                                                                          \
    for (i = i_max; i < vl; ++i) {                                        \
        if (vm || vext_elem_mask(v0, i)) {                                \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        }                                                                 \
    }                                                                     \
                                                                          \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
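/*
 * Worked example (illustrative only), vl = 4, OFFSET = x[rs1] = 1,
 * vs2 = {10, 20, 30, 40, ...}, all elements active:
 *
 *   vslideup.vx   : vd = {vd[0], 10, 20, 30}  (elements below OFFSET are
 *                                              left untouched)
 *   vslidedown.vx : vd = {20, 30, 40, vs2[4]} (source indices >= VLMAX
 *                                              read as zero instead)
 */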
#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                    \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1,         \
                                 void *vs2, CPURISCVState *env,           \
                                 uint32_t desc)                           \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == 0) {                                                     \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                 \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1,       \
                                   void *vs2, CPURISCVState *env,         \
                                   uint32_t desc)                         \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)                          \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
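/*
 * Worked example (illustrative only), vl = 4, vs2 = {10, 20, 30, 40},
 * x[rs1] = 99, all elements active:
 *
 *   vslide1up.vx   : vd = {99, 10, 20, 30}
 *   vslide1down.vx : vd = {20, 30, 40, 99}
 *
 * The vfslide1up.vf/vfslide1down.vf helpers above reuse the same slide1
 * code; only the scalar operand comes from the FP register file.
 */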
/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
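/*
 * Worked example (illustrative only), vl = VLMAX = 4, all elements active,
 * vs2 = {5, 6, 7, 8}, index vector vs1 = {3, 0, 9, 1}:
 *
 *   vrgather.vv : vd = {8, 5, 0, 6}   (the out-of-range index 9 yields 0)
 *
 * vrgatherei16.vv shares the code path above but always reads 16-bit
 * indices from vs1 (TS1 = uint16_t), regardless of the destination SEW.
 */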
#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, num * esz, total_elems * esz);             \
}

/* Compress into vd the elements of vs2 whose vs1 mask bit is set */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    if (startb >= maxsz) {
        env->vstart = 0;
        return;
    }

    if (HOST_BIG_ENDIAN && i % 8 != 0) {
        uint32_t j = ROUND_UP(i, 8);
        memcpy((uint8_t *)vd + H1(j - 1),
               (uint8_t *)vs2 + H1(j - 1),
               j - i);
        i = j;
    }

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - i);

    env->vstart = 0;
}
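/*
 * Note on vmvr.v (illustrative summary, assuming maxsz encodes the size of
 * the whole register group as set up at translation time): the copy ignores
 * the current vl entirely; only vstart, scaled to a byte offset by the
 * current SEW, limits where it starts.  On big-endian hosts the leading
 * partial 64-bit lane is handled separately because H1() swizzles byte
 * addresses within each 8-byte unit.
 */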
/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env, vl);                                     \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
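/*
 * Worked example (illustrative only): with SEW = 16 and a source element
 * value of 0xF0, vzext.vf2 produces 0x00F0 while vsext.vf2 produces 0xFFF0.
 * The vf4/vf8 variants widen by 4x/8x in the same way: the destination EEW
 * is always SEW, and the source EEW is SEW/2, SEW/4 or SEW/8.
 */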