/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that needs a host-endian fixup.
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is sufficient for the
 * watchpoint check. In user mode, there is no watchpoint support for now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. The guest software
 * can then return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            target_ulong addr = base + stride * i + (k << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
             uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

/*
 * A masked unit-stride load or store is a special case of a strided
 * access with stride = NF * sizeof(ETYPE).
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 *** unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC());
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC());
}

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());      \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC());                                    \
}

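/*
 * Illustrative sketch, not used by the indexed load/store helpers above:
 * vext_ldst_index() resolves segment field k of element i to the guest
 * address
 *     get_index_addr(base, i, vs2) + (k << esz)
 * i.e. the index register vs2 supplies an *unscaled* byte offset, and the
 * data maps to slot (i + k * max_elems) of vd. The hypothetical helper
 * below (its name and the G_GNUC_UNUSED marker are illustrative additions,
 * not part of the original code) spells out the address computation for
 * EEW=16 indices as used by the vlxei16/vsxei16 helpers.
 */
static G_GNUC_UNUSED target_ulong
example_idx16_elem_addr(target_ulong base, void *vs2,
                        uint32_t i, uint32_t k, uint32_t esz)
{
    /* idx_h() returns base plus the zero-extended 16-bit index element */
    return idx_h(base, i, vs2) + (k << esz);
}
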
GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << esz));
        if (i == 0) {
            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ?
(M) : (N)) 569 570 /* Unsigned min/max */ 571 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 572 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 573 574 /* 575 *** load and store whole register instructions 576 */ 577 static void 578 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 579 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra) 580 { 581 uint32_t i, k, off, pos; 582 uint32_t nf = vext_nf(desc); 583 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 584 uint32_t max_elems = vlenb >> esz; 585 586 k = env->vstart / max_elems; 587 off = env->vstart % max_elems; 588 589 if (off) { 590 /* load/store rest of elements of current segment pointed by vstart */ 591 for (pos = off; pos < max_elems; pos++, env->vstart++) { 592 target_ulong addr = base + ((pos + k * max_elems) << esz); 593 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra); 594 } 595 k++; 596 } 597 598 /* load/store elements for rest of segments */ 599 for (; k < nf; k++) { 600 for (i = 0; i < max_elems; i++, env->vstart++) { 601 target_ulong addr = base + ((i + k * max_elems) << esz); 602 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 603 } 604 } 605 606 env->vstart = 0; 607 } 608 609 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 610 void HELPER(NAME)(void *vd, target_ulong base, \ 611 CPURISCVState *env, uint32_t desc) \ 612 { \ 613 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 614 ctzl(sizeof(ETYPE)), GETPC()); \ 615 } 616 617 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 618 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 619 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 620 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 621 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 622 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 623 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 624 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 625 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 626 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 627 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 628 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 629 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 630 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 631 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 632 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 633 634 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 635 void HELPER(NAME)(void *vd, target_ulong base, \ 636 CPURISCVState *env, uint32_t desc) \ 637 { \ 638 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 639 ctzl(sizeof(ETYPE)), GETPC()); \ 640 } 641 642 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 643 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 644 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 645 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 646 647 /* 648 *** Vector Integer Arithmetic Instructions 649 */ 650 651 /* expand macro args before macro */ 652 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 653 654 /* (TD, T1, T2, TX1, TX2) */ 655 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 656 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 657 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 658 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 659 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 660 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 661 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 662 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 663 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 664 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 665 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 666 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 667 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 668 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 669 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 670 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 671 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 672 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 673 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 674 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 675 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 676 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 677 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 678 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 679 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 680 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 681 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 682 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 683 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 684 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 685 686 /* operation of two vector elements */ 687 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 688 689 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 690 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 691 { \ 692 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 693 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 694 *((TD *)vd + HD(i)) = OP(s2, s1); \ 695 } 696 #define DO_SUB(N, M) (N - M) 697 #define DO_RSUB(N, M) (M - N) 698 699 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 700 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 701 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 702 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 703 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 704 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 705 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 706 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 707 708 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 709 CPURISCVState *env, uint32_t desc, 710 opivv2_fn *fn) 711 { 712 uint32_t vm = vext_vm(desc); 713 uint32_t vl = env->vl; 714 uint32_t i; 715 716 for (i = env->vstart; i < vl; i++) { 717 if (!vm && !vext_elem_mask(v0, i)) { 718 continue; 719 } 720 fn(vd, vs1, vs2, i); 721 } 722 env->vstart = 0; 723 } 724 725 /* generate the helpers for OPIVV */ 726 #define GEN_VEXT_VV(NAME) \ 727 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 728 void *vs2, CPURISCVState *env, \ 
729 uint32_t desc) \ 730 { \ 731 do_vext_vv(vd, v0, vs1, vs2, env, desc, \ 732 do_##NAME); \ 733 } 734 735 GEN_VEXT_VV(vadd_vv_b) 736 GEN_VEXT_VV(vadd_vv_h) 737 GEN_VEXT_VV(vadd_vv_w) 738 GEN_VEXT_VV(vadd_vv_d) 739 GEN_VEXT_VV(vsub_vv_b) 740 GEN_VEXT_VV(vsub_vv_h) 741 GEN_VEXT_VV(vsub_vv_w) 742 GEN_VEXT_VV(vsub_vv_d) 743 744 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 745 746 /* 747 * (T1)s1 gives the real operator type. 748 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 749 */ 750 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 751 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 752 { \ 753 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 754 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 755 } 756 757 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 758 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 759 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 760 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 761 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 762 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 763 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 764 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 765 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 766 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 767 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 768 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 769 770 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 771 CPURISCVState *env, uint32_t desc, 772 opivx2_fn fn) 773 { 774 uint32_t vm = vext_vm(desc); 775 uint32_t vl = env->vl; 776 uint32_t i; 777 778 for (i = env->vstart; i < vl; i++) { 779 if (!vm && !vext_elem_mask(v0, i)) { 780 continue; 781 } 782 fn(vd, s1, vs2, i); 783 } 784 env->vstart = 0; 785 } 786 787 /* generate the helpers for OPIVX */ 788 #define GEN_VEXT_VX(NAME) \ 789 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 790 void *vs2, CPURISCVState *env, \ 791 uint32_t desc) \ 792 { \ 793 do_vext_vx(vd, v0, s1, vs2, env, desc, \ 794 do_##NAME); \ 795 } 796 797 GEN_VEXT_VX(vadd_vx_b) 798 GEN_VEXT_VX(vadd_vx_h) 799 GEN_VEXT_VX(vadd_vx_w) 800 GEN_VEXT_VX(vadd_vx_d) 801 GEN_VEXT_VX(vsub_vx_b) 802 GEN_VEXT_VX(vsub_vx_h) 803 GEN_VEXT_VX(vsub_vx_w) 804 GEN_VEXT_VX(vsub_vx_d) 805 GEN_VEXT_VX(vrsub_vx_b) 806 GEN_VEXT_VX(vrsub_vx_h) 807 GEN_VEXT_VX(vrsub_vx_w) 808 GEN_VEXT_VX(vrsub_vx_d) 809 810 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 811 { 812 intptr_t oprsz = simd_oprsz(desc); 813 intptr_t i; 814 815 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 816 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 817 } 818 } 819 820 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 821 { 822 intptr_t oprsz = simd_oprsz(desc); 823 intptr_t i; 824 825 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 826 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 827 } 828 } 829 830 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 831 { 832 intptr_t oprsz = simd_oprsz(desc); 833 intptr_t i; 834 835 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 836 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 837 } 838 } 839 840 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 841 { 842 intptr_t oprsz = simd_oprsz(desc); 843 intptr_t i; 844 845 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 846 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 847 } 848 } 849 850 /* Vector Widening Integer 
Add/Subtract */ 851 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 852 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 853 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 854 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 855 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 856 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 857 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 858 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 859 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 860 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 861 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 862 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 863 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 864 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 865 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 866 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 867 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 868 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 869 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 870 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 871 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 872 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 873 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 874 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 875 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 876 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 877 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 878 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 879 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 880 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 881 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 882 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 883 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 884 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 885 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 886 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 887 GEN_VEXT_VV(vwaddu_vv_b) 888 GEN_VEXT_VV(vwaddu_vv_h) 889 GEN_VEXT_VV(vwaddu_vv_w) 890 GEN_VEXT_VV(vwsubu_vv_b) 891 GEN_VEXT_VV(vwsubu_vv_h) 892 GEN_VEXT_VV(vwsubu_vv_w) 893 GEN_VEXT_VV(vwadd_vv_b) 894 GEN_VEXT_VV(vwadd_vv_h) 895 GEN_VEXT_VV(vwadd_vv_w) 896 GEN_VEXT_VV(vwsub_vv_b) 897 GEN_VEXT_VV(vwsub_vv_h) 898 GEN_VEXT_VV(vwsub_vv_w) 899 GEN_VEXT_VV(vwaddu_wv_b) 900 GEN_VEXT_VV(vwaddu_wv_h) 901 GEN_VEXT_VV(vwaddu_wv_w) 902 GEN_VEXT_VV(vwsubu_wv_b) 903 GEN_VEXT_VV(vwsubu_wv_h) 904 GEN_VEXT_VV(vwsubu_wv_w) 905 GEN_VEXT_VV(vwadd_wv_b) 906 GEN_VEXT_VV(vwadd_wv_h) 907 GEN_VEXT_VV(vwadd_wv_w) 908 GEN_VEXT_VV(vwsub_wv_b) 909 GEN_VEXT_VV(vwsub_wv_h) 910 GEN_VEXT_VV(vwsub_wv_w) 911 912 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 913 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 914 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 915 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 916 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 917 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 918 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 919 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 920 
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 921 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 922 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 923 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 924 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 925 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 926 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 927 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 928 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 929 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 930 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 931 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 932 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 933 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 934 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 935 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 936 GEN_VEXT_VX(vwaddu_vx_b) 937 GEN_VEXT_VX(vwaddu_vx_h) 938 GEN_VEXT_VX(vwaddu_vx_w) 939 GEN_VEXT_VX(vwsubu_vx_b) 940 GEN_VEXT_VX(vwsubu_vx_h) 941 GEN_VEXT_VX(vwsubu_vx_w) 942 GEN_VEXT_VX(vwadd_vx_b) 943 GEN_VEXT_VX(vwadd_vx_h) 944 GEN_VEXT_VX(vwadd_vx_w) 945 GEN_VEXT_VX(vwsub_vx_b) 946 GEN_VEXT_VX(vwsub_vx_h) 947 GEN_VEXT_VX(vwsub_vx_w) 948 GEN_VEXT_VX(vwaddu_wx_b) 949 GEN_VEXT_VX(vwaddu_wx_h) 950 GEN_VEXT_VX(vwaddu_wx_w) 951 GEN_VEXT_VX(vwsubu_wx_b) 952 GEN_VEXT_VX(vwsubu_wx_h) 953 GEN_VEXT_VX(vwsubu_wx_w) 954 GEN_VEXT_VX(vwadd_wx_b) 955 GEN_VEXT_VX(vwadd_wx_h) 956 GEN_VEXT_VX(vwadd_wx_w) 957 GEN_VEXT_VX(vwsub_wx_b) 958 GEN_VEXT_VX(vwsub_wx_h) 959 GEN_VEXT_VX(vwsub_wx_w) 960 961 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 962 #define DO_VADC(N, M, C) (N + M + C) 963 #define DO_VSBC(N, M, C) (N - M - C) 964 965 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 966 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 967 CPURISCVState *env, uint32_t desc) \ 968 { \ 969 uint32_t vl = env->vl; \ 970 uint32_t i; \ 971 \ 972 for (i = env->vstart; i < vl; i++) { \ 973 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 974 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 975 ETYPE carry = vext_elem_mask(v0, i); \ 976 \ 977 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 978 } \ 979 env->vstart = 0; \ 980 } 981 982 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 983 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 984 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 985 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 986 987 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 988 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 989 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 990 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 991 992 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 993 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 994 CPURISCVState *env, uint32_t desc) \ 995 { \ 996 uint32_t vl = env->vl; \ 997 uint32_t i; \ 998 \ 999 for (i = env->vstart; i < vl; i++) { \ 1000 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1001 ETYPE carry = vext_elem_mask(v0, i); \ 1002 \ 1003 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1004 } \ 1005 env->vstart = 0; \ 1006 } 1007 1008 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1009 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1010 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1011 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1012 1013 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1014 
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1015 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1016 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1017 1018 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1019 (__typeof(N))(N + M) < N) 1020 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 1021 1022 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1023 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1024 CPURISCVState *env, uint32_t desc) \ 1025 { \ 1026 uint32_t vl = env->vl; \ 1027 uint32_t vm = vext_vm(desc); \ 1028 uint32_t i; \ 1029 \ 1030 for (i = env->vstart; i < vl; i++) { \ 1031 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1032 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1033 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1034 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1035 } \ 1036 env->vstart = 0; \ 1037 } 1038 1039 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1040 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1041 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1042 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1043 1044 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1045 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1046 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1047 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1048 1049 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1050 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1051 void *vs2, CPURISCVState *env, uint32_t desc) \ 1052 { \ 1053 uint32_t vl = env->vl; \ 1054 uint32_t vm = vext_vm(desc); \ 1055 uint32_t i; \ 1056 \ 1057 for (i = env->vstart; i < vl; i++) { \ 1058 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1059 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1060 vext_set_elem_mask(vd, i, \ 1061 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1062 } \ 1063 env->vstart = 0; \ 1064 } 1065 1066 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1067 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1068 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1069 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1070 1071 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1072 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1073 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1074 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1075 1076 /* Vector Bitwise Logical Instructions */ 1077 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1078 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1079 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1080 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1081 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1082 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1083 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1084 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1085 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1086 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1087 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1088 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1089 GEN_VEXT_VV(vand_vv_b) 1090 GEN_VEXT_VV(vand_vv_h) 1091 GEN_VEXT_VV(vand_vv_w) 1092 GEN_VEXT_VV(vand_vv_d) 1093 GEN_VEXT_VV(vor_vv_b) 1094 GEN_VEXT_VV(vor_vv_h) 1095 GEN_VEXT_VV(vor_vv_w) 1096 GEN_VEXT_VV(vor_vv_d) 1097 GEN_VEXT_VV(vxor_vv_b) 1098 GEN_VEXT_VV(vxor_vv_h) 1099 GEN_VEXT_VV(vxor_vv_w) 1100 GEN_VEXT_VV(vxor_vv_d) 1101 1102 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, 
H1, H1, DO_AND) 1103 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1104 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1105 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1106 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1107 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1108 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1109 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1110 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1111 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1112 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1113 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1114 GEN_VEXT_VX(vand_vx_b) 1115 GEN_VEXT_VX(vand_vx_h) 1116 GEN_VEXT_VX(vand_vx_w) 1117 GEN_VEXT_VX(vand_vx_d) 1118 GEN_VEXT_VX(vor_vx_b) 1119 GEN_VEXT_VX(vor_vx_h) 1120 GEN_VEXT_VX(vor_vx_w) 1121 GEN_VEXT_VX(vor_vx_d) 1122 GEN_VEXT_VX(vxor_vx_b) 1123 GEN_VEXT_VX(vxor_vx_h) 1124 GEN_VEXT_VX(vxor_vx_w) 1125 GEN_VEXT_VX(vxor_vx_d) 1126 1127 /* Vector Single-Width Bit Shift Instructions */ 1128 #define DO_SLL(N, M) (N << (M)) 1129 #define DO_SRL(N, M) (N >> (M)) 1130 1131 /* generate the helpers for shift instructions with two vector operators */ 1132 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1133 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1134 void *vs2, CPURISCVState *env, uint32_t desc) \ 1135 { \ 1136 uint32_t vm = vext_vm(desc); \ 1137 uint32_t vl = env->vl; \ 1138 uint32_t i; \ 1139 \ 1140 for (i = env->vstart; i < vl; i++) { \ 1141 if (!vm && !vext_elem_mask(v0, i)) { \ 1142 continue; \ 1143 } \ 1144 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1145 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1146 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1147 } \ 1148 env->vstart = 0; \ 1149 } 1150 1151 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1152 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1153 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1154 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1155 1156 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1157 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1158 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1159 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1160 1161 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1162 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1163 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1164 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1165 1166 /* generate the helpers for shift instructions with one vector and one scalar */ 1167 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1168 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1169 void *vs2, CPURISCVState *env, uint32_t desc) \ 1170 { \ 1171 uint32_t vm = vext_vm(desc); \ 1172 uint32_t vl = env->vl; \ 1173 uint32_t i; \ 1174 \ 1175 for (i = env->vstart; i < vl; i++) { \ 1176 if (!vm && !vext_elem_mask(v0, i)) { \ 1177 continue; \ 1178 } \ 1179 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1180 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1181 } \ 1182 env->vstart = 0; \ 1183 } 1184 1185 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1186 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1187 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1188 GEN_VEXT_SHIFT_VX(vsll_vx_d, 
uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1189 1190 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1191 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1192 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1193 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1194 1195 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1196 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1197 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1198 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1199 1200 /* Vector Narrowing Integer Right Shift Instructions */ 1201 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1202 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1203 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1204 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1205 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1206 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1207 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1208 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1209 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1210 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1211 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1212 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1213 1214 /* Vector Integer Comparison Instructions */ 1215 #define DO_MSEQ(N, M) (N == M) 1216 #define DO_MSNE(N, M) (N != M) 1217 #define DO_MSLT(N, M) (N < M) 1218 #define DO_MSLE(N, M) (N <= M) 1219 #define DO_MSGT(N, M) (N > M) 1220 1221 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1222 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1223 CPURISCVState *env, uint32_t desc) \ 1224 { \ 1225 uint32_t vm = vext_vm(desc); \ 1226 uint32_t vl = env->vl; \ 1227 uint32_t i; \ 1228 \ 1229 for (i = env->vstart; i < vl; i++) { \ 1230 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1231 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1232 if (!vm && !vext_elem_mask(v0, i)) { \ 1233 continue; \ 1234 } \ 1235 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1236 } \ 1237 env->vstart = 0; \ 1238 } 1239 1240 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1241 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1242 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1243 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1244 1245 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1246 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1247 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1248 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1249 1250 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1251 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1252 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1253 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1254 1255 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1256 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1257 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1258 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1259 1260 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1261 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1262 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1263 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1264 
1265 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1266 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1267 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1268 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1269 1270 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1271 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1272 CPURISCVState *env, uint32_t desc) \ 1273 { \ 1274 uint32_t vm = vext_vm(desc); \ 1275 uint32_t vl = env->vl; \ 1276 uint32_t i; \ 1277 \ 1278 for (i = env->vstart; i < vl; i++) { \ 1279 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1280 if (!vm && !vext_elem_mask(v0, i)) { \ 1281 continue; \ 1282 } \ 1283 vext_set_elem_mask(vd, i, \ 1284 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1285 } \ 1286 env->vstart = 0; \ 1287 } 1288 1289 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1290 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1291 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1292 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1293 1294 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1295 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1296 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1297 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1298 1299 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1300 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1301 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1302 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1303 1304 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1305 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1306 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1307 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1308 1309 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1310 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1311 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1312 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1313 1314 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1315 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1316 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1317 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1318 1319 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1320 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1321 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1322 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1323 1324 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1325 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1326 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1327 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1328 1329 /* Vector Integer Min/Max Instructions */ 1330 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1331 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1332 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1333 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1334 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1335 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1336 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1337 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1338 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1339 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1340 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1341 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1342 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1343 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1344 RVVCALL(OPIVV2, vmax_vv_w, 
OP_SSS_W, H4, H4, H4, DO_MAX) 1345 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1346 GEN_VEXT_VV(vminu_vv_b) 1347 GEN_VEXT_VV(vminu_vv_h) 1348 GEN_VEXT_VV(vminu_vv_w) 1349 GEN_VEXT_VV(vminu_vv_d) 1350 GEN_VEXT_VV(vmin_vv_b) 1351 GEN_VEXT_VV(vmin_vv_h) 1352 GEN_VEXT_VV(vmin_vv_w) 1353 GEN_VEXT_VV(vmin_vv_d) 1354 GEN_VEXT_VV(vmaxu_vv_b) 1355 GEN_VEXT_VV(vmaxu_vv_h) 1356 GEN_VEXT_VV(vmaxu_vv_w) 1357 GEN_VEXT_VV(vmaxu_vv_d) 1358 GEN_VEXT_VV(vmax_vv_b) 1359 GEN_VEXT_VV(vmax_vv_h) 1360 GEN_VEXT_VV(vmax_vv_w) 1361 GEN_VEXT_VV(vmax_vv_d) 1362 1363 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1364 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1365 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1366 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1367 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1368 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1369 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1370 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1371 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1372 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1373 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1374 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1375 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1376 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1377 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1378 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1379 GEN_VEXT_VX(vminu_vx_b) 1380 GEN_VEXT_VX(vminu_vx_h) 1381 GEN_VEXT_VX(vminu_vx_w) 1382 GEN_VEXT_VX(vminu_vx_d) 1383 GEN_VEXT_VX(vmin_vx_b) 1384 GEN_VEXT_VX(vmin_vx_h) 1385 GEN_VEXT_VX(vmin_vx_w) 1386 GEN_VEXT_VX(vmin_vx_d) 1387 GEN_VEXT_VX(vmaxu_vx_b) 1388 GEN_VEXT_VX(vmaxu_vx_h) 1389 GEN_VEXT_VX(vmaxu_vx_w) 1390 GEN_VEXT_VX(vmaxu_vx_d) 1391 GEN_VEXT_VX(vmax_vx_b) 1392 GEN_VEXT_VX(vmax_vx_h) 1393 GEN_VEXT_VX(vmax_vx_w) 1394 GEN_VEXT_VX(vmax_vx_d) 1395 1396 /* Vector Single-Width Integer Multiply Instructions */ 1397 #define DO_MUL(N, M) (N * M) 1398 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1399 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1400 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1401 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1402 GEN_VEXT_VV(vmul_vv_b) 1403 GEN_VEXT_VV(vmul_vv_h) 1404 GEN_VEXT_VV(vmul_vv_w) 1405 GEN_VEXT_VV(vmul_vv_d) 1406 1407 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1408 { 1409 return (int16_t)s2 * (int16_t)s1 >> 8; 1410 } 1411 1412 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1413 { 1414 return (int32_t)s2 * (int32_t)s1 >> 16; 1415 } 1416 1417 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1418 { 1419 return (int64_t)s2 * (int64_t)s1 >> 32; 1420 } 1421 1422 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1423 { 1424 uint64_t hi_64, lo_64; 1425 1426 muls64(&lo_64, &hi_64, s1, s2); 1427 return hi_64; 1428 } 1429 1430 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1431 { 1432 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1433 } 1434 1435 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1436 { 1437 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1438 } 1439 1440 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1441 { 1442 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1443 } 1444 1445 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1446 { 1447 uint64_t hi_64, lo_64; 1448 1449 mulu64(&lo_64, &hi_64, s2, s1); 1450 return hi_64; 1451 } 1452 1453 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1454 { 1455 return (int16_t)s2 * 
(uint16_t)s1 >> 8; 1456 } 1457 1458 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1459 { 1460 return (int32_t)s2 * (uint32_t)s1 >> 16; 1461 } 1462 1463 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1464 { 1465 return (int64_t)s2 * (uint64_t)s1 >> 32; 1466 } 1467 1468 /* 1469 * Let A = signed operand, 1470 * B = unsigned operand 1471 * P = mulu64(A, B), unsigned product 1472 * 1473 * LET X = 2 ** 64 - A, 2's complement of A 1474 * SP = signed product 1475 * THEN 1476 * IF A < 0 1477 * SP = -X * B 1478 * = -(2 ** 64 - A) * B 1479 * = A * B - 2 ** 64 * B 1480 * = P - 2 ** 64 * B 1481 * ELSE 1482 * SP = P 1483 * THEN 1484 * HI_P -= (A < 0 ? B : 0) 1485 */ 1486 1487 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1488 { 1489 uint64_t hi_64, lo_64; 1490 1491 mulu64(&lo_64, &hi_64, s2, s1); 1492 1493 hi_64 -= s2 < 0 ? s1 : 0; 1494 return hi_64; 1495 } 1496 1497 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1498 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1499 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1500 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1501 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1502 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1503 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1504 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1505 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1506 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1507 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1508 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1509 GEN_VEXT_VV(vmulh_vv_b) 1510 GEN_VEXT_VV(vmulh_vv_h) 1511 GEN_VEXT_VV(vmulh_vv_w) 1512 GEN_VEXT_VV(vmulh_vv_d) 1513 GEN_VEXT_VV(vmulhu_vv_b) 1514 GEN_VEXT_VV(vmulhu_vv_h) 1515 GEN_VEXT_VV(vmulhu_vv_w) 1516 GEN_VEXT_VV(vmulhu_vv_d) 1517 GEN_VEXT_VV(vmulhsu_vv_b) 1518 GEN_VEXT_VV(vmulhsu_vv_h) 1519 GEN_VEXT_VV(vmulhsu_vv_w) 1520 GEN_VEXT_VV(vmulhsu_vv_d) 1521 1522 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1523 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1524 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1525 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1526 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1527 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1528 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1529 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1530 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1531 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1532 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1533 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1534 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1535 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1536 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1537 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1538 GEN_VEXT_VX(vmul_vx_b) 1539 GEN_VEXT_VX(vmul_vx_h) 1540 GEN_VEXT_VX(vmul_vx_w) 1541 GEN_VEXT_VX(vmul_vx_d) 1542 GEN_VEXT_VX(vmulh_vx_b) 1543 GEN_VEXT_VX(vmulh_vx_h) 1544 GEN_VEXT_VX(vmulh_vx_w) 1545 GEN_VEXT_VX(vmulh_vx_d) 1546 GEN_VEXT_VX(vmulhu_vx_b) 1547 GEN_VEXT_VX(vmulhu_vx_h) 1548 GEN_VEXT_VX(vmulhu_vx_w) 1549 GEN_VEXT_VX(vmulhu_vx_d) 1550 GEN_VEXT_VX(vmulhsu_vx_b) 1551 GEN_VEXT_VX(vmulhsu_vx_h) 1552 GEN_VEXT_VX(vmulhsu_vx_w) 1553 GEN_VEXT_VX(vmulhsu_vx_d) 1554 1555 /* Vector 
Integer Divide Instructions */ 1556 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1557 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1558 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1559 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1560 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1561 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1562 1563 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1564 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1565 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1566 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1567 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1568 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1569 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1570 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1571 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1572 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1573 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1574 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1575 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1576 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1577 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1578 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1579 GEN_VEXT_VV(vdivu_vv_b) 1580 GEN_VEXT_VV(vdivu_vv_h) 1581 GEN_VEXT_VV(vdivu_vv_w) 1582 GEN_VEXT_VV(vdivu_vv_d) 1583 GEN_VEXT_VV(vdiv_vv_b) 1584 GEN_VEXT_VV(vdiv_vv_h) 1585 GEN_VEXT_VV(vdiv_vv_w) 1586 GEN_VEXT_VV(vdiv_vv_d) 1587 GEN_VEXT_VV(vremu_vv_b) 1588 GEN_VEXT_VV(vremu_vv_h) 1589 GEN_VEXT_VV(vremu_vv_w) 1590 GEN_VEXT_VV(vremu_vv_d) 1591 GEN_VEXT_VV(vrem_vv_b) 1592 GEN_VEXT_VV(vrem_vv_h) 1593 GEN_VEXT_VV(vrem_vv_w) 1594 GEN_VEXT_VV(vrem_vv_d) 1595 1596 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1597 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1598 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1599 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1600 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1601 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1602 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1603 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1604 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1605 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1606 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1607 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1608 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1609 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1610 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1611 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1612 GEN_VEXT_VX(vdivu_vx_b) 1613 GEN_VEXT_VX(vdivu_vx_h) 1614 GEN_VEXT_VX(vdivu_vx_w) 1615 GEN_VEXT_VX(vdivu_vx_d) 1616 GEN_VEXT_VX(vdiv_vx_b) 1617 GEN_VEXT_VX(vdiv_vx_h) 1618 GEN_VEXT_VX(vdiv_vx_w) 1619 GEN_VEXT_VX(vdiv_vx_d) 1620 GEN_VEXT_VX(vremu_vx_b) 1621 GEN_VEXT_VX(vremu_vx_h) 1622 GEN_VEXT_VX(vremu_vx_w) 1623 GEN_VEXT_VX(vremu_vx_d) 1624 GEN_VEXT_VX(vrem_vx_b) 1625 GEN_VEXT_VX(vrem_vx_h) 1626 GEN_VEXT_VX(vrem_vx_w) 1627 GEN_VEXT_VX(vrem_vx_d) 1628 1629 /* Vector Widening Integer Multiply Instructions */ 1630 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1631 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1632 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1633 
RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1634 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1635 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1636 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1637 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1638 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1639 GEN_VEXT_VV(vwmul_vv_b) 1640 GEN_VEXT_VV(vwmul_vv_h) 1641 GEN_VEXT_VV(vwmul_vv_w) 1642 GEN_VEXT_VV(vwmulu_vv_b) 1643 GEN_VEXT_VV(vwmulu_vv_h) 1644 GEN_VEXT_VV(vwmulu_vv_w) 1645 GEN_VEXT_VV(vwmulsu_vv_b) 1646 GEN_VEXT_VV(vwmulsu_vv_h) 1647 GEN_VEXT_VV(vwmulsu_vv_w) 1648 1649 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1650 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1651 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1652 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1653 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1654 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1655 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1656 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1657 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1658 GEN_VEXT_VX(vwmul_vx_b) 1659 GEN_VEXT_VX(vwmul_vx_h) 1660 GEN_VEXT_VX(vwmul_vx_w) 1661 GEN_VEXT_VX(vwmulu_vx_b) 1662 GEN_VEXT_VX(vwmulu_vx_h) 1663 GEN_VEXT_VX(vwmulu_vx_w) 1664 GEN_VEXT_VX(vwmulsu_vx_b) 1665 GEN_VEXT_VX(vwmulsu_vx_h) 1666 GEN_VEXT_VX(vwmulsu_vx_w) 1667 1668 /* Vector Single-Width Integer Multiply-Add Instructions */ 1669 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1670 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1671 { \ 1672 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1673 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1674 TD d = *((TD *)vd + HD(i)); \ 1675 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1676 } 1677 1678 #define DO_MACC(N, M, D) (M * N + D) 1679 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1680 #define DO_MADD(N, M, D) (M * D + N) 1681 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1682 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1683 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1684 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1685 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1686 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1687 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1688 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1689 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1690 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1691 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1692 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1693 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1694 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1695 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1696 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1697 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1698 GEN_VEXT_VV(vmacc_vv_b) 1699 GEN_VEXT_VV(vmacc_vv_h) 1700 GEN_VEXT_VV(vmacc_vv_w) 1701 GEN_VEXT_VV(vmacc_vv_d) 1702 GEN_VEXT_VV(vnmsac_vv_b) 1703 GEN_VEXT_VV(vnmsac_vv_h) 1704 GEN_VEXT_VV(vnmsac_vv_w) 1705 GEN_VEXT_VV(vnmsac_vv_d) 1706 GEN_VEXT_VV(vmadd_vv_b) 1707 GEN_VEXT_VV(vmadd_vv_h) 1708 GEN_VEXT_VV(vmadd_vv_w) 1709 GEN_VEXT_VV(vmadd_vv_d) 1710 GEN_VEXT_VV(vnmsub_vv_b) 1711 GEN_VEXT_VV(vnmsub_vv_h) 1712 GEN_VEXT_VV(vnmsub_vv_w) 1713 GEN_VEXT_VV(vnmsub_vv_d) 
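
/*
 * Illustrative sketch (kept under "#if 0", so it is never built): the
 * four single-width integer multiply-add ops above differ only in
 * which operand acts as the accumulator.  With the OPIVV3 argument
 * order OP(s2, s1, d), where d is the current destination element:
 *
 *     vmacc:   d =  (s1 * s2) + d      (DO_MACC)
 *     vnmsac:  d = -(s1 * s2) + d      (DO_NMSAC)
 *     vmadd:   d =  (s1 * d)  + s2     (DO_MADD)
 *     vnmsub:  d = -(s1 * d)  + s2     (DO_NMSUB)
 *
 * The example_* functions below are hypothetical restatements of
 * DO_MACC/DO_MADD for one element width, not part of the helpers.
 */
#if 0
static int32_t example_macc_w(int32_t s2, int32_t s1, int32_t d)
{
    return s1 * s2 + d;     /* vmacc.vv: vd[i] += vs1[i] * vs2[i] */
}

static int32_t example_madd_w(int32_t s2, int32_t s1, int32_t d)
{
    return s1 * d + s2;     /* vmadd.vv: vd[i] = vd[i] * vs1[i] + vs2[i] */
}
#endif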
1714 1715 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1716 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1717 { \ 1718 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1719 TD d = *((TD *)vd + HD(i)); \ 1720 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1721 } 1722 1723 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1724 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1725 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1726 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1727 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1728 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1729 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1730 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1731 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1732 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1733 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1734 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1735 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1736 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1737 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1738 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1739 GEN_VEXT_VX(vmacc_vx_b) 1740 GEN_VEXT_VX(vmacc_vx_h) 1741 GEN_VEXT_VX(vmacc_vx_w) 1742 GEN_VEXT_VX(vmacc_vx_d) 1743 GEN_VEXT_VX(vnmsac_vx_b) 1744 GEN_VEXT_VX(vnmsac_vx_h) 1745 GEN_VEXT_VX(vnmsac_vx_w) 1746 GEN_VEXT_VX(vnmsac_vx_d) 1747 GEN_VEXT_VX(vmadd_vx_b) 1748 GEN_VEXT_VX(vmadd_vx_h) 1749 GEN_VEXT_VX(vmadd_vx_w) 1750 GEN_VEXT_VX(vmadd_vx_d) 1751 GEN_VEXT_VX(vnmsub_vx_b) 1752 GEN_VEXT_VX(vnmsub_vx_h) 1753 GEN_VEXT_VX(vnmsub_vx_w) 1754 GEN_VEXT_VX(vnmsub_vx_d) 1755 1756 /* Vector Widening Integer Multiply-Add Instructions */ 1757 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1758 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1759 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1760 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1761 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1762 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1763 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1764 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1765 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1766 GEN_VEXT_VV(vwmaccu_vv_b) 1767 GEN_VEXT_VV(vwmaccu_vv_h) 1768 GEN_VEXT_VV(vwmaccu_vv_w) 1769 GEN_VEXT_VV(vwmacc_vv_b) 1770 GEN_VEXT_VV(vwmacc_vv_h) 1771 GEN_VEXT_VV(vwmacc_vv_w) 1772 GEN_VEXT_VV(vwmaccsu_vv_b) 1773 GEN_VEXT_VV(vwmaccsu_vv_h) 1774 GEN_VEXT_VV(vwmaccsu_vv_w) 1775 1776 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1777 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1778 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1779 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1780 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1781 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1782 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1783 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1784 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1785 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1786 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1787 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1788 GEN_VEXT_VX(vwmaccu_vx_b) 1789 GEN_VEXT_VX(vwmaccu_vx_h) 1790 GEN_VEXT_VX(vwmaccu_vx_w) 1791 
GEN_VEXT_VX(vwmacc_vx_b) 1792 GEN_VEXT_VX(vwmacc_vx_h) 1793 GEN_VEXT_VX(vwmacc_vx_w) 1794 GEN_VEXT_VX(vwmaccsu_vx_b) 1795 GEN_VEXT_VX(vwmaccsu_vx_h) 1796 GEN_VEXT_VX(vwmaccsu_vx_w) 1797 GEN_VEXT_VX(vwmaccus_vx_b) 1798 GEN_VEXT_VX(vwmaccus_vx_h) 1799 GEN_VEXT_VX(vwmaccus_vx_w) 1800 1801 /* Vector Integer Merge and Move Instructions */ 1802 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1803 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1804 uint32_t desc) \ 1805 { \ 1806 uint32_t vl = env->vl; \ 1807 uint32_t i; \ 1808 \ 1809 for (i = env->vstart; i < vl; i++) { \ 1810 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1811 *((ETYPE *)vd + H(i)) = s1; \ 1812 } \ 1813 env->vstart = 0; \ 1814 } 1815 1816 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1817 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1818 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1819 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1820 1821 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1822 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1823 uint32_t desc) \ 1824 { \ 1825 uint32_t vl = env->vl; \ 1826 uint32_t i; \ 1827 \ 1828 for (i = env->vstart; i < vl; i++) { \ 1829 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1830 } \ 1831 env->vstart = 0; \ 1832 } 1833 1834 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1835 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1836 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1837 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1838 1839 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1840 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1841 CPURISCVState *env, uint32_t desc) \ 1842 { \ 1843 uint32_t vl = env->vl; \ 1844 uint32_t i; \ 1845 \ 1846 for (i = env->vstart; i < vl; i++) { \ 1847 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 1848 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1849 } \ 1850 env->vstart = 0; \ 1851 } 1852 1853 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1854 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1855 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1856 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1857 1858 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1859 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1860 void *vs2, CPURISCVState *env, uint32_t desc) \ 1861 { \ 1862 uint32_t vl = env->vl; \ 1863 uint32_t i; \ 1864 \ 1865 for (i = env->vstart; i < vl; i++) { \ 1866 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1867 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 1868 (ETYPE)(target_long)s1); \ 1869 *((ETYPE *)vd + H(i)) = d; \ 1870 } \ 1871 env->vstart = 0; \ 1872 } 1873 1874 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1875 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1876 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1877 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1878 1879 /* 1880 *** Vector Fixed-Point Arithmetic Instructions 1881 */ 1882 1883 /* Vector Single-Width Saturating Add and Subtract */ 1884 1885 /* 1886 * As fixed point instructions probably have round mode and saturation, 1887 * define common macros for fixed point here. 
1888 */ 1889 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1890 CPURISCVState *env, int vxrm); 1891 1892 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1893 static inline void \ 1894 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1895 CPURISCVState *env, int vxrm) \ 1896 { \ 1897 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1898 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1899 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1900 } 1901 1902 static inline void 1903 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1904 CPURISCVState *env, 1905 uint32_t vl, uint32_t vm, int vxrm, 1906 opivv2_rm_fn *fn) 1907 { 1908 for (uint32_t i = env->vstart; i < vl; i++) { 1909 if (!vm && !vext_elem_mask(v0, i)) { 1910 continue; 1911 } 1912 fn(vd, vs1, vs2, i, env, vxrm); 1913 } 1914 env->vstart = 0; 1915 } 1916 1917 static inline void 1918 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1919 CPURISCVState *env, 1920 uint32_t desc, 1921 opivv2_rm_fn *fn) 1922 { 1923 uint32_t vm = vext_vm(desc); 1924 uint32_t vl = env->vl; 1925 1926 switch (env->vxrm) { 1927 case 0: /* rnu */ 1928 vext_vv_rm_1(vd, v0, vs1, vs2, 1929 env, vl, vm, 0, fn); 1930 break; 1931 case 1: /* rne */ 1932 vext_vv_rm_1(vd, v0, vs1, vs2, 1933 env, vl, vm, 1, fn); 1934 break; 1935 case 2: /* rdn */ 1936 vext_vv_rm_1(vd, v0, vs1, vs2, 1937 env, vl, vm, 2, fn); 1938 break; 1939 default: /* rod */ 1940 vext_vv_rm_1(vd, v0, vs1, vs2, 1941 env, vl, vm, 3, fn); 1942 break; 1943 } 1944 } 1945 1946 /* generate helpers for fixed point instructions with OPIVV format */ 1947 #define GEN_VEXT_VV_RM(NAME) \ 1948 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1949 CPURISCVState *env, uint32_t desc) \ 1950 { \ 1951 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 1952 do_##NAME); \ 1953 } 1954 1955 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 1956 { 1957 uint8_t res = a + b; 1958 if (res < a) { 1959 res = UINT8_MAX; 1960 env->vxsat = 0x1; 1961 } 1962 return res; 1963 } 1964 1965 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1966 uint16_t b) 1967 { 1968 uint16_t res = a + b; 1969 if (res < a) { 1970 res = UINT16_MAX; 1971 env->vxsat = 0x1; 1972 } 1973 return res; 1974 } 1975 1976 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1977 uint32_t b) 1978 { 1979 uint32_t res = a + b; 1980 if (res < a) { 1981 res = UINT32_MAX; 1982 env->vxsat = 0x1; 1983 } 1984 return res; 1985 } 1986 1987 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1988 uint64_t b) 1989 { 1990 uint64_t res = a + b; 1991 if (res < a) { 1992 res = UINT64_MAX; 1993 env->vxsat = 0x1; 1994 } 1995 return res; 1996 } 1997 1998 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 1999 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2000 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2001 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2002 GEN_VEXT_VV_RM(vsaddu_vv_b) 2003 GEN_VEXT_VV_RM(vsaddu_vv_h) 2004 GEN_VEXT_VV_RM(vsaddu_vv_w) 2005 GEN_VEXT_VV_RM(vsaddu_vv_d) 2006 2007 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2008 CPURISCVState *env, int vxrm); 2009 2010 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2011 static inline void \ 2012 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2013 CPURISCVState *env, int vxrm) \ 2014 { \ 2015 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2016 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2017 } 2018 2019 
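
/*
 * Illustrative sketch (kept under "#if 0", never built): how the
 * fixed-point pieces above compose for one instruction.  Assuming
 * OP_UUU_B maps every element type to uint8_t, as its use with H1
 * suggests, RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1,
 * saddu8) expands to a per-element function roughly like the one
 * below, and GEN_VEXT_VV_RM(vsaddu_vv_b) wraps it in the masked,
 * vxrm-dispatching loop of vext_vv_rm_2()/vext_vv_rm_1().
 */
#if 0
static inline void
do_vsaddu_vv_b_sketch(void *vd, void *vs1, void *vs2, int i,
                      CPURISCVState *env, int vxrm)
{
    uint8_t s1 = *((uint8_t *)vs1 + H1(i));
    uint8_t s2 = *((uint8_t *)vs2 + H1(i));

    /* saddu8() clamps to UINT8_MAX and sets env->vxsat on carry out. */
    *((uint8_t *)vd + H1(i)) = saddu8(env, vxrm, s2, s1);
}
#endif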
static inline void 2020 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2021 CPURISCVState *env, 2022 uint32_t vl, uint32_t vm, int vxrm, 2023 opivx2_rm_fn *fn) 2024 { 2025 for (uint32_t i = env->vstart; i < vl; i++) { 2026 if (!vm && !vext_elem_mask(v0, i)) { 2027 continue; 2028 } 2029 fn(vd, s1, vs2, i, env, vxrm); 2030 } 2031 env->vstart = 0; 2032 } 2033 2034 static inline void 2035 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2036 CPURISCVState *env, 2037 uint32_t desc, 2038 opivx2_rm_fn *fn) 2039 { 2040 uint32_t vm = vext_vm(desc); 2041 uint32_t vl = env->vl; 2042 2043 switch (env->vxrm) { 2044 case 0: /* rnu */ 2045 vext_vx_rm_1(vd, v0, s1, vs2, 2046 env, vl, vm, 0, fn); 2047 break; 2048 case 1: /* rne */ 2049 vext_vx_rm_1(vd, v0, s1, vs2, 2050 env, vl, vm, 1, fn); 2051 break; 2052 case 2: /* rdn */ 2053 vext_vx_rm_1(vd, v0, s1, vs2, 2054 env, vl, vm, 2, fn); 2055 break; 2056 default: /* rod */ 2057 vext_vx_rm_1(vd, v0, s1, vs2, 2058 env, vl, vm, 3, fn); 2059 break; 2060 } 2061 } 2062 2063 /* generate helpers for fixed point instructions with OPIVX format */ 2064 #define GEN_VEXT_VX_RM(NAME) \ 2065 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2066 void *vs2, CPURISCVState *env, uint32_t desc) \ 2067 { \ 2068 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2069 do_##NAME); \ 2070 } 2071 2072 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2073 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2074 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2075 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2076 GEN_VEXT_VX_RM(vsaddu_vx_b) 2077 GEN_VEXT_VX_RM(vsaddu_vx_h) 2078 GEN_VEXT_VX_RM(vsaddu_vx_w) 2079 GEN_VEXT_VX_RM(vsaddu_vx_d) 2080 2081 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2082 { 2083 int8_t res = a + b; 2084 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2085 res = a > 0 ? INT8_MAX : INT8_MIN; 2086 env->vxsat = 0x1; 2087 } 2088 return res; 2089 } 2090 2091 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2092 { 2093 int16_t res = a + b; 2094 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2095 res = a > 0 ? INT16_MAX : INT16_MIN; 2096 env->vxsat = 0x1; 2097 } 2098 return res; 2099 } 2100 2101 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2102 { 2103 int32_t res = a + b; 2104 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2105 res = a > 0 ? INT32_MAX : INT32_MIN; 2106 env->vxsat = 0x1; 2107 } 2108 return res; 2109 } 2110 2111 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2112 { 2113 int64_t res = a + b; 2114 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2115 res = a > 0 ? 
INT64_MAX : INT64_MIN; 2116 env->vxsat = 0x1; 2117 } 2118 return res; 2119 } 2120 2121 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2122 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2123 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2124 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2125 GEN_VEXT_VV_RM(vsadd_vv_b) 2126 GEN_VEXT_VV_RM(vsadd_vv_h) 2127 GEN_VEXT_VV_RM(vsadd_vv_w) 2128 GEN_VEXT_VV_RM(vsadd_vv_d) 2129 2130 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2131 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2132 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2133 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2134 GEN_VEXT_VX_RM(vsadd_vx_b) 2135 GEN_VEXT_VX_RM(vsadd_vx_h) 2136 GEN_VEXT_VX_RM(vsadd_vx_w) 2137 GEN_VEXT_VX_RM(vsadd_vx_d) 2138 2139 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2140 { 2141 uint8_t res = a - b; 2142 if (res > a) { 2143 res = 0; 2144 env->vxsat = 0x1; 2145 } 2146 return res; 2147 } 2148 2149 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2150 uint16_t b) 2151 { 2152 uint16_t res = a - b; 2153 if (res > a) { 2154 res = 0; 2155 env->vxsat = 0x1; 2156 } 2157 return res; 2158 } 2159 2160 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2161 uint32_t b) 2162 { 2163 uint32_t res = a - b; 2164 if (res > a) { 2165 res = 0; 2166 env->vxsat = 0x1; 2167 } 2168 return res; 2169 } 2170 2171 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2172 uint64_t b) 2173 { 2174 uint64_t res = a - b; 2175 if (res > a) { 2176 res = 0; 2177 env->vxsat = 0x1; 2178 } 2179 return res; 2180 } 2181 2182 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2183 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2184 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2185 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2186 GEN_VEXT_VV_RM(vssubu_vv_b) 2187 GEN_VEXT_VV_RM(vssubu_vv_h) 2188 GEN_VEXT_VV_RM(vssubu_vv_w) 2189 GEN_VEXT_VV_RM(vssubu_vv_d) 2190 2191 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2192 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2193 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2194 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2195 GEN_VEXT_VX_RM(vssubu_vx_b) 2196 GEN_VEXT_VX_RM(vssubu_vx_h) 2197 GEN_VEXT_VX_RM(vssubu_vx_w) 2198 GEN_VEXT_VX_RM(vssubu_vx_d) 2199 2200 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2201 { 2202 int8_t res = a - b; 2203 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2204 res = a >= 0 ? INT8_MAX : INT8_MIN; 2205 env->vxsat = 0x1; 2206 } 2207 return res; 2208 } 2209 2210 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2211 { 2212 int16_t res = a - b; 2213 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2214 res = a >= 0 ? INT16_MAX : INT16_MIN; 2215 env->vxsat = 0x1; 2216 } 2217 return res; 2218 } 2219 2220 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2221 { 2222 int32_t res = a - b; 2223 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2224 res = a >= 0 ? INT32_MAX : INT32_MIN; 2225 env->vxsat = 0x1; 2226 } 2227 return res; 2228 } 2229 2230 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2231 { 2232 int64_t res = a - b; 2233 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2234 res = a >= 0 ? 
INT64_MAX : INT64_MIN; 2235 env->vxsat = 0x1; 2236 } 2237 return res; 2238 } 2239 2240 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2241 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2242 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2243 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2244 GEN_VEXT_VV_RM(vssub_vv_b) 2245 GEN_VEXT_VV_RM(vssub_vv_h) 2246 GEN_VEXT_VV_RM(vssub_vv_w) 2247 GEN_VEXT_VV_RM(vssub_vv_d) 2248 2249 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2250 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2251 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2252 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2253 GEN_VEXT_VX_RM(vssub_vx_b) 2254 GEN_VEXT_VX_RM(vssub_vx_h) 2255 GEN_VEXT_VX_RM(vssub_vx_w) 2256 GEN_VEXT_VX_RM(vssub_vx_d) 2257 2258 /* Vector Single-Width Averaging Add and Subtract */ 2259 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2260 { 2261 uint8_t d = extract64(v, shift, 1); 2262 uint8_t d1; 2263 uint64_t D1, D2; 2264 2265 if (shift == 0 || shift > 64) { 2266 return 0; 2267 } 2268 2269 d1 = extract64(v, shift - 1, 1); 2270 D1 = extract64(v, 0, shift); 2271 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2272 return d1; 2273 } else if (vxrm == 1) { /* round-to-nearest-even */ 2274 if (shift > 1) { 2275 D2 = extract64(v, 0, shift - 1); 2276 return d1 & ((D2 != 0) | d); 2277 } else { 2278 return d1 & d; 2279 } 2280 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2281 return !d & (D1 != 0); 2282 } 2283 return 0; /* round-down (truncate) */ 2284 } 2285 2286 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2287 { 2288 int64_t res = (int64_t)a + b; 2289 uint8_t round = get_round(vxrm, res, 1); 2290 2291 return (res >> 1) + round; 2292 } 2293 2294 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2295 { 2296 int64_t res = a + b; 2297 uint8_t round = get_round(vxrm, res, 1); 2298 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2299 2300 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2301 return ((res >> 1) ^ over) + round; 2302 } 2303 2304 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2305 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2306 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2307 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2308 GEN_VEXT_VV_RM(vaadd_vv_b) 2309 GEN_VEXT_VV_RM(vaadd_vv_h) 2310 GEN_VEXT_VV_RM(vaadd_vv_w) 2311 GEN_VEXT_VV_RM(vaadd_vv_d) 2312 2313 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2314 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2315 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2316 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2317 GEN_VEXT_VX_RM(vaadd_vx_b) 2318 GEN_VEXT_VX_RM(vaadd_vx_h) 2319 GEN_VEXT_VX_RM(vaadd_vx_w) 2320 GEN_VEXT_VX_RM(vaadd_vx_d) 2321 2322 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2323 uint32_t a, uint32_t b) 2324 { 2325 uint64_t res = (uint64_t)a + b; 2326 uint8_t round = get_round(vxrm, res, 1); 2327 2328 return (res >> 1) + round; 2329 } 2330 2331 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2332 uint64_t a, uint64_t b) 2333 { 2334 uint64_t res = a + b; 2335 uint8_t round = get_round(vxrm, res, 1); 2336 uint64_t over = (uint64_t)(res < a) << 63; 2337 2338 return ((res >> 1) | over) + round; 2339 } 2340 2341 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2342 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2343 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2344 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2345 GEN_VEXT_VV_RM(vaaddu_vv_b) 2346 GEN_VEXT_VV_RM(vaaddu_vv_h) 2347 GEN_VEXT_VV_RM(vaaddu_vv_w) 2348 GEN_VEXT_VV_RM(vaaddu_vv_d) 2349 2350 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2351 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2352 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2353 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2354 GEN_VEXT_VX_RM(vaaddu_vx_b) 2355 GEN_VEXT_VX_RM(vaaddu_vx_h) 2356 GEN_VEXT_VX_RM(vaaddu_vx_w) 2357 GEN_VEXT_VX_RM(vaaddu_vx_d) 2358 2359 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2360 { 2361 int64_t res = (int64_t)a - b; 2362 uint8_t round = get_round(vxrm, res, 1); 2363 2364 return (res >> 1) + round; 2365 } 2366 2367 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2368 { 2369 int64_t res = (int64_t)a - b; 2370 uint8_t round = get_round(vxrm, res, 1); 2371 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2372 2373 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2374 return ((res >> 1) ^ over) + round; 2375 } 2376 2377 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2378 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2379 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2380 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2381 GEN_VEXT_VV_RM(vasub_vv_b) 2382 GEN_VEXT_VV_RM(vasub_vv_h) 2383 GEN_VEXT_VV_RM(vasub_vv_w) 2384 GEN_VEXT_VV_RM(vasub_vv_d) 2385 2386 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2387 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2388 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2389 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2390 GEN_VEXT_VX_RM(vasub_vx_b) 2391 GEN_VEXT_VX_RM(vasub_vx_h) 2392 GEN_VEXT_VX_RM(vasub_vx_w) 2393 GEN_VEXT_VX_RM(vasub_vx_d) 2394 2395 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2396 uint32_t a, uint32_t b) 2397 { 2398 int64_t res = (int64_t)a - b; 2399 uint8_t round = get_round(vxrm, res, 1); 2400 2401 return (res >> 1) + round; 2402 } 2403 2404 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2405 uint64_t a, uint64_t b) 2406 { 2407 uint64_t res = (uint64_t)a - b; 2408 uint8_t round = get_round(vxrm, res, 1); 2409 uint64_t over = (uint64_t)(res > a) << 63; 2410 2411 return ((res >> 1) | over) + round; 2412 } 2413 2414 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2415 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2416 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2417 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2418 GEN_VEXT_VV_RM(vasubu_vv_b) 2419 GEN_VEXT_VV_RM(vasubu_vv_h) 2420 GEN_VEXT_VV_RM(vasubu_vv_w) 2421 GEN_VEXT_VV_RM(vasubu_vv_d) 2422 2423 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2424 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2425 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2426 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2427 GEN_VEXT_VX_RM(vasubu_vx_b) 2428 GEN_VEXT_VX_RM(vasubu_vx_h) 2429 GEN_VEXT_VX_RM(vasubu_vx_w) 2430 GEN_VEXT_VX_RM(vasubu_vx_d) 2431 2432 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2433 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2434 { 2435 uint8_t round; 2436 int16_t res; 2437 2438 res = (int16_t)a * (int16_t)b; 2439 round = get_round(vxrm, res, 7); 2440 res = (res >> 7) + round; 2441 2442 if (res > INT8_MAX) { 2443 env->vxsat = 0x1; 2444 return INT8_MAX; 2445 } else if (res < INT8_MIN) { 2446 env->vxsat = 0x1; 2447 return INT8_MIN; 2448 } else { 2449 return res; 2450 } 2451 } 2452 2453 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2454 { 2455 uint8_t round; 2456 int32_t res; 2457 2458 res = (int32_t)a * (int32_t)b; 2459 round = get_round(vxrm, res, 15); 2460 res = (res >> 15) + round; 2461 2462 if (res > INT16_MAX) { 2463 env->vxsat = 0x1; 2464 return INT16_MAX; 2465 } else if (res < INT16_MIN) { 2466 env->vxsat = 0x1; 2467 return INT16_MIN; 2468 } else { 2469 return res; 2470 } 2471 } 2472 2473 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2474 { 2475 uint8_t round; 2476 int64_t res; 2477 2478 res = (int64_t)a * (int64_t)b; 2479 round = get_round(vxrm, res, 31); 2480 res = (res >> 31) + round; 2481 2482 if (res > INT32_MAX) { 2483 env->vxsat = 0x1; 2484 return INT32_MAX; 2485 } else if (res < INT32_MIN) { 2486 env->vxsat = 0x1; 2487 return INT32_MIN; 2488 } else { 2489 return 
res; 2490 } 2491 } 2492 2493 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2494 { 2495 uint8_t round; 2496 uint64_t hi_64, lo_64; 2497 int64_t res; 2498 2499 if (a == INT64_MIN && b == INT64_MIN) { 2500 env->vxsat = 1; 2501 return INT64_MAX; 2502 } 2503 2504 muls64(&lo_64, &hi_64, a, b); 2505 round = get_round(vxrm, lo_64, 63); 2506 /* 2507 * Cannot overflow, as there are always 2508 * 2 sign bits after multiply. 2509 */ 2510 res = (hi_64 << 1) | (lo_64 >> 63); 2511 if (round) { 2512 if (res == INT64_MAX) { 2513 env->vxsat = 1; 2514 } else { 2515 res += 1; 2516 } 2517 } 2518 return res; 2519 } 2520 2521 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2522 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2523 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2524 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2525 GEN_VEXT_VV_RM(vsmul_vv_b) 2526 GEN_VEXT_VV_RM(vsmul_vv_h) 2527 GEN_VEXT_VV_RM(vsmul_vv_w) 2528 GEN_VEXT_VV_RM(vsmul_vv_d) 2529 2530 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2531 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2532 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2533 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2534 GEN_VEXT_VX_RM(vsmul_vx_b) 2535 GEN_VEXT_VX_RM(vsmul_vx_h) 2536 GEN_VEXT_VX_RM(vsmul_vx_w) 2537 GEN_VEXT_VX_RM(vsmul_vx_d) 2538 2539 /* Vector Single-Width Scaling Shift Instructions */ 2540 static inline uint8_t 2541 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2542 { 2543 uint8_t round, shift = b & 0x7; 2544 uint8_t res; 2545 2546 round = get_round(vxrm, a, shift); 2547 res = (a >> shift) + round; 2548 return res; 2549 } 2550 static inline uint16_t 2551 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2552 { 2553 uint8_t round, shift = b & 0xf; 2554 uint16_t res; 2555 2556 round = get_round(vxrm, a, shift); 2557 res = (a >> shift) + round; 2558 return res; 2559 } 2560 static inline uint32_t 2561 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2562 { 2563 uint8_t round, shift = b & 0x1f; 2564 uint32_t res; 2565 2566 round = get_round(vxrm, a, shift); 2567 res = (a >> shift) + round; 2568 return res; 2569 } 2570 static inline uint64_t 2571 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2572 { 2573 uint8_t round, shift = b & 0x3f; 2574 uint64_t res; 2575 2576 round = get_round(vxrm, a, shift); 2577 res = (a >> shift) + round; 2578 return res; 2579 } 2580 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2581 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2582 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2583 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2584 GEN_VEXT_VV_RM(vssrl_vv_b) 2585 GEN_VEXT_VV_RM(vssrl_vv_h) 2586 GEN_VEXT_VV_RM(vssrl_vv_w) 2587 GEN_VEXT_VV_RM(vssrl_vv_d) 2588 2589 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2590 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2591 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2592 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2593 GEN_VEXT_VX_RM(vssrl_vx_b) 2594 GEN_VEXT_VX_RM(vssrl_vx_h) 2595 GEN_VEXT_VX_RM(vssrl_vx_w) 2596 GEN_VEXT_VX_RM(vssrl_vx_d) 2597 2598 static inline int8_t 2599 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2600 { 2601 uint8_t round, shift = b & 0x7; 2602 int8_t res; 2603 2604 round = get_round(vxrm, a, shift); 2605 res = (a >> shift) + round; 2606 return res; 2607 } 2608 
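
/*
 * Worked example (illustration only, kept under "#if 0") of the vxrm
 * rounding that get_round() provides for the scaling shifts in this
 * area and for the vssra/vnclip/vnclipu helpers that follow.  For
 * v = 0b10110 and shift = 2, the kept value is 0b101 (5) and the
 * discarded bits are 0b10:
 *
 *     rnu (0): round up on the first discarded bit        -> 6
 *     rne (1): round to even on ties, up past halfway     -> 6
 *     rdn (2): truncate                                    -> 5
 *     rod (3): jam a 1 into the kept LSB only if it is 0
 *              and something was discarded                 -> 5
 *
 * The example_get_round() below is a hypothetical restatement of
 * get_round() for 1 <= shift <= 63; it is not used by the helpers.
 */
#if 0
static uint8_t example_get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint64_t lsb  = (v >> shift) & 1;               /* lowest kept bit     */
    uint64_t half = (v >> (shift - 1)) & 1;         /* first discarded bit */
    uint64_t rest = v & ((UINT64_C(1) << (shift - 1)) - 1); /* below half  */
    uint64_t all  = v & ((UINT64_C(1) << shift) - 1);       /* all dropped */

    switch (vxrm) {
    case 0:                                 /* rnu */
        return half;
    case 1:                                 /* rne */
        return half & ((rest != 0) | lsb);
    case 2:                                 /* rdn */
        return 0;
    default:                                /* rod */
        return !lsb && (all != 0);
    }
}
#endif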
static inline int16_t 2609 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2610 { 2611 uint8_t round, shift = b & 0xf; 2612 int16_t res; 2613 2614 round = get_round(vxrm, a, shift); 2615 res = (a >> shift) + round; 2616 return res; 2617 } 2618 static inline int32_t 2619 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2620 { 2621 uint8_t round, shift = b & 0x1f; 2622 int32_t res; 2623 2624 round = get_round(vxrm, a, shift); 2625 res = (a >> shift) + round; 2626 return res; 2627 } 2628 static inline int64_t 2629 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2630 { 2631 uint8_t round, shift = b & 0x3f; 2632 int64_t res; 2633 2634 round = get_round(vxrm, a, shift); 2635 res = (a >> shift) + round; 2636 return res; 2637 } 2638 2639 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2640 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2641 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2642 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2643 GEN_VEXT_VV_RM(vssra_vv_b) 2644 GEN_VEXT_VV_RM(vssra_vv_h) 2645 GEN_VEXT_VV_RM(vssra_vv_w) 2646 GEN_VEXT_VV_RM(vssra_vv_d) 2647 2648 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2649 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2650 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2651 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2652 GEN_VEXT_VX_RM(vssra_vx_b) 2653 GEN_VEXT_VX_RM(vssra_vx_h) 2654 GEN_VEXT_VX_RM(vssra_vx_w) 2655 GEN_VEXT_VX_RM(vssra_vx_d) 2656 2657 /* Vector Narrowing Fixed-Point Clip Instructions */ 2658 static inline int8_t 2659 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2660 { 2661 uint8_t round, shift = b & 0xf; 2662 int16_t res; 2663 2664 round = get_round(vxrm, a, shift); 2665 res = (a >> shift) + round; 2666 if (res > INT8_MAX) { 2667 env->vxsat = 0x1; 2668 return INT8_MAX; 2669 } else if (res < INT8_MIN) { 2670 env->vxsat = 0x1; 2671 return INT8_MIN; 2672 } else { 2673 return res; 2674 } 2675 } 2676 2677 static inline int16_t 2678 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2679 { 2680 uint8_t round, shift = b & 0x1f; 2681 int32_t res; 2682 2683 round = get_round(vxrm, a, shift); 2684 res = (a >> shift) + round; 2685 if (res > INT16_MAX) { 2686 env->vxsat = 0x1; 2687 return INT16_MAX; 2688 } else if (res < INT16_MIN) { 2689 env->vxsat = 0x1; 2690 return INT16_MIN; 2691 } else { 2692 return res; 2693 } 2694 } 2695 2696 static inline int32_t 2697 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2698 { 2699 uint8_t round, shift = b & 0x3f; 2700 int64_t res; 2701 2702 round = get_round(vxrm, a, shift); 2703 res = (a >> shift) + round; 2704 if (res > INT32_MAX) { 2705 env->vxsat = 0x1; 2706 return INT32_MAX; 2707 } else if (res < INT32_MIN) { 2708 env->vxsat = 0x1; 2709 return INT32_MIN; 2710 } else { 2711 return res; 2712 } 2713 } 2714 2715 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2716 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2717 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2718 GEN_VEXT_VV_RM(vnclip_wv_b) 2719 GEN_VEXT_VV_RM(vnclip_wv_h) 2720 GEN_VEXT_VV_RM(vnclip_wv_w) 2721 2722 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2723 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2724 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2725 GEN_VEXT_VX_RM(vnclip_wx_b) 2726 GEN_VEXT_VX_RM(vnclip_wx_h) 2727 GEN_VEXT_VX_RM(vnclip_wx_w) 2728 2729 static inline 
uint8_t 2730 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2731 { 2732 uint8_t round, shift = b & 0xf; 2733 uint16_t res; 2734 2735 round = get_round(vxrm, a, shift); 2736 res = (a >> shift) + round; 2737 if (res > UINT8_MAX) { 2738 env->vxsat = 0x1; 2739 return UINT8_MAX; 2740 } else { 2741 return res; 2742 } 2743 } 2744 2745 static inline uint16_t 2746 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2747 { 2748 uint8_t round, shift = b & 0x1f; 2749 uint32_t res; 2750 2751 round = get_round(vxrm, a, shift); 2752 res = (a >> shift) + round; 2753 if (res > UINT16_MAX) { 2754 env->vxsat = 0x1; 2755 return UINT16_MAX; 2756 } else { 2757 return res; 2758 } 2759 } 2760 2761 static inline uint32_t 2762 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2763 { 2764 uint8_t round, shift = b & 0x3f; 2765 uint64_t res; 2766 2767 round = get_round(vxrm, a, shift); 2768 res = (a >> shift) + round; 2769 if (res > UINT32_MAX) { 2770 env->vxsat = 0x1; 2771 return UINT32_MAX; 2772 } else { 2773 return res; 2774 } 2775 } 2776 2777 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2778 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2779 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2780 GEN_VEXT_VV_RM(vnclipu_wv_b) 2781 GEN_VEXT_VV_RM(vnclipu_wv_h) 2782 GEN_VEXT_VV_RM(vnclipu_wv_w) 2783 2784 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2785 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2786 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 2787 GEN_VEXT_VX_RM(vnclipu_wx_b) 2788 GEN_VEXT_VX_RM(vnclipu_wx_h) 2789 GEN_VEXT_VX_RM(vnclipu_wx_w) 2790 2791 /* 2792 *** Vector Float Point Arithmetic Instructions 2793 */ 2794 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2795 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2796 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2797 CPURISCVState *env) \ 2798 { \ 2799 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2800 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2801 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2802 } 2803 2804 #define GEN_VEXT_VV_ENV(NAME) \ 2805 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2806 void *vs2, CPURISCVState *env, \ 2807 uint32_t desc) \ 2808 { \ 2809 uint32_t vm = vext_vm(desc); \ 2810 uint32_t vl = env->vl; \ 2811 uint32_t i; \ 2812 \ 2813 for (i = env->vstart; i < vl; i++) { \ 2814 if (!vm && !vext_elem_mask(v0, i)) { \ 2815 continue; \ 2816 } \ 2817 do_##NAME(vd, vs1, vs2, i, env); \ 2818 } \ 2819 env->vstart = 0; \ 2820 } 2821 2822 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2823 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2824 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2825 GEN_VEXT_VV_ENV(vfadd_vv_h) 2826 GEN_VEXT_VV_ENV(vfadd_vv_w) 2827 GEN_VEXT_VV_ENV(vfadd_vv_d) 2828 2829 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2830 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2831 CPURISCVState *env) \ 2832 { \ 2833 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2834 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 2835 } 2836 2837 #define GEN_VEXT_VF(NAME) \ 2838 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2839 void *vs2, CPURISCVState *env, \ 2840 uint32_t desc) \ 2841 { \ 2842 uint32_t vm = vext_vm(desc); \ 2843 uint32_t vl = env->vl; \ 2844 uint32_t i; \ 2845 \ 2846 for (i = env->vstart; i < vl; i++) { \ 2847 if (!vm && !vext_elem_mask(v0, i)) { \ 2848 
continue; \ 2849 } \ 2850 do_##NAME(vd, s1, vs2, i, env); \ 2851 } \ 2852 env->vstart = 0; \ 2853 } 2854 2855 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2856 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2857 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2858 GEN_VEXT_VF(vfadd_vf_h) 2859 GEN_VEXT_VF(vfadd_vf_w) 2860 GEN_VEXT_VF(vfadd_vf_d) 2861 2862 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2863 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2864 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2865 GEN_VEXT_VV_ENV(vfsub_vv_h) 2866 GEN_VEXT_VV_ENV(vfsub_vv_w) 2867 GEN_VEXT_VV_ENV(vfsub_vv_d) 2868 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2869 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2870 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2871 GEN_VEXT_VF(vfsub_vf_h) 2872 GEN_VEXT_VF(vfsub_vf_w) 2873 GEN_VEXT_VF(vfsub_vf_d) 2874 2875 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2876 { 2877 return float16_sub(b, a, s); 2878 } 2879 2880 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2881 { 2882 return float32_sub(b, a, s); 2883 } 2884 2885 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2886 { 2887 return float64_sub(b, a, s); 2888 } 2889 2890 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2891 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2892 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2893 GEN_VEXT_VF(vfrsub_vf_h) 2894 GEN_VEXT_VF(vfrsub_vf_w) 2895 GEN_VEXT_VF(vfrsub_vf_d) 2896 2897 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2898 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2899 { 2900 return float32_add(float16_to_float32(a, true, s), 2901 float16_to_float32(b, true, s), s); 2902 } 2903 2904 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2905 { 2906 return float64_add(float32_to_float64(a, s), 2907 float32_to_float64(b, s), s); 2908 2909 } 2910 2911 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2912 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2913 GEN_VEXT_VV_ENV(vfwadd_vv_h) 2914 GEN_VEXT_VV_ENV(vfwadd_vv_w) 2915 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2916 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2917 GEN_VEXT_VF(vfwadd_vf_h) 2918 GEN_VEXT_VF(vfwadd_vf_w) 2919 2920 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2921 { 2922 return float32_sub(float16_to_float32(a, true, s), 2923 float16_to_float32(b, true, s), s); 2924 } 2925 2926 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2927 { 2928 return float64_sub(float32_to_float64(a, s), 2929 float32_to_float64(b, s), s); 2930 2931 } 2932 2933 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 2934 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 2935 GEN_VEXT_VV_ENV(vfwsub_vv_h) 2936 GEN_VEXT_VV_ENV(vfwsub_vv_w) 2937 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 2938 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 2939 GEN_VEXT_VF(vfwsub_vf_h) 2940 GEN_VEXT_VF(vfwsub_vf_w) 2941 2942 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 2943 { 2944 return float32_add(a, float16_to_float32(b, true, s), s); 2945 } 2946 2947 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 2948 { 2949 return float64_add(a, float32_to_float64(b, s), s); 2950 } 2951 2952 
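
/*
 * Illustrative note (sketch only, kept under "#if 0"): OPFVF2 always
 * calls its callback as OP(element, scalar).  vfrsub.vf needs the
 * operands the other way round, which is all the float16/32/64_rsub
 * wrappers above do (the *_rdiv wrappers later in the file follow the
 * same pattern).  Similarly, the vfwadd.wv/vfwsub.wv helpers just
 * defined widen only the narrow vs1 operand, because vs2 already has
 * the destination width; their RVVCALL registrations follow below.
 * The example_* name is hypothetical.
 */
#if 0
static uint32_t example_rsub32(uint32_t elem, uint32_t scalar, float_status *s)
{
    /* vfrsub.vf: vd[i] = f[rs1] - vs2[i], so swap before subtracting. */
    return float32_sub(scalar, elem, s);
}
#endif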
RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 2953 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 2954 GEN_VEXT_VV_ENV(vfwadd_wv_h) 2955 GEN_VEXT_VV_ENV(vfwadd_wv_w) 2956 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 2957 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 2958 GEN_VEXT_VF(vfwadd_wf_h) 2959 GEN_VEXT_VF(vfwadd_wf_w) 2960 2961 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 2962 { 2963 return float32_sub(a, float16_to_float32(b, true, s), s); 2964 } 2965 2966 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 2967 { 2968 return float64_sub(a, float32_to_float64(b, s), s); 2969 } 2970 2971 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 2972 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 2973 GEN_VEXT_VV_ENV(vfwsub_wv_h) 2974 GEN_VEXT_VV_ENV(vfwsub_wv_w) 2975 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 2976 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 2977 GEN_VEXT_VF(vfwsub_wf_h) 2978 GEN_VEXT_VF(vfwsub_wf_w) 2979 2980 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 2981 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 2982 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 2983 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 2984 GEN_VEXT_VV_ENV(vfmul_vv_h) 2985 GEN_VEXT_VV_ENV(vfmul_vv_w) 2986 GEN_VEXT_VV_ENV(vfmul_vv_d) 2987 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 2988 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 2989 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 2990 GEN_VEXT_VF(vfmul_vf_h) 2991 GEN_VEXT_VF(vfmul_vf_w) 2992 GEN_VEXT_VF(vfmul_vf_d) 2993 2994 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 2995 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 2996 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 2997 GEN_VEXT_VV_ENV(vfdiv_vv_h) 2998 GEN_VEXT_VV_ENV(vfdiv_vv_w) 2999 GEN_VEXT_VV_ENV(vfdiv_vv_d) 3000 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3001 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3002 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3003 GEN_VEXT_VF(vfdiv_vf_h) 3004 GEN_VEXT_VF(vfdiv_vf_w) 3005 GEN_VEXT_VF(vfdiv_vf_d) 3006 3007 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3008 { 3009 return float16_div(b, a, s); 3010 } 3011 3012 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3013 { 3014 return float32_div(b, a, s); 3015 } 3016 3017 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3018 { 3019 return float64_div(b, a, s); 3020 } 3021 3022 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3023 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3024 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3025 GEN_VEXT_VF(vfrdiv_vf_h) 3026 GEN_VEXT_VF(vfrdiv_vf_w) 3027 GEN_VEXT_VF(vfrdiv_vf_d) 3028 3029 /* Vector Widening Floating-Point Multiply */ 3030 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3031 { 3032 return float32_mul(float16_to_float32(a, true, s), 3033 float16_to_float32(b, true, s), s); 3034 } 3035 3036 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3037 { 3038 return float64_mul(float32_to_float64(a, s), 3039 float32_to_float64(b, s), s); 3040 3041 } 3042 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3043 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, 
H4, H4, vfwmul32) 3044 GEN_VEXT_VV_ENV(vfwmul_vv_h) 3045 GEN_VEXT_VV_ENV(vfwmul_vv_w) 3046 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3047 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3048 GEN_VEXT_VF(vfwmul_vf_h) 3049 GEN_VEXT_VF(vfwmul_vf_w) 3050 3051 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3052 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3053 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3054 CPURISCVState *env) \ 3055 { \ 3056 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3057 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3058 TD d = *((TD *)vd + HD(i)); \ 3059 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3060 } 3061 3062 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3063 { 3064 return float16_muladd(a, b, d, 0, s); 3065 } 3066 3067 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3068 { 3069 return float32_muladd(a, b, d, 0, s); 3070 } 3071 3072 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3073 { 3074 return float64_muladd(a, b, d, 0, s); 3075 } 3076 3077 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3078 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3079 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3080 GEN_VEXT_VV_ENV(vfmacc_vv_h) 3081 GEN_VEXT_VV_ENV(vfmacc_vv_w) 3082 GEN_VEXT_VV_ENV(vfmacc_vv_d) 3083 3084 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3085 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3086 CPURISCVState *env) \ 3087 { \ 3088 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3089 TD d = *((TD *)vd + HD(i)); \ 3090 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3091 } 3092 3093 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3094 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3095 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3096 GEN_VEXT_VF(vfmacc_vf_h) 3097 GEN_VEXT_VF(vfmacc_vf_w) 3098 GEN_VEXT_VF(vfmacc_vf_d) 3099 3100 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3101 { 3102 return float16_muladd(a, b, d, 3103 float_muladd_negate_c | float_muladd_negate_product, s); 3104 } 3105 3106 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3107 { 3108 return float32_muladd(a, b, d, 3109 float_muladd_negate_c | float_muladd_negate_product, s); 3110 } 3111 3112 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3113 { 3114 return float64_muladd(a, b, d, 3115 float_muladd_negate_c | float_muladd_negate_product, s); 3116 } 3117 3118 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3119 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3120 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3121 GEN_VEXT_VV_ENV(vfnmacc_vv_h) 3122 GEN_VEXT_VV_ENV(vfnmacc_vv_w) 3123 GEN_VEXT_VV_ENV(vfnmacc_vv_d) 3124 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3125 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3126 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3127 GEN_VEXT_VF(vfnmacc_vf_h) 3128 GEN_VEXT_VF(vfnmacc_vf_w) 3129 GEN_VEXT_VF(vfnmacc_vf_d) 3130 3131 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3132 { 3133 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3134 } 3135 3136 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3137 { 3138 return float32_muladd(a, b, d, float_muladd_negate_c, 
s); 3139 } 3140 3141 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3142 { 3143 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3144 } 3145 3146 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3147 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3148 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3149 GEN_VEXT_VV_ENV(vfmsac_vv_h) 3150 GEN_VEXT_VV_ENV(vfmsac_vv_w) 3151 GEN_VEXT_VV_ENV(vfmsac_vv_d) 3152 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3153 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3154 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3155 GEN_VEXT_VF(vfmsac_vf_h) 3156 GEN_VEXT_VF(vfmsac_vf_w) 3157 GEN_VEXT_VF(vfmsac_vf_d) 3158 3159 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3160 { 3161 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3162 } 3163 3164 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3165 { 3166 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3167 } 3168 3169 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3170 { 3171 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3172 } 3173 3174 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3175 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3176 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3177 GEN_VEXT_VV_ENV(vfnmsac_vv_h) 3178 GEN_VEXT_VV_ENV(vfnmsac_vv_w) 3179 GEN_VEXT_VV_ENV(vfnmsac_vv_d) 3180 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3181 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3182 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3183 GEN_VEXT_VF(vfnmsac_vf_h) 3184 GEN_VEXT_VF(vfnmsac_vf_w) 3185 GEN_VEXT_VF(vfnmsac_vf_d) 3186 3187 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3188 { 3189 return float16_muladd(d, b, a, 0, s); 3190 } 3191 3192 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3193 { 3194 return float32_muladd(d, b, a, 0, s); 3195 } 3196 3197 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3198 { 3199 return float64_muladd(d, b, a, 0, s); 3200 } 3201 3202 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3203 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3204 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3205 GEN_VEXT_VV_ENV(vfmadd_vv_h) 3206 GEN_VEXT_VV_ENV(vfmadd_vv_w) 3207 GEN_VEXT_VV_ENV(vfmadd_vv_d) 3208 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3209 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3210 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3211 GEN_VEXT_VF(vfmadd_vf_h) 3212 GEN_VEXT_VF(vfmadd_vf_w) 3213 GEN_VEXT_VF(vfmadd_vf_d) 3214 3215 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3216 { 3217 return float16_muladd(d, b, a, 3218 float_muladd_negate_c | float_muladd_negate_product, s); 3219 } 3220 3221 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3222 { 3223 return float32_muladd(d, b, a, 3224 float_muladd_negate_c | float_muladd_negate_product, s); 3225 } 3226 3227 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3228 { 3229 return float64_muladd(d, b, a, 3230 float_muladd_negate_c | float_muladd_negate_product, s); 3231 } 3232 3233 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3234 
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3235 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3236 GEN_VEXT_VV_ENV(vfnmadd_vv_h) 3237 GEN_VEXT_VV_ENV(vfnmadd_vv_w) 3238 GEN_VEXT_VV_ENV(vfnmadd_vv_d) 3239 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3240 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3241 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3242 GEN_VEXT_VF(vfnmadd_vf_h) 3243 GEN_VEXT_VF(vfnmadd_vf_w) 3244 GEN_VEXT_VF(vfnmadd_vf_d) 3245 3246 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3247 { 3248 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3249 } 3250 3251 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3252 { 3253 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3254 } 3255 3256 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3257 { 3258 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3259 } 3260 3261 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3262 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3263 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3264 GEN_VEXT_VV_ENV(vfmsub_vv_h) 3265 GEN_VEXT_VV_ENV(vfmsub_vv_w) 3266 GEN_VEXT_VV_ENV(vfmsub_vv_d) 3267 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3268 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3269 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3270 GEN_VEXT_VF(vfmsub_vf_h) 3271 GEN_VEXT_VF(vfmsub_vf_w) 3272 GEN_VEXT_VF(vfmsub_vf_d) 3273 3274 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3275 { 3276 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3277 } 3278 3279 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3280 { 3281 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3282 } 3283 3284 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3285 { 3286 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3287 } 3288 3289 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3290 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3291 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3292 GEN_VEXT_VV_ENV(vfnmsub_vv_h) 3293 GEN_VEXT_VV_ENV(vfnmsub_vv_w) 3294 GEN_VEXT_VV_ENV(vfnmsub_vv_d) 3295 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3296 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3297 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3298 GEN_VEXT_VF(vfnmsub_vf_h) 3299 GEN_VEXT_VF(vfnmsub_vf_w) 3300 GEN_VEXT_VF(vfnmsub_vf_d) 3301 3302 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3303 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3304 { 3305 return float32_muladd(float16_to_float32(a, true, s), 3306 float16_to_float32(b, true, s), d, 0, s); 3307 } 3308 3309 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3310 { 3311 return float64_muladd(float32_to_float64(a, s), 3312 float32_to_float64(b, s), d, 0, s); 3313 } 3314 3315 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3316 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3317 GEN_VEXT_VV_ENV(vfwmacc_vv_h) 3318 GEN_VEXT_VV_ENV(vfwmacc_vv_w) 3319 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3320 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3321 GEN_VEXT_VF(vfwmacc_vf_h) 3322 
GEN_VEXT_VF(vfwmacc_vf_w) 3323 3324 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3325 { 3326 return float32_muladd(float16_to_float32(a, true, s), 3327 float16_to_float32(b, true, s), d, 3328 float_muladd_negate_c | float_muladd_negate_product, s); 3329 } 3330 3331 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3332 { 3333 return float64_muladd(float32_to_float64(a, s), 3334 float32_to_float64(b, s), d, 3335 float_muladd_negate_c | float_muladd_negate_product, s); 3336 } 3337 3338 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3339 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3340 GEN_VEXT_VV_ENV(vfwnmacc_vv_h) 3341 GEN_VEXT_VV_ENV(vfwnmacc_vv_w) 3342 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3343 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3344 GEN_VEXT_VF(vfwnmacc_vf_h) 3345 GEN_VEXT_VF(vfwnmacc_vf_w) 3346 3347 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3348 { 3349 return float32_muladd(float16_to_float32(a, true, s), 3350 float16_to_float32(b, true, s), d, 3351 float_muladd_negate_c, s); 3352 } 3353 3354 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3355 { 3356 return float64_muladd(float32_to_float64(a, s), 3357 float32_to_float64(b, s), d, 3358 float_muladd_negate_c, s); 3359 } 3360 3361 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3362 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3363 GEN_VEXT_VV_ENV(vfwmsac_vv_h) 3364 GEN_VEXT_VV_ENV(vfwmsac_vv_w) 3365 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3366 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3367 GEN_VEXT_VF(vfwmsac_vf_h) 3368 GEN_VEXT_VF(vfwmsac_vf_w) 3369 3370 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3371 { 3372 return float32_muladd(float16_to_float32(a, true, s), 3373 float16_to_float32(b, true, s), d, 3374 float_muladd_negate_product, s); 3375 } 3376 3377 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3378 { 3379 return float64_muladd(float32_to_float64(a, s), 3380 float32_to_float64(b, s), d, 3381 float_muladd_negate_product, s); 3382 } 3383 3384 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3385 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3386 GEN_VEXT_VV_ENV(vfwnmsac_vv_h) 3387 GEN_VEXT_VV_ENV(vfwnmsac_vv_w) 3388 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3389 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3390 GEN_VEXT_VF(vfwnmsac_vf_h) 3391 GEN_VEXT_VF(vfwnmsac_vf_w) 3392 3393 /* Vector Floating-Point Square-Root Instruction */ 3394 /* (TD, T2, TX2) */ 3395 #define OP_UU_H uint16_t, uint16_t, uint16_t 3396 #define OP_UU_W uint32_t, uint32_t, uint32_t 3397 #define OP_UU_D uint64_t, uint64_t, uint64_t 3398 3399 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3400 static void do_##NAME(void *vd, void *vs2, int i, \ 3401 CPURISCVState *env) \ 3402 { \ 3403 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3404 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3405 } 3406 3407 #define GEN_VEXT_V_ENV(NAME) \ 3408 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3409 CPURISCVState *env, uint32_t desc) \ 3410 { \ 3411 uint32_t vm = vext_vm(desc); \ 3412 uint32_t vl = env->vl; \ 3413 uint32_t i; \ 3414 \ 3415 if (vl == 0) { \ 3416 return; \ 3417 } \ 3418 for (i = env->vstart; i < vl; i++) { \ 3419 if (!vm && !vext_elem_mask(v0, i)) { \ 
3420 continue; \ 3421 } \ 3422 do_##NAME(vd, vs2, i, env); \ 3423 } \ 3424 env->vstart = 0; \ 3425 } 3426 3427 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3428 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3429 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3430 GEN_VEXT_V_ENV(vfsqrt_v_h) 3431 GEN_VEXT_V_ENV(vfsqrt_v_w) 3432 GEN_VEXT_V_ENV(vfsqrt_v_d) 3433 3434 /* 3435 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3436 * 3437 * Adapted from riscv-v-spec recip.c: 3438 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3439 */ 3440 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3441 { 3442 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3443 uint64_t exp = extract64(f, frac_size, exp_size); 3444 uint64_t frac = extract64(f, 0, frac_size); 3445 3446 const uint8_t lookup_table[] = { 3447 52, 51, 50, 48, 47, 46, 44, 43, 3448 42, 41, 40, 39, 38, 36, 35, 34, 3449 33, 32, 31, 30, 30, 29, 28, 27, 3450 26, 25, 24, 23, 23, 22, 21, 20, 3451 19, 19, 18, 17, 16, 16, 15, 14, 3452 14, 13, 12, 12, 11, 10, 10, 9, 3453 9, 8, 7, 7, 6, 6, 5, 4, 3454 4, 3, 3, 2, 2, 1, 1, 0, 3455 127, 125, 123, 121, 119, 118, 116, 114, 3456 113, 111, 109, 108, 106, 105, 103, 102, 3457 100, 99, 97, 96, 95, 93, 92, 91, 3458 90, 88, 87, 86, 85, 84, 83, 82, 3459 80, 79, 78, 77, 76, 75, 74, 73, 3460 72, 71, 70, 70, 69, 68, 67, 66, 3461 65, 64, 63, 63, 62, 61, 60, 59, 3462 59, 58, 57, 56, 56, 55, 54, 53 3463 }; 3464 const int precision = 7; 3465 3466 if (exp == 0 && frac != 0) { /* subnormal */ 3467 /* Normalize the subnormal. */ 3468 while (extract64(frac, frac_size - 1, 1) == 0) { 3469 exp--; 3470 frac <<= 1; 3471 } 3472 3473 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3474 } 3475 3476 int idx = ((exp & 1) << (precision - 1)) | 3477 (frac >> (frac_size - precision + 1)); 3478 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3479 (frac_size - precision); 3480 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3481 3482 uint64_t val = 0; 3483 val = deposit64(val, 0, frac_size, out_frac); 3484 val = deposit64(val, frac_size, exp_size, out_exp); 3485 val = deposit64(val, frac_size + exp_size, 1, sign); 3486 return val; 3487 } 3488 3489 static float16 frsqrt7_h(float16 f, float_status *s) 3490 { 3491 int exp_size = 5, frac_size = 10; 3492 bool sign = float16_is_neg(f); 3493 3494 /* 3495 * frsqrt7(sNaN) = canonical NaN 3496 * frsqrt7(-inf) = canonical NaN 3497 * frsqrt7(-normal) = canonical NaN 3498 * frsqrt7(-subnormal) = canonical NaN 3499 */ 3500 if (float16_is_signaling_nan(f, s) || 3501 (float16_is_infinity(f) && sign) || 3502 (float16_is_normal(f) && sign) || 3503 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3504 s->float_exception_flags |= float_flag_invalid; 3505 return float16_default_nan(s); 3506 } 3507 3508 /* frsqrt7(qNaN) = canonical NaN */ 3509 if (float16_is_quiet_nan(f, s)) { 3510 return float16_default_nan(s); 3511 } 3512 3513 /* frsqrt7(+-0) = +-inf */ 3514 if (float16_is_zero(f)) { 3515 s->float_exception_flags |= float_flag_divbyzero; 3516 return float16_set_sign(float16_infinity, sign); 3517 } 3518 3519 /* frsqrt7(+inf) = +0 */ 3520 if (float16_is_infinity(f) && !sign) { 3521 return float16_set_sign(float16_zero, sign); 3522 } 3523 3524 /* +normal, +subnormal */ 3525 uint64_t val = frsqrt7(f, exp_size, frac_size); 3526 return make_float16(val); 3527 } 3528 3529 static float32 frsqrt7_s(float32 f, float_status *s) 3530 { 3531 int exp_size = 8, frac_size = 23; 3532 bool 
sign = float32_is_neg(f); 3533 3534 /* 3535 * frsqrt7(sNaN) = canonical NaN 3536 * frsqrt7(-inf) = canonical NaN 3537 * frsqrt7(-normal) = canonical NaN 3538 * frsqrt7(-subnormal) = canonical NaN 3539 */ 3540 if (float32_is_signaling_nan(f, s) || 3541 (float32_is_infinity(f) && sign) || 3542 (float32_is_normal(f) && sign) || 3543 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3544 s->float_exception_flags |= float_flag_invalid; 3545 return float32_default_nan(s); 3546 } 3547 3548 /* frsqrt7(qNaN) = canonical NaN */ 3549 if (float32_is_quiet_nan(f, s)) { 3550 return float32_default_nan(s); 3551 } 3552 3553 /* frsqrt7(+-0) = +-inf */ 3554 if (float32_is_zero(f)) { 3555 s->float_exception_flags |= float_flag_divbyzero; 3556 return float32_set_sign(float32_infinity, sign); 3557 } 3558 3559 /* frsqrt7(+inf) = +0 */ 3560 if (float32_is_infinity(f) && !sign) { 3561 return float32_set_sign(float32_zero, sign); 3562 } 3563 3564 /* +normal, +subnormal */ 3565 uint64_t val = frsqrt7(f, exp_size, frac_size); 3566 return make_float32(val); 3567 } 3568 3569 static float64 frsqrt7_d(float64 f, float_status *s) 3570 { 3571 int exp_size = 11, frac_size = 52; 3572 bool sign = float64_is_neg(f); 3573 3574 /* 3575 * frsqrt7(sNaN) = canonical NaN 3576 * frsqrt7(-inf) = canonical NaN 3577 * frsqrt7(-normal) = canonical NaN 3578 * frsqrt7(-subnormal) = canonical NaN 3579 */ 3580 if (float64_is_signaling_nan(f, s) || 3581 (float64_is_infinity(f) && sign) || 3582 (float64_is_normal(f) && sign) || 3583 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3584 s->float_exception_flags |= float_flag_invalid; 3585 return float64_default_nan(s); 3586 } 3587 3588 /* frsqrt7(qNaN) = canonical NaN */ 3589 if (float64_is_quiet_nan(f, s)) { 3590 return float64_default_nan(s); 3591 } 3592 3593 /* frsqrt7(+-0) = +-inf */ 3594 if (float64_is_zero(f)) { 3595 s->float_exception_flags |= float_flag_divbyzero; 3596 return float64_set_sign(float64_infinity, sign); 3597 } 3598 3599 /* frsqrt7(+inf) = +0 */ 3600 if (float64_is_infinity(f) && !sign) { 3601 return float64_set_sign(float64_zero, sign); 3602 } 3603 3604 /* +normal, +subnormal */ 3605 uint64_t val = frsqrt7(f, exp_size, frac_size); 3606 return make_float64(val); 3607 } 3608 3609 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3610 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3611 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3612 GEN_VEXT_V_ENV(vfrsqrt7_v_h) 3613 GEN_VEXT_V_ENV(vfrsqrt7_v_w) 3614 GEN_VEXT_V_ENV(vfrsqrt7_v_d) 3615 3616 /* 3617 * Vector Floating-Point Reciprocal Estimate Instruction 3618 * 3619 * Adapted from riscv-v-spec recip.c: 3620 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3621 */ 3622 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3623 float_status *s) 3624 { 3625 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3626 uint64_t exp = extract64(f, frac_size, exp_size); 3627 uint64_t frac = extract64(f, 0, frac_size); 3628 3629 const uint8_t lookup_table[] = { 3630 127, 125, 123, 121, 119, 117, 116, 114, 3631 112, 110, 109, 107, 105, 104, 102, 100, 3632 99, 97, 96, 94, 93, 91, 90, 88, 3633 87, 85, 84, 83, 81, 80, 79, 77, 3634 76, 75, 74, 72, 71, 70, 69, 68, 3635 66, 65, 64, 63, 62, 61, 60, 59, 3636 58, 57, 56, 55, 54, 53, 52, 51, 3637 50, 49, 48, 47, 46, 45, 44, 43, 3638 42, 41, 40, 40, 39, 38, 37, 36, 3639 35, 35, 34, 33, 32, 31, 31, 30, 3640 29, 28, 28, 27, 26, 25, 25, 24, 3641 23, 23, 22, 21, 21, 20, 19, 19, 3642 18, 17, 17, 16, 15, 15, 14, 14, 
3643 13, 12, 12, 11, 11, 10, 9, 9, 3644 8, 8, 7, 7, 6, 5, 5, 4, 3645 4, 3, 3, 2, 2, 1, 1, 0 3646 }; 3647 const int precision = 7; 3648 3649 if (exp == 0 && frac != 0) { /* subnormal */ 3650 /* Normalize the subnormal. */ 3651 while (extract64(frac, frac_size - 1, 1) == 0) { 3652 exp--; 3653 frac <<= 1; 3654 } 3655 3656 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3657 3658 if (exp != 0 && exp != UINT64_MAX) { 3659 /* 3660 * Overflow to inf or max value of same sign, 3661 * depending on sign and rounding mode. 3662 */ 3663 s->float_exception_flags |= (float_flag_inexact | 3664 float_flag_overflow); 3665 3666 if ((s->float_rounding_mode == float_round_to_zero) || 3667 ((s->float_rounding_mode == float_round_down) && !sign) || 3668 ((s->float_rounding_mode == float_round_up) && sign)) { 3669 /* Return greatest/negative finite value. */ 3670 return (sign << (exp_size + frac_size)) | 3671 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3672 } else { 3673 /* Return +-inf. */ 3674 return (sign << (exp_size + frac_size)) | 3675 MAKE_64BIT_MASK(frac_size, exp_size); 3676 } 3677 } 3678 } 3679 3680 int idx = frac >> (frac_size - precision); 3681 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3682 (frac_size - precision); 3683 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3684 3685 if (out_exp == 0 || out_exp == UINT64_MAX) { 3686 /* 3687 * The result is subnormal, but don't raise the underflow exception, 3688 * because there's no additional loss of precision. 3689 */ 3690 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3691 if (out_exp == UINT64_MAX) { 3692 out_frac >>= 1; 3693 out_exp = 0; 3694 } 3695 } 3696 3697 uint64_t val = 0; 3698 val = deposit64(val, 0, frac_size, out_frac); 3699 val = deposit64(val, frac_size, exp_size, out_exp); 3700 val = deposit64(val, frac_size + exp_size, 1, sign); 3701 return val; 3702 } 3703 3704 static float16 frec7_h(float16 f, float_status *s) 3705 { 3706 int exp_size = 5, frac_size = 10; 3707 bool sign = float16_is_neg(f); 3708 3709 /* frec7(+-inf) = +-0 */ 3710 if (float16_is_infinity(f)) { 3711 return float16_set_sign(float16_zero, sign); 3712 } 3713 3714 /* frec7(+-0) = +-inf */ 3715 if (float16_is_zero(f)) { 3716 s->float_exception_flags |= float_flag_divbyzero; 3717 return float16_set_sign(float16_infinity, sign); 3718 } 3719 3720 /* frec7(sNaN) = canonical NaN */ 3721 if (float16_is_signaling_nan(f, s)) { 3722 s->float_exception_flags |= float_flag_invalid; 3723 return float16_default_nan(s); 3724 } 3725 3726 /* frec7(qNaN) = canonical NaN */ 3727 if (float16_is_quiet_nan(f, s)) { 3728 return float16_default_nan(s); 3729 } 3730 3731 /* +-normal, +-subnormal */ 3732 uint64_t val = frec7(f, exp_size, frac_size, s); 3733 return make_float16(val); 3734 } 3735 3736 static float32 frec7_s(float32 f, float_status *s) 3737 { 3738 int exp_size = 8, frac_size = 23; 3739 bool sign = float32_is_neg(f); 3740 3741 /* frec7(+-inf) = +-0 */ 3742 if (float32_is_infinity(f)) { 3743 return float32_set_sign(float32_zero, sign); 3744 } 3745 3746 /* frec7(+-0) = +-inf */ 3747 if (float32_is_zero(f)) { 3748 s->float_exception_flags |= float_flag_divbyzero; 3749 return float32_set_sign(float32_infinity, sign); 3750 } 3751 3752 /* frec7(sNaN) = canonical NaN */ 3753 if (float32_is_signaling_nan(f, s)) { 3754 s->float_exception_flags |= float_flag_invalid; 3755 return float32_default_nan(s); 3756 } 3757 3758 /* frec7(qNaN) = canonical NaN */ 3759 if (float32_is_quiet_nan(f, s)) { 3760 return float32_default_nan(s); 3761 } 3762 3763 /* 
+-normal, +-subnormal */ 3764 uint64_t val = frec7(f, exp_size, frac_size, s); 3765 return make_float32(val); 3766 } 3767 3768 static float64 frec7_d(float64 f, float_status *s) 3769 { 3770 int exp_size = 11, frac_size = 52; 3771 bool sign = float64_is_neg(f); 3772 3773 /* frec7(+-inf) = +-0 */ 3774 if (float64_is_infinity(f)) { 3775 return float64_set_sign(float64_zero, sign); 3776 } 3777 3778 /* frec7(+-0) = +-inf */ 3779 if (float64_is_zero(f)) { 3780 s->float_exception_flags |= float_flag_divbyzero; 3781 return float64_set_sign(float64_infinity, sign); 3782 } 3783 3784 /* frec7(sNaN) = canonical NaN */ 3785 if (float64_is_signaling_nan(f, s)) { 3786 s->float_exception_flags |= float_flag_invalid; 3787 return float64_default_nan(s); 3788 } 3789 3790 /* frec7(qNaN) = canonical NaN */ 3791 if (float64_is_quiet_nan(f, s)) { 3792 return float64_default_nan(s); 3793 } 3794 3795 /* +-normal, +-subnormal */ 3796 uint64_t val = frec7(f, exp_size, frac_size, s); 3797 return make_float64(val); 3798 } 3799 3800 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3801 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3802 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3803 GEN_VEXT_V_ENV(vfrec7_v_h) 3804 GEN_VEXT_V_ENV(vfrec7_v_w) 3805 GEN_VEXT_V_ENV(vfrec7_v_d) 3806 3807 /* Vector Floating-Point MIN/MAX Instructions */ 3808 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3809 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) 3810 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3811 GEN_VEXT_VV_ENV(vfmin_vv_h) 3812 GEN_VEXT_VV_ENV(vfmin_vv_w) 3813 GEN_VEXT_VV_ENV(vfmin_vv_d) 3814 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3815 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3816 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3817 GEN_VEXT_VF(vfmin_vf_h) 3818 GEN_VEXT_VF(vfmin_vf_w) 3819 GEN_VEXT_VF(vfmin_vf_d) 3820 3821 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3822 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3823 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3824 GEN_VEXT_VV_ENV(vfmax_vv_h) 3825 GEN_VEXT_VV_ENV(vfmax_vv_w) 3826 GEN_VEXT_VV_ENV(vfmax_vv_d) 3827 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3828 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3829 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3830 GEN_VEXT_VF(vfmax_vf_h) 3831 GEN_VEXT_VF(vfmax_vf_w) 3832 GEN_VEXT_VF(vfmax_vf_d) 3833 3834 /* Vector Floating-Point Sign-Injection Instructions */ 3835 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3836 { 3837 return deposit64(b, 0, 15, a); 3838 } 3839 3840 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3841 { 3842 return deposit64(b, 0, 31, a); 3843 } 3844 3845 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3846 { 3847 return deposit64(b, 0, 63, a); 3848 } 3849 3850 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3851 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3852 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3853 GEN_VEXT_VV_ENV(vfsgnj_vv_h) 3854 GEN_VEXT_VV_ENV(vfsgnj_vv_w) 3855 GEN_VEXT_VV_ENV(vfsgnj_vv_d) 3856 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3857 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3858 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, 
H8, fsgnj64) 3859 GEN_VEXT_VF(vfsgnj_vf_h) 3860 GEN_VEXT_VF(vfsgnj_vf_w) 3861 GEN_VEXT_VF(vfsgnj_vf_d) 3862 3863 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3864 { 3865 return deposit64(~b, 0, 15, a); 3866 } 3867 3868 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3869 { 3870 return deposit64(~b, 0, 31, a); 3871 } 3872 3873 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3874 { 3875 return deposit64(~b, 0, 63, a); 3876 } 3877 3878 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3879 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3880 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3881 GEN_VEXT_VV_ENV(vfsgnjn_vv_h) 3882 GEN_VEXT_VV_ENV(vfsgnjn_vv_w) 3883 GEN_VEXT_VV_ENV(vfsgnjn_vv_d) 3884 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3885 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3886 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3887 GEN_VEXT_VF(vfsgnjn_vf_h) 3888 GEN_VEXT_VF(vfsgnjn_vf_w) 3889 GEN_VEXT_VF(vfsgnjn_vf_d) 3890 3891 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3892 { 3893 return deposit64(b ^ a, 0, 15, a); 3894 } 3895 3896 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3897 { 3898 return deposit64(b ^ a, 0, 31, a); 3899 } 3900 3901 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3902 { 3903 return deposit64(b ^ a, 0, 63, a); 3904 } 3905 3906 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3907 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3908 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3909 GEN_VEXT_VV_ENV(vfsgnjx_vv_h) 3910 GEN_VEXT_VV_ENV(vfsgnjx_vv_w) 3911 GEN_VEXT_VV_ENV(vfsgnjx_vv_d) 3912 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3913 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3914 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3915 GEN_VEXT_VF(vfsgnjx_vf_h) 3916 GEN_VEXT_VF(vfsgnjx_vf_w) 3917 GEN_VEXT_VF(vfsgnjx_vf_d) 3918 3919 /* Vector Floating-Point Compare Instructions */ 3920 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3921 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3922 CPURISCVState *env, uint32_t desc) \ 3923 { \ 3924 uint32_t vm = vext_vm(desc); \ 3925 uint32_t vl = env->vl; \ 3926 uint32_t i; \ 3927 \ 3928 for (i = env->vstart; i < vl; i++) { \ 3929 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3930 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3931 if (!vm && !vext_elem_mask(v0, i)) { \ 3932 continue; \ 3933 } \ 3934 vext_set_elem_mask(vd, i, \ 3935 DO_OP(s2, s1, &env->fp_status)); \ 3936 } \ 3937 env->vstart = 0; \ 3938 } 3939 3940 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3941 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3942 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3943 3944 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3945 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3946 CPURISCVState *env, uint32_t desc) \ 3947 { \ 3948 uint32_t vm = vext_vm(desc); \ 3949 uint32_t vl = env->vl; \ 3950 uint32_t i; \ 3951 \ 3952 for (i = env->vstart; i < vl; i++) { \ 3953 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3954 if (!vm && !vext_elem_mask(v0, i)) { \ 3955 continue; \ 3956 } \ 3957 vext_set_elem_mask(vd, i, \ 3958 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3959 } \ 3960 env->vstart = 0; \ 3961 } 3962 3963 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3964 
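/*
 * Note on GEN_VEXT_CMP_VV_ENV and GEN_VEXT_CMP_VF (defined above): these
 * generators produce mask results rather than element results.  For each
 * active element i the boolean value of DO_OP is deposited into bit i of
 * vd via vext_set_elem_mask(); elements masked off by v0 are skipped, so
 * their previous mask bits in vd are left untouched by the helper.  The
 * vmfeq variants use the *_eq_quiet comparisons, so a quiet-NaN operand
 * simply compares unordered (the element's mask bit becomes 0) without
 * raising float_flag_invalid; only signaling NaNs set the invalid flag.
 */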
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3965 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3966 3967 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3968 { 3969 FloatRelation compare = float16_compare_quiet(a, b, s); 3970 return compare != float_relation_equal; 3971 } 3972 3973 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 3974 { 3975 FloatRelation compare = float32_compare_quiet(a, b, s); 3976 return compare != float_relation_equal; 3977 } 3978 3979 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 3980 { 3981 FloatRelation compare = float64_compare_quiet(a, b, s); 3982 return compare != float_relation_equal; 3983 } 3984 3985 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 3986 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 3987 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 3988 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 3989 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 3990 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 3991 3992 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 3993 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 3994 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 3995 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 3996 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 3997 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 3998 3999 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4000 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4001 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4002 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4003 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4004 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4005 4006 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4007 { 4008 FloatRelation compare = float16_compare(a, b, s); 4009 return compare == float_relation_greater; 4010 } 4011 4012 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4013 { 4014 FloatRelation compare = float32_compare(a, b, s); 4015 return compare == float_relation_greater; 4016 } 4017 4018 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4019 { 4020 FloatRelation compare = float64_compare(a, b, s); 4021 return compare == float_relation_greater; 4022 } 4023 4024 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4025 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4026 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4027 4028 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4029 { 4030 FloatRelation compare = float16_compare(a, b, s); 4031 return compare == float_relation_greater || 4032 compare == float_relation_equal; 4033 } 4034 4035 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4036 { 4037 FloatRelation compare = float32_compare(a, b, s); 4038 return compare == float_relation_greater || 4039 compare == float_relation_equal; 4040 } 4041 4042 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4043 { 4044 FloatRelation compare = float64_compare(a, b, s); 4045 return compare == float_relation_greater || 4046 compare == float_relation_equal; 4047 } 4048 4049 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4050 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4051 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4052 4053 /* Vector Floating-Point Classify Instruction */ 4054 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4055 static void do_##NAME(void *vd, void *vs2, int i) \ 
4056 { \ 4057 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4058 *((TD *)vd + HD(i)) = OP(s2); \ 4059 } 4060 4061 #define GEN_VEXT_V(NAME) \ 4062 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4063 CPURISCVState *env, uint32_t desc) \ 4064 { \ 4065 uint32_t vm = vext_vm(desc); \ 4066 uint32_t vl = env->vl; \ 4067 uint32_t i; \ 4068 \ 4069 for (i = env->vstart; i < vl; i++) { \ 4070 if (!vm && !vext_elem_mask(v0, i)) { \ 4071 continue; \ 4072 } \ 4073 do_##NAME(vd, vs2, i); \ 4074 } \ 4075 env->vstart = 0; \ 4076 } 4077 4078 target_ulong fclass_h(uint64_t frs1) 4079 { 4080 float16 f = frs1; 4081 bool sign = float16_is_neg(f); 4082 4083 if (float16_is_infinity(f)) { 4084 return sign ? 1 << 0 : 1 << 7; 4085 } else if (float16_is_zero(f)) { 4086 return sign ? 1 << 3 : 1 << 4; 4087 } else if (float16_is_zero_or_denormal(f)) { 4088 return sign ? 1 << 2 : 1 << 5; 4089 } else if (float16_is_any_nan(f)) { 4090 float_status s = { }; /* for snan_bit_is_one */ 4091 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4092 } else { 4093 return sign ? 1 << 1 : 1 << 6; 4094 } 4095 } 4096 4097 target_ulong fclass_s(uint64_t frs1) 4098 { 4099 float32 f = frs1; 4100 bool sign = float32_is_neg(f); 4101 4102 if (float32_is_infinity(f)) { 4103 return sign ? 1 << 0 : 1 << 7; 4104 } else if (float32_is_zero(f)) { 4105 return sign ? 1 << 3 : 1 << 4; 4106 } else if (float32_is_zero_or_denormal(f)) { 4107 return sign ? 1 << 2 : 1 << 5; 4108 } else if (float32_is_any_nan(f)) { 4109 float_status s = { }; /* for snan_bit_is_one */ 4110 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4111 } else { 4112 return sign ? 1 << 1 : 1 << 6; 4113 } 4114 } 4115 4116 target_ulong fclass_d(uint64_t frs1) 4117 { 4118 float64 f = frs1; 4119 bool sign = float64_is_neg(f); 4120 4121 if (float64_is_infinity(f)) { 4122 return sign ? 1 << 0 : 1 << 7; 4123 } else if (float64_is_zero(f)) { 4124 return sign ? 1 << 3 : 1 << 4; 4125 } else if (float64_is_zero_or_denormal(f)) { 4126 return sign ? 1 << 2 : 1 << 5; 4127 } else if (float64_is_any_nan(f)) { 4128 float_status s = { }; /* for snan_bit_is_one */ 4129 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4130 } else { 4131 return sign ? 1 << 1 : 1 << 6; 4132 } 4133 } 4134 4135 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4136 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4137 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4138 GEN_VEXT_V(vfclass_v_h) 4139 GEN_VEXT_V(vfclass_v_w) 4140 GEN_VEXT_V(vfclass_v_d) 4141 4142 /* Vector Floating-Point Merge Instruction */ 4143 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4144 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4145 CPURISCVState *env, uint32_t desc) \ 4146 { \ 4147 uint32_t vm = vext_vm(desc); \ 4148 uint32_t vl = env->vl; \ 4149 uint32_t i; \ 4150 \ 4151 for (i = env->vstart; i < vl; i++) { \ 4152 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4153 *((ETYPE *)vd + H(i)) \ 4154 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4155 } \ 4156 env->vstart = 0; \ 4157 } 4158 4159 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4160 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4161 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4162 4163 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4164 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
*/ 4165 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4166 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4167 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4168 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) 4169 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) 4170 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) 4171 4172 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4173 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4174 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4175 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4176 GEN_VEXT_V_ENV(vfcvt_x_f_v_h) 4177 GEN_VEXT_V_ENV(vfcvt_x_f_v_w) 4178 GEN_VEXT_V_ENV(vfcvt_x_f_v_d) 4179 4180 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4181 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4182 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4183 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4184 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) 4185 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) 4186 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) 4187 4188 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4189 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4190 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4191 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4192 GEN_VEXT_V_ENV(vfcvt_f_x_v_h) 4193 GEN_VEXT_V_ENV(vfcvt_f_x_v_w) 4194 GEN_VEXT_V_ENV(vfcvt_f_x_v_d) 4195 4196 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4197 /* (TD, T2, TX2) */ 4198 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4199 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4200 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4201 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4202 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4203 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4204 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) 4205 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) 4206 4207 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4208 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4209 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4210 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) 4211 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) 4212 4213 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4214 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4215 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4216 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4217 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) 4218 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) 4219 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) 4220 4221 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4222 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4223 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4224 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4225 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) 4226 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) 4227 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) 4228 4229 /* 4230 * vfwcvt.f.f.v vd, vs2, vm 4231 * Convert single-width float to double-width float. 
4232 */ 4233 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4234 { 4235 return float16_to_float32(a, true, s); 4236 } 4237 4238 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4239 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4240 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) 4241 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) 4242 4243 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4244 /* (TD, T2, TX2) */ 4245 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4246 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4247 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4248 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4249 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4250 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4251 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4252 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) 4253 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) 4254 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) 4255 4256 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4257 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4258 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4259 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4260 GEN_VEXT_V_ENV(vfncvt_x_f_w_b) 4261 GEN_VEXT_V_ENV(vfncvt_x_f_w_h) 4262 GEN_VEXT_V_ENV(vfncvt_x_f_w_w) 4263 4264 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4265 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4266 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4267 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) 4268 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) 4269 4270 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4271 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4272 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4273 GEN_VEXT_V_ENV(vfncvt_f_x_w_h) 4274 GEN_VEXT_V_ENV(vfncvt_f_x_w_w) 4275 4276 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4277 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4278 { 4279 return float32_to_float16(a, true, s); 4280 } 4281 4282 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4283 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4284 GEN_VEXT_V_ENV(vfncvt_f_f_w_h) 4285 GEN_VEXT_V_ENV(vfncvt_f_f_w_w) 4286 4287 /* 4288 *** Vector Reduction Operations 4289 */ 4290 /* Vector Single-Width Integer Reduction Instructions */ 4291 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4292 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4293 void *vs2, CPURISCVState *env, uint32_t desc) \ 4294 { \ 4295 uint32_t vm = vext_vm(desc); \ 4296 uint32_t vl = env->vl; \ 4297 uint32_t i; \ 4298 TD s1 = *((TD *)vs1 + HD(0)); \ 4299 \ 4300 for (i = env->vstart; i < vl; i++) { \ 4301 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4302 if (!vm && !vext_elem_mask(v0, i)) { \ 4303 continue; \ 4304 } \ 4305 s1 = OP(s1, (TD)s2); \ 4306 } \ 4307 *((TD *)vd + HD(0)) = s1; \ 4308 env->vstart = 0; \ 4309 } 4310 4311 /* vd[0] = sum(vs1[0], vs2[*]) */ 4312 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4313 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4314 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4315 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4316 4317 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4318 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4319 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4320 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4321 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4322 4323 /* vd[0] = max(vs1[0], vs2[*]) */ 4324 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4325 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4326 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4327 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4328 4329 /* vd[0] = minu(vs1[0], vs2[*]) */ 4330 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4331 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4332 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4333 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4334 4335 /* vd[0] = min(vs1[0], vs2[*]) */ 4336 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4337 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4338 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4339 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4340 4341 /* vd[0] = and(vs1[0], vs2[*]) */ 4342 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4343 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4344 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4345 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4346 4347 /* vd[0] = or(vs1[0], vs2[*]) */ 4348 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4349 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4350 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4351 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4352 4353 /* vd[0] = xor(vs1[0], vs2[*]) */ 4354 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4355 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4356 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4357 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4358 4359 /* Vector Widening Integer Reduction Instructions */ 4360 
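/*
 * The widening reductions below reuse GEN_VEXT_RED with a destination type
 * (TD) twice as wide as the source element type (TS2), so the (TD)s2 cast
 * in the generated loop performs the widening before the accumulation:
 * sign-extension for the signed sources of vwredsum, zero-extension for the
 * unsigned sources of vwredsumu.  As an illustrative sketch (not the literal
 * macro expansion), vwredsum_vs_b behaves like:
 *
 *     int16_t s1 = *((int16_t *)vs1 + H2(0));
 *     for (i = env->vstart; i < vl; i++) {
 *         if (vm || vext_elem_mask(v0, i)) {
 *             s1 = s1 + (int16_t)(*((int8_t *)vs2 + H1(i)));
 *         }
 *     }
 *     *((int16_t *)vd + H2(0)) = s1;
 *     env->vstart = 0;
 */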
/* signed sum reduction into double-width accumulator */ 4361 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4362 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4363 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4364 4365 /* Unsigned sum reduction into double-width accumulator */ 4366 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4367 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4368 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4369 4370 /* Vector Single-Width Floating-Point Reduction Instructions */ 4371 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4372 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4373 void *vs2, CPURISCVState *env, \ 4374 uint32_t desc) \ 4375 { \ 4376 uint32_t vm = vext_vm(desc); \ 4377 uint32_t vl = env->vl; \ 4378 uint32_t i; \ 4379 TD s1 = *((TD *)vs1 + HD(0)); \ 4380 \ 4381 for (i = env->vstart; i < vl; i++) { \ 4382 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4383 if (!vm && !vext_elem_mask(v0, i)) { \ 4384 continue; \ 4385 } \ 4386 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4387 } \ 4388 *((TD *)vd + HD(0)) = s1; \ 4389 env->vstart = 0; \ 4390 } 4391 4392 /* Unordered sum */ 4393 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4394 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4395 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4396 4397 /* Maximum value */ 4398 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4399 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4400 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4401 4402 /* Minimum value */ 4403 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4404 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4405 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4406 4407 /* Vector Widening Floating-Point Reduction Instructions */ 4408 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4409 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4410 void *vs2, CPURISCVState *env, uint32_t desc) 4411 { 4412 uint32_t vm = vext_vm(desc); 4413 uint32_t vl = env->vl; 4414 uint32_t i; 4415 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4416 4417 for (i = env->vstart; i < vl; i++) { 4418 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4419 if (!vm && !vext_elem_mask(v0, i)) { 4420 continue; 4421 } 4422 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4423 &env->fp_status); 4424 } 4425 *((uint32_t *)vd + H4(0)) = s1; 4426 env->vstart = 0; 4427 } 4428 4429 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4430 void *vs2, CPURISCVState *env, uint32_t desc) 4431 { 4432 uint32_t vm = vext_vm(desc); 4433 uint32_t vl = env->vl; 4434 uint32_t i; 4435 uint64_t s1 = *((uint64_t *)vs1); 4436 4437 for (i = env->vstart; i < vl; i++) { 4438 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4439 if (!vm && !vext_elem_mask(v0, i)) { 4440 continue; 4441 } 4442 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4443 &env->fp_status); 4444 } 4445 *((uint64_t *)vd) = s1; 4446 env->vstart = 0; 4447 } 4448 4449 /* 4450 *** Vector Mask Operations 4451 */ 4452 /* Vector Mask-Register Logical Instructions */ 4453 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4454 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4455 void *vs2, CPURISCVState *env, \ 4456 uint32_t desc) \ 
4457 { \ 4458 uint32_t vl = env->vl; \ 4459 uint32_t i; \ 4460 int a, b; \ 4461 \ 4462 for (i = env->vstart; i < vl; i++) { \ 4463 a = vext_elem_mask(vs1, i); \ 4464 b = vext_elem_mask(vs2, i); \ 4465 vext_set_elem_mask(vd, i, OP(b, a)); \ 4466 } \ 4467 env->vstart = 0; \ 4468 } 4469 4470 #define DO_NAND(N, M) (!(N & M)) 4471 #define DO_ANDNOT(N, M) (N & !M) 4472 #define DO_NOR(N, M) (!(N | M)) 4473 #define DO_ORNOT(N, M) (N | !M) 4474 #define DO_XNOR(N, M) (!(N ^ M)) 4475 4476 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4477 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4478 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4479 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4480 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4481 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4482 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4483 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4484 4485 /* Vector count population in mask vcpop */ 4486 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4487 uint32_t desc) 4488 { 4489 target_ulong cnt = 0; 4490 uint32_t vm = vext_vm(desc); 4491 uint32_t vl = env->vl; 4492 int i; 4493 4494 for (i = env->vstart; i < vl; i++) { 4495 if (vm || vext_elem_mask(v0, i)) { 4496 if (vext_elem_mask(vs2, i)) { 4497 cnt++; 4498 } 4499 } 4500 } 4501 env->vstart = 0; 4502 return cnt; 4503 } 4504 4505 /* vfirst find-first-set mask bit*/ 4506 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4507 uint32_t desc) 4508 { 4509 uint32_t vm = vext_vm(desc); 4510 uint32_t vl = env->vl; 4511 int i; 4512 4513 for (i = env->vstart; i < vl; i++) { 4514 if (vm || vext_elem_mask(v0, i)) { 4515 if (vext_elem_mask(vs2, i)) { 4516 return i; 4517 } 4518 } 4519 } 4520 env->vstart = 0; 4521 return -1LL; 4522 } 4523 4524 enum set_mask_type { 4525 ONLY_FIRST = 1, 4526 INCLUDE_FIRST, 4527 BEFORE_FIRST, 4528 }; 4529 4530 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4531 uint32_t desc, enum set_mask_type type) 4532 { 4533 uint32_t vm = vext_vm(desc); 4534 uint32_t vl = env->vl; 4535 int i; 4536 bool first_mask_bit = false; 4537 4538 for (i = env->vstart; i < vl; i++) { 4539 if (!vm && !vext_elem_mask(v0, i)) { 4540 continue; 4541 } 4542 /* write a zero to all following active elements */ 4543 if (first_mask_bit) { 4544 vext_set_elem_mask(vd, i, 0); 4545 continue; 4546 } 4547 if (vext_elem_mask(vs2, i)) { 4548 first_mask_bit = true; 4549 if (type == BEFORE_FIRST) { 4550 vext_set_elem_mask(vd, i, 0); 4551 } else { 4552 vext_set_elem_mask(vd, i, 1); 4553 } 4554 } else { 4555 if (type == ONLY_FIRST) { 4556 vext_set_elem_mask(vd, i, 0); 4557 } else { 4558 vext_set_elem_mask(vd, i, 1); 4559 } 4560 } 4561 } 4562 env->vstart = 0; 4563 } 4564 4565 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4566 uint32_t desc) 4567 { 4568 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4569 } 4570 4571 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4572 uint32_t desc) 4573 { 4574 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4575 } 4576 4577 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4578 uint32_t desc) 4579 { 4580 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4581 } 4582 4583 /* Vector Iota Instruction */ 4584 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4585 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4586 uint32_t desc) \ 4587 { \ 4588 uint32_t vm = vext_vm(desc); \ 4589 uint32_t vl = env->vl; \ 4590 uint32_t sum = 0; \ 4591 int i; \ 4592 \ 4593 for (i = env->vstart; i < vl; i++) { \ 4594 if (!vm && !vext_elem_mask(v0, i)) { \ 4595 
continue; \ 4596 } \ 4597 *((ETYPE *)vd + H(i)) = sum; \ 4598 if (vext_elem_mask(vs2, i)) { \ 4599 sum++; \ 4600 } \ 4601 } \ 4602 env->vstart = 0; \ 4603 } 4604 4605 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4606 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4607 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4608 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4609 4610 /* Vector Element Index Instruction */ 4611 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4612 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4613 { \ 4614 uint32_t vm = vext_vm(desc); \ 4615 uint32_t vl = env->vl; \ 4616 int i; \ 4617 \ 4618 for (i = env->vstart; i < vl; i++) { \ 4619 if (!vm && !vext_elem_mask(v0, i)) { \ 4620 continue; \ 4621 } \ 4622 *((ETYPE *)vd + H(i)) = i; \ 4623 } \ 4624 env->vstart = 0; \ 4625 } 4626 4627 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4628 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4629 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4630 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4631 4632 /* 4633 *** Vector Permutation Instructions 4634 */ 4635 4636 /* Vector Slide Instructions */ 4637 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4638 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4639 CPURISCVState *env, uint32_t desc) \ 4640 { \ 4641 uint32_t vm = vext_vm(desc); \ 4642 uint32_t vl = env->vl; \ 4643 target_ulong offset = s1, i_min, i; \ 4644 \ 4645 i_min = MAX(env->vstart, offset); \ 4646 for (i = i_min; i < vl; i++) { \ 4647 if (!vm && !vext_elem_mask(v0, i)) { \ 4648 continue; \ 4649 } \ 4650 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4651 } \ 4652 } 4653 4654 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4655 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4656 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4657 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4658 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4659 4660 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4661 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4662 CPURISCVState *env, uint32_t desc) \ 4663 { \ 4664 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4665 uint32_t vm = vext_vm(desc); \ 4666 uint32_t vl = env->vl; \ 4667 target_ulong i_max, i; \ 4668 \ 4669 i_max = MAX(MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl), env->vstart); \ 4670 for (i = env->vstart; i < i_max; ++i) { \ 4671 if (vm || vext_elem_mask(v0, i)) { \ 4672 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4673 } \ 4674 } \ 4675 \ 4676 for (i = i_max; i < vl; ++i) { \ 4677 if (vm || vext_elem_mask(v0, i)) { \ 4678 *((ETYPE *)vd + H(i)) = 0; \ 4679 } \ 4680 } \ 4681 \ 4682 env->vstart = 0; \ 4683 } 4684 4685 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4686 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4687 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4688 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4689 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4690 4691 #define GEN_VEXT_VSLIE1UP(ESZ, H) \ 4692 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4693 CPURISCVState *env, uint32_t desc) \ 4694 { \ 4695 typedef uint##ESZ##_t ETYPE; \ 4696 uint32_t vm = vext_vm(desc); \ 4697 uint32_t vl = env->vl; \ 4698 uint32_t i; \ 4699 \ 4700 for (i = env->vstart; i < vl; i++) { \ 4701 if (!vm && !vext_elem_mask(v0, i)) { \ 4702 continue; \ 4703 } \ 4704 if (i == 0) { \ 4705 *((ETYPE *)vd + H(i)) = s1; \ 4706 } else { \ 4707 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4708 } \ 4709 } \ 4710 env->vstart = 0; \ 4711 } 4712 4713 GEN_VEXT_VSLIE1UP(8, H1) 4714 GEN_VEXT_VSLIE1UP(16, H2) 4715 GEN_VEXT_VSLIE1UP(32, H4) 4716 GEN_VEXT_VSLIE1UP(64, H8) 4717 4718 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \ 4719 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4720 CPURISCVState *env, uint32_t desc) \ 4721 { \ 4722 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4723 } 4724 4725 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4726 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4727 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4728 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4729 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4730 4731 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \ 4732 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4733 CPURISCVState *env, uint32_t desc) \ 4734 { \ 4735 typedef uint##ESZ##_t ETYPE; \ 4736 uint32_t vm = vext_vm(desc); \ 4737 uint32_t vl = env->vl; \ 4738 uint32_t i; \ 4739 \ 4740 for (i = env->vstart; i < vl; i++) { \ 4741 if (!vm && !vext_elem_mask(v0, i)) { \ 4742 continue; \ 4743 } \ 4744 if (i == vl - 1) { \ 4745 *((ETYPE *)vd + H(i)) = s1; \ 4746 } else { \ 4747 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4748 } \ 4749 } \ 4750 env->vstart = 0; \ 4751 } 4752 4753 GEN_VEXT_VSLIDE1DOWN(8, H1) 4754 GEN_VEXT_VSLIDE1DOWN(16, H2) 4755 GEN_VEXT_VSLIDE1DOWN(32, H4) 4756 GEN_VEXT_VSLIDE1DOWN(64, H8) 4757 4758 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \ 4759 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4760 CPURISCVState *env, uint32_t desc) \ 4761 { \ 4762 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4763 } 4764 4765 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4766 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4767 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4768 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4769 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4770 4771 /* Vector Floating-Point Slide Instructions */ 4772 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \ 4773 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4774 CPURISCVState *env, uint32_t desc) \ 4775 { \ 4776 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4777 } 4778 4779 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = 
vs2[i] */ 4780 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4781 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4782 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4783 4784 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \ 4785 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4786 CPURISCVState *env, uint32_t desc) \ 4787 { \ 4788 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4789 } 4790 4791 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4792 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4793 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4794 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4795 4796 /* Vector Register Gather Instruction */ 4797 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4798 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4799 CPURISCVState *env, uint32_t desc) \ 4800 { \ 4801 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4802 uint32_t vm = vext_vm(desc); \ 4803 uint32_t vl = env->vl; \ 4804 uint64_t index; \ 4805 uint32_t i; \ 4806 \ 4807 for (i = env->vstart; i < vl; i++) { \ 4808 if (!vm && !vext_elem_mask(v0, i)) { \ 4809 continue; \ 4810 } \ 4811 index = *((TS1 *)vs1 + HS1(i)); \ 4812 if (index >= vlmax) { \ 4813 *((TS2 *)vd + HS2(i)) = 0; \ 4814 } else { \ 4815 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4816 } \ 4817 } \ 4818 env->vstart = 0; \ 4819 } 4820 4821 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 4822 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4823 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4824 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4825 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4826 4827 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4828 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4829 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4830 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4831 4832 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4833 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4834 CPURISCVState *env, uint32_t desc) \ 4835 { \ 4836 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4837 uint32_t vm = vext_vm(desc); \ 4838 uint32_t vl = env->vl; \ 4839 uint64_t index = s1; \ 4840 uint32_t i; \ 4841 \ 4842 for (i = env->vstart; i < vl; i++) { \ 4843 if (!vm && !vext_elem_mask(v0, i)) { \ 4844 continue; \ 4845 } \ 4846 if (index >= vlmax) { \ 4847 *((ETYPE *)vd + H(i)) = 0; \ 4848 } else { \ 4849 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4850 } \ 4851 } \ 4852 env->vstart = 0; \ 4853 } 4854 4855 /* vd[i] = (x[rs1] >= VLMAX) ? 
0 : vs2[rs1] */ 4856 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4857 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4858 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4859 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4860 4861 /* Vector Compress Instruction */ 4862 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4863 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4864 CPURISCVState *env, uint32_t desc) \ 4865 { \ 4866 uint32_t vl = env->vl; \ 4867 uint32_t num = 0, i; \ 4868 \ 4869 for (i = env->vstart; i < vl; i++) { \ 4870 if (!vext_elem_mask(vs1, i)) { \ 4871 continue; \ 4872 } \ 4873 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4874 num++; \ 4875 } \ 4876 env->vstart = 0; \ 4877 } 4878 4879 /* Compress into vd elements of vs2 where vs1 is enabled */ 4880 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4881 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4882 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4883 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4884 4885 /* Vector Whole Register Move */ 4886 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) 4887 { 4888 /* EEW = SEW */ 4889 uint32_t maxsz = simd_maxsz(desc); 4890 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); 4891 uint32_t startb = env->vstart * sewb; 4892 uint32_t i = startb; 4893 4894 memcpy((uint8_t *)vd + H1(i), 4895 (uint8_t *)vs2 + H1(i), 4896 maxsz - startb); 4897 4898 env->vstart = 0; 4899 } 4900 4901 /* Vector Integer Extension */ 4902 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \ 4903 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4904 CPURISCVState *env, uint32_t desc) \ 4905 { \ 4906 uint32_t vl = env->vl; \ 4907 uint32_t vm = vext_vm(desc); \ 4908 uint32_t i; \ 4909 \ 4910 for (i = env->vstart; i < vl; i++) { \ 4911 if (!vm && !vext_elem_mask(v0, i)) { \ 4912 continue; \ 4913 } \ 4914 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ 4915 } \ 4916 env->vstart = 0; \ 4917 } 4918 4919 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) 4920 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2) 4921 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4) 4922 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1) 4923 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2) 4924 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1) 4925 4926 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1) 4927 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2) 4928 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) 4929 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) 4930 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) 4931 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) 4932
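/*
 * Note on GEN_VEXT_INT_EXT: each active destination element is produced by
 * the plain C assignment *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)),
 * so the usual integer conversion rules do the work: when DTYPE is unsigned
 * (the vzext variants) the narrow value is zero-extended, and when DTYPE is
 * signed (the vsext variants) it is sign-extended.  For example, a source
 * byte of 0x80 becomes 0x00000080 (128) for vzext_vf4_w but 0xffffff80
 * (-128) for vsext_vf4_w.
 */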