/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

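/*
 * Illustrative note: on a big-endian host, H1(0) == 7, so byte element 0 is
 * accessed at host byte offset 7 of its 64-bit chunk, i.e. the chunk's least
 * significant byte, matching the element order a little-endian layout would
 * give. H2 and H4 apply the same index flip at 16-bit and 32-bit granularity;
 * H8 is the identity because the chunks themselves are 64 bits wide.
 */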
static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

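/*
 * Worked example: with VLEN = 128 bits (vlenb = 16), SEW = 32 (esz = 2) and
 * LMUL = 1/2 (lmul = -1), scale = -1 - 2 = -3 and VLMAX = 16 >> 3 = 2,
 * which matches LMUL * VLEN / SEW = 0.5 * 128 / 32.
 */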
static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check. In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception,
 * or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            target_ulong addr = base + stride * i + (k << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

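/*
 * For example, a strided segment load with NF = 3 reads, for each active
 * element i, the three fields at base + stride * i + k * (1 << esz) for
 * k = 0..2, and writes them to destination element indices i, i + max_elems
 * and i + 2 * max_elems: field k of every segment lands in the k-th register
 * group of the destination, which is what the index i + k * max_elems above
 * encodes.
 */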
#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void * v0, target_ulong base,               \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                         \
{                                                                        \
    uint32_t vm = vext_vm(desc);                                         \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,       \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);       \
}

GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                         \
{                                                                        \
    uint32_t vm = vext_vm(desc);                                         \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);      \
}

GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
             uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

/*
 * A masked unit-stride load or store operation is handled as a special
 * case of the strided form, with stride = NF * sizeof(ETYPE).
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
                         CPURISCVState *env, uint32_t desc)              \
{                                                                        \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));              \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,    \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);       \
}                                                                        \
                                                                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                           \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD);  \
}

GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
                         CPURISCVState *env, uint32_t desc)              \
{                                                                        \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));              \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);      \
}                                                                        \
                                                                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
}

GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 *** unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC(), MMU_DATA_LOAD);
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC(), MMU_DATA_STORE);
}

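/*
 * For example, with vl = 17, vlm.v/vsm.v transfer evl = (17 + 7) / 8 = 3
 * bytes: the mask holds one bit per element, so only ceil(vl/8) bytes need
 * to touch memory, and the element size passed down is 0 (byte elements)
 * regardless of SEW.
 */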
/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
                  void *vs2, CPURISCVState *env, uint32_t desc)             \
{                                                                           \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                 \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);  \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC(), MMU_DATA_STORE);                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << esz));
        if (i == 0) {
            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

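/*
 * For example, if a vle32ff.v with vl = 8 would fault while probing element
 * 5, the probe loop above sets vl = 5 and jumps to ProbeSuccess, so env->vl
 * is shrunk to 5 and only elements 0..4 are loaded. Element 0 is always
 * probed with probe_pages and therefore still traps if even the first
 * element is inaccessible, which is what the fault-only-first instructions
 * require.
 */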
#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/* Unsigned min/max */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)

/*
 *** load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
                MMUAccessType access_type)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
    uint32_t max_elems = vlenb >> esz;

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /* load/store rest of elements of current segment pointed to by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
        }
        k++;
    }

    /* load/store elements for rest of segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}

#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
                    ctzl(sizeof(ETYPE)), GETPC(),    \
                    MMU_DATA_LOAD);                  \
}

GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
                    ctzl(sizeof(ETYPE)), GETPC(),    \
                    MMU_DATA_STORE);                 \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)

/*
 *** Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)

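/*
 * RVVCALL forces an extra round of macro expansion, so an argument such as
 * OP_SSS_B is expanded into its five type parameters before the inner macro
 * is invoked. For example,
 *     RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
 * becomes
 *     OPIVV2(vadd_vv_b, int8_t, int8_t, int8_t, int8_t, int8_t,
 *            H1, H1, H1, DO_ADD)
 * which is how do_vadd_vv_b() below ends up operating on int8_t elements.
 */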
/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
}
#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivv2_fn *fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
    env->vstart = 0;
}

/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME)                                 \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    do_vext_vv(vd, v0, vs1, vs2, env, desc,               \
               do_##NAME);                                \
}

GEN_VEXT_VV(vadd_vv_b)
GEN_VEXT_VV(vadd_vv_h)
GEN_VEXT_VV(vadd_vv_w)
GEN_VEXT_VV(vadd_vv_d)
GEN_VEXT_VV(vsub_vv_b)
GEN_VEXT_VV(vsub_vv_h)
GEN_VEXT_VV(vsub_vv_w)
GEN_VEXT_VV(vsub_vv_d)

typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operand type.
 * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
 */
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
}

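/*
 * For example, for vwadd_vx_b (WOP_SSS_B) T1 is int8_t and TX1 is int16_t:
 * the scalar in s1 is first truncated to the single-width element type by
 * (T1)s1, then sign-extended to the double-width operand type by the outer
 * (TX1) cast, so the addition itself is performed at double width.
 */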
RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       opivx2_fn fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, s1, vs2, i);
    }
    env->vstart = 0;
}

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME)                                 \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    do_vext_vx(vd, v0, s1, vs2, env, desc,                \
               do_##NAME);                                \
}

GEN_VEXT_VX(vadd_vx_b)
GEN_VEXT_VX(vadd_vx_h)
GEN_VEXT_VX(vadd_vx_w)
GEN_VEXT_VX(vadd_vx_d)
GEN_VEXT_VX(vsub_vx_b)
GEN_VEXT_VX(vsub_vx_h)
GEN_VEXT_VX(vsub_vx_w)
GEN_VEXT_VX(vsub_vx_d)
GEN_VEXT_VX(vrsub_vx_b)
GEN_VEXT_VX(vrsub_vx_h)
GEN_VEXT_VX(vrsub_vx_w)
GEN_VEXT_VX(vrsub_vx_d)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b)
GEN_VEXT_VV(vwaddu_vv_h)
GEN_VEXT_VV(vwaddu_vv_w)
GEN_VEXT_VV(vwsubu_vv_b)
GEN_VEXT_VV(vwsubu_vv_h)
GEN_VEXT_VV(vwsubu_vv_w)
GEN_VEXT_VV(vwadd_vv_b)
GEN_VEXT_VV(vwadd_vv_h)
GEN_VEXT_VV(vwadd_vv_w)
GEN_VEXT_VV(vwsub_vv_b)
GEN_VEXT_VV(vwsub_vv_h)
GEN_VEXT_VV(vwsub_vv_w)
GEN_VEXT_VV(vwaddu_wv_b)
GEN_VEXT_VV(vwaddu_wv_h)
GEN_VEXT_VV(vwaddu_wv_w)
GEN_VEXT_VV(vwsubu_wv_b)
GEN_VEXT_VV(vwsubu_wv_h)
GEN_VEXT_VV(vwsubu_wv_w)
GEN_VEXT_VV(vwadd_wv_b)
GEN_VEXT_VV(vwadd_wv_h)
GEN_VEXT_VV(vwadd_wv_w)
GEN_VEXT_VV(vwsub_wv_b)
GEN_VEXT_VV(vwsub_wv_h)
GEN_VEXT_VV(vwsub_wv_w)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b)
GEN_VEXT_VX(vwaddu_vx_h)
GEN_VEXT_VX(vwaddu_vx_w)
GEN_VEXT_VX(vwsubu_vx_b)
GEN_VEXT_VX(vwsubu_vx_h)
GEN_VEXT_VX(vwsubu_vx_w)
GEN_VEXT_VX(vwadd_vx_b)
GEN_VEXT_VX(vwadd_vx_h)
GEN_VEXT_VX(vwadd_vx_w)
GEN_VEXT_VX(vwsub_vx_b)
GEN_VEXT_VX(vwsub_vx_h)
GEN_VEXT_VX(vwsub_vx_w)
GEN_VEXT_VX(vwaddu_wx_b)
GEN_VEXT_VX(vwaddu_wx_h)
GEN_VEXT_VX(vwaddu_wx_w)
GEN_VEXT_VX(vwsubu_wx_b)
GEN_VEXT_VX(vwsubu_wx_h)
GEN_VEXT_VX(vwsubu_wx_w)
GEN_VEXT_VX(vwadd_wx_b)
GEN_VEXT_VX(vwadd_wx_h)
GEN_VEXT_VX(vwadd_wx_w)
GEN_VEXT_VX(vwsub_wx_b)
GEN_VEXT_VX(vwsub_wx_h)
GEN_VEXT_VX(vwsub_wx_w)

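/*
 * The WOP_* type lists above encode the vwadd.vv form (2*SEW = SEW + SEW),
 * while the WOP_W* lists encode the vwadd.wv form (2*SEW = 2*SEW + SEW): in
 * the latter, T2/vs2 is already a double-width element, so only vs1 (and the
 * scalar in the .wx variants) is widened before the operation.
 */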
/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t i;                                                          \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
                          (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)

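/*
 * DO_MADC computes the carry-out of an unsigned add: without a carry-in the
 * truncated sum N + M wraps exactly when it is smaller than N, and with a
 * carry-in the sum N + M + 1 wraps when it is smaller than or equal to N.
 * For example, with uint8_t N = 200, M = 55, C = 1: N + M + 1 truncates to
 * 0 and 0 <= 200, so the carry-out mask bit is 1. DO_MSBC is the borrow-out
 * of N - M - C by the same reasoning.
 */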
#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    uint32_t vl = env->vl;                                      \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t i;                                                 \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
        ETYPE carry = !vm && vext_elem_mask(v0, i);             \
        vext_set_elem_mask(vd, i,                               \
                DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
    }                                                           \
    env->vstart = 0;                                            \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b)
GEN_VEXT_VV(vand_vv_h)
GEN_VEXT_VV(vand_vv_w)
GEN_VEXT_VV(vand_vv_d)
GEN_VEXT_VV(vor_vv_b)
GEN_VEXT_VV(vor_vv_h)
GEN_VEXT_VV(vor_vv_w)
GEN_VEXT_VV(vor_vv_d)
GEN_VEXT_VV(vxor_vv_b)
GEN_VEXT_VV(vxor_vv_h)
GEN_VEXT_VV(vxor_vv_w)
GEN_VEXT_VV(vxor_vv_d)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b)
GEN_VEXT_VX(vand_vx_h)
GEN_VEXT_VX(vand_vx_w)
GEN_VEXT_VX(vand_vx_d)
GEN_VEXT_VX(vor_vx_b)
GEN_VEXT_VX(vor_vx_h)
GEN_VEXT_VX(vor_vx_w)
GEN_VEXT_VX(vor_vx_d)
GEN_VEXT_VX(vxor_vx_b)
GEN_VEXT_VX(vxor_vx_h)
GEN_VEXT_VX(vxor_vx_w)
GEN_VEXT_VX(vxor_vx_d)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M)  (N << (M))
#define DO_SRL(N, M)  (N >> (M))

/* generate the helpers for shift instructions with two vector operators */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                          \
                  void *vs2, CPURISCVState *env, uint32_t desc)           \
{                                                                         \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            continue;                                                     \
        }                                                                 \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                                  \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                                  \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                        \
    }                                                                     \
    env->vstart = 0;                                                      \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* generate the helpers for shift instructions with one vector and one scalar */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK)     \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t vl = env->vl;                                      \
    uint32_t i;                                                 \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        if (!vm && !vext_elem_mask(v0, i)) {                    \
            continue;                                           \
        }                                                       \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                        \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);                \
    }                                                           \
    env->vstart = 0;                                            \
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t i;                                                     \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                DO_OP(s2, (ETYPE)(target_long)s1));                 \
    }                                                               \
    env->vstart = 0;                                                \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b)
GEN_VEXT_VV(vminu_vv_h)
GEN_VEXT_VV(vminu_vv_w)
GEN_VEXT_VV(vminu_vv_d)
GEN_VEXT_VV(vmin_vv_b)
GEN_VEXT_VV(vmin_vv_h)
GEN_VEXT_VV(vmin_vv_w)
GEN_VEXT_VV(vmin_vv_d)
GEN_VEXT_VV(vmaxu_vv_b)
GEN_VEXT_VV(vmaxu_vv_h)
GEN_VEXT_VV(vmaxu_vv_w)
GEN_VEXT_VV(vmaxu_vv_d)
GEN_VEXT_VV(vmax_vv_b)
GEN_VEXT_VV(vmax_vv_h)
GEN_VEXT_VV(vmax_vv_w)
GEN_VEXT_VV(vmax_vv_d)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b)
GEN_VEXT_VX(vminu_vx_h)
GEN_VEXT_VX(vminu_vx_w)
GEN_VEXT_VX(vminu_vx_d)
GEN_VEXT_VX(vmin_vx_b)
GEN_VEXT_VX(vmin_vx_h)
GEN_VEXT_VX(vmin_vx_w)
GEN_VEXT_VX(vmin_vx_d)
GEN_VEXT_VX(vmaxu_vx_b)
GEN_VEXT_VX(vmaxu_vx_h)
GEN_VEXT_VX(vmaxu_vx_w)
GEN_VEXT_VX(vmaxu_vx_d)
GEN_VEXT_VX(vmax_vx_b)
GEN_VEXT_VX(vmax_vx_h)
GEN_VEXT_VX(vmax_vx_w)
GEN_VEXT_VX(vmax_vx_d)

/* Vector Single-Width Integer Multiply Instructions */
#define DO_MUL(N, M) (N * M)
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b)
GEN_VEXT_VV(vmul_vv_h)
GEN_VEXT_VV(vmul_vv_w)
GEN_VEXT_VV(vmul_vv_d)

static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    return (int16_t)s2 * (int16_t)s1 >> 8;
}

static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    return (int32_t)s2 * (int32_t)s1 >> 16;
}

static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    return (int64_t)s2 * (int64_t)s1 >> 32;
}

static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t hi_64, lo_64;

    muls64(&lo_64, &hi_64, s1, s2);
    return hi_64;
}

static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    return (uint16_t)s2 * (uint16_t)s1 >> 8;
}

static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    return (uint32_t)s2 * (uint32_t)s1 >> 16;
}

static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    return (uint64_t)s2 * (uint64_t)s1 >> 32;
}

static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);
    return hi_64;
}

static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    return (int16_t)s2 * (uint16_t)s1 >> 8;
}

static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    return (int32_t)s2 * (uint32_t)s1 >> 16;
}

static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    return (int64_t)s2 * (uint64_t)s1 >> 32;
}

/*
 * Let  A = signed operand,
 *      B = unsigned operand
 *      P = mulu64(A, B), unsigned product
 *
 * LET  X = 2 ** 64 - A, 2's complement of A
 *      SP = signed product
 * THEN
 *      IF A < 0
 *          SP = -X * B
 *             = -(2 ** 64 - A) * B
 *             = A * B - 2 ** 64 * B
 *             = P - 2 ** 64 * B
 *      ELSE
 *          SP = P
 * THEN
 *      HI_P -= (A < 0 ? B : 0)
 */

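/*
 * Worked check of the identity above with A = -1 and B = 2: mulu64() sees
 * A as 2**64 - 1, so P = 2**65 - 2 and the unsigned high half is 1; since
 * A < 0 we subtract B and get hi = -1, which is indeed the high 64 bits of
 * the 128-bit signed product -2.
 */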
static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);

    hi_64 -= s2 < 0 ? s1 : 0;
    return hi_64;
}

RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
GEN_VEXT_VV(vmulh_vv_b)
GEN_VEXT_VV(vmulh_vv_h)
GEN_VEXT_VV(vmulh_vv_w)
GEN_VEXT_VV(vmulh_vv_d)
GEN_VEXT_VV(vmulhu_vv_b)
GEN_VEXT_VV(vmulhu_vv_h)
GEN_VEXT_VV(vmulhu_vv_w)
GEN_VEXT_VV(vmulhu_vv_d)
GEN_VEXT_VV(vmulhsu_vv_b)
GEN_VEXT_VV(vmulhsu_vv_h)
GEN_VEXT_VV(vmulhsu_vv_w)
GEN_VEXT_VV(vmulhsu_vv_d)

RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
GEN_VEXT_VX(vmul_vx_b)
GEN_VEXT_VX(vmul_vx_h)
GEN_VEXT_VX(vmul_vx_w)
GEN_VEXT_VX(vmul_vx_d)
GEN_VEXT_VX(vmulh_vx_b)
GEN_VEXT_VX(vmulh_vx_h)
GEN_VEXT_VX(vmulh_vx_w)
GEN_VEXT_VX(vmulh_vx_d)
GEN_VEXT_VX(vmulhu_vx_b)
GEN_VEXT_VX(vmulhu_vx_h)
GEN_VEXT_VX(vmulhu_vx_w)
GEN_VEXT_VX(vmulhu_vx_d)
GEN_VEXT_VX(vmulhsu_vx_b)
GEN_VEXT_VX(vmulhsu_vx_h)
GEN_VEXT_VX(vmulhsu_vx_w)
GEN_VEXT_VX(vmulhsu_vx_d)

/* Vector Integer Divide Instructions */
#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
#define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
#define DO_REM(N, M)  (unlikely(M == 0) ? N :\
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)

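/*
 * (N == -N) is true only for 0 and for the most negative value of N's type,
 * so combined with M == -1 it catches the overflowing INT_MIN / -1 case
 * (matching N == 0 as well is harmless, since N and 0 already equal the true
 * quotient and remainder there). For example, for int32_t elements,
 * INT32_MIN / -1 yields INT32_MIN and INT32_MIN % -1 yields 0, while
 * division by zero yields an all-ones quotient and leaves the dividend as
 * the remainder, following the RISC-V division rules.
 */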
RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
GEN_VEXT_VV(vdivu_vv_b)
GEN_VEXT_VV(vdivu_vv_h)
GEN_VEXT_VV(vdivu_vv_w)
GEN_VEXT_VV(vdivu_vv_d)
GEN_VEXT_VV(vdiv_vv_b)
GEN_VEXT_VV(vdiv_vv_h)
GEN_VEXT_VV(vdiv_vv_w)
GEN_VEXT_VV(vdiv_vv_d)
GEN_VEXT_VV(vremu_vv_b)
GEN_VEXT_VV(vremu_vv_h)
GEN_VEXT_VV(vremu_vv_w)
GEN_VEXT_VV(vremu_vv_d)
GEN_VEXT_VV(vrem_vv_b)
GEN_VEXT_VV(vrem_vv_h)
GEN_VEXT_VV(vrem_vv_w)
GEN_VEXT_VV(vrem_vv_d)

RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
GEN_VEXT_VX(vdivu_vx_b)
GEN_VEXT_VX(vdivu_vx_h)
GEN_VEXT_VX(vdivu_vx_w)
GEN_VEXT_VX(vdivu_vx_d)
GEN_VEXT_VX(vdiv_vx_b)
GEN_VEXT_VX(vdiv_vx_h)
GEN_VEXT_VX(vdiv_vx_w)
GEN_VEXT_VX(vdiv_vx_d)
GEN_VEXT_VX(vremu_vx_b)
GEN_VEXT_VX(vremu_vx_h)
GEN_VEXT_VX(vremu_vx_w)
GEN_VEXT_VX(vremu_vx_d)
GEN_VEXT_VX(vrem_vx_b)
GEN_VEXT_VX(vrem_vx_h)
GEN_VEXT_VX(vrem_vx_w)
GEN_VEXT_VX(vrem_vx_d)

/* Vector Widening Integer Multiply Instructions */
RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1637 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1638 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1639 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1640 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1641 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1642 GEN_VEXT_VV(vwmul_vv_b) 1643 GEN_VEXT_VV(vwmul_vv_h) 1644 GEN_VEXT_VV(vwmul_vv_w) 1645 GEN_VEXT_VV(vwmulu_vv_b) 1646 GEN_VEXT_VV(vwmulu_vv_h) 1647 GEN_VEXT_VV(vwmulu_vv_w) 1648 GEN_VEXT_VV(vwmulsu_vv_b) 1649 GEN_VEXT_VV(vwmulsu_vv_h) 1650 GEN_VEXT_VV(vwmulsu_vv_w) 1651 1652 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1653 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1654 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1655 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1656 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1657 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1658 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1659 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1660 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1661 GEN_VEXT_VX(vwmul_vx_b) 1662 GEN_VEXT_VX(vwmul_vx_h) 1663 GEN_VEXT_VX(vwmul_vx_w) 1664 GEN_VEXT_VX(vwmulu_vx_b) 1665 GEN_VEXT_VX(vwmulu_vx_h) 1666 GEN_VEXT_VX(vwmulu_vx_w) 1667 GEN_VEXT_VX(vwmulsu_vx_b) 1668 GEN_VEXT_VX(vwmulsu_vx_h) 1669 GEN_VEXT_VX(vwmulsu_vx_w) 1670 1671 /* Vector Single-Width Integer Multiply-Add Instructions */ 1672 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1673 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1674 { \ 1675 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1676 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1677 TD d = *((TD *)vd + HD(i)); \ 1678 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1679 } 1680 1681 #define DO_MACC(N, M, D) (M * N + D) 1682 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1683 #define DO_MADD(N, M, D) (M * D + N) 1684 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1685 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1686 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1687 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1688 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1689 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1690 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1691 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1692 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1693 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1694 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1695 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1696 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1697 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1698 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1699 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1700 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1701 GEN_VEXT_VV(vmacc_vv_b) 1702 GEN_VEXT_VV(vmacc_vv_h) 1703 GEN_VEXT_VV(vmacc_vv_w) 1704 GEN_VEXT_VV(vmacc_vv_d) 1705 GEN_VEXT_VV(vnmsac_vv_b) 1706 GEN_VEXT_VV(vnmsac_vv_h) 1707 GEN_VEXT_VV(vnmsac_vv_w) 1708 GEN_VEXT_VV(vnmsac_vv_d) 1709 GEN_VEXT_VV(vmadd_vv_b) 1710 GEN_VEXT_VV(vmadd_vv_h) 1711 GEN_VEXT_VV(vmadd_vv_w) 1712 GEN_VEXT_VV(vmadd_vv_d) 1713 GEN_VEXT_VV(vnmsub_vv_b) 1714 GEN_VEXT_VV(vnmsub_vv_h) 1715 GEN_VEXT_VV(vnmsub_vv_w) 1716 GEN_VEXT_VV(vnmsub_vv_d) 
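/*
 * Illustrative sketch (not part of the build): a hand expansion of
 * RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) above,
 * assuming OP_SSS_W supplies int32_t element types.  The multiply-add
 * ops differ only in which operand is the addend: vmacc/vnmsac add to
 * the destination element d, while vmadd/vnmsub multiply by d and add
 * the vs2 element.
 */
#if 0
static void example_do_vmacc_vv_w(void *vd, void *vs1, void *vs2, int i)
{
    int32_t s1 = *((int32_t *)vs1 + H4(i));
    int32_t s2 = *((int32_t *)vs2 + H4(i));
    int32_t d = *((int32_t *)vd + H4(i));

    /* DO_MACC(N, M, D) == (M * N + D): vd[i] = vs1[i] * vs2[i] + vd[i] */
    *((int32_t *)vd + H4(i)) = s2 * s1 + d;
}
#endif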
1717 1718 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1719 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1720 { \ 1721 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1722 TD d = *((TD *)vd + HD(i)); \ 1723 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1724 } 1725 1726 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1727 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1728 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1729 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1730 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1731 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1732 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1733 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1734 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1735 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1736 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1737 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1738 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1739 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1740 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1741 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1742 GEN_VEXT_VX(vmacc_vx_b) 1743 GEN_VEXT_VX(vmacc_vx_h) 1744 GEN_VEXT_VX(vmacc_vx_w) 1745 GEN_VEXT_VX(vmacc_vx_d) 1746 GEN_VEXT_VX(vnmsac_vx_b) 1747 GEN_VEXT_VX(vnmsac_vx_h) 1748 GEN_VEXT_VX(vnmsac_vx_w) 1749 GEN_VEXT_VX(vnmsac_vx_d) 1750 GEN_VEXT_VX(vmadd_vx_b) 1751 GEN_VEXT_VX(vmadd_vx_h) 1752 GEN_VEXT_VX(vmadd_vx_w) 1753 GEN_VEXT_VX(vmadd_vx_d) 1754 GEN_VEXT_VX(vnmsub_vx_b) 1755 GEN_VEXT_VX(vnmsub_vx_h) 1756 GEN_VEXT_VX(vnmsub_vx_w) 1757 GEN_VEXT_VX(vnmsub_vx_d) 1758 1759 /* Vector Widening Integer Multiply-Add Instructions */ 1760 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1761 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1762 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1763 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1764 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1765 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1766 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1767 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1768 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1769 GEN_VEXT_VV(vwmaccu_vv_b) 1770 GEN_VEXT_VV(vwmaccu_vv_h) 1771 GEN_VEXT_VV(vwmaccu_vv_w) 1772 GEN_VEXT_VV(vwmacc_vv_b) 1773 GEN_VEXT_VV(vwmacc_vv_h) 1774 GEN_VEXT_VV(vwmacc_vv_w) 1775 GEN_VEXT_VV(vwmaccsu_vv_b) 1776 GEN_VEXT_VV(vwmaccsu_vv_h) 1777 GEN_VEXT_VV(vwmaccsu_vv_w) 1778 1779 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1780 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1781 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1782 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1783 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1784 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1785 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1786 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1787 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1788 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1789 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1790 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1791 GEN_VEXT_VX(vwmaccu_vx_b) 1792 GEN_VEXT_VX(vwmaccu_vx_h) 1793 GEN_VEXT_VX(vwmaccu_vx_w) 1794 
GEN_VEXT_VX(vwmacc_vx_b) 1795 GEN_VEXT_VX(vwmacc_vx_h) 1796 GEN_VEXT_VX(vwmacc_vx_w) 1797 GEN_VEXT_VX(vwmaccsu_vx_b) 1798 GEN_VEXT_VX(vwmaccsu_vx_h) 1799 GEN_VEXT_VX(vwmaccsu_vx_w) 1800 GEN_VEXT_VX(vwmaccus_vx_b) 1801 GEN_VEXT_VX(vwmaccus_vx_h) 1802 GEN_VEXT_VX(vwmaccus_vx_w) 1803 1804 /* Vector Integer Merge and Move Instructions */ 1805 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1806 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1807 uint32_t desc) \ 1808 { \ 1809 uint32_t vl = env->vl; \ 1810 uint32_t i; \ 1811 \ 1812 for (i = env->vstart; i < vl; i++) { \ 1813 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1814 *((ETYPE *)vd + H(i)) = s1; \ 1815 } \ 1816 env->vstart = 0; \ 1817 } 1818 1819 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1820 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1821 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1822 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1823 1824 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1825 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1826 uint32_t desc) \ 1827 { \ 1828 uint32_t vl = env->vl; \ 1829 uint32_t i; \ 1830 \ 1831 for (i = env->vstart; i < vl; i++) { \ 1832 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1833 } \ 1834 env->vstart = 0; \ 1835 } 1836 1837 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1838 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1839 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1840 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1841 1842 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1843 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1844 CPURISCVState *env, uint32_t desc) \ 1845 { \ 1846 uint32_t vl = env->vl; \ 1847 uint32_t i; \ 1848 \ 1849 for (i = env->vstart; i < vl; i++) { \ 1850 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 1851 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1852 } \ 1853 env->vstart = 0; \ 1854 } 1855 1856 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1857 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1858 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1859 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1860 1861 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1862 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1863 void *vs2, CPURISCVState *env, uint32_t desc) \ 1864 { \ 1865 uint32_t vl = env->vl; \ 1866 uint32_t i; \ 1867 \ 1868 for (i = env->vstart; i < vl; i++) { \ 1869 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1870 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 1871 (ETYPE)(target_long)s1); \ 1872 *((ETYPE *)vd + H(i)) = d; \ 1873 } \ 1874 env->vstart = 0; \ 1875 } 1876 1877 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1878 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1879 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1880 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1881 1882 /* 1883 *** Vector Fixed-Point Arithmetic Instructions 1884 */ 1885 1886 /* Vector Single-Width Saturating Add and Subtract */ 1887 1888 /* 1889 * As fixed point instructions probably have round mode and saturation, 1890 * define common macros for fixed point here. 
1891 */ 1892 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1893 CPURISCVState *env, int vxrm); 1894 1895 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1896 static inline void \ 1897 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1898 CPURISCVState *env, int vxrm) \ 1899 { \ 1900 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1901 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1902 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1903 } 1904 1905 static inline void 1906 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1907 CPURISCVState *env, 1908 uint32_t vl, uint32_t vm, int vxrm, 1909 opivv2_rm_fn *fn) 1910 { 1911 for (uint32_t i = env->vstart; i < vl; i++) { 1912 if (!vm && !vext_elem_mask(v0, i)) { 1913 continue; 1914 } 1915 fn(vd, vs1, vs2, i, env, vxrm); 1916 } 1917 env->vstart = 0; 1918 } 1919 1920 static inline void 1921 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1922 CPURISCVState *env, 1923 uint32_t desc, 1924 opivv2_rm_fn *fn) 1925 { 1926 uint32_t vm = vext_vm(desc); 1927 uint32_t vl = env->vl; 1928 1929 switch (env->vxrm) { 1930 case 0: /* rnu */ 1931 vext_vv_rm_1(vd, v0, vs1, vs2, 1932 env, vl, vm, 0, fn); 1933 break; 1934 case 1: /* rne */ 1935 vext_vv_rm_1(vd, v0, vs1, vs2, 1936 env, vl, vm, 1, fn); 1937 break; 1938 case 2: /* rdn */ 1939 vext_vv_rm_1(vd, v0, vs1, vs2, 1940 env, vl, vm, 2, fn); 1941 break; 1942 default: /* rod */ 1943 vext_vv_rm_1(vd, v0, vs1, vs2, 1944 env, vl, vm, 3, fn); 1945 break; 1946 } 1947 } 1948 1949 /* generate helpers for fixed point instructions with OPIVV format */ 1950 #define GEN_VEXT_VV_RM(NAME) \ 1951 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1952 CPURISCVState *env, uint32_t desc) \ 1953 { \ 1954 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 1955 do_##NAME); \ 1956 } 1957 1958 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 1959 { 1960 uint8_t res = a + b; 1961 if (res < a) { 1962 res = UINT8_MAX; 1963 env->vxsat = 0x1; 1964 } 1965 return res; 1966 } 1967 1968 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1969 uint16_t b) 1970 { 1971 uint16_t res = a + b; 1972 if (res < a) { 1973 res = UINT16_MAX; 1974 env->vxsat = 0x1; 1975 } 1976 return res; 1977 } 1978 1979 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1980 uint32_t b) 1981 { 1982 uint32_t res = a + b; 1983 if (res < a) { 1984 res = UINT32_MAX; 1985 env->vxsat = 0x1; 1986 } 1987 return res; 1988 } 1989 1990 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1991 uint64_t b) 1992 { 1993 uint64_t res = a + b; 1994 if (res < a) { 1995 res = UINT64_MAX; 1996 env->vxsat = 0x1; 1997 } 1998 return res; 1999 } 2000 2001 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2002 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2003 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2004 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2005 GEN_VEXT_VV_RM(vsaddu_vv_b) 2006 GEN_VEXT_VV_RM(vsaddu_vv_h) 2007 GEN_VEXT_VV_RM(vsaddu_vv_w) 2008 GEN_VEXT_VV_RM(vsaddu_vv_d) 2009 2010 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2011 CPURISCVState *env, int vxrm); 2012 2013 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2014 static inline void \ 2015 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2016 CPURISCVState *env, int vxrm) \ 2017 { \ 2018 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2019 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2020 } 2021 2022 
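/*
 * Illustrative sketch (not part of the build): a hand expansion of
 * RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) above,
 * assuming OP_UUU_B supplies uint8_t element types.  vext_vv_rm_2()
 * resolves env->vxrm once and passes each rounding mode as a literal
 * constant to vext_vv_rm_1(), so that (with inlining) the compiler can
 * specialize the element loop per rounding mode.
 */
#if 0
static inline void
example_do_vsaddu_vv_b(void *vd, void *vs1, void *vs2, int i,
                       CPURISCVState *env, int vxrm)
{
    uint8_t s1 = *((uint8_t *)vs1 + H1(i));
    uint8_t s2 = *((uint8_t *)vs2 + H1(i));

    /* saddu8() clamps to UINT8_MAX and sets vxsat on overflow. */
    *((uint8_t *)vd + H1(i)) = saddu8(env, vxrm, s2, s1);
}
#endif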
static inline void 2023 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2024 CPURISCVState *env, 2025 uint32_t vl, uint32_t vm, int vxrm, 2026 opivx2_rm_fn *fn) 2027 { 2028 for (uint32_t i = env->vstart; i < vl; i++) { 2029 if (!vm && !vext_elem_mask(v0, i)) { 2030 continue; 2031 } 2032 fn(vd, s1, vs2, i, env, vxrm); 2033 } 2034 env->vstart = 0; 2035 } 2036 2037 static inline void 2038 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2039 CPURISCVState *env, 2040 uint32_t desc, 2041 opivx2_rm_fn *fn) 2042 { 2043 uint32_t vm = vext_vm(desc); 2044 uint32_t vl = env->vl; 2045 2046 switch (env->vxrm) { 2047 case 0: /* rnu */ 2048 vext_vx_rm_1(vd, v0, s1, vs2, 2049 env, vl, vm, 0, fn); 2050 break; 2051 case 1: /* rne */ 2052 vext_vx_rm_1(vd, v0, s1, vs2, 2053 env, vl, vm, 1, fn); 2054 break; 2055 case 2: /* rdn */ 2056 vext_vx_rm_1(vd, v0, s1, vs2, 2057 env, vl, vm, 2, fn); 2058 break; 2059 default: /* rod */ 2060 vext_vx_rm_1(vd, v0, s1, vs2, 2061 env, vl, vm, 3, fn); 2062 break; 2063 } 2064 } 2065 2066 /* generate helpers for fixed point instructions with OPIVX format */ 2067 #define GEN_VEXT_VX_RM(NAME) \ 2068 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2069 void *vs2, CPURISCVState *env, uint32_t desc) \ 2070 { \ 2071 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2072 do_##NAME); \ 2073 } 2074 2075 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2076 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2077 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2078 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2079 GEN_VEXT_VX_RM(vsaddu_vx_b) 2080 GEN_VEXT_VX_RM(vsaddu_vx_h) 2081 GEN_VEXT_VX_RM(vsaddu_vx_w) 2082 GEN_VEXT_VX_RM(vsaddu_vx_d) 2083 2084 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2085 { 2086 int8_t res = a + b; 2087 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2088 res = a > 0 ? INT8_MAX : INT8_MIN; 2089 env->vxsat = 0x1; 2090 } 2091 return res; 2092 } 2093 2094 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2095 { 2096 int16_t res = a + b; 2097 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2098 res = a > 0 ? INT16_MAX : INT16_MIN; 2099 env->vxsat = 0x1; 2100 } 2101 return res; 2102 } 2103 2104 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2105 { 2106 int32_t res = a + b; 2107 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2108 res = a > 0 ? INT32_MAX : INT32_MIN; 2109 env->vxsat = 0x1; 2110 } 2111 return res; 2112 } 2113 2114 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2115 { 2116 int64_t res = a + b; 2117 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2118 res = a > 0 ? 
INT64_MAX : INT64_MIN; 2119 env->vxsat = 0x1; 2120 } 2121 return res; 2122 } 2123 2124 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2125 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2126 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2127 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2128 GEN_VEXT_VV_RM(vsadd_vv_b) 2129 GEN_VEXT_VV_RM(vsadd_vv_h) 2130 GEN_VEXT_VV_RM(vsadd_vv_w) 2131 GEN_VEXT_VV_RM(vsadd_vv_d) 2132 2133 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2134 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2135 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2136 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2137 GEN_VEXT_VX_RM(vsadd_vx_b) 2138 GEN_VEXT_VX_RM(vsadd_vx_h) 2139 GEN_VEXT_VX_RM(vsadd_vx_w) 2140 GEN_VEXT_VX_RM(vsadd_vx_d) 2141 2142 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2143 { 2144 uint8_t res = a - b; 2145 if (res > a) { 2146 res = 0; 2147 env->vxsat = 0x1; 2148 } 2149 return res; 2150 } 2151 2152 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2153 uint16_t b) 2154 { 2155 uint16_t res = a - b; 2156 if (res > a) { 2157 res = 0; 2158 env->vxsat = 0x1; 2159 } 2160 return res; 2161 } 2162 2163 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2164 uint32_t b) 2165 { 2166 uint32_t res = a - b; 2167 if (res > a) { 2168 res = 0; 2169 env->vxsat = 0x1; 2170 } 2171 return res; 2172 } 2173 2174 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2175 uint64_t b) 2176 { 2177 uint64_t res = a - b; 2178 if (res > a) { 2179 res = 0; 2180 env->vxsat = 0x1; 2181 } 2182 return res; 2183 } 2184 2185 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2186 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2187 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2188 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2189 GEN_VEXT_VV_RM(vssubu_vv_b) 2190 GEN_VEXT_VV_RM(vssubu_vv_h) 2191 GEN_VEXT_VV_RM(vssubu_vv_w) 2192 GEN_VEXT_VV_RM(vssubu_vv_d) 2193 2194 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2195 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2196 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2197 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2198 GEN_VEXT_VX_RM(vssubu_vx_b) 2199 GEN_VEXT_VX_RM(vssubu_vx_h) 2200 GEN_VEXT_VX_RM(vssubu_vx_w) 2201 GEN_VEXT_VX_RM(vssubu_vx_d) 2202 2203 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2204 { 2205 int8_t res = a - b; 2206 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2207 res = a >= 0 ? INT8_MAX : INT8_MIN; 2208 env->vxsat = 0x1; 2209 } 2210 return res; 2211 } 2212 2213 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2214 { 2215 int16_t res = a - b; 2216 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2217 res = a >= 0 ? INT16_MAX : INT16_MIN; 2218 env->vxsat = 0x1; 2219 } 2220 return res; 2221 } 2222 2223 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2224 { 2225 int32_t res = a - b; 2226 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2227 res = a >= 0 ? INT32_MAX : INT32_MIN; 2228 env->vxsat = 0x1; 2229 } 2230 return res; 2231 } 2232 2233 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2234 { 2235 int64_t res = a - b; 2236 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2237 res = a >= 0 ? 
INT64_MAX : INT64_MIN; 2238 env->vxsat = 0x1; 2239 } 2240 return res; 2241 } 2242 2243 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2244 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2245 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2246 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2247 GEN_VEXT_VV_RM(vssub_vv_b) 2248 GEN_VEXT_VV_RM(vssub_vv_h) 2249 GEN_VEXT_VV_RM(vssub_vv_w) 2250 GEN_VEXT_VV_RM(vssub_vv_d) 2251 2252 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2253 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2254 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2255 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2256 GEN_VEXT_VX_RM(vssub_vx_b) 2257 GEN_VEXT_VX_RM(vssub_vx_h) 2258 GEN_VEXT_VX_RM(vssub_vx_w) 2259 GEN_VEXT_VX_RM(vssub_vx_d) 2260 2261 /* Vector Single-Width Averaging Add and Subtract */ 2262 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2263 { 2264 uint8_t d = extract64(v, shift, 1); 2265 uint8_t d1; 2266 uint64_t D1, D2; 2267 2268 if (shift == 0 || shift > 64) { 2269 return 0; 2270 } 2271 2272 d1 = extract64(v, shift - 1, 1); 2273 D1 = extract64(v, 0, shift); 2274 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2275 return d1; 2276 } else if (vxrm == 1) { /* round-to-nearest-even */ 2277 if (shift > 1) { 2278 D2 = extract64(v, 0, shift - 1); 2279 return d1 & ((D2 != 0) | d); 2280 } else { 2281 return d1 & d; 2282 } 2283 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2284 return !d & (D1 != 0); 2285 } 2286 return 0; /* round-down (truncate) */ 2287 } 2288 2289 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2290 { 2291 int64_t res = (int64_t)a + b; 2292 uint8_t round = get_round(vxrm, res, 1); 2293 2294 return (res >> 1) + round; 2295 } 2296 2297 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2298 { 2299 int64_t res = a + b; 2300 uint8_t round = get_round(vxrm, res, 1); 2301 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2302 2303 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2304 return ((res >> 1) ^ over) + round; 2305 } 2306 2307 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2308 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2309 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2310 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2311 GEN_VEXT_VV_RM(vaadd_vv_b) 2312 GEN_VEXT_VV_RM(vaadd_vv_h) 2313 GEN_VEXT_VV_RM(vaadd_vv_w) 2314 GEN_VEXT_VV_RM(vaadd_vv_d) 2315 2316 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2317 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2318 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2319 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2320 GEN_VEXT_VX_RM(vaadd_vx_b) 2321 GEN_VEXT_VX_RM(vaadd_vx_h) 2322 GEN_VEXT_VX_RM(vaadd_vx_w) 2323 GEN_VEXT_VX_RM(vaadd_vx_d) 2324 2325 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2326 uint32_t a, uint32_t b) 2327 { 2328 uint64_t res = (uint64_t)a + b; 2329 uint8_t round = get_round(vxrm, res, 1); 2330 2331 return (res >> 1) + round; 2332 } 2333 2334 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2335 uint64_t a, uint64_t b) 2336 { 2337 uint64_t res = a + b; 2338 uint8_t round = get_round(vxrm, res, 1); 2339 uint64_t over = (uint64_t)(res < a) << 63; 2340 2341 return ((res >> 1) | over) + round; 2342 } 2343 2344 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2345 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2346 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2347 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2348 GEN_VEXT_VV_RM(vaaddu_vv_b) 2349 GEN_VEXT_VV_RM(vaaddu_vv_h) 2350 GEN_VEXT_VV_RM(vaaddu_vv_w) 2351 GEN_VEXT_VV_RM(vaaddu_vv_d) 2352 2353 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2354 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2355 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2356 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2357 GEN_VEXT_VX_RM(vaaddu_vx_b) 2358 GEN_VEXT_VX_RM(vaaddu_vx_h) 2359 GEN_VEXT_VX_RM(vaaddu_vx_w) 2360 GEN_VEXT_VX_RM(vaaddu_vx_d) 2361 2362 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2363 { 2364 int64_t res = (int64_t)a - b; 2365 uint8_t round = get_round(vxrm, res, 1); 2366 2367 return (res >> 1) + round; 2368 } 2369 2370 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2371 { 2372 int64_t res = (int64_t)a - b; 2373 uint8_t round = get_round(vxrm, res, 1); 2374 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2375 2376 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2377 return ((res >> 1) ^ over) + round; 2378 } 2379 2380 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2381 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2382 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2383 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2384 GEN_VEXT_VV_RM(vasub_vv_b) 2385 GEN_VEXT_VV_RM(vasub_vv_h) 2386 GEN_VEXT_VV_RM(vasub_vv_w) 2387 GEN_VEXT_VV_RM(vasub_vv_d) 2388 2389 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2390 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2391 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2392 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2393 GEN_VEXT_VX_RM(vasub_vx_b) 2394 GEN_VEXT_VX_RM(vasub_vx_h) 2395 GEN_VEXT_VX_RM(vasub_vx_w) 2396 GEN_VEXT_VX_RM(vasub_vx_d) 2397 2398 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2399 uint32_t a, uint32_t b) 2400 { 2401 int64_t res = (int64_t)a - b; 2402 uint8_t round = get_round(vxrm, res, 1); 2403 2404 return (res >> 1) + round; 2405 } 2406 2407 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2408 uint64_t a, uint64_t b) 2409 { 2410 uint64_t res = (uint64_t)a - b; 2411 uint8_t round = get_round(vxrm, res, 1); 2412 uint64_t over = (uint64_t)(res > a) << 63; 2413 2414 return ((res >> 1) | over) + round; 2415 } 2416 2417 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2418 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2419 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2420 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2421 GEN_VEXT_VV_RM(vasubu_vv_b) 2422 GEN_VEXT_VV_RM(vasubu_vv_h) 2423 GEN_VEXT_VV_RM(vasubu_vv_w) 2424 GEN_VEXT_VV_RM(vasubu_vv_d) 2425 2426 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2427 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2428 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2429 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2430 GEN_VEXT_VX_RM(vasubu_vx_b) 2431 GEN_VEXT_VX_RM(vasubu_vx_h) 2432 GEN_VEXT_VX_RM(vasubu_vx_w) 2433 GEN_VEXT_VX_RM(vasubu_vx_d) 2434 2435 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2436 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2437 { 2438 uint8_t round; 2439 int16_t res; 2440 2441 res = (int16_t)a * (int16_t)b; 2442 round = get_round(vxrm, res, 7); 2443 res = (res >> 7) + round; 2444 2445 if (res > INT8_MAX) { 2446 env->vxsat = 0x1; 2447 return INT8_MAX; 2448 } else if (res < INT8_MIN) { 2449 env->vxsat = 0x1; 2450 return INT8_MIN; 2451 } else { 2452 return res; 2453 } 2454 } 2455 2456 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2457 { 2458 uint8_t round; 2459 int32_t res; 2460 2461 res = (int32_t)a * (int32_t)b; 2462 round = get_round(vxrm, res, 15); 2463 res = (res >> 15) + round; 2464 2465 if (res > INT16_MAX) { 2466 env->vxsat = 0x1; 2467 return INT16_MAX; 2468 } else if (res < INT16_MIN) { 2469 env->vxsat = 0x1; 2470 return INT16_MIN; 2471 } else { 2472 return res; 2473 } 2474 } 2475 2476 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2477 { 2478 uint8_t round; 2479 int64_t res; 2480 2481 res = (int64_t)a * (int64_t)b; 2482 round = get_round(vxrm, res, 31); 2483 res = (res >> 31) + round; 2484 2485 if (res > INT32_MAX) { 2486 env->vxsat = 0x1; 2487 return INT32_MAX; 2488 } else if (res < INT32_MIN) { 2489 env->vxsat = 0x1; 2490 return INT32_MIN; 2491 } else { 2492 return 
res; 2493 } 2494 } 2495 2496 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2497 { 2498 uint8_t round; 2499 uint64_t hi_64, lo_64; 2500 int64_t res; 2501 2502 if (a == INT64_MIN && b == INT64_MIN) { 2503 env->vxsat = 1; 2504 return INT64_MAX; 2505 } 2506 2507 muls64(&lo_64, &hi_64, a, b); 2508 round = get_round(vxrm, lo_64, 63); 2509 /* 2510 * Cannot overflow, as there are always 2511 * 2 sign bits after multiply. 2512 */ 2513 res = (hi_64 << 1) | (lo_64 >> 63); 2514 if (round) { 2515 if (res == INT64_MAX) { 2516 env->vxsat = 1; 2517 } else { 2518 res += 1; 2519 } 2520 } 2521 return res; 2522 } 2523 2524 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2525 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2526 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2527 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2528 GEN_VEXT_VV_RM(vsmul_vv_b) 2529 GEN_VEXT_VV_RM(vsmul_vv_h) 2530 GEN_VEXT_VV_RM(vsmul_vv_w) 2531 GEN_VEXT_VV_RM(vsmul_vv_d) 2532 2533 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2534 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2535 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2536 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2537 GEN_VEXT_VX_RM(vsmul_vx_b) 2538 GEN_VEXT_VX_RM(vsmul_vx_h) 2539 GEN_VEXT_VX_RM(vsmul_vx_w) 2540 GEN_VEXT_VX_RM(vsmul_vx_d) 2541 2542 /* Vector Single-Width Scaling Shift Instructions */ 2543 static inline uint8_t 2544 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2545 { 2546 uint8_t round, shift = b & 0x7; 2547 uint8_t res; 2548 2549 round = get_round(vxrm, a, shift); 2550 res = (a >> shift) + round; 2551 return res; 2552 } 2553 static inline uint16_t 2554 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2555 { 2556 uint8_t round, shift = b & 0xf; 2557 uint16_t res; 2558 2559 round = get_round(vxrm, a, shift); 2560 res = (a >> shift) + round; 2561 return res; 2562 } 2563 static inline uint32_t 2564 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2565 { 2566 uint8_t round, shift = b & 0x1f; 2567 uint32_t res; 2568 2569 round = get_round(vxrm, a, shift); 2570 res = (a >> shift) + round; 2571 return res; 2572 } 2573 static inline uint64_t 2574 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2575 { 2576 uint8_t round, shift = b & 0x3f; 2577 uint64_t res; 2578 2579 round = get_round(vxrm, a, shift); 2580 res = (a >> shift) + round; 2581 return res; 2582 } 2583 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2584 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2585 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2586 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2587 GEN_VEXT_VV_RM(vssrl_vv_b) 2588 GEN_VEXT_VV_RM(vssrl_vv_h) 2589 GEN_VEXT_VV_RM(vssrl_vv_w) 2590 GEN_VEXT_VV_RM(vssrl_vv_d) 2591 2592 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2593 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2594 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2595 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2596 GEN_VEXT_VX_RM(vssrl_vx_b) 2597 GEN_VEXT_VX_RM(vssrl_vx_h) 2598 GEN_VEXT_VX_RM(vssrl_vx_w) 2599 GEN_VEXT_VX_RM(vssrl_vx_d) 2600 2601 static inline int8_t 2602 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2603 { 2604 uint8_t round, shift = b & 0x7; 2605 int8_t res; 2606 2607 round = get_round(vxrm, a, shift); 2608 res = (a >> shift) + round; 2609 return res; 2610 } 2611 
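/*
 * Worked example for get_round() above (illustrative only, not built):
 * shifting v = 0b10110 right by 2 discards 0b10, exactly half of the
 * new LSB, so the rounding modes diverge.
 */
#if 0
static void example_get_round_modes(void)
{
    uint64_t v = 0x16;      /* 0b10110 */
    uint8_t shift = 2;      /* truncated result is 0b101 */

    /* rnu: round bit (bit 1) is set              -> increment */
    g_assert(get_round(0, v, shift) == 1);
    /* rne: tie and truncated LSB (bit 2) is odd  -> round to even, +1 */
    g_assert(get_round(1, v, shift) == 1);
    /* rdn: truncate                              -> no increment */
    g_assert(get_round(2, v, shift) == 0);
    /* rod: result LSB is already 1, nothing to jam in */
    g_assert(get_round(3, v, shift) == 0);
}
#endif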
static inline int16_t 2612 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2613 { 2614 uint8_t round, shift = b & 0xf; 2615 int16_t res; 2616 2617 round = get_round(vxrm, a, shift); 2618 res = (a >> shift) + round; 2619 return res; 2620 } 2621 static inline int32_t 2622 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2623 { 2624 uint8_t round, shift = b & 0x1f; 2625 int32_t res; 2626 2627 round = get_round(vxrm, a, shift); 2628 res = (a >> shift) + round; 2629 return res; 2630 } 2631 static inline int64_t 2632 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2633 { 2634 uint8_t round, shift = b & 0x3f; 2635 int64_t res; 2636 2637 round = get_round(vxrm, a, shift); 2638 res = (a >> shift) + round; 2639 return res; 2640 } 2641 2642 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2643 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2644 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2645 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2646 GEN_VEXT_VV_RM(vssra_vv_b) 2647 GEN_VEXT_VV_RM(vssra_vv_h) 2648 GEN_VEXT_VV_RM(vssra_vv_w) 2649 GEN_VEXT_VV_RM(vssra_vv_d) 2650 2651 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2652 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2653 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2654 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2655 GEN_VEXT_VX_RM(vssra_vx_b) 2656 GEN_VEXT_VX_RM(vssra_vx_h) 2657 GEN_VEXT_VX_RM(vssra_vx_w) 2658 GEN_VEXT_VX_RM(vssra_vx_d) 2659 2660 /* Vector Narrowing Fixed-Point Clip Instructions */ 2661 static inline int8_t 2662 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2663 { 2664 uint8_t round, shift = b & 0xf; 2665 int16_t res; 2666 2667 round = get_round(vxrm, a, shift); 2668 res = (a >> shift) + round; 2669 if (res > INT8_MAX) { 2670 env->vxsat = 0x1; 2671 return INT8_MAX; 2672 } else if (res < INT8_MIN) { 2673 env->vxsat = 0x1; 2674 return INT8_MIN; 2675 } else { 2676 return res; 2677 } 2678 } 2679 2680 static inline int16_t 2681 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2682 { 2683 uint8_t round, shift = b & 0x1f; 2684 int32_t res; 2685 2686 round = get_round(vxrm, a, shift); 2687 res = (a >> shift) + round; 2688 if (res > INT16_MAX) { 2689 env->vxsat = 0x1; 2690 return INT16_MAX; 2691 } else if (res < INT16_MIN) { 2692 env->vxsat = 0x1; 2693 return INT16_MIN; 2694 } else { 2695 return res; 2696 } 2697 } 2698 2699 static inline int32_t 2700 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2701 { 2702 uint8_t round, shift = b & 0x3f; 2703 int64_t res; 2704 2705 round = get_round(vxrm, a, shift); 2706 res = (a >> shift) + round; 2707 if (res > INT32_MAX) { 2708 env->vxsat = 0x1; 2709 return INT32_MAX; 2710 } else if (res < INT32_MIN) { 2711 env->vxsat = 0x1; 2712 return INT32_MIN; 2713 } else { 2714 return res; 2715 } 2716 } 2717 2718 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2719 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2720 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2721 GEN_VEXT_VV_RM(vnclip_wv_b) 2722 GEN_VEXT_VV_RM(vnclip_wv_h) 2723 GEN_VEXT_VV_RM(vnclip_wv_w) 2724 2725 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2726 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2727 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2728 GEN_VEXT_VX_RM(vnclip_wx_b) 2729 GEN_VEXT_VX_RM(vnclip_wx_h) 2730 GEN_VEXT_VX_RM(vnclip_wx_w) 2731 2732 static inline 
uint8_t 2733 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2734 { 2735 uint8_t round, shift = b & 0xf; 2736 uint16_t res; 2737 2738 round = get_round(vxrm, a, shift); 2739 res = (a >> shift) + round; 2740 if (res > UINT8_MAX) { 2741 env->vxsat = 0x1; 2742 return UINT8_MAX; 2743 } else { 2744 return res; 2745 } 2746 } 2747 2748 static inline uint16_t 2749 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2750 { 2751 uint8_t round, shift = b & 0x1f; 2752 uint32_t res; 2753 2754 round = get_round(vxrm, a, shift); 2755 res = (a >> shift) + round; 2756 if (res > UINT16_MAX) { 2757 env->vxsat = 0x1; 2758 return UINT16_MAX; 2759 } else { 2760 return res; 2761 } 2762 } 2763 2764 static inline uint32_t 2765 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2766 { 2767 uint8_t round, shift = b & 0x3f; 2768 uint64_t res; 2769 2770 round = get_round(vxrm, a, shift); 2771 res = (a >> shift) + round; 2772 if (res > UINT32_MAX) { 2773 env->vxsat = 0x1; 2774 return UINT32_MAX; 2775 } else { 2776 return res; 2777 } 2778 } 2779 2780 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2781 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2782 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2783 GEN_VEXT_VV_RM(vnclipu_wv_b) 2784 GEN_VEXT_VV_RM(vnclipu_wv_h) 2785 GEN_VEXT_VV_RM(vnclipu_wv_w) 2786 2787 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2788 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2789 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 2790 GEN_VEXT_VX_RM(vnclipu_wx_b) 2791 GEN_VEXT_VX_RM(vnclipu_wx_h) 2792 GEN_VEXT_VX_RM(vnclipu_wx_w) 2793 2794 /* 2795 *** Vector Float Point Arithmetic Instructions 2796 */ 2797 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2798 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2799 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2800 CPURISCVState *env) \ 2801 { \ 2802 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2803 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2804 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2805 } 2806 2807 #define GEN_VEXT_VV_ENV(NAME) \ 2808 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2809 void *vs2, CPURISCVState *env, \ 2810 uint32_t desc) \ 2811 { \ 2812 uint32_t vm = vext_vm(desc); \ 2813 uint32_t vl = env->vl; \ 2814 uint32_t i; \ 2815 \ 2816 for (i = env->vstart; i < vl; i++) { \ 2817 if (!vm && !vext_elem_mask(v0, i)) { \ 2818 continue; \ 2819 } \ 2820 do_##NAME(vd, vs1, vs2, i, env); \ 2821 } \ 2822 env->vstart = 0; \ 2823 } 2824 2825 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2826 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2827 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2828 GEN_VEXT_VV_ENV(vfadd_vv_h) 2829 GEN_VEXT_VV_ENV(vfadd_vv_w) 2830 GEN_VEXT_VV_ENV(vfadd_vv_d) 2831 2832 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2833 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2834 CPURISCVState *env) \ 2835 { \ 2836 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2837 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 2838 } 2839 2840 #define GEN_VEXT_VF(NAME) \ 2841 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2842 void *vs2, CPURISCVState *env, \ 2843 uint32_t desc) \ 2844 { \ 2845 uint32_t vm = vext_vm(desc); \ 2846 uint32_t vl = env->vl; \ 2847 uint32_t i; \ 2848 \ 2849 for (i = env->vstart; i < vl; i++) { \ 2850 if (!vm && !vext_elem_mask(v0, i)) { \ 2851 
continue; \ 2852 } \ 2853 do_##NAME(vd, s1, vs2, i, env); \ 2854 } \ 2855 env->vstart = 0; \ 2856 } 2857 2858 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2859 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2860 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2861 GEN_VEXT_VF(vfadd_vf_h) 2862 GEN_VEXT_VF(vfadd_vf_w) 2863 GEN_VEXT_VF(vfadd_vf_d) 2864 2865 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2866 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2867 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2868 GEN_VEXT_VV_ENV(vfsub_vv_h) 2869 GEN_VEXT_VV_ENV(vfsub_vv_w) 2870 GEN_VEXT_VV_ENV(vfsub_vv_d) 2871 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2872 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2873 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2874 GEN_VEXT_VF(vfsub_vf_h) 2875 GEN_VEXT_VF(vfsub_vf_w) 2876 GEN_VEXT_VF(vfsub_vf_d) 2877 2878 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2879 { 2880 return float16_sub(b, a, s); 2881 } 2882 2883 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2884 { 2885 return float32_sub(b, a, s); 2886 } 2887 2888 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2889 { 2890 return float64_sub(b, a, s); 2891 } 2892 2893 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2894 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2895 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2896 GEN_VEXT_VF(vfrsub_vf_h) 2897 GEN_VEXT_VF(vfrsub_vf_w) 2898 GEN_VEXT_VF(vfrsub_vf_d) 2899 2900 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2901 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2902 { 2903 return float32_add(float16_to_float32(a, true, s), 2904 float16_to_float32(b, true, s), s); 2905 } 2906 2907 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2908 { 2909 return float64_add(float32_to_float64(a, s), 2910 float32_to_float64(b, s), s); 2911 2912 } 2913 2914 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2915 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2916 GEN_VEXT_VV_ENV(vfwadd_vv_h) 2917 GEN_VEXT_VV_ENV(vfwadd_vv_w) 2918 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2919 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2920 GEN_VEXT_VF(vfwadd_vf_h) 2921 GEN_VEXT_VF(vfwadd_vf_w) 2922 2923 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2924 { 2925 return float32_sub(float16_to_float32(a, true, s), 2926 float16_to_float32(b, true, s), s); 2927 } 2928 2929 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2930 { 2931 return float64_sub(float32_to_float64(a, s), 2932 float32_to_float64(b, s), s); 2933 2934 } 2935 2936 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 2937 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 2938 GEN_VEXT_VV_ENV(vfwsub_vv_h) 2939 GEN_VEXT_VV_ENV(vfwsub_vv_w) 2940 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 2941 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 2942 GEN_VEXT_VF(vfwsub_vf_h) 2943 GEN_VEXT_VF(vfwsub_vf_w) 2944 2945 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 2946 { 2947 return float32_add(a, float16_to_float32(b, true, s), s); 2948 } 2949 2950 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 2951 { 2952 return float64_add(a, float32_to_float64(b, s), s); 2953 } 2954 2955 
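/*
 * Illustrative sketch (not part of the build): a hand expansion of
 * RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) above,
 * assuming WOP_UUU_H names a 32-bit destination element type with
 * 16-bit sources.  Widening ops read SEW-bit sources (H2) but write
 * 2*SEW-bit destination elements, hence the H4 index on vd; the
 * conversion to float32 happens inside vfwadd16().
 */
#if 0
static void example_do_vfwadd_vf_h(void *vd, uint64_t s1, void *vs2, int i,
                                   CPURISCVState *env)
{
    uint16_t s2 = *((uint16_t *)vs2 + H2(i));

    *((uint32_t *)vd + H4(i)) = vfwadd16(s2, (uint16_t)s1, &env->fp_status);
}
#endif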
RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 2956 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 2957 GEN_VEXT_VV_ENV(vfwadd_wv_h) 2958 GEN_VEXT_VV_ENV(vfwadd_wv_w) 2959 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 2960 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 2961 GEN_VEXT_VF(vfwadd_wf_h) 2962 GEN_VEXT_VF(vfwadd_wf_w) 2963 2964 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 2965 { 2966 return float32_sub(a, float16_to_float32(b, true, s), s); 2967 } 2968 2969 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 2970 { 2971 return float64_sub(a, float32_to_float64(b, s), s); 2972 } 2973 2974 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 2975 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 2976 GEN_VEXT_VV_ENV(vfwsub_wv_h) 2977 GEN_VEXT_VV_ENV(vfwsub_wv_w) 2978 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 2979 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 2980 GEN_VEXT_VF(vfwsub_wf_h) 2981 GEN_VEXT_VF(vfwsub_wf_w) 2982 2983 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 2984 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 2985 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 2986 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 2987 GEN_VEXT_VV_ENV(vfmul_vv_h) 2988 GEN_VEXT_VV_ENV(vfmul_vv_w) 2989 GEN_VEXT_VV_ENV(vfmul_vv_d) 2990 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 2991 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 2992 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 2993 GEN_VEXT_VF(vfmul_vf_h) 2994 GEN_VEXT_VF(vfmul_vf_w) 2995 GEN_VEXT_VF(vfmul_vf_d) 2996 2997 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 2998 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 2999 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3000 GEN_VEXT_VV_ENV(vfdiv_vv_h) 3001 GEN_VEXT_VV_ENV(vfdiv_vv_w) 3002 GEN_VEXT_VV_ENV(vfdiv_vv_d) 3003 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3004 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3005 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3006 GEN_VEXT_VF(vfdiv_vf_h) 3007 GEN_VEXT_VF(vfdiv_vf_w) 3008 GEN_VEXT_VF(vfdiv_vf_d) 3009 3010 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3011 { 3012 return float16_div(b, a, s); 3013 } 3014 3015 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3016 { 3017 return float32_div(b, a, s); 3018 } 3019 3020 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3021 { 3022 return float64_div(b, a, s); 3023 } 3024 3025 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3026 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3027 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3028 GEN_VEXT_VF(vfrdiv_vf_h) 3029 GEN_VEXT_VF(vfrdiv_vf_w) 3030 GEN_VEXT_VF(vfrdiv_vf_d) 3031 3032 /* Vector Widening Floating-Point Multiply */ 3033 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3034 { 3035 return float32_mul(float16_to_float32(a, true, s), 3036 float16_to_float32(b, true, s), s); 3037 } 3038 3039 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3040 { 3041 return float64_mul(float32_to_float64(a, s), 3042 float32_to_float64(b, s), s); 3043 3044 } 3045 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3046 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, 
H4, H4, vfwmul32) 3047 GEN_VEXT_VV_ENV(vfwmul_vv_h) 3048 GEN_VEXT_VV_ENV(vfwmul_vv_w) 3049 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3050 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3051 GEN_VEXT_VF(vfwmul_vf_h) 3052 GEN_VEXT_VF(vfwmul_vf_w) 3053 3054 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3055 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3056 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3057 CPURISCVState *env) \ 3058 { \ 3059 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3060 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3061 TD d = *((TD *)vd + HD(i)); \ 3062 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3063 } 3064 3065 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3066 { 3067 return float16_muladd(a, b, d, 0, s); 3068 } 3069 3070 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3071 { 3072 return float32_muladd(a, b, d, 0, s); 3073 } 3074 3075 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3076 { 3077 return float64_muladd(a, b, d, 0, s); 3078 } 3079 3080 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3081 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3082 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3083 GEN_VEXT_VV_ENV(vfmacc_vv_h) 3084 GEN_VEXT_VV_ENV(vfmacc_vv_w) 3085 GEN_VEXT_VV_ENV(vfmacc_vv_d) 3086 3087 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3088 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3089 CPURISCVState *env) \ 3090 { \ 3091 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3092 TD d = *((TD *)vd + HD(i)); \ 3093 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3094 } 3095 3096 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3097 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3098 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3099 GEN_VEXT_VF(vfmacc_vf_h) 3100 GEN_VEXT_VF(vfmacc_vf_w) 3101 GEN_VEXT_VF(vfmacc_vf_d) 3102 3103 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3104 { 3105 return float16_muladd(a, b, d, 3106 float_muladd_negate_c | float_muladd_negate_product, s); 3107 } 3108 3109 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3110 { 3111 return float32_muladd(a, b, d, 3112 float_muladd_negate_c | float_muladd_negate_product, s); 3113 } 3114 3115 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3116 { 3117 return float64_muladd(a, b, d, 3118 float_muladd_negate_c | float_muladd_negate_product, s); 3119 } 3120 3121 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3122 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3123 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3124 GEN_VEXT_VV_ENV(vfnmacc_vv_h) 3125 GEN_VEXT_VV_ENV(vfnmacc_vv_w) 3126 GEN_VEXT_VV_ENV(vfnmacc_vv_d) 3127 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3128 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3129 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3130 GEN_VEXT_VF(vfnmacc_vf_h) 3131 GEN_VEXT_VF(vfnmacc_vf_w) 3132 GEN_VEXT_VF(vfnmacc_vf_d) 3133 3134 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3135 { 3136 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3137 } 3138 3139 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3140 { 3141 return float32_muladd(a, b, d, float_muladd_negate_c, 
s); 3142 } 3143 3144 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3145 { 3146 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3147 } 3148 3149 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3150 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3151 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3152 GEN_VEXT_VV_ENV(vfmsac_vv_h) 3153 GEN_VEXT_VV_ENV(vfmsac_vv_w) 3154 GEN_VEXT_VV_ENV(vfmsac_vv_d) 3155 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3156 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3157 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3158 GEN_VEXT_VF(vfmsac_vf_h) 3159 GEN_VEXT_VF(vfmsac_vf_w) 3160 GEN_VEXT_VF(vfmsac_vf_d) 3161 3162 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3163 { 3164 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3165 } 3166 3167 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3168 { 3169 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3170 } 3171 3172 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3173 { 3174 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3175 } 3176 3177 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3178 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3179 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3180 GEN_VEXT_VV_ENV(vfnmsac_vv_h) 3181 GEN_VEXT_VV_ENV(vfnmsac_vv_w) 3182 GEN_VEXT_VV_ENV(vfnmsac_vv_d) 3183 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3184 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3185 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3186 GEN_VEXT_VF(vfnmsac_vf_h) 3187 GEN_VEXT_VF(vfnmsac_vf_w) 3188 GEN_VEXT_VF(vfnmsac_vf_d) 3189 3190 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3191 { 3192 return float16_muladd(d, b, a, 0, s); 3193 } 3194 3195 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3196 { 3197 return float32_muladd(d, b, a, 0, s); 3198 } 3199 3200 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3201 { 3202 return float64_muladd(d, b, a, 0, s); 3203 } 3204 3205 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3206 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3207 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3208 GEN_VEXT_VV_ENV(vfmadd_vv_h) 3209 GEN_VEXT_VV_ENV(vfmadd_vv_w) 3210 GEN_VEXT_VV_ENV(vfmadd_vv_d) 3211 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3212 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3213 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3214 GEN_VEXT_VF(vfmadd_vf_h) 3215 GEN_VEXT_VF(vfmadd_vf_w) 3216 GEN_VEXT_VF(vfmadd_vf_d) 3217 3218 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3219 { 3220 return float16_muladd(d, b, a, 3221 float_muladd_negate_c | float_muladd_negate_product, s); 3222 } 3223 3224 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3225 { 3226 return float32_muladd(d, b, a, 3227 float_muladd_negate_c | float_muladd_negate_product, s); 3228 } 3229 3230 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3231 { 3232 return float64_muladd(d, b, a, 3233 float_muladd_negate_c | float_muladd_negate_product, s); 3234 } 3235 3236 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3237 
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3238 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3239 GEN_VEXT_VV_ENV(vfnmadd_vv_h) 3240 GEN_VEXT_VV_ENV(vfnmadd_vv_w) 3241 GEN_VEXT_VV_ENV(vfnmadd_vv_d) 3242 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3243 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3244 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3245 GEN_VEXT_VF(vfnmadd_vf_h) 3246 GEN_VEXT_VF(vfnmadd_vf_w) 3247 GEN_VEXT_VF(vfnmadd_vf_d) 3248 3249 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3250 { 3251 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3252 } 3253 3254 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3255 { 3256 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3257 } 3258 3259 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3260 { 3261 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3262 } 3263 3264 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3265 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3266 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3267 GEN_VEXT_VV_ENV(vfmsub_vv_h) 3268 GEN_VEXT_VV_ENV(vfmsub_vv_w) 3269 GEN_VEXT_VV_ENV(vfmsub_vv_d) 3270 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3271 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3272 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3273 GEN_VEXT_VF(vfmsub_vf_h) 3274 GEN_VEXT_VF(vfmsub_vf_w) 3275 GEN_VEXT_VF(vfmsub_vf_d) 3276 3277 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3278 { 3279 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3280 } 3281 3282 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3283 { 3284 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3285 } 3286 3287 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3288 { 3289 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3290 } 3291 3292 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3293 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3294 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3295 GEN_VEXT_VV_ENV(vfnmsub_vv_h) 3296 GEN_VEXT_VV_ENV(vfnmsub_vv_w) 3297 GEN_VEXT_VV_ENV(vfnmsub_vv_d) 3298 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3299 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3300 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3301 GEN_VEXT_VF(vfnmsub_vf_h) 3302 GEN_VEXT_VF(vfnmsub_vf_w) 3303 GEN_VEXT_VF(vfnmsub_vf_d) 3304 3305 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3306 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3307 { 3308 return float32_muladd(float16_to_float32(a, true, s), 3309 float16_to_float32(b, true, s), d, 0, s); 3310 } 3311 3312 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3313 { 3314 return float64_muladd(float32_to_float64(a, s), 3315 float32_to_float64(b, s), d, 0, s); 3316 } 3317 3318 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3319 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3320 GEN_VEXT_VV_ENV(vfwmacc_vv_h) 3321 GEN_VEXT_VV_ENV(vfwmacc_vv_w) 3322 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3323 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3324 GEN_VEXT_VF(vfwmacc_vf_h) 3325 
GEN_VEXT_VF(vfwmacc_vf_w) 3326 3327 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3328 { 3329 return float32_muladd(float16_to_float32(a, true, s), 3330 float16_to_float32(b, true, s), d, 3331 float_muladd_negate_c | float_muladd_negate_product, s); 3332 } 3333 3334 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3335 { 3336 return float64_muladd(float32_to_float64(a, s), 3337 float32_to_float64(b, s), d, 3338 float_muladd_negate_c | float_muladd_negate_product, s); 3339 } 3340 3341 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3342 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3343 GEN_VEXT_VV_ENV(vfwnmacc_vv_h) 3344 GEN_VEXT_VV_ENV(vfwnmacc_vv_w) 3345 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3346 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3347 GEN_VEXT_VF(vfwnmacc_vf_h) 3348 GEN_VEXT_VF(vfwnmacc_vf_w) 3349 3350 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3351 { 3352 return float32_muladd(float16_to_float32(a, true, s), 3353 float16_to_float32(b, true, s), d, 3354 float_muladd_negate_c, s); 3355 } 3356 3357 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3358 { 3359 return float64_muladd(float32_to_float64(a, s), 3360 float32_to_float64(b, s), d, 3361 float_muladd_negate_c, s); 3362 } 3363 3364 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3365 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3366 GEN_VEXT_VV_ENV(vfwmsac_vv_h) 3367 GEN_VEXT_VV_ENV(vfwmsac_vv_w) 3368 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3369 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3370 GEN_VEXT_VF(vfwmsac_vf_h) 3371 GEN_VEXT_VF(vfwmsac_vf_w) 3372 3373 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3374 { 3375 return float32_muladd(float16_to_float32(a, true, s), 3376 float16_to_float32(b, true, s), d, 3377 float_muladd_negate_product, s); 3378 } 3379 3380 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3381 { 3382 return float64_muladd(float32_to_float64(a, s), 3383 float32_to_float64(b, s), d, 3384 float_muladd_negate_product, s); 3385 } 3386 3387 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3388 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3389 GEN_VEXT_VV_ENV(vfwnmsac_vv_h) 3390 GEN_VEXT_VV_ENV(vfwnmsac_vv_w) 3391 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3392 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3393 GEN_VEXT_VF(vfwnmsac_vf_h) 3394 GEN_VEXT_VF(vfwnmsac_vf_w) 3395 3396 /* Vector Floating-Point Square-Root Instruction */ 3397 /* (TD, T2, TX2) */ 3398 #define OP_UU_H uint16_t, uint16_t, uint16_t 3399 #define OP_UU_W uint32_t, uint32_t, uint32_t 3400 #define OP_UU_D uint64_t, uint64_t, uint64_t 3401 3402 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3403 static void do_##NAME(void *vd, void *vs2, int i, \ 3404 CPURISCVState *env) \ 3405 { \ 3406 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3407 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3408 } 3409 3410 #define GEN_VEXT_V_ENV(NAME) \ 3411 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3412 CPURISCVState *env, uint32_t desc) \ 3413 { \ 3414 uint32_t vm = vext_vm(desc); \ 3415 uint32_t vl = env->vl; \ 3416 uint32_t i; \ 3417 \ 3418 if (vl == 0) { \ 3419 return; \ 3420 } \ 3421 for (i = env->vstart; i < vl; i++) { \ 3422 if (!vm && !vext_elem_mask(v0, i)) { \ 
3423 continue; \ 3424 } \ 3425 do_##NAME(vd, vs2, i, env); \ 3426 } \ 3427 env->vstart = 0; \ 3428 } 3429 3430 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3431 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3432 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3433 GEN_VEXT_V_ENV(vfsqrt_v_h) 3434 GEN_VEXT_V_ENV(vfsqrt_v_w) 3435 GEN_VEXT_V_ENV(vfsqrt_v_d) 3436 3437 /* 3438 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3439 * 3440 * Adapted from riscv-v-spec recip.c: 3441 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3442 */ 3443 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3444 { 3445 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3446 uint64_t exp = extract64(f, frac_size, exp_size); 3447 uint64_t frac = extract64(f, 0, frac_size); 3448 3449 const uint8_t lookup_table[] = { 3450 52, 51, 50, 48, 47, 46, 44, 43, 3451 42, 41, 40, 39, 38, 36, 35, 34, 3452 33, 32, 31, 30, 30, 29, 28, 27, 3453 26, 25, 24, 23, 23, 22, 21, 20, 3454 19, 19, 18, 17, 16, 16, 15, 14, 3455 14, 13, 12, 12, 11, 10, 10, 9, 3456 9, 8, 7, 7, 6, 6, 5, 4, 3457 4, 3, 3, 2, 2, 1, 1, 0, 3458 127, 125, 123, 121, 119, 118, 116, 114, 3459 113, 111, 109, 108, 106, 105, 103, 102, 3460 100, 99, 97, 96, 95, 93, 92, 91, 3461 90, 88, 87, 86, 85, 84, 83, 82, 3462 80, 79, 78, 77, 76, 75, 74, 73, 3463 72, 71, 70, 70, 69, 68, 67, 66, 3464 65, 64, 63, 63, 62, 61, 60, 59, 3465 59, 58, 57, 56, 56, 55, 54, 53 3466 }; 3467 const int precision = 7; 3468 3469 if (exp == 0 && frac != 0) { /* subnormal */ 3470 /* Normalize the subnormal. */ 3471 while (extract64(frac, frac_size - 1, 1) == 0) { 3472 exp--; 3473 frac <<= 1; 3474 } 3475 3476 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3477 } 3478 3479 int idx = ((exp & 1) << (precision - 1)) | 3480 (frac >> (frac_size - precision + 1)); 3481 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3482 (frac_size - precision); 3483 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3484 3485 uint64_t val = 0; 3486 val = deposit64(val, 0, frac_size, out_frac); 3487 val = deposit64(val, frac_size, exp_size, out_exp); 3488 val = deposit64(val, frac_size + exp_size, 1, sign); 3489 return val; 3490 } 3491 3492 static float16 frsqrt7_h(float16 f, float_status *s) 3493 { 3494 int exp_size = 5, frac_size = 10; 3495 bool sign = float16_is_neg(f); 3496 3497 /* 3498 * frsqrt7(sNaN) = canonical NaN 3499 * frsqrt7(-inf) = canonical NaN 3500 * frsqrt7(-normal) = canonical NaN 3501 * frsqrt7(-subnormal) = canonical NaN 3502 */ 3503 if (float16_is_signaling_nan(f, s) || 3504 (float16_is_infinity(f) && sign) || 3505 (float16_is_normal(f) && sign) || 3506 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3507 s->float_exception_flags |= float_flag_invalid; 3508 return float16_default_nan(s); 3509 } 3510 3511 /* frsqrt7(qNaN) = canonical NaN */ 3512 if (float16_is_quiet_nan(f, s)) { 3513 return float16_default_nan(s); 3514 } 3515 3516 /* frsqrt7(+-0) = +-inf */ 3517 if (float16_is_zero(f)) { 3518 s->float_exception_flags |= float_flag_divbyzero; 3519 return float16_set_sign(float16_infinity, sign); 3520 } 3521 3522 /* frsqrt7(+inf) = +0 */ 3523 if (float16_is_infinity(f) && !sign) { 3524 return float16_set_sign(float16_zero, sign); 3525 } 3526 3527 /* +normal, +subnormal */ 3528 uint64_t val = frsqrt7(f, exp_size, frac_size); 3529 return make_float16(val); 3530 } 3531 3532 static float32 frsqrt7_s(float32 f, float_status *s) 3533 { 3534 int exp_size = 8, frac_size = 23; 3535 bool 
sign = float32_is_neg(f); 3536 3537 /* 3538 * frsqrt7(sNaN) = canonical NaN 3539 * frsqrt7(-inf) = canonical NaN 3540 * frsqrt7(-normal) = canonical NaN 3541 * frsqrt7(-subnormal) = canonical NaN 3542 */ 3543 if (float32_is_signaling_nan(f, s) || 3544 (float32_is_infinity(f) && sign) || 3545 (float32_is_normal(f) && sign) || 3546 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3547 s->float_exception_flags |= float_flag_invalid; 3548 return float32_default_nan(s); 3549 } 3550 3551 /* frsqrt7(qNaN) = canonical NaN */ 3552 if (float32_is_quiet_nan(f, s)) { 3553 return float32_default_nan(s); 3554 } 3555 3556 /* frsqrt7(+-0) = +-inf */ 3557 if (float32_is_zero(f)) { 3558 s->float_exception_flags |= float_flag_divbyzero; 3559 return float32_set_sign(float32_infinity, sign); 3560 } 3561 3562 /* frsqrt7(+inf) = +0 */ 3563 if (float32_is_infinity(f) && !sign) { 3564 return float32_set_sign(float32_zero, sign); 3565 } 3566 3567 /* +normal, +subnormal */ 3568 uint64_t val = frsqrt7(f, exp_size, frac_size); 3569 return make_float32(val); 3570 } 3571 3572 static float64 frsqrt7_d(float64 f, float_status *s) 3573 { 3574 int exp_size = 11, frac_size = 52; 3575 bool sign = float64_is_neg(f); 3576 3577 /* 3578 * frsqrt7(sNaN) = canonical NaN 3579 * frsqrt7(-inf) = canonical NaN 3580 * frsqrt7(-normal) = canonical NaN 3581 * frsqrt7(-subnormal) = canonical NaN 3582 */ 3583 if (float64_is_signaling_nan(f, s) || 3584 (float64_is_infinity(f) && sign) || 3585 (float64_is_normal(f) && sign) || 3586 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3587 s->float_exception_flags |= float_flag_invalid; 3588 return float64_default_nan(s); 3589 } 3590 3591 /* frsqrt7(qNaN) = canonical NaN */ 3592 if (float64_is_quiet_nan(f, s)) { 3593 return float64_default_nan(s); 3594 } 3595 3596 /* frsqrt7(+-0) = +-inf */ 3597 if (float64_is_zero(f)) { 3598 s->float_exception_flags |= float_flag_divbyzero; 3599 return float64_set_sign(float64_infinity, sign); 3600 } 3601 3602 /* frsqrt7(+inf) = +0 */ 3603 if (float64_is_infinity(f) && !sign) { 3604 return float64_set_sign(float64_zero, sign); 3605 } 3606 3607 /* +normal, +subnormal */ 3608 uint64_t val = frsqrt7(f, exp_size, frac_size); 3609 return make_float64(val); 3610 } 3611 3612 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3613 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3614 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3615 GEN_VEXT_V_ENV(vfrsqrt7_v_h) 3616 GEN_VEXT_V_ENV(vfrsqrt7_v_w) 3617 GEN_VEXT_V_ENV(vfrsqrt7_v_d) 3618 3619 /* 3620 * Vector Floating-Point Reciprocal Estimate Instruction 3621 * 3622 * Adapted from riscv-v-spec recip.c: 3623 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3624 */ 3625 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3626 float_status *s) 3627 { 3628 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3629 uint64_t exp = extract64(f, frac_size, exp_size); 3630 uint64_t frac = extract64(f, 0, frac_size); 3631 3632 const uint8_t lookup_table[] = { 3633 127, 125, 123, 121, 119, 117, 116, 114, 3634 112, 110, 109, 107, 105, 104, 102, 100, 3635 99, 97, 96, 94, 93, 91, 90, 88, 3636 87, 85, 84, 83, 81, 80, 79, 77, 3637 76, 75, 74, 72, 71, 70, 69, 68, 3638 66, 65, 64, 63, 62, 61, 60, 59, 3639 58, 57, 56, 55, 54, 53, 52, 51, 3640 50, 49, 48, 47, 46, 45, 44, 43, 3641 42, 41, 40, 40, 39, 38, 37, 36, 3642 35, 35, 34, 33, 32, 31, 31, 30, 3643 29, 28, 28, 27, 26, 25, 25, 24, 3644 23, 23, 22, 21, 21, 20, 19, 19, 3645 18, 17, 17, 16, 15, 15, 14, 14, 
3646 13, 12, 12, 11, 11, 10, 9, 9, 3647 8, 8, 7, 7, 6, 5, 5, 4, 3648 4, 3, 3, 2, 2, 1, 1, 0 3649 }; 3650 const int precision = 7; 3651 3652 if (exp == 0 && frac != 0) { /* subnormal */ 3653 /* Normalize the subnormal. */ 3654 while (extract64(frac, frac_size - 1, 1) == 0) { 3655 exp--; 3656 frac <<= 1; 3657 } 3658 3659 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3660 3661 if (exp != 0 && exp != UINT64_MAX) { 3662 /* 3663 * Overflow to inf or max value of same sign, 3664 * depending on sign and rounding mode. 3665 */ 3666 s->float_exception_flags |= (float_flag_inexact | 3667 float_flag_overflow); 3668 3669 if ((s->float_rounding_mode == float_round_to_zero) || 3670 ((s->float_rounding_mode == float_round_down) && !sign) || 3671 ((s->float_rounding_mode == float_round_up) && sign)) { 3672 /* Return greatest/negative finite value. */ 3673 return (sign << (exp_size + frac_size)) | 3674 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3675 } else { 3676 /* Return +-inf. */ 3677 return (sign << (exp_size + frac_size)) | 3678 MAKE_64BIT_MASK(frac_size, exp_size); 3679 } 3680 } 3681 } 3682 3683 int idx = frac >> (frac_size - precision); 3684 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3685 (frac_size - precision); 3686 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3687 3688 if (out_exp == 0 || out_exp == UINT64_MAX) { 3689 /* 3690 * The result is subnormal, but don't raise the underflow exception, 3691 * because there's no additional loss of precision. 3692 */ 3693 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3694 if (out_exp == UINT64_MAX) { 3695 out_frac >>= 1; 3696 out_exp = 0; 3697 } 3698 } 3699 3700 uint64_t val = 0; 3701 val = deposit64(val, 0, frac_size, out_frac); 3702 val = deposit64(val, frac_size, exp_size, out_exp); 3703 val = deposit64(val, frac_size + exp_size, 1, sign); 3704 return val; 3705 } 3706 3707 static float16 frec7_h(float16 f, float_status *s) 3708 { 3709 int exp_size = 5, frac_size = 10; 3710 bool sign = float16_is_neg(f); 3711 3712 /* frec7(+-inf) = +-0 */ 3713 if (float16_is_infinity(f)) { 3714 return float16_set_sign(float16_zero, sign); 3715 } 3716 3717 /* frec7(+-0) = +-inf */ 3718 if (float16_is_zero(f)) { 3719 s->float_exception_flags |= float_flag_divbyzero; 3720 return float16_set_sign(float16_infinity, sign); 3721 } 3722 3723 /* frec7(sNaN) = canonical NaN */ 3724 if (float16_is_signaling_nan(f, s)) { 3725 s->float_exception_flags |= float_flag_invalid; 3726 return float16_default_nan(s); 3727 } 3728 3729 /* frec7(qNaN) = canonical NaN */ 3730 if (float16_is_quiet_nan(f, s)) { 3731 return float16_default_nan(s); 3732 } 3733 3734 /* +-normal, +-subnormal */ 3735 uint64_t val = frec7(f, exp_size, frac_size, s); 3736 return make_float16(val); 3737 } 3738 3739 static float32 frec7_s(float32 f, float_status *s) 3740 { 3741 int exp_size = 8, frac_size = 23; 3742 bool sign = float32_is_neg(f); 3743 3744 /* frec7(+-inf) = +-0 */ 3745 if (float32_is_infinity(f)) { 3746 return float32_set_sign(float32_zero, sign); 3747 } 3748 3749 /* frec7(+-0) = +-inf */ 3750 if (float32_is_zero(f)) { 3751 s->float_exception_flags |= float_flag_divbyzero; 3752 return float32_set_sign(float32_infinity, sign); 3753 } 3754 3755 /* frec7(sNaN) = canonical NaN */ 3756 if (float32_is_signaling_nan(f, s)) { 3757 s->float_exception_flags |= float_flag_invalid; 3758 return float32_default_nan(s); 3759 } 3760 3761 /* frec7(qNaN) = canonical NaN */ 3762 if (float32_is_quiet_nan(f, s)) { 3763 return float32_default_nan(s); 3764 } 3765 3766 /* 
+-normal, +-subnormal */ 3767 uint64_t val = frec7(f, exp_size, frac_size, s); 3768 return make_float32(val); 3769 } 3770 3771 static float64 frec7_d(float64 f, float_status *s) 3772 { 3773 int exp_size = 11, frac_size = 52; 3774 bool sign = float64_is_neg(f); 3775 3776 /* frec7(+-inf) = +-0 */ 3777 if (float64_is_infinity(f)) { 3778 return float64_set_sign(float64_zero, sign); 3779 } 3780 3781 /* frec7(+-0) = +-inf */ 3782 if (float64_is_zero(f)) { 3783 s->float_exception_flags |= float_flag_divbyzero; 3784 return float64_set_sign(float64_infinity, sign); 3785 } 3786 3787 /* frec7(sNaN) = canonical NaN */ 3788 if (float64_is_signaling_nan(f, s)) { 3789 s->float_exception_flags |= float_flag_invalid; 3790 return float64_default_nan(s); 3791 } 3792 3793 /* frec7(qNaN) = canonical NaN */ 3794 if (float64_is_quiet_nan(f, s)) { 3795 return float64_default_nan(s); 3796 } 3797 3798 /* +-normal, +-subnormal */ 3799 uint64_t val = frec7(f, exp_size, frac_size, s); 3800 return make_float64(val); 3801 } 3802 3803 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3804 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3805 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3806 GEN_VEXT_V_ENV(vfrec7_v_h) 3807 GEN_VEXT_V_ENV(vfrec7_v_w) 3808 GEN_VEXT_V_ENV(vfrec7_v_d) 3809 3810 /* Vector Floating-Point MIN/MAX Instructions */ 3811 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3812 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) 3813 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3814 GEN_VEXT_VV_ENV(vfmin_vv_h) 3815 GEN_VEXT_VV_ENV(vfmin_vv_w) 3816 GEN_VEXT_VV_ENV(vfmin_vv_d) 3817 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3818 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3819 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3820 GEN_VEXT_VF(vfmin_vf_h) 3821 GEN_VEXT_VF(vfmin_vf_w) 3822 GEN_VEXT_VF(vfmin_vf_d) 3823 3824 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3825 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3826 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3827 GEN_VEXT_VV_ENV(vfmax_vv_h) 3828 GEN_VEXT_VV_ENV(vfmax_vv_w) 3829 GEN_VEXT_VV_ENV(vfmax_vv_d) 3830 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3831 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3832 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3833 GEN_VEXT_VF(vfmax_vf_h) 3834 GEN_VEXT_VF(vfmax_vf_w) 3835 GEN_VEXT_VF(vfmax_vf_d) 3836 3837 /* Vector Floating-Point Sign-Injection Instructions */ 3838 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3839 { 3840 return deposit64(b, 0, 15, a); 3841 } 3842 3843 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3844 { 3845 return deposit64(b, 0, 31, a); 3846 } 3847 3848 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3849 { 3850 return deposit64(b, 0, 63, a); 3851 } 3852 3853 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3854 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3855 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3856 GEN_VEXT_VV_ENV(vfsgnj_vv_h) 3857 GEN_VEXT_VV_ENV(vfsgnj_vv_w) 3858 GEN_VEXT_VV_ENV(vfsgnj_vv_d) 3859 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3860 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3861 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, 
H8, fsgnj64) 3862 GEN_VEXT_VF(vfsgnj_vf_h) 3863 GEN_VEXT_VF(vfsgnj_vf_w) 3864 GEN_VEXT_VF(vfsgnj_vf_d) 3865 3866 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3867 { 3868 return deposit64(~b, 0, 15, a); 3869 } 3870 3871 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3872 { 3873 return deposit64(~b, 0, 31, a); 3874 } 3875 3876 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3877 { 3878 return deposit64(~b, 0, 63, a); 3879 } 3880 3881 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3882 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3883 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3884 GEN_VEXT_VV_ENV(vfsgnjn_vv_h) 3885 GEN_VEXT_VV_ENV(vfsgnjn_vv_w) 3886 GEN_VEXT_VV_ENV(vfsgnjn_vv_d) 3887 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3888 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3889 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3890 GEN_VEXT_VF(vfsgnjn_vf_h) 3891 GEN_VEXT_VF(vfsgnjn_vf_w) 3892 GEN_VEXT_VF(vfsgnjn_vf_d) 3893 3894 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3895 { 3896 return deposit64(b ^ a, 0, 15, a); 3897 } 3898 3899 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3900 { 3901 return deposit64(b ^ a, 0, 31, a); 3902 } 3903 3904 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3905 { 3906 return deposit64(b ^ a, 0, 63, a); 3907 } 3908 3909 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3910 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3911 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3912 GEN_VEXT_VV_ENV(vfsgnjx_vv_h) 3913 GEN_VEXT_VV_ENV(vfsgnjx_vv_w) 3914 GEN_VEXT_VV_ENV(vfsgnjx_vv_d) 3915 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3916 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3917 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3918 GEN_VEXT_VF(vfsgnjx_vf_h) 3919 GEN_VEXT_VF(vfsgnjx_vf_w) 3920 GEN_VEXT_VF(vfsgnjx_vf_d) 3921 3922 /* Vector Floating-Point Compare Instructions */ 3923 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3924 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3925 CPURISCVState *env, uint32_t desc) \ 3926 { \ 3927 uint32_t vm = vext_vm(desc); \ 3928 uint32_t vl = env->vl; \ 3929 uint32_t i; \ 3930 \ 3931 for (i = env->vstart; i < vl; i++) { \ 3932 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3933 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3934 if (!vm && !vext_elem_mask(v0, i)) { \ 3935 continue; \ 3936 } \ 3937 vext_set_elem_mask(vd, i, \ 3938 DO_OP(s2, s1, &env->fp_status)); \ 3939 } \ 3940 env->vstart = 0; \ 3941 } 3942 3943 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3944 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3945 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3946 3947 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3948 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3949 CPURISCVState *env, uint32_t desc) \ 3950 { \ 3951 uint32_t vm = vext_vm(desc); \ 3952 uint32_t vl = env->vl; \ 3953 uint32_t i; \ 3954 \ 3955 for (i = env->vstart; i < vl; i++) { \ 3956 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3957 if (!vm && !vext_elem_mask(v0, i)) { \ 3958 continue; \ 3959 } \ 3960 vext_set_elem_mask(vd, i, \ 3961 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3962 } \ 3963 env->vstart = 0; \ 3964 } 3965 3966 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3967 
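/*
 * Each compare helper above writes one mask bit per element with
 * vext_set_elem_mask(); inactive elements are skipped by the `continue`,
 * so their previous mask bits are left unchanged. vmfeq/vmfne use the
 * quiet softfloat predicates (invalid is raised only for signaling NaNs),
 * while vmflt/vmfle/vmfgt/vmfge below use the signaling ones (invalid for
 * any NaN operand). E.g. with vl = 4, rs1 = 2.0 and vs2 = {1.0, 2.0, NaN,
 * 2.0}, vmfeq.vf produces mask bits {0, 1, 0, 1}, and a quiet NaN operand
 * sets no exception flags.
 */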
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3968 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3969 3970 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3971 { 3972 FloatRelation compare = float16_compare_quiet(a, b, s); 3973 return compare != float_relation_equal; 3974 } 3975 3976 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 3977 { 3978 FloatRelation compare = float32_compare_quiet(a, b, s); 3979 return compare != float_relation_equal; 3980 } 3981 3982 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 3983 { 3984 FloatRelation compare = float64_compare_quiet(a, b, s); 3985 return compare != float_relation_equal; 3986 } 3987 3988 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 3989 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 3990 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 3991 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 3992 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 3993 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 3994 3995 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 3996 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 3997 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 3998 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 3999 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4000 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4001 4002 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4003 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4004 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4005 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4006 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4007 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4008 4009 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4010 { 4011 FloatRelation compare = float16_compare(a, b, s); 4012 return compare == float_relation_greater; 4013 } 4014 4015 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4016 { 4017 FloatRelation compare = float32_compare(a, b, s); 4018 return compare == float_relation_greater; 4019 } 4020 4021 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4022 { 4023 FloatRelation compare = float64_compare(a, b, s); 4024 return compare == float_relation_greater; 4025 } 4026 4027 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4028 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4029 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4030 4031 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4032 { 4033 FloatRelation compare = float16_compare(a, b, s); 4034 return compare == float_relation_greater || 4035 compare == float_relation_equal; 4036 } 4037 4038 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4039 { 4040 FloatRelation compare = float32_compare(a, b, s); 4041 return compare == float_relation_greater || 4042 compare == float_relation_equal; 4043 } 4044 4045 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4046 { 4047 FloatRelation compare = float64_compare(a, b, s); 4048 return compare == float_relation_greater || 4049 compare == float_relation_equal; 4050 } 4051 4052 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4053 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4054 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4055 4056 /* Vector Floating-Point Classify Instruction */ 4057 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4058 static void do_##NAME(void *vd, void *vs2, int i) \ 
4059 { \ 4060 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4061 *((TD *)vd + HD(i)) = OP(s2); \ 4062 } 4063 4064 #define GEN_VEXT_V(NAME) \ 4065 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4066 CPURISCVState *env, uint32_t desc) \ 4067 { \ 4068 uint32_t vm = vext_vm(desc); \ 4069 uint32_t vl = env->vl; \ 4070 uint32_t i; \ 4071 \ 4072 for (i = env->vstart; i < vl; i++) { \ 4073 if (!vm && !vext_elem_mask(v0, i)) { \ 4074 continue; \ 4075 } \ 4076 do_##NAME(vd, vs2, i); \ 4077 } \ 4078 env->vstart = 0; \ 4079 } 4080 4081 target_ulong fclass_h(uint64_t frs1) 4082 { 4083 float16 f = frs1; 4084 bool sign = float16_is_neg(f); 4085 4086 if (float16_is_infinity(f)) { 4087 return sign ? 1 << 0 : 1 << 7; 4088 } else if (float16_is_zero(f)) { 4089 return sign ? 1 << 3 : 1 << 4; 4090 } else if (float16_is_zero_or_denormal(f)) { 4091 return sign ? 1 << 2 : 1 << 5; 4092 } else if (float16_is_any_nan(f)) { 4093 float_status s = { }; /* for snan_bit_is_one */ 4094 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4095 } else { 4096 return sign ? 1 << 1 : 1 << 6; 4097 } 4098 } 4099 4100 target_ulong fclass_s(uint64_t frs1) 4101 { 4102 float32 f = frs1; 4103 bool sign = float32_is_neg(f); 4104 4105 if (float32_is_infinity(f)) { 4106 return sign ? 1 << 0 : 1 << 7; 4107 } else if (float32_is_zero(f)) { 4108 return sign ? 1 << 3 : 1 << 4; 4109 } else if (float32_is_zero_or_denormal(f)) { 4110 return sign ? 1 << 2 : 1 << 5; 4111 } else if (float32_is_any_nan(f)) { 4112 float_status s = { }; /* for snan_bit_is_one */ 4113 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4114 } else { 4115 return sign ? 1 << 1 : 1 << 6; 4116 } 4117 } 4118 4119 target_ulong fclass_d(uint64_t frs1) 4120 { 4121 float64 f = frs1; 4122 bool sign = float64_is_neg(f); 4123 4124 if (float64_is_infinity(f)) { 4125 return sign ? 1 << 0 : 1 << 7; 4126 } else if (float64_is_zero(f)) { 4127 return sign ? 1 << 3 : 1 << 4; 4128 } else if (float64_is_zero_or_denormal(f)) { 4129 return sign ? 1 << 2 : 1 << 5; 4130 } else if (float64_is_any_nan(f)) { 4131 float_status s = { }; /* for snan_bit_is_one */ 4132 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4133 } else { 4134 return sign ? 1 << 1 : 1 << 6; 4135 } 4136 } 4137 4138 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4139 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4140 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4141 GEN_VEXT_V(vfclass_v_h) 4142 GEN_VEXT_V(vfclass_v_w) 4143 GEN_VEXT_V(vfclass_v_d) 4144 4145 /* Vector Floating-Point Merge Instruction */ 4146 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4147 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4148 CPURISCVState *env, uint32_t desc) \ 4149 { \ 4150 uint32_t vm = vext_vm(desc); \ 4151 uint32_t vl = env->vl; \ 4152 uint32_t i; \ 4153 \ 4154 for (i = env->vstart; i < vl; i++) { \ 4155 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4156 *((ETYPE *)vd + H(i)) \ 4157 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4158 } \ 4159 env->vstart = 0; \ 4160 } 4161 4162 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4163 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4164 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4165 4166 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4167 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
*/ 4168 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4169 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4170 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4171 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) 4172 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) 4173 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) 4174 4175 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4176 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4177 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4178 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4179 GEN_VEXT_V_ENV(vfcvt_x_f_v_h) 4180 GEN_VEXT_V_ENV(vfcvt_x_f_v_w) 4181 GEN_VEXT_V_ENV(vfcvt_x_f_v_d) 4182 4183 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4184 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4185 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4186 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4187 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) 4188 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) 4189 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) 4190 4191 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4192 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4193 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4194 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4195 GEN_VEXT_V_ENV(vfcvt_f_x_v_h) 4196 GEN_VEXT_V_ENV(vfcvt_f_x_v_w) 4197 GEN_VEXT_V_ENV(vfcvt_f_x_v_d) 4198 4199 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4200 /* (TD, T2, TX2) */ 4201 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4202 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4203 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4204 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4205 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4206 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4207 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) 4208 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) 4209 4210 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4211 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4212 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4213 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) 4214 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) 4215 4216 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4217 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4218 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4219 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4220 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) 4221 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) 4222 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) 4223 4224 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4225 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4226 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4227 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4228 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) 4229 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) 4230 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) 4231 4232 /* 4233 * vfwcvt.f.f.v vd, vs2, vm 4234 * Convert single-width float to double-width float. 
4235 */ 4236 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4237 { 4238 return float16_to_float32(a, true, s); 4239 } 4240 4241 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4242 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4243 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) 4244 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) 4245 4246 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4247 /* (TD, T2, TX2) */ 4248 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4249 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4250 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4251 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4252 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4253 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4254 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4255 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) 4256 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) 4257 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) 4258 4259 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4260 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4261 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4262 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4263 GEN_VEXT_V_ENV(vfncvt_x_f_w_b) 4264 GEN_VEXT_V_ENV(vfncvt_x_f_w_h) 4265 GEN_VEXT_V_ENV(vfncvt_x_f_w_w) 4266 4267 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4268 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4269 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4270 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) 4271 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) 4272 4273 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4274 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4275 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4276 GEN_VEXT_V_ENV(vfncvt_f_x_w_h) 4277 GEN_VEXT_V_ENV(vfncvt_f_x_w_w) 4278 4279 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4280 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4281 { 4282 return float32_to_float16(a, true, s); 4283 } 4284 4285 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4286 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4287 GEN_VEXT_V_ENV(vfncvt_f_f_w_h) 4288 GEN_VEXT_V_ENV(vfncvt_f_f_w_w) 4289 4290 /* 4291 *** Vector Reduction Operations 4292 */ 4293 /* Vector Single-Width Integer Reduction Instructions */ 4294 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4295 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4296 void *vs2, CPURISCVState *env, uint32_t desc) \ 4297 { \ 4298 uint32_t vm = vext_vm(desc); \ 4299 uint32_t vl = env->vl; \ 4300 uint32_t i; \ 4301 TD s1 = *((TD *)vs1 + HD(0)); \ 4302 \ 4303 for (i = env->vstart; i < vl; i++) { \ 4304 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4305 if (!vm && !vext_elem_mask(v0, i)) { \ 4306 continue; \ 4307 } \ 4308 s1 = OP(s1, (TD)s2); \ 4309 } \ 4310 *((TD *)vd + HD(0)) = s1; \ 4311 env->vstart = 0; \ 4312 } 4313 4314 /* vd[0] = sum(vs1[0], vs2[*]) */ 4315 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4316 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4317 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4318 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4319 4320 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4321 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4322 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4323 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4324 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4325 4326 /* vd[0] = max(vs1[0], vs2[*]) */ 4327 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4328 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4329 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4330 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4331 4332 /* vd[0] = minu(vs1[0], vs2[*]) */ 4333 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4334 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4335 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4336 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4337 4338 /* vd[0] = min(vs1[0], vs2[*]) */ 4339 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4340 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4341 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4342 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4343 4344 /* vd[0] = and(vs1[0], vs2[*]) */ 4345 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4346 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4347 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4348 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4349 4350 /* vd[0] = or(vs1[0], vs2[*]) */ 4351 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4352 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4353 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4354 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4355 4356 /* vd[0] = xor(vs1[0], vs2[*]) */ 4357 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4358 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4359 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4360 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4361 4362 /* Vector Widening Integer Reduction Instructions */ 4363 
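/*
 * The widening reductions below reuse GEN_VEXT_RED with a destination
 * type (TD) twice as wide as the source type (TS2): each active vs2
 * element is widened by the (TD)s2 cast (sign- or zero-extended
 * according to the instantiation types) before being accumulated into
 * the double-width scalar seeded from vs1[0] and written back to vd[0].
 * E.g. for vwredsum.vs with SEW=8, vl = 3, vs1[0] = 0 and
 * vs2 = {120, 120, 120}, vd[0] becomes 360 as an int16_t, a value that
 * could not be represented at SEW=8.
 */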
/* signed sum reduction into double-width accumulator */ 4364 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4365 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4366 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4367 4368 /* Unsigned sum reduction into double-width accumulator */ 4369 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4370 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4371 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4372 4373 /* Vector Single-Width Floating-Point Reduction Instructions */ 4374 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4375 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4376 void *vs2, CPURISCVState *env, \ 4377 uint32_t desc) \ 4378 { \ 4379 uint32_t vm = vext_vm(desc); \ 4380 uint32_t vl = env->vl; \ 4381 uint32_t i; \ 4382 TD s1 = *((TD *)vs1 + HD(0)); \ 4383 \ 4384 for (i = env->vstart; i < vl; i++) { \ 4385 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4386 if (!vm && !vext_elem_mask(v0, i)) { \ 4387 continue; \ 4388 } \ 4389 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4390 } \ 4391 *((TD *)vd + HD(0)) = s1; \ 4392 env->vstart = 0; \ 4393 } 4394 4395 /* Unordered sum */ 4396 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4397 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4398 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4399 4400 /* Maximum value */ 4401 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4402 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4403 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4404 4405 /* Minimum value */ 4406 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4407 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4408 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4409 4410 /* Vector Widening Floating-Point Reduction Instructions */ 4411 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4412 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4413 void *vs2, CPURISCVState *env, uint32_t desc) 4414 { 4415 uint32_t vm = vext_vm(desc); 4416 uint32_t vl = env->vl; 4417 uint32_t i; 4418 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4419 4420 for (i = env->vstart; i < vl; i++) { 4421 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4422 if (!vm && !vext_elem_mask(v0, i)) { 4423 continue; 4424 } 4425 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4426 &env->fp_status); 4427 } 4428 *((uint32_t *)vd + H4(0)) = s1; 4429 env->vstart = 0; 4430 } 4431 4432 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4433 void *vs2, CPURISCVState *env, uint32_t desc) 4434 { 4435 uint32_t vm = vext_vm(desc); 4436 uint32_t vl = env->vl; 4437 uint32_t i; 4438 uint64_t s1 = *((uint64_t *)vs1); 4439 4440 for (i = env->vstart; i < vl; i++) { 4441 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4442 if (!vm && !vext_elem_mask(v0, i)) { 4443 continue; 4444 } 4445 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4446 &env->fp_status); 4447 } 4448 *((uint64_t *)vd) = s1; 4449 env->vstart = 0; 4450 } 4451 4452 /* 4453 *** Vector Mask Operations 4454 */ 4455 /* Vector Mask-Register Logical Instructions */ 4456 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4457 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4458 void *vs2, CPURISCVState *env, \ 4459 uint32_t desc) \ 
4460 { \ 4461 uint32_t vl = env->vl; \ 4462 uint32_t i; \ 4463 int a, b; \ 4464 \ 4465 for (i = env->vstart; i < vl; i++) { \ 4466 a = vext_elem_mask(vs1, i); \ 4467 b = vext_elem_mask(vs2, i); \ 4468 vext_set_elem_mask(vd, i, OP(b, a)); \ 4469 } \ 4470 env->vstart = 0; \ 4471 } 4472 4473 #define DO_NAND(N, M) (!(N & M)) 4474 #define DO_ANDNOT(N, M) (N & !M) 4475 #define DO_NOR(N, M) (!(N | M)) 4476 #define DO_ORNOT(N, M) (N | !M) 4477 #define DO_XNOR(N, M) (!(N ^ M)) 4478 4479 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4480 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4481 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4482 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4483 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4484 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4485 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4486 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4487 4488 /* Vector count population in mask vcpop */ 4489 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4490 uint32_t desc) 4491 { 4492 target_ulong cnt = 0; 4493 uint32_t vm = vext_vm(desc); 4494 uint32_t vl = env->vl; 4495 int i; 4496 4497 for (i = env->vstart; i < vl; i++) { 4498 if (vm || vext_elem_mask(v0, i)) { 4499 if (vext_elem_mask(vs2, i)) { 4500 cnt++; 4501 } 4502 } 4503 } 4504 env->vstart = 0; 4505 return cnt; 4506 } 4507 4508 /* vfirst find-first-set mask bit*/ 4509 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4510 uint32_t desc) 4511 { 4512 uint32_t vm = vext_vm(desc); 4513 uint32_t vl = env->vl; 4514 int i; 4515 4516 for (i = env->vstart; i < vl; i++) { 4517 if (vm || vext_elem_mask(v0, i)) { 4518 if (vext_elem_mask(vs2, i)) { 4519 return i; 4520 } 4521 } 4522 } 4523 env->vstart = 0; 4524 return -1LL; 4525 } 4526 4527 enum set_mask_type { 4528 ONLY_FIRST = 1, 4529 INCLUDE_FIRST, 4530 BEFORE_FIRST, 4531 }; 4532 4533 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4534 uint32_t desc, enum set_mask_type type) 4535 { 4536 uint32_t vm = vext_vm(desc); 4537 uint32_t vl = env->vl; 4538 int i; 4539 bool first_mask_bit = false; 4540 4541 for (i = env->vstart; i < vl; i++) { 4542 if (!vm && !vext_elem_mask(v0, i)) { 4543 continue; 4544 } 4545 /* write a zero to all following active elements */ 4546 if (first_mask_bit) { 4547 vext_set_elem_mask(vd, i, 0); 4548 continue; 4549 } 4550 if (vext_elem_mask(vs2, i)) { 4551 first_mask_bit = true; 4552 if (type == BEFORE_FIRST) { 4553 vext_set_elem_mask(vd, i, 0); 4554 } else { 4555 vext_set_elem_mask(vd, i, 1); 4556 } 4557 } else { 4558 if (type == ONLY_FIRST) { 4559 vext_set_elem_mask(vd, i, 0); 4560 } else { 4561 vext_set_elem_mask(vd, i, 1); 4562 } 4563 } 4564 } 4565 env->vstart = 0; 4566 } 4567 4568 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4569 uint32_t desc) 4570 { 4571 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4572 } 4573 4574 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4575 uint32_t desc) 4576 { 4577 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4578 } 4579 4580 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4581 uint32_t desc) 4582 { 4583 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4584 } 4585 4586 /* Vector Iota Instruction */ 4587 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4588 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4589 uint32_t desc) \ 4590 { \ 4591 uint32_t vm = vext_vm(desc); \ 4592 uint32_t vl = env->vl; \ 4593 uint32_t sum = 0; \ 4594 int i; \ 4595 \ 4596 for (i = env->vstart; i < vl; i++) { \ 4597 if (!vm && !vext_elem_mask(v0, i)) { \ 4598 
continue; \ 4599 } \ 4600 *((ETYPE *)vd + H(i)) = sum; \ 4601 if (vext_elem_mask(vs2, i)) { \ 4602 sum++; \ 4603 } \ 4604 } \ 4605 env->vstart = 0; \ 4606 } 4607 4608 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4609 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4610 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4611 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4612 4613 /* Vector Element Index Instruction */ 4614 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4615 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4616 { \ 4617 uint32_t vm = vext_vm(desc); \ 4618 uint32_t vl = env->vl; \ 4619 int i; \ 4620 \ 4621 for (i = env->vstart; i < vl; i++) { \ 4622 if (!vm && !vext_elem_mask(v0, i)) { \ 4623 continue; \ 4624 } \ 4625 *((ETYPE *)vd + H(i)) = i; \ 4626 } \ 4627 env->vstart = 0; \ 4628 } 4629 4630 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4631 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4632 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4633 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4634 4635 /* 4636 *** Vector Permutation Instructions 4637 */ 4638 4639 /* Vector Slide Instructions */ 4640 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4641 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4642 CPURISCVState *env, uint32_t desc) \ 4643 { \ 4644 uint32_t vm = vext_vm(desc); \ 4645 uint32_t vl = env->vl; \ 4646 target_ulong offset = s1, i_min, i; \ 4647 \ 4648 i_min = MAX(env->vstart, offset); \ 4649 for (i = i_min; i < vl; i++) { \ 4650 if (!vm && !vext_elem_mask(v0, i)) { \ 4651 continue; \ 4652 } \ 4653 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4654 } \ 4655 } 4656 4657 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4658 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4659 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4660 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4661 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4662 4663 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4664 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4665 CPURISCVState *env, uint32_t desc) \ 4666 { \ 4667 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4668 uint32_t vm = vext_vm(desc); \ 4669 uint32_t vl = env->vl; \ 4670 target_ulong i_max, i; \ 4671 \ 4672 i_max = MAX(MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl), env->vstart); \ 4673 for (i = env->vstart; i < i_max; ++i) { \ 4674 if (vm || vext_elem_mask(v0, i)) { \ 4675 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4676 } \ 4677 } \ 4678 \ 4679 for (i = i_max; i < vl; ++i) { \ 4680 if (vm || vext_elem_mask(v0, i)) { \ 4681 *((ETYPE *)vd + H(i)) = 0; \ 4682 } \ 4683 } \ 4684 \ 4685 env->vstart = 0; \ 4686 } 4687 4688 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4689 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4690 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4691 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4692 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4693 4694 #define GEN_VEXT_VSLIE1UP(ESZ, H) \ 4695 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4696 CPURISCVState *env, uint32_t desc) \ 4697 { \ 4698 typedef uint##ESZ##_t ETYPE; \ 4699 uint32_t vm = vext_vm(desc); \ 4700 uint32_t vl = env->vl; \ 4701 uint32_t i; \ 4702 \ 4703 for (i = env->vstart; i < vl; i++) { \ 4704 if (!vm && !vext_elem_mask(v0, i)) { \ 4705 continue; \ 4706 } \ 4707 if (i == 0) { \ 4708 *((ETYPE *)vd + H(i)) = s1; \ 4709 } else { \ 4710 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4711 } \ 4712 } \ 4713 env->vstart = 0; \ 4714 } 4715 4716 GEN_VEXT_VSLIE1UP(8, H1) 4717 GEN_VEXT_VSLIE1UP(16, H2) 4718 GEN_VEXT_VSLIE1UP(32, H4) 4719 GEN_VEXT_VSLIE1UP(64, H8) 4720 4721 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \ 4722 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4723 CPURISCVState *env, uint32_t desc) \ 4724 { \ 4725 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4726 } 4727 4728 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4729 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4730 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4731 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4732 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4733 4734 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \ 4735 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4736 CPURISCVState *env, uint32_t desc) \ 4737 { \ 4738 typedef uint##ESZ##_t ETYPE; \ 4739 uint32_t vm = vext_vm(desc); \ 4740 uint32_t vl = env->vl; \ 4741 uint32_t i; \ 4742 \ 4743 for (i = env->vstart; i < vl; i++) { \ 4744 if (!vm && !vext_elem_mask(v0, i)) { \ 4745 continue; \ 4746 } \ 4747 if (i == vl - 1) { \ 4748 *((ETYPE *)vd + H(i)) = s1; \ 4749 } else { \ 4750 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4751 } \ 4752 } \ 4753 env->vstart = 0; \ 4754 } 4755 4756 GEN_VEXT_VSLIDE1DOWN(8, H1) 4757 GEN_VEXT_VSLIDE1DOWN(16, H2) 4758 GEN_VEXT_VSLIDE1DOWN(32, H4) 4759 GEN_VEXT_VSLIDE1DOWN(64, H8) 4760 4761 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \ 4762 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4763 CPURISCVState *env, uint32_t desc) \ 4764 { \ 4765 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4766 } 4767 4768 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4769 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4770 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4771 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4772 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4773 4774 /* Vector Floating-Point Slide Instructions */ 4775 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \ 4776 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4777 CPURISCVState *env, uint32_t desc) \ 4778 { \ 4779 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4780 } 4781 4782 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = 
vs2[i] */ 4783 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4784 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4785 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4786 4787 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \ 4788 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4789 CPURISCVState *env, uint32_t desc) \ 4790 { \ 4791 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4792 } 4793 4794 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4795 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4796 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4797 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4798 4799 /* Vector Register Gather Instruction */ 4800 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4801 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4802 CPURISCVState *env, uint32_t desc) \ 4803 { \ 4804 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4805 uint32_t vm = vext_vm(desc); \ 4806 uint32_t vl = env->vl; \ 4807 uint64_t index; \ 4808 uint32_t i; \ 4809 \ 4810 for (i = env->vstart; i < vl; i++) { \ 4811 if (!vm && !vext_elem_mask(v0, i)) { \ 4812 continue; \ 4813 } \ 4814 index = *((TS1 *)vs1 + HS1(i)); \ 4815 if (index >= vlmax) { \ 4816 *((TS2 *)vd + HS2(i)) = 0; \ 4817 } else { \ 4818 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4819 } \ 4820 } \ 4821 env->vstart = 0; \ 4822 } 4823 4824 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 4825 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4826 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4827 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4828 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4829 4830 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4831 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4832 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4833 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4834 4835 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4836 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4837 CPURISCVState *env, uint32_t desc) \ 4838 { \ 4839 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4840 uint32_t vm = vext_vm(desc); \ 4841 uint32_t vl = env->vl; \ 4842 uint64_t index = s1; \ 4843 uint32_t i; \ 4844 \ 4845 for (i = env->vstart; i < vl; i++) { \ 4846 if (!vm && !vext_elem_mask(v0, i)) { \ 4847 continue; \ 4848 } \ 4849 if (index >= vlmax) { \ 4850 *((ETYPE *)vd + H(i)) = 0; \ 4851 } else { \ 4852 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4853 } \ 4854 } \ 4855 env->vstart = 0; \ 4856 } 4857 4858 /* vd[i] = (x[rs1] >= VLMAX) ? 
0 : vs2[rs1] */ 4859 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4860 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4861 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4862 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4863 4864 /* Vector Compress Instruction */ 4865 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4866 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4867 CPURISCVState *env, uint32_t desc) \ 4868 { \ 4869 uint32_t vl = env->vl; \ 4870 uint32_t num = 0, i; \ 4871 \ 4872 for (i = env->vstart; i < vl; i++) { \ 4873 if (!vext_elem_mask(vs1, i)) { \ 4874 continue; \ 4875 } \ 4876 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4877 num++; \ 4878 } \ 4879 env->vstart = 0; \ 4880 } 4881 4882 /* Compress into vd elements of vs2 where vs1 is enabled */ 4883 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4884 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4885 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4886 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4887 4888 /* Vector Whole Register Move */ 4889 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) 4890 { 4891 /* EEW = SEW */ 4892 uint32_t maxsz = simd_maxsz(desc); 4893 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); 4894 uint32_t startb = env->vstart * sewb; 4895 uint32_t i = startb; 4896 4897 memcpy((uint8_t *)vd + H1(i), 4898 (uint8_t *)vs2 + H1(i), 4899 maxsz - startb); 4900 4901 env->vstart = 0; 4902 } 4903 4904 /* Vector Integer Extension */ 4905 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \ 4906 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4907 CPURISCVState *env, uint32_t desc) \ 4908 { \ 4909 uint32_t vl = env->vl; \ 4910 uint32_t vm = vext_vm(desc); \ 4911 uint32_t i; \ 4912 \ 4913 for (i = env->vstart; i < vl; i++) { \ 4914 if (!vm && !vext_elem_mask(v0, i)) { \ 4915 continue; \ 4916 } \ 4917 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ 4918 } \ 4919 env->vstart = 0; \ 4920 } 4921 4922 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) 4923 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2) 4924 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4) 4925 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1) 4926 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2) 4927 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1) 4928 4929 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1) 4930 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2) 4931 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) 4932 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) 4933 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) 4934 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) 4935
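/*
 * Illustrative sketch only (not part of the build): the per-element
 * effect of the GEN_VEXT_INT_EXT expansions above, with masking, vstart
 * handling and the H* host-endian fixups omitted for clarity. The
 * function names below are hypothetical and exist nowhere else.
 */
#if 0
static void sketch_vzext_vf2_h(uint16_t *vd, const uint8_t *vs2, uint32_t vl)
{
    /* vzext.vf2 at SEW=16: zero-extend each 8-bit source element. */
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = (uint16_t)vs2[i];       /* 0xff -> 0x00ff */
    }
}

static void sketch_vsext_vf2_h(int16_t *vd, const int8_t *vs2, uint32_t vl)
{
    /* vsext.vf2 at SEW=16: sign-extend each 8-bit source element. */
    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = (int16_t)vs2[i];        /* 0xff (-1) -> 0xffff (-1) */
    }
}
#endif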