/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that needs a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
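/*
 * Worked example (illustrative only): with VLEN = 128 bits the descriptor
 * carries vlenb = 16 as maxsz.  For SEW = 32 (esz = 2) and LMUL = 8
 * (lmul = 3), scale = 3 - 2 = 1 and VLMAX = 16 << 1 = 32, which matches
 * VLMAX = LMUL * VLEN / SEW.  For a fractional LMUL = 1/4 (lmul = -2) and
 * SEW = 16 (esz = 1), scale = -3 and VLMAX = 16 >> 3 = 2.
 */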
static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support for now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* elements operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            target_ulong addr = base + stride * i + (k << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}
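/*
 * Illustration of the addressing above: for a strided segment load with
 * SEW = 32 (esz = 2), NF = 3 and stride = 16 bytes, field k of segment i
 * is read from base + 16 * i + (k << 2) and lands in register element
 * i + k * max_elems, i.e. field k of every segment goes to register
 * group k of the destination.
 */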
#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
             uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

/*
 * A masked unit-stride load or store operation is a special case of the
 * strided form, with stride = NF * sizeof(ETYPE).
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
                         CPURISCVState *env, uint32_t desc)              \
{                                                                        \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));              \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);      \
}                                                                        \
                                                                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
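/*
 * For example (illustrative), a masked vle32_v (NF = 1) goes through
 * vext_ldst_stride() with stride = 1 << ctzl(sizeof(int32_t)) = 4, so an
 * active element i is read from base + 4 * i -- the same layout that the
 * unmasked vext_ldst_us() path computes as base + ((i * nf + k) << esz).
 */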
/*
 *** unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC(), MMU_DATA_LOAD);
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC(), MMU_DATA_STORE);
}

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
                  void *vs2, CPURISCVState *env, uint32_t desc)            \
{                                                                          \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
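/*
 * Illustration: vlxei16_32_v loads 32-bit data elements using 16-bit
 * indices, so element i is fetched from base + (uint16_t)vs2[i] + (k << 2)
 * for field k.  The index EEW and the data EEW are independent, which is
 * why every (index width, data width) pair gets its own helper above.
 */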
#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC(), MMU_DATA_STORE);                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << esz));
        if (i == 0) {
            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
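/*
 * Fault-only-first semantics as implemented above (sketch): element 0 is
 * always probed with probe_pages() and may trap normally, while a probe
 * failure at some element i > 0 simply truncates env->vl to i before the
 * actual loads run, so only elements [vstart, i) are written back.
 */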
#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/* Unsigned min/max */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)

/*
 *** load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
                MMUAccessType access_type)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
    uint32_t max_elems = vlenb >> esz;

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /* load/store the rest of the current segment pointed to by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
        }
        k++;
    }

    /* load/store elements for the rest of the segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}

#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)      \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN,    \
                    ctzl(sizeof(ETYPE)), GETPC(),    \
                    MMU_DATA_LOAD);                  \
}

GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)     \
void HELPER(NAME)(void *vd, target_ulong base,       \
                  CPURISCVState *env, uint32_t desc) \
{                                                    \
    vext_ldst_whole(vd, base, env, desc, STORE_FN,   \
                    ctzl(sizeof(ETYPE)), GETPC(),    \
                    MMU_DATA_STORE);                 \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
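/*
 * Illustration (assuming the translator encodes the register count in the
 * NF field for whole-register accesses): vl2re32_v with VLEN = 128 has
 * nf = 2, vlenb = 16 and max_elems = 4, so 2 * 4 = 8 32-bit elements are
 * transferred regardless of vl; a non-zero vstart resumes mid-register via
 * the off/k split above.
 */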
/*
 *** Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
}
#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       uint32_t esz, uint32_t dsz,
                       opivv2_fn *fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
    env->vstart = 0;
}
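/*
 * For reference, RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
 * above expands roughly to:
 *
 *     static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *     {
 *         int8_t s1 = *((int8_t *)vs1 + H1(i));
 *         int8_t s2 = *((int8_t *)vs2 + H1(i));
 *         *((int8_t *)vd + H1(i)) = s2 + s1;
 *     }
 *
 * do_vext_vv() walks elements [vstart, vl) and invokes such a per-element
 * function for each active (unmasked) element.
 */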
/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME, ESZ, DSZ)                       \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
               do_##NAME);                                \
}

GEN_VEXT_VV(vadd_vv_b, 1, 1)
GEN_VEXT_VV(vadd_vv_h, 2, 2)
GEN_VEXT_VV(vadd_vv_w, 4, 4)
GEN_VEXT_VV(vadd_vv_d, 8, 8)
GEN_VEXT_VV(vsub_vv_b, 1, 1)
GEN_VEXT_VV(vsub_vv_h, 2, 2)
GEN_VEXT_VV(vsub_vv_w, 4, 4)
GEN_VEXT_VV(vsub_vv_d, 8, 8)

typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operand type.
 * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
 */
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
}

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       uint32_t esz, uint32_t dsz,
                       opivx2_fn fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, s1, vs2, i);
    }
    env->vstart = 0;
}

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME, ESZ, DSZ)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
               do_##NAME);                                \
}

GEN_VEXT_VX(vadd_vx_b, 1, 1)
GEN_VEXT_VX(vadd_vx_h, 2, 2)
GEN_VEXT_VX(vadd_vx_w, 4, 4)
GEN_VEXT_VX(vadd_vx_d, 8, 8)
GEN_VEXT_VX(vsub_vx_b, 1, 1)
GEN_VEXT_VX(vsub_vx_h, 2, 2)
GEN_VEXT_VX(vsub_vx_w, 4, 4)
GEN_VEXT_VX(vsub_vx_d, 8, 8)
GEN_VEXT_VX(vrsub_vx_b, 1, 1)
GEN_VEXT_VX(vrsub_vx_h, 2, 2)
GEN_VEXT_VX(vrsub_vx_w, 4, 4)
GEN_VEXT_VX(vrsub_vx_d, 8, 8)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}
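/*
 * Note (illustrative): the vec_rsubs* helpers above are the reversed
 * subtraction counterparts of DO_RSUB -- each computes scalar - element,
 * e.g. vec_rsubs8 with b = 10 and a[i] = 3 stores 7, whereas a plain
 * subtract would compute a[i] - b.
 */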
/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
GEN_VEXT_VV(vwadd_vv_b, 1, 2)
GEN_VEXT_VV(vwadd_vv_h, 2, 4)
GEN_VEXT_VV(vwadd_vv_w, 4, 8)
GEN_VEXT_VV(vwsub_vv_b, 1, 2)
GEN_VEXT_VV(vwsub_vv_h, 2, 4)
GEN_VEXT_VV(vwsub_vv_w, 4, 8)
GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
GEN_VEXT_VV(vwadd_wv_b, 1, 2)
GEN_VEXT_VV(vwadd_wv_h, 2, 4)
GEN_VEXT_VV(vwadd_wv_w, 4, 8)
GEN_VEXT_VV(vwsub_wv_b, 1, 2)
GEN_VEXT_VV(vwsub_wv_h, 2, 4)
GEN_VEXT_VV(vwsub_wv_w, 4, 8)
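/*
 * Illustration of the WOP_* type lists: WOP_UUU_B is
 * (TD, T1, T2, TX1, TX2) = (uint16_t, uint8_t, uint8_t, uint16_t, uint16_t),
 * so vwaddu_vv_b widens both uint8_t inputs to uint16_t before adding and
 * 200 + 100 yields 300 rather than wrapping.  The *_wv/_wx forms use the
 * WOP_W*** lists, where T2 (the vs2 operand) is already the wide type.
 */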
RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
GEN_VEXT_VX(vwadd_vx_b, 1, 2)
GEN_VEXT_VX(vwadd_vx_h, 2, 4)
GEN_VEXT_VX(vwadd_vx_w, 4, 8)
GEN_VEXT_VX(vwsub_vx_b, 1, 2)
GEN_VEXT_VX(vwsub_vx_h, 2, 4)
GEN_VEXT_VX(vwsub_vx_w, 4, 8)
GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
GEN_VEXT_VX(vwadd_wx_b, 1, 2)
GEN_VEXT_VX(vwadd_wx_h, 2, 4)
GEN_VEXT_VX(vwadd_wx_w, 4, 8)
GEN_VEXT_VX(vwsub_wx_b, 1, 2)
GEN_VEXT_VX(vwsub_wx_h, 2, 4)
GEN_VEXT_VX(vwsub_wx_w, 4, 8)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
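/*
 * Illustration: DO_VADC/DO_VSBC take the carry/borrow-in from mask
 * register v0 (bit i); e.g. an 8-bit vadc of s2 = 0xff, s1 = 0x00 with
 * carry = 1 wraps to 0x00.  The corresponding carry-out is produced
 * separately by the vmadc/vmsbc helpers below.
 */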
#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t i;                                                          \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
                              (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)

#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    uint32_t vl = env->vl;                                      \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t i;                                                 \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
        ETYPE carry = !vm && vext_elem_mask(v0, i);             \
        vext_set_elem_mask(vd, i,                               \
                DO_OP(s2, (ETYPE)(target_long)s1, carry));      \
    }                                                           \
    env->vstart = 0;                                            \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
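/*
 * Worked example for the carry-out test above: with C = 0, DO_MADC
 * reports (uint8_t)(0xff + 0x01) = 0x00 < 0xff, i.e. carry-out = 1; with
 * C = 1 it checks (N + M + 1) <= N instead so the extra +1 is accounted
 * for.  DO_MSBC likewise reports the borrow of N - M - C.
 */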
/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1, 1)
GEN_VEXT_VV(vand_vv_h, 2, 2)
GEN_VEXT_VV(vand_vv_w, 4, 4)
GEN_VEXT_VV(vand_vv_d, 8, 8)
GEN_VEXT_VV(vor_vv_b, 1, 1)
GEN_VEXT_VV(vor_vv_h, 2, 2)
GEN_VEXT_VV(vor_vv_w, 4, 4)
GEN_VEXT_VV(vor_vv_d, 8, 8)
GEN_VEXT_VV(vxor_vv_b, 1, 1)
GEN_VEXT_VV(vxor_vv_h, 2, 2)
GEN_VEXT_VV(vxor_vv_w, 4, 4)
GEN_VEXT_VV(vxor_vv_d, 8, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1, 1)
GEN_VEXT_VX(vand_vx_h, 2, 2)
GEN_VEXT_VX(vand_vx_w, 4, 4)
GEN_VEXT_VX(vand_vx_d, 8, 8)
GEN_VEXT_VX(vor_vx_b, 1, 1)
GEN_VEXT_VX(vor_vx_h, 2, 2)
GEN_VEXT_VX(vor_vx_w, 4, 4)
GEN_VEXT_VX(vor_vx_d, 8, 8)
GEN_VEXT_VX(vxor_vx_b, 1, 1)
GEN_VEXT_VX(vxor_vx_h, 2, 2)
GEN_VEXT_VX(vxor_vx_w, 4, 4)
GEN_VEXT_VX(vxor_vx_d, 8, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M)  (N << (M))
#define DO_SRL(N, M)  (N >> (M))

/* generate the helpers for shift instructions with two vector operands */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)     \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                  \
                  void *vs2, CPURISCVState *env, uint32_t desc)   \
{                                                                 \
    uint32_t vm = vext_vm(desc);                                  \
    uint32_t vl = env->vl;                                        \
    uint32_t i;                                                   \
                                                                  \
    for (i = env->vstart; i < vl; i++) {                          \
        if (!vm && !vext_elem_mask(v0, i)) {                      \
            continue;                                             \
        }                                                         \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                          \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                          \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                \
    }                                                             \
    env->vstart = 0;                                              \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
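/*
 * Note on the instantiations above: MASK keeps only the low log2(SEW)
 * bits of the shift amount (0x7 for e8 ... 0x3f for e64), and the vsra_*
 * variants pass a signed TS2 so the C right shift propagates the sign
 * bit, e.g. (int8_t)0x80 >> 1 yields (int8_t)0xc0.
 */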
/* generate the helpers for shift instructions with one vector and one scalar */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,            \
                  void *vs2, CPURISCVState *env, uint32_t desc)   \
{                                                                 \
    uint32_t vm = vext_vm(desc);                                  \
    uint32_t vl = env->vl;                                        \
    uint32_t i;                                                   \
                                                                  \
    for (i = env->vstart; i < vl; i++) {                          \
        if (!vm && !vext_elem_mask(v0, i)) {                      \
            continue;                                             \
        }                                                         \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                          \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);                  \
    }                                                             \
    env->vstart = 0;                                              \
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
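/*
 * The narrowing forms above reuse the same macros with a 2*SEW-wide
 * source type and an SEW-wide destination: e.g. vnsrl_wv_b shifts a
 * uint16_t source by up to 15 bits (mask 0xf) and the assignment to the
 * uint8_t destination element truncates the result to SEW bits.
 */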
/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t i;                                               \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t i;                                                     \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                DO_OP(s2, (ETYPE)(target_long)s1));                 \
    }                                                               \
    env->vstart = 0;                                                \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
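/*
 * Sketch of the comparison semantics implemented above: each active
 * element writes exactly one bit of vd via vext_set_elem_mask(), while
 * masked-off elements are skipped by the continue, leaving their old
 * destination bit unchanged.  Signedness comes purely from ETYPE, so e.g.
 * vmsltu_vx compares (uint8_t)s2 < (uint8_t)(target_long)s1.
 */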
/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1, 1)
GEN_VEXT_VV(vminu_vv_h, 2, 2)
GEN_VEXT_VV(vminu_vv_w, 4, 4)
GEN_VEXT_VV(vminu_vv_d, 8, 8)
GEN_VEXT_VV(vmin_vv_b, 1, 1)
GEN_VEXT_VV(vmin_vv_h, 2, 2)
GEN_VEXT_VV(vmin_vv_w, 4, 4)
GEN_VEXT_VV(vmin_vv_d, 8, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
GEN_VEXT_VV(vmax_vv_b, 1, 1)
GEN_VEXT_VV(vmax_vv_h, 2, 2)
GEN_VEXT_VV(vmax_vv_w, 4, 4)
GEN_VEXT_VV(vmax_vv_d, 8, 8)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1, 1)
GEN_VEXT_VX(vminu_vx_h, 2, 2)
GEN_VEXT_VX(vminu_vx_w, 4, 4)
GEN_VEXT_VX(vminu_vx_d, 8, 8)
GEN_VEXT_VX(vmin_vx_b, 1, 1)
GEN_VEXT_VX(vmin_vx_h, 2, 2)
GEN_VEXT_VX(vmin_vx_w, 4, 4)
GEN_VEXT_VX(vmin_vx_d, 8, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
GEN_VEXT_VX(vmax_vx_b, 1, 1)
GEN_VEXT_VX(vmax_vx_h, 2, 2)
GEN_VEXT_VX(vmax_vx_w, 4, 4)
GEN_VEXT_VX(vmax_vx_d, 8, 8)

/* Vector Single-Width Integer Multiply Instructions */
#define DO_MUL(N, M) (N * M)
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b, 1, 1)
GEN_VEXT_VV(vmul_vv_h, 2, 2)
GEN_VEXT_VV(vmul_vv_w, 4, 4)
GEN_VEXT_VV(vmul_vv_d, 8, 8)

static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    return (int16_t)s2 * (int16_t)s1 >> 8;
}

static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    return (int32_t)s2 * (int32_t)s1 >> 16;
}

static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    return (int64_t)s2 * (int64_t)s1 >> 32;
}

static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t hi_64, lo_64;

    muls64(&lo_64, &hi_64, s1, s2);
    return hi_64;
}

static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    return (uint16_t)s2 * (uint16_t)s1 >> 8;
}

static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    return (uint32_t)s2 * (uint32_t)s1 >> 16;
}

static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    return (uint64_t)s2 * (uint64_t)s1 >> 32;
}

static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);
    return hi_64;
}

static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    return (int16_t)s2 * (uint16_t)s1 >> 8;
}

static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    return (int32_t)s2 * (uint32_t)s1 >> 16;
}

static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    return (int64_t)s2 * (uint64_t)s1 >> 32;
}

/*
 * Let  A = signed operand,
 *      B = unsigned operand
 *      P = mulu64(A, B), unsigned product
 *
 * LET  X = 2 ** 64 - A, 2's complement of A
 *      SP = signed product
 * THEN
 *      IF A < 0
 *          SP = -X * B
 *             = -(2 ** 64 - A) * B
 *             = A * B - 2 ** 64 * B
 *             = P - 2 ** 64 * B
 *      ELSE
 *          SP = P
 * THEN
 *      HI_P -= (A < 0 ? B : 0)
 */

static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);

    hi_64 -= s2 < 0 ? s1 : 0;
    return hi_64;
}

RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
GEN_VEXT_VV(vmulh_vv_b, 1, 1)
GEN_VEXT_VV(vmulh_vv_h, 2, 2)
GEN_VEXT_VV(vmulh_vv_w, 4, 4)
GEN_VEXT_VV(vmulh_vv_d, 8, 8)
GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
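/*
 * Quick check of the do_mulhsu_d adjustment with an 8-bit analogue
 * (illustrative): s2 = -2 (0xfe unsigned), s1 = 100.  The unsigned
 * product is 254 * 100 = 0x6338, high byte 0x63; subtracting s1 gives
 * 0x63 - 0x64 = -1 = 0xff, the true high byte of -200 = 0xff38.
 */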
RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
GEN_VEXT_VX(vmul_vx_b, 1, 1)
GEN_VEXT_VX(vmul_vx_h, 2, 2)
GEN_VEXT_VX(vmul_vx_w, 4, 4)
GEN_VEXT_VX(vmul_vx_d, 8, 8)
GEN_VEXT_VX(vmulh_vx_b, 1, 1)
GEN_VEXT_VX(vmulh_vx_h, 2, 2)
GEN_VEXT_VX(vmulh_vx_w, 4, 4)
GEN_VEXT_VX(vmulh_vx_d, 8, 8)
GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)

/* Vector Integer Divide Instructions */
#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
#define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) :\
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
#define DO_REM(N, M)  (unlikely(M == 0) ? N :\
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)

RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
GEN_VEXT_VV(vdivu_vv_b, 1, 1)
GEN_VEXT_VV(vdivu_vv_h, 2, 2)
GEN_VEXT_VV(vdivu_vv_w, 4, 4)
GEN_VEXT_VV(vdivu_vv_d, 8, 8)
GEN_VEXT_VV(vdiv_vv_b, 1, 1)
GEN_VEXT_VV(vdiv_vv_h, 2, 2)
GEN_VEXT_VV(vdiv_vv_w, 4, 4)
GEN_VEXT_VV(vdiv_vv_d, 8, 8)
GEN_VEXT_VV(vremu_vv_b, 1, 1)
GEN_VEXT_VV(vremu_vv_h, 2, 2)
GEN_VEXT_VV(vremu_vv_w, 4, 4)
GEN_VEXT_VV(vremu_vv_d, 8, 8)
GEN_VEXT_VV(vrem_vv_b, 1, 1)
GEN_VEXT_VV(vrem_vv_h, 2, 2)
GEN_VEXT_VV(vrem_vv_w, 4, 4)
GEN_VEXT_VV(vrem_vv_d, 8, 8)
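/*
 * Summary of the divide/remainder corner cases encoded above: division
 * by zero returns all ones for vdiv/vdivu and the unmodified dividend for
 * vrem/vremu; signed overflow (the most-negative dividend, detected by
 * N == -N, divided by -1) returns the dividend for vdiv and 0 for vrem.
 */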
RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1610 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1611 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1612 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1613 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1614 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1615 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1616 GEN_VEXT_VX(vdivu_vx_b, 1, 1) 1617 GEN_VEXT_VX(vdivu_vx_h, 2, 2) 1618 GEN_VEXT_VX(vdivu_vx_w, 4, 4) 1619 GEN_VEXT_VX(vdivu_vx_d, 8, 8) 1620 GEN_VEXT_VX(vdiv_vx_b, 1, 1) 1621 GEN_VEXT_VX(vdiv_vx_h, 2, 2) 1622 GEN_VEXT_VX(vdiv_vx_w, 4, 4) 1623 GEN_VEXT_VX(vdiv_vx_d, 8, 8) 1624 GEN_VEXT_VX(vremu_vx_b, 1, 1) 1625 GEN_VEXT_VX(vremu_vx_h, 2, 2) 1626 GEN_VEXT_VX(vremu_vx_w, 4, 4) 1627 GEN_VEXT_VX(vremu_vx_d, 8, 8) 1628 GEN_VEXT_VX(vrem_vx_b, 1, 1) 1629 GEN_VEXT_VX(vrem_vx_h, 2, 2) 1630 GEN_VEXT_VX(vrem_vx_w, 4, 4) 1631 GEN_VEXT_VX(vrem_vx_d, 8, 8) 1632 1633 /* Vector Widening Integer Multiply Instructions */ 1634 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1635 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1636 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1637 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1638 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1639 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1640 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1641 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1642 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1643 GEN_VEXT_VV(vwmul_vv_b, 1, 2) 1644 GEN_VEXT_VV(vwmul_vv_h, 2, 4) 1645 GEN_VEXT_VV(vwmul_vv_w, 4, 8) 1646 GEN_VEXT_VV(vwmulu_vv_b, 1, 2) 1647 GEN_VEXT_VV(vwmulu_vv_h, 2, 4) 1648 GEN_VEXT_VV(vwmulu_vv_w, 4, 8) 1649 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) 1650 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) 1651 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) 1652 1653 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1654 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1655 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1656 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1657 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1658 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1659 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1660 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1661 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1662 GEN_VEXT_VX(vwmul_vx_b, 1, 2) 1663 GEN_VEXT_VX(vwmul_vx_h, 2, 4) 1664 GEN_VEXT_VX(vwmul_vx_w, 4, 8) 1665 GEN_VEXT_VX(vwmulu_vx_b, 1, 2) 1666 GEN_VEXT_VX(vwmulu_vx_h, 2, 4) 1667 GEN_VEXT_VX(vwmulu_vx_w, 4, 8) 1668 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) 1669 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) 1670 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) 1671 1672 /* Vector Single-Width Integer Multiply-Add Instructions */ 1673 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1674 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1675 { \ 1676 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1677 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1678 TD d = *((TD *)vd + HD(i)); \ 1679 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1680 } 1681 1682 #define DO_MACC(N, M, D) (M * N + D) 1683 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1684 #define DO_MADD(N, M, D) (M * D + N) 1685 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1686 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1687 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1688 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, 
DO_MACC) 1689 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1690 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1691 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1692 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1693 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1694 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1695 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1696 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1697 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1698 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1699 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1700 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1701 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1702 GEN_VEXT_VV(vmacc_vv_b, 1, 1) 1703 GEN_VEXT_VV(vmacc_vv_h, 2, 2) 1704 GEN_VEXT_VV(vmacc_vv_w, 4, 4) 1705 GEN_VEXT_VV(vmacc_vv_d, 8, 8) 1706 GEN_VEXT_VV(vnmsac_vv_b, 1, 1) 1707 GEN_VEXT_VV(vnmsac_vv_h, 2, 2) 1708 GEN_VEXT_VV(vnmsac_vv_w, 4, 4) 1709 GEN_VEXT_VV(vnmsac_vv_d, 8, 8) 1710 GEN_VEXT_VV(vmadd_vv_b, 1, 1) 1711 GEN_VEXT_VV(vmadd_vv_h, 2, 2) 1712 GEN_VEXT_VV(vmadd_vv_w, 4, 4) 1713 GEN_VEXT_VV(vmadd_vv_d, 8, 8) 1714 GEN_VEXT_VV(vnmsub_vv_b, 1, 1) 1715 GEN_VEXT_VV(vnmsub_vv_h, 2, 2) 1716 GEN_VEXT_VV(vnmsub_vv_w, 4, 4) 1717 GEN_VEXT_VV(vnmsub_vv_d, 8, 8) 1718 1719 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1720 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1721 { \ 1722 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1723 TD d = *((TD *)vd + HD(i)); \ 1724 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1725 } 1726 1727 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1728 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1729 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1730 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1731 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1732 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1733 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1734 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1735 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1736 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1737 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1738 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1739 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1740 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1741 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1742 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1743 GEN_VEXT_VX(vmacc_vx_b, 1, 1) 1744 GEN_VEXT_VX(vmacc_vx_h, 2, 2) 1745 GEN_VEXT_VX(vmacc_vx_w, 4, 4) 1746 GEN_VEXT_VX(vmacc_vx_d, 8, 8) 1747 GEN_VEXT_VX(vnmsac_vx_b, 1, 1) 1748 GEN_VEXT_VX(vnmsac_vx_h, 2, 2) 1749 GEN_VEXT_VX(vnmsac_vx_w, 4, 4) 1750 GEN_VEXT_VX(vnmsac_vx_d, 8, 8) 1751 GEN_VEXT_VX(vmadd_vx_b, 1, 1) 1752 GEN_VEXT_VX(vmadd_vx_h, 2, 2) 1753 GEN_VEXT_VX(vmadd_vx_w, 4, 4) 1754 GEN_VEXT_VX(vmadd_vx_d, 8, 8) 1755 GEN_VEXT_VX(vnmsub_vx_b, 1, 1) 1756 GEN_VEXT_VX(vnmsub_vx_h, 2, 2) 1757 GEN_VEXT_VX(vnmsub_vx_w, 4, 4) 1758 GEN_VEXT_VX(vnmsub_vx_d, 8, 8) 1759 1760 /* Vector Widening Integer Multiply-Add Instructions */ 1761 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1762 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1763 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1764 RVVCALL(OPIVV3, 
vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1765 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1766 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1767 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1768 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1769 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1770 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) 1771 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) 1772 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) 1773 GEN_VEXT_VV(vwmacc_vv_b, 1, 2) 1774 GEN_VEXT_VV(vwmacc_vv_h, 2, 4) 1775 GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 1776 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) 1777 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) 1778 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) 1779 1780 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1781 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1782 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1783 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1784 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1785 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1786 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1787 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1788 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1789 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1790 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1791 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1792 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) 1793 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) 1794 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) 1795 GEN_VEXT_VX(vwmacc_vx_b, 1, 2) 1796 GEN_VEXT_VX(vwmacc_vx_h, 2, 4) 1797 GEN_VEXT_VX(vwmacc_vx_w, 4, 8) 1798 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) 1799 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) 1800 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) 1801 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) 1802 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) 1803 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) 1804 1805 /* Vector Integer Merge and Move Instructions */ 1806 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1807 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1808 uint32_t desc) \ 1809 { \ 1810 uint32_t vl = env->vl; \ 1811 uint32_t i; \ 1812 \ 1813 for (i = env->vstart; i < vl; i++) { \ 1814 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1815 *((ETYPE *)vd + H(i)) = s1; \ 1816 } \ 1817 env->vstart = 0; \ 1818 } 1819 1820 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1821 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1822 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1823 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1824 1825 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1826 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1827 uint32_t desc) \ 1828 { \ 1829 uint32_t vl = env->vl; \ 1830 uint32_t i; \ 1831 \ 1832 for (i = env->vstart; i < vl; i++) { \ 1833 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1834 } \ 1835 env->vstart = 0; \ 1836 } 1837 1838 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1839 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1840 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1841 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1842 1843 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1844 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1845 CPURISCVState *env, uint32_t desc) \ 1846 { \ 1847 uint32_t vl = env->vl; \ 1848 uint32_t i; \ 1849 \ 1850 for (i = env->vstart; i < vl; i++) { \ 1851 ETYPE *vt = (!vext_elem_mask(v0, i) ? 
vs2 : vs1); \ 1852 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1853 } \ 1854 env->vstart = 0; \ 1855 } 1856 1857 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1858 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1859 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1860 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1861 1862 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1863 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1864 void *vs2, CPURISCVState *env, uint32_t desc) \ 1865 { \ 1866 uint32_t vl = env->vl; \ 1867 uint32_t i; \ 1868 \ 1869 for (i = env->vstart; i < vl; i++) { \ 1870 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1871 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 1872 (ETYPE)(target_long)s1); \ 1873 *((ETYPE *)vd + H(i)) = d; \ 1874 } \ 1875 env->vstart = 0; \ 1876 } 1877 1878 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1879 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1880 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1881 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1882 1883 /* 1884 *** Vector Fixed-Point Arithmetic Instructions 1885 */ 1886 1887 /* Vector Single-Width Saturating Add and Subtract */ 1888 1889 /* 1890 * As fixed point instructions probably have round mode and saturation, 1891 * define common macros for fixed point here. 1892 */ 1893 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1894 CPURISCVState *env, int vxrm); 1895 1896 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1897 static inline void \ 1898 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1899 CPURISCVState *env, int vxrm) \ 1900 { \ 1901 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1902 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1903 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1904 } 1905 1906 static inline void 1907 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1908 CPURISCVState *env, 1909 uint32_t vl, uint32_t vm, int vxrm, 1910 opivv2_rm_fn *fn) 1911 { 1912 for (uint32_t i = env->vstart; i < vl; i++) { 1913 if (!vm && !vext_elem_mask(v0, i)) { 1914 continue; 1915 } 1916 fn(vd, vs1, vs2, i, env, vxrm); 1917 } 1918 env->vstart = 0; 1919 } 1920 1921 static inline void 1922 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1923 CPURISCVState *env, 1924 uint32_t desc, uint32_t esz, uint32_t dsz, 1925 opivv2_rm_fn *fn) 1926 { 1927 uint32_t vm = vext_vm(desc); 1928 uint32_t vl = env->vl; 1929 1930 switch (env->vxrm) { 1931 case 0: /* rnu */ 1932 vext_vv_rm_1(vd, v0, vs1, vs2, 1933 env, vl, vm, 0, fn); 1934 break; 1935 case 1: /* rne */ 1936 vext_vv_rm_1(vd, v0, vs1, vs2, 1937 env, vl, vm, 1, fn); 1938 break; 1939 case 2: /* rdn */ 1940 vext_vv_rm_1(vd, v0, vs1, vs2, 1941 env, vl, vm, 2, fn); 1942 break; 1943 default: /* rod */ 1944 vext_vv_rm_1(vd, v0, vs1, vs2, 1945 env, vl, vm, 3, fn); 1946 break; 1947 } 1948 } 1949 1950 /* generate helpers for fixed point instructions with OPIVV format */ 1951 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ 1952 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1953 CPURISCVState *env, uint32_t desc) \ 1954 { \ 1955 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 1956 do_##NAME); \ 1957 } 1958 1959 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 1960 { 1961 uint8_t res = a + b; 1962 if (res < a) { 1963 res = UINT8_MAX; 1964 env->vxsat = 0x1; 1965 } 1966 return res; 1967 } 1968 1969 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1970 uint16_t b) 1971 { 1972 uint16_t res = a + b; 1973 if (res < a) { 1974 res = UINT16_MAX; 1975 env->vxsat = 0x1; 1976 } 1977 return res; 
1978 } 1979 1980 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1981 uint32_t b) 1982 { 1983 uint32_t res = a + b; 1984 if (res < a) { 1985 res = UINT32_MAX; 1986 env->vxsat = 0x1; 1987 } 1988 return res; 1989 } 1990 1991 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1992 uint64_t b) 1993 { 1994 uint64_t res = a + b; 1995 if (res < a) { 1996 res = UINT64_MAX; 1997 env->vxsat = 0x1; 1998 } 1999 return res; 2000 } 2001 2002 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2003 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2004 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2005 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2006 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) 2007 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) 2008 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) 2009 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) 2010 2011 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2012 CPURISCVState *env, int vxrm); 2013 2014 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2015 static inline void \ 2016 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2017 CPURISCVState *env, int vxrm) \ 2018 { \ 2019 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2020 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2021 } 2022 2023 static inline void 2024 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2025 CPURISCVState *env, 2026 uint32_t vl, uint32_t vm, int vxrm, 2027 opivx2_rm_fn *fn) 2028 { 2029 for (uint32_t i = env->vstart; i < vl; i++) { 2030 if (!vm && !vext_elem_mask(v0, i)) { 2031 continue; 2032 } 2033 fn(vd, s1, vs2, i, env, vxrm); 2034 } 2035 env->vstart = 0; 2036 } 2037 2038 static inline void 2039 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2040 CPURISCVState *env, 2041 uint32_t desc, uint32_t esz, uint32_t dsz, 2042 opivx2_rm_fn *fn) 2043 { 2044 uint32_t vm = vext_vm(desc); 2045 uint32_t vl = env->vl; 2046 2047 switch (env->vxrm) { 2048 case 0: /* rnu */ 2049 vext_vx_rm_1(vd, v0, s1, vs2, 2050 env, vl, vm, 0, fn); 2051 break; 2052 case 1: /* rne */ 2053 vext_vx_rm_1(vd, v0, s1, vs2, 2054 env, vl, vm, 1, fn); 2055 break; 2056 case 2: /* rdn */ 2057 vext_vx_rm_1(vd, v0, s1, vs2, 2058 env, vl, vm, 2, fn); 2059 break; 2060 default: /* rod */ 2061 vext_vx_rm_1(vd, v0, s1, vs2, 2062 env, vl, vm, 3, fn); 2063 break; 2064 } 2065 } 2066 2067 /* generate helpers for fixed point instructions with OPIVX format */ 2068 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ 2069 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2070 void *vs2, CPURISCVState *env, uint32_t desc) \ 2071 { \ 2072 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 2073 do_##NAME); \ 2074 } 2075 2076 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2077 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2078 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2079 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2080 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 2081 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) 2082 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) 2083 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) 2084 2085 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2086 { 2087 int8_t res = a + b; 2088 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2089 res = a > 0 ? 
INT8_MAX : INT8_MIN; 2090 env->vxsat = 0x1; 2091 } 2092 return res; 2093 } 2094 2095 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2096 { 2097 int16_t res = a + b; 2098 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2099 res = a > 0 ? INT16_MAX : INT16_MIN; 2100 env->vxsat = 0x1; 2101 } 2102 return res; 2103 } 2104 2105 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2106 { 2107 int32_t res = a + b; 2108 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2109 res = a > 0 ? INT32_MAX : INT32_MIN; 2110 env->vxsat = 0x1; 2111 } 2112 return res; 2113 } 2114 2115 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2116 { 2117 int64_t res = a + b; 2118 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2119 res = a > 0 ? INT64_MAX : INT64_MIN; 2120 env->vxsat = 0x1; 2121 } 2122 return res; 2123 } 2124 2125 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2126 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2127 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2128 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2129 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) 2130 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) 2131 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) 2132 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) 2133 2134 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2135 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2136 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2137 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2138 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) 2139 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) 2140 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) 2141 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) 2142 2143 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2144 { 2145 uint8_t res = a - b; 2146 if (res > a) { 2147 res = 0; 2148 env->vxsat = 0x1; 2149 } 2150 return res; 2151 } 2152 2153 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2154 uint16_t b) 2155 { 2156 uint16_t res = a - b; 2157 if (res > a) { 2158 res = 0; 2159 env->vxsat = 0x1; 2160 } 2161 return res; 2162 } 2163 2164 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2165 uint32_t b) 2166 { 2167 uint32_t res = a - b; 2168 if (res > a) { 2169 res = 0; 2170 env->vxsat = 0x1; 2171 } 2172 return res; 2173 } 2174 2175 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2176 uint64_t b) 2177 { 2178 uint64_t res = a - b; 2179 if (res > a) { 2180 res = 0; 2181 env->vxsat = 0x1; 2182 } 2183 return res; 2184 } 2185 2186 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2187 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2188 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2189 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2190 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) 2191 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) 2192 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) 2193 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) 2194 2195 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2196 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2197 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2198 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2199 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) 2200 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) 2201 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) 2202 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) 2203 2204 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2205 { 2206 int8_t res = a - b; 2207 if 
((res ^ a) & (a ^ b) & INT8_MIN) { 2208 res = a >= 0 ? INT8_MAX : INT8_MIN; 2209 env->vxsat = 0x1; 2210 } 2211 return res; 2212 } 2213 2214 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2215 { 2216 int16_t res = a - b; 2217 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2218 res = a >= 0 ? INT16_MAX : INT16_MIN; 2219 env->vxsat = 0x1; 2220 } 2221 return res; 2222 } 2223 2224 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2225 { 2226 int32_t res = a - b; 2227 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2228 res = a >= 0 ? INT32_MAX : INT32_MIN; 2229 env->vxsat = 0x1; 2230 } 2231 return res; 2232 } 2233 2234 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2235 { 2236 int64_t res = a - b; 2237 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2238 res = a >= 0 ? INT64_MAX : INT64_MIN; 2239 env->vxsat = 0x1; 2240 } 2241 return res; 2242 } 2243 2244 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2245 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2246 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2247 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2248 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) 2249 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) 2250 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) 2251 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) 2252 2253 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2254 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2255 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2256 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2257 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) 2258 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) 2259 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) 2260 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) 2261 2262 /* Vector Single-Width Averaging Add and Subtract */ 2263 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2264 { 2265 uint8_t d = extract64(v, shift, 1); 2266 uint8_t d1; 2267 uint64_t D1, D2; 2268 2269 if (shift == 0 || shift > 64) { 2270 return 0; 2271 } 2272 2273 d1 = extract64(v, shift - 1, 1); 2274 D1 = extract64(v, 0, shift); 2275 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2276 return d1; 2277 } else if (vxrm == 1) { /* round-to-nearest-even */ 2278 if (shift > 1) { 2279 D2 = extract64(v, 0, shift - 1); 2280 return d1 & ((D2 != 0) | d); 2281 } else { 2282 return d1 & d; 2283 } 2284 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2285 return !d & (D1 != 0); 2286 } 2287 return 0; /* round-down (truncate) */ 2288 } 2289 2290 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2291 { 2292 int64_t res = (int64_t)a + b; 2293 uint8_t round = get_round(vxrm, res, 1); 2294 2295 return (res >> 1) + round; 2296 } 2297 2298 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2299 { 2300 int64_t res = a + b; 2301 uint8_t round = get_round(vxrm, res, 1); 2302 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2303 2304 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2305 return ((res >> 1) ^ over) + round; 2306 } 2307 2308 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2309 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2310 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2311 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2312 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) 2313 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 2314 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) 2315 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) 2316 2317 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2318 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2319 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2320 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2321 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) 2322 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) 2323 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) 2324 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) 2325 2326 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2327 uint32_t a, uint32_t b) 2328 { 2329 uint64_t res = (uint64_t)a + b; 2330 uint8_t round = get_round(vxrm, res, 1); 2331 2332 return (res >> 1) + round; 2333 } 2334 2335 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2336 uint64_t a, uint64_t b) 2337 { 2338 uint64_t res = a + b; 2339 uint8_t round = get_round(vxrm, res, 1); 2340 uint64_t over = (uint64_t)(res < a) << 63; 2341 2342 return ((res >> 1) | over) + round; 2343 } 2344 2345 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2346 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2347 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2348 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2349 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1) 2350 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2) 2351 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4) 2352 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8) 2353 2354 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2355 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2356 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2357 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2358 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1) 2359 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2) 2360 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4) 2361 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8) 2362 2363 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2364 { 2365 int64_t res = (int64_t)a - b; 2366 uint8_t round = get_round(vxrm, res, 1); 2367 2368 return (res >> 1) + round; 2369 } 2370 2371 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2372 { 2373 int64_t res = (int64_t)a - b; 2374 uint8_t round = get_round(vxrm, res, 1); 2375 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2376 2377 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2378 return ((res >> 1) ^ over) + round; 2379 } 2380 2381 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2382 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2383 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2384 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2385 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) 2386 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) 2387 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) 2388 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) 2389 2390 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2391 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2392 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2393 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2394 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) 2395 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) 2396 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) 2397 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) 2398 2399 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2400 uint32_t a, uint32_t b) 2401 { 2402 int64_t res = (int64_t)a - b; 2403 uint8_t round = get_round(vxrm, res, 1); 2404 2405 return (res >> 1) + round; 2406 } 2407 2408 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2409 uint64_t a, uint64_t b) 2410 { 2411 uint64_t res = (uint64_t)a - b; 2412 uint8_t round = get_round(vxrm, res, 1); 2413 uint64_t over = (uint64_t)(res > a) << 63; 2414 2415 return ((res >> 1) | over) + round; 2416 } 2417 2418 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2419 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2420 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2421 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2422 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1) 2423 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2) 2424 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4) 2425 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8) 2426 2427 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2428 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2429 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2430 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2431 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1) 2432 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2) 2433 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4) 2434 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8) 2435 2436 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2437 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2438 { 2439 uint8_t round; 2440 int16_t res; 2441 2442 res = (int16_t)a * (int16_t)b; 2443 round = get_round(vxrm, res, 7); 2444 res = (res >> 7) + round; 2445 2446 if (res > INT8_MAX) { 2447 env->vxsat = 0x1; 2448 return INT8_MAX; 2449 } else if (res < INT8_MIN) { 2450 env->vxsat = 0x1; 2451 return INT8_MIN; 2452 } else { 2453 return res; 2454 } 2455 } 2456 2457 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2458 { 2459 uint8_t round; 2460 int32_t res; 2461 2462 res = (int32_t)a * (int32_t)b; 2463 round = get_round(vxrm, res, 15); 2464 res = (res >> 15) + round; 2465 2466 if (res > INT16_MAX) { 2467 env->vxsat = 0x1; 2468 return INT16_MAX; 2469 } else if (res < INT16_MIN) { 2470 env->vxsat = 0x1; 2471 return INT16_MIN; 2472 } else { 2473 return res; 2474 } 2475 } 2476 2477 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2478 { 2479 uint8_t round; 2480 int64_t res; 2481 2482 res = (int64_t)a * (int64_t)b; 2483 round = get_round(vxrm, res, 31); 2484 res = (res >> 31) + round; 2485 2486 if (res > INT32_MAX) { 2487 env->vxsat = 0x1; 2488 return INT32_MAX; 2489 } else 
if (res < INT32_MIN) { 2490 env->vxsat = 0x1; 2491 return INT32_MIN; 2492 } else { 2493 return res; 2494 } 2495 } 2496 2497 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2498 { 2499 uint8_t round; 2500 uint64_t hi_64, lo_64; 2501 int64_t res; 2502 2503 if (a == INT64_MIN && b == INT64_MIN) { 2504 env->vxsat = 1; 2505 return INT64_MAX; 2506 } 2507 2508 muls64(&lo_64, &hi_64, a, b); 2509 round = get_round(vxrm, lo_64, 63); 2510 /* 2511 * Cannot overflow, as there are always 2512 * 2 sign bits after multiply. 2513 */ 2514 res = (hi_64 << 1) | (lo_64 >> 63); 2515 if (round) { 2516 if (res == INT64_MAX) { 2517 env->vxsat = 1; 2518 } else { 2519 res += 1; 2520 } 2521 } 2522 return res; 2523 } 2524 2525 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2526 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2527 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2528 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2529 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) 2530 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) 2531 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) 2532 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) 2533 2534 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2535 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2536 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2537 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2538 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) 2539 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) 2540 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) 2541 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) 2542 2543 /* Vector Single-Width Scaling Shift Instructions */ 2544 static inline uint8_t 2545 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2546 { 2547 uint8_t round, shift = b & 0x7; 2548 uint8_t res; 2549 2550 round = get_round(vxrm, a, shift); 2551 res = (a >> shift) + round; 2552 return res; 2553 } 2554 static inline uint16_t 2555 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2556 { 2557 uint8_t round, shift = b & 0xf; 2558 uint16_t res; 2559 2560 round = get_round(vxrm, a, shift); 2561 res = (a >> shift) + round; 2562 return res; 2563 } 2564 static inline uint32_t 2565 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2566 { 2567 uint8_t round, shift = b & 0x1f; 2568 uint32_t res; 2569 2570 round = get_round(vxrm, a, shift); 2571 res = (a >> shift) + round; 2572 return res; 2573 } 2574 static inline uint64_t 2575 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2576 { 2577 uint8_t round, shift = b & 0x3f; 2578 uint64_t res; 2579 2580 round = get_round(vxrm, a, shift); 2581 res = (a >> shift) + round; 2582 return res; 2583 } 2584 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2585 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2586 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2587 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2588 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) 2589 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) 2590 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) 2591 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) 2592 2593 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2594 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2595 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2596 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2597 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) 2598 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) 2599 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) 2600 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) 2601 2602 static inline int8_t 2603 vssra8(CPURISCVState *env, int 
vxrm, int8_t a, int8_t b) 2604 { 2605 uint8_t round, shift = b & 0x7; 2606 int8_t res; 2607 2608 round = get_round(vxrm, a, shift); 2609 res = (a >> shift) + round; 2610 return res; 2611 } 2612 static inline int16_t 2613 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2614 { 2615 uint8_t round, shift = b & 0xf; 2616 int16_t res; 2617 2618 round = get_round(vxrm, a, shift); 2619 res = (a >> shift) + round; 2620 return res; 2621 } 2622 static inline int32_t 2623 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2624 { 2625 uint8_t round, shift = b & 0x1f; 2626 int32_t res; 2627 2628 round = get_round(vxrm, a, shift); 2629 res = (a >> shift) + round; 2630 return res; 2631 } 2632 static inline int64_t 2633 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2634 { 2635 uint8_t round, shift = b & 0x3f; 2636 int64_t res; 2637 2638 round = get_round(vxrm, a, shift); 2639 res = (a >> shift) + round; 2640 return res; 2641 } 2642 2643 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2644 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2645 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2646 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2647 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) 2648 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) 2649 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) 2650 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) 2651 2652 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2653 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2654 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2655 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2656 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) 2657 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) 2658 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) 2659 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) 2660 2661 /* Vector Narrowing Fixed-Point Clip Instructions */ 2662 static inline int8_t 2663 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2664 { 2665 uint8_t round, shift = b & 0xf; 2666 int16_t res; 2667 2668 round = get_round(vxrm, a, shift); 2669 res = (a >> shift) + round; 2670 if (res > INT8_MAX) { 2671 env->vxsat = 0x1; 2672 return INT8_MAX; 2673 } else if (res < INT8_MIN) { 2674 env->vxsat = 0x1; 2675 return INT8_MIN; 2676 } else { 2677 return res; 2678 } 2679 } 2680 2681 static inline int16_t 2682 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2683 { 2684 uint8_t round, shift = b & 0x1f; 2685 int32_t res; 2686 2687 round = get_round(vxrm, a, shift); 2688 res = (a >> shift) + round; 2689 if (res > INT16_MAX) { 2690 env->vxsat = 0x1; 2691 return INT16_MAX; 2692 } else if (res < INT16_MIN) { 2693 env->vxsat = 0x1; 2694 return INT16_MIN; 2695 } else { 2696 return res; 2697 } 2698 } 2699 2700 static inline int32_t 2701 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2702 { 2703 uint8_t round, shift = b & 0x3f; 2704 int64_t res; 2705 2706 round = get_round(vxrm, a, shift); 2707 res = (a >> shift) + round; 2708 if (res > INT32_MAX) { 2709 env->vxsat = 0x1; 2710 return INT32_MAX; 2711 } else if (res < INT32_MIN) { 2712 env->vxsat = 0x1; 2713 return INT32_MIN; 2714 } else { 2715 return res; 2716 } 2717 } 2718 2719 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2720 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2721 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2722 GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1) 2723 GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2) 2724 GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4) 2725 2726 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, 
vnclip8)
RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)

static inline uint8_t
vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
{
    uint8_t round, shift = b & 0xf;
    uint16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT8_MAX) {
        env->vxsat = 0x1;
        return UINT8_MAX;
    } else {
        return res;
    }
}

static inline uint16_t
vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
{
    uint8_t round, shift = b & 0x1f;
    uint32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT16_MAX) {
        env->vxsat = 0x1;
        return UINT16_MAX;
    } else {
        return res;
    }
}

static inline uint32_t
vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x3f;
    uint64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT32_MAX) {
        env->vxsat = 0x1;
        return UINT32_MAX;
    } else {
        return res;
    }
}

RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)

RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)

/*
 *** Vector Floating-Point Arithmetic Instructions
 */
/* Vector Single-Width Floating-Point Add/Subtract Instructions */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
}

#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t i;                                           \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, vs1, vs2, i, env);                  \
    }                                                     \
    env->vstart = 0;                                      \
}

RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)

#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}
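/*
 * For reference, a rough sketch of what one of the RVVCALL() lines above
 * expands to, assuming RVVCALL simply hands its arguments to the named
 * generator macro and OP_UUU_H supplies uint16_t for each type slot
 * (as its other uses in this file suggest):
 *
 *   RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
 *
 *   static void do_vfadd_vv_h(void *vd, void *vs1, void *vs2, int i,
 *                             CPURISCVState *env)
 *   {
 *       uint16_t s1 = *((uint16_t *)vs1 + H2(i));
 *       uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *       *((uint16_t *)vd + H2(i)) = float16_add(s2, s1, &env->fp_status);
 *   }
 *
 * GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) then wraps that per-element function
 * in the usual vstart/vl/mask loop to produce the out-of-line helper
 * called from TCG.  OPFVF2 above is the vector-scalar counterpart: rather
 * than loading s1 from vs1, it narrows the x/f register value that is
 * passed in as a uint64_t.
 */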
2841 #define GEN_VEXT_VF(NAME, ESZ, DSZ) \ 2842 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2843 void *vs2, CPURISCVState *env, \ 2844 uint32_t desc) \ 2845 { \ 2846 uint32_t vm = vext_vm(desc); \ 2847 uint32_t vl = env->vl; \ 2848 uint32_t i; \ 2849 \ 2850 for (i = env->vstart; i < vl; i++) { \ 2851 if (!vm && !vext_elem_mask(v0, i)) { \ 2852 continue; \ 2853 } \ 2854 do_##NAME(vd, s1, vs2, i, env); \ 2855 } \ 2856 env->vstart = 0; \ 2857 } 2858 2859 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2860 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2861 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2862 GEN_VEXT_VF(vfadd_vf_h, 2, 2) 2863 GEN_VEXT_VF(vfadd_vf_w, 4, 4) 2864 GEN_VEXT_VF(vfadd_vf_d, 8, 8) 2865 2866 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2867 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2868 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2869 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) 2870 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) 2871 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) 2872 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2873 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2874 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2875 GEN_VEXT_VF(vfsub_vf_h, 2, 2) 2876 GEN_VEXT_VF(vfsub_vf_w, 4, 4) 2877 GEN_VEXT_VF(vfsub_vf_d, 8, 8) 2878 2879 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2880 { 2881 return float16_sub(b, a, s); 2882 } 2883 2884 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2885 { 2886 return float32_sub(b, a, s); 2887 } 2888 2889 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2890 { 2891 return float64_sub(b, a, s); 2892 } 2893 2894 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2895 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2896 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2897 GEN_VEXT_VF(vfrsub_vf_h, 2, 2) 2898 GEN_VEXT_VF(vfrsub_vf_w, 4, 4) 2899 GEN_VEXT_VF(vfrsub_vf_d, 8, 8) 2900 2901 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2902 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2903 { 2904 return float32_add(float16_to_float32(a, true, s), 2905 float16_to_float32(b, true, s), s); 2906 } 2907 2908 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2909 { 2910 return float64_add(float32_to_float64(a, s), 2911 float32_to_float64(b, s), s); 2912 2913 } 2914 2915 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2916 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2917 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) 2918 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) 2919 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2920 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2921 GEN_VEXT_VF(vfwadd_vf_h, 2, 4) 2922 GEN_VEXT_VF(vfwadd_vf_w, 4, 8) 2923 2924 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2925 { 2926 return float32_sub(float16_to_float32(a, true, s), 2927 float16_to_float32(b, true, s), s); 2928 } 2929 2930 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2931 { 2932 return float64_sub(float32_to_float64(a, s), 2933 float32_to_float64(b, s), s); 2934 2935 } 2936 2937 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 2938 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 2939 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) 2940 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) 2941 RVVCALL(OPFVF2, vfwsub_vf_h, 
WOP_UUU_H, H4, H2, vfwsub16) 2942 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 2943 GEN_VEXT_VF(vfwsub_vf_h, 2, 4) 2944 GEN_VEXT_VF(vfwsub_vf_w, 4, 8) 2945 2946 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 2947 { 2948 return float32_add(a, float16_to_float32(b, true, s), s); 2949 } 2950 2951 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 2952 { 2953 return float64_add(a, float32_to_float64(b, s), s); 2954 } 2955 2956 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 2957 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 2958 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) 2959 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 2960 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 2961 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 2962 GEN_VEXT_VF(vfwadd_wf_h, 2, 4) 2963 GEN_VEXT_VF(vfwadd_wf_w, 4, 8) 2964 2965 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 2966 { 2967 return float32_sub(a, float16_to_float32(b, true, s), s); 2968 } 2969 2970 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 2971 { 2972 return float64_sub(a, float32_to_float64(b, s), s); 2973 } 2974 2975 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 2976 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 2977 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) 2978 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) 2979 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 2980 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 2981 GEN_VEXT_VF(vfwsub_wf_h, 2, 4) 2982 GEN_VEXT_VF(vfwsub_wf_w, 4, 8) 2983 2984 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 2985 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 2986 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 2987 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 2988 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) 2989 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) 2990 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) 2991 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 2992 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 2993 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 2994 GEN_VEXT_VF(vfmul_vf_h, 2, 2) 2995 GEN_VEXT_VF(vfmul_vf_w, 4, 4) 2996 GEN_VEXT_VF(vfmul_vf_d, 8, 8) 2997 2998 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 2999 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3000 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3001 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 3002 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) 3003 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) 3004 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3005 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3006 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3007 GEN_VEXT_VF(vfdiv_vf_h, 2, 2) 3008 GEN_VEXT_VF(vfdiv_vf_w, 4, 4) 3009 GEN_VEXT_VF(vfdiv_vf_d, 8, 8) 3010 3011 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3012 { 3013 return float16_div(b, a, s); 3014 } 3015 3016 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3017 { 3018 return float32_div(b, a, s); 3019 } 3020 3021 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3022 { 3023 return float64_div(b, a, s); 3024 } 3025 3026 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3027 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3028 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3029 
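/*
 * Note on the reversed-operand wrappers above: OPFVF2 always invokes
 * OP(vector_element, scalar), but vfrsub.vf and vfrdiv.vf are specified
 * the other way round (f[rs1] - vs2[i] and f[rs1] / vs2[i]), so
 * float16_rsub()/float16_rdiv() and their wider counterparts simply swap
 * their arguments before calling the underlying softfloat routine.
 */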
GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) 3030 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) 3031 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) 3032 3033 /* Vector Widening Floating-Point Multiply */ 3034 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3035 { 3036 return float32_mul(float16_to_float32(a, true, s), 3037 float16_to_float32(b, true, s), s); 3038 } 3039 3040 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3041 { 3042 return float64_mul(float32_to_float64(a, s), 3043 float32_to_float64(b, s), s); 3044 3045 } 3046 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3047 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3048 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) 3049 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) 3050 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3051 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3052 GEN_VEXT_VF(vfwmul_vf_h, 2, 4) 3053 GEN_VEXT_VF(vfwmul_vf_w, 4, 8) 3054 3055 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3056 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3057 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3058 CPURISCVState *env) \ 3059 { \ 3060 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3061 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3062 TD d = *((TD *)vd + HD(i)); \ 3063 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3064 } 3065 3066 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3067 { 3068 return float16_muladd(a, b, d, 0, s); 3069 } 3070 3071 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3072 { 3073 return float32_muladd(a, b, d, 0, s); 3074 } 3075 3076 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3077 { 3078 return float64_muladd(a, b, d, 0, s); 3079 } 3080 3081 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3082 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3083 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3084 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) 3085 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) 3086 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) 3087 3088 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3089 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3090 CPURISCVState *env) \ 3091 { \ 3092 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3093 TD d = *((TD *)vd + HD(i)); \ 3094 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3095 } 3096 3097 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3098 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3099 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3100 GEN_VEXT_VF(vfmacc_vf_h, 2, 2) 3101 GEN_VEXT_VF(vfmacc_vf_w, 4, 4) 3102 GEN_VEXT_VF(vfmacc_vf_d, 8, 8) 3103 3104 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3105 { 3106 return float16_muladd(a, b, d, 3107 float_muladd_negate_c | float_muladd_negate_product, s); 3108 } 3109 3110 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3111 { 3112 return float32_muladd(a, b, d, 3113 float_muladd_negate_c | float_muladd_negate_product, s); 3114 } 3115 3116 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3117 { 3118 return float64_muladd(a, b, d, 3119 float_muladd_negate_c | float_muladd_negate_product, s); 3120 } 3121 3122 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3123 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3124 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3125 
GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) 3126 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) 3127 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) 3128 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3129 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3130 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3131 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) 3132 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) 3133 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) 3134 3135 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3136 { 3137 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3138 } 3139 3140 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3141 { 3142 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3143 } 3144 3145 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3146 { 3147 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3148 } 3149 3150 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3151 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3152 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3153 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) 3154 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) 3155 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) 3156 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3157 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3158 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3159 GEN_VEXT_VF(vfmsac_vf_h, 2, 2) 3160 GEN_VEXT_VF(vfmsac_vf_w, 4, 4) 3161 GEN_VEXT_VF(vfmsac_vf_d, 8, 8) 3162 3163 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3164 { 3165 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3166 } 3167 3168 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3169 { 3170 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3171 } 3172 3173 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3174 { 3175 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3176 } 3177 3178 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3179 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3180 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3181 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) 3182 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) 3183 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) 3184 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3185 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3186 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3187 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) 3188 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) 3189 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) 3190 3191 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3192 { 3193 return float16_muladd(d, b, a, 0, s); 3194 } 3195 3196 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3197 { 3198 return float32_muladd(d, b, a, 0, s); 3199 } 3200 3201 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3202 { 3203 return float64_muladd(d, b, a, 0, s); 3204 } 3205 3206 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3207 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3208 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3209 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) 3210 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) 3211 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) 3212 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3213 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3214 
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3215 GEN_VEXT_VF(vfmadd_vf_h, 2, 2) 3216 GEN_VEXT_VF(vfmadd_vf_w, 4, 4) 3217 GEN_VEXT_VF(vfmadd_vf_d, 8, 8) 3218 3219 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3220 { 3221 return float16_muladd(d, b, a, 3222 float_muladd_negate_c | float_muladd_negate_product, s); 3223 } 3224 3225 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3226 { 3227 return float32_muladd(d, b, a, 3228 float_muladd_negate_c | float_muladd_negate_product, s); 3229 } 3230 3231 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3232 { 3233 return float64_muladd(d, b, a, 3234 float_muladd_negate_c | float_muladd_negate_product, s); 3235 } 3236 3237 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3238 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3239 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3240 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) 3241 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) 3242 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) 3243 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3244 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3245 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3246 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) 3247 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) 3248 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) 3249 3250 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3251 { 3252 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3253 } 3254 3255 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3256 { 3257 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3258 } 3259 3260 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3261 { 3262 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3263 } 3264 3265 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3266 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3267 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3268 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) 3269 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 3270 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) 3271 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3272 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3273 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3274 GEN_VEXT_VF(vfmsub_vf_h, 2, 2) 3275 GEN_VEXT_VF(vfmsub_vf_w, 4, 4) 3276 GEN_VEXT_VF(vfmsub_vf_d, 8, 8) 3277 3278 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3279 { 3280 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3281 } 3282 3283 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3284 { 3285 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3286 } 3287 3288 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3289 { 3290 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3291 } 3292 3293 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3294 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3295 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3296 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) 3297 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) 3298 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) 3299 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3300 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3301 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3302 
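/*
 * Overview of the single-width FMA helpers above.  In each helper the
 * parameters are (a, b, d), where a is the vs2 element, b is the vs1
 * element (or the scalar for the _vf forms) and d is the accumulator
 * vd element; the float*_muladd() flags follow from the sign of each
 * term, and the whole operation is fused (a single rounding):
 *
 *   vfmacc    d =  (a * b) + d      no flags
 *   vfnmacc   d = -(a * b) - d      negate_product | negate_c
 *   vfmsac    d =  (a * b) - d      negate_c
 *   vfnmsac   d = -(a * b) + d      negate_product
 *   vfmadd    d =  (d * b) + a      no flags
 *   vfnmadd   d = -(d * b) - a      negate_product | negate_c
 *   vfmsub    d =  (d * b) - a      negate_c
 *   vfnmsub   d = -(d * b) + a      negate_product
 *
 * The *madd/*msub forms multiply the accumulator by one source and add
 * the other, which is why fmadd16() and friends pass (d, b, a) to
 * float*_muladd() instead of (a, b, d).
 */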
GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) 3303 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) 3304 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) 3305 3306 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3307 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3308 { 3309 return float32_muladd(float16_to_float32(a, true, s), 3310 float16_to_float32(b, true, s), d, 0, s); 3311 } 3312 3313 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3314 { 3315 return float64_muladd(float32_to_float64(a, s), 3316 float32_to_float64(b, s), d, 0, s); 3317 } 3318 3319 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3320 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3321 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) 3322 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) 3323 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3324 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3325 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) 3326 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) 3327 3328 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3329 { 3330 return float32_muladd(float16_to_float32(a, true, s), 3331 float16_to_float32(b, true, s), d, 3332 float_muladd_negate_c | float_muladd_negate_product, s); 3333 } 3334 3335 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3336 { 3337 return float64_muladd(float32_to_float64(a, s), 3338 float32_to_float64(b, s), d, 3339 float_muladd_negate_c | float_muladd_negate_product, s); 3340 } 3341 3342 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3343 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3344 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) 3345 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) 3346 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3347 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3348 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) 3349 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) 3350 3351 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3352 { 3353 return float32_muladd(float16_to_float32(a, true, s), 3354 float16_to_float32(b, true, s), d, 3355 float_muladd_negate_c, s); 3356 } 3357 3358 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3359 { 3360 return float64_muladd(float32_to_float64(a, s), 3361 float32_to_float64(b, s), d, 3362 float_muladd_negate_c, s); 3363 } 3364 3365 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3366 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3367 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) 3368 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) 3369 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3370 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3371 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) 3372 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) 3373 3374 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3375 { 3376 return float32_muladd(float16_to_float32(a, true, s), 3377 float16_to_float32(b, true, s), d, 3378 float_muladd_negate_product, s); 3379 } 3380 3381 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3382 { 3383 return float64_muladd(float32_to_float64(a, s), 3384 float32_to_float64(b, s), d, 3385 float_muladd_negate_product, s); 3386 } 3387 3388 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3389 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3390 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) 3391 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 3392 
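/*
 * Illustrative sketch only, not part of the upstream helpers: the widening
 * FMA callbacks above (fwmacc16/fwnmacc16/fwmsac16/fwnmsac16) all promote
 * both float16 multiplicands to float32 and then issue one fused
 * float32_muladd, differing only in the softfloat negate flags.  The
 * VEXT_WFMA_EXAMPLE guard and the ref_wfma16() name are hypothetical and
 * never defined/used by the build; the block only spells out that mapping.
 */
#ifdef VEXT_WFMA_EXAMPLE
static uint32_t ref_wfma16(uint16_t a, uint16_t b, uint32_t d, int flags,
                           float_status *s)
{
    /* Promote to IEEE binary32 (ieee = true), then fuse with one rounding. */
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d, flags, s);
}

/*
 * ref_wfma16(a, b, d, 0, s)                            == fwmacc16(a, b, d, s)
 * ref_wfma16(a, b, d, float_muladd_negate_c |
 *                     float_muladd_negate_product, s)  == fwnmacc16(a, b, d, s)
 * ref_wfma16(a, b, d, float_muladd_negate_c, s)        == fwmsac16(a, b, d, s)
 * ref_wfma16(a, b, d, float_muladd_negate_product, s)  == fwnmsac16(a, b, d, s)
 *
 * The 32-bit callbacks follow the same pattern with float32_to_float64()
 * and float64_muladd().
 */
#endif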
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3393 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3394 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) 3395 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) 3396 3397 /* Vector Floating-Point Square-Root Instruction */ 3398 /* (TD, T2, TX2) */ 3399 #define OP_UU_H uint16_t, uint16_t, uint16_t 3400 #define OP_UU_W uint32_t, uint32_t, uint32_t 3401 #define OP_UU_D uint64_t, uint64_t, uint64_t 3402 3403 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3404 static void do_##NAME(void *vd, void *vs2, int i, \ 3405 CPURISCVState *env) \ 3406 { \ 3407 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3408 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3409 } 3410 3411 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ 3412 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3413 CPURISCVState *env, uint32_t desc) \ 3414 { \ 3415 uint32_t vm = vext_vm(desc); \ 3416 uint32_t vl = env->vl; \ 3417 uint32_t i; \ 3418 \ 3419 if (vl == 0) { \ 3420 return; \ 3421 } \ 3422 for (i = env->vstart; i < vl; i++) { \ 3423 if (!vm && !vext_elem_mask(v0, i)) { \ 3424 continue; \ 3425 } \ 3426 do_##NAME(vd, vs2, i, env); \ 3427 } \ 3428 env->vstart = 0; \ 3429 } 3430 3431 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3432 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3433 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3434 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) 3435 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) 3436 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) 3437 3438 /* 3439 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3440 * 3441 * Adapted from riscv-v-spec recip.c: 3442 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3443 */ 3444 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3445 { 3446 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3447 uint64_t exp = extract64(f, frac_size, exp_size); 3448 uint64_t frac = extract64(f, 0, frac_size); 3449 3450 const uint8_t lookup_table[] = { 3451 52, 51, 50, 48, 47, 46, 44, 43, 3452 42, 41, 40, 39, 38, 36, 35, 34, 3453 33, 32, 31, 30, 30, 29, 28, 27, 3454 26, 25, 24, 23, 23, 22, 21, 20, 3455 19, 19, 18, 17, 16, 16, 15, 14, 3456 14, 13, 12, 12, 11, 10, 10, 9, 3457 9, 8, 7, 7, 6, 6, 5, 4, 3458 4, 3, 3, 2, 2, 1, 1, 0, 3459 127, 125, 123, 121, 119, 118, 116, 114, 3460 113, 111, 109, 108, 106, 105, 103, 102, 3461 100, 99, 97, 96, 95, 93, 92, 91, 3462 90, 88, 87, 86, 85, 84, 83, 82, 3463 80, 79, 78, 77, 76, 75, 74, 73, 3464 72, 71, 70, 70, 69, 68, 67, 66, 3465 65, 64, 63, 63, 62, 61, 60, 59, 3466 59, 58, 57, 56, 56, 55, 54, 53 3467 }; 3468 const int precision = 7; 3469 3470 if (exp == 0 && frac != 0) { /* subnormal */ 3471 /* Normalize the subnormal. 
*/ 3472 while (extract64(frac, frac_size - 1, 1) == 0) { 3473 exp--; 3474 frac <<= 1; 3475 } 3476 3477 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3478 } 3479 3480 int idx = ((exp & 1) << (precision - 1)) | 3481 (frac >> (frac_size - precision + 1)); 3482 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3483 (frac_size - precision); 3484 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3485 3486 uint64_t val = 0; 3487 val = deposit64(val, 0, frac_size, out_frac); 3488 val = deposit64(val, frac_size, exp_size, out_exp); 3489 val = deposit64(val, frac_size + exp_size, 1, sign); 3490 return val; 3491 } 3492 3493 static float16 frsqrt7_h(float16 f, float_status *s) 3494 { 3495 int exp_size = 5, frac_size = 10; 3496 bool sign = float16_is_neg(f); 3497 3498 /* 3499 * frsqrt7(sNaN) = canonical NaN 3500 * frsqrt7(-inf) = canonical NaN 3501 * frsqrt7(-normal) = canonical NaN 3502 * frsqrt7(-subnormal) = canonical NaN 3503 */ 3504 if (float16_is_signaling_nan(f, s) || 3505 (float16_is_infinity(f) && sign) || 3506 (float16_is_normal(f) && sign) || 3507 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3508 s->float_exception_flags |= float_flag_invalid; 3509 return float16_default_nan(s); 3510 } 3511 3512 /* frsqrt7(qNaN) = canonical NaN */ 3513 if (float16_is_quiet_nan(f, s)) { 3514 return float16_default_nan(s); 3515 } 3516 3517 /* frsqrt7(+-0) = +-inf */ 3518 if (float16_is_zero(f)) { 3519 s->float_exception_flags |= float_flag_divbyzero; 3520 return float16_set_sign(float16_infinity, sign); 3521 } 3522 3523 /* frsqrt7(+inf) = +0 */ 3524 if (float16_is_infinity(f) && !sign) { 3525 return float16_set_sign(float16_zero, sign); 3526 } 3527 3528 /* +normal, +subnormal */ 3529 uint64_t val = frsqrt7(f, exp_size, frac_size); 3530 return make_float16(val); 3531 } 3532 3533 static float32 frsqrt7_s(float32 f, float_status *s) 3534 { 3535 int exp_size = 8, frac_size = 23; 3536 bool sign = float32_is_neg(f); 3537 3538 /* 3539 * frsqrt7(sNaN) = canonical NaN 3540 * frsqrt7(-inf) = canonical NaN 3541 * frsqrt7(-normal) = canonical NaN 3542 * frsqrt7(-subnormal) = canonical NaN 3543 */ 3544 if (float32_is_signaling_nan(f, s) || 3545 (float32_is_infinity(f) && sign) || 3546 (float32_is_normal(f) && sign) || 3547 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3548 s->float_exception_flags |= float_flag_invalid; 3549 return float32_default_nan(s); 3550 } 3551 3552 /* frsqrt7(qNaN) = canonical NaN */ 3553 if (float32_is_quiet_nan(f, s)) { 3554 return float32_default_nan(s); 3555 } 3556 3557 /* frsqrt7(+-0) = +-inf */ 3558 if (float32_is_zero(f)) { 3559 s->float_exception_flags |= float_flag_divbyzero; 3560 return float32_set_sign(float32_infinity, sign); 3561 } 3562 3563 /* frsqrt7(+inf) = +0 */ 3564 if (float32_is_infinity(f) && !sign) { 3565 return float32_set_sign(float32_zero, sign); 3566 } 3567 3568 /* +normal, +subnormal */ 3569 uint64_t val = frsqrt7(f, exp_size, frac_size); 3570 return make_float32(val); 3571 } 3572 3573 static float64 frsqrt7_d(float64 f, float_status *s) 3574 { 3575 int exp_size = 11, frac_size = 52; 3576 bool sign = float64_is_neg(f); 3577 3578 /* 3579 * frsqrt7(sNaN) = canonical NaN 3580 * frsqrt7(-inf) = canonical NaN 3581 * frsqrt7(-normal) = canonical NaN 3582 * frsqrt7(-subnormal) = canonical NaN 3583 */ 3584 if (float64_is_signaling_nan(f, s) || 3585 (float64_is_infinity(f) && sign) || 3586 (float64_is_normal(f) && sign) || 3587 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3588 
s->float_exception_flags |= float_flag_invalid; 3589 return float64_default_nan(s); 3590 } 3591 3592 /* frsqrt7(qNaN) = canonical NaN */ 3593 if (float64_is_quiet_nan(f, s)) { 3594 return float64_default_nan(s); 3595 } 3596 3597 /* frsqrt7(+-0) = +-inf */ 3598 if (float64_is_zero(f)) { 3599 s->float_exception_flags |= float_flag_divbyzero; 3600 return float64_set_sign(float64_infinity, sign); 3601 } 3602 3603 /* frsqrt7(+inf) = +0 */ 3604 if (float64_is_infinity(f) && !sign) { 3605 return float64_set_sign(float64_zero, sign); 3606 } 3607 3608 /* +normal, +subnormal */ 3609 uint64_t val = frsqrt7(f, exp_size, frac_size); 3610 return make_float64(val); 3611 } 3612 3613 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3614 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3615 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3616 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2) 3617 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4) 3618 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8) 3619 3620 /* 3621 * Vector Floating-Point Reciprocal Estimate Instruction 3622 * 3623 * Adapted from riscv-v-spec recip.c: 3624 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3625 */ 3626 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3627 float_status *s) 3628 { 3629 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3630 uint64_t exp = extract64(f, frac_size, exp_size); 3631 uint64_t frac = extract64(f, 0, frac_size); 3632 3633 const uint8_t lookup_table[] = { 3634 127, 125, 123, 121, 119, 117, 116, 114, 3635 112, 110, 109, 107, 105, 104, 102, 100, 3636 99, 97, 96, 94, 93, 91, 90, 88, 3637 87, 85, 84, 83, 81, 80, 79, 77, 3638 76, 75, 74, 72, 71, 70, 69, 68, 3639 66, 65, 64, 63, 62, 61, 60, 59, 3640 58, 57, 56, 55, 54, 53, 52, 51, 3641 50, 49, 48, 47, 46, 45, 44, 43, 3642 42, 41, 40, 40, 39, 38, 37, 36, 3643 35, 35, 34, 33, 32, 31, 31, 30, 3644 29, 28, 28, 27, 26, 25, 25, 24, 3645 23, 23, 22, 21, 21, 20, 19, 19, 3646 18, 17, 17, 16, 15, 15, 14, 14, 3647 13, 12, 12, 11, 11, 10, 9, 9, 3648 8, 8, 7, 7, 6, 5, 5, 4, 3649 4, 3, 3, 2, 2, 1, 1, 0 3650 }; 3651 const int precision = 7; 3652 3653 if (exp == 0 && frac != 0) { /* subnormal */ 3654 /* Normalize the subnormal. */ 3655 while (extract64(frac, frac_size - 1, 1) == 0) { 3656 exp--; 3657 frac <<= 1; 3658 } 3659 3660 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3661 3662 if (exp != 0 && exp != UINT64_MAX) { 3663 /* 3664 * Overflow to inf or max value of same sign, 3665 * depending on sign and rounding mode. 3666 */ 3667 s->float_exception_flags |= (float_flag_inexact | 3668 float_flag_overflow); 3669 3670 if ((s->float_rounding_mode == float_round_to_zero) || 3671 ((s->float_rounding_mode == float_round_down) && !sign) || 3672 ((s->float_rounding_mode == float_round_up) && sign)) { 3673 /* Return greatest/negative finite value. */ 3674 return (sign << (exp_size + frac_size)) | 3675 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3676 } else { 3677 /* Return +-inf. */ 3678 return (sign << (exp_size + frac_size)) | 3679 MAKE_64BIT_MASK(frac_size, exp_size); 3680 } 3681 } 3682 } 3683 3684 int idx = frac >> (frac_size - precision); 3685 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3686 (frac_size - precision); 3687 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3688 3689 if (out_exp == 0 || out_exp == UINT64_MAX) { 3690 /* 3691 * The result is subnormal, but don't raise the underflow exception, 3692 * because there's no additional loss of precision. 
3693 */ 3694 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3695 if (out_exp == UINT64_MAX) { 3696 out_frac >>= 1; 3697 out_exp = 0; 3698 } 3699 } 3700 3701 uint64_t val = 0; 3702 val = deposit64(val, 0, frac_size, out_frac); 3703 val = deposit64(val, frac_size, exp_size, out_exp); 3704 val = deposit64(val, frac_size + exp_size, 1, sign); 3705 return val; 3706 } 3707 3708 static float16 frec7_h(float16 f, float_status *s) 3709 { 3710 int exp_size = 5, frac_size = 10; 3711 bool sign = float16_is_neg(f); 3712 3713 /* frec7(+-inf) = +-0 */ 3714 if (float16_is_infinity(f)) { 3715 return float16_set_sign(float16_zero, sign); 3716 } 3717 3718 /* frec7(+-0) = +-inf */ 3719 if (float16_is_zero(f)) { 3720 s->float_exception_flags |= float_flag_divbyzero; 3721 return float16_set_sign(float16_infinity, sign); 3722 } 3723 3724 /* frec7(sNaN) = canonical NaN */ 3725 if (float16_is_signaling_nan(f, s)) { 3726 s->float_exception_flags |= float_flag_invalid; 3727 return float16_default_nan(s); 3728 } 3729 3730 /* frec7(qNaN) = canonical NaN */ 3731 if (float16_is_quiet_nan(f, s)) { 3732 return float16_default_nan(s); 3733 } 3734 3735 /* +-normal, +-subnormal */ 3736 uint64_t val = frec7(f, exp_size, frac_size, s); 3737 return make_float16(val); 3738 } 3739 3740 static float32 frec7_s(float32 f, float_status *s) 3741 { 3742 int exp_size = 8, frac_size = 23; 3743 bool sign = float32_is_neg(f); 3744 3745 /* frec7(+-inf) = +-0 */ 3746 if (float32_is_infinity(f)) { 3747 return float32_set_sign(float32_zero, sign); 3748 } 3749 3750 /* frec7(+-0) = +-inf */ 3751 if (float32_is_zero(f)) { 3752 s->float_exception_flags |= float_flag_divbyzero; 3753 return float32_set_sign(float32_infinity, sign); 3754 } 3755 3756 /* frec7(sNaN) = canonical NaN */ 3757 if (float32_is_signaling_nan(f, s)) { 3758 s->float_exception_flags |= float_flag_invalid; 3759 return float32_default_nan(s); 3760 } 3761 3762 /* frec7(qNaN) = canonical NaN */ 3763 if (float32_is_quiet_nan(f, s)) { 3764 return float32_default_nan(s); 3765 } 3766 3767 /* +-normal, +-subnormal */ 3768 uint64_t val = frec7(f, exp_size, frac_size, s); 3769 return make_float32(val); 3770 } 3771 3772 static float64 frec7_d(float64 f, float_status *s) 3773 { 3774 int exp_size = 11, frac_size = 52; 3775 bool sign = float64_is_neg(f); 3776 3777 /* frec7(+-inf) = +-0 */ 3778 if (float64_is_infinity(f)) { 3779 return float64_set_sign(float64_zero, sign); 3780 } 3781 3782 /* frec7(+-0) = +-inf */ 3783 if (float64_is_zero(f)) { 3784 s->float_exception_flags |= float_flag_divbyzero; 3785 return float64_set_sign(float64_infinity, sign); 3786 } 3787 3788 /* frec7(sNaN) = canonical NaN */ 3789 if (float64_is_signaling_nan(f, s)) { 3790 s->float_exception_flags |= float_flag_invalid; 3791 return float64_default_nan(s); 3792 } 3793 3794 /* frec7(qNaN) = canonical NaN */ 3795 if (float64_is_quiet_nan(f, s)) { 3796 return float64_default_nan(s); 3797 } 3798 3799 /* +-normal, +-subnormal */ 3800 uint64_t val = frec7(f, exp_size, frac_size, s); 3801 return make_float64(val); 3802 } 3803 3804 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3805 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3806 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3807 GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2) 3808 GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4) 3809 GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8) 3810 3811 /* Vector Floating-Point MIN/MAX Instructions */ 3812 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3813 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 3814 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3815 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) 3816 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) 3817 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) 3818 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3819 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3820 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3821 GEN_VEXT_VF(vfmin_vf_h, 2, 2) 3822 GEN_VEXT_VF(vfmin_vf_w, 4, 4) 3823 GEN_VEXT_VF(vfmin_vf_d, 8, 8) 3824 3825 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3826 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3827 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3828 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) 3829 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) 3830 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) 3831 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3832 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3833 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3834 GEN_VEXT_VF(vfmax_vf_h, 2, 2) 3835 GEN_VEXT_VF(vfmax_vf_w, 4, 4) 3836 GEN_VEXT_VF(vfmax_vf_d, 8, 8) 3837 3838 /* Vector Floating-Point Sign-Injection Instructions */ 3839 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3840 { 3841 return deposit64(b, 0, 15, a); 3842 } 3843 3844 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3845 { 3846 return deposit64(b, 0, 31, a); 3847 } 3848 3849 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3850 { 3851 return deposit64(b, 0, 63, a); 3852 } 3853 3854 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3855 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3856 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3857 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) 3858 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) 3859 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) 3860 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3861 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3862 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3863 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) 3864 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) 3865 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) 3866 3867 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3868 { 3869 return deposit64(~b, 0, 15, a); 3870 } 3871 3872 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3873 { 3874 return deposit64(~b, 0, 31, a); 3875 } 3876 3877 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3878 { 3879 return deposit64(~b, 0, 63, a); 3880 } 3881 3882 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3883 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3884 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3885 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) 3886 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) 3887 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) 3888 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3889 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3890 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3891 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) 3892 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) 3893 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) 3894 3895 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3896 { 3897 return deposit64(b ^ a, 0, 15, a); 3898 } 3899 3900 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3901 { 3902 return deposit64(b ^ a, 0, 31, a); 3903 
} 3904 3905 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3906 { 3907 return deposit64(b ^ a, 0, 63, a); 3908 } 3909 3910 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3911 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3912 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3913 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) 3914 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) 3915 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) 3916 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3917 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3918 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3919 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) 3920 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) 3921 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) 3922 3923 /* Vector Floating-Point Compare Instructions */ 3924 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3925 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3926 CPURISCVState *env, uint32_t desc) \ 3927 { \ 3928 uint32_t vm = vext_vm(desc); \ 3929 uint32_t vl = env->vl; \ 3930 uint32_t i; \ 3931 \ 3932 for (i = env->vstart; i < vl; i++) { \ 3933 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3934 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3935 if (!vm && !vext_elem_mask(v0, i)) { \ 3936 continue; \ 3937 } \ 3938 vext_set_elem_mask(vd, i, \ 3939 DO_OP(s2, s1, &env->fp_status)); \ 3940 } \ 3941 env->vstart = 0; \ 3942 } 3943 3944 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3945 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3946 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3947 3948 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3949 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3950 CPURISCVState *env, uint32_t desc) \ 3951 { \ 3952 uint32_t vm = vext_vm(desc); \ 3953 uint32_t vl = env->vl; \ 3954 uint32_t i; \ 3955 \ 3956 for (i = env->vstart; i < vl; i++) { \ 3957 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3958 if (!vm && !vext_elem_mask(v0, i)) { \ 3959 continue; \ 3960 } \ 3961 vext_set_elem_mask(vd, i, \ 3962 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3963 } \ 3964 env->vstart = 0; \ 3965 } 3966 3967 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3968 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3969 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3970 3971 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3972 { 3973 FloatRelation compare = float16_compare_quiet(a, b, s); 3974 return compare != float_relation_equal; 3975 } 3976 3977 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 3978 { 3979 FloatRelation compare = float32_compare_quiet(a, b, s); 3980 return compare != float_relation_equal; 3981 } 3982 3983 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 3984 { 3985 FloatRelation compare = float64_compare_quiet(a, b, s); 3986 return compare != float_relation_equal; 3987 } 3988 3989 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 3990 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 3991 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 3992 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 3993 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 3994 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 3995 3996 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 3997 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 3998 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 3999 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4000 
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4001 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4002 4003 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4004 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4005 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4006 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4007 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4008 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4009 4010 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4011 { 4012 FloatRelation compare = float16_compare(a, b, s); 4013 return compare == float_relation_greater; 4014 } 4015 4016 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4017 { 4018 FloatRelation compare = float32_compare(a, b, s); 4019 return compare == float_relation_greater; 4020 } 4021 4022 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4023 { 4024 FloatRelation compare = float64_compare(a, b, s); 4025 return compare == float_relation_greater; 4026 } 4027 4028 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4029 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4030 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4031 4032 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4033 { 4034 FloatRelation compare = float16_compare(a, b, s); 4035 return compare == float_relation_greater || 4036 compare == float_relation_equal; 4037 } 4038 4039 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4040 { 4041 FloatRelation compare = float32_compare(a, b, s); 4042 return compare == float_relation_greater || 4043 compare == float_relation_equal; 4044 } 4045 4046 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4047 { 4048 FloatRelation compare = float64_compare(a, b, s); 4049 return compare == float_relation_greater || 4050 compare == float_relation_equal; 4051 } 4052 4053 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4054 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4055 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4056 4057 /* Vector Floating-Point Classify Instruction */ 4058 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4059 static void do_##NAME(void *vd, void *vs2, int i) \ 4060 { \ 4061 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4062 *((TD *)vd + HD(i)) = OP(s2); \ 4063 } 4064 4065 #define GEN_VEXT_V(NAME, ESZ, DSZ) \ 4066 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4067 CPURISCVState *env, uint32_t desc) \ 4068 { \ 4069 uint32_t vm = vext_vm(desc); \ 4070 uint32_t vl = env->vl; \ 4071 uint32_t i; \ 4072 \ 4073 for (i = env->vstart; i < vl; i++) { \ 4074 if (!vm && !vext_elem_mask(v0, i)) { \ 4075 continue; \ 4076 } \ 4077 do_##NAME(vd, vs2, i); \ 4078 } \ 4079 env->vstart = 0; \ 4080 } 4081 4082 target_ulong fclass_h(uint64_t frs1) 4083 { 4084 float16 f = frs1; 4085 bool sign = float16_is_neg(f); 4086 4087 if (float16_is_infinity(f)) { 4088 return sign ? 1 << 0 : 1 << 7; 4089 } else if (float16_is_zero(f)) { 4090 return sign ? 1 << 3 : 1 << 4; 4091 } else if (float16_is_zero_or_denormal(f)) { 4092 return sign ? 1 << 2 : 1 << 5; 4093 } else if (float16_is_any_nan(f)) { 4094 float_status s = { }; /* for snan_bit_is_one */ 4095 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4096 } else { 4097 return sign ? 1 << 1 : 1 << 6; 4098 } 4099 } 4100 4101 target_ulong fclass_s(uint64_t frs1) 4102 { 4103 float32 f = frs1; 4104 bool sign = float32_is_neg(f); 4105 4106 if (float32_is_infinity(f)) { 4107 return sign ? 
1 << 0 : 1 << 7; 4108 } else if (float32_is_zero(f)) { 4109 return sign ? 1 << 3 : 1 << 4; 4110 } else if (float32_is_zero_or_denormal(f)) { 4111 return sign ? 1 << 2 : 1 << 5; 4112 } else if (float32_is_any_nan(f)) { 4113 float_status s = { }; /* for snan_bit_is_one */ 4114 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4115 } else { 4116 return sign ? 1 << 1 : 1 << 6; 4117 } 4118 } 4119 4120 target_ulong fclass_d(uint64_t frs1) 4121 { 4122 float64 f = frs1; 4123 bool sign = float64_is_neg(f); 4124 4125 if (float64_is_infinity(f)) { 4126 return sign ? 1 << 0 : 1 << 7; 4127 } else if (float64_is_zero(f)) { 4128 return sign ? 1 << 3 : 1 << 4; 4129 } else if (float64_is_zero_or_denormal(f)) { 4130 return sign ? 1 << 2 : 1 << 5; 4131 } else if (float64_is_any_nan(f)) { 4132 float_status s = { }; /* for snan_bit_is_one */ 4133 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4134 } else { 4135 return sign ? 1 << 1 : 1 << 6; 4136 } 4137 } 4138 4139 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4140 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4141 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4142 GEN_VEXT_V(vfclass_v_h, 2, 2) 4143 GEN_VEXT_V(vfclass_v_w, 4, 4) 4144 GEN_VEXT_V(vfclass_v_d, 8, 8) 4145 4146 /* Vector Floating-Point Merge Instruction */ 4147 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4148 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4149 CPURISCVState *env, uint32_t desc) \ 4150 { \ 4151 uint32_t vm = vext_vm(desc); \ 4152 uint32_t vl = env->vl; \ 4153 uint32_t i; \ 4154 \ 4155 for (i = env->vstart; i < vl; i++) { \ 4156 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4157 *((ETYPE *)vd + H(i)) \ 4158 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4159 } \ 4160 env->vstart = 0; \ 4161 } 4162 4163 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4164 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4165 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4166 4167 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4168 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4169 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4170 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4171 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4172 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) 4173 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) 4174 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) 4175 4176 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4177 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4178 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4179 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4180 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) 4181 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) 4182 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) 4183 4184 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4185 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4186 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4187 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4188 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) 4189 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) 4190 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) 4191 4192 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. 
*/ 4193 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4194 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4195 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4196 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) 4197 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) 4198 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) 4199 4200 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4201 /* (TD, T2, TX2) */ 4202 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4203 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4204 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4205 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4206 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4207 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4208 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) 4209 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) 4210 4211 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4212 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4213 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4214 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) 4215 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) 4216 4217 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4218 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4219 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4220 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4221 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2) 4222 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) 4223 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) 4224 4225 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4226 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4227 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4228 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4229 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2) 4230 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) 4231 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) 4232 4233 /* 4234 * vfwcvt.f.f.v vd, vs2, vm 4235 * Convert single-width float to double-width float. 4236 */ 4237 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4238 { 4239 return float16_to_float32(a, true, s); 4240 } 4241 4242 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4243 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4244 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4) 4245 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) 4246 4247 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4248 /* (TD, T2, TX2) */ 4249 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4250 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4251 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4252 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4253 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4254 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4255 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4256 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1) 4257 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2) 4258 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4) 4259 4260 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. 
*/ 4261 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4262 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4263 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4264 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1) 4265 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2) 4266 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4) 4267 4268 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4269 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4270 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4271 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2) 4272 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4) 4273 4274 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4275 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4276 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4277 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2) 4278 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4) 4279 4280 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ 4281 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4282 { 4283 return float32_to_float16(a, true, s); 4284 } 4285 4286 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4287 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4288 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2) 4289 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4) 4290 4291 /* 4292 *** Vector Reduction Operations 4293 */ 4294 /* Vector Single-Width Integer Reduction Instructions */ 4295 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4296 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4297 void *vs2, CPURISCVState *env, uint32_t desc) \ 4298 { \ 4299 uint32_t vm = vext_vm(desc); \ 4300 uint32_t vl = env->vl; \ 4301 uint32_t i; \ 4302 TD s1 = *((TD *)vs1 + HD(0)); \ 4303 \ 4304 for (i = env->vstart; i < vl; i++) { \ 4305 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4306 if (!vm && !vext_elem_mask(v0, i)) { \ 4307 continue; \ 4308 } \ 4309 s1 = OP(s1, (TD)s2); \ 4310 } \ 4311 *((TD *)vd + HD(0)) = s1; \ 4312 env->vstart = 0; \ 4313 } 4314 4315 /* vd[0] = sum(vs1[0], vs2[*]) */ 4316 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4317 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4318 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4319 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4320 4321 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4322 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4323 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4324 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4325 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4326 4327 /* vd[0] = max(vs1[0], vs2[*]) */ 4328 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4329 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4330 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4331 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4332 4333 /* vd[0] = minu(vs1[0], vs2[*]) */ 4334 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4335 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4336 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4337 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4338 4339 /* vd[0] = min(vs1[0], vs2[*]) */ 4340 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4341 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4342 
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4343 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4344 4345 /* vd[0] = and(vs1[0], vs2[*]) */ 4346 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4347 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4348 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4349 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4350 4351 /* vd[0] = or(vs1[0], vs2[*]) */ 4352 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4353 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4354 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4355 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4356 4357 /* vd[0] = xor(vs1[0], vs2[*]) */ 4358 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4359 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4360 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4361 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4362 4363 /* Vector Widening Integer Reduction Instructions */ 4364 /* signed sum reduction into double-width accumulator */ 4365 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4366 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4367 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4368 4369 /* Unsigned sum reduction into double-width accumulator */ 4370 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4371 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4372 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4373 4374 /* Vector Single-Width Floating-Point Reduction Instructions */ 4375 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4376 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4377 void *vs2, CPURISCVState *env, \ 4378 uint32_t desc) \ 4379 { \ 4380 uint32_t vm = vext_vm(desc); \ 4381 uint32_t vl = env->vl; \ 4382 uint32_t i; \ 4383 TD s1 = *((TD *)vs1 + HD(0)); \ 4384 \ 4385 for (i = env->vstart; i < vl; i++) { \ 4386 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4387 if (!vm && !vext_elem_mask(v0, i)) { \ 4388 continue; \ 4389 } \ 4390 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4391 } \ 4392 *((TD *)vd + HD(0)) = s1; \ 4393 env->vstart = 0; \ 4394 } 4395 4396 /* Unordered sum */ 4397 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4398 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4399 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4400 4401 /* Maximum value */ 4402 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4403 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4404 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4405 4406 /* Minimum value */ 4407 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4408 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4409 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4410 4411 /* Vector Widening Floating-Point Reduction Instructions */ 4412 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4413 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4414 void *vs2, CPURISCVState *env, uint32_t desc) 4415 { 4416 uint32_t vm = vext_vm(desc); 4417 uint32_t vl = env->vl; 4418 uint32_t i; 4419 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4420 4421 for (i = env->vstart; i < vl; i++) { 
4422 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4423 if (!vm && !vext_elem_mask(v0, i)) { 4424 continue; 4425 } 4426 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4427 &env->fp_status); 4428 } 4429 *((uint32_t *)vd + H4(0)) = s1; 4430 env->vstart = 0; 4431 } 4432 4433 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4434 void *vs2, CPURISCVState *env, uint32_t desc) 4435 { 4436 uint32_t vm = vext_vm(desc); 4437 uint32_t vl = env->vl; 4438 uint32_t i; 4439 uint64_t s1 = *((uint64_t *)vs1); 4440 4441 for (i = env->vstart; i < vl; i++) { 4442 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4443 if (!vm && !vext_elem_mask(v0, i)) { 4444 continue; 4445 } 4446 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4447 &env->fp_status); 4448 } 4449 *((uint64_t *)vd) = s1; 4450 env->vstart = 0; 4451 } 4452 4453 /* 4454 *** Vector Mask Operations 4455 */ 4456 /* Vector Mask-Register Logical Instructions */ 4457 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4458 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4459 void *vs2, CPURISCVState *env, \ 4460 uint32_t desc) \ 4461 { \ 4462 uint32_t vl = env->vl; \ 4463 uint32_t i; \ 4464 int a, b; \ 4465 \ 4466 for (i = env->vstart; i < vl; i++) { \ 4467 a = vext_elem_mask(vs1, i); \ 4468 b = vext_elem_mask(vs2, i); \ 4469 vext_set_elem_mask(vd, i, OP(b, a)); \ 4470 } \ 4471 env->vstart = 0; \ 4472 } 4473 4474 #define DO_NAND(N, M) (!(N & M)) 4475 #define DO_ANDNOT(N, M) (N & !M) 4476 #define DO_NOR(N, M) (!(N | M)) 4477 #define DO_ORNOT(N, M) (N | !M) 4478 #define DO_XNOR(N, M) (!(N ^ M)) 4479 4480 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4481 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4482 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4483 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4484 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4485 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4486 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4487 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4488 4489 /* Vector count population in mask vcpop */ 4490 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4491 uint32_t desc) 4492 { 4493 target_ulong cnt = 0; 4494 uint32_t vm = vext_vm(desc); 4495 uint32_t vl = env->vl; 4496 int i; 4497 4498 for (i = env->vstart; i < vl; i++) { 4499 if (vm || vext_elem_mask(v0, i)) { 4500 if (vext_elem_mask(vs2, i)) { 4501 cnt++; 4502 } 4503 } 4504 } 4505 env->vstart = 0; 4506 return cnt; 4507 } 4508 4509 /* vfirst find-first-set mask bit*/ 4510 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4511 uint32_t desc) 4512 { 4513 uint32_t vm = vext_vm(desc); 4514 uint32_t vl = env->vl; 4515 int i; 4516 4517 for (i = env->vstart; i < vl; i++) { 4518 if (vm || vext_elem_mask(v0, i)) { 4519 if (vext_elem_mask(vs2, i)) { 4520 return i; 4521 } 4522 } 4523 } 4524 env->vstart = 0; 4525 return -1LL; 4526 } 4527 4528 enum set_mask_type { 4529 ONLY_FIRST = 1, 4530 INCLUDE_FIRST, 4531 BEFORE_FIRST, 4532 }; 4533 4534 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4535 uint32_t desc, enum set_mask_type type) 4536 { 4537 uint32_t vm = vext_vm(desc); 4538 uint32_t vl = env->vl; 4539 int i; 4540 bool first_mask_bit = false; 4541 4542 for (i = env->vstart; i < vl; i++) { 4543 if (!vm && !vext_elem_mask(v0, i)) { 4544 continue; 4545 } 4546 /* write a zero to all following active elements */ 4547 if (first_mask_bit) { 4548 vext_set_elem_mask(vd, i, 0); 4549 continue; 4550 } 4551 if (vext_elem_mask(vs2, i)) { 4552 first_mask_bit = true; 4553 if (type == BEFORE_FIRST) { 4554 vext_set_elem_mask(vd, i, 0); 4555 } else { 4556 
vext_set_elem_mask(vd, i, 1); 4557 } 4558 } else { 4559 if (type == ONLY_FIRST) { 4560 vext_set_elem_mask(vd, i, 0); 4561 } else { 4562 vext_set_elem_mask(vd, i, 1); 4563 } 4564 } 4565 } 4566 env->vstart = 0; 4567 } 4568 4569 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4570 uint32_t desc) 4571 { 4572 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4573 } 4574 4575 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4576 uint32_t desc) 4577 { 4578 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4579 } 4580 4581 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4582 uint32_t desc) 4583 { 4584 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4585 } 4586 4587 /* Vector Iota Instruction */ 4588 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4589 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4590 uint32_t desc) \ 4591 { \ 4592 uint32_t vm = vext_vm(desc); \ 4593 uint32_t vl = env->vl; \ 4594 uint32_t sum = 0; \ 4595 int i; \ 4596 \ 4597 for (i = env->vstart; i < vl; i++) { \ 4598 if (!vm && !vext_elem_mask(v0, i)) { \ 4599 continue; \ 4600 } \ 4601 *((ETYPE *)vd + H(i)) = sum; \ 4602 if (vext_elem_mask(vs2, i)) { \ 4603 sum++; \ 4604 } \ 4605 } \ 4606 env->vstart = 0; \ 4607 } 4608 4609 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4610 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4611 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4612 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4613 4614 /* Vector Element Index Instruction */ 4615 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4616 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4617 { \ 4618 uint32_t vm = vext_vm(desc); \ 4619 uint32_t vl = env->vl; \ 4620 int i; \ 4621 \ 4622 for (i = env->vstart; i < vl; i++) { \ 4623 if (!vm && !vext_elem_mask(v0, i)) { \ 4624 continue; \ 4625 } \ 4626 *((ETYPE *)vd + H(i)) = i; \ 4627 } \ 4628 env->vstart = 0; \ 4629 } 4630 4631 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4632 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4633 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4634 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4635 4636 /* 4637 *** Vector Permutation Instructions 4638 */ 4639 4640 /* Vector Slide Instructions */ 4641 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4642 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4643 CPURISCVState *env, uint32_t desc) \ 4644 { \ 4645 uint32_t vm = vext_vm(desc); \ 4646 uint32_t vl = env->vl; \ 4647 target_ulong offset = s1, i_min, i; \ 4648 \ 4649 i_min = MAX(env->vstart, offset); \ 4650 for (i = i_min; i < vl; i++) { \ 4651 if (!vm && !vext_elem_mask(v0, i)) { \ 4652 continue; \ 4653 } \ 4654 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4655 } \ 4656 } 4657 4658 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4659 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4660 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4661 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4662 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4663 4664 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4665 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4666 CPURISCVState *env, uint32_t desc) \ 4667 { \ 4668 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4669 uint32_t vm = vext_vm(desc); \ 4670 uint32_t vl = env->vl; \ 4671 target_ulong i_max, i; \ 4672 \ 4673 i_max = MAX(MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl), env->vstart); \ 4674 for (i = env->vstart; i < i_max; ++i) { \ 4675 if (vm || vext_elem_mask(v0, i)) { \ 4676 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4677 } \ 4678 } \ 4679 \ 4680 for (i = i_max; i < vl; ++i) { \ 4681 if (vm || vext_elem_mask(v0, i)) { \ 4682 *((ETYPE *)vd + H(i)) = 0; \ 4683 } \ 4684 } \ 4685 \ 4686 env->vstart = 0; \ 4687 } 4688 4689 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4690 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4691 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4692 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4693 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4694 4695 #define GEN_VEXT_VSLIE1UP(ESZ, H) \ 4696 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4697 CPURISCVState *env, uint32_t desc) \ 4698 { \ 4699 typedef uint##ESZ##_t ETYPE; \ 4700 uint32_t vm = vext_vm(desc); \ 4701 uint32_t vl = env->vl; \ 4702 uint32_t i; \ 4703 \ 4704 for (i = env->vstart; i < vl; i++) { \ 4705 if (!vm && !vext_elem_mask(v0, i)) { \ 4706 continue; \ 4707 } \ 4708 if (i == 0) { \ 4709 *((ETYPE *)vd + H(i)) = s1; \ 4710 } else { \ 4711 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4712 } \ 4713 } \ 4714 env->vstart = 0; \ 4715 } 4716 4717 GEN_VEXT_VSLIE1UP(8, H1) 4718 GEN_VEXT_VSLIE1UP(16, H2) 4719 GEN_VEXT_VSLIE1UP(32, H4) 4720 GEN_VEXT_VSLIE1UP(64, H8) 4721 4722 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \ 4723 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4724 CPURISCVState *env, uint32_t desc) \ 4725 { \ 4726 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4727 } 4728 4729 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4730 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4731 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4732 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4733 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4734 4735 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \ 4736 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4737 CPURISCVState *env, uint32_t desc) \ 4738 { \ 4739 typedef uint##ESZ##_t ETYPE; \ 4740 uint32_t vm = vext_vm(desc); \ 4741 uint32_t vl = env->vl; \ 4742 uint32_t i; \ 4743 \ 4744 for (i = env->vstart; i < vl; i++) { \ 4745 if (!vm && !vext_elem_mask(v0, i)) { \ 4746 continue; \ 4747 } \ 4748 if (i == vl - 1) { \ 4749 *((ETYPE *)vd + H(i)) = s1; \ 4750 } else { \ 4751 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4752 } \ 4753 } \ 4754 env->vstart = 0; \ 4755 } 4756 4757 GEN_VEXT_VSLIDE1DOWN(8, H1) 4758 GEN_VEXT_VSLIDE1DOWN(16, H2) 4759 GEN_VEXT_VSLIDE1DOWN(32, H4) 4760 GEN_VEXT_VSLIDE1DOWN(64, H8) 4761 4762 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \ 4763 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4764 CPURISCVState *env, uint32_t desc) \ 4765 { \ 4766 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4767 } 4768 4769 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4770 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4771 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4772 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4773 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4774 4775 /* Vector Floating-Point Slide Instructions */ 4776 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \ 4777 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4778 CPURISCVState *env, uint32_t desc) \ 4779 { \ 4780 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4781 } 4782 4783 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = 
vs2[i] */ 4784 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4785 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4786 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4787 4788 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \ 4789 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4790 CPURISCVState *env, uint32_t desc) \ 4791 { \ 4792 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4793 } 4794 4795 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4796 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4797 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4798 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4799 4800 /* Vector Register Gather Instruction */ 4801 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4802 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4803 CPURISCVState *env, uint32_t desc) \ 4804 { \ 4805 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4806 uint32_t vm = vext_vm(desc); \ 4807 uint32_t vl = env->vl; \ 4808 uint64_t index; \ 4809 uint32_t i; \ 4810 \ 4811 for (i = env->vstart; i < vl; i++) { \ 4812 if (!vm && !vext_elem_mask(v0, i)) { \ 4813 continue; \ 4814 } \ 4815 index = *((TS1 *)vs1 + HS1(i)); \ 4816 if (index >= vlmax) { \ 4817 *((TS2 *)vd + HS2(i)) = 0; \ 4818 } else { \ 4819 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4820 } \ 4821 } \ 4822 env->vstart = 0; \ 4823 } 4824 4825 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 4826 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4827 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4828 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4829 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4830 4831 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4832 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4833 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4834 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4835 4836 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4837 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4838 CPURISCVState *env, uint32_t desc) \ 4839 { \ 4840 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4841 uint32_t vm = vext_vm(desc); \ 4842 uint32_t vl = env->vl; \ 4843 uint64_t index = s1; \ 4844 uint32_t i; \ 4845 \ 4846 for (i = env->vstart; i < vl; i++) { \ 4847 if (!vm && !vext_elem_mask(v0, i)) { \ 4848 continue; \ 4849 } \ 4850 if (index >= vlmax) { \ 4851 *((ETYPE *)vd + H(i)) = 0; \ 4852 } else { \ 4853 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4854 } \ 4855 } \ 4856 env->vstart = 0; \ 4857 } 4858 4859 /* vd[i] = (x[rs1] >= VLMAX) ? 
0 : vs2[rs1] */ 4860 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4861 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4862 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4863 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4864 4865 /* Vector Compress Instruction */ 4866 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4867 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4868 CPURISCVState *env, uint32_t desc) \ 4869 { \ 4870 uint32_t vl = env->vl; \ 4871 uint32_t num = 0, i; \ 4872 \ 4873 for (i = env->vstart; i < vl; i++) { \ 4874 if (!vext_elem_mask(vs1, i)) { \ 4875 continue; \ 4876 } \ 4877 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4878 num++; \ 4879 } \ 4880 env->vstart = 0; \ 4881 } 4882 4883 /* Compress into vd elements of vs2 where vs1 is enabled */ 4884 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4885 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4886 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4887 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4888 4889 /* Vector Whole Register Move */ 4890 #define GEN_VEXT_VMV_WHOLE(NAME, LEN) \ 4891 void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \ 4892 uint32_t desc) \ 4893 { \ 4894 /* EEW = 8 */ \ 4895 uint32_t maxsz = simd_maxsz(desc); \ 4896 uint32_t i = env->vstart; \ 4897 \ 4898 memcpy((uint8_t *)vd + H1(i), \ 4899 (uint8_t *)vs2 + H1(i), \ 4900 maxsz - env->vstart); \ 4901 \ 4902 env->vstart = 0; \ 4903 } 4904 4905 GEN_VEXT_VMV_WHOLE(vmv1r_v, 1) 4906 GEN_VEXT_VMV_WHOLE(vmv2r_v, 2) 4907 GEN_VEXT_VMV_WHOLE(vmv4r_v, 4) 4908 GEN_VEXT_VMV_WHOLE(vmv8r_v, 8) 4909 4910 /* Vector Integer Extension */ 4911 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \ 4912 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4913 CPURISCVState *env, uint32_t desc) \ 4914 { \ 4915 uint32_t vl = env->vl; \ 4916 uint32_t vm = vext_vm(desc); \ 4917 uint32_t i; \ 4918 \ 4919 for (i = env->vstart; i < vl; i++) { \ 4920 if (!vm && !vext_elem_mask(v0, i)) { \ 4921 continue; \ 4922 } \ 4923 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ 4924 } \ 4925 env->vstart = 0; \ 4926 } 4927 4928 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) 4929 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2) 4930 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4) 4931 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1) 4932 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2) 4933 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1) 4934 4935 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1) 4936 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2) 4937 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) 4938 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) 4939 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) 4940 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) 4941
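/*
 * Illustrative sketch only, not part of the upstream helpers: a stand-alone
 * reference for the vcompress.vm packing rule implemented by
 * GEN_VEXT_VCOMPRESS_VM above.  The VEXT_REFERENCE_EXAMPLE guard and the
 * ref_vcompress_u8() name are hypothetical and never defined/used by the
 * build; plain C arrays stand in for the vector registers.
 */
#ifdef VEXT_REFERENCE_EXAMPLE
static uint32_t ref_vcompress_u8(uint8_t *vd, const uint8_t *vs2,
                                 const uint64_t *vs1_mask, uint32_t vl)
{
    uint32_t num = 0;
    uint32_t i;

    for (i = 0; i < vl; i++) {
        /* Same bit layout as vext_elem_mask(): bit i of the packed mask. */
        if ((vs1_mask[i / 64] >> (i % 64)) & 1) {
            /* Selected elements of vs2 are packed to the front of vd. */
            vd[num++] = vs2[i];
        }
    }
    /*
     * Elements vd[num..vl-1] are left untouched, matching the helper above,
     * which likewise only writes the packed prefix and starts its loop at
     * env->vstart.  Note that vcompress.vm takes no v0 mask operand: vs1
     * itself is the selection mask, which is why the helper has no
     * vm/vext_elem_mask(v0, i) check.
     */
    return num;
}
#endif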