/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the maximum vlen is
     * 1024 bits, so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * This function checks watchpoints before a real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check. In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. The guest
 * software can then return here after processing the exception,
 * or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, addr, curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, addr, curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            target_ulong addr = base + stride * i + (k << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 250 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 251 target_ulong stride, CPURISCVState *env, \ 252 uint32_t desc) \ 253 { \ 254 uint32_t vm = vext_vm(desc); \ 255 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 256 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 257 } 258 259 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 260 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 261 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 262 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 263 264 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 265 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 266 target_ulong stride, CPURISCVState *env, \ 267 uint32_t desc) \ 268 { \ 269 uint32_t vm = vext_vm(desc); \ 270 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 271 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \ 272 } 273 274 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 275 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 276 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 277 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 278 279 /* 280 *** unit-stride: access elements stored contiguously in memory 281 */ 282 283 /* unmasked unit-stride load and store operation*/ 284 static void 285 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 286 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl, 287 uintptr_t ra, MMUAccessType access_type) 288 { 289 uint32_t i, k; 290 uint32_t nf = vext_nf(desc); 291 uint32_t max_elems = vext_max_elems(desc, esz); 292 293 /* load bytes from guest memory */ 294 for (i = env->vstart; i < evl; i++, env->vstart++) { 295 k = 0; 296 while (k < nf) { 297 target_ulong addr = base + ((i * nf + k) << esz); 298 ldst_elem(env, addr, i + k * max_elems, vd, ra); 299 k++; 300 } 301 } 302 env->vstart = 0; 303 } 304 305 /* 306 * masked unit-stride load and store operation will be a special case of stride, 307 * stride = NF * sizeof (MTYPE) 308 */ 309 310 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 311 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 312 CPURISCVState *env, uint32_t desc) \ 313 { \ 314 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 315 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 316 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 317 } \ 318 \ 319 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 320 CPURISCVState *env, uint32_t desc) \ 321 { \ 322 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 323 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \ 324 } 325 326 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 327 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 328 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 329 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 330 331 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 332 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 333 CPURISCVState *env, uint32_t desc) \ 334 { \ 335 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 336 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 337 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \ 338 } \ 339 \ 340 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 341 CPURISCVState *env, uint32_t desc) \ 342 { \ 343 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 344 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \ 345 } 346 347 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 348 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 349 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 350 GEN_VEXT_ST_US(vse64_v, int64_t, 
ste_d) 351 352 /* 353 *** unit stride mask load and store, EEW = 1 354 */ 355 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 356 CPURISCVState *env, uint32_t desc) 357 { 358 /* evl = ceil(vl/8) */ 359 uint8_t evl = (env->vl + 7) >> 3; 360 vext_ldst_us(vd, base, env, desc, lde_b, 361 0, evl, GETPC(), MMU_DATA_LOAD); 362 } 363 364 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 365 CPURISCVState *env, uint32_t desc) 366 { 367 /* evl = ceil(vl/8) */ 368 uint8_t evl = (env->vl + 7) >> 3; 369 vext_ldst_us(vd, base, env, desc, ste_b, 370 0, evl, GETPC(), MMU_DATA_STORE); 371 } 372 373 /* 374 *** index: access vector element from indexed memory 375 */ 376 typedef target_ulong vext_get_index_addr(target_ulong base, 377 uint32_t idx, void *vs2); 378 379 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 380 static target_ulong NAME(target_ulong base, \ 381 uint32_t idx, void *vs2) \ 382 { \ 383 return (base + *((ETYPE *)vs2 + H(idx))); \ 384 } 385 386 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 387 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 388 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 389 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 390 391 static inline void 392 vext_ldst_index(void *vd, void *v0, target_ulong base, 393 void *vs2, CPURISCVState *env, uint32_t desc, 394 vext_get_index_addr get_index_addr, 395 vext_ldst_elem_fn *ldst_elem, 396 uint32_t esz, uintptr_t ra, MMUAccessType access_type) 397 { 398 uint32_t i, k; 399 uint32_t nf = vext_nf(desc); 400 uint32_t vm = vext_vm(desc); 401 uint32_t max_elems = vext_max_elems(desc, esz); 402 403 /* load bytes from guest memory */ 404 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 405 if (!vm && !vext_elem_mask(v0, i)) { 406 continue; 407 } 408 409 k = 0; 410 while (k < nf) { 411 abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz); 412 ldst_elem(env, addr, i + k * max_elems, vd, ra); 413 k++; 414 } 415 } 416 env->vstart = 0; 417 } 418 419 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 420 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 421 void *vs2, CPURISCVState *env, uint32_t desc) \ 422 { \ 423 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 424 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 425 } 426 427 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 428 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 429 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 430 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 431 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 432 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 433 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 434 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 435 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 436 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 437 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 438 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 439 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 440 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 441 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 442 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 443 444 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 445 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 446 void *vs2, CPURISCVState *env, uint32_t desc) \ 447 { \ 448 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 449 STORE_FN, ctzl(sizeof(ETYPE)), \ 450 GETPC(), MMU_DATA_STORE); \ 451 } 452 453 
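/*
 * Naming note for the indexed helpers generated below (a descriptive
 * summary of the code, not text from the original source): in
 * {vl,vs}xei<N>_<M>_v, <N> is the EEW of the index vector (matching
 * idx_b/idx_h/idx_w/idx_d) and <M> is the EEW of the data elements
 * (matching ETYPE). Since GEN_VEXT_GET_INDEX_ADDR uses unsigned index
 * types, each offset is zero-extended before being added to base, and
 * segment field k is then accessed at
 * get_index_addr(base, i, vs2) + (k << esz), as in vext_ldst_index().
 */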
GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = base + i * (nf << esz);
        if (i == 0) {
            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, offset, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr += offset;
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ?
(M) : (N)) 563 564 /* Unsigned min/max */ 565 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 566 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 567 568 /* 569 *** load and store whole register instructions 570 */ 571 static void 572 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 573 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra, 574 MMUAccessType access_type) 575 { 576 uint32_t i, k, off, pos; 577 uint32_t nf = vext_nf(desc); 578 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 579 uint32_t max_elems = vlenb >> esz; 580 581 k = env->vstart / max_elems; 582 off = env->vstart % max_elems; 583 584 if (off) { 585 /* load/store rest of elements of current segment pointed by vstart */ 586 for (pos = off; pos < max_elems; pos++, env->vstart++) { 587 target_ulong addr = base + ((pos + k * max_elems) << esz); 588 ldst_elem(env, addr, pos + k * max_elems, vd, ra); 589 } 590 k++; 591 } 592 593 /* load/store elements for rest of segments */ 594 for (; k < nf; k++) { 595 for (i = 0; i < max_elems; i++, env->vstart++) { 596 target_ulong addr = base + ((i + k * max_elems) << esz); 597 ldst_elem(env, addr, i + k * max_elems, vd, ra); 598 } 599 } 600 601 env->vstart = 0; 602 } 603 604 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 605 void HELPER(NAME)(void *vd, target_ulong base, \ 606 CPURISCVState *env, uint32_t desc) \ 607 { \ 608 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 609 ctzl(sizeof(ETYPE)), GETPC(), \ 610 MMU_DATA_LOAD); \ 611 } 612 613 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 614 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 615 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 616 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 617 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 618 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 619 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 620 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 621 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 622 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 623 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 624 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 625 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 626 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 627 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 628 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 629 630 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 631 void HELPER(NAME)(void *vd, target_ulong base, \ 632 CPURISCVState *env, uint32_t desc) \ 633 { \ 634 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 635 ctzl(sizeof(ETYPE)), GETPC(), \ 636 MMU_DATA_STORE); \ 637 } 638 639 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 640 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 641 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 642 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 643 644 /* 645 *** Vector Integer Arithmetic Instructions 646 */ 647 648 /* expand macro args before macro */ 649 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 650 651 /* (TD, T1, T2, TX1, TX2) */ 652 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 653 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 654 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 655 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 656 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 657 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 658 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 659 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 660 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 661 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 662 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 663 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 664 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 665 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 666 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 667 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 668 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 669 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 670 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 671 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 672 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 673 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 674 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 675 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 676 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 677 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 678 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 679 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 680 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 681 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 682 683 /* operation of two vector elements */ 684 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 685 686 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 687 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 688 { \ 689 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 690 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 691 *((TD *)vd + HD(i)) = OP(s2, s1); \ 692 } 693 #define DO_SUB(N, M) (N - M) 694 #define DO_RSUB(N, M) (M - N) 695 696 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 697 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 698 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 699 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 700 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 701 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 702 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 703 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 704 705 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 706 CPURISCVState *env, uint32_t desc, 707 uint32_t esz, uint32_t dsz, 708 opivv2_fn *fn) 709 { 710 uint32_t vm = vext_vm(desc); 711 uint32_t vl = env->vl; 712 uint32_t i; 713 714 for (i = env->vstart; i < vl; i++) { 715 if (!vm && !vext_elem_mask(v0, i)) { 716 continue; 717 } 718 fn(vd, vs1, vs2, i); 719 } 720 env->vstart = 0; 721 } 722 723 /* generate the helpers for OPIVV */ 724 #define GEN_VEXT_VV(NAME, ESZ, DSZ) \ 725 void HELPER(NAME)(void *vd, void *v0, void *vs1, 
\ 726 void *vs2, CPURISCVState *env, \ 727 uint32_t desc) \ 728 { \ 729 do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 730 do_##NAME); \ 731 } 732 733 GEN_VEXT_VV(vadd_vv_b, 1, 1) 734 GEN_VEXT_VV(vadd_vv_h, 2, 2) 735 GEN_VEXT_VV(vadd_vv_w, 4, 4) 736 GEN_VEXT_VV(vadd_vv_d, 8, 8) 737 GEN_VEXT_VV(vsub_vv_b, 1, 1) 738 GEN_VEXT_VV(vsub_vv_h, 2, 2) 739 GEN_VEXT_VV(vsub_vv_w, 4, 4) 740 GEN_VEXT_VV(vsub_vv_d, 8, 8) 741 742 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 743 744 /* 745 * (T1)s1 gives the real operator type. 746 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 747 */ 748 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 749 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 750 { \ 751 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 752 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 753 } 754 755 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 756 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 757 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 758 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 759 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 760 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 761 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 762 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 763 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 764 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 765 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 766 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 767 768 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 769 CPURISCVState *env, uint32_t desc, 770 uint32_t esz, uint32_t dsz, 771 opivx2_fn fn) 772 { 773 uint32_t vm = vext_vm(desc); 774 uint32_t vl = env->vl; 775 uint32_t i; 776 777 for (i = env->vstart; i < vl; i++) { 778 if (!vm && !vext_elem_mask(v0, i)) { 779 continue; 780 } 781 fn(vd, s1, vs2, i); 782 } 783 env->vstart = 0; 784 } 785 786 /* generate the helpers for OPIVX */ 787 #define GEN_VEXT_VX(NAME, ESZ, DSZ) \ 788 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 789 void *vs2, CPURISCVState *env, \ 790 uint32_t desc) \ 791 { \ 792 do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 793 do_##NAME); \ 794 } 795 796 GEN_VEXT_VX(vadd_vx_b, 1, 1) 797 GEN_VEXT_VX(vadd_vx_h, 2, 2) 798 GEN_VEXT_VX(vadd_vx_w, 4, 4) 799 GEN_VEXT_VX(vadd_vx_d, 8, 8) 800 GEN_VEXT_VX(vsub_vx_b, 1, 1) 801 GEN_VEXT_VX(vsub_vx_h, 2, 2) 802 GEN_VEXT_VX(vsub_vx_w, 4, 4) 803 GEN_VEXT_VX(vsub_vx_d, 8, 8) 804 GEN_VEXT_VX(vrsub_vx_b, 1, 1) 805 GEN_VEXT_VX(vrsub_vx_h, 2, 2) 806 GEN_VEXT_VX(vrsub_vx_w, 4, 4) 807 GEN_VEXT_VX(vrsub_vx_d, 8, 8) 808 809 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 810 { 811 intptr_t oprsz = simd_oprsz(desc); 812 intptr_t i; 813 814 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 815 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 816 } 817 } 818 819 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 820 { 821 intptr_t oprsz = simd_oprsz(desc); 822 intptr_t i; 823 824 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 825 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 826 } 827 } 828 829 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 830 { 831 intptr_t oprsz = simd_oprsz(desc); 832 intptr_t i; 833 834 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 835 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 836 } 837 } 838 839 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 840 
{ 841 intptr_t oprsz = simd_oprsz(desc); 842 intptr_t i; 843 844 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 845 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 846 } 847 } 848 849 /* Vector Widening Integer Add/Subtract */ 850 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 851 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 852 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 853 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 854 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 855 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 856 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 857 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 858 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 859 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 860 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 861 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 862 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 863 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 864 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 865 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 866 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 867 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 868 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 869 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 870 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 871 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 872 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 873 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 874 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 875 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 876 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 877 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 878 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 879 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 880 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 881 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 882 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 883 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 884 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 885 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 886 GEN_VEXT_VV(vwaddu_vv_b, 1, 2) 887 GEN_VEXT_VV(vwaddu_vv_h, 2, 4) 888 GEN_VEXT_VV(vwaddu_vv_w, 4, 8) 889 GEN_VEXT_VV(vwsubu_vv_b, 1, 2) 890 GEN_VEXT_VV(vwsubu_vv_h, 2, 4) 891 GEN_VEXT_VV(vwsubu_vv_w, 4, 8) 892 GEN_VEXT_VV(vwadd_vv_b, 1, 2) 893 GEN_VEXT_VV(vwadd_vv_h, 2, 4) 894 GEN_VEXT_VV(vwadd_vv_w, 4, 8) 895 GEN_VEXT_VV(vwsub_vv_b, 1, 2) 896 GEN_VEXT_VV(vwsub_vv_h, 2, 4) 897 GEN_VEXT_VV(vwsub_vv_w, 4, 8) 898 GEN_VEXT_VV(vwaddu_wv_b, 1, 2) 899 GEN_VEXT_VV(vwaddu_wv_h, 2, 4) 900 GEN_VEXT_VV(vwaddu_wv_w, 4, 8) 901 GEN_VEXT_VV(vwsubu_wv_b, 1, 2) 902 GEN_VEXT_VV(vwsubu_wv_h, 2, 4) 903 GEN_VEXT_VV(vwsubu_wv_w, 4, 8) 904 GEN_VEXT_VV(vwadd_wv_b, 1, 2) 905 GEN_VEXT_VV(vwadd_wv_h, 2, 4) 906 GEN_VEXT_VV(vwadd_wv_w, 4, 8) 907 GEN_VEXT_VV(vwsub_wv_b, 1, 2) 908 GEN_VEXT_VV(vwsub_wv_h, 2, 4) 909 GEN_VEXT_VV(vwsub_wv_w, 4, 8) 910 911 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 912 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 913 
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 914 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 915 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 916 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 917 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 918 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 919 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 920 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 921 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 922 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 923 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 924 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 925 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 926 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 927 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 928 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 929 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 930 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 931 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 932 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 933 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 934 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 935 GEN_VEXT_VX(vwaddu_vx_b, 1, 2) 936 GEN_VEXT_VX(vwaddu_vx_h, 2, 4) 937 GEN_VEXT_VX(vwaddu_vx_w, 4, 8) 938 GEN_VEXT_VX(vwsubu_vx_b, 1, 2) 939 GEN_VEXT_VX(vwsubu_vx_h, 2, 4) 940 GEN_VEXT_VX(vwsubu_vx_w, 4, 8) 941 GEN_VEXT_VX(vwadd_vx_b, 1, 2) 942 GEN_VEXT_VX(vwadd_vx_h, 2, 4) 943 GEN_VEXT_VX(vwadd_vx_w, 4, 8) 944 GEN_VEXT_VX(vwsub_vx_b, 1, 2) 945 GEN_VEXT_VX(vwsub_vx_h, 2, 4) 946 GEN_VEXT_VX(vwsub_vx_w, 4, 8) 947 GEN_VEXT_VX(vwaddu_wx_b, 1, 2) 948 GEN_VEXT_VX(vwaddu_wx_h, 2, 4) 949 GEN_VEXT_VX(vwaddu_wx_w, 4, 8) 950 GEN_VEXT_VX(vwsubu_wx_b, 1, 2) 951 GEN_VEXT_VX(vwsubu_wx_h, 2, 4) 952 GEN_VEXT_VX(vwsubu_wx_w, 4, 8) 953 GEN_VEXT_VX(vwadd_wx_b, 1, 2) 954 GEN_VEXT_VX(vwadd_wx_h, 2, 4) 955 GEN_VEXT_VX(vwadd_wx_w, 4, 8) 956 GEN_VEXT_VX(vwsub_wx_b, 1, 2) 957 GEN_VEXT_VX(vwsub_wx_h, 2, 4) 958 GEN_VEXT_VX(vwsub_wx_w, 4, 8) 959 960 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 961 #define DO_VADC(N, M, C) (N + M + C) 962 #define DO_VSBC(N, M, C) (N - M - C) 963 964 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 965 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 966 CPURISCVState *env, uint32_t desc) \ 967 { \ 968 uint32_t vl = env->vl; \ 969 uint32_t i; \ 970 \ 971 for (i = env->vstart; i < vl; i++) { \ 972 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 973 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 974 ETYPE carry = vext_elem_mask(v0, i); \ 975 \ 976 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 977 } \ 978 env->vstart = 0; \ 979 } 980 981 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 982 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 983 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 984 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 985 986 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 987 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 988 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 989 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 990 991 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 992 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 993 CPURISCVState *env, uint32_t desc) \ 994 { \ 995 uint32_t vl = env->vl; \ 996 uint32_t i; \ 997 \ 998 for (i = env->vstart; i < vl; i++) { \ 999 ETYPE s2 = 
*((ETYPE *)vs2 + H(i)); \ 1000 ETYPE carry = vext_elem_mask(v0, i); \ 1001 \ 1002 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1003 } \ 1004 env->vstart = 0; \ 1005 } 1006 1007 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1008 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1009 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1010 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1011 1012 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1013 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1014 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1015 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1016 1017 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1018 (__typeof(N))(N + M) < N) 1019 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 1020 1021 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1022 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1023 CPURISCVState *env, uint32_t desc) \ 1024 { \ 1025 uint32_t vl = env->vl; \ 1026 uint32_t vm = vext_vm(desc); \ 1027 uint32_t i; \ 1028 \ 1029 for (i = env->vstart; i < vl; i++) { \ 1030 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1031 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1032 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1033 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1034 } \ 1035 env->vstart = 0; \ 1036 } 1037 1038 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1039 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1040 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1041 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1042 1043 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1044 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1045 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1046 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1047 1048 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1049 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1050 void *vs2, CPURISCVState *env, uint32_t desc) \ 1051 { \ 1052 uint32_t vl = env->vl; \ 1053 uint32_t vm = vext_vm(desc); \ 1054 uint32_t i; \ 1055 \ 1056 for (i = env->vstart; i < vl; i++) { \ 1057 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1058 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1059 vext_set_elem_mask(vd, i, \ 1060 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1061 } \ 1062 env->vstart = 0; \ 1063 } 1064 1065 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1066 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1067 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1068 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1069 1070 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1071 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1072 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1073 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1074 1075 /* Vector Bitwise Logical Instructions */ 1076 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1077 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1078 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1079 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1080 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1081 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1082 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1083 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1084 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1085 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1086 
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1087 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1088 GEN_VEXT_VV(vand_vv_b, 1, 1) 1089 GEN_VEXT_VV(vand_vv_h, 2, 2) 1090 GEN_VEXT_VV(vand_vv_w, 4, 4) 1091 GEN_VEXT_VV(vand_vv_d, 8, 8) 1092 GEN_VEXT_VV(vor_vv_b, 1, 1) 1093 GEN_VEXT_VV(vor_vv_h, 2, 2) 1094 GEN_VEXT_VV(vor_vv_w, 4, 4) 1095 GEN_VEXT_VV(vor_vv_d, 8, 8) 1096 GEN_VEXT_VV(vxor_vv_b, 1, 1) 1097 GEN_VEXT_VV(vxor_vv_h, 2, 2) 1098 GEN_VEXT_VV(vxor_vv_w, 4, 4) 1099 GEN_VEXT_VV(vxor_vv_d, 8, 8) 1100 1101 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1102 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1103 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1104 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1105 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1106 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1107 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1108 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1109 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1110 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1111 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1112 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1113 GEN_VEXT_VX(vand_vx_b, 1, 1) 1114 GEN_VEXT_VX(vand_vx_h, 2, 2) 1115 GEN_VEXT_VX(vand_vx_w, 4, 4) 1116 GEN_VEXT_VX(vand_vx_d, 8, 8) 1117 GEN_VEXT_VX(vor_vx_b, 1, 1) 1118 GEN_VEXT_VX(vor_vx_h, 2, 2) 1119 GEN_VEXT_VX(vor_vx_w, 4, 4) 1120 GEN_VEXT_VX(vor_vx_d, 8, 8) 1121 GEN_VEXT_VX(vxor_vx_b, 1, 1) 1122 GEN_VEXT_VX(vxor_vx_h, 2, 2) 1123 GEN_VEXT_VX(vxor_vx_w, 4, 4) 1124 GEN_VEXT_VX(vxor_vx_d, 8, 8) 1125 1126 /* Vector Single-Width Bit Shift Instructions */ 1127 #define DO_SLL(N, M) (N << (M)) 1128 #define DO_SRL(N, M) (N >> (M)) 1129 1130 /* generate the helpers for shift instructions with two vector operators */ 1131 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1132 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1133 void *vs2, CPURISCVState *env, uint32_t desc) \ 1134 { \ 1135 uint32_t vm = vext_vm(desc); \ 1136 uint32_t vl = env->vl; \ 1137 uint32_t i; \ 1138 \ 1139 for (i = env->vstart; i < vl; i++) { \ 1140 if (!vm && !vext_elem_mask(v0, i)) { \ 1141 continue; \ 1142 } \ 1143 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1144 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1145 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1146 } \ 1147 env->vstart = 0; \ 1148 } 1149 1150 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1151 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1152 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1153 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1154 1155 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1156 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1157 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1158 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1159 1160 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1161 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1162 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1163 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1164 1165 /* generate the helpers for shift instructions with one vector and one scalar */ 1166 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1167 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1168 void *vs2, 
CPURISCVState *env, uint32_t desc) \ 1169 { \ 1170 uint32_t vm = vext_vm(desc); \ 1171 uint32_t vl = env->vl; \ 1172 uint32_t i; \ 1173 \ 1174 for (i = env->vstart; i < vl; i++) { \ 1175 if (!vm && !vext_elem_mask(v0, i)) { \ 1176 continue; \ 1177 } \ 1178 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1179 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1180 } \ 1181 env->vstart = 0; \ 1182 } 1183 1184 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1185 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1186 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1187 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1188 1189 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1190 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1191 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1192 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1193 1194 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1195 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1196 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1197 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1198 1199 /* Vector Narrowing Integer Right Shift Instructions */ 1200 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1201 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1202 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1203 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1204 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1205 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1206 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1207 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1208 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1209 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1210 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1211 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1212 1213 /* Vector Integer Comparison Instructions */ 1214 #define DO_MSEQ(N, M) (N == M) 1215 #define DO_MSNE(N, M) (N != M) 1216 #define DO_MSLT(N, M) (N < M) 1217 #define DO_MSLE(N, M) (N <= M) 1218 #define DO_MSGT(N, M) (N > M) 1219 1220 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1221 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1222 CPURISCVState *env, uint32_t desc) \ 1223 { \ 1224 uint32_t vm = vext_vm(desc); \ 1225 uint32_t vl = env->vl; \ 1226 uint32_t i; \ 1227 \ 1228 for (i = env->vstart; i < vl; i++) { \ 1229 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1230 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1231 if (!vm && !vext_elem_mask(v0, i)) { \ 1232 continue; \ 1233 } \ 1234 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1235 } \ 1236 env->vstart = 0; \ 1237 } 1238 1239 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1240 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1241 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1242 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1243 1244 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1245 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1246 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1247 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1248 1249 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, 
DO_MSLT) 1250 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1251 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1252 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1253 1254 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1255 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1256 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1257 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1258 1259 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1260 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1261 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1262 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1263 1264 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1265 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1266 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1267 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1268 1269 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1270 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1271 CPURISCVState *env, uint32_t desc) \ 1272 { \ 1273 uint32_t vm = vext_vm(desc); \ 1274 uint32_t vl = env->vl; \ 1275 uint32_t i; \ 1276 \ 1277 for (i = env->vstart; i < vl; i++) { \ 1278 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1279 if (!vm && !vext_elem_mask(v0, i)) { \ 1280 continue; \ 1281 } \ 1282 vext_set_elem_mask(vd, i, \ 1283 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1284 } \ 1285 env->vstart = 0; \ 1286 } 1287 1288 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1289 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1290 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1291 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1292 1293 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1294 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1295 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1296 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1297 1298 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1299 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1300 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1301 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1302 1303 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1304 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1305 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1306 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1307 1308 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1309 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1310 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1311 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1312 1313 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1314 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1315 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1316 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1317 1318 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1319 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1320 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1321 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1322 1323 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1324 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1325 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1326 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1327 1328 /* Vector Integer Min/Max Instructions */ 1329 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1330 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1331 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1332 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1333 
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1334 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1335 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1336 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1337 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1338 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1339 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1340 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1341 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1342 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1343 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1344 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1345 GEN_VEXT_VV(vminu_vv_b, 1, 1) 1346 GEN_VEXT_VV(vminu_vv_h, 2, 2) 1347 GEN_VEXT_VV(vminu_vv_w, 4, 4) 1348 GEN_VEXT_VV(vminu_vv_d, 8, 8) 1349 GEN_VEXT_VV(vmin_vv_b, 1, 1) 1350 GEN_VEXT_VV(vmin_vv_h, 2, 2) 1351 GEN_VEXT_VV(vmin_vv_w, 4, 4) 1352 GEN_VEXT_VV(vmin_vv_d, 8, 8) 1353 GEN_VEXT_VV(vmaxu_vv_b, 1, 1) 1354 GEN_VEXT_VV(vmaxu_vv_h, 2, 2) 1355 GEN_VEXT_VV(vmaxu_vv_w, 4, 4) 1356 GEN_VEXT_VV(vmaxu_vv_d, 8, 8) 1357 GEN_VEXT_VV(vmax_vv_b, 1, 1) 1358 GEN_VEXT_VV(vmax_vv_h, 2, 2) 1359 GEN_VEXT_VV(vmax_vv_w, 4, 4) 1360 GEN_VEXT_VV(vmax_vv_d, 8, 8) 1361 1362 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1363 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1364 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1365 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1366 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1367 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1368 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1369 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1370 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1371 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1372 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1373 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1374 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1375 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1376 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1377 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1378 GEN_VEXT_VX(vminu_vx_b, 1, 1) 1379 GEN_VEXT_VX(vminu_vx_h, 2, 2) 1380 GEN_VEXT_VX(vminu_vx_w, 4, 4) 1381 GEN_VEXT_VX(vminu_vx_d, 8, 8) 1382 GEN_VEXT_VX(vmin_vx_b, 1, 1) 1383 GEN_VEXT_VX(vmin_vx_h, 2, 2) 1384 GEN_VEXT_VX(vmin_vx_w, 4, 4) 1385 GEN_VEXT_VX(vmin_vx_d, 8, 8) 1386 GEN_VEXT_VX(vmaxu_vx_b, 1, 1) 1387 GEN_VEXT_VX(vmaxu_vx_h, 2, 2) 1388 GEN_VEXT_VX(vmaxu_vx_w, 4, 4) 1389 GEN_VEXT_VX(vmaxu_vx_d, 8, 8) 1390 GEN_VEXT_VX(vmax_vx_b, 1, 1) 1391 GEN_VEXT_VX(vmax_vx_h, 2, 2) 1392 GEN_VEXT_VX(vmax_vx_w, 4, 4) 1393 GEN_VEXT_VX(vmax_vx_d, 8, 8) 1394 1395 /* Vector Single-Width Integer Multiply Instructions */ 1396 #define DO_MUL(N, M) (N * M) 1397 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1398 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1399 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1400 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1401 GEN_VEXT_VV(vmul_vv_b, 1, 1) 1402 GEN_VEXT_VV(vmul_vv_h, 2, 2) 1403 GEN_VEXT_VV(vmul_vv_w, 4, 4) 1404 GEN_VEXT_VV(vmul_vv_d, 8, 8) 1405 1406 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1407 { 1408 return (int16_t)s2 * (int16_t)s1 >> 8; 1409 } 1410 1411 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1412 { 1413 return (int32_t)s2 * (int32_t)s1 >> 16; 1414 } 1415 1416 static int32_t do_mulh_w(int32_t s2, 
int32_t s1) 1417 { 1418 return (int64_t)s2 * (int64_t)s1 >> 32; 1419 } 1420 1421 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1422 { 1423 uint64_t hi_64, lo_64; 1424 1425 muls64(&lo_64, &hi_64, s1, s2); 1426 return hi_64; 1427 } 1428 1429 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1430 { 1431 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1432 } 1433 1434 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1435 { 1436 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1437 } 1438 1439 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1440 { 1441 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1442 } 1443 1444 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1445 { 1446 uint64_t hi_64, lo_64; 1447 1448 mulu64(&lo_64, &hi_64, s2, s1); 1449 return hi_64; 1450 } 1451 1452 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1453 { 1454 return (int16_t)s2 * (uint16_t)s1 >> 8; 1455 } 1456 1457 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1458 { 1459 return (int32_t)s2 * (uint32_t)s1 >> 16; 1460 } 1461 1462 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1463 { 1464 return (int64_t)s2 * (uint64_t)s1 >> 32; 1465 } 1466 1467 /* 1468 * Let A = signed operand, 1469 * B = unsigned operand 1470 * P = mulu64(A, B), unsigned product 1471 * 1472 * LET X = 2 ** 64 - A, 2's complement of A 1473 * SP = signed product 1474 * THEN 1475 * IF A < 0 1476 * SP = -X * B 1477 * = -(2 ** 64 - A) * B 1478 * = A * B - 2 ** 64 * B 1479 * = P - 2 ** 64 * B 1480 * ELSE 1481 * SP = P 1482 * THEN 1483 * HI_P -= (A < 0 ? B : 0) 1484 */ 1485 1486 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1487 { 1488 uint64_t hi_64, lo_64; 1489 1490 mulu64(&lo_64, &hi_64, s2, s1); 1491 1492 hi_64 -= s2 < 0 ? s1 : 0; 1493 return hi_64; 1494 } 1495 1496 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1497 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1498 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1499 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1500 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1501 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1502 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1503 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1504 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1505 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1506 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1507 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1508 GEN_VEXT_VV(vmulh_vv_b, 1, 1) 1509 GEN_VEXT_VV(vmulh_vv_h, 2, 2) 1510 GEN_VEXT_VV(vmulh_vv_w, 4, 4) 1511 GEN_VEXT_VV(vmulh_vv_d, 8, 8) 1512 GEN_VEXT_VV(vmulhu_vv_b, 1, 1) 1513 GEN_VEXT_VV(vmulhu_vv_h, 2, 2) 1514 GEN_VEXT_VV(vmulhu_vv_w, 4, 4) 1515 GEN_VEXT_VV(vmulhu_vv_d, 8, 8) 1516 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) 1517 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) 1518 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) 1519 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) 1520 1521 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1522 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1523 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1524 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1525 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1526 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1527 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1528 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1529 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1530 RVVCALL(OPIVX2, 
vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1531 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1532 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1533 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1534 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1535 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1536 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1537 GEN_VEXT_VX(vmul_vx_b, 1, 1) 1538 GEN_VEXT_VX(vmul_vx_h, 2, 2) 1539 GEN_VEXT_VX(vmul_vx_w, 4, 4) 1540 GEN_VEXT_VX(vmul_vx_d, 8, 8) 1541 GEN_VEXT_VX(vmulh_vx_b, 1, 1) 1542 GEN_VEXT_VX(vmulh_vx_h, 2, 2) 1543 GEN_VEXT_VX(vmulh_vx_w, 4, 4) 1544 GEN_VEXT_VX(vmulh_vx_d, 8, 8) 1545 GEN_VEXT_VX(vmulhu_vx_b, 1, 1) 1546 GEN_VEXT_VX(vmulhu_vx_h, 2, 2) 1547 GEN_VEXT_VX(vmulhu_vx_w, 4, 4) 1548 GEN_VEXT_VX(vmulhu_vx_d, 8, 8) 1549 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) 1550 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) 1551 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) 1552 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) 1553 1554 /* Vector Integer Divide Instructions */ 1555 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1556 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1557 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1558 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1559 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1560 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1561 1562 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1563 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1564 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1565 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1566 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1567 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1568 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1569 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1570 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1571 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1572 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1573 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1574 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1575 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1576 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1577 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1578 GEN_VEXT_VV(vdivu_vv_b, 1, 1) 1579 GEN_VEXT_VV(vdivu_vv_h, 2, 2) 1580 GEN_VEXT_VV(vdivu_vv_w, 4, 4) 1581 GEN_VEXT_VV(vdivu_vv_d, 8, 8) 1582 GEN_VEXT_VV(vdiv_vv_b, 1, 1) 1583 GEN_VEXT_VV(vdiv_vv_h, 2, 2) 1584 GEN_VEXT_VV(vdiv_vv_w, 4, 4) 1585 GEN_VEXT_VV(vdiv_vv_d, 8, 8) 1586 GEN_VEXT_VV(vremu_vv_b, 1, 1) 1587 GEN_VEXT_VV(vremu_vv_h, 2, 2) 1588 GEN_VEXT_VV(vremu_vv_w, 4, 4) 1589 GEN_VEXT_VV(vremu_vv_d, 8, 8) 1590 GEN_VEXT_VV(vrem_vv_b, 1, 1) 1591 GEN_VEXT_VV(vrem_vv_h, 2, 2) 1592 GEN_VEXT_VV(vrem_vv_w, 4, 4) 1593 GEN_VEXT_VV(vrem_vv_d, 8, 8) 1594 1595 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1596 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1597 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1598 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1599 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1600 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1601 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1602 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1603 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1604 
RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1605 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1606 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1607 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1608 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1609 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1610 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1611 GEN_VEXT_VX(vdivu_vx_b, 1, 1) 1612 GEN_VEXT_VX(vdivu_vx_h, 2, 2) 1613 GEN_VEXT_VX(vdivu_vx_w, 4, 4) 1614 GEN_VEXT_VX(vdivu_vx_d, 8, 8) 1615 GEN_VEXT_VX(vdiv_vx_b, 1, 1) 1616 GEN_VEXT_VX(vdiv_vx_h, 2, 2) 1617 GEN_VEXT_VX(vdiv_vx_w, 4, 4) 1618 GEN_VEXT_VX(vdiv_vx_d, 8, 8) 1619 GEN_VEXT_VX(vremu_vx_b, 1, 1) 1620 GEN_VEXT_VX(vremu_vx_h, 2, 2) 1621 GEN_VEXT_VX(vremu_vx_w, 4, 4) 1622 GEN_VEXT_VX(vremu_vx_d, 8, 8) 1623 GEN_VEXT_VX(vrem_vx_b, 1, 1) 1624 GEN_VEXT_VX(vrem_vx_h, 2, 2) 1625 GEN_VEXT_VX(vrem_vx_w, 4, 4) 1626 GEN_VEXT_VX(vrem_vx_d, 8, 8) 1627 1628 /* Vector Widening Integer Multiply Instructions */ 1629 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1630 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1631 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1632 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1633 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1634 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1635 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1636 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1637 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1638 GEN_VEXT_VV(vwmul_vv_b, 1, 2) 1639 GEN_VEXT_VV(vwmul_vv_h, 2, 4) 1640 GEN_VEXT_VV(vwmul_vv_w, 4, 8) 1641 GEN_VEXT_VV(vwmulu_vv_b, 1, 2) 1642 GEN_VEXT_VV(vwmulu_vv_h, 2, 4) 1643 GEN_VEXT_VV(vwmulu_vv_w, 4, 8) 1644 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) 1645 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) 1646 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) 1647 1648 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1649 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1650 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1651 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1652 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1653 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1654 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1655 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1656 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1657 GEN_VEXT_VX(vwmul_vx_b, 1, 2) 1658 GEN_VEXT_VX(vwmul_vx_h, 2, 4) 1659 GEN_VEXT_VX(vwmul_vx_w, 4, 8) 1660 GEN_VEXT_VX(vwmulu_vx_b, 1, 2) 1661 GEN_VEXT_VX(vwmulu_vx_h, 2, 4) 1662 GEN_VEXT_VX(vwmulu_vx_w, 4, 8) 1663 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) 1664 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) 1665 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) 1666 1667 /* Vector Single-Width Integer Multiply-Add Instructions */ 1668 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1669 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1670 { \ 1671 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1672 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1673 TD d = *((TD *)vd + HD(i)); \ 1674 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1675 } 1676 1677 #define DO_MACC(N, M, D) (M * N + D) 1678 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1679 #define DO_MADD(N, M, D) (M * D + N) 1680 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1681 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1682 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1683 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, 
DO_MACC) 1684 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1685 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1686 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1687 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1688 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1689 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1690 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1691 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1692 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1693 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1694 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1695 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1696 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1697 GEN_VEXT_VV(vmacc_vv_b, 1, 1) 1698 GEN_VEXT_VV(vmacc_vv_h, 2, 2) 1699 GEN_VEXT_VV(vmacc_vv_w, 4, 4) 1700 GEN_VEXT_VV(vmacc_vv_d, 8, 8) 1701 GEN_VEXT_VV(vnmsac_vv_b, 1, 1) 1702 GEN_VEXT_VV(vnmsac_vv_h, 2, 2) 1703 GEN_VEXT_VV(vnmsac_vv_w, 4, 4) 1704 GEN_VEXT_VV(vnmsac_vv_d, 8, 8) 1705 GEN_VEXT_VV(vmadd_vv_b, 1, 1) 1706 GEN_VEXT_VV(vmadd_vv_h, 2, 2) 1707 GEN_VEXT_VV(vmadd_vv_w, 4, 4) 1708 GEN_VEXT_VV(vmadd_vv_d, 8, 8) 1709 GEN_VEXT_VV(vnmsub_vv_b, 1, 1) 1710 GEN_VEXT_VV(vnmsub_vv_h, 2, 2) 1711 GEN_VEXT_VV(vnmsub_vv_w, 4, 4) 1712 GEN_VEXT_VV(vnmsub_vv_d, 8, 8) 1713 1714 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1715 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1716 { \ 1717 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1718 TD d = *((TD *)vd + HD(i)); \ 1719 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1720 } 1721 1722 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1723 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1724 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1725 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1726 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1727 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1728 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1729 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1730 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1731 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1732 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1733 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1734 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1735 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1736 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1737 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1738 GEN_VEXT_VX(vmacc_vx_b, 1, 1) 1739 GEN_VEXT_VX(vmacc_vx_h, 2, 2) 1740 GEN_VEXT_VX(vmacc_vx_w, 4, 4) 1741 GEN_VEXT_VX(vmacc_vx_d, 8, 8) 1742 GEN_VEXT_VX(vnmsac_vx_b, 1, 1) 1743 GEN_VEXT_VX(vnmsac_vx_h, 2, 2) 1744 GEN_VEXT_VX(vnmsac_vx_w, 4, 4) 1745 GEN_VEXT_VX(vnmsac_vx_d, 8, 8) 1746 GEN_VEXT_VX(vmadd_vx_b, 1, 1) 1747 GEN_VEXT_VX(vmadd_vx_h, 2, 2) 1748 GEN_VEXT_VX(vmadd_vx_w, 4, 4) 1749 GEN_VEXT_VX(vmadd_vx_d, 8, 8) 1750 GEN_VEXT_VX(vnmsub_vx_b, 1, 1) 1751 GEN_VEXT_VX(vnmsub_vx_h, 2, 2) 1752 GEN_VEXT_VX(vnmsub_vx_w, 4, 4) 1753 GEN_VEXT_VX(vnmsub_vx_d, 8, 8) 1754 1755 /* Vector Widening Integer Multiply-Add Instructions */ 1756 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1757 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1758 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1759 RVVCALL(OPIVV3, 
vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1760 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1761 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1762 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1763 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1764 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1765 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) 1766 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) 1767 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) 1768 GEN_VEXT_VV(vwmacc_vv_b, 1, 2) 1769 GEN_VEXT_VV(vwmacc_vv_h, 2, 4) 1770 GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 1771 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) 1772 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) 1773 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) 1774 1775 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1776 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1777 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1778 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1779 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1780 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1781 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1782 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1783 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1784 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1785 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1786 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1787 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) 1788 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) 1789 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) 1790 GEN_VEXT_VX(vwmacc_vx_b, 1, 2) 1791 GEN_VEXT_VX(vwmacc_vx_h, 2, 4) 1792 GEN_VEXT_VX(vwmacc_vx_w, 4, 8) 1793 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) 1794 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) 1795 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) 1796 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) 1797 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) 1798 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) 1799 1800 /* Vector Integer Merge and Move Instructions */ 1801 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1802 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1803 uint32_t desc) \ 1804 { \ 1805 uint32_t vl = env->vl; \ 1806 uint32_t i; \ 1807 \ 1808 for (i = env->vstart; i < vl; i++) { \ 1809 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1810 *((ETYPE *)vd + H(i)) = s1; \ 1811 } \ 1812 env->vstart = 0; \ 1813 } 1814 1815 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1816 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1817 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1818 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1819 1820 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1821 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1822 uint32_t desc) \ 1823 { \ 1824 uint32_t vl = env->vl; \ 1825 uint32_t i; \ 1826 \ 1827 for (i = env->vstart; i < vl; i++) { \ 1828 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1829 } \ 1830 env->vstart = 0; \ 1831 } 1832 1833 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1834 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1835 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1836 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1837 1838 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1839 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1840 CPURISCVState *env, uint32_t desc) \ 1841 { \ 1842 uint32_t vl = env->vl; \ 1843 uint32_t i; \ 1844 \ 1845 for (i = env->vstart; i < vl; i++) { \ 1846 ETYPE *vt = (!vext_elem_mask(v0, i) ? 
vs2 : vs1);                                      \
        *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
    }                                                                \
    env->vstart = 0;                                                 \
}

GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)

#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
                  void *vs2, CPURISCVState *env, uint32_t desc)      \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t i;                                                      \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
        ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
                   (ETYPE)(target_long)s1);                          \
        *((ETYPE *)vd + H(i)) = d;                                   \
    }                                                                \
    env->vstart = 0;                                                 \
}

GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)

/*
 *** Vector Fixed-Point Arithmetic Instructions
 */

/* Vector Single-Width Saturating Add and Subtract */

/*
 * Fixed-point instructions need a rounding mode (vxrm) and may saturate
 * (vxsat), so define the common fixed-point macros and helpers here.
 */
typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
                          CPURISCVState *env, int vxrm);

#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)      \
static inline void                                                   \
do_##NAME(void *vd, void *vs1, void *vs2, int i,                     \
          CPURISCVState *env, int vxrm)                              \
{                                                                    \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                  \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                  \
    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                     \
}

static inline void
vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
             uint32_t vl, uint32_t vm, int vxrm,
             opivv2_rm_fn *fn)
{
    for (uint32_t i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, vs1, vs2, i, env, vxrm);
    }
    env->vstart = 0;
}

/*
 * Read vxrm once and dispatch, so every per-element call receives the
 * rounding mode as an explicit constant argument.
 */
static inline void
vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
             uint32_t desc, uint32_t esz, uint32_t dsz,
             opivv2_rm_fn *fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;

    switch (env->vxrm) {
    case 0: /* rnu: round-to-nearest-up */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 0, fn);
        break;
    case 1: /* rne: round-to-nearest-even */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 1, fn);
        break;
    case 2: /* rdn: round-down (truncate) */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 2, fn);
        break;
    default: /* rod: round-to-odd */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 3, fn);
        break;
    }
}

/* generate helpers for fixed point instructions with OPIVV format */
#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                               \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
                  CPURISCVState *env, uint32_t desc)                 \
{                                                                    \
    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,              \
                 do_##NAME);                                         \
}

static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
{
    uint8_t res = a + b;
    if (res < a) {
        res = UINT8_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
                               uint16_t b)
{
    uint16_t res = a + b;
    if (res < a) {
        res = UINT16_MAX;
        env->vxsat = 0x1;
    }
    return res;
1973 } 1974 1975 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1976 uint32_t b) 1977 { 1978 uint32_t res = a + b; 1979 if (res < a) { 1980 res = UINT32_MAX; 1981 env->vxsat = 0x1; 1982 } 1983 return res; 1984 } 1985 1986 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1987 uint64_t b) 1988 { 1989 uint64_t res = a + b; 1990 if (res < a) { 1991 res = UINT64_MAX; 1992 env->vxsat = 0x1; 1993 } 1994 return res; 1995 } 1996 1997 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 1998 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 1999 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2000 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2001 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) 2002 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) 2003 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) 2004 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) 2005 2006 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2007 CPURISCVState *env, int vxrm); 2008 2009 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2010 static inline void \ 2011 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2012 CPURISCVState *env, int vxrm) \ 2013 { \ 2014 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2015 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2016 } 2017 2018 static inline void 2019 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2020 CPURISCVState *env, 2021 uint32_t vl, uint32_t vm, int vxrm, 2022 opivx2_rm_fn *fn) 2023 { 2024 for (uint32_t i = env->vstart; i < vl; i++) { 2025 if (!vm && !vext_elem_mask(v0, i)) { 2026 continue; 2027 } 2028 fn(vd, s1, vs2, i, env, vxrm); 2029 } 2030 env->vstart = 0; 2031 } 2032 2033 static inline void 2034 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2035 CPURISCVState *env, 2036 uint32_t desc, uint32_t esz, uint32_t dsz, 2037 opivx2_rm_fn *fn) 2038 { 2039 uint32_t vm = vext_vm(desc); 2040 uint32_t vl = env->vl; 2041 2042 switch (env->vxrm) { 2043 case 0: /* rnu */ 2044 vext_vx_rm_1(vd, v0, s1, vs2, 2045 env, vl, vm, 0, fn); 2046 break; 2047 case 1: /* rne */ 2048 vext_vx_rm_1(vd, v0, s1, vs2, 2049 env, vl, vm, 1, fn); 2050 break; 2051 case 2: /* rdn */ 2052 vext_vx_rm_1(vd, v0, s1, vs2, 2053 env, vl, vm, 2, fn); 2054 break; 2055 default: /* rod */ 2056 vext_vx_rm_1(vd, v0, s1, vs2, 2057 env, vl, vm, 3, fn); 2058 break; 2059 } 2060 } 2061 2062 /* generate helpers for fixed point instructions with OPIVX format */ 2063 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ 2064 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2065 void *vs2, CPURISCVState *env, uint32_t desc) \ 2066 { \ 2067 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 2068 do_##NAME); \ 2069 } 2070 2071 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2072 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2073 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2074 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2075 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 2076 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) 2077 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) 2078 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) 2079 2080 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2081 { 2082 int8_t res = a + b; 2083 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2084 res = a > 0 ? 
INT8_MAX : INT8_MIN; 2085 env->vxsat = 0x1; 2086 } 2087 return res; 2088 } 2089 2090 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2091 { 2092 int16_t res = a + b; 2093 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2094 res = a > 0 ? INT16_MAX : INT16_MIN; 2095 env->vxsat = 0x1; 2096 } 2097 return res; 2098 } 2099 2100 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2101 { 2102 int32_t res = a + b; 2103 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2104 res = a > 0 ? INT32_MAX : INT32_MIN; 2105 env->vxsat = 0x1; 2106 } 2107 return res; 2108 } 2109 2110 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2111 { 2112 int64_t res = a + b; 2113 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2114 res = a > 0 ? INT64_MAX : INT64_MIN; 2115 env->vxsat = 0x1; 2116 } 2117 return res; 2118 } 2119 2120 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2121 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2122 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2123 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2124 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) 2125 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) 2126 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) 2127 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) 2128 2129 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2130 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2131 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2132 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2133 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) 2134 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) 2135 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) 2136 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) 2137 2138 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2139 { 2140 uint8_t res = a - b; 2141 if (res > a) { 2142 res = 0; 2143 env->vxsat = 0x1; 2144 } 2145 return res; 2146 } 2147 2148 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2149 uint16_t b) 2150 { 2151 uint16_t res = a - b; 2152 if (res > a) { 2153 res = 0; 2154 env->vxsat = 0x1; 2155 } 2156 return res; 2157 } 2158 2159 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2160 uint32_t b) 2161 { 2162 uint32_t res = a - b; 2163 if (res > a) { 2164 res = 0; 2165 env->vxsat = 0x1; 2166 } 2167 return res; 2168 } 2169 2170 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2171 uint64_t b) 2172 { 2173 uint64_t res = a - b; 2174 if (res > a) { 2175 res = 0; 2176 env->vxsat = 0x1; 2177 } 2178 return res; 2179 } 2180 2181 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2182 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2183 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2184 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2185 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) 2186 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) 2187 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) 2188 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) 2189 2190 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2191 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2192 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2193 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2194 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) 2195 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) 2196 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) 2197 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) 2198 2199 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2200 { 2201 int8_t res = a - b; 2202 if 
((res ^ a) & (a ^ b) & INT8_MIN) { 2203 res = a >= 0 ? INT8_MAX : INT8_MIN; 2204 env->vxsat = 0x1; 2205 } 2206 return res; 2207 } 2208 2209 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2210 { 2211 int16_t res = a - b; 2212 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2213 res = a >= 0 ? INT16_MAX : INT16_MIN; 2214 env->vxsat = 0x1; 2215 } 2216 return res; 2217 } 2218 2219 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2220 { 2221 int32_t res = a - b; 2222 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2223 res = a >= 0 ? INT32_MAX : INT32_MIN; 2224 env->vxsat = 0x1; 2225 } 2226 return res; 2227 } 2228 2229 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2230 { 2231 int64_t res = a - b; 2232 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2233 res = a >= 0 ? INT64_MAX : INT64_MIN; 2234 env->vxsat = 0x1; 2235 } 2236 return res; 2237 } 2238 2239 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2240 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2241 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2242 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2243 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) 2244 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) 2245 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) 2246 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) 2247 2248 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2249 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2250 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2251 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2252 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) 2253 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) 2254 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) 2255 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) 2256 2257 /* Vector Single-Width Averaging Add and Subtract */ 2258 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2259 { 2260 uint8_t d = extract64(v, shift, 1); 2261 uint8_t d1; 2262 uint64_t D1, D2; 2263 2264 if (shift == 0 || shift > 64) { 2265 return 0; 2266 } 2267 2268 d1 = extract64(v, shift - 1, 1); 2269 D1 = extract64(v, 0, shift); 2270 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2271 return d1; 2272 } else if (vxrm == 1) { /* round-to-nearest-even */ 2273 if (shift > 1) { 2274 D2 = extract64(v, 0, shift - 1); 2275 return d1 & ((D2 != 0) | d); 2276 } else { 2277 return d1 & d; 2278 } 2279 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2280 return !d & (D1 != 0); 2281 } 2282 return 0; /* round-down (truncate) */ 2283 } 2284 2285 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2286 { 2287 int64_t res = (int64_t)a + b; 2288 uint8_t round = get_round(vxrm, res, 1); 2289 2290 return (res >> 1) + round; 2291 } 2292 2293 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2294 { 2295 int64_t res = a + b; 2296 uint8_t round = get_round(vxrm, res, 1); 2297 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2298 2299 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2300 return ((res >> 1) ^ over) + round; 2301 } 2302 2303 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2304 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2305 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2306 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2307 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) 2308 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 2309 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) 2310 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) 2311 2312 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2313 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2314 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2315 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2316 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) 2317 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) 2318 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) 2319 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) 2320 2321 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2322 uint32_t a, uint32_t b) 2323 { 2324 uint64_t res = (uint64_t)a + b; 2325 uint8_t round = get_round(vxrm, res, 1); 2326 2327 return (res >> 1) + round; 2328 } 2329 2330 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2331 uint64_t a, uint64_t b) 2332 { 2333 uint64_t res = a + b; 2334 uint8_t round = get_round(vxrm, res, 1); 2335 uint64_t over = (uint64_t)(res < a) << 63; 2336 2337 return ((res >> 1) | over) + round; 2338 } 2339 2340 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2341 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2342 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2343 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2344 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1) 2345 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2) 2346 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4) 2347 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8) 2348 2349 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2350 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2351 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2352 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2353 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1) 2354 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2) 2355 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4) 2356 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8) 2357 2358 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2359 { 2360 int64_t res = (int64_t)a - b; 2361 uint8_t round = get_round(vxrm, res, 1); 2362 2363 return (res >> 1) + round; 2364 } 2365 2366 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2367 { 2368 int64_t res = (int64_t)a - b; 2369 uint8_t round = get_round(vxrm, res, 1); 2370 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2371 2372 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2373 return ((res >> 1) ^ over) + round; 2374 } 2375 2376 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2377 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2378 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2379 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2380 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) 2381 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) 2382 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) 2383 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) 2384 2385 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2386 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2387 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2388 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2389 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) 2390 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) 2391 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) 2392 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) 2393 2394 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2395 uint32_t a, uint32_t b) 2396 { 2397 int64_t res = (int64_t)a - b; 2398 uint8_t round = get_round(vxrm, res, 1); 2399 2400 return (res >> 1) + round; 2401 } 2402 2403 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2404 uint64_t a, uint64_t b) 2405 { 2406 uint64_t res = (uint64_t)a - b; 2407 uint8_t round = get_round(vxrm, res, 1); 2408 uint64_t over = (uint64_t)(res > a) << 63; 2409 2410 return ((res >> 1) | over) + round; 2411 } 2412 2413 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2414 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2415 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2416 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2417 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1) 2418 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2) 2419 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4) 2420 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8) 2421 2422 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2423 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2424 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2425 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2426 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1) 2427 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2) 2428 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4) 2429 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8) 2430 2431 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2432 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2433 { 2434 uint8_t round; 2435 int16_t res; 2436 2437 res = (int16_t)a * (int16_t)b; 2438 round = get_round(vxrm, res, 7); 2439 res = (res >> 7) + round; 2440 2441 if (res > INT8_MAX) { 2442 env->vxsat = 0x1; 2443 return INT8_MAX; 2444 } else if (res < INT8_MIN) { 2445 env->vxsat = 0x1; 2446 return INT8_MIN; 2447 } else { 2448 return res; 2449 } 2450 } 2451 2452 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2453 { 2454 uint8_t round; 2455 int32_t res; 2456 2457 res = (int32_t)a * (int32_t)b; 2458 round = get_round(vxrm, res, 15); 2459 res = (res >> 15) + round; 2460 2461 if (res > INT16_MAX) { 2462 env->vxsat = 0x1; 2463 return INT16_MAX; 2464 } else if (res < INT16_MIN) { 2465 env->vxsat = 0x1; 2466 return INT16_MIN; 2467 } else { 2468 return res; 2469 } 2470 } 2471 2472 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2473 { 2474 uint8_t round; 2475 int64_t res; 2476 2477 res = (int64_t)a * (int64_t)b; 2478 round = get_round(vxrm, res, 31); 2479 res = (res >> 31) + round; 2480 2481 if (res > INT32_MAX) { 2482 env->vxsat = 0x1; 2483 return INT32_MAX; 2484 } else 
if (res < INT32_MIN) { 2485 env->vxsat = 0x1; 2486 return INT32_MIN; 2487 } else { 2488 return res; 2489 } 2490 } 2491 2492 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2493 { 2494 uint8_t round; 2495 uint64_t hi_64, lo_64; 2496 int64_t res; 2497 2498 if (a == INT64_MIN && b == INT64_MIN) { 2499 env->vxsat = 1; 2500 return INT64_MAX; 2501 } 2502 2503 muls64(&lo_64, &hi_64, a, b); 2504 round = get_round(vxrm, lo_64, 63); 2505 /* 2506 * Cannot overflow, as there are always 2507 * 2 sign bits after multiply. 2508 */ 2509 res = (hi_64 << 1) | (lo_64 >> 63); 2510 if (round) { 2511 if (res == INT64_MAX) { 2512 env->vxsat = 1; 2513 } else { 2514 res += 1; 2515 } 2516 } 2517 return res; 2518 } 2519 2520 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2521 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2522 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2523 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2524 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) 2525 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) 2526 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) 2527 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) 2528 2529 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2530 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2531 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2532 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2533 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) 2534 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) 2535 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) 2536 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) 2537 2538 /* Vector Single-Width Scaling Shift Instructions */ 2539 static inline uint8_t 2540 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2541 { 2542 uint8_t round, shift = b & 0x7; 2543 uint8_t res; 2544 2545 round = get_round(vxrm, a, shift); 2546 res = (a >> shift) + round; 2547 return res; 2548 } 2549 static inline uint16_t 2550 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2551 { 2552 uint8_t round, shift = b & 0xf; 2553 uint16_t res; 2554 2555 round = get_round(vxrm, a, shift); 2556 res = (a >> shift) + round; 2557 return res; 2558 } 2559 static inline uint32_t 2560 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2561 { 2562 uint8_t round, shift = b & 0x1f; 2563 uint32_t res; 2564 2565 round = get_round(vxrm, a, shift); 2566 res = (a >> shift) + round; 2567 return res; 2568 } 2569 static inline uint64_t 2570 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2571 { 2572 uint8_t round, shift = b & 0x3f; 2573 uint64_t res; 2574 2575 round = get_round(vxrm, a, shift); 2576 res = (a >> shift) + round; 2577 return res; 2578 } 2579 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2580 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2581 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2582 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2583 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) 2584 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) 2585 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) 2586 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) 2587 2588 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2589 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2590 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2591 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2592 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) 2593 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) 2594 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) 2595 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) 2596 2597 static inline int8_t 2598 vssra8(CPURISCVState *env, int 
vxrm, int8_t a, int8_t b) 2599 { 2600 uint8_t round, shift = b & 0x7; 2601 int8_t res; 2602 2603 round = get_round(vxrm, a, shift); 2604 res = (a >> shift) + round; 2605 return res; 2606 } 2607 static inline int16_t 2608 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2609 { 2610 uint8_t round, shift = b & 0xf; 2611 int16_t res; 2612 2613 round = get_round(vxrm, a, shift); 2614 res = (a >> shift) + round; 2615 return res; 2616 } 2617 static inline int32_t 2618 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2619 { 2620 uint8_t round, shift = b & 0x1f; 2621 int32_t res; 2622 2623 round = get_round(vxrm, a, shift); 2624 res = (a >> shift) + round; 2625 return res; 2626 } 2627 static inline int64_t 2628 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2629 { 2630 uint8_t round, shift = b & 0x3f; 2631 int64_t res; 2632 2633 round = get_round(vxrm, a, shift); 2634 res = (a >> shift) + round; 2635 return res; 2636 } 2637 2638 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2639 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2640 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2641 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2642 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) 2643 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) 2644 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) 2645 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) 2646 2647 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2648 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2649 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2650 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2651 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) 2652 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) 2653 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) 2654 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) 2655 2656 /* Vector Narrowing Fixed-Point Clip Instructions */ 2657 static inline int8_t 2658 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2659 { 2660 uint8_t round, shift = b & 0xf; 2661 int16_t res; 2662 2663 round = get_round(vxrm, a, shift); 2664 res = (a >> shift) + round; 2665 if (res > INT8_MAX) { 2666 env->vxsat = 0x1; 2667 return INT8_MAX; 2668 } else if (res < INT8_MIN) { 2669 env->vxsat = 0x1; 2670 return INT8_MIN; 2671 } else { 2672 return res; 2673 } 2674 } 2675 2676 static inline int16_t 2677 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2678 { 2679 uint8_t round, shift = b & 0x1f; 2680 int32_t res; 2681 2682 round = get_round(vxrm, a, shift); 2683 res = (a >> shift) + round; 2684 if (res > INT16_MAX) { 2685 env->vxsat = 0x1; 2686 return INT16_MAX; 2687 } else if (res < INT16_MIN) { 2688 env->vxsat = 0x1; 2689 return INT16_MIN; 2690 } else { 2691 return res; 2692 } 2693 } 2694 2695 static inline int32_t 2696 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2697 { 2698 uint8_t round, shift = b & 0x3f; 2699 int64_t res; 2700 2701 round = get_round(vxrm, a, shift); 2702 res = (a >> shift) + round; 2703 if (res > INT32_MAX) { 2704 env->vxsat = 0x1; 2705 return INT32_MAX; 2706 } else if (res < INT32_MIN) { 2707 env->vxsat = 0x1; 2708 return INT32_MIN; 2709 } else { 2710 return res; 2711 } 2712 } 2713 2714 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2715 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2716 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2717 GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1) 2718 GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2) 2719 GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4) 2720 2721 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, 
vnclip8)
RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)

/*
 * Unsigned narrowing clip: compute the rounding increment for the bits
 * about to be shifted out, shift the 2*SEW source right, then saturate
 * to the destination range, setting vxsat when the value was clipped.
 * The shift amount is taken modulo twice the destination SEW.
 */
static inline uint8_t
vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
{
    uint8_t round, shift = b & 0xf;
    uint16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT8_MAX) {
        env->vxsat = 0x1;
        return UINT8_MAX;
    } else {
        return res;
    }
}

static inline uint16_t
vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
{
    uint8_t round, shift = b & 0x1f;
    uint32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT16_MAX) {
        env->vxsat = 0x1;
        return UINT16_MAX;
    } else {
        return res;
    }
}

static inline uint32_t
vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x3f;
    uint64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT32_MAX) {
        env->vxsat = 0x1;
        return UINT32_MAX;
    } else {
        return res;
    }
}

RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)

RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)

/*
 *** Vector Floating-Point Arithmetic Instructions
 */
/* Vector Single-Width Floating-Point Add/Subtract Instructions */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
}

#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t i;                                           \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, vs1, vs2, i, env);                  \
    }                                                     \
    env->vstart = 0;                                      \
}

RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)

#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}

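/*
 * Note on the pattern used by the floating-point helpers: OPFVV2/OPFVF2
 * define the per-element operation (do_<insn>), and the GEN_VEXT_* macros
 * wrap it in the usual masked, vl-bounded element loop.  For the
 * vector-scalar forms the 64-bit scalar s1 is narrowed to the element
 * type by the (TX1)(T1)s1 cast.  As an illustration (not the literal
 * preprocessor output), the vfadd_vf_h definition below expands to
 * roughly:
 *
 *   static void do_vfadd_vf_h(void *vd, uint64_t s1, void *vs2, int i,
 *                             CPURISCVState *env)
 *   {
 *       uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *       *((uint16_t *)vd + H2(i)) =
 *           float16_add(s2, (uint16_t)s1, &env->fp_status);
 *   }
 */
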
2836 #define GEN_VEXT_VF(NAME, ESZ, DSZ) \ 2837 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2838 void *vs2, CPURISCVState *env, \ 2839 uint32_t desc) \ 2840 { \ 2841 uint32_t vm = vext_vm(desc); \ 2842 uint32_t vl = env->vl; \ 2843 uint32_t i; \ 2844 \ 2845 for (i = env->vstart; i < vl; i++) { \ 2846 if (!vm && !vext_elem_mask(v0, i)) { \ 2847 continue; \ 2848 } \ 2849 do_##NAME(vd, s1, vs2, i, env); \ 2850 } \ 2851 env->vstart = 0; \ 2852 } 2853 2854 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2855 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2856 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2857 GEN_VEXT_VF(vfadd_vf_h, 2, 2) 2858 GEN_VEXT_VF(vfadd_vf_w, 4, 4) 2859 GEN_VEXT_VF(vfadd_vf_d, 8, 8) 2860 2861 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2862 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2863 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2864 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) 2865 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) 2866 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) 2867 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2868 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2869 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2870 GEN_VEXT_VF(vfsub_vf_h, 2, 2) 2871 GEN_VEXT_VF(vfsub_vf_w, 4, 4) 2872 GEN_VEXT_VF(vfsub_vf_d, 8, 8) 2873 2874 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2875 { 2876 return float16_sub(b, a, s); 2877 } 2878 2879 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2880 { 2881 return float32_sub(b, a, s); 2882 } 2883 2884 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2885 { 2886 return float64_sub(b, a, s); 2887 } 2888 2889 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2890 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2891 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2892 GEN_VEXT_VF(vfrsub_vf_h, 2, 2) 2893 GEN_VEXT_VF(vfrsub_vf_w, 4, 4) 2894 GEN_VEXT_VF(vfrsub_vf_d, 8, 8) 2895 2896 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2897 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2898 { 2899 return float32_add(float16_to_float32(a, true, s), 2900 float16_to_float32(b, true, s), s); 2901 } 2902 2903 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2904 { 2905 return float64_add(float32_to_float64(a, s), 2906 float32_to_float64(b, s), s); 2907 2908 } 2909 2910 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2911 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2912 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) 2913 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) 2914 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2915 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2916 GEN_VEXT_VF(vfwadd_vf_h, 2, 4) 2917 GEN_VEXT_VF(vfwadd_vf_w, 4, 8) 2918 2919 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2920 { 2921 return float32_sub(float16_to_float32(a, true, s), 2922 float16_to_float32(b, true, s), s); 2923 } 2924 2925 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2926 { 2927 return float64_sub(float32_to_float64(a, s), 2928 float32_to_float64(b, s), s); 2929 2930 } 2931 2932 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 2933 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 2934 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) 2935 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) 2936 RVVCALL(OPFVF2, vfwsub_vf_h, 
WOP_UUU_H, H4, H2, vfwsub16) 2937 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 2938 GEN_VEXT_VF(vfwsub_vf_h, 2, 4) 2939 GEN_VEXT_VF(vfwsub_vf_w, 4, 8) 2940 2941 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 2942 { 2943 return float32_add(a, float16_to_float32(b, true, s), s); 2944 } 2945 2946 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 2947 { 2948 return float64_add(a, float32_to_float64(b, s), s); 2949 } 2950 2951 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 2952 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 2953 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) 2954 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 2955 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 2956 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 2957 GEN_VEXT_VF(vfwadd_wf_h, 2, 4) 2958 GEN_VEXT_VF(vfwadd_wf_w, 4, 8) 2959 2960 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 2961 { 2962 return float32_sub(a, float16_to_float32(b, true, s), s); 2963 } 2964 2965 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 2966 { 2967 return float64_sub(a, float32_to_float64(b, s), s); 2968 } 2969 2970 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 2971 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 2972 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) 2973 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) 2974 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 2975 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 2976 GEN_VEXT_VF(vfwsub_wf_h, 2, 4) 2977 GEN_VEXT_VF(vfwsub_wf_w, 4, 8) 2978 2979 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 2980 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 2981 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 2982 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 2983 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) 2984 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) 2985 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) 2986 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 2987 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 2988 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 2989 GEN_VEXT_VF(vfmul_vf_h, 2, 2) 2990 GEN_VEXT_VF(vfmul_vf_w, 4, 4) 2991 GEN_VEXT_VF(vfmul_vf_d, 8, 8) 2992 2993 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 2994 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 2995 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 2996 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 2997 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) 2998 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) 2999 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3000 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3001 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3002 GEN_VEXT_VF(vfdiv_vf_h, 2, 2) 3003 GEN_VEXT_VF(vfdiv_vf_w, 4, 4) 3004 GEN_VEXT_VF(vfdiv_vf_d, 8, 8) 3005 3006 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3007 { 3008 return float16_div(b, a, s); 3009 } 3010 3011 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3012 { 3013 return float32_div(b, a, s); 3014 } 3015 3016 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3017 { 3018 return float64_div(b, a, s); 3019 } 3020 3021 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3022 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3023 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3024 
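/*
 * Note: vfrdiv.vf computes f[rs1] / vs2[i].  The float*_rdiv wrappers
 * above simply swap the operand order before calling the softfloat
 * divide, mirroring the float*_rsub wrappers used for vfrsub.vf; the
 * GEN_VEXT_VF expansions below supply the masked element loop.
 */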
GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) 3025 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) 3026 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) 3027 3028 /* Vector Widening Floating-Point Multiply */ 3029 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3030 { 3031 return float32_mul(float16_to_float32(a, true, s), 3032 float16_to_float32(b, true, s), s); 3033 } 3034 3035 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3036 { 3037 return float64_mul(float32_to_float64(a, s), 3038 float32_to_float64(b, s), s); 3039 3040 } 3041 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3042 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3043 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) 3044 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) 3045 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3046 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3047 GEN_VEXT_VF(vfwmul_vf_h, 2, 4) 3048 GEN_VEXT_VF(vfwmul_vf_w, 4, 8) 3049 3050 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3051 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3052 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3053 CPURISCVState *env) \ 3054 { \ 3055 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3056 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3057 TD d = *((TD *)vd + HD(i)); \ 3058 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3059 } 3060 3061 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3062 { 3063 return float16_muladd(a, b, d, 0, s); 3064 } 3065 3066 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3067 { 3068 return float32_muladd(a, b, d, 0, s); 3069 } 3070 3071 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3072 { 3073 return float64_muladd(a, b, d, 0, s); 3074 } 3075 3076 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3077 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3078 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3079 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) 3080 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) 3081 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) 3082 3083 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3084 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3085 CPURISCVState *env) \ 3086 { \ 3087 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3088 TD d = *((TD *)vd + HD(i)); \ 3089 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3090 } 3091 3092 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3093 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3094 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3095 GEN_VEXT_VF(vfmacc_vf_h, 2, 2) 3096 GEN_VEXT_VF(vfmacc_vf_w, 4, 4) 3097 GEN_VEXT_VF(vfmacc_vf_d, 8, 8) 3098 3099 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3100 { 3101 return float16_muladd(a, b, d, 3102 float_muladd_negate_c | float_muladd_negate_product, s); 3103 } 3104 3105 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3106 { 3107 return float32_muladd(a, b, d, 3108 float_muladd_negate_c | float_muladd_negate_product, s); 3109 } 3110 3111 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3112 { 3113 return float64_muladd(a, b, d, 3114 float_muladd_negate_c | float_muladd_negate_product, s); 3115 } 3116 3117 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3118 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3119 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3120 
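/*
 * vfnmacc computes vd[i] = -(vs1[i] * vs2[i]) - vd[i].  The fnmacc*
 * helpers above obtain this from a single fused multiply-add by passing
 * float_muladd_negate_product | float_muladd_negate_c to float*_muladd,
 * so the result is rounded only once.
 */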
GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) 3121 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) 3122 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) 3123 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3124 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3125 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3126 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) 3127 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) 3128 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) 3129 3130 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3131 { 3132 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3133 } 3134 3135 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3136 { 3137 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3138 } 3139 3140 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3141 { 3142 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3143 } 3144 3145 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3146 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3147 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3148 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) 3149 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) 3150 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) 3151 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3152 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3153 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3154 GEN_VEXT_VF(vfmsac_vf_h, 2, 2) 3155 GEN_VEXT_VF(vfmsac_vf_w, 4, 4) 3156 GEN_VEXT_VF(vfmsac_vf_d, 8, 8) 3157 3158 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3159 { 3160 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3161 } 3162 3163 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3164 { 3165 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3166 } 3167 3168 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3169 { 3170 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3171 } 3172 3173 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3174 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3175 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3176 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) 3177 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) 3178 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) 3179 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3180 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3181 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3182 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) 3183 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) 3184 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) 3185 3186 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3187 { 3188 return float16_muladd(d, b, a, 0, s); 3189 } 3190 3191 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3192 { 3193 return float32_muladd(d, b, a, 0, s); 3194 } 3195 3196 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3197 { 3198 return float64_muladd(d, b, a, 0, s); 3199 } 3200 3201 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3202 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3203 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3204 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) 3205 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) 3206 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) 3207 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3208 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3209 
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3210 GEN_VEXT_VF(vfmadd_vf_h, 2, 2) 3211 GEN_VEXT_VF(vfmadd_vf_w, 4, 4) 3212 GEN_VEXT_VF(vfmadd_vf_d, 8, 8) 3213 3214 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3215 { 3216 return float16_muladd(d, b, a, 3217 float_muladd_negate_c | float_muladd_negate_product, s); 3218 } 3219 3220 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3221 { 3222 return float32_muladd(d, b, a, 3223 float_muladd_negate_c | float_muladd_negate_product, s); 3224 } 3225 3226 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3227 { 3228 return float64_muladd(d, b, a, 3229 float_muladd_negate_c | float_muladd_negate_product, s); 3230 } 3231 3232 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3233 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3234 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3235 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) 3236 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) 3237 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) 3238 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3239 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3240 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3241 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) 3242 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) 3243 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) 3244 3245 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3246 { 3247 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3248 } 3249 3250 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3251 { 3252 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3253 } 3254 3255 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3256 { 3257 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3258 } 3259 3260 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3261 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3262 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3263 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) 3264 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 3265 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) 3266 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3267 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3268 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3269 GEN_VEXT_VF(vfmsub_vf_h, 2, 2) 3270 GEN_VEXT_VF(vfmsub_vf_w, 4, 4) 3271 GEN_VEXT_VF(vfmsub_vf_d, 8, 8) 3272 3273 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3274 { 3275 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3276 } 3277 3278 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3279 { 3280 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3281 } 3282 3283 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3284 { 3285 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3286 } 3287 3288 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3289 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3290 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3291 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) 3292 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) 3293 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) 3294 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3295 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3296 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3297 
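/*
 * Operand-order note for the single-width FMA families above: the
 * *macc/*msac variants multiply vs1 (or the scalar) by vs2 and use vd
 * as the third, added/subtracted operand, while the *madd/*msub
 * variants multiply vs1 (or the scalar) by vd and use vs2 as the third
 * operand.  The helpers implement this purely by the order in which vd
 * and vs2 are handed to float*_muladd; the negated and subtracting
 * variants set the corresponding float_muladd_negate_* flags.
 */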
GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) 3298 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) 3299 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) 3300 3301 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3302 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3303 { 3304 return float32_muladd(float16_to_float32(a, true, s), 3305 float16_to_float32(b, true, s), d, 0, s); 3306 } 3307 3308 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3309 { 3310 return float64_muladd(float32_to_float64(a, s), 3311 float32_to_float64(b, s), d, 0, s); 3312 } 3313 3314 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3315 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3316 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) 3317 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) 3318 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3319 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3320 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) 3321 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) 3322 3323 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3324 { 3325 return float32_muladd(float16_to_float32(a, true, s), 3326 float16_to_float32(b, true, s), d, 3327 float_muladd_negate_c | float_muladd_negate_product, s); 3328 } 3329 3330 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3331 { 3332 return float64_muladd(float32_to_float64(a, s), 3333 float32_to_float64(b, s), d, 3334 float_muladd_negate_c | float_muladd_negate_product, s); 3335 } 3336 3337 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3338 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3339 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) 3340 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) 3341 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3342 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3343 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) 3344 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) 3345 3346 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3347 { 3348 return float32_muladd(float16_to_float32(a, true, s), 3349 float16_to_float32(b, true, s), d, 3350 float_muladd_negate_c, s); 3351 } 3352 3353 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3354 { 3355 return float64_muladd(float32_to_float64(a, s), 3356 float32_to_float64(b, s), d, 3357 float_muladd_negate_c, s); 3358 } 3359 3360 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3361 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3362 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) 3363 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) 3364 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3365 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3366 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) 3367 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) 3368 3369 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3370 { 3371 return float32_muladd(float16_to_float32(a, true, s), 3372 float16_to_float32(b, true, s), d, 3373 float_muladd_negate_product, s); 3374 } 3375 3376 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3377 { 3378 return float64_muladd(float32_to_float64(a, s), 3379 float32_to_float64(b, s), d, 3380 float_muladd_negate_product, s); 3381 } 3382 3383 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3384 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3385 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) 3386 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 3387 
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3388 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3389 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) 3390 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) 3391 3392 /* Vector Floating-Point Square-Root Instruction */ 3393 /* (TD, T2, TX2) */ 3394 #define OP_UU_H uint16_t, uint16_t, uint16_t 3395 #define OP_UU_W uint32_t, uint32_t, uint32_t 3396 #define OP_UU_D uint64_t, uint64_t, uint64_t 3397 3398 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3399 static void do_##NAME(void *vd, void *vs2, int i, \ 3400 CPURISCVState *env) \ 3401 { \ 3402 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3403 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3404 } 3405 3406 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ 3407 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3408 CPURISCVState *env, uint32_t desc) \ 3409 { \ 3410 uint32_t vm = vext_vm(desc); \ 3411 uint32_t vl = env->vl; \ 3412 uint32_t i; \ 3413 \ 3414 if (vl == 0) { \ 3415 return; \ 3416 } \ 3417 for (i = env->vstart; i < vl; i++) { \ 3418 if (!vm && !vext_elem_mask(v0, i)) { \ 3419 continue; \ 3420 } \ 3421 do_##NAME(vd, vs2, i, env); \ 3422 } \ 3423 env->vstart = 0; \ 3424 } 3425 3426 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3427 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3428 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3429 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) 3430 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) 3431 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) 3432 3433 /* 3434 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3435 * 3436 * Adapted from riscv-v-spec recip.c: 3437 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3438 */ 3439 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3440 { 3441 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3442 uint64_t exp = extract64(f, frac_size, exp_size); 3443 uint64_t frac = extract64(f, 0, frac_size); 3444 3445 const uint8_t lookup_table[] = { 3446 52, 51, 50, 48, 47, 46, 44, 43, 3447 42, 41, 40, 39, 38, 36, 35, 34, 3448 33, 32, 31, 30, 30, 29, 28, 27, 3449 26, 25, 24, 23, 23, 22, 21, 20, 3450 19, 19, 18, 17, 16, 16, 15, 14, 3451 14, 13, 12, 12, 11, 10, 10, 9, 3452 9, 8, 7, 7, 6, 6, 5, 4, 3453 4, 3, 3, 2, 2, 1, 1, 0, 3454 127, 125, 123, 121, 119, 118, 116, 114, 3455 113, 111, 109, 108, 106, 105, 103, 102, 3456 100, 99, 97, 96, 95, 93, 92, 91, 3457 90, 88, 87, 86, 85, 84, 83, 82, 3458 80, 79, 78, 77, 76, 75, 74, 73, 3459 72, 71, 70, 70, 69, 68, 67, 66, 3460 65, 64, 63, 63, 62, 61, 60, 59, 3461 59, 58, 57, 56, 56, 55, 54, 53 3462 }; 3463 const int precision = 7; 3464 3465 if (exp == 0 && frac != 0) { /* subnormal */ 3466 /* Normalize the subnormal. 
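The loop brings the most significant set fraction bit to the top, decrementing exp once per shift; the extra shift below then drops that leading bit, leaving frac in the usual implicit-one form.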
*/ 3467 while (extract64(frac, frac_size - 1, 1) == 0) { 3468 exp--; 3469 frac <<= 1; 3470 } 3471 3472 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3473 } 3474 3475 int idx = ((exp & 1) << (precision - 1)) | 3476 (frac >> (frac_size - precision + 1)); 3477 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3478 (frac_size - precision); 3479 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3480 3481 uint64_t val = 0; 3482 val = deposit64(val, 0, frac_size, out_frac); 3483 val = deposit64(val, frac_size, exp_size, out_exp); 3484 val = deposit64(val, frac_size + exp_size, 1, sign); 3485 return val; 3486 } 3487 3488 static float16 frsqrt7_h(float16 f, float_status *s) 3489 { 3490 int exp_size = 5, frac_size = 10; 3491 bool sign = float16_is_neg(f); 3492 3493 /* 3494 * frsqrt7(sNaN) = canonical NaN 3495 * frsqrt7(-inf) = canonical NaN 3496 * frsqrt7(-normal) = canonical NaN 3497 * frsqrt7(-subnormal) = canonical NaN 3498 */ 3499 if (float16_is_signaling_nan(f, s) || 3500 (float16_is_infinity(f) && sign) || 3501 (float16_is_normal(f) && sign) || 3502 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3503 s->float_exception_flags |= float_flag_invalid; 3504 return float16_default_nan(s); 3505 } 3506 3507 /* frsqrt7(qNaN) = canonical NaN */ 3508 if (float16_is_quiet_nan(f, s)) { 3509 return float16_default_nan(s); 3510 } 3511 3512 /* frsqrt7(+-0) = +-inf */ 3513 if (float16_is_zero(f)) { 3514 s->float_exception_flags |= float_flag_divbyzero; 3515 return float16_set_sign(float16_infinity, sign); 3516 } 3517 3518 /* frsqrt7(+inf) = +0 */ 3519 if (float16_is_infinity(f) && !sign) { 3520 return float16_set_sign(float16_zero, sign); 3521 } 3522 3523 /* +normal, +subnormal */ 3524 uint64_t val = frsqrt7(f, exp_size, frac_size); 3525 return make_float16(val); 3526 } 3527 3528 static float32 frsqrt7_s(float32 f, float_status *s) 3529 { 3530 int exp_size = 8, frac_size = 23; 3531 bool sign = float32_is_neg(f); 3532 3533 /* 3534 * frsqrt7(sNaN) = canonical NaN 3535 * frsqrt7(-inf) = canonical NaN 3536 * frsqrt7(-normal) = canonical NaN 3537 * frsqrt7(-subnormal) = canonical NaN 3538 */ 3539 if (float32_is_signaling_nan(f, s) || 3540 (float32_is_infinity(f) && sign) || 3541 (float32_is_normal(f) && sign) || 3542 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3543 s->float_exception_flags |= float_flag_invalid; 3544 return float32_default_nan(s); 3545 } 3546 3547 /* frsqrt7(qNaN) = canonical NaN */ 3548 if (float32_is_quiet_nan(f, s)) { 3549 return float32_default_nan(s); 3550 } 3551 3552 /* frsqrt7(+-0) = +-inf */ 3553 if (float32_is_zero(f)) { 3554 s->float_exception_flags |= float_flag_divbyzero; 3555 return float32_set_sign(float32_infinity, sign); 3556 } 3557 3558 /* frsqrt7(+inf) = +0 */ 3559 if (float32_is_infinity(f) && !sign) { 3560 return float32_set_sign(float32_zero, sign); 3561 } 3562 3563 /* +normal, +subnormal */ 3564 uint64_t val = frsqrt7(f, exp_size, frac_size); 3565 return make_float32(val); 3566 } 3567 3568 static float64 frsqrt7_d(float64 f, float_status *s) 3569 { 3570 int exp_size = 11, frac_size = 52; 3571 bool sign = float64_is_neg(f); 3572 3573 /* 3574 * frsqrt7(sNaN) = canonical NaN 3575 * frsqrt7(-inf) = canonical NaN 3576 * frsqrt7(-normal) = canonical NaN 3577 * frsqrt7(-subnormal) = canonical NaN 3578 */ 3579 if (float64_is_signaling_nan(f, s) || 3580 (float64_is_infinity(f) && sign) || 3581 (float64_is_normal(f) && sign) || 3582 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3583 
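        /*
         * Note that -0.0 is excluded by the !float64_is_zero() test above;
         * it is handled below as frsqrt7(+-0) = +-inf with the divide by
         * zero flag raised.
         */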
s->float_exception_flags |= float_flag_invalid; 3584 return float64_default_nan(s); 3585 } 3586 3587 /* frsqrt7(qNaN) = canonical NaN */ 3588 if (float64_is_quiet_nan(f, s)) { 3589 return float64_default_nan(s); 3590 } 3591 3592 /* frsqrt7(+-0) = +-inf */ 3593 if (float64_is_zero(f)) { 3594 s->float_exception_flags |= float_flag_divbyzero; 3595 return float64_set_sign(float64_infinity, sign); 3596 } 3597 3598 /* frsqrt7(+inf) = +0 */ 3599 if (float64_is_infinity(f) && !sign) { 3600 return float64_set_sign(float64_zero, sign); 3601 } 3602 3603 /* +normal, +subnormal */ 3604 uint64_t val = frsqrt7(f, exp_size, frac_size); 3605 return make_float64(val); 3606 } 3607 3608 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3609 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3610 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3611 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2) 3612 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4) 3613 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8) 3614 3615 /* 3616 * Vector Floating-Point Reciprocal Estimate Instruction 3617 * 3618 * Adapted from riscv-v-spec recip.c: 3619 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3620 */ 3621 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3622 float_status *s) 3623 { 3624 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3625 uint64_t exp = extract64(f, frac_size, exp_size); 3626 uint64_t frac = extract64(f, 0, frac_size); 3627 3628 const uint8_t lookup_table[] = { 3629 127, 125, 123, 121, 119, 117, 116, 114, 3630 112, 110, 109, 107, 105, 104, 102, 100, 3631 99, 97, 96, 94, 93, 91, 90, 88, 3632 87, 85, 84, 83, 81, 80, 79, 77, 3633 76, 75, 74, 72, 71, 70, 69, 68, 3634 66, 65, 64, 63, 62, 61, 60, 59, 3635 58, 57, 56, 55, 54, 53, 52, 51, 3636 50, 49, 48, 47, 46, 45, 44, 43, 3637 42, 41, 40, 40, 39, 38, 37, 36, 3638 35, 35, 34, 33, 32, 31, 31, 30, 3639 29, 28, 28, 27, 26, 25, 25, 24, 3640 23, 23, 22, 21, 21, 20, 19, 19, 3641 18, 17, 17, 16, 15, 15, 14, 14, 3642 13, 12, 12, 11, 11, 10, 9, 9, 3643 8, 8, 7, 7, 6, 5, 5, 4, 3644 4, 3, 3, 2, 2, 1, 1, 0 3645 }; 3646 const int precision = 7; 3647 3648 if (exp == 0 && frac != 0) { /* subnormal */ 3649 /* Normalize the subnormal. */ 3650 while (extract64(frac, frac_size - 1, 1) == 0) { 3651 exp--; 3652 frac <<= 1; 3653 } 3654 3655 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3656 3657 if (exp != 0 && exp != UINT64_MAX) { 3658 /* 3659 * Overflow to inf or max value of same sign, 3660 * depending on sign and rounding mode. 3661 */ 3662 s->float_exception_flags |= (float_flag_inexact | 3663 float_flag_overflow); 3664 3665 if ((s->float_rounding_mode == float_round_to_zero) || 3666 ((s->float_rounding_mode == float_round_down) && !sign) || 3667 ((s->float_rounding_mode == float_round_up) && sign)) { 3668 /* Return greatest/negative finite value. */ 3669 return (sign << (exp_size + frac_size)) | 3670 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3671 } else { 3672 /* Return +-inf. */ 3673 return (sign << (exp_size + frac_size)) | 3674 MAKE_64BIT_MASK(frac_size, exp_size); 3675 } 3676 } 3677 } 3678 3679 int idx = frac >> (frac_size - precision); 3680 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3681 (frac_size - precision); 3682 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3683 3684 if (out_exp == 0 || out_exp == UINT64_MAX) { 3685 /* 3686 * The result is subnormal, but don't raise the underflow exception, 3687 * because there's no additional loss of precision. 
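The estimate is re-encoded as a subnormal instead: the implicit leading one is made explicit and the fraction is shifted right once (twice when out_exp is -1), leaving a zero exponent field.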
3688 */ 3689 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3690 if (out_exp == UINT64_MAX) { 3691 out_frac >>= 1; 3692 out_exp = 0; 3693 } 3694 } 3695 3696 uint64_t val = 0; 3697 val = deposit64(val, 0, frac_size, out_frac); 3698 val = deposit64(val, frac_size, exp_size, out_exp); 3699 val = deposit64(val, frac_size + exp_size, 1, sign); 3700 return val; 3701 } 3702 3703 static float16 frec7_h(float16 f, float_status *s) 3704 { 3705 int exp_size = 5, frac_size = 10; 3706 bool sign = float16_is_neg(f); 3707 3708 /* frec7(+-inf) = +-0 */ 3709 if (float16_is_infinity(f)) { 3710 return float16_set_sign(float16_zero, sign); 3711 } 3712 3713 /* frec7(+-0) = +-inf */ 3714 if (float16_is_zero(f)) { 3715 s->float_exception_flags |= float_flag_divbyzero; 3716 return float16_set_sign(float16_infinity, sign); 3717 } 3718 3719 /* frec7(sNaN) = canonical NaN */ 3720 if (float16_is_signaling_nan(f, s)) { 3721 s->float_exception_flags |= float_flag_invalid; 3722 return float16_default_nan(s); 3723 } 3724 3725 /* frec7(qNaN) = canonical NaN */ 3726 if (float16_is_quiet_nan(f, s)) { 3727 return float16_default_nan(s); 3728 } 3729 3730 /* +-normal, +-subnormal */ 3731 uint64_t val = frec7(f, exp_size, frac_size, s); 3732 return make_float16(val); 3733 } 3734 3735 static float32 frec7_s(float32 f, float_status *s) 3736 { 3737 int exp_size = 8, frac_size = 23; 3738 bool sign = float32_is_neg(f); 3739 3740 /* frec7(+-inf) = +-0 */ 3741 if (float32_is_infinity(f)) { 3742 return float32_set_sign(float32_zero, sign); 3743 } 3744 3745 /* frec7(+-0) = +-inf */ 3746 if (float32_is_zero(f)) { 3747 s->float_exception_flags |= float_flag_divbyzero; 3748 return float32_set_sign(float32_infinity, sign); 3749 } 3750 3751 /* frec7(sNaN) = canonical NaN */ 3752 if (float32_is_signaling_nan(f, s)) { 3753 s->float_exception_flags |= float_flag_invalid; 3754 return float32_default_nan(s); 3755 } 3756 3757 /* frec7(qNaN) = canonical NaN */ 3758 if (float32_is_quiet_nan(f, s)) { 3759 return float32_default_nan(s); 3760 } 3761 3762 /* +-normal, +-subnormal */ 3763 uint64_t val = frec7(f, exp_size, frac_size, s); 3764 return make_float32(val); 3765 } 3766 3767 static float64 frec7_d(float64 f, float_status *s) 3768 { 3769 int exp_size = 11, frac_size = 52; 3770 bool sign = float64_is_neg(f); 3771 3772 /* frec7(+-inf) = +-0 */ 3773 if (float64_is_infinity(f)) { 3774 return float64_set_sign(float64_zero, sign); 3775 } 3776 3777 /* frec7(+-0) = +-inf */ 3778 if (float64_is_zero(f)) { 3779 s->float_exception_flags |= float_flag_divbyzero; 3780 return float64_set_sign(float64_infinity, sign); 3781 } 3782 3783 /* frec7(sNaN) = canonical NaN */ 3784 if (float64_is_signaling_nan(f, s)) { 3785 s->float_exception_flags |= float_flag_invalid; 3786 return float64_default_nan(s); 3787 } 3788 3789 /* frec7(qNaN) = canonical NaN */ 3790 if (float64_is_quiet_nan(f, s)) { 3791 return float64_default_nan(s); 3792 } 3793 3794 /* +-normal, +-subnormal */ 3795 uint64_t val = frec7(f, exp_size, frac_size, s); 3796 return make_float64(val); 3797 } 3798 3799 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3800 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3801 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3802 GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2) 3803 GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4) 3804 GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8) 3805 3806 /* Vector Floating-Point MIN/MAX Instructions */ 3807 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3808 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 3809 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3810 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) 3811 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) 3812 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) 3813 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3814 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3815 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3816 GEN_VEXT_VF(vfmin_vf_h, 2, 2) 3817 GEN_VEXT_VF(vfmin_vf_w, 4, 4) 3818 GEN_VEXT_VF(vfmin_vf_d, 8, 8) 3819 3820 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3821 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3822 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3823 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) 3824 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) 3825 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) 3826 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3827 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3828 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3829 GEN_VEXT_VF(vfmax_vf_h, 2, 2) 3830 GEN_VEXT_VF(vfmax_vf_w, 4, 4) 3831 GEN_VEXT_VF(vfmax_vf_d, 8, 8) 3832 3833 /* Vector Floating-Point Sign-Injection Instructions */ 3834 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3835 { 3836 return deposit64(b, 0, 15, a); 3837 } 3838 3839 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3840 { 3841 return deposit64(b, 0, 31, a); 3842 } 3843 3844 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3845 { 3846 return deposit64(b, 0, 63, a); 3847 } 3848 3849 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3850 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3851 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3852 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) 3853 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) 3854 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) 3855 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3856 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3857 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3858 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) 3859 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) 3860 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) 3861 3862 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3863 { 3864 return deposit64(~b, 0, 15, a); 3865 } 3866 3867 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3868 { 3869 return deposit64(~b, 0, 31, a); 3870 } 3871 3872 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3873 { 3874 return deposit64(~b, 0, 63, a); 3875 } 3876 3877 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3878 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3879 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3880 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) 3881 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) 3882 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) 3883 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3884 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3885 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3886 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) 3887 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) 3888 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) 3889 3890 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3891 { 3892 return deposit64(b ^ a, 0, 15, a); 3893 } 3894 3895 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3896 { 3897 return deposit64(b ^ a, 0, 31, a); 3898 
} 3899 3900 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3901 { 3902 return deposit64(b ^ a, 0, 63, a); 3903 } 3904 3905 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3906 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3907 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3908 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) 3909 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) 3910 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) 3911 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3912 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3913 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3914 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) 3915 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) 3916 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) 3917 3918 /* Vector Floating-Point Compare Instructions */ 3919 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3920 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3921 CPURISCVState *env, uint32_t desc) \ 3922 { \ 3923 uint32_t vm = vext_vm(desc); \ 3924 uint32_t vl = env->vl; \ 3925 uint32_t i; \ 3926 \ 3927 for (i = env->vstart; i < vl; i++) { \ 3928 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3929 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3930 if (!vm && !vext_elem_mask(v0, i)) { \ 3931 continue; \ 3932 } \ 3933 vext_set_elem_mask(vd, i, \ 3934 DO_OP(s2, s1, &env->fp_status)); \ 3935 } \ 3936 env->vstart = 0; \ 3937 } 3938 3939 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3940 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3941 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3942 3943 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3944 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3945 CPURISCVState *env, uint32_t desc) \ 3946 { \ 3947 uint32_t vm = vext_vm(desc); \ 3948 uint32_t vl = env->vl; \ 3949 uint32_t i; \ 3950 \ 3951 for (i = env->vstart; i < vl; i++) { \ 3952 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3953 if (!vm && !vext_elem_mask(v0, i)) { \ 3954 continue; \ 3955 } \ 3956 vext_set_elem_mask(vd, i, \ 3957 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3958 } \ 3959 env->vstart = 0; \ 3960 } 3961 3962 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3963 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3964 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3965 3966 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3967 { 3968 FloatRelation compare = float16_compare_quiet(a, b, s); 3969 return compare != float_relation_equal; 3970 } 3971 3972 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 3973 { 3974 FloatRelation compare = float32_compare_quiet(a, b, s); 3975 return compare != float_relation_equal; 3976 } 3977 3978 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 3979 { 3980 FloatRelation compare = float64_compare_quiet(a, b, s); 3981 return compare != float_relation_equal; 3982 } 3983 3984 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 3985 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 3986 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 3987 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 3988 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 3989 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 3990 3991 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 3992 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 3993 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 3994 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 3995 
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 3996 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 3997 3998 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 3999 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4000 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4001 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4002 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4003 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4004 4005 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4006 { 4007 FloatRelation compare = float16_compare(a, b, s); 4008 return compare == float_relation_greater; 4009 } 4010 4011 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4012 { 4013 FloatRelation compare = float32_compare(a, b, s); 4014 return compare == float_relation_greater; 4015 } 4016 4017 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4018 { 4019 FloatRelation compare = float64_compare(a, b, s); 4020 return compare == float_relation_greater; 4021 } 4022 4023 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4024 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4025 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4026 4027 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4028 { 4029 FloatRelation compare = float16_compare(a, b, s); 4030 return compare == float_relation_greater || 4031 compare == float_relation_equal; 4032 } 4033 4034 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4035 { 4036 FloatRelation compare = float32_compare(a, b, s); 4037 return compare == float_relation_greater || 4038 compare == float_relation_equal; 4039 } 4040 4041 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4042 { 4043 FloatRelation compare = float64_compare(a, b, s); 4044 return compare == float_relation_greater || 4045 compare == float_relation_equal; 4046 } 4047 4048 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4049 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4050 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4051 4052 /* Vector Floating-Point Classify Instruction */ 4053 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4054 static void do_##NAME(void *vd, void *vs2, int i) \ 4055 { \ 4056 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4057 *((TD *)vd + HD(i)) = OP(s2); \ 4058 } 4059 4060 #define GEN_VEXT_V(NAME, ESZ, DSZ) \ 4061 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4062 CPURISCVState *env, uint32_t desc) \ 4063 { \ 4064 uint32_t vm = vext_vm(desc); \ 4065 uint32_t vl = env->vl; \ 4066 uint32_t i; \ 4067 \ 4068 for (i = env->vstart; i < vl; i++) { \ 4069 if (!vm && !vext_elem_mask(v0, i)) { \ 4070 continue; \ 4071 } \ 4072 do_##NAME(vd, vs2, i); \ 4073 } \ 4074 env->vstart = 0; \ 4075 } 4076 4077 target_ulong fclass_h(uint64_t frs1) 4078 { 4079 float16 f = frs1; 4080 bool sign = float16_is_neg(f); 4081 4082 if (float16_is_infinity(f)) { 4083 return sign ? 1 << 0 : 1 << 7; 4084 } else if (float16_is_zero(f)) { 4085 return sign ? 1 << 3 : 1 << 4; 4086 } else if (float16_is_zero_or_denormal(f)) { 4087 return sign ? 1 << 2 : 1 << 5; 4088 } else if (float16_is_any_nan(f)) { 4089 float_status s = { }; /* for snan_bit_is_one */ 4090 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4091 } else { 4092 return sign ? 1 << 1 : 1 << 6; 4093 } 4094 } 4095 4096 target_ulong fclass_s(uint64_t frs1) 4097 { 4098 float32 f = frs1; 4099 bool sign = float32_is_neg(f); 4100 4101 if (float32_is_infinity(f)) { 4102 return sign ? 
1 << 0 : 1 << 7; 4103 } else if (float32_is_zero(f)) { 4104 return sign ? 1 << 3 : 1 << 4; 4105 } else if (float32_is_zero_or_denormal(f)) { 4106 return sign ? 1 << 2 : 1 << 5; 4107 } else if (float32_is_any_nan(f)) { 4108 float_status s = { }; /* for snan_bit_is_one */ 4109 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4110 } else { 4111 return sign ? 1 << 1 : 1 << 6; 4112 } 4113 } 4114 4115 target_ulong fclass_d(uint64_t frs1) 4116 { 4117 float64 f = frs1; 4118 bool sign = float64_is_neg(f); 4119 4120 if (float64_is_infinity(f)) { 4121 return sign ? 1 << 0 : 1 << 7; 4122 } else if (float64_is_zero(f)) { 4123 return sign ? 1 << 3 : 1 << 4; 4124 } else if (float64_is_zero_or_denormal(f)) { 4125 return sign ? 1 << 2 : 1 << 5; 4126 } else if (float64_is_any_nan(f)) { 4127 float_status s = { }; /* for snan_bit_is_one */ 4128 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4129 } else { 4130 return sign ? 1 << 1 : 1 << 6; 4131 } 4132 } 4133 4134 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4135 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4136 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4137 GEN_VEXT_V(vfclass_v_h, 2, 2) 4138 GEN_VEXT_V(vfclass_v_w, 4, 4) 4139 GEN_VEXT_V(vfclass_v_d, 8, 8) 4140 4141 /* Vector Floating-Point Merge Instruction */ 4142 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4143 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4144 CPURISCVState *env, uint32_t desc) \ 4145 { \ 4146 uint32_t vm = vext_vm(desc); \ 4147 uint32_t vl = env->vl; \ 4148 uint32_t i; \ 4149 \ 4150 for (i = env->vstart; i < vl; i++) { \ 4151 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4152 *((ETYPE *)vd + H(i)) \ 4153 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4154 } \ 4155 env->vstart = 0; \ 4156 } 4157 4158 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4159 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4160 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4161 4162 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4163 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4164 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4165 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4166 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4167 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) 4168 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) 4169 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) 4170 4171 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4172 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4173 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4174 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4175 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) 4176 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) 4177 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) 4178 4179 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4180 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4181 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4182 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4183 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) 4184 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) 4185 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) 4186 4187 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. 
*/ 4188 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4189 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4190 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4191 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) 4192 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) 4193 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) 4194 4195 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4196 /* (TD, T2, TX2) */ 4197 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4198 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4199 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4200 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4201 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4202 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4203 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) 4204 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) 4205 4206 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4207 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4208 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4209 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) 4210 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) 4211 4212 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4213 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4214 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4215 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4216 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2) 4217 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) 4218 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) 4219 4220 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4221 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4222 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4223 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4224 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2) 4225 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) 4226 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) 4227 4228 /* 4229 * vfwcvt.f.f.v vd, vs2, vm 4230 * Convert single-width float to double-width float. 4231 */ 4232 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4233 { 4234 return float16_to_float32(a, true, s); 4235 } 4236 4237 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4238 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4239 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4) 4240 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) 4241 4242 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4243 /* (TD, T2, TX2) */ 4244 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4245 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4246 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4247 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4248 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4249 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4250 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4251 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1) 4252 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2) 4253 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4) 4254 4255 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. 
*/ 4256 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4257 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4258 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4259 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1) 4260 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2) 4261 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4) 4262 4263 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4264 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4265 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4266 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2) 4267 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4) 4268 4269 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4270 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4271 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4272 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2) 4273 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4) 4274 4275 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ 4276 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4277 { 4278 return float32_to_float16(a, true, s); 4279 } 4280 4281 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4282 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4283 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2) 4284 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4) 4285 4286 /* 4287 *** Vector Reduction Operations 4288 */ 4289 /* Vector Single-Width Integer Reduction Instructions */ 4290 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4291 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4292 void *vs2, CPURISCVState *env, uint32_t desc) \ 4293 { \ 4294 uint32_t vm = vext_vm(desc); \ 4295 uint32_t vl = env->vl; \ 4296 uint32_t i; \ 4297 TD s1 = *((TD *)vs1 + HD(0)); \ 4298 \ 4299 for (i = env->vstart; i < vl; i++) { \ 4300 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4301 if (!vm && !vext_elem_mask(v0, i)) { \ 4302 continue; \ 4303 } \ 4304 s1 = OP(s1, (TD)s2); \ 4305 } \ 4306 *((TD *)vd + HD(0)) = s1; \ 4307 env->vstart = 0; \ 4308 } 4309 4310 /* vd[0] = sum(vs1[0], vs2[*]) */ 4311 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4312 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4313 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4314 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4315 4316 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4317 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4318 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4319 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4320 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4321 4322 /* vd[0] = max(vs1[0], vs2[*]) */ 4323 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4324 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4325 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4326 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4327 4328 /* vd[0] = minu(vs1[0], vs2[*]) */ 4329 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4330 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4331 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4332 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4333 4334 /* vd[0] = min(vs1[0], vs2[*]) */ 4335 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4336 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4337 
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4338 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4339 4340 /* vd[0] = and(vs1[0], vs2[*]) */ 4341 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4342 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4343 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4344 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4345 4346 /* vd[0] = or(vs1[0], vs2[*]) */ 4347 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4348 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4349 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4350 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4351 4352 /* vd[0] = xor(vs1[0], vs2[*]) */ 4353 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4354 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4355 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4356 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4357 4358 /* Vector Widening Integer Reduction Instructions */ 4359 /* signed sum reduction into double-width accumulator */ 4360 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4361 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4362 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4363 4364 /* Unsigned sum reduction into double-width accumulator */ 4365 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4366 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4367 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4368 4369 /* Vector Single-Width Floating-Point Reduction Instructions */ 4370 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4371 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4372 void *vs2, CPURISCVState *env, \ 4373 uint32_t desc) \ 4374 { \ 4375 uint32_t vm = vext_vm(desc); \ 4376 uint32_t vl = env->vl; \ 4377 uint32_t i; \ 4378 TD s1 = *((TD *)vs1 + HD(0)); \ 4379 \ 4380 for (i = env->vstart; i < vl; i++) { \ 4381 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4382 if (!vm && !vext_elem_mask(v0, i)) { \ 4383 continue; \ 4384 } \ 4385 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4386 } \ 4387 *((TD *)vd + HD(0)) = s1; \ 4388 env->vstart = 0; \ 4389 } 4390 4391 /* Unordered sum */ 4392 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4393 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4394 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4395 4396 /* Maximum value */ 4397 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4398 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4399 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4400 4401 /* Minimum value */ 4402 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4403 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4404 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4405 4406 /* Vector Widening Floating-Point Reduction Instructions */ 4407 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4408 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4409 void *vs2, CPURISCVState *env, uint32_t desc) 4410 { 4411 uint32_t vm = vext_vm(desc); 4412 uint32_t vl = env->vl; 4413 uint32_t i; 4414 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4415 4416 for (i = env->vstart; i < vl; i++) { 
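        /*
         * Unordered widening sum step: each active element (all elements
         * when vm is set) is widened from SEW=16 to single precision and
         * accumulated into the 2*SEW scalar s1.
         */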
4417 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4418 if (!vm && !vext_elem_mask(v0, i)) { 4419 continue; 4420 } 4421 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4422 &env->fp_status); 4423 } 4424 *((uint32_t *)vd + H4(0)) = s1; 4425 env->vstart = 0; 4426 } 4427 4428 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4429 void *vs2, CPURISCVState *env, uint32_t desc) 4430 { 4431 uint32_t vm = vext_vm(desc); 4432 uint32_t vl = env->vl; 4433 uint32_t i; 4434 uint64_t s1 = *((uint64_t *)vs1); 4435 4436 for (i = env->vstart; i < vl; i++) { 4437 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4438 if (!vm && !vext_elem_mask(v0, i)) { 4439 continue; 4440 } 4441 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4442 &env->fp_status); 4443 } 4444 *((uint64_t *)vd) = s1; 4445 env->vstart = 0; 4446 } 4447 4448 /* 4449 *** Vector Mask Operations 4450 */ 4451 /* Vector Mask-Register Logical Instructions */ 4452 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4453 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4454 void *vs2, CPURISCVState *env, \ 4455 uint32_t desc) \ 4456 { \ 4457 uint32_t vl = env->vl; \ 4458 uint32_t i; \ 4459 int a, b; \ 4460 \ 4461 for (i = env->vstart; i < vl; i++) { \ 4462 a = vext_elem_mask(vs1, i); \ 4463 b = vext_elem_mask(vs2, i); \ 4464 vext_set_elem_mask(vd, i, OP(b, a)); \ 4465 } \ 4466 env->vstart = 0; \ 4467 } 4468 4469 #define DO_NAND(N, M) (!(N & M)) 4470 #define DO_ANDNOT(N, M) (N & !M) 4471 #define DO_NOR(N, M) (!(N | M)) 4472 #define DO_ORNOT(N, M) (N | !M) 4473 #define DO_XNOR(N, M) (!(N ^ M)) 4474 4475 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4476 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4477 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4478 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4479 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4480 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4481 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4482 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4483 4484 /* Vector count population in mask vcpop */ 4485 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4486 uint32_t desc) 4487 { 4488 target_ulong cnt = 0; 4489 uint32_t vm = vext_vm(desc); 4490 uint32_t vl = env->vl; 4491 int i; 4492 4493 for (i = env->vstart; i < vl; i++) { 4494 if (vm || vext_elem_mask(v0, i)) { 4495 if (vext_elem_mask(vs2, i)) { 4496 cnt++; 4497 } 4498 } 4499 } 4500 env->vstart = 0; 4501 return cnt; 4502 } 4503 4504 /* vfirst find-first-set mask bit*/ 4505 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4506 uint32_t desc) 4507 { 4508 uint32_t vm = vext_vm(desc); 4509 uint32_t vl = env->vl; 4510 int i; 4511 4512 for (i = env->vstart; i < vl; i++) { 4513 if (vm || vext_elem_mask(v0, i)) { 4514 if (vext_elem_mask(vs2, i)) { 4515 return i; 4516 } 4517 } 4518 } 4519 env->vstart = 0; 4520 return -1LL; 4521 } 4522 4523 enum set_mask_type { 4524 ONLY_FIRST = 1, 4525 INCLUDE_FIRST, 4526 BEFORE_FIRST, 4527 }; 4528 4529 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4530 uint32_t desc, enum set_mask_type type) 4531 { 4532 uint32_t vm = vext_vm(desc); 4533 uint32_t vl = env->vl; 4534 int i; 4535 bool first_mask_bit = false; 4536 4537 for (i = env->vstart; i < vl; i++) { 4538 if (!vm && !vext_elem_mask(v0, i)) { 4539 continue; 4540 } 4541 /* write a zero to all following active elements */ 4542 if (first_mask_bit) { 4543 vext_set_elem_mask(vd, i, 0); 4544 continue; 4545 } 4546 if (vext_elem_mask(vs2, i)) { 4547 first_mask_bit = true; 4548 if (type == BEFORE_FIRST) { 4549 vext_set_elem_mask(vd, i, 0); 4550 } else { 4551 
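                /*
                 * vmsif (INCLUDE_FIRST) and vmsof (ONLY_FIRST) both set the
                 * element that holds the first active set bit of vs2.
                 */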
vext_set_elem_mask(vd, i, 1); 4552 } 4553 } else { 4554 if (type == ONLY_FIRST) { 4555 vext_set_elem_mask(vd, i, 0); 4556 } else { 4557 vext_set_elem_mask(vd, i, 1); 4558 } 4559 } 4560 } 4561 env->vstart = 0; 4562 } 4563 4564 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4565 uint32_t desc) 4566 { 4567 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4568 } 4569 4570 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4571 uint32_t desc) 4572 { 4573 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4574 } 4575 4576 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4577 uint32_t desc) 4578 { 4579 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4580 } 4581 4582 /* Vector Iota Instruction */ 4583 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4584 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4585 uint32_t desc) \ 4586 { \ 4587 uint32_t vm = vext_vm(desc); \ 4588 uint32_t vl = env->vl; \ 4589 uint32_t sum = 0; \ 4590 int i; \ 4591 \ 4592 for (i = env->vstart; i < vl; i++) { \ 4593 if (!vm && !vext_elem_mask(v0, i)) { \ 4594 continue; \ 4595 } \ 4596 *((ETYPE *)vd + H(i)) = sum; \ 4597 if (vext_elem_mask(vs2, i)) { \ 4598 sum++; \ 4599 } \ 4600 } \ 4601 env->vstart = 0; \ 4602 } 4603 4604 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4605 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4606 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4607 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4608 4609 /* Vector Element Index Instruction */ 4610 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4611 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4612 { \ 4613 uint32_t vm = vext_vm(desc); \ 4614 uint32_t vl = env->vl; \ 4615 int i; \ 4616 \ 4617 for (i = env->vstart; i < vl; i++) { \ 4618 if (!vm && !vext_elem_mask(v0, i)) { \ 4619 continue; \ 4620 } \ 4621 *((ETYPE *)vd + H(i)) = i; \ 4622 } \ 4623 env->vstart = 0; \ 4624 } 4625 4626 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4627 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4628 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4629 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4630 4631 /* 4632 *** Vector Permutation Instructions 4633 */ 4634 4635 /* Vector Slide Instructions */ 4636 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4637 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4638 CPURISCVState *env, uint32_t desc) \ 4639 { \ 4640 uint32_t vm = vext_vm(desc); \ 4641 uint32_t vl = env->vl; \ 4642 target_ulong offset = s1, i_min, i; \ 4643 \ 4644 i_min = MAX(env->vstart, offset); \ 4645 for (i = i_min; i < vl; i++) { \ 4646 if (!vm && !vext_elem_mask(v0, i)) { \ 4647 continue; \ 4648 } \ 4649 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4650 } \ 4651 } 4652 4653 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4654 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4655 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4656 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4657 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4658 4659 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4660 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4661 CPURISCVState *env, uint32_t desc) \ 4662 { \ 4663 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4664 uint32_t vm = vext_vm(desc); \ 4665 uint32_t vl = env->vl; \ 4666 target_ulong i_max, i; \ 4667 \ 4668 i_max = MAX(MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl), env->vstart); \ 4669 for (i = env->vstart; i < i_max; ++i) { \ 4670 if (vm || vext_elem_mask(v0, i)) { \ 4671 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4672 } \ 4673 } \ 4674 \ 4675 for (i = i_max; i < vl; ++i) { \ 4676 if (vm || vext_elem_mask(v0, i)) { \ 4677 *((ETYPE *)vd + H(i)) = 0; \ 4678 } \ 4679 } \ 4680 \ 4681 env->vstart = 0; \ 4682 } 4683 4684 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4685 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4686 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4687 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4688 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4689 4690 #define GEN_VEXT_VSLIE1UP(ESZ, H) \ 4691 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4692 CPURISCVState *env, uint32_t desc) \ 4693 { \ 4694 typedef uint##ESZ##_t ETYPE; \ 4695 uint32_t vm = vext_vm(desc); \ 4696 uint32_t vl = env->vl; \ 4697 uint32_t i; \ 4698 \ 4699 for (i = env->vstart; i < vl; i++) { \ 4700 if (!vm && !vext_elem_mask(v0, i)) { \ 4701 continue; \ 4702 } \ 4703 if (i == 0) { \ 4704 *((ETYPE *)vd + H(i)) = s1; \ 4705 } else { \ 4706 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4707 } \ 4708 } \ 4709 env->vstart = 0; \ 4710 } 4711 4712 GEN_VEXT_VSLIE1UP(8, H1) 4713 GEN_VEXT_VSLIE1UP(16, H2) 4714 GEN_VEXT_VSLIE1UP(32, H4) 4715 GEN_VEXT_VSLIE1UP(64, H8) 4716 4717 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \ 4718 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4719 CPURISCVState *env, uint32_t desc) \ 4720 { \ 4721 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4722 } 4723 4724 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4725 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4726 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4727 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4728 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4729 4730 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \ 4731 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4732 CPURISCVState *env, uint32_t desc) \ 4733 { \ 4734 typedef uint##ESZ##_t ETYPE; \ 4735 uint32_t vm = vext_vm(desc); \ 4736 uint32_t vl = env->vl; \ 4737 uint32_t i; \ 4738 \ 4739 for (i = env->vstart; i < vl; i++) { \ 4740 if (!vm && !vext_elem_mask(v0, i)) { \ 4741 continue; \ 4742 } \ 4743 if (i == vl - 1) { \ 4744 *((ETYPE *)vd + H(i)) = s1; \ 4745 } else { \ 4746 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4747 } \ 4748 } \ 4749 env->vstart = 0; \ 4750 } 4751 4752 GEN_VEXT_VSLIDE1DOWN(8, H1) 4753 GEN_VEXT_VSLIDE1DOWN(16, H2) 4754 GEN_VEXT_VSLIDE1DOWN(32, H4) 4755 GEN_VEXT_VSLIDE1DOWN(64, H8) 4756 4757 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \ 4758 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4759 CPURISCVState *env, uint32_t desc) \ 4760 { \ 4761 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4762 } 4763 4764 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4765 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4766 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4767 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4768 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4769 4770 /* Vector Floating-Point Slide Instructions */ 4771 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \ 4772 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4773 CPURISCVState *env, uint32_t desc) \ 4774 { \ 4775 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4776 } 4777 4778 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = 
vs2[i] */ 4779 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4780 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4781 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4782 4783 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \ 4784 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4785 CPURISCVState *env, uint32_t desc) \ 4786 { \ 4787 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4788 } 4789 4790 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4791 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4792 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4793 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4794 4795 /* Vector Register Gather Instruction */ 4796 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4797 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4798 CPURISCVState *env, uint32_t desc) \ 4799 { \ 4800 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4801 uint32_t vm = vext_vm(desc); \ 4802 uint32_t vl = env->vl; \ 4803 uint64_t index; \ 4804 uint32_t i; \ 4805 \ 4806 for (i = env->vstart; i < vl; i++) { \ 4807 if (!vm && !vext_elem_mask(v0, i)) { \ 4808 continue; \ 4809 } \ 4810 index = *((TS1 *)vs1 + HS1(i)); \ 4811 if (index >= vlmax) { \ 4812 *((TS2 *)vd + HS2(i)) = 0; \ 4813 } else { \ 4814 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4815 } \ 4816 } \ 4817 env->vstart = 0; \ 4818 } 4819 4820 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 4821 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4822 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4823 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4824 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4825 4826 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4827 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4828 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4829 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4830 4831 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4832 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4833 CPURISCVState *env, uint32_t desc) \ 4834 { \ 4835 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4836 uint32_t vm = vext_vm(desc); \ 4837 uint32_t vl = env->vl; \ 4838 uint64_t index = s1; \ 4839 uint32_t i; \ 4840 \ 4841 for (i = env->vstart; i < vl; i++) { \ 4842 if (!vm && !vext_elem_mask(v0, i)) { \ 4843 continue; \ 4844 } \ 4845 if (index >= vlmax) { \ 4846 *((ETYPE *)vd + H(i)) = 0; \ 4847 } else { \ 4848 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4849 } \ 4850 } \ 4851 env->vstart = 0; \ 4852 } 4853 4854 /* vd[i] = (x[rs1] >= VLMAX) ? 
0 : vs2[rs1] */ 4855 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4856 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4857 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4858 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4859 4860 /* Vector Compress Instruction */ 4861 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4862 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4863 CPURISCVState *env, uint32_t desc) \ 4864 { \ 4865 uint32_t vl = env->vl; \ 4866 uint32_t num = 0, i; \ 4867 \ 4868 for (i = env->vstart; i < vl; i++) { \ 4869 if (!vext_elem_mask(vs1, i)) { \ 4870 continue; \ 4871 } \ 4872 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4873 num++; \ 4874 } \ 4875 env->vstart = 0; \ 4876 } 4877 4878 /* Compress into vd elements of vs2 where vs1 is enabled */ 4879 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4880 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4881 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4882 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4883 4884 /* Vector Whole Register Move */ 4885 #define GEN_VEXT_VMV_WHOLE(NAME, LEN) \ 4886 void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \ 4887 uint32_t desc) \ 4888 { \ 4889 /* EEW = 8 */ \ 4890 uint32_t maxsz = simd_maxsz(desc); \ 4891 uint32_t i = env->vstart; \ 4892 \ 4893 memcpy((uint8_t *)vd + H1(i), \ 4894 (uint8_t *)vs2 + H1(i), \ 4895 maxsz - env->vstart); \ 4896 \ 4897 env->vstart = 0; \ 4898 } 4899 4900 GEN_VEXT_VMV_WHOLE(vmv1r_v, 1) 4901 GEN_VEXT_VMV_WHOLE(vmv2r_v, 2) 4902 GEN_VEXT_VMV_WHOLE(vmv4r_v, 4) 4903 GEN_VEXT_VMV_WHOLE(vmv8r_v, 8) 4904 4905 /* Vector Integer Extension */ 4906 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \ 4907 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4908 CPURISCVState *env, uint32_t desc) \ 4909 { \ 4910 uint32_t vl = env->vl; \ 4911 uint32_t vm = vext_vm(desc); \ 4912 uint32_t i; \ 4913 \ 4914 for (i = env->vstart; i < vl; i++) { \ 4915 if (!vm && !vext_elem_mask(v0, i)) { \ 4916 continue; \ 4917 } \ 4918 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ 4919 } \ 4920 env->vstart = 0; \ 4921 } 4922 4923 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) 4924 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2) 4925 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4) 4926 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1) 4927 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2) 4928 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1) 4929 4930 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1) 4931 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2) 4932 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) 4933 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) 4934 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) 4935 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) 4936
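
/*
 * A minimal standalone sketch of the vzext.vf2 element loop generated by
 * GEN_VEXT_INT_EXT above, assuming a little-endian host (so H1/H2 reduce to
 * the identity) and ignoring masking and vstart handling.  The names
 * zext_vf2_h_sketch, dst and src are illustrative only and are not part of
 * this file:
 *
 *     #include <stdint.h>
 *
 *     static void zext_vf2_h_sketch(uint16_t *dst, const uint8_t *src,
 *                                   uint32_t vl)
 *     {
 *         uint32_t i;
 *
 *         for (i = 0; i < vl; i++) {
 *             dst[i] = src[i];            // zero-extend EEW=8 to EEW=16
 *         }
 *     }
 *
 * For src = { 0x80, 0x7f } this yields dst = { 0x0080, 0x007f }, whereas the
 * signed vsext_vf2_h helper above sign-extends 0x80 to 0xff80.
 */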