/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    bool vill = FIELD_EX64(s2, VTYPE, VILL);
    target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
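
/*
 * Worked example (illustrative numbers): with VLEN = 128 bits the
 * descriptor encodes vlenb = 16.  For 32-bit elements (esz = 2) and
 * LMUL = 2 (lmul = 1), scale = 1 - 2 = -1, so VLMAX = 16 >> 1 = 8,
 * which matches VLMAX = LMUL * VLEN / SEW = 2 * 128 / 32.  For a
 * fractional LMUL = 1/2 (lmul = -1), scale = -1 - 2 = -3 and
 * VLMAX = 16 >> 3 = 2.
 */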
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check.  In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry.  Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, addr, curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, addr, curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        probe_pages(env, base + stride * i, nf << esz, ra, access_type);
    }
    /* do real access */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + stride * i + (k << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
}
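
/*
 * Reference model of the strided segment walk above, with illustrative
 * index math (not generated code): for each active element i and field k,
 *
 *     vd[i + k * max_elems] = mem[base + stride * i + (k << esz)];
 *
 * e.g. nf = 2 fields of 4 bytes (esz = 2) and stride = 32 make element
 * i = 3 touch bytes base + 96..99 (k = 0) and base + 100..103 (k = 1).
 * Inactive elements (vm == 0 and mask bit clear) are neither probed nor
 * accessed.
 */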

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem,
             uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* probe every access */
    probe_pages(env, base, env->vl * (nf << esz), ra, access_type);
    /* load bytes from guest memory */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
}

/*
 * A masked unit-stride load or store is a special case of a strided
 * operation, with stride = NF * sizeof(ETYPE).
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);          \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
                 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);         \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 352 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 353 354 /* 355 *** index: access vector element from indexed memory 356 */ 357 typedef target_ulong vext_get_index_addr(target_ulong base, 358 uint32_t idx, void *vs2); 359 360 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 361 static target_ulong NAME(target_ulong base, \ 362 uint32_t idx, void *vs2) \ 363 { \ 364 return (base + *((ETYPE *)vs2 + H(idx))); \ 365 } 366 367 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 368 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 369 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 370 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 371 372 static inline void 373 vext_ldst_index(void *vd, void *v0, target_ulong base, 374 void *vs2, CPURISCVState *env, uint32_t desc, 375 vext_get_index_addr get_index_addr, 376 vext_ldst_elem_fn *ldst_elem, 377 uint32_t esz, uintptr_t ra, MMUAccessType access_type) 378 { 379 uint32_t i, k; 380 uint32_t nf = vext_nf(desc); 381 uint32_t vm = vext_vm(desc); 382 uint32_t max_elems = vext_max_elems(desc, esz); 383 384 /* probe every access*/ 385 for (i = 0; i < env->vl; i++) { 386 if (!vm && !vext_elem_mask(v0, i)) { 387 continue; 388 } 389 probe_pages(env, get_index_addr(base, i, vs2), nf << esz, ra, 390 access_type); 391 } 392 /* load bytes from guest memory */ 393 for (i = 0; i < env->vl; i++) { 394 k = 0; 395 if (!vm && !vext_elem_mask(v0, i)) { 396 continue; 397 } 398 while (k < nf) { 399 abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz); 400 ldst_elem(env, addr, i + k * max_elems, vd, ra); 401 k++; 402 } 403 } 404 } 405 406 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 407 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 408 void *vs2, CPURISCVState *env, uint32_t desc) \ 409 { \ 410 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 411 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 412 } 413 414 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 415 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 416 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 417 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 418 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 419 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 420 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 421 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 422 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 423 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 424 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 425 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 426 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 427 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 428 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 429 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 430 431 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 432 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 433 void *vs2, CPURISCVState *env, uint32_t desc) \ 434 { \ 435 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 436 STORE_FN, ctzl(sizeof(ETYPE)), \ 437 GETPC(), MMU_DATA_STORE); \ 438 } 439 440 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b) 441 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h) 442 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w) 443 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d) 444 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b) 445 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h) 446 
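
/*
 * Worked example (illustrative numbers) for the indexed forms: the element
 * address is get_index_addr(base, i, vs2) + (k << esz), i.e. base plus the
 * zero-extended index element vs2[i] plus the field offset.  With 16-bit
 * indexes (idx_h), vs2[3] = 0x100, esz = 2 and nf = 1, element 3 is
 * accessed at base + 0x100.
 */
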
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = base + i * (nf << esz);
        if (i == 0) {
            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr += offset;
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ?
(M) : (N)) 549 550 /* Unsigned min/max */ 551 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 552 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 553 554 /* 555 *** load and store whole register instructions 556 */ 557 static void 558 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 559 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra, 560 MMUAccessType access_type) 561 { 562 uint32_t i, k; 563 uint32_t nf = vext_nf(desc); 564 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 565 uint32_t max_elems = vlenb >> esz; 566 567 /* probe every access */ 568 probe_pages(env, base, vlenb * nf, ra, access_type); 569 570 /* load bytes from guest memory */ 571 for (k = 0; k < nf; k++) { 572 for (i = 0; i < max_elems; i++) { 573 target_ulong addr = base + ((i + k * max_elems) << esz); 574 ldst_elem(env, addr, i + k * max_elems, vd, ra); 575 } 576 } 577 } 578 579 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 580 void HELPER(NAME)(void *vd, target_ulong base, \ 581 CPURISCVState *env, uint32_t desc) \ 582 { \ 583 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 584 ctzl(sizeof(ETYPE)), GETPC(), \ 585 MMU_DATA_LOAD); \ 586 } 587 588 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 589 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 590 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 591 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 592 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 593 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 594 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 595 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 596 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 597 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 598 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 599 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 600 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 601 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 602 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 603 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 604 605 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 606 void HELPER(NAME)(void *vd, target_ulong base, \ 607 CPURISCVState *env, uint32_t desc) \ 608 { \ 609 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 610 ctzl(sizeof(ETYPE)), GETPC(), \ 611 MMU_DATA_STORE); \ 612 } 613 614 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 615 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 616 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 617 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 618 619 /* 620 *** Vector Integer Arithmetic Instructions 621 */ 622 623 /* expand macro args before macro */ 624 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 625 626 /* (TD, T1, T2, TX1, TX2) */ 627 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 628 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 629 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 630 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 631 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 632 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 633 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 634 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 635 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 636 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 637 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 638 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 639 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 640 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 641 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 642 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 643 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 644 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 645 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 646 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 647 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 648 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 649 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 650 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 651 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 652 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 653 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 654 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 655 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 656 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 657 658 /* operation of two vector elements */ 659 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 660 661 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 662 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 663 { \ 664 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 665 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 666 *((TD *)vd + HD(i)) = OP(s2, s1); \ 667 } 668 #define DO_SUB(N, M) (N - M) 669 #define DO_RSUB(N, M) (M - N) 670 671 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 672 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 673 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 674 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 675 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 676 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 677 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 678 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 679 680 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 681 CPURISCVState *env, uint32_t desc, 682 uint32_t esz, uint32_t dsz, 683 opivv2_fn *fn) 684 { 685 uint32_t vm = vext_vm(desc); 686 uint32_t vl = env->vl; 687 uint32_t i; 688 689 for (i = 0; i < vl; i++) { 690 if (!vm && !vext_elem_mask(v0, i)) { 691 continue; 692 } 693 fn(vd, vs1, vs2, i); 694 } 695 } 696 697 /* generate the helpers for OPIVV */ 698 #define GEN_VEXT_VV(NAME, ESZ, DSZ) \ 699 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 700 void *vs2, CPURISCVState 
*env, \ 701 uint32_t desc) \ 702 { \ 703 do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 704 do_##NAME); \ 705 } 706 707 GEN_VEXT_VV(vadd_vv_b, 1, 1) 708 GEN_VEXT_VV(vadd_vv_h, 2, 2) 709 GEN_VEXT_VV(vadd_vv_w, 4, 4) 710 GEN_VEXT_VV(vadd_vv_d, 8, 8) 711 GEN_VEXT_VV(vsub_vv_b, 1, 1) 712 GEN_VEXT_VV(vsub_vv_h, 2, 2) 713 GEN_VEXT_VV(vsub_vv_w, 4, 4) 714 GEN_VEXT_VV(vsub_vv_d, 8, 8) 715 716 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 717 718 /* 719 * (T1)s1 gives the real operator type. 720 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 721 */ 722 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 723 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 724 { \ 725 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 726 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 727 } 728 729 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 730 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 731 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 732 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 733 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 734 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 735 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 736 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 737 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 738 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 739 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 740 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 741 742 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 743 CPURISCVState *env, uint32_t desc, 744 uint32_t esz, uint32_t dsz, 745 opivx2_fn fn) 746 { 747 uint32_t vm = vext_vm(desc); 748 uint32_t vl = env->vl; 749 uint32_t i; 750 751 for (i = 0; i < vl; i++) { 752 if (!vm && !vext_elem_mask(v0, i)) { 753 continue; 754 } 755 fn(vd, s1, vs2, i); 756 } 757 } 758 759 /* generate the helpers for OPIVX */ 760 #define GEN_VEXT_VX(NAME, ESZ, DSZ) \ 761 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 762 void *vs2, CPURISCVState *env, \ 763 uint32_t desc) \ 764 { \ 765 do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 766 do_##NAME); \ 767 } 768 769 GEN_VEXT_VX(vadd_vx_b, 1, 1) 770 GEN_VEXT_VX(vadd_vx_h, 2, 2) 771 GEN_VEXT_VX(vadd_vx_w, 4, 4) 772 GEN_VEXT_VX(vadd_vx_d, 8, 8) 773 GEN_VEXT_VX(vsub_vx_b, 1, 1) 774 GEN_VEXT_VX(vsub_vx_h, 2, 2) 775 GEN_VEXT_VX(vsub_vx_w, 4, 4) 776 GEN_VEXT_VX(vsub_vx_d, 8, 8) 777 GEN_VEXT_VX(vrsub_vx_b, 1, 1) 778 GEN_VEXT_VX(vrsub_vx_h, 2, 2) 779 GEN_VEXT_VX(vrsub_vx_w, 4, 4) 780 GEN_VEXT_VX(vrsub_vx_d, 8, 8) 781 782 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 783 { 784 intptr_t oprsz = simd_oprsz(desc); 785 intptr_t i; 786 787 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 788 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 789 } 790 } 791 792 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 793 { 794 intptr_t oprsz = simd_oprsz(desc); 795 intptr_t i; 796 797 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 798 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 799 } 800 } 801 802 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 803 { 804 intptr_t oprsz = simd_oprsz(desc); 805 intptr_t i; 806 807 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 808 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 809 } 810 } 811 812 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 813 { 814 intptr_t oprsz = simd_oprsz(desc); 815 intptr_t i; 816 
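    /*
     * d[i] = b - a[i] over the flat byte array: these vec_rsubs* helpers
     * provide the out-of-line path for the gvec-expanded reverse subtract
     * (vrsub.vx/vrsub.vi), where oprsz from the gvec descriptor is the
     * number of bytes to process and b holds the duplicated scalar.
     * E.g. b = 10 and a[i] = 3 gives d[i] = 7.
     */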
817 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 818 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 819 } 820 } 821 822 /* Vector Widening Integer Add/Subtract */ 823 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 824 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 825 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 826 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 827 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 828 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 829 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 830 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 831 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 832 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 833 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 834 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 835 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 836 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 837 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 838 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 839 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 840 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 841 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 842 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 843 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 844 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 845 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 846 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 847 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 848 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 849 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 850 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 851 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 852 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 853 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 854 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 855 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 856 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 857 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 858 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 859 GEN_VEXT_VV(vwaddu_vv_b, 1, 2) 860 GEN_VEXT_VV(vwaddu_vv_h, 2, 4) 861 GEN_VEXT_VV(vwaddu_vv_w, 4, 8) 862 GEN_VEXT_VV(vwsubu_vv_b, 1, 2) 863 GEN_VEXT_VV(vwsubu_vv_h, 2, 4) 864 GEN_VEXT_VV(vwsubu_vv_w, 4, 8) 865 GEN_VEXT_VV(vwadd_vv_b, 1, 2) 866 GEN_VEXT_VV(vwadd_vv_h, 2, 4) 867 GEN_VEXT_VV(vwadd_vv_w, 4, 8) 868 GEN_VEXT_VV(vwsub_vv_b, 1, 2) 869 GEN_VEXT_VV(vwsub_vv_h, 2, 4) 870 GEN_VEXT_VV(vwsub_vv_w, 4, 8) 871 GEN_VEXT_VV(vwaddu_wv_b, 1, 2) 872 GEN_VEXT_VV(vwaddu_wv_h, 2, 4) 873 GEN_VEXT_VV(vwaddu_wv_w, 4, 8) 874 GEN_VEXT_VV(vwsubu_wv_b, 1, 2) 875 GEN_VEXT_VV(vwsubu_wv_h, 2, 4) 876 GEN_VEXT_VV(vwsubu_wv_w, 4, 8) 877 GEN_VEXT_VV(vwadd_wv_b, 1, 2) 878 GEN_VEXT_VV(vwadd_wv_h, 2, 4) 879 GEN_VEXT_VV(vwadd_wv_w, 4, 8) 880 GEN_VEXT_VV(vwsub_wv_b, 1, 2) 881 GEN_VEXT_VV(vwsub_wv_h, 2, 4) 882 GEN_VEXT_VV(vwsub_wv_w, 4, 8) 883 884 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 885 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 886 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 887 
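
/*
 * In these widening forms the TX1/TX2 casts in OPIVV2/OPIVX2 promote the
 * narrow sources to the 2*SEW destination type before DO_ADD/DO_SUB runs,
 * so e.g. vwaddu.vx on 8-bit elements computes a full uint16_t sum
 * (0xff + 0xff = 0x1fe) with no truncation.  The _wv/_wx variants take an
 * already-wide vs2 operand and widen only the narrow source.
 */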
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 888 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 889 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 890 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 891 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 892 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 893 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 894 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 895 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 896 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 897 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 898 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 899 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 900 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 901 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 902 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 903 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 904 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 905 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 906 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 907 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 908 GEN_VEXT_VX(vwaddu_vx_b, 1, 2) 909 GEN_VEXT_VX(vwaddu_vx_h, 2, 4) 910 GEN_VEXT_VX(vwaddu_vx_w, 4, 8) 911 GEN_VEXT_VX(vwsubu_vx_b, 1, 2) 912 GEN_VEXT_VX(vwsubu_vx_h, 2, 4) 913 GEN_VEXT_VX(vwsubu_vx_w, 4, 8) 914 GEN_VEXT_VX(vwadd_vx_b, 1, 2) 915 GEN_VEXT_VX(vwadd_vx_h, 2, 4) 916 GEN_VEXT_VX(vwadd_vx_w, 4, 8) 917 GEN_VEXT_VX(vwsub_vx_b, 1, 2) 918 GEN_VEXT_VX(vwsub_vx_h, 2, 4) 919 GEN_VEXT_VX(vwsub_vx_w, 4, 8) 920 GEN_VEXT_VX(vwaddu_wx_b, 1, 2) 921 GEN_VEXT_VX(vwaddu_wx_h, 2, 4) 922 GEN_VEXT_VX(vwaddu_wx_w, 4, 8) 923 GEN_VEXT_VX(vwsubu_wx_b, 1, 2) 924 GEN_VEXT_VX(vwsubu_wx_h, 2, 4) 925 GEN_VEXT_VX(vwsubu_wx_w, 4, 8) 926 GEN_VEXT_VX(vwadd_wx_b, 1, 2) 927 GEN_VEXT_VX(vwadd_wx_h, 2, 4) 928 GEN_VEXT_VX(vwadd_wx_w, 4, 8) 929 GEN_VEXT_VX(vwsub_wx_b, 1, 2) 930 GEN_VEXT_VX(vwsub_wx_h, 2, 4) 931 GEN_VEXT_VX(vwsub_wx_w, 4, 8) 932 933 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 934 #define DO_VADC(N, M, C) (N + M + C) 935 #define DO_VSBC(N, M, C) (N - M - C) 936 937 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 938 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 939 CPURISCVState *env, uint32_t desc) \ 940 { \ 941 uint32_t vl = env->vl; \ 942 uint32_t i; \ 943 \ 944 for (i = 0; i < vl; i++) { \ 945 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 946 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 947 ETYPE carry = vext_elem_mask(v0, i); \ 948 \ 949 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 950 } \ 951 } 952 953 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 954 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 955 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 956 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 957 958 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 959 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 960 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 961 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 962 963 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 964 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 965 CPURISCVState *env, uint32_t desc) \ 966 { \ 967 uint32_t vl = env->vl; \ 968 uint32_t i; \ 969 \ 970 for (i = 0; i < vl; i++) { \ 971 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 972 ETYPE carry = vext_elem_mask(v0, i); \ 973 \ 974 *((ETYPE *)vd + H(i)) = 
DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 975 } \ 976 } 977 978 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 979 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 980 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 981 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 982 983 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 984 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 985 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 986 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 987 988 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 989 (__typeof(N))(N + M) < N) 990 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 991 992 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 993 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 994 CPURISCVState *env, uint32_t desc) \ 995 { \ 996 uint32_t vl = env->vl; \ 997 uint32_t vm = vext_vm(desc); \ 998 uint32_t i; \ 999 \ 1000 for (i = 0; i < vl; i++) { \ 1001 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1002 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1003 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1004 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1005 } \ 1006 } 1007 1008 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1009 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1010 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1011 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1012 1013 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1014 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1015 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1016 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1017 1018 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1019 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1020 void *vs2, CPURISCVState *env, uint32_t desc) \ 1021 { \ 1022 uint32_t vl = env->vl; \ 1023 uint32_t vm = vext_vm(desc); \ 1024 uint32_t i; \ 1025 \ 1026 for (i = 0; i < vl; i++) { \ 1027 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1028 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1029 vext_set_elem_mask(vd, i, \ 1030 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1031 } \ 1032 } 1033 1034 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1035 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1036 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1037 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1038 1039 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1040 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1041 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1042 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1043 1044 /* Vector Bitwise Logical Instructions */ 1045 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1046 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1047 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1048 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1049 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1050 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1051 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1052 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1053 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1054 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1055 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1056 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1057 GEN_VEXT_VV(vand_vv_b, 1, 1) 1058 GEN_VEXT_VV(vand_vv_h, 2, 2) 1059 GEN_VEXT_VV(vand_vv_w, 4, 4) 1060 
GEN_VEXT_VV(vand_vv_d, 8, 8) 1061 GEN_VEXT_VV(vor_vv_b, 1, 1) 1062 GEN_VEXT_VV(vor_vv_h, 2, 2) 1063 GEN_VEXT_VV(vor_vv_w, 4, 4) 1064 GEN_VEXT_VV(vor_vv_d, 8, 8) 1065 GEN_VEXT_VV(vxor_vv_b, 1, 1) 1066 GEN_VEXT_VV(vxor_vv_h, 2, 2) 1067 GEN_VEXT_VV(vxor_vv_w, 4, 4) 1068 GEN_VEXT_VV(vxor_vv_d, 8, 8) 1069 1070 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1071 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1072 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1073 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1074 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1075 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1076 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1077 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1078 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1079 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1080 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1081 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1082 GEN_VEXT_VX(vand_vx_b, 1, 1) 1083 GEN_VEXT_VX(vand_vx_h, 2, 2) 1084 GEN_VEXT_VX(vand_vx_w, 4, 4) 1085 GEN_VEXT_VX(vand_vx_d, 8, 8) 1086 GEN_VEXT_VX(vor_vx_b, 1, 1) 1087 GEN_VEXT_VX(vor_vx_h, 2, 2) 1088 GEN_VEXT_VX(vor_vx_w, 4, 4) 1089 GEN_VEXT_VX(vor_vx_d, 8, 8) 1090 GEN_VEXT_VX(vxor_vx_b, 1, 1) 1091 GEN_VEXT_VX(vxor_vx_h, 2, 2) 1092 GEN_VEXT_VX(vxor_vx_w, 4, 4) 1093 GEN_VEXT_VX(vxor_vx_d, 8, 8) 1094 1095 /* Vector Single-Width Bit Shift Instructions */ 1096 #define DO_SLL(N, M) (N << (M)) 1097 #define DO_SRL(N, M) (N >> (M)) 1098 1099 /* generate the helpers for shift instructions with two vector operators */ 1100 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1101 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1102 void *vs2, CPURISCVState *env, uint32_t desc) \ 1103 { \ 1104 uint32_t vm = vext_vm(desc); \ 1105 uint32_t vl = env->vl; \ 1106 uint32_t i; \ 1107 \ 1108 for (i = 0; i < vl; i++) { \ 1109 if (!vm && !vext_elem_mask(v0, i)) { \ 1110 continue; \ 1111 } \ 1112 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1113 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1114 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1115 } \ 1116 } 1117 1118 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1119 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1120 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1121 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1122 1123 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1124 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1125 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1126 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1127 1128 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1129 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1130 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1131 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1132 1133 /* generate the helpers for shift instructions with one vector and one scalar */ 1134 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1135 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1136 void *vs2, CPURISCVState *env, uint32_t desc) \ 1137 { \ 1138 uint32_t vm = vext_vm(desc); \ 1139 uint32_t vl = env->vl; \ 1140 uint32_t i; \ 1141 \ 1142 for (i = 0; i < vl; i++) { \ 1143 if (!vm && !vext_elem_mask(v0, i)) { \ 1144 continue; \ 1145 } \ 1146 TS2 s2 = *((TS2 
*)vs2 + HS2(i)); \ 1147 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1148 } \ 1149 } 1150 1151 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1152 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1153 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1154 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1155 1156 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1157 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1158 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1159 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1160 1161 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1162 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1163 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1164 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1165 1166 /* Vector Narrowing Integer Right Shift Instructions */ 1167 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1168 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1169 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1170 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1171 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1172 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1173 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1174 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1175 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1176 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1177 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1178 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1179 1180 /* Vector Integer Comparison Instructions */ 1181 #define DO_MSEQ(N, M) (N == M) 1182 #define DO_MSNE(N, M) (N != M) 1183 #define DO_MSLT(N, M) (N < M) 1184 #define DO_MSLE(N, M) (N <= M) 1185 #define DO_MSGT(N, M) (N > M) 1186 1187 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1188 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1189 CPURISCVState *env, uint32_t desc) \ 1190 { \ 1191 uint32_t vm = vext_vm(desc); \ 1192 uint32_t vl = env->vl; \ 1193 uint32_t i; \ 1194 \ 1195 for (i = 0; i < vl; i++) { \ 1196 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1197 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1198 if (!vm && !vext_elem_mask(v0, i)) { \ 1199 continue; \ 1200 } \ 1201 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1202 } \ 1203 } 1204 1205 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1206 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1207 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1208 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1209 1210 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1211 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1212 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1213 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1214 1215 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1216 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1217 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1218 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1219 1220 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1221 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1222 GEN_VEXT_CMP_VV(vmslt_vv_w, 
int32_t, H4, DO_MSLT) 1223 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1224 1225 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1226 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1227 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1228 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1229 1230 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1231 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1232 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1233 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1234 1235 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1236 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1237 CPURISCVState *env, uint32_t desc) \ 1238 { \ 1239 uint32_t vm = vext_vm(desc); \ 1240 uint32_t vl = env->vl; \ 1241 uint32_t i; \ 1242 \ 1243 for (i = 0; i < vl; i++) { \ 1244 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1245 if (!vm && !vext_elem_mask(v0, i)) { \ 1246 continue; \ 1247 } \ 1248 vext_set_elem_mask(vd, i, \ 1249 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1250 } \ 1251 } 1252 1253 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1254 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1255 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1256 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1257 1258 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1259 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1260 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1261 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1262 1263 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1264 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1265 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1266 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1267 1268 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1269 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1270 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1271 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1272 1273 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1274 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1275 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1276 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1277 1278 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1279 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1280 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1281 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1282 1283 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1284 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1285 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1286 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1287 1288 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1289 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1290 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1291 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1292 1293 /* Vector Integer Min/Max Instructions */ 1294 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1295 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1296 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1297 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1298 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1299 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1300 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1301 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1302 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1303 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1304 
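
/*
 * The unsigned variants reuse DO_MIN/DO_MAX: signedness comes from the
 * OP_UUU_* element types the macros are instantiated with, so the same
 * comparison yields vminu/vmaxu semantics on unsigned operands.
 */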
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1305 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1306 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1307 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1308 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1309 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1310 GEN_VEXT_VV(vminu_vv_b, 1, 1) 1311 GEN_VEXT_VV(vminu_vv_h, 2, 2) 1312 GEN_VEXT_VV(vminu_vv_w, 4, 4) 1313 GEN_VEXT_VV(vminu_vv_d, 8, 8) 1314 GEN_VEXT_VV(vmin_vv_b, 1, 1) 1315 GEN_VEXT_VV(vmin_vv_h, 2, 2) 1316 GEN_VEXT_VV(vmin_vv_w, 4, 4) 1317 GEN_VEXT_VV(vmin_vv_d, 8, 8) 1318 GEN_VEXT_VV(vmaxu_vv_b, 1, 1) 1319 GEN_VEXT_VV(vmaxu_vv_h, 2, 2) 1320 GEN_VEXT_VV(vmaxu_vv_w, 4, 4) 1321 GEN_VEXT_VV(vmaxu_vv_d, 8, 8) 1322 GEN_VEXT_VV(vmax_vv_b, 1, 1) 1323 GEN_VEXT_VV(vmax_vv_h, 2, 2) 1324 GEN_VEXT_VV(vmax_vv_w, 4, 4) 1325 GEN_VEXT_VV(vmax_vv_d, 8, 8) 1326 1327 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1328 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1329 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1330 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1331 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1332 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1333 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1334 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1335 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1336 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1337 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1338 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1339 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1340 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1341 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1342 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1343 GEN_VEXT_VX(vminu_vx_b, 1, 1) 1344 GEN_VEXT_VX(vminu_vx_h, 2, 2) 1345 GEN_VEXT_VX(vminu_vx_w, 4, 4) 1346 GEN_VEXT_VX(vminu_vx_d, 8, 8) 1347 GEN_VEXT_VX(vmin_vx_b, 1, 1) 1348 GEN_VEXT_VX(vmin_vx_h, 2, 2) 1349 GEN_VEXT_VX(vmin_vx_w, 4, 4) 1350 GEN_VEXT_VX(vmin_vx_d, 8, 8) 1351 GEN_VEXT_VX(vmaxu_vx_b, 1, 1) 1352 GEN_VEXT_VX(vmaxu_vx_h, 2, 2) 1353 GEN_VEXT_VX(vmaxu_vx_w, 4, 4) 1354 GEN_VEXT_VX(vmaxu_vx_d, 8, 8) 1355 GEN_VEXT_VX(vmax_vx_b, 1, 1) 1356 GEN_VEXT_VX(vmax_vx_h, 2, 2) 1357 GEN_VEXT_VX(vmax_vx_w, 4, 4) 1358 GEN_VEXT_VX(vmax_vx_d, 8, 8) 1359 1360 /* Vector Single-Width Integer Multiply Instructions */ 1361 #define DO_MUL(N, M) (N * M) 1362 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1363 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1364 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1365 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1366 GEN_VEXT_VV(vmul_vv_b, 1, 1) 1367 GEN_VEXT_VV(vmul_vv_h, 2, 2) 1368 GEN_VEXT_VV(vmul_vv_w, 4, 4) 1369 GEN_VEXT_VV(vmul_vv_d, 8, 8) 1370 1371 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1372 { 1373 return (int16_t)s2 * (int16_t)s1 >> 8; 1374 } 1375 1376 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1377 { 1378 return (int32_t)s2 * (int32_t)s1 >> 16; 1379 } 1380 1381 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1382 { 1383 return (int64_t)s2 * (int64_t)s1 >> 32; 1384 } 1385 1386 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1387 { 1388 uint64_t hi_64, lo_64; 1389 1390 muls64(&lo_64, &hi_64, s1, s2); 1391 return hi_64; 1392 } 1393 1394 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1395 { 1396 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1397 } 1398 1399 static 
uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1400 { 1401 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1402 } 1403 1404 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1405 { 1406 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1407 } 1408 1409 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1410 { 1411 uint64_t hi_64, lo_64; 1412 1413 mulu64(&lo_64, &hi_64, s2, s1); 1414 return hi_64; 1415 } 1416 1417 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1418 { 1419 return (int16_t)s2 * (uint16_t)s1 >> 8; 1420 } 1421 1422 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1423 { 1424 return (int32_t)s2 * (uint32_t)s1 >> 16; 1425 } 1426 1427 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1428 { 1429 return (int64_t)s2 * (uint64_t)s1 >> 32; 1430 } 1431 1432 /* 1433 * Let A = signed operand, 1434 * B = unsigned operand 1435 * P = mulu64(A, B), unsigned product 1436 * 1437 * LET X = 2 ** 64 - A, 2's complement of A 1438 * SP = signed product 1439 * THEN 1440 * IF A < 0 1441 * SP = -X * B 1442 * = -(2 ** 64 - A) * B 1443 * = A * B - 2 ** 64 * B 1444 * = P - 2 ** 64 * B 1445 * ELSE 1446 * SP = P 1447 * THEN 1448 * HI_P -= (A < 0 ? B : 0) 1449 */ 1450 1451 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1452 { 1453 uint64_t hi_64, lo_64; 1454 1455 mulu64(&lo_64, &hi_64, s2, s1); 1456 1457 hi_64 -= s2 < 0 ? s1 : 0; 1458 return hi_64; 1459 } 1460 1461 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1462 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1463 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1464 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1465 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1466 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1467 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1468 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1469 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1470 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1471 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1472 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1473 GEN_VEXT_VV(vmulh_vv_b, 1, 1) 1474 GEN_VEXT_VV(vmulh_vv_h, 2, 2) 1475 GEN_VEXT_VV(vmulh_vv_w, 4, 4) 1476 GEN_VEXT_VV(vmulh_vv_d, 8, 8) 1477 GEN_VEXT_VV(vmulhu_vv_b, 1, 1) 1478 GEN_VEXT_VV(vmulhu_vv_h, 2, 2) 1479 GEN_VEXT_VV(vmulhu_vv_w, 4, 4) 1480 GEN_VEXT_VV(vmulhu_vv_d, 8, 8) 1481 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) 1482 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) 1483 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) 1484 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) 1485 1486 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1487 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1488 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1489 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1490 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1491 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1492 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1493 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1494 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1495 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1496 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1497 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1498 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1499 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1500 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 
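
/*
 * Worked check of the do_mulhsu_d correction above, scaled to 8 bits for
 * readability: A = -2 (0xfe viewed unsigned), B = 3.  The unsigned product
 * is 0xfe * 0x03 = 0x02fa, high byte 0x02; since A < 0 the correction
 * subtracts B: 0x02 - 0x03 = 0xff, the high byte of the true signed
 * product -6 = 0xfffa.
 */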
1501 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1502 GEN_VEXT_VX(vmul_vx_b, 1, 1) 1503 GEN_VEXT_VX(vmul_vx_h, 2, 2) 1504 GEN_VEXT_VX(vmul_vx_w, 4, 4) 1505 GEN_VEXT_VX(vmul_vx_d, 8, 8) 1506 GEN_VEXT_VX(vmulh_vx_b, 1, 1) 1507 GEN_VEXT_VX(vmulh_vx_h, 2, 2) 1508 GEN_VEXT_VX(vmulh_vx_w, 4, 4) 1509 GEN_VEXT_VX(vmulh_vx_d, 8, 8) 1510 GEN_VEXT_VX(vmulhu_vx_b, 1, 1) 1511 GEN_VEXT_VX(vmulhu_vx_h, 2, 2) 1512 GEN_VEXT_VX(vmulhu_vx_w, 4, 4) 1513 GEN_VEXT_VX(vmulhu_vx_d, 8, 8) 1514 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) 1515 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) 1516 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) 1517 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) 1518 1519 /* Vector Integer Divide Instructions */ 1520 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1521 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1522 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1523 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1524 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1525 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1526 1527 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1528 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1529 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1530 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1531 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1532 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1533 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1534 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1535 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1536 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1537 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1538 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1539 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1540 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1541 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1542 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1543 GEN_VEXT_VV(vdivu_vv_b, 1, 1) 1544 GEN_VEXT_VV(vdivu_vv_h, 2, 2) 1545 GEN_VEXT_VV(vdivu_vv_w, 4, 4) 1546 GEN_VEXT_VV(vdivu_vv_d, 8, 8) 1547 GEN_VEXT_VV(vdiv_vv_b, 1, 1) 1548 GEN_VEXT_VV(vdiv_vv_h, 2, 2) 1549 GEN_VEXT_VV(vdiv_vv_w, 4, 4) 1550 GEN_VEXT_VV(vdiv_vv_d, 8, 8) 1551 GEN_VEXT_VV(vremu_vv_b, 1, 1) 1552 GEN_VEXT_VV(vremu_vv_h, 2, 2) 1553 GEN_VEXT_VV(vremu_vv_w, 4, 4) 1554 GEN_VEXT_VV(vremu_vv_d, 8, 8) 1555 GEN_VEXT_VV(vrem_vv_b, 1, 1) 1556 GEN_VEXT_VV(vrem_vv_h, 2, 2) 1557 GEN_VEXT_VV(vrem_vv_w, 4, 4) 1558 GEN_VEXT_VV(vrem_vv_d, 8, 8) 1559 1560 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1561 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1562 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1563 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1564 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1565 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1566 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1567 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1568 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1569 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1570 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1571 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1572 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1573 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1574 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1575 RVVCALL(OPIVX2, vrem_vx_d, 
OP_SSS_D, H8, H8, DO_REM) 1576 GEN_VEXT_VX(vdivu_vx_b, 1, 1) 1577 GEN_VEXT_VX(vdivu_vx_h, 2, 2) 1578 GEN_VEXT_VX(vdivu_vx_w, 4, 4) 1579 GEN_VEXT_VX(vdivu_vx_d, 8, 8) 1580 GEN_VEXT_VX(vdiv_vx_b, 1, 1) 1581 GEN_VEXT_VX(vdiv_vx_h, 2, 2) 1582 GEN_VEXT_VX(vdiv_vx_w, 4, 4) 1583 GEN_VEXT_VX(vdiv_vx_d, 8, 8) 1584 GEN_VEXT_VX(vremu_vx_b, 1, 1) 1585 GEN_VEXT_VX(vremu_vx_h, 2, 2) 1586 GEN_VEXT_VX(vremu_vx_w, 4, 4) 1587 GEN_VEXT_VX(vremu_vx_d, 8, 8) 1588 GEN_VEXT_VX(vrem_vx_b, 1, 1) 1589 GEN_VEXT_VX(vrem_vx_h, 2, 2) 1590 GEN_VEXT_VX(vrem_vx_w, 4, 4) 1591 GEN_VEXT_VX(vrem_vx_d, 8, 8) 1592 1593 /* Vector Widening Integer Multiply Instructions */ 1594 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1595 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1596 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1597 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1598 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1599 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1600 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1601 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1602 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1603 GEN_VEXT_VV(vwmul_vv_b, 1, 2) 1604 GEN_VEXT_VV(vwmul_vv_h, 2, 4) 1605 GEN_VEXT_VV(vwmul_vv_w, 4, 8) 1606 GEN_VEXT_VV(vwmulu_vv_b, 1, 2) 1607 GEN_VEXT_VV(vwmulu_vv_h, 2, 4) 1608 GEN_VEXT_VV(vwmulu_vv_w, 4, 8) 1609 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) 1610 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) 1611 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) 1612 1613 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1614 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1615 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1616 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1617 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1618 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1619 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1620 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1621 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1622 GEN_VEXT_VX(vwmul_vx_b, 1, 2) 1623 GEN_VEXT_VX(vwmul_vx_h, 2, 4) 1624 GEN_VEXT_VX(vwmul_vx_w, 4, 8) 1625 GEN_VEXT_VX(vwmulu_vx_b, 1, 2) 1626 GEN_VEXT_VX(vwmulu_vx_h, 2, 4) 1627 GEN_VEXT_VX(vwmulu_vx_w, 4, 8) 1628 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) 1629 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) 1630 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) 1631 1632 /* Vector Single-Width Integer Multiply-Add Instructions */ 1633 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1634 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1635 { \ 1636 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1637 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1638 TD d = *((TD *)vd + HD(i)); \ 1639 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1640 } 1641 1642 #define DO_MACC(N, M, D) (M * N + D) 1643 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1644 #define DO_MADD(N, M, D) (M * D + N) 1645 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1646 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1647 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1648 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1649 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1650 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1651 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1652 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1653 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1654 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, 
H1, H1, DO_MADD) 1655 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1656 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1657 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1658 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1659 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1660 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1661 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1662 GEN_VEXT_VV(vmacc_vv_b, 1, 1) 1663 GEN_VEXT_VV(vmacc_vv_h, 2, 2) 1664 GEN_VEXT_VV(vmacc_vv_w, 4, 4) 1665 GEN_VEXT_VV(vmacc_vv_d, 8, 8) 1666 GEN_VEXT_VV(vnmsac_vv_b, 1, 1) 1667 GEN_VEXT_VV(vnmsac_vv_h, 2, 2) 1668 GEN_VEXT_VV(vnmsac_vv_w, 4, 4) 1669 GEN_VEXT_VV(vnmsac_vv_d, 8, 8) 1670 GEN_VEXT_VV(vmadd_vv_b, 1, 1) 1671 GEN_VEXT_VV(vmadd_vv_h, 2, 2) 1672 GEN_VEXT_VV(vmadd_vv_w, 4, 4) 1673 GEN_VEXT_VV(vmadd_vv_d, 8, 8) 1674 GEN_VEXT_VV(vnmsub_vv_b, 1, 1) 1675 GEN_VEXT_VV(vnmsub_vv_h, 2, 2) 1676 GEN_VEXT_VV(vnmsub_vv_w, 4, 4) 1677 GEN_VEXT_VV(vnmsub_vv_d, 8, 8) 1678 1679 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1680 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1681 { \ 1682 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1683 TD d = *((TD *)vd + HD(i)); \ 1684 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1685 } 1686 1687 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1688 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1689 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1690 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1691 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1692 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1693 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1694 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1695 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1696 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1697 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1698 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1699 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1700 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1701 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1702 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1703 GEN_VEXT_VX(vmacc_vx_b, 1, 1) 1704 GEN_VEXT_VX(vmacc_vx_h, 2, 2) 1705 GEN_VEXT_VX(vmacc_vx_w, 4, 4) 1706 GEN_VEXT_VX(vmacc_vx_d, 8, 8) 1707 GEN_VEXT_VX(vnmsac_vx_b, 1, 1) 1708 GEN_VEXT_VX(vnmsac_vx_h, 2, 2) 1709 GEN_VEXT_VX(vnmsac_vx_w, 4, 4) 1710 GEN_VEXT_VX(vnmsac_vx_d, 8, 8) 1711 GEN_VEXT_VX(vmadd_vx_b, 1, 1) 1712 GEN_VEXT_VX(vmadd_vx_h, 2, 2) 1713 GEN_VEXT_VX(vmadd_vx_w, 4, 4) 1714 GEN_VEXT_VX(vmadd_vx_d, 8, 8) 1715 GEN_VEXT_VX(vnmsub_vx_b, 1, 1) 1716 GEN_VEXT_VX(vnmsub_vx_h, 2, 2) 1717 GEN_VEXT_VX(vnmsub_vx_w, 4, 4) 1718 GEN_VEXT_VX(vnmsub_vx_d, 8, 8) 1719 1720 /* Vector Widening Integer Multiply-Add Instructions */ 1721 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1722 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1723 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1724 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1725 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1726 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1727 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1728 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1729 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1730 
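/*
 * Illustrative note (not part of the generated helpers): for the
 * widening multiply-add forms above, the OPIVV3 expansion loads the
 * narrow source elements, converts them to the double-width
 * destination type, and accumulates into the double-width destination
 * element.  Assuming WOP_SSS_B stands for
 * (int16_t, int8_t, int8_t, int16_t, int16_t), as defined earlier in
 * this file, vwmacc_vv_b expands roughly to:
 *
 *   static void do_vwmacc_vv_b(void *vd, void *vs1, void *vs2, int i)
 *   {
 *       int16_t s1 = *((int8_t *)vs1 + H1(i));
 *       int16_t s2 = *((int8_t *)vs2 + H1(i));
 *       int16_t d = *((int16_t *)vd + H2(i));
 *       *((int16_t *)vd + H2(i)) = DO_MACC(s2, s1, d);
 *   }
 *
 * i.e. both sources are sign-extended before the multiply, and the
 * result is stored back into the 2*SEW-wide destination element.
 */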
GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) 1731 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) 1732 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) 1733 GEN_VEXT_VV(vwmacc_vv_b, 1, 2) 1734 GEN_VEXT_VV(vwmacc_vv_h, 2, 4) 1735 GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 1736 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) 1737 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) 1738 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) 1739 1740 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1741 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1742 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1743 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1744 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1745 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1746 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1747 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1748 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1749 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1750 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1751 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1752 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) 1753 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) 1754 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) 1755 GEN_VEXT_VX(vwmacc_vx_b, 1, 2) 1756 GEN_VEXT_VX(vwmacc_vx_h, 2, 4) 1757 GEN_VEXT_VX(vwmacc_vx_w, 4, 8) 1758 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) 1759 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) 1760 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) 1761 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) 1762 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) 1763 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) 1764 1765 /* Vector Integer Merge and Move Instructions */ 1766 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1767 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1768 uint32_t desc) \ 1769 { \ 1770 uint32_t vl = env->vl; \ 1771 uint32_t i; \ 1772 \ 1773 for (i = 0; i < vl; i++) { \ 1774 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1775 *((ETYPE *)vd + H(i)) = s1; \ 1776 } \ 1777 } 1778 1779 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1780 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1781 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1782 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1783 1784 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1785 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1786 uint32_t desc) \ 1787 { \ 1788 uint32_t vl = env->vl; \ 1789 uint32_t i; \ 1790 \ 1791 for (i = 0; i < vl; i++) { \ 1792 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1793 } \ 1794 } 1795 1796 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1797 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1798 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1799 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1800 1801 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1802 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1803 CPURISCVState *env, uint32_t desc) \ 1804 { \ 1805 uint32_t vl = env->vl; \ 1806 uint32_t i; \ 1807 \ 1808 for (i = 0; i < vl; i++) { \ 1809 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 1810 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1811 } \ 1812 } 1813 1814 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1815 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1816 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1817 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1818 1819 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1820 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1821 void *vs2, CPURISCVState *env, uint32_t desc) \ 1822 { \ 1823 uint32_t vl = env->vl; \ 1824 uint32_t i; \ 1825 \ 1826 for (i = 0; i < vl; i++) { \ 1827 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1828 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 1829 (ETYPE)(target_long)s1); \ 1830 *((ETYPE *)vd + H(i)) = d; \ 1831 } \ 1832 } 1833 1834 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1835 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1836 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1837 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1838 1839 /* 1840 *** Vector Fixed-Point Arithmetic Instructions 1841 */ 1842 1843 /* Vector Single-Width Saturating Add and Subtract */ 1844 1845 /* 1846 * As fixed point instructions probably have round mode and saturation, 1847 * define common macros for fixed point here. 1848 */ 1849 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1850 CPURISCVState *env, int vxrm); 1851 1852 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1853 static inline void \ 1854 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1855 CPURISCVState *env, int vxrm) \ 1856 { \ 1857 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1858 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1859 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1860 } 1861 1862 static inline void 1863 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1864 CPURISCVState *env, 1865 uint32_t vl, uint32_t vm, int vxrm, 1866 opivv2_rm_fn *fn) 1867 { 1868 for (uint32_t i = 0; i < vl; i++) { 1869 if (!vm && !vext_elem_mask(v0, i)) { 1870 continue; 1871 } 1872 fn(vd, vs1, vs2, i, env, vxrm); 1873 } 1874 } 1875 1876 static inline void 1877 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1878 CPURISCVState *env, 1879 uint32_t desc, uint32_t esz, uint32_t dsz, 1880 opivv2_rm_fn *fn) 1881 { 1882 uint32_t vm = vext_vm(desc); 1883 uint32_t vl = env->vl; 1884 1885 switch (env->vxrm) { 1886 case 0: /* rnu */ 1887 vext_vv_rm_1(vd, v0, vs1, vs2, 1888 env, vl, vm, 0, fn); 1889 break; 1890 case 1: /* rne */ 1891 vext_vv_rm_1(vd, v0, vs1, vs2, 1892 env, vl, vm, 1, fn); 1893 break; 1894 case 2: /* rdn */ 1895 vext_vv_rm_1(vd, v0, vs1, vs2, 1896 env, vl, vm, 2, fn); 1897 break; 1898 default: /* rod */ 1899 vext_vv_rm_1(vd, v0, vs1, vs2, 1900 env, vl, vm, 3, fn); 1901 break; 1902 } 1903 } 1904 1905 /* generate helpers for fixed point instructions with OPIVV format */ 1906 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ 1907 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1908 CPURISCVState *env, uint32_t desc) \ 1909 { \ 1910 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 1911 do_##NAME); \ 1912 } 1913 1914 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 1915 { 1916 uint8_t res = a + b; 1917 if (res < a) { 1918 res = UINT8_MAX; 1919 env->vxsat = 0x1; 1920 } 1921 return res; 1922 } 1923 1924 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1925 uint16_t b) 1926 { 1927 uint16_t res = a + b; 1928 if (res < a) { 1929 res = UINT16_MAX; 1930 env->vxsat = 0x1; 1931 } 1932 return res; 1933 } 1934 1935 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1936 uint32_t b) 1937 { 1938 uint32_t res = a + b; 1939 if (res < a) { 1940 res = UINT32_MAX; 1941 env->vxsat = 0x1; 1942 } 1943 return res; 1944 } 1945 1946 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1947 uint64_t b) 1948 { 1949 uint64_t res = a + b; 1950 if (res < a) { 1951 res = UINT64_MAX; 1952 env->vxsat = 0x1; 1953 } 1954 return res; 1955 } 1956 1957 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 1958 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 1959 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 1960 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, 
H8, H8, H8, saddu64) 1961 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) 1962 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) 1963 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) 1964 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) 1965 1966 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 1967 CPURISCVState *env, int vxrm); 1968 1969 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1970 static inline void \ 1971 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 1972 CPURISCVState *env, int vxrm) \ 1973 { \ 1974 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1975 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 1976 } 1977 1978 static inline void 1979 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 1980 CPURISCVState *env, 1981 uint32_t vl, uint32_t vm, int vxrm, 1982 opivx2_rm_fn *fn) 1983 { 1984 for (uint32_t i = 0; i < vl; i++) { 1985 if (!vm && !vext_elem_mask(v0, i)) { 1986 continue; 1987 } 1988 fn(vd, s1, vs2, i, env, vxrm); 1989 } 1990 } 1991 1992 static inline void 1993 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 1994 CPURISCVState *env, 1995 uint32_t desc, uint32_t esz, uint32_t dsz, 1996 opivx2_rm_fn *fn) 1997 { 1998 uint32_t vm = vext_vm(desc); 1999 uint32_t vl = env->vl; 2000 2001 switch (env->vxrm) { 2002 case 0: /* rnu */ 2003 vext_vx_rm_1(vd, v0, s1, vs2, 2004 env, vl, vm, 0, fn); 2005 break; 2006 case 1: /* rne */ 2007 vext_vx_rm_1(vd, v0, s1, vs2, 2008 env, vl, vm, 1, fn); 2009 break; 2010 case 2: /* rdn */ 2011 vext_vx_rm_1(vd, v0, s1, vs2, 2012 env, vl, vm, 2, fn); 2013 break; 2014 default: /* rod */ 2015 vext_vx_rm_1(vd, v0, s1, vs2, 2016 env, vl, vm, 3, fn); 2017 break; 2018 } 2019 } 2020 2021 /* generate helpers for fixed point instructions with OPIVX format */ 2022 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ 2023 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2024 void *vs2, CPURISCVState *env, uint32_t desc) \ 2025 { \ 2026 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 2027 do_##NAME); \ 2028 } 2029 2030 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2031 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2032 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2033 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2034 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 2035 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) 2036 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) 2037 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) 2038 2039 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2040 { 2041 int8_t res = a + b; 2042 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2043 res = a > 0 ? INT8_MAX : INT8_MIN; 2044 env->vxsat = 0x1; 2045 } 2046 return res; 2047 } 2048 2049 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2050 { 2051 int16_t res = a + b; 2052 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2053 res = a > 0 ? INT16_MAX : INT16_MIN; 2054 env->vxsat = 0x1; 2055 } 2056 return res; 2057 } 2058 2059 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2060 { 2061 int32_t res = a + b; 2062 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2063 res = a > 0 ? INT32_MAX : INT32_MIN; 2064 env->vxsat = 0x1; 2065 } 2066 return res; 2067 } 2068 2069 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2070 { 2071 int64_t res = a + b; 2072 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2073 res = a > 0 ? 
INT64_MAX : INT64_MIN; 2074 env->vxsat = 0x1; 2075 } 2076 return res; 2077 } 2078 2079 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2080 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2081 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2082 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2083 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) 2084 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) 2085 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) 2086 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) 2087 2088 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2089 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2090 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2091 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2092 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) 2093 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) 2094 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) 2095 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) 2096 2097 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2098 { 2099 uint8_t res = a - b; 2100 if (res > a) { 2101 res = 0; 2102 env->vxsat = 0x1; 2103 } 2104 return res; 2105 } 2106 2107 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2108 uint16_t b) 2109 { 2110 uint16_t res = a - b; 2111 if (res > a) { 2112 res = 0; 2113 env->vxsat = 0x1; 2114 } 2115 return res; 2116 } 2117 2118 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2119 uint32_t b) 2120 { 2121 uint32_t res = a - b; 2122 if (res > a) { 2123 res = 0; 2124 env->vxsat = 0x1; 2125 } 2126 return res; 2127 } 2128 2129 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2130 uint64_t b) 2131 { 2132 uint64_t res = a - b; 2133 if (res > a) { 2134 res = 0; 2135 env->vxsat = 0x1; 2136 } 2137 return res; 2138 } 2139 2140 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2141 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2142 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2143 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2144 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) 2145 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) 2146 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) 2147 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) 2148 2149 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2150 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2151 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2152 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2153 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) 2154 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) 2155 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) 2156 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) 2157 2158 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2159 { 2160 int8_t res = a - b; 2161 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2162 res = a >= 0 ? INT8_MAX : INT8_MIN; 2163 env->vxsat = 0x1; 2164 } 2165 return res; 2166 } 2167 2168 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2169 { 2170 int16_t res = a - b; 2171 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2172 res = a >= 0 ? INT16_MAX : INT16_MIN; 2173 env->vxsat = 0x1; 2174 } 2175 return res; 2176 } 2177 2178 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2179 { 2180 int32_t res = a - b; 2181 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2182 res = a >= 0 ? 
INT32_MAX : INT32_MIN; 2183 env->vxsat = 0x1; 2184 } 2185 return res; 2186 } 2187 2188 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2189 { 2190 int64_t res = a - b; 2191 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2192 res = a >= 0 ? INT64_MAX : INT64_MIN; 2193 env->vxsat = 0x1; 2194 } 2195 return res; 2196 } 2197 2198 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2199 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2200 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2201 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2202 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) 2203 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) 2204 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) 2205 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) 2206 2207 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2208 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2209 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2210 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2211 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) 2212 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) 2213 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) 2214 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) 2215 2216 /* Vector Single-Width Averaging Add and Subtract */ 2217 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2218 { 2219 uint8_t d = extract64(v, shift, 1); 2220 uint8_t d1; 2221 uint64_t D1, D2; 2222 2223 if (shift == 0 || shift > 64) { 2224 return 0; 2225 } 2226 2227 d1 = extract64(v, shift - 1, 1); 2228 D1 = extract64(v, 0, shift); 2229 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2230 return d1; 2231 } else if (vxrm == 1) { /* round-to-nearest-even */ 2232 if (shift > 1) { 2233 D2 = extract64(v, 0, shift - 1); 2234 return d1 & ((D2 != 0) | d); 2235 } else { 2236 return d1 & d; 2237 } 2238 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2239 return !d & (D1 != 0); 2240 } 2241 return 0; /* round-down (truncate) */ 2242 } 2243 2244 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2245 { 2246 int64_t res = (int64_t)a + b; 2247 uint8_t round = get_round(vxrm, res, 1); 2248 2249 return (res >> 1) + round; 2250 } 2251 2252 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2253 { 2254 int64_t res = a + b; 2255 uint8_t round = get_round(vxrm, res, 1); 2256 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2257 2258 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2259 return ((res >> 1) ^ over) + round; 2260 } 2261 2262 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2263 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2264 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2265 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2266 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) 2267 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 2268 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) 2269 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) 2270 2271 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2272 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2273 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2274 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2275 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) 2276 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) 2277 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) 2278 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) 2279 2280 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2281 uint32_t a, uint32_t b) 2282 { 2283 uint64_t res = (uint64_t)a + b; 2284 uint8_t round = get_round(vxrm, res, 1); 2285 2286 return (res >> 1) + round; 2287 } 2288 2289 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2290 uint64_t a, uint64_t b) 2291 { 2292 uint64_t res = a + b; 2293 uint8_t round = get_round(vxrm, res, 1); 2294 uint64_t over = (uint64_t)(res < a) << 63; 2295 2296 return ((res >> 1) | over) + round; 2297 } 2298 2299 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2300 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2301 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2302 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2303 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1) 2304 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2) 2305 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4) 2306 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8) 2307 2308 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2309 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2310 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2311 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2312 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1) 2313 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2) 2314 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4) 2315 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8) 2316 2317 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2318 { 2319 int64_t res = (int64_t)a - b; 2320 uint8_t round = get_round(vxrm, res, 1); 2321 2322 return (res >> 1) + round; 2323 } 2324 2325 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2326 { 2327 int64_t res = (int64_t)a - b; 2328 uint8_t round = get_round(vxrm, res, 1); 2329 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2330 2331 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2332 return ((res >> 1) ^ over) + round; 2333 } 2334 2335 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2336 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2337 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2338 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2339 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) 2340 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) 2341 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) 2342 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) 2343 2344 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2345 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2346 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2347 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2348 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) 2349 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) 2350 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) 2351 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) 2352 2353 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2354 uint32_t a, uint32_t b) 2355 { 2356 int64_t res = (int64_t)a - b; 2357 uint8_t round = get_round(vxrm, res, 1); 2358 2359 return (res >> 1) + round; 2360 } 2361 2362 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2363 uint64_t a, uint64_t b) 2364 { 2365 uint64_t res = (uint64_t)a - b; 2366 uint8_t round = get_round(vxrm, res, 1); 2367 uint64_t over = (uint64_t)(res > a) << 63; 2368 2369 return ((res >> 1) | over) + round; 2370 } 2371 2372 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2373 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2374 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2375 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2376 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1) 2377 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2) 2378 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4) 2379 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8) 2380 2381 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2382 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2383 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2384 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2385 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1) 2386 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2) 2387 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4) 2388 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8) 2389 2390 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2391 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2392 { 2393 uint8_t round; 2394 int16_t res; 2395 2396 res = (int16_t)a * (int16_t)b; 2397 round = get_round(vxrm, res, 7); 2398 res = (res >> 7) + round; 2399 2400 if (res > INT8_MAX) { 2401 env->vxsat = 0x1; 2402 return INT8_MAX; 2403 } else if (res < INT8_MIN) { 2404 env->vxsat = 0x1; 2405 return INT8_MIN; 2406 } else { 2407 return res; 2408 } 2409 } 2410 2411 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2412 { 2413 uint8_t round; 2414 int32_t res; 2415 2416 res = (int32_t)a * (int32_t)b; 2417 round = get_round(vxrm, res, 15); 2418 res = (res >> 15) + round; 2419 2420 if (res > INT16_MAX) { 2421 env->vxsat = 0x1; 2422 return INT16_MAX; 2423 } else if (res < INT16_MIN) { 2424 env->vxsat = 0x1; 2425 return INT16_MIN; 2426 } else { 2427 return res; 2428 } 2429 } 2430 2431 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2432 { 2433 uint8_t round; 2434 int64_t res; 2435 2436 res = (int64_t)a * (int64_t)b; 2437 round = get_round(vxrm, res, 31); 2438 res = (res >> 31) + round; 2439 2440 if (res > INT32_MAX) { 2441 env->vxsat = 0x1; 2442 return INT32_MAX; 2443 } else 
if (res < INT32_MIN) { 2444 env->vxsat = 0x1; 2445 return INT32_MIN; 2446 } else { 2447 return res; 2448 } 2449 } 2450 2451 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2452 { 2453 uint8_t round; 2454 uint64_t hi_64, lo_64; 2455 int64_t res; 2456 2457 if (a == INT64_MIN && b == INT64_MIN) { 2458 env->vxsat = 1; 2459 return INT64_MAX; 2460 } 2461 2462 muls64(&lo_64, &hi_64, a, b); 2463 round = get_round(vxrm, lo_64, 63); 2464 /* 2465 * Cannot overflow, as there are always 2466 * 2 sign bits after multiply. 2467 */ 2468 res = (hi_64 << 1) | (lo_64 >> 63); 2469 if (round) { 2470 if (res == INT64_MAX) { 2471 env->vxsat = 1; 2472 } else { 2473 res += 1; 2474 } 2475 } 2476 return res; 2477 } 2478 2479 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2480 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2481 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2482 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2483 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) 2484 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) 2485 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) 2486 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) 2487 2488 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2489 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2490 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2491 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2492 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) 2493 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) 2494 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) 2495 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) 2496 2497 /* Vector Single-Width Scaling Shift Instructions */ 2498 static inline uint8_t 2499 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2500 { 2501 uint8_t round, shift = b & 0x7; 2502 uint8_t res; 2503 2504 round = get_round(vxrm, a, shift); 2505 res = (a >> shift) + round; 2506 return res; 2507 } 2508 static inline uint16_t 2509 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2510 { 2511 uint8_t round, shift = b & 0xf; 2512 uint16_t res; 2513 2514 round = get_round(vxrm, a, shift); 2515 res = (a >> shift) + round; 2516 return res; 2517 } 2518 static inline uint32_t 2519 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2520 { 2521 uint8_t round, shift = b & 0x1f; 2522 uint32_t res; 2523 2524 round = get_round(vxrm, a, shift); 2525 res = (a >> shift) + round; 2526 return res; 2527 } 2528 static inline uint64_t 2529 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2530 { 2531 uint8_t round, shift = b & 0x3f; 2532 uint64_t res; 2533 2534 round = get_round(vxrm, a, shift); 2535 res = (a >> shift) + round; 2536 return res; 2537 } 2538 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2539 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2540 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2541 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2542 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) 2543 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) 2544 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) 2545 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) 2546 2547 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2548 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2549 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2550 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2551 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) 2552 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) 2553 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) 2554 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) 2555 2556 static inline int8_t 2557 vssra8(CPURISCVState *env, int 
vxrm, int8_t a, int8_t b) 2558 { 2559 uint8_t round, shift = b & 0x7; 2560 int8_t res; 2561 2562 round = get_round(vxrm, a, shift); 2563 res = (a >> shift) + round; 2564 return res; 2565 } 2566 static inline int16_t 2567 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2568 { 2569 uint8_t round, shift = b & 0xf; 2570 int16_t res; 2571 2572 round = get_round(vxrm, a, shift); 2573 res = (a >> shift) + round; 2574 return res; 2575 } 2576 static inline int32_t 2577 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2578 { 2579 uint8_t round, shift = b & 0x1f; 2580 int32_t res; 2581 2582 round = get_round(vxrm, a, shift); 2583 res = (a >> shift) + round; 2584 return res; 2585 } 2586 static inline int64_t 2587 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2588 { 2589 uint8_t round, shift = b & 0x3f; 2590 int64_t res; 2591 2592 round = get_round(vxrm, a, shift); 2593 res = (a >> shift) + round; 2594 return res; 2595 } 2596 2597 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2598 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2599 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2600 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2601 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) 2602 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) 2603 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) 2604 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) 2605 2606 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2607 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2608 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2609 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2610 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) 2611 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) 2612 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) 2613 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) 2614 2615 /* Vector Narrowing Fixed-Point Clip Instructions */ 2616 static inline int8_t 2617 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2618 { 2619 uint8_t round, shift = b & 0xf; 2620 int16_t res; 2621 2622 round = get_round(vxrm, a, shift); 2623 res = (a >> shift) + round; 2624 if (res > INT8_MAX) { 2625 env->vxsat = 0x1; 2626 return INT8_MAX; 2627 } else if (res < INT8_MIN) { 2628 env->vxsat = 0x1; 2629 return INT8_MIN; 2630 } else { 2631 return res; 2632 } 2633 } 2634 2635 static inline int16_t 2636 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2637 { 2638 uint8_t round, shift = b & 0x1f; 2639 int32_t res; 2640 2641 round = get_round(vxrm, a, shift); 2642 res = (a >> shift) + round; 2643 if (res > INT16_MAX) { 2644 env->vxsat = 0x1; 2645 return INT16_MAX; 2646 } else if (res < INT16_MIN) { 2647 env->vxsat = 0x1; 2648 return INT16_MIN; 2649 } else { 2650 return res; 2651 } 2652 } 2653 2654 static inline int32_t 2655 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2656 { 2657 uint8_t round, shift = b & 0x3f; 2658 int64_t res; 2659 2660 round = get_round(vxrm, a, shift); 2661 res = (a >> shift) + round; 2662 if (res > INT32_MAX) { 2663 env->vxsat = 0x1; 2664 return INT32_MAX; 2665 } else if (res < INT32_MIN) { 2666 env->vxsat = 0x1; 2667 return INT32_MIN; 2668 } else { 2669 return res; 2670 } 2671 } 2672 2673 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2674 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2675 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2676 GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1) 2677 GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2) 2678 GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4) 2679 2680 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, 
vnclip8) 2681 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2682 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2683 GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1) 2684 GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2) 2685 GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4) 2686 2687 static inline uint8_t 2688 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2689 { 2690 uint8_t round, shift = b & 0xf; 2691 uint16_t res; 2692 2693 round = get_round(vxrm, a, shift); 2694 res = (a >> shift) + round; 2695 if (res > UINT8_MAX) { 2696 env->vxsat = 0x1; 2697 return UINT8_MAX; 2698 } else { 2699 return res; 2700 } 2701 } 2702 2703 static inline uint16_t 2704 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2705 { 2706 uint8_t round, shift = b & 0x1f; 2707 uint32_t res; 2708 2709 round = get_round(vxrm, a, shift); 2710 res = (a >> shift) + round; 2711 if (res > UINT16_MAX) { 2712 env->vxsat = 0x1; 2713 return UINT16_MAX; 2714 } else { 2715 return res; 2716 } 2717 } 2718 2719 static inline uint32_t 2720 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2721 { 2722 uint8_t round, shift = b & 0x3f; 2723 uint64_t res; 2724 2725 round = get_round(vxrm, a, shift); 2726 res = (a >> shift) + round; 2727 if (res > UINT32_MAX) { 2728 env->vxsat = 0x1; 2729 return UINT32_MAX; 2730 } else { 2731 return res; 2732 } 2733 } 2734 2735 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2736 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2737 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2738 GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1) 2739 GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2) 2740 GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4) 2741 2742 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2743 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2744 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 2745 GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1) 2746 GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2) 2747 GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4) 2748 2749 /* 2750 *** Vector Float Point Arithmetic Instructions 2751 */ 2752 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2753 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2754 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2755 CPURISCVState *env) \ 2756 { \ 2757 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2758 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2759 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2760 } 2761 2762 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \ 2763 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2764 void *vs2, CPURISCVState *env, \ 2765 uint32_t desc) \ 2766 { \ 2767 uint32_t vm = vext_vm(desc); \ 2768 uint32_t vl = env->vl; \ 2769 uint32_t i; \ 2770 \ 2771 for (i = 0; i < vl; i++) { \ 2772 if (!vm && !vext_elem_mask(v0, i)) { \ 2773 continue; \ 2774 } \ 2775 do_##NAME(vd, vs1, vs2, i, env); \ 2776 } \ 2777 } 2778 2779 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2780 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2781 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2782 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) 2783 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4) 2784 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8) 2785 2786 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2787 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2788 CPURISCVState *env) \ 2789 { \ 2790 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2791 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 2792 } 2793 2794 #define GEN_VEXT_VF(NAME, 
ESZ, DSZ) \ 2795 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2796 void *vs2, CPURISCVState *env, \ 2797 uint32_t desc) \ 2798 { \ 2799 uint32_t vm = vext_vm(desc); \ 2800 uint32_t vl = env->vl; \ 2801 uint32_t i; \ 2802 \ 2803 for (i = 0; i < vl; i++) { \ 2804 if (!vm && !vext_elem_mask(v0, i)) { \ 2805 continue; \ 2806 } \ 2807 do_##NAME(vd, s1, vs2, i, env); \ 2808 } \ 2809 } 2810 2811 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2812 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2813 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2814 GEN_VEXT_VF(vfadd_vf_h, 2, 2) 2815 GEN_VEXT_VF(vfadd_vf_w, 4, 4) 2816 GEN_VEXT_VF(vfadd_vf_d, 8, 8) 2817 2818 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2819 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2820 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2821 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) 2822 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) 2823 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) 2824 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2825 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2826 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2827 GEN_VEXT_VF(vfsub_vf_h, 2, 2) 2828 GEN_VEXT_VF(vfsub_vf_w, 4, 4) 2829 GEN_VEXT_VF(vfsub_vf_d, 8, 8) 2830 2831 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2832 { 2833 return float16_sub(b, a, s); 2834 } 2835 2836 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2837 { 2838 return float32_sub(b, a, s); 2839 } 2840 2841 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2842 { 2843 return float64_sub(b, a, s); 2844 } 2845 2846 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2847 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2848 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2849 GEN_VEXT_VF(vfrsub_vf_h, 2, 2) 2850 GEN_VEXT_VF(vfrsub_vf_w, 4, 4) 2851 GEN_VEXT_VF(vfrsub_vf_d, 8, 8) 2852 2853 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2854 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2855 { 2856 return float32_add(float16_to_float32(a, true, s), 2857 float16_to_float32(b, true, s), s); 2858 } 2859 2860 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2861 { 2862 return float64_add(float32_to_float64(a, s), 2863 float32_to_float64(b, s), s); 2864 2865 } 2866 2867 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2868 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2869 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) 2870 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) 2871 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2872 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2873 GEN_VEXT_VF(vfwadd_vf_h, 2, 4) 2874 GEN_VEXT_VF(vfwadd_vf_w, 4, 8) 2875 2876 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2877 { 2878 return float32_sub(float16_to_float32(a, true, s), 2879 float16_to_float32(b, true, s), s); 2880 } 2881 2882 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2883 { 2884 return float64_sub(float32_to_float64(a, s), 2885 float32_to_float64(b, s), s); 2886 2887 } 2888 2889 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 2890 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 2891 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) 2892 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) 2893 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 2894 RVVCALL(OPFVF2, vfwsub_vf_w, 
WOP_UUU_W, H8, H4, vfwsub32) 2895 GEN_VEXT_VF(vfwsub_vf_h, 2, 4) 2896 GEN_VEXT_VF(vfwsub_vf_w, 4, 8) 2897 2898 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 2899 { 2900 return float32_add(a, float16_to_float32(b, true, s), s); 2901 } 2902 2903 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 2904 { 2905 return float64_add(a, float32_to_float64(b, s), s); 2906 } 2907 2908 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 2909 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 2910 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) 2911 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 2912 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 2913 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 2914 GEN_VEXT_VF(vfwadd_wf_h, 2, 4) 2915 GEN_VEXT_VF(vfwadd_wf_w, 4, 8) 2916 2917 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 2918 { 2919 return float32_sub(a, float16_to_float32(b, true, s), s); 2920 } 2921 2922 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 2923 { 2924 return float64_sub(a, float32_to_float64(b, s), s); 2925 } 2926 2927 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 2928 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 2929 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) 2930 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) 2931 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 2932 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 2933 GEN_VEXT_VF(vfwsub_wf_h, 2, 4) 2934 GEN_VEXT_VF(vfwsub_wf_w, 4, 8) 2935 2936 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 2937 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 2938 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 2939 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 2940 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) 2941 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) 2942 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) 2943 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 2944 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 2945 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 2946 GEN_VEXT_VF(vfmul_vf_h, 2, 2) 2947 GEN_VEXT_VF(vfmul_vf_w, 4, 4) 2948 GEN_VEXT_VF(vfmul_vf_d, 8, 8) 2949 2950 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 2951 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 2952 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 2953 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 2954 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) 2955 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) 2956 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 2957 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 2958 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 2959 GEN_VEXT_VF(vfdiv_vf_h, 2, 2) 2960 GEN_VEXT_VF(vfdiv_vf_w, 4, 4) 2961 GEN_VEXT_VF(vfdiv_vf_d, 8, 8) 2962 2963 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 2964 { 2965 return float16_div(b, a, s); 2966 } 2967 2968 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 2969 { 2970 return float32_div(b, a, s); 2971 } 2972 2973 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 2974 { 2975 return float64_div(b, a, s); 2976 } 2977 2978 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 2979 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 2980 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 2981 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) 2982 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) 2983 
GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) 2984 2985 /* Vector Widening Floating-Point Multiply */ 2986 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 2987 { 2988 return float32_mul(float16_to_float32(a, true, s), 2989 float16_to_float32(b, true, s), s); 2990 } 2991 2992 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 2993 { 2994 return float64_mul(float32_to_float64(a, s), 2995 float32_to_float64(b, s), s); 2996 2997 } 2998 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 2999 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3000 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) 3001 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) 3002 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3003 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3004 GEN_VEXT_VF(vfwmul_vf_h, 2, 4) 3005 GEN_VEXT_VF(vfwmul_vf_w, 4, 8) 3006 3007 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3008 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3009 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3010 CPURISCVState *env) \ 3011 { \ 3012 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3013 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3014 TD d = *((TD *)vd + HD(i)); \ 3015 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3016 } 3017 3018 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3019 { 3020 return float16_muladd(a, b, d, 0, s); 3021 } 3022 3023 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3024 { 3025 return float32_muladd(a, b, d, 0, s); 3026 } 3027 3028 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3029 { 3030 return float64_muladd(a, b, d, 0, s); 3031 } 3032 3033 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3034 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3035 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3036 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) 3037 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) 3038 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) 3039 3040 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3041 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3042 CPURISCVState *env) \ 3043 { \ 3044 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3045 TD d = *((TD *)vd + HD(i)); \ 3046 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3047 } 3048 3049 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3050 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3051 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3052 GEN_VEXT_VF(vfmacc_vf_h, 2, 2) 3053 GEN_VEXT_VF(vfmacc_vf_w, 4, 4) 3054 GEN_VEXT_VF(vfmacc_vf_d, 8, 8) 3055 3056 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3057 { 3058 return float16_muladd(a, b, d, 3059 float_muladd_negate_c | float_muladd_negate_product, s); 3060 } 3061 3062 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3063 { 3064 return float32_muladd(a, b, d, 3065 float_muladd_negate_c | float_muladd_negate_product, s); 3066 } 3067 3068 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3069 { 3070 return float64_muladd(a, b, d, 3071 float_muladd_negate_c | float_muladd_negate_product, s); 3072 } 3073 3074 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3075 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3076 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3077 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) 3078 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 
4) 3079 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) 3080 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3081 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3082 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3083 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) 3084 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) 3085 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) 3086 3087 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3088 { 3089 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3090 } 3091 3092 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3093 { 3094 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3095 } 3096 3097 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3098 { 3099 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3100 } 3101 3102 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3103 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3104 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3105 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) 3106 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) 3107 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) 3108 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3109 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3110 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3111 GEN_VEXT_VF(vfmsac_vf_h, 2, 2) 3112 GEN_VEXT_VF(vfmsac_vf_w, 4, 4) 3113 GEN_VEXT_VF(vfmsac_vf_d, 8, 8) 3114 3115 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3116 { 3117 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3118 } 3119 3120 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3121 { 3122 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3123 } 3124 3125 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3126 { 3127 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3128 } 3129 3130 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3131 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3132 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3133 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) 3134 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) 3135 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) 3136 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3137 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3138 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3139 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) 3140 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) 3141 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) 3142 3143 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3144 { 3145 return float16_muladd(d, b, a, 0, s); 3146 } 3147 3148 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3149 { 3150 return float32_muladd(d, b, a, 0, s); 3151 } 3152 3153 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3154 { 3155 return float64_muladd(d, b, a, 0, s); 3156 } 3157 3158 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3159 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3160 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3161 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) 3162 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) 3163 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) 3164 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3165 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3166 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3167 GEN_VEXT_VF(vfmadd_vf_h, 
2, 2) 3168 GEN_VEXT_VF(vfmadd_vf_w, 4, 4) 3169 GEN_VEXT_VF(vfmadd_vf_d, 8, 8) 3170 3171 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3172 { 3173 return float16_muladd(d, b, a, 3174 float_muladd_negate_c | float_muladd_negate_product, s); 3175 } 3176 3177 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3178 { 3179 return float32_muladd(d, b, a, 3180 float_muladd_negate_c | float_muladd_negate_product, s); 3181 } 3182 3183 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3184 { 3185 return float64_muladd(d, b, a, 3186 float_muladd_negate_c | float_muladd_negate_product, s); 3187 } 3188 3189 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3190 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3191 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3192 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) 3193 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) 3194 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) 3195 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3196 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3197 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3198 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) 3199 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) 3200 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) 3201 3202 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3203 { 3204 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3205 } 3206 3207 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3208 { 3209 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3210 } 3211 3212 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3213 { 3214 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3215 } 3216 3217 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3218 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3219 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3220 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) 3221 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 3222 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) 3223 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3224 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3225 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3226 GEN_VEXT_VF(vfmsub_vf_h, 2, 2) 3227 GEN_VEXT_VF(vfmsub_vf_w, 4, 4) 3228 GEN_VEXT_VF(vfmsub_vf_d, 8, 8) 3229 3230 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3231 { 3232 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3233 } 3234 3235 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3236 { 3237 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3238 } 3239 3240 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3241 { 3242 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3243 } 3244 3245 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3246 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3247 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3248 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) 3249 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) 3250 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) 3251 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3252 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3253 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3254 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) 3255 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) 3256 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) 
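/*
 * Summary (informational only) of how the single-width FP fused
 * multiply-add helpers above map onto the softfloat muladd primitives.
 * With d the old destination element and s1/s2 the vs1/vs2 (or scalar)
 * operands, the OPFVV3/OPFVF3 wrappers call OP(s2, s1, d), so
 * (float_muladd_* flag names abbreviated):
 *
 *   vfmacc:  muladd(s2, s1, d, 0)                          =  (vs1 * vs2) + vd
 *   vfnmacc: muladd(s2, s1, d, negate_c | negate_product)  = -(vs1 * vs2) - vd
 *   vfmsac:  muladd(s2, s1, d, negate_c)                   =  (vs1 * vs2) - vd
 *   vfnmsac: muladd(s2, s1, d, negate_product)             = -(vs1 * vs2) + vd
 *   vfmadd:  muladd(d, s1, s2, 0)                          =  (vs1 * vd) + vs2
 *   vfnmadd: muladd(d, s1, s2, negate_c | negate_product)  = -(vs1 * vd) - vs2
 *   vfmsub:  muladd(d, s1, s2, negate_c)                   =  (vs1 * vd) - vs2
 *   vfnmsub: muladd(d, s1, s2, negate_product)             = -(vs1 * vd) + vs2
 *
 * All of these are fused: float16/32/64_muladd applies a single
 * rounding to the final result.
 */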
3257 3258 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3259 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3260 { 3261 return float32_muladd(float16_to_float32(a, true, s), 3262 float16_to_float32(b, true, s), d, 0, s); 3263 } 3264 3265 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3266 { 3267 return float64_muladd(float32_to_float64(a, s), 3268 float32_to_float64(b, s), d, 0, s); 3269 } 3270 3271 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3272 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3273 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) 3274 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) 3275 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3276 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3277 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) 3278 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) 3279 3280 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3281 { 3282 return float32_muladd(float16_to_float32(a, true, s), 3283 float16_to_float32(b, true, s), d, 3284 float_muladd_negate_c | float_muladd_negate_product, s); 3285 } 3286 3287 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3288 { 3289 return float64_muladd(float32_to_float64(a, s), 3290 float32_to_float64(b, s), d, 3291 float_muladd_negate_c | float_muladd_negate_product, s); 3292 } 3293 3294 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3295 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3296 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) 3297 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) 3298 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3299 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3300 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) 3301 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) 3302 3303 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3304 { 3305 return float32_muladd(float16_to_float32(a, true, s), 3306 float16_to_float32(b, true, s), d, 3307 float_muladd_negate_c, s); 3308 } 3309 3310 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3311 { 3312 return float64_muladd(float32_to_float64(a, s), 3313 float32_to_float64(b, s), d, 3314 float_muladd_negate_c, s); 3315 } 3316 3317 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3318 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3319 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) 3320 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) 3321 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3322 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3323 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) 3324 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) 3325 3326 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3327 { 3328 return float32_muladd(float16_to_float32(a, true, s), 3329 float16_to_float32(b, true, s), d, 3330 float_muladd_negate_product, s); 3331 } 3332 3333 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3334 { 3335 return float64_muladd(float32_to_float64(a, s), 3336 float32_to_float64(b, s), d, 3337 float_muladd_negate_product, s); 3338 } 3339 3340 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3341 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3342 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) 3343 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 3344 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3345 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, 
fwnmsac32) 3346 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) 3347 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) 3348 3349 /* Vector Floating-Point Square-Root Instruction */ 3350 /* (TD, T2, TX2) */ 3351 #define OP_UU_H uint16_t, uint16_t, uint16_t 3352 #define OP_UU_W uint32_t, uint32_t, uint32_t 3353 #define OP_UU_D uint64_t, uint64_t, uint64_t 3354 3355 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3356 static void do_##NAME(void *vd, void *vs2, int i, \ 3357 CPURISCVState *env) \ 3358 { \ 3359 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3360 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3361 } 3362 3363 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ 3364 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3365 CPURISCVState *env, uint32_t desc) \ 3366 { \ 3367 uint32_t vm = vext_vm(desc); \ 3368 uint32_t vl = env->vl; \ 3369 uint32_t i; \ 3370 \ 3371 if (vl == 0) { \ 3372 return; \ 3373 } \ 3374 for (i = 0; i < vl; i++) { \ 3375 if (!vm && !vext_elem_mask(v0, i)) { \ 3376 continue; \ 3377 } \ 3378 do_##NAME(vd, vs2, i, env); \ 3379 } \ 3380 } 3381 3382 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3383 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3384 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3385 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) 3386 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) 3387 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) 3388 3389 /* Vector Floating-Point MIN/MAX Instructions */ 3390 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3391 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) 3392 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3393 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) 3394 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) 3395 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) 3396 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3397 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3398 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3399 GEN_VEXT_VF(vfmin_vf_h, 2, 2) 3400 GEN_VEXT_VF(vfmin_vf_w, 4, 4) 3401 GEN_VEXT_VF(vfmin_vf_d, 8, 8) 3402 3403 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3404 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3405 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3406 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) 3407 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) 3408 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) 3409 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3410 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3411 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3412 GEN_VEXT_VF(vfmax_vf_h, 2, 2) 3413 GEN_VEXT_VF(vfmax_vf_w, 4, 4) 3414 GEN_VEXT_VF(vfmax_vf_d, 8, 8) 3415 3416 /* Vector Floating-Point Sign-Injection Instructions */ 3417 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3418 { 3419 return deposit64(b, 0, 15, a); 3420 } 3421 3422 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3423 { 3424 return deposit64(b, 0, 31, a); 3425 } 3426 3427 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3428 { 3429 return deposit64(b, 0, 63, a); 3430 } 3431 3432 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3433 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3434 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3435 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) 3436 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) 3437 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) 3438 RVVCALL(OPFVF2, 
vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3439 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3440 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3441 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) 3442 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) 3443 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) 3444 3445 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3446 { 3447 return deposit64(~b, 0, 15, a); 3448 } 3449 3450 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3451 { 3452 return deposit64(~b, 0, 31, a); 3453 } 3454 3455 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3456 { 3457 return deposit64(~b, 0, 63, a); 3458 } 3459 3460 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3461 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3462 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3463 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) 3464 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) 3465 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) 3466 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3467 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3468 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3469 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) 3470 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) 3471 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) 3472 3473 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3474 { 3475 return deposit64(b ^ a, 0, 15, a); 3476 } 3477 3478 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3479 { 3480 return deposit64(b ^ a, 0, 31, a); 3481 } 3482 3483 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3484 { 3485 return deposit64(b ^ a, 0, 63, a); 3486 } 3487 3488 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3489 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3490 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3491 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) 3492 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) 3493 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) 3494 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3495 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3496 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3497 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) 3498 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) 3499 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) 3500 3501 /* Vector Floating-Point Compare Instructions */ 3502 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3503 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3504 CPURISCVState *env, uint32_t desc) \ 3505 { \ 3506 uint32_t vm = vext_vm(desc); \ 3507 uint32_t vl = env->vl; \ 3508 uint32_t i; \ 3509 \ 3510 for (i = 0; i < vl; i++) { \ 3511 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3512 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3513 if (!vm && !vext_elem_mask(v0, i)) { \ 3514 continue; \ 3515 } \ 3516 vext_set_elem_mask(vd, i, \ 3517 DO_OP(s2, s1, &env->fp_status)); \ 3518 } \ 3519 } 3520 3521 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3522 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3523 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3524 3525 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3526 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3527 CPURISCVState *env, uint32_t desc) \ 3528 { \ 3529 uint32_t vm = vext_vm(desc); \ 3530 uint32_t vl = env->vl; \ 3531 uint32_t i; \ 3532 \ 3533 for (i = 0; i < vl; i++) { \ 3534 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3535 if (!vm && !vext_elem_mask(v0, i)) { \ 3536 continue; \ 3537 } \ 3538 
vext_set_elem_mask(vd, i, \ 3539 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3540 } \ 3541 } 3542 3543 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3544 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3545 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3546 3547 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3548 { 3549 FloatRelation compare = float16_compare_quiet(a, b, s); 3550 return compare != float_relation_equal; 3551 } 3552 3553 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 3554 { 3555 FloatRelation compare = float32_compare_quiet(a, b, s); 3556 return compare != float_relation_equal; 3557 } 3558 3559 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 3560 { 3561 FloatRelation compare = float64_compare_quiet(a, b, s); 3562 return compare != float_relation_equal; 3563 } 3564 3565 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 3566 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 3567 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 3568 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 3569 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 3570 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 3571 3572 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 3573 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 3574 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 3575 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 3576 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 3577 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 3578 3579 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 3580 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 3581 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 3582 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 3583 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 3584 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 3585 3586 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 3587 { 3588 FloatRelation compare = float16_compare(a, b, s); 3589 return compare == float_relation_greater; 3590 } 3591 3592 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 3593 { 3594 FloatRelation compare = float32_compare(a, b, s); 3595 return compare == float_relation_greater; 3596 } 3597 3598 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 3599 { 3600 FloatRelation compare = float64_compare(a, b, s); 3601 return compare == float_relation_greater; 3602 } 3603 3604 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 3605 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 3606 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 3607 3608 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 3609 { 3610 FloatRelation compare = float16_compare(a, b, s); 3611 return compare == float_relation_greater || 3612 compare == float_relation_equal; 3613 } 3614 3615 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 3616 { 3617 FloatRelation compare = float32_compare(a, b, s); 3618 return compare == float_relation_greater || 3619 compare == float_relation_equal; 3620 } 3621 3622 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 3623 { 3624 FloatRelation compare = float64_compare(a, b, s); 3625 return compare == float_relation_greater || 3626 compare == float_relation_equal; 3627 } 3628 3629 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 3630 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 3631 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 3632 
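/*
 * Editor's sketch (not part of the upstream helpers; the sketch_*
 * name is hypothetical).  Each compare helper above writes one mask
 * *bit* per element, packed into 64-bit words the same way
 * vext_set_elem_mask() packs them, and the operand order is
 * DO_OP(vs2[i], vs1[i]).  vmfeq/vmfne use the quiet softfloat
 * comparisons, while vmflt/vmfle/vmfgt/vmfge use the signaling ones,
 * so only the ordering predicates raise the invalid flag on a quiet
 * NaN input.  The helper below restates vmflt.vv for an unmasked
 * operation using plain C floats; it mirrors the bit packing but not
 * softfloat's exception-flag behaviour.
 */
static void sketch_vmflt_vv_mask(uint64_t *mask, const float *vs2,
                                 const float *vs1, unsigned vl)
{
    for (unsigned i = 0; i < vl; i++) {
        /* ordered "less than": false whenever either operand is NaN */
        uint64_t bit = (vs2[i] < vs1[i]) ? 1 : 0;
        /* element i lives at bit (i % 64) of mask word (i / 64) */
        mask[i / 64] &= ~(UINT64_C(1) << (i % 64));
        mask[i / 64] |= bit << (i % 64);
    }
}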
3633 /* Vector Floating-Point Classify Instruction */ 3634 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3635 static void do_##NAME(void *vd, void *vs2, int i) \ 3636 { \ 3637 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3638 *((TD *)vd + HD(i)) = OP(s2); \ 3639 } 3640 3641 #define GEN_VEXT_V(NAME, ESZ, DSZ) \ 3642 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3643 CPURISCVState *env, uint32_t desc) \ 3644 { \ 3645 uint32_t vm = vext_vm(desc); \ 3646 uint32_t vl = env->vl; \ 3647 uint32_t i; \ 3648 \ 3649 for (i = 0; i < vl; i++) { \ 3650 if (!vm && !vext_elem_mask(v0, i)) { \ 3651 continue; \ 3652 } \ 3653 do_##NAME(vd, vs2, i); \ 3654 } \ 3655 } 3656 3657 target_ulong fclass_h(uint64_t frs1) 3658 { 3659 float16 f = frs1; 3660 bool sign = float16_is_neg(f); 3661 3662 if (float16_is_infinity(f)) { 3663 return sign ? 1 << 0 : 1 << 7; 3664 } else if (float16_is_zero(f)) { 3665 return sign ? 1 << 3 : 1 << 4; 3666 } else if (float16_is_zero_or_denormal(f)) { 3667 return sign ? 1 << 2 : 1 << 5; 3668 } else if (float16_is_any_nan(f)) { 3669 float_status s = { }; /* for snan_bit_is_one */ 3670 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3671 } else { 3672 return sign ? 1 << 1 : 1 << 6; 3673 } 3674 } 3675 3676 target_ulong fclass_s(uint64_t frs1) 3677 { 3678 float32 f = frs1; 3679 bool sign = float32_is_neg(f); 3680 3681 if (float32_is_infinity(f)) { 3682 return sign ? 1 << 0 : 1 << 7; 3683 } else if (float32_is_zero(f)) { 3684 return sign ? 1 << 3 : 1 << 4; 3685 } else if (float32_is_zero_or_denormal(f)) { 3686 return sign ? 1 << 2 : 1 << 5; 3687 } else if (float32_is_any_nan(f)) { 3688 float_status s = { }; /* for snan_bit_is_one */ 3689 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3690 } else { 3691 return sign ? 1 << 1 : 1 << 6; 3692 } 3693 } 3694 3695 target_ulong fclass_d(uint64_t frs1) 3696 { 3697 float64 f = frs1; 3698 bool sign = float64_is_neg(f); 3699 3700 if (float64_is_infinity(f)) { 3701 return sign ? 1 << 0 : 1 << 7; 3702 } else if (float64_is_zero(f)) { 3703 return sign ? 1 << 3 : 1 << 4; 3704 } else if (float64_is_zero_or_denormal(f)) { 3705 return sign ? 1 << 2 : 1 << 5; 3706 } else if (float64_is_any_nan(f)) { 3707 float_status s = { }; /* for snan_bit_is_one */ 3708 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3709 } else { 3710 return sign ? 1 << 1 : 1 << 6; 3711 } 3712 } 3713 3714 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 3715 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 3716 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 3717 GEN_VEXT_V(vfclass_v_h, 2, 2) 3718 GEN_VEXT_V(vfclass_v_w, 4, 4) 3719 GEN_VEXT_V(vfclass_v_d, 8, 8) 3720 3721 /* Vector Floating-Point Merge Instruction */ 3722 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 3723 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3724 CPURISCVState *env, uint32_t desc) \ 3725 { \ 3726 uint32_t vm = vext_vm(desc); \ 3727 uint32_t vl = env->vl; \ 3728 uint32_t i; \ 3729 \ 3730 for (i = 0; i < vl; i++) { \ 3731 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3732 *((ETYPE *)vd + H(i)) \ 3733 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 3734 } \ 3735 } 3736 3737 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 3738 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 3739 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 3740 3741 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 3742 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
*/ 3743 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 3744 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 3745 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 3746 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) 3747 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) 3748 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) 3749 3750 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 3751 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 3752 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 3753 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 3754 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) 3755 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) 3756 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) 3757 3758 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 3759 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 3760 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 3761 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 3762 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) 3763 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) 3764 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) 3765 3766 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 3767 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 3768 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 3769 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 3770 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) 3771 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) 3772 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) 3773 3774 /* Widening Floating-Point/Integer Type-Convert Instructions */ 3775 /* (TD, T2, TX2) */ 3776 #define WOP_UU_B uint16_t, uint8_t, uint8_t 3777 #define WOP_UU_H uint32_t, uint16_t, uint16_t 3778 #define WOP_UU_W uint64_t, uint32_t, uint32_t 3779 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 3780 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 3781 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 3782 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) 3783 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) 3784 3785 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 3786 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 3787 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 3788 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) 3789 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) 3790 3791 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 3792 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 3793 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 3794 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 3795 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2) 3796 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) 3797 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) 3798 3799 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 3800 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 3801 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 3802 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 3803 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2) 3804 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) 3805 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) 3806 3807 /* 3808 * vfwcvt.f.f.v vd, vs2, vm 3809 * Convert single-width float to double-width float. 
3810 */ 3811 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 3812 { 3813 return float16_to_float32(a, true, s); 3814 } 3815 3816 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 3817 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 3818 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4) 3819 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) 3820 3821 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 3822 /* (TD, T2, TX2) */ 3823 #define NOP_UU_B uint8_t, uint16_t, uint32_t 3824 #define NOP_UU_H uint16_t, uint32_t, uint32_t 3825 #define NOP_UU_W uint32_t, uint64_t, uint64_t 3826 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 3827 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 3828 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 3829 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 3830 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1) 3831 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2) 3832 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4) 3833 3834 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 3835 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 3836 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 3837 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 3838 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1) 3839 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2) 3840 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4) 3841 3842 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 3843 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 3844 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 3845 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2) 3846 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4) 3847 3848 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 3849 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 3850 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 3851 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2) 3852 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4) 3853 3854 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 3855 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 3856 { 3857 return float32_to_float16(a, true, s); 3858 } 3859 3860 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 3861 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 3862 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2) 3863 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4) 3864 3865 /* 3866 *** Vector Reduction Operations 3867 */ 3868 /* Vector Single-Width Integer Reduction Instructions */ 3869 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 3870 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3871 void *vs2, CPURISCVState *env, uint32_t desc) \ 3872 { \ 3873 uint32_t vm = vext_vm(desc); \ 3874 uint32_t vl = env->vl; \ 3875 uint32_t i; \ 3876 TD s1 = *((TD *)vs1 + HD(0)); \ 3877 \ 3878 for (i = 0; i < vl; i++) { \ 3879 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 3880 if (!vm && !vext_elem_mask(v0, i)) { \ 3881 continue; \ 3882 } \ 3883 s1 = OP(s1, (TD)s2); \ 3884 } \ 3885 *((TD *)vd + HD(0)) = s1; \ 3886 } 3887 3888 /* vd[0] = sum(vs1[0], vs2[*]) */ 3889 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 3890 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 3891 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 3892 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 3893 3894 /* vd[0] = maxu(vs1[0], vs2[*]) */ 3895 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 3896 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 3897 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 3898 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 3899 3900 /* vd[0] = max(vs1[0], vs2[*]) */ 3901 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 3902 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 3903 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 3904 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 3905 3906 /* vd[0] = minu(vs1[0], vs2[*]) */ 3907 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 3908 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 3909 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 3910 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 3911 3912 /* vd[0] = min(vs1[0], vs2[*]) */ 3913 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 3914 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 3915 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 3916 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 3917 3918 /* vd[0] = and(vs1[0], vs2[*]) */ 3919 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 3920 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 3921 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 3922 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 3923 3924 /* vd[0] = or(vs1[0], vs2[*]) */ 3925 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 3926 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 3927 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 3928 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 3929 3930 /* vd[0] = xor(vs1[0], vs2[*]) */ 3931 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 3932 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 3933 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 3934 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 3935 3936 /* Vector Widening Integer Reduction Instructions */ 3937 /* signed sum 
reduction into double-width accumulator */ 3938 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 3939 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 3940 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 3941 3942 /* Unsigned sum reduction into double-width accumulator */ 3943 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 3944 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 3945 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 3946 3947 /* Vector Single-Width Floating-Point Reduction Instructions */ 3948 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 3949 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3950 void *vs2, CPURISCVState *env, \ 3951 uint32_t desc) \ 3952 { \ 3953 uint32_t vm = vext_vm(desc); \ 3954 uint32_t vl = env->vl; \ 3955 uint32_t i; \ 3956 TD s1 = *((TD *)vs1 + HD(0)); \ 3957 \ 3958 for (i = 0; i < vl; i++) { \ 3959 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 3960 if (!vm && !vext_elem_mask(v0, i)) { \ 3961 continue; \ 3962 } \ 3963 s1 = OP(s1, (TD)s2, &env->fp_status); \ 3964 } \ 3965 *((TD *)vd + HD(0)) = s1; \ 3966 } 3967 3968 /* Unordered sum */ 3969 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 3970 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 3971 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 3972 3973 /* Maximum value */ 3974 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 3975 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 3976 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 3977 3978 /* Minimum value */ 3979 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 3980 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 3981 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 3982 3983 /* Vector Widening Floating-Point Reduction Instructions */ 3984 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 3985 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 3986 void *vs2, CPURISCVState *env, uint32_t desc) 3987 { 3988 uint32_t vm = vext_vm(desc); 3989 uint32_t vl = env->vl; 3990 uint32_t i; 3991 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 3992 3993 for (i = 0; i < vl; i++) { 3994 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 3995 if (!vm && !vext_elem_mask(v0, i)) { 3996 continue; 3997 } 3998 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 3999 &env->fp_status); 4000 } 4001 *((uint32_t *)vd + H4(0)) = s1; 4002 } 4003 4004 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4005 void *vs2, CPURISCVState *env, uint32_t desc) 4006 { 4007 uint32_t vm = vext_vm(desc); 4008 uint32_t vl = env->vl; 4009 uint32_t i; 4010 uint64_t s1 = *((uint64_t *)vs1); 4011 4012 for (i = 0; i < vl; i++) { 4013 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4014 if (!vm && !vext_elem_mask(v0, i)) { 4015 continue; 4016 } 4017 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4018 &env->fp_status); 4019 } 4020 *((uint64_t *)vd) = s1; 4021 } 4022 4023 /* 4024 *** Vector Mask Operations 4025 */ 4026 /* Vector Mask-Register Logical Instructions */ 4027 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4028 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4029 void *vs2, CPURISCVState *env, \ 4030 uint32_t desc) \ 4031 { \ 4032 uint32_t vl = env->vl; \ 4033 uint32_t i; \ 4034 int a, b; \ 4035 \ 4036 for (i = 0; i < vl; i++) { 
\ 4037 a = vext_elem_mask(vs1, i); \ 4038 b = vext_elem_mask(vs2, i); \ 4039 vext_set_elem_mask(vd, i, OP(b, a)); \ 4040 } \ 4041 } 4042 4043 #define DO_NAND(N, M) (!(N & M)) 4044 #define DO_ANDNOT(N, M) (N & !M) 4045 #define DO_NOR(N, M) (!(N | M)) 4046 #define DO_ORNOT(N, M) (N | !M) 4047 #define DO_XNOR(N, M) (!(N ^ M)) 4048 4049 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4050 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4051 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) 4052 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4053 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4054 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4055 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) 4056 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4057 4058 /* Vector count population in mask vcpop */ 4059 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4060 uint32_t desc) 4061 { 4062 target_ulong cnt = 0; 4063 uint32_t vm = vext_vm(desc); 4064 uint32_t vl = env->vl; 4065 int i; 4066 4067 for (i = 0; i < vl; i++) { 4068 if (vm || vext_elem_mask(v0, i)) { 4069 if (vext_elem_mask(vs2, i)) { 4070 cnt++; 4071 } 4072 } 4073 } 4074 return cnt; 4075 } 4076 4077 /* vfirst find-first-set mask bit*/ 4078 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4079 uint32_t desc) 4080 { 4081 uint32_t vm = vext_vm(desc); 4082 uint32_t vl = env->vl; 4083 int i; 4084 4085 for (i = 0; i < vl; i++) { 4086 if (vm || vext_elem_mask(v0, i)) { 4087 if (vext_elem_mask(vs2, i)) { 4088 return i; 4089 } 4090 } 4091 } 4092 return -1LL; 4093 } 4094 4095 enum set_mask_type { 4096 ONLY_FIRST = 1, 4097 INCLUDE_FIRST, 4098 BEFORE_FIRST, 4099 }; 4100 4101 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4102 uint32_t desc, enum set_mask_type type) 4103 { 4104 uint32_t vm = vext_vm(desc); 4105 uint32_t vl = env->vl; 4106 int i; 4107 bool first_mask_bit = false; 4108 4109 for (i = 0; i < vl; i++) { 4110 if (!vm && !vext_elem_mask(v0, i)) { 4111 continue; 4112 } 4113 /* write a zero to all following active elements */ 4114 if (first_mask_bit) { 4115 vext_set_elem_mask(vd, i, 0); 4116 continue; 4117 } 4118 if (vext_elem_mask(vs2, i)) { 4119 first_mask_bit = true; 4120 if (type == BEFORE_FIRST) { 4121 vext_set_elem_mask(vd, i, 0); 4122 } else { 4123 vext_set_elem_mask(vd, i, 1); 4124 } 4125 } else { 4126 if (type == ONLY_FIRST) { 4127 vext_set_elem_mask(vd, i, 0); 4128 } else { 4129 vext_set_elem_mask(vd, i, 1); 4130 } 4131 } 4132 } 4133 } 4134 4135 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4136 uint32_t desc) 4137 { 4138 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4139 } 4140 4141 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4142 uint32_t desc) 4143 { 4144 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4145 } 4146 4147 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4148 uint32_t desc) 4149 { 4150 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4151 } 4152 4153 /* Vector Iota Instruction */ 4154 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4155 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4156 uint32_t desc) \ 4157 { \ 4158 uint32_t vm = vext_vm(desc); \ 4159 uint32_t vl = env->vl; \ 4160 uint32_t sum = 0; \ 4161 int i; \ 4162 \ 4163 for (i = 0; i < vl; i++) { \ 4164 if (!vm && !vext_elem_mask(v0, i)) { \ 4165 continue; \ 4166 } \ 4167 *((ETYPE *)vd + H(i)) = sum; \ 4168 if (vext_elem_mask(vs2, i)) { \ 4169 sum++; \ 4170 } \ 4171 } \ 4172 } 4173 4174 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4175 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4176 
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4177 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4178 4179 /* Vector Element Index Instruction */ 4180 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4181 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4182 { \ 4183 uint32_t vm = vext_vm(desc); \ 4184 uint32_t vl = env->vl; \ 4185 int i; \ 4186 \ 4187 for (i = 0; i < vl; i++) { \ 4188 if (!vm && !vext_elem_mask(v0, i)) { \ 4189 continue; \ 4190 } \ 4191 *((ETYPE *)vd + H(i)) = i; \ 4192 } \ 4193 } 4194 4195 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4196 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4197 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4198 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4199 4200 /* 4201 *** Vector Permutation Instructions 4202 */ 4203 4204 /* Vector Slide Instructions */ 4205 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4206 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4207 CPURISCVState *env, uint32_t desc) \ 4208 { \ 4209 uint32_t vm = vext_vm(desc); \ 4210 uint32_t vl = env->vl; \ 4211 target_ulong offset = s1, i; \ 4212 \ 4213 for (i = offset; i < vl; i++) { \ 4214 if (!vm && !vext_elem_mask(v0, i)) { \ 4215 continue; \ 4216 } \ 4217 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4218 } \ 4219 } 4220 4221 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4222 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4223 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4224 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4225 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4226 4227 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4228 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4229 CPURISCVState *env, uint32_t desc) \ 4230 { \ 4231 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4232 uint32_t vm = vext_vm(desc); \ 4233 uint32_t vl = env->vl; \ 4234 target_ulong i_max, i; \ 4235 \ 4236 i_max = MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl); \ 4237 for (i = 0; i < i_max; ++i) { \ 4238 if (vm || vext_elem_mask(v0, i)) { \ 4239 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4240 } \ 4241 } \ 4242 \ 4243 for (i = i_max; i < vl; ++i) { \ 4244 if (vm || vext_elem_mask(v0, i)) { \ 4245 *((ETYPE *)vd + H(i)) = 0; \ 4246 } \ 4247 } \ 4248 } 4249 4250 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4251 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4252 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4253 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4254 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4255 4256 #define GEN_VEXT_VSLIE1UP(ESZ, H) \ 4257 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4258 CPURISCVState *env, uint32_t desc) \ 4259 { \ 4260 typedef uint##ESZ##_t ETYPE; \ 4261 uint32_t vm = vext_vm(desc); \ 4262 uint32_t vl = env->vl; \ 4263 uint32_t i; \ 4264 \ 4265 for (i = 0; i < vl; i++) { \ 4266 if (!vm && !vext_elem_mask(v0, i)) { \ 4267 continue; \ 4268 } \ 4269 if (i == 0) { \ 4270 *((ETYPE *)vd + H(i)) = s1; \ 4271 } else { \ 4272 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4273 } \ 4274 } \ 4275 } 4276 4277 GEN_VEXT_VSLIE1UP(8, H1) 4278 GEN_VEXT_VSLIE1UP(16, H2) 4279 GEN_VEXT_VSLIE1UP(32, H4) 4280 GEN_VEXT_VSLIE1UP(64, H8) 4281 4282 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \ 4283 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4284 CPURISCVState *env, uint32_t desc) \ 4285 { \ 4286 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4287 } 4288 4289 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4290 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4291 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4292 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4293 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4294 4295 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \ 4296 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4297 CPURISCVState *env, uint32_t desc) \ 4298 { \ 4299 typedef uint##ESZ##_t ETYPE; \ 4300 uint32_t vm = vext_vm(desc); \ 4301 uint32_t vl = env->vl; \ 4302 uint32_t i; \ 4303 \ 4304 for (i = 0; i < vl; i++) { \ 4305 if (!vm && !vext_elem_mask(v0, i)) { \ 4306 continue; \ 4307 } \ 4308 if (i == vl - 1) { \ 4309 *((ETYPE *)vd + H(i)) = s1; \ 4310 } else { \ 4311 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4312 } \ 4313 } \ 4314 } 4315 4316 GEN_VEXT_VSLIDE1DOWN(8, H1) 4317 GEN_VEXT_VSLIDE1DOWN(16, H2) 4318 GEN_VEXT_VSLIDE1DOWN(32, H4) 4319 GEN_VEXT_VSLIDE1DOWN(64, H8) 4320 4321 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \ 4322 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4323 CPURISCVState *env, uint32_t desc) \ 4324 { \ 4325 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4326 } 4327 4328 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4329 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4330 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4331 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4332 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4333 4334 /* Vector Floating-Point Slide Instructions */ 4335 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \ 4336 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4337 CPURISCVState *env, uint32_t desc) \ 4338 { \ 4339 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4340 } 4341 4342 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */ 4343 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4344 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4345 
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4346 4347 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \ 4348 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4349 CPURISCVState *env, uint32_t desc) \ 4350 { \ 4351 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4352 } 4353 4354 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4355 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4356 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4357 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4358 4359 /* Vector Register Gather Instruction */ 4360 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4361 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4362 CPURISCVState *env, uint32_t desc) \ 4363 { \ 4364 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS1))); \ 4365 uint32_t vm = vext_vm(desc); \ 4366 uint32_t vl = env->vl; \ 4367 uint64_t index; \ 4368 uint32_t i; \ 4369 \ 4370 for (i = 0; i < vl; i++) { \ 4371 if (!vm && !vext_elem_mask(v0, i)) { \ 4372 continue; \ 4373 } \ 4374 index = *((TS1 *)vs1 + HS1(i)); \ 4375 if (index >= vlmax) { \ 4376 *((TS2 *)vd + HS2(i)) = 0; \ 4377 } else { \ 4378 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4379 } \ 4380 } \ 4381 } 4382 4383 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 4384 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4385 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4386 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4387 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4388 4389 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4390 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4391 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4392 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4393 4394 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4395 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4396 CPURISCVState *env, uint32_t desc) \ 4397 { \ 4398 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4399 uint32_t vm = vext_vm(desc); \ 4400 uint32_t vl = env->vl; \ 4401 uint64_t index = s1; \ 4402 uint32_t i; \ 4403 \ 4404 for (i = 0; i < vl; i++) { \ 4405 if (!vm && !vext_elem_mask(v0, i)) { \ 4406 continue; \ 4407 } \ 4408 if (index >= vlmax) { \ 4409 *((ETYPE *)vd + H(i)) = 0; \ 4410 } else { \ 4411 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4412 } \ 4413 } \ 4414 } 4415 4416 /* vd[i] = (x[rs1] >= VLMAX) ? 
0 : vs2[rs1] */ 4417 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4418 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4419 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4420 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4421 4422 /* Vector Compress Instruction */ 4423 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4424 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4425 CPURISCVState *env, uint32_t desc) \ 4426 { \ 4427 uint32_t vl = env->vl; \ 4428 uint32_t num = 0, i; \ 4429 \ 4430 for (i = 0; i < vl; i++) { \ 4431 if (!vext_elem_mask(vs1, i)) { \ 4432 continue; \ 4433 } \ 4434 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4435 num++; \ 4436 } \ 4437 } 4438 4439 /* Compress into vd elements of vs2 where vs1 is enabled */ 4440 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4441 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4442 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4443 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4444 4445 /* Vector Integer Extension */ 4446 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \ 4447 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4448 CPURISCVState *env, uint32_t desc) \ 4449 { \ 4450 uint32_t vl = env->vl; \ 4451 uint32_t vm = vext_vm(desc); \ 4452 uint32_t i; \ 4453 \ 4454 for (i = 0; i < vl; i++) { \ 4455 if (!vm && !vext_elem_mask(v0, i)) { \ 4456 continue; \ 4457 } \ 4458 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ 4459 } \ 4460 } 4461 4462 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) 4463 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2) 4464 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4) 4465 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1) 4466 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2) 4467 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1) 4468 4469 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1) 4470 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2) 4471 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) 4472 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) 4473 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) 4474 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) 4475
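/*
 * Editor's sketch (not part of the upstream helpers; the sketch_*
 * name is hypothetical).  The GEN_VEXT_INT_EXT instances above rely on
 * nothing more than C's implicit integer conversions: assigning a
 * narrow element to a wider destination zero-extends for the unsigned
 * (vzext) pairs and sign-extends for the signed (vsext) pairs.  The
 * helper below restates vsext.vf4 at SEW=32 (int8_t source, int32_t
 * destination) for an unmasked operation, without the H*() host-endian
 * index fixups.  For example, a source element of -5 (0xfb) becomes
 * 0xfffffffb here, whereas the vzext_vf4_w instance above would
 * produce 0x000000fb.
 */
static void sketch_vsext_vf4_w(int32_t *vd, const int8_t *vs2, unsigned vl)
{
    for (unsigned i = 0; i < vl; i++) {
        vd[i] = vs2[i];   /* implicit sign extension from 8 to 32 bits */
    }
}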