/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    bool vill = FIELD_EX64(s2, VTYPE, VILL);
    target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * As simd_desc supports at most 256 bytes, the max vlen is 256 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
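
/*
 * Illustrative sketch (not part of the original helpers): the shift in
 * vext_max_elems() above is just VLMAX = LMUL * VLEN / SEW.  With the
 * hypothetical values VLEN = 128 (vlenb = 16), SEW = 32 (esz = 2) and
 * LMUL = 2 (lmul encoding 1), it evaluates as follows:
 */
#if 0   /* example only, never compiled */
    uint32_t vlenb = 16;                /* simd_maxsz(desc)        */
    int scale = 1 - 2;                  /* vext_lmul(desc) - esz   */
    uint32_t vlmax = scale < 0 ? vlenb >> -scale : vlenb << scale;
    /* vlmax == 8, i.e. 2 * 128 / 32 elements per operation */
#endif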

/*
 * This function checks watchpoints before a real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check.  In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry.  Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, addr, curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, addr, curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        probe_pages(env, base + stride * i, nf << esz, ra, access_type);
    }
    /* do real access */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + stride * i + (k << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
}
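
/*
 * For reference, a hand expansion of the GEN_VEXT_LD_STRIDE() macro below
 * for the 32-bit case (illustrative only; the real helper is generated by
 * the macro):
 */
#if 0   /* example only, never compiled */
void HELPER(vlse32_v)(void *vd, void *v0, target_ulong base,
                      target_ulong stride, CPURISCVState *env,
                      uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, lde_w,
                     ctzl(sizeof(int32_t)), GETPC(), MMU_DATA_LOAD);
}
#endif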

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem,
             uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* probe every access */
    probe_pages(env, base, env->vl * (nf << esz), ra, access_type);
    /* load bytes from guest memory */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
}

/*
 * Masked unit-stride load and store operations are a special case of
 * strided access, with stride = NF * sizeof(MTYPE).
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);          \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
                 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);         \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
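
/*
 * Illustrative note (example values, not from the source): for a unit-stride
 * segment access with nf = 2 and max_elems = 4, the destination index
 * "i + k * max_elems" used by the loops above lays the fields out as
 *
 *     memory order:   seg0.f0  seg0.f1  seg1.f0  seg1.f1  ...
 *     index into vd:     0        4        1        5     ...
 *
 * so field 0 of every segment fills the first register group and field 1
 * fills the next one.
 */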

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        probe_pages(env, get_index_addr(base, i, vs2), nf << esz, ra,
                    access_type);
    }
    /* load bytes from guest memory */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                  \
                  void *vs2, CPURISCVState *env, uint32_t desc)            \
{                                                                          \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC(), MMU_DATA_STORE);                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
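
/*
 * Illustrative example (hypothetical values): vlxei16_32_v above pairs
 * 16-bit indices with 32-bit data, so with base = 0x1000 and a vs2 index
 * vector of { 0x0, 0x8, 0x4 }, element i = 2 is loaded through
 * idx_h(0x1000, 2, vs2) = 0x1000 + 0x4 = 0x1004 and written by lde_w().
 */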

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = base + i * (nf << esz);
        if (i == 0) {
            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr += offset;
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M) ((N) >= (M) ?
(M) : (N)) 549 550 /* Unsigned min/max */ 551 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 552 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 553 554 /* 555 *** load and store whole register instructions 556 */ 557 static void 558 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 559 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra, 560 MMUAccessType access_type) 561 { 562 uint32_t i, k; 563 uint32_t nf = vext_nf(desc); 564 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 565 uint32_t max_elems = vlenb >> esz; 566 567 /* probe every access */ 568 probe_pages(env, base, vlenb * nf, ra, access_type); 569 570 /* load bytes from guest memory */ 571 for (k = 0; k < nf; k++) { 572 for (i = 0; i < max_elems; i++) { 573 target_ulong addr = base + ((i + k * max_elems) << esz); 574 ldst_elem(env, addr, i + k * max_elems, vd, ra); 575 } 576 } 577 } 578 579 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 580 void HELPER(NAME)(void *vd, target_ulong base, \ 581 CPURISCVState *env, uint32_t desc) \ 582 { \ 583 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 584 ctzl(sizeof(ETYPE)), GETPC(), \ 585 MMU_DATA_LOAD); \ 586 } 587 588 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 589 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 590 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 591 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 592 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 593 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 594 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 595 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 596 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 597 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 598 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 599 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 600 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 601 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 602 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 603 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 604 605 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 606 void HELPER(NAME)(void *vd, target_ulong base, \ 607 CPURISCVState *env, uint32_t desc) \ 608 { \ 609 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 610 ctzl(sizeof(ETYPE)), GETPC(), \ 611 MMU_DATA_STORE); \ 612 } 613 614 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 615 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 616 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 617 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 618 619 /* 620 *** Vector Integer Arithmetic Instructions 621 */ 622 623 /* expand macro args before macro */ 624 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 625 626 /* (TD, T1, T2, TX1, TX2) */ 627 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 628 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 629 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 630 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 631 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 632 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 633 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 634 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 635 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 636 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 637 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 638 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 639 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 640 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 641 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 642 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 643 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 644 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 645 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 646 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 647 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 648 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 649 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 650 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 651 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 652 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 653 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 654 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 655 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 656 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 657 658 /* operation of two vector elements */ 659 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 660 661 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 662 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 663 { \ 664 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 665 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 666 *((TD *)vd + HD(i)) = OP(s2, s1); \ 667 } 668 #define DO_SUB(N, M) (N - M) 669 #define DO_RSUB(N, M) (M - N) 670 671 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 672 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 673 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 674 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 675 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 676 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 677 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 678 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 679 680 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 681 CPURISCVState *env, uint32_t desc, 682 uint32_t esz, uint32_t dsz, 683 opivv2_fn *fn) 684 { 685 uint32_t vm = vext_vm(desc); 686 uint32_t vl = env->vl; 687 uint32_t i; 688 689 for (i = 0; i < vl; i++) { 690 if (!vm && !vext_elem_mask(v0, i)) { 691 continue; 692 } 693 fn(vd, vs1, vs2, i); 694 } 695 } 696 697 /* generate the helpers for OPIVV */ 698 #define GEN_VEXT_VV(NAME, ESZ, DSZ) \ 699 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 700 void *vs2, CPURISCVState 
*env, \ 701 uint32_t desc) \ 702 { \ 703 do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 704 do_##NAME); \ 705 } 706 707 GEN_VEXT_VV(vadd_vv_b, 1, 1) 708 GEN_VEXT_VV(vadd_vv_h, 2, 2) 709 GEN_VEXT_VV(vadd_vv_w, 4, 4) 710 GEN_VEXT_VV(vadd_vv_d, 8, 8) 711 GEN_VEXT_VV(vsub_vv_b, 1, 1) 712 GEN_VEXT_VV(vsub_vv_h, 2, 2) 713 GEN_VEXT_VV(vsub_vv_w, 4, 4) 714 GEN_VEXT_VV(vsub_vv_d, 8, 8) 715 716 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 717 718 /* 719 * (T1)s1 gives the real operator type. 720 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 721 */ 722 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 723 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 724 { \ 725 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 726 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 727 } 728 729 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 730 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 731 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 732 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 733 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 734 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 735 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 736 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 737 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 738 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 739 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 740 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 741 742 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 743 CPURISCVState *env, uint32_t desc, 744 uint32_t esz, uint32_t dsz, 745 opivx2_fn fn) 746 { 747 uint32_t vm = vext_vm(desc); 748 uint32_t vl = env->vl; 749 uint32_t i; 750 751 for (i = 0; i < vl; i++) { 752 if (!vm && !vext_elem_mask(v0, i)) { 753 continue; 754 } 755 fn(vd, s1, vs2, i); 756 } 757 } 758 759 /* generate the helpers for OPIVX */ 760 #define GEN_VEXT_VX(NAME, ESZ, DSZ) \ 761 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 762 void *vs2, CPURISCVState *env, \ 763 uint32_t desc) \ 764 { \ 765 do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 766 do_##NAME); \ 767 } 768 769 GEN_VEXT_VX(vadd_vx_b, 1, 1) 770 GEN_VEXT_VX(vadd_vx_h, 2, 2) 771 GEN_VEXT_VX(vadd_vx_w, 4, 4) 772 GEN_VEXT_VX(vadd_vx_d, 8, 8) 773 GEN_VEXT_VX(vsub_vx_b, 1, 1) 774 GEN_VEXT_VX(vsub_vx_h, 2, 2) 775 GEN_VEXT_VX(vsub_vx_w, 4, 4) 776 GEN_VEXT_VX(vsub_vx_d, 8, 8) 777 GEN_VEXT_VX(vrsub_vx_b, 1, 1) 778 GEN_VEXT_VX(vrsub_vx_h, 2, 2) 779 GEN_VEXT_VX(vrsub_vx_w, 4, 4) 780 GEN_VEXT_VX(vrsub_vx_d, 8, 8) 781 782 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 783 { 784 intptr_t oprsz = simd_oprsz(desc); 785 intptr_t i; 786 787 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 788 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 789 } 790 } 791 792 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 793 { 794 intptr_t oprsz = simd_oprsz(desc); 795 intptr_t i; 796 797 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 798 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 799 } 800 } 801 802 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 803 { 804 intptr_t oprsz = simd_oprsz(desc); 805 intptr_t i; 806 807 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 808 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 809 } 810 } 811 812 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 813 { 814 intptr_t oprsz = simd_oprsz(desc); 815 intptr_t i; 816 
817 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 818 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 819 } 820 } 821 822 /* Vector Widening Integer Add/Subtract */ 823 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 824 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 825 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 826 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 827 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 828 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 829 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 830 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 831 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 832 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 833 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 834 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 835 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 836 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 837 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 838 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 839 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 840 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 841 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 842 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 843 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 844 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 845 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 846 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 847 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 848 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 849 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 850 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 851 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 852 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 853 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 854 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 855 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 856 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 857 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 858 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 859 GEN_VEXT_VV(vwaddu_vv_b, 1, 2) 860 GEN_VEXT_VV(vwaddu_vv_h, 2, 4) 861 GEN_VEXT_VV(vwaddu_vv_w, 4, 8) 862 GEN_VEXT_VV(vwsubu_vv_b, 1, 2) 863 GEN_VEXT_VV(vwsubu_vv_h, 2, 4) 864 GEN_VEXT_VV(vwsubu_vv_w, 4, 8) 865 GEN_VEXT_VV(vwadd_vv_b, 1, 2) 866 GEN_VEXT_VV(vwadd_vv_h, 2, 4) 867 GEN_VEXT_VV(vwadd_vv_w, 4, 8) 868 GEN_VEXT_VV(vwsub_vv_b, 1, 2) 869 GEN_VEXT_VV(vwsub_vv_h, 2, 4) 870 GEN_VEXT_VV(vwsub_vv_w, 4, 8) 871 GEN_VEXT_VV(vwaddu_wv_b, 1, 2) 872 GEN_VEXT_VV(vwaddu_wv_h, 2, 4) 873 GEN_VEXT_VV(vwaddu_wv_w, 4, 8) 874 GEN_VEXT_VV(vwsubu_wv_b, 1, 2) 875 GEN_VEXT_VV(vwsubu_wv_h, 2, 4) 876 GEN_VEXT_VV(vwsubu_wv_w, 4, 8) 877 GEN_VEXT_VV(vwadd_wv_b, 1, 2) 878 GEN_VEXT_VV(vwadd_wv_h, 2, 4) 879 GEN_VEXT_VV(vwadd_wv_w, 4, 8) 880 GEN_VEXT_VV(vwsub_wv_b, 1, 2) 881 GEN_VEXT_VV(vwsub_wv_h, 2, 4) 882 GEN_VEXT_VV(vwsub_wv_w, 4, 8) 883 884 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 885 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 886 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 887 
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 888 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 889 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 890 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 891 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 892 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 893 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 894 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 895 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 896 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 897 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 898 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 899 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 900 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 901 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 902 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 903 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 904 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 905 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 906 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 907 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 908 GEN_VEXT_VX(vwaddu_vx_b, 1, 2) 909 GEN_VEXT_VX(vwaddu_vx_h, 2, 4) 910 GEN_VEXT_VX(vwaddu_vx_w, 4, 8) 911 GEN_VEXT_VX(vwsubu_vx_b, 1, 2) 912 GEN_VEXT_VX(vwsubu_vx_h, 2, 4) 913 GEN_VEXT_VX(vwsubu_vx_w, 4, 8) 914 GEN_VEXT_VX(vwadd_vx_b, 1, 2) 915 GEN_VEXT_VX(vwadd_vx_h, 2, 4) 916 GEN_VEXT_VX(vwadd_vx_w, 4, 8) 917 GEN_VEXT_VX(vwsub_vx_b, 1, 2) 918 GEN_VEXT_VX(vwsub_vx_h, 2, 4) 919 GEN_VEXT_VX(vwsub_vx_w, 4, 8) 920 GEN_VEXT_VX(vwaddu_wx_b, 1, 2) 921 GEN_VEXT_VX(vwaddu_wx_h, 2, 4) 922 GEN_VEXT_VX(vwaddu_wx_w, 4, 8) 923 GEN_VEXT_VX(vwsubu_wx_b, 1, 2) 924 GEN_VEXT_VX(vwsubu_wx_h, 2, 4) 925 GEN_VEXT_VX(vwsubu_wx_w, 4, 8) 926 GEN_VEXT_VX(vwadd_wx_b, 1, 2) 927 GEN_VEXT_VX(vwadd_wx_h, 2, 4) 928 GEN_VEXT_VX(vwadd_wx_w, 4, 8) 929 GEN_VEXT_VX(vwsub_wx_b, 1, 2) 930 GEN_VEXT_VX(vwsub_wx_h, 2, 4) 931 GEN_VEXT_VX(vwsub_wx_w, 4, 8) 932 933 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 934 #define DO_VADC(N, M, C) (N + M + C) 935 #define DO_VSBC(N, M, C) (N - M - C) 936 937 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 938 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 939 CPURISCVState *env, uint32_t desc) \ 940 { \ 941 uint32_t vl = env->vl; \ 942 uint32_t i; \ 943 \ 944 for (i = 0; i < vl; i++) { \ 945 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 946 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 947 uint8_t carry = vext_elem_mask(v0, i); \ 948 \ 949 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 950 } \ 951 } 952 953 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 954 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 955 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 956 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 957 958 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 959 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 960 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 961 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 962 963 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 964 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 965 CPURISCVState *env, uint32_t desc) \ 966 { \ 967 uint32_t vl = env->vl; \ 968 uint32_t i; \ 969 \ 970 for (i = 0; i < vl; i++) { \ 971 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 972 uint8_t carry = vext_elem_mask(v0, i); \ 973 \ 974 *((ETYPE *)vd + H(i)) = 
DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 975 } \ 976 } 977 978 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 979 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 980 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 981 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 982 983 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 984 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 985 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 986 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 987 988 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 989 (__typeof(N))(N + M) < N) 990 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 991 992 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 993 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 994 CPURISCVState *env, uint32_t desc) \ 995 { \ 996 uint32_t vl = env->vl; \ 997 uint32_t vlmax = vext_max_elems(desc, \ 998 ctzl(sizeof(ETYPE))); \ 999 uint32_t i; \ 1000 \ 1001 for (i = 0; i < vl; i++) { \ 1002 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1003 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1004 uint8_t carry = vext_elem_mask(v0, i); \ 1005 \ 1006 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1007 } \ 1008 for (; i < vlmax; i++) { \ 1009 vext_set_elem_mask(vd, i, 0); \ 1010 } \ 1011 } 1012 1013 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1014 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1015 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1016 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1017 1018 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1019 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1020 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1021 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1022 1023 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1024 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1025 void *vs2, CPURISCVState *env, uint32_t desc) \ 1026 { \ 1027 uint32_t vl = env->vl; \ 1028 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 1029 uint32_t i; \ 1030 \ 1031 for (i = 0; i < vl; i++) { \ 1032 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1033 uint8_t carry = vext_elem_mask(v0, i); \ 1034 \ 1035 vext_set_elem_mask(vd, i, \ 1036 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1037 } \ 1038 for (; i < vlmax; i++) { \ 1039 vext_set_elem_mask(vd, i, 0); \ 1040 } \ 1041 } 1042 1043 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1044 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1045 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1046 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1047 1048 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1049 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1050 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1051 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1052 1053 /* Vector Bitwise Logical Instructions */ 1054 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1055 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1056 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1057 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1058 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1059 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1060 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1061 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1062 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1063 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1064 
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1065 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1066 GEN_VEXT_VV(vand_vv_b, 1, 1) 1067 GEN_VEXT_VV(vand_vv_h, 2, 2) 1068 GEN_VEXT_VV(vand_vv_w, 4, 4) 1069 GEN_VEXT_VV(vand_vv_d, 8, 8) 1070 GEN_VEXT_VV(vor_vv_b, 1, 1) 1071 GEN_VEXT_VV(vor_vv_h, 2, 2) 1072 GEN_VEXT_VV(vor_vv_w, 4, 4) 1073 GEN_VEXT_VV(vor_vv_d, 8, 8) 1074 GEN_VEXT_VV(vxor_vv_b, 1, 1) 1075 GEN_VEXT_VV(vxor_vv_h, 2, 2) 1076 GEN_VEXT_VV(vxor_vv_w, 4, 4) 1077 GEN_VEXT_VV(vxor_vv_d, 8, 8) 1078 1079 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1080 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1081 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1082 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1083 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1084 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1085 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1086 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1087 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1088 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1089 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1090 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1091 GEN_VEXT_VX(vand_vx_b, 1, 1) 1092 GEN_VEXT_VX(vand_vx_h, 2, 2) 1093 GEN_VEXT_VX(vand_vx_w, 4, 4) 1094 GEN_VEXT_VX(vand_vx_d, 8, 8) 1095 GEN_VEXT_VX(vor_vx_b, 1, 1) 1096 GEN_VEXT_VX(vor_vx_h, 2, 2) 1097 GEN_VEXT_VX(vor_vx_w, 4, 4) 1098 GEN_VEXT_VX(vor_vx_d, 8, 8) 1099 GEN_VEXT_VX(vxor_vx_b, 1, 1) 1100 GEN_VEXT_VX(vxor_vx_h, 2, 2) 1101 GEN_VEXT_VX(vxor_vx_w, 4, 4) 1102 GEN_VEXT_VX(vxor_vx_d, 8, 8) 1103 1104 /* Vector Single-Width Bit Shift Instructions */ 1105 #define DO_SLL(N, M) (N << (M)) 1106 #define DO_SRL(N, M) (N >> (M)) 1107 1108 /* generate the helpers for shift instructions with two vector operators */ 1109 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1110 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1111 void *vs2, CPURISCVState *env, uint32_t desc) \ 1112 { \ 1113 uint32_t vm = vext_vm(desc); \ 1114 uint32_t vl = env->vl; \ 1115 uint32_t i; \ 1116 \ 1117 for (i = 0; i < vl; i++) { \ 1118 if (!vm && !vext_elem_mask(v0, i)) { \ 1119 continue; \ 1120 } \ 1121 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1122 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1123 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1124 } \ 1125 } 1126 1127 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1128 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1129 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1130 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1131 1132 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1133 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1134 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1135 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1136 1137 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1138 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1139 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1140 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1141 1142 /* generate the helpers for shift instructions with one vector and one scalar */ 1143 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1144 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1145 void *vs2, CPURISCVState *env, uint32_t desc) \ 
1146 { \ 1147 uint32_t vm = vext_vm(desc); \ 1148 uint32_t vl = env->vl; \ 1149 uint32_t i; \ 1150 \ 1151 for (i = 0; i < vl; i++) { \ 1152 if (!vm && !vext_elem_mask(v0, i)) { \ 1153 continue; \ 1154 } \ 1155 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1156 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1157 } \ 1158 } 1159 1160 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1161 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1162 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1163 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1164 1165 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1166 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1167 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1168 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1169 1170 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1171 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1172 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1173 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1174 1175 /* Vector Narrowing Integer Right Shift Instructions */ 1176 GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1177 GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1178 GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1179 GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1180 GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1181 GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1182 GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1183 GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1184 GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1185 GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1186 GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1187 GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1188 1189 /* Vector Integer Comparison Instructions */ 1190 #define DO_MSEQ(N, M) (N == M) 1191 #define DO_MSNE(N, M) (N != M) 1192 #define DO_MSLT(N, M) (N < M) 1193 #define DO_MSLE(N, M) (N <= M) 1194 #define DO_MSGT(N, M) (N > M) 1195 1196 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1197 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1198 CPURISCVState *env, uint32_t desc) \ 1199 { \ 1200 uint32_t vm = vext_vm(desc); \ 1201 uint32_t vl = env->vl; \ 1202 uint32_t vlmax = vext_max_elems(desc, \ 1203 ctzl(sizeof(ETYPE))); \ 1204 uint32_t i; \ 1205 \ 1206 for (i = 0; i < vl; i++) { \ 1207 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1208 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1209 if (!vm && !vext_elem_mask(v0, i)) { \ 1210 continue; \ 1211 } \ 1212 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1213 } \ 1214 for (; i < vlmax; i++) { \ 1215 vext_set_elem_mask(vd, i, 0); \ 1216 } \ 1217 } 1218 1219 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1220 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1221 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1222 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1223 1224 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1225 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1226 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1227 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1228 
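/*
 * Illustrative note (example values, not from the source): the signed and
 * unsigned comparison variants below differ only in the ETYPE they
 * instantiate, e.g. for 8-bit elements 0x80 and 0x01:
 *     vmsltu_vv_b: DO_MSLT((uint8_t)0x80, (uint8_t)0x01) -> 128 < 1  -> 0
 *     vmslt_vv_b:  DO_MSLT((int8_t)0x80,  (int8_t)0x01)  -> -128 < 1 -> 1
 */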
1229 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1230 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1231 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1232 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1233 1234 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1235 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1236 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1237 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1238 1239 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1240 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1241 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1242 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1243 1244 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1245 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1246 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1247 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1248 1249 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1250 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1251 CPURISCVState *env, uint32_t desc) \ 1252 { \ 1253 uint32_t vm = vext_vm(desc); \ 1254 uint32_t vl = env->vl; \ 1255 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 1256 uint32_t i; \ 1257 \ 1258 for (i = 0; i < vl; i++) { \ 1259 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1260 if (!vm && !vext_elem_mask(v0, i)) { \ 1261 continue; \ 1262 } \ 1263 vext_set_elem_mask(vd, i, \ 1264 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1265 } \ 1266 for (; i < vlmax; i++) { \ 1267 vext_set_elem_mask(vd, i, 0); \ 1268 } \ 1269 } 1270 1271 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1272 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1273 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1274 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1275 1276 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1277 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1278 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1279 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1280 1281 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1282 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1283 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1284 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1285 1286 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1287 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1288 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1289 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1290 1291 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1292 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1293 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1294 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1295 1296 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1297 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1298 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1299 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1300 1301 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1302 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1303 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1304 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1305 1306 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1307 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1308 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1309 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1310 1311 /* Vector Integer Min/Max Instructions */ 1312 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1313 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, 
H2, DO_MIN) 1314 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1315 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1316 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1317 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1318 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1319 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1320 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1321 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1322 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1323 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1324 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1325 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1326 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1327 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1328 GEN_VEXT_VV(vminu_vv_b, 1, 1) 1329 GEN_VEXT_VV(vminu_vv_h, 2, 2) 1330 GEN_VEXT_VV(vminu_vv_w, 4, 4) 1331 GEN_VEXT_VV(vminu_vv_d, 8, 8) 1332 GEN_VEXT_VV(vmin_vv_b, 1, 1) 1333 GEN_VEXT_VV(vmin_vv_h, 2, 2) 1334 GEN_VEXT_VV(vmin_vv_w, 4, 4) 1335 GEN_VEXT_VV(vmin_vv_d, 8, 8) 1336 GEN_VEXT_VV(vmaxu_vv_b, 1, 1) 1337 GEN_VEXT_VV(vmaxu_vv_h, 2, 2) 1338 GEN_VEXT_VV(vmaxu_vv_w, 4, 4) 1339 GEN_VEXT_VV(vmaxu_vv_d, 8, 8) 1340 GEN_VEXT_VV(vmax_vv_b, 1, 1) 1341 GEN_VEXT_VV(vmax_vv_h, 2, 2) 1342 GEN_VEXT_VV(vmax_vv_w, 4, 4) 1343 GEN_VEXT_VV(vmax_vv_d, 8, 8) 1344 1345 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1346 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1347 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1348 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1349 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1350 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1351 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1352 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1353 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1354 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1355 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1356 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1357 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1358 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1359 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1360 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1361 GEN_VEXT_VX(vminu_vx_b, 1, 1) 1362 GEN_VEXT_VX(vminu_vx_h, 2, 2) 1363 GEN_VEXT_VX(vminu_vx_w, 4, 4) 1364 GEN_VEXT_VX(vminu_vx_d, 8, 8) 1365 GEN_VEXT_VX(vmin_vx_b, 1, 1) 1366 GEN_VEXT_VX(vmin_vx_h, 2, 2) 1367 GEN_VEXT_VX(vmin_vx_w, 4, 4) 1368 GEN_VEXT_VX(vmin_vx_d, 8, 8) 1369 GEN_VEXT_VX(vmaxu_vx_b, 1, 1) 1370 GEN_VEXT_VX(vmaxu_vx_h, 2, 2) 1371 GEN_VEXT_VX(vmaxu_vx_w, 4, 4) 1372 GEN_VEXT_VX(vmaxu_vx_d, 8, 8) 1373 GEN_VEXT_VX(vmax_vx_b, 1, 1) 1374 GEN_VEXT_VX(vmax_vx_h, 2, 2) 1375 GEN_VEXT_VX(vmax_vx_w, 4, 4) 1376 GEN_VEXT_VX(vmax_vx_d, 8, 8) 1377 1378 /* Vector Single-Width Integer Multiply Instructions */ 1379 #define DO_MUL(N, M) (N * M) 1380 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1381 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1382 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1383 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1384 GEN_VEXT_VV(vmul_vv_b, 1, 1) 1385 GEN_VEXT_VV(vmul_vv_h, 2, 2) 1386 GEN_VEXT_VV(vmul_vv_w, 4, 4) 1387 GEN_VEXT_VV(vmul_vv_d, 8, 8) 1388 1389 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1390 { 1391 return (int16_t)s2 * (int16_t)s1 >> 8; 1392 } 1393 1394 static int16_t 
do_mulh_h(int16_t s2, int16_t s1) 1395 { 1396 return (int32_t)s2 * (int32_t)s1 >> 16; 1397 } 1398 1399 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1400 { 1401 return (int64_t)s2 * (int64_t)s1 >> 32; 1402 } 1403 1404 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1405 { 1406 uint64_t hi_64, lo_64; 1407 1408 muls64(&lo_64, &hi_64, s1, s2); 1409 return hi_64; 1410 } 1411 1412 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1413 { 1414 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1415 } 1416 1417 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1418 { 1419 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1420 } 1421 1422 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1423 { 1424 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1425 } 1426 1427 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1428 { 1429 uint64_t hi_64, lo_64; 1430 1431 mulu64(&lo_64, &hi_64, s2, s1); 1432 return hi_64; 1433 } 1434 1435 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1436 { 1437 return (int16_t)s2 * (uint16_t)s1 >> 8; 1438 } 1439 1440 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1441 { 1442 return (int32_t)s2 * (uint32_t)s1 >> 16; 1443 } 1444 1445 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1446 { 1447 return (int64_t)s2 * (uint64_t)s1 >> 32; 1448 } 1449 1450 /* 1451 * Let A = signed operand, 1452 * B = unsigned operand 1453 * P = mulu64(A, B), unsigned product 1454 * 1455 * LET X = 2 ** 64 - A, 2's complement of A 1456 * SP = signed product 1457 * THEN 1458 * IF A < 0 1459 * SP = -X * B 1460 * = -(2 ** 64 - A) * B 1461 * = A * B - 2 ** 64 * B 1462 * = P - 2 ** 64 * B 1463 * ELSE 1464 * SP = P 1465 * THEN 1466 * HI_P -= (A < 0 ? B : 0) 1467 */ 1468 1469 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1470 { 1471 uint64_t hi_64, lo_64; 1472 1473 mulu64(&lo_64, &hi_64, s2, s1); 1474 1475 hi_64 -= s2 < 0 ? 
s1 : 0; 1476 return hi_64; 1477 } 1478 1479 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1480 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1481 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1482 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1483 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1484 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1485 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1486 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1487 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1488 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1489 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1490 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1491 GEN_VEXT_VV(vmulh_vv_b, 1, 1) 1492 GEN_VEXT_VV(vmulh_vv_h, 2, 2) 1493 GEN_VEXT_VV(vmulh_vv_w, 4, 4) 1494 GEN_VEXT_VV(vmulh_vv_d, 8, 8) 1495 GEN_VEXT_VV(vmulhu_vv_b, 1, 1) 1496 GEN_VEXT_VV(vmulhu_vv_h, 2, 2) 1497 GEN_VEXT_VV(vmulhu_vv_w, 4, 4) 1498 GEN_VEXT_VV(vmulhu_vv_d, 8, 8) 1499 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) 1500 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) 1501 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) 1502 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) 1503 1504 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1505 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1506 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1507 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1508 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1509 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1510 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1511 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1512 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1513 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1514 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1515 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1516 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1517 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1518 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1519 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1520 GEN_VEXT_VX(vmul_vx_b, 1, 1) 1521 GEN_VEXT_VX(vmul_vx_h, 2, 2) 1522 GEN_VEXT_VX(vmul_vx_w, 4, 4) 1523 GEN_VEXT_VX(vmul_vx_d, 8, 8) 1524 GEN_VEXT_VX(vmulh_vx_b, 1, 1) 1525 GEN_VEXT_VX(vmulh_vx_h, 2, 2) 1526 GEN_VEXT_VX(vmulh_vx_w, 4, 4) 1527 GEN_VEXT_VX(vmulh_vx_d, 8, 8) 1528 GEN_VEXT_VX(vmulhu_vx_b, 1, 1) 1529 GEN_VEXT_VX(vmulhu_vx_h, 2, 2) 1530 GEN_VEXT_VX(vmulhu_vx_w, 4, 4) 1531 GEN_VEXT_VX(vmulhu_vx_d, 8, 8) 1532 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) 1533 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) 1534 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) 1535 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) 1536 1537 /* Vector Integer Divide Instructions */ 1538 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1539 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1540 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1541 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1542 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1543 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1544 1545 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1546 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1547 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1548 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1549 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1550 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1551 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1552 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1553 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1554 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1555 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1556 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1557 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1558 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1559 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1560 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1561 GEN_VEXT_VV(vdivu_vv_b, 1, 1) 1562 GEN_VEXT_VV(vdivu_vv_h, 2, 2) 1563 GEN_VEXT_VV(vdivu_vv_w, 4, 4) 1564 GEN_VEXT_VV(vdivu_vv_d, 8, 8) 1565 GEN_VEXT_VV(vdiv_vv_b, 1, 1) 1566 GEN_VEXT_VV(vdiv_vv_h, 2, 2) 1567 GEN_VEXT_VV(vdiv_vv_w, 4, 4) 1568 GEN_VEXT_VV(vdiv_vv_d, 8, 8) 1569 GEN_VEXT_VV(vremu_vv_b, 1, 1) 1570 GEN_VEXT_VV(vremu_vv_h, 2, 2) 1571 GEN_VEXT_VV(vremu_vv_w, 4, 4) 1572 GEN_VEXT_VV(vremu_vv_d, 8, 8) 1573 GEN_VEXT_VV(vrem_vv_b, 1, 1) 1574 GEN_VEXT_VV(vrem_vv_h, 2, 2) 1575 GEN_VEXT_VV(vrem_vv_w, 4, 4) 1576 GEN_VEXT_VV(vrem_vv_d, 8, 8) 1577 1578 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1579 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1580 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1581 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1582 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1583 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1584 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1585 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1586 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1587 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1588 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1589 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1590 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1591 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1592 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1593 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1594 GEN_VEXT_VX(vdivu_vx_b, 1, 1) 1595 GEN_VEXT_VX(vdivu_vx_h, 2, 2) 1596 GEN_VEXT_VX(vdivu_vx_w, 4, 4) 1597 GEN_VEXT_VX(vdivu_vx_d, 8, 8) 1598 GEN_VEXT_VX(vdiv_vx_b, 1, 1) 1599 GEN_VEXT_VX(vdiv_vx_h, 2, 2) 1600 GEN_VEXT_VX(vdiv_vx_w, 4, 4) 1601 GEN_VEXT_VX(vdiv_vx_d, 8, 8) 1602 GEN_VEXT_VX(vremu_vx_b, 1, 1) 1603 GEN_VEXT_VX(vremu_vx_h, 2, 2) 1604 GEN_VEXT_VX(vremu_vx_w, 4, 4) 1605 GEN_VEXT_VX(vremu_vx_d, 8, 8) 1606 GEN_VEXT_VX(vrem_vx_b, 1, 1) 1607 GEN_VEXT_VX(vrem_vx_h, 2, 2) 1608 GEN_VEXT_VX(vrem_vx_w, 4, 4) 1609 GEN_VEXT_VX(vrem_vx_d, 8, 8) 1610 1611 /* Vector Widening Integer Multiply Instructions */ 1612 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1613 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1614 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1615 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1616 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1617 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1618 RVVCALL(OPIVV2, 
vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1619 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1620 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1621 GEN_VEXT_VV(vwmul_vv_b, 1, 2) 1622 GEN_VEXT_VV(vwmul_vv_h, 2, 4) 1623 GEN_VEXT_VV(vwmul_vv_w, 4, 8) 1624 GEN_VEXT_VV(vwmulu_vv_b, 1, 2) 1625 GEN_VEXT_VV(vwmulu_vv_h, 2, 4) 1626 GEN_VEXT_VV(vwmulu_vv_w, 4, 8) 1627 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) 1628 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) 1629 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) 1630 1631 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1632 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1633 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1634 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1635 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1636 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1637 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1638 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1639 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1640 GEN_VEXT_VX(vwmul_vx_b, 1, 2) 1641 GEN_VEXT_VX(vwmul_vx_h, 2, 4) 1642 GEN_VEXT_VX(vwmul_vx_w, 4, 8) 1643 GEN_VEXT_VX(vwmulu_vx_b, 1, 2) 1644 GEN_VEXT_VX(vwmulu_vx_h, 2, 4) 1645 GEN_VEXT_VX(vwmulu_vx_w, 4, 8) 1646 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) 1647 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) 1648 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) 1649 1650 /* Vector Single-Width Integer Multiply-Add Instructions */ 1651 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1652 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1653 { \ 1654 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1655 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1656 TD d = *((TD *)vd + HD(i)); \ 1657 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1658 } 1659 1660 #define DO_MACC(N, M, D) (M * N + D) 1661 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1662 #define DO_MADD(N, M, D) (M * D + N) 1663 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1664 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1665 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1666 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1667 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1668 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1669 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1670 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1671 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1672 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1673 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1674 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1675 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1676 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1677 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1678 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1679 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1680 GEN_VEXT_VV(vmacc_vv_b, 1, 1) 1681 GEN_VEXT_VV(vmacc_vv_h, 2, 2) 1682 GEN_VEXT_VV(vmacc_vv_w, 4, 4) 1683 GEN_VEXT_VV(vmacc_vv_d, 8, 8) 1684 GEN_VEXT_VV(vnmsac_vv_b, 1, 1) 1685 GEN_VEXT_VV(vnmsac_vv_h, 2, 2) 1686 GEN_VEXT_VV(vnmsac_vv_w, 4, 4) 1687 GEN_VEXT_VV(vnmsac_vv_d, 8, 8) 1688 GEN_VEXT_VV(vmadd_vv_b, 1, 1) 1689 GEN_VEXT_VV(vmadd_vv_h, 2, 2) 1690 GEN_VEXT_VV(vmadd_vv_w, 4, 4) 1691 GEN_VEXT_VV(vmadd_vv_d, 8, 8) 1692 GEN_VEXT_VV(vnmsub_vv_b, 1, 1) 1693 GEN_VEXT_VV(vnmsub_vv_h, 2, 2) 1694 GEN_VEXT_VV(vnmsub_vv_w, 4, 4) 1695 GEN_VEXT_VV(vnmsub_vv_d, 8, 8) 1696 
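/*
 * The OPIVV3/OPIVX3 wrappers call OP(s2, s1, d), so the DO_* helpers above
 * expand as follows (vs1[i] is replaced by the scalar rs1 for the .vx forms):
 *   DO_MACC:  vd[i] =  (vs1[i] * vs2[i]) + vd[i]
 *   DO_NMSAC: vd[i] = -(vs1[i] * vs2[i]) + vd[i]
 *   DO_MADD:  vd[i] =  (vs1[i] * vd[i])  + vs2[i]
 *   DO_NMSUB: vd[i] = -(vs1[i] * vd[i])  + vs2[i]
 */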
1697 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1698 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1699 { \ 1700 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1701 TD d = *((TD *)vd + HD(i)); \ 1702 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1703 } 1704 1705 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1706 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1707 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1708 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1709 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1710 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1711 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1712 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1713 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1714 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1715 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1716 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1717 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1718 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1719 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1720 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1721 GEN_VEXT_VX(vmacc_vx_b, 1, 1) 1722 GEN_VEXT_VX(vmacc_vx_h, 2, 2) 1723 GEN_VEXT_VX(vmacc_vx_w, 4, 4) 1724 GEN_VEXT_VX(vmacc_vx_d, 8, 8) 1725 GEN_VEXT_VX(vnmsac_vx_b, 1, 1) 1726 GEN_VEXT_VX(vnmsac_vx_h, 2, 2) 1727 GEN_VEXT_VX(vnmsac_vx_w, 4, 4) 1728 GEN_VEXT_VX(vnmsac_vx_d, 8, 8) 1729 GEN_VEXT_VX(vmadd_vx_b, 1, 1) 1730 GEN_VEXT_VX(vmadd_vx_h, 2, 2) 1731 GEN_VEXT_VX(vmadd_vx_w, 4, 4) 1732 GEN_VEXT_VX(vmadd_vx_d, 8, 8) 1733 GEN_VEXT_VX(vnmsub_vx_b, 1, 1) 1734 GEN_VEXT_VX(vnmsub_vx_h, 2, 2) 1735 GEN_VEXT_VX(vnmsub_vx_w, 4, 4) 1736 GEN_VEXT_VX(vnmsub_vx_d, 8, 8) 1737 1738 /* Vector Widening Integer Multiply-Add Instructions */ 1739 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1740 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1741 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1742 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1743 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1744 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1745 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1746 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1747 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1748 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) 1749 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) 1750 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) 1751 GEN_VEXT_VV(vwmacc_vv_b, 1, 2) 1752 GEN_VEXT_VV(vwmacc_vv_h, 2, 4) 1753 GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 1754 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) 1755 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) 1756 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) 1757 1758 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1759 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1760 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1761 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1762 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1763 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1764 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1765 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1766 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1767 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1768 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1769 RVVCALL(OPIVX3, 
vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1770 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) 1771 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) 1772 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) 1773 GEN_VEXT_VX(vwmacc_vx_b, 1, 2) 1774 GEN_VEXT_VX(vwmacc_vx_h, 2, 4) 1775 GEN_VEXT_VX(vwmacc_vx_w, 4, 8) 1776 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) 1777 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) 1778 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) 1779 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) 1780 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) 1781 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) 1782 1783 /* Vector Integer Merge and Move Instructions */ 1784 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1785 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1786 uint32_t desc) \ 1787 { \ 1788 uint32_t vl = env->vl; \ 1789 uint32_t i; \ 1790 \ 1791 for (i = 0; i < vl; i++) { \ 1792 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1793 *((ETYPE *)vd + H(i)) = s1; \ 1794 } \ 1795 } 1796 1797 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1798 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1799 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1800 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1801 1802 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1803 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1804 uint32_t desc) \ 1805 { \ 1806 uint32_t vl = env->vl; \ 1807 uint32_t i; \ 1808 \ 1809 for (i = 0; i < vl; i++) { \ 1810 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1811 } \ 1812 } 1813 1814 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1815 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1816 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1817 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1818 1819 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1820 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1821 CPURISCVState *env, uint32_t desc) \ 1822 { \ 1823 uint32_t vl = env->vl; \ 1824 uint32_t i; \ 1825 \ 1826 for (i = 0; i < vl; i++) { \ 1827 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 1828 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1829 } \ 1830 } 1831 1832 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1833 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1834 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1835 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1836 1837 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1838 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1839 void *vs2, CPURISCVState *env, uint32_t desc) \ 1840 { \ 1841 uint32_t vl = env->vl; \ 1842 uint32_t i; \ 1843 \ 1844 for (i = 0; i < vl; i++) { \ 1845 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1846 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 1847 (ETYPE)(target_long)s1); \ 1848 *((ETYPE *)vd + H(i)) = d; \ 1849 } \ 1850 } 1851 1852 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1853 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1854 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1855 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1856 1857 /* 1858 *** Vector Fixed-Point Arithmetic Instructions 1859 */ 1860 1861 /* Vector Single-Width Saturating Add and Subtract */ 1862 1863 /* 1864 * As fixed point instructions probably have round mode and saturation, 1865 * define common macros for fixed point here. 
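 *
 * The rounding increment is selected by the vxrm CSR (0 = rnu, round to
 * nearest up; 1 = rne, round to nearest even; 2 = rdn, round down/truncate;
 * 3 = rod, round to odd), and saturating results set the vxsat flag.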
1866 */ 1867 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1868 CPURISCVState *env, int vxrm); 1869 1870 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1871 static inline void \ 1872 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1873 CPURISCVState *env, int vxrm) \ 1874 { \ 1875 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1876 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1877 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1878 } 1879 1880 static inline void 1881 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1882 CPURISCVState *env, 1883 uint32_t vl, uint32_t vm, int vxrm, 1884 opivv2_rm_fn *fn) 1885 { 1886 for (uint32_t i = 0; i < vl; i++) { 1887 if (!vm && !vext_elem_mask(v0, i)) { 1888 continue; 1889 } 1890 fn(vd, vs1, vs2, i, env, vxrm); 1891 } 1892 } 1893 1894 static inline void 1895 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1896 CPURISCVState *env, 1897 uint32_t desc, uint32_t esz, uint32_t dsz, 1898 opivv2_rm_fn *fn) 1899 { 1900 uint32_t vm = vext_vm(desc); 1901 uint32_t vl = env->vl; 1902 1903 switch (env->vxrm) { 1904 case 0: /* rnu */ 1905 vext_vv_rm_1(vd, v0, vs1, vs2, 1906 env, vl, vm, 0, fn); 1907 break; 1908 case 1: /* rne */ 1909 vext_vv_rm_1(vd, v0, vs1, vs2, 1910 env, vl, vm, 1, fn); 1911 break; 1912 case 2: /* rdn */ 1913 vext_vv_rm_1(vd, v0, vs1, vs2, 1914 env, vl, vm, 2, fn); 1915 break; 1916 default: /* rod */ 1917 vext_vv_rm_1(vd, v0, vs1, vs2, 1918 env, vl, vm, 3, fn); 1919 break; 1920 } 1921 } 1922 1923 /* generate helpers for fixed point instructions with OPIVV format */ 1924 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ 1925 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1926 CPURISCVState *env, uint32_t desc) \ 1927 { \ 1928 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 1929 do_##NAME); \ 1930 } 1931 1932 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 1933 { 1934 uint8_t res = a + b; 1935 if (res < a) { 1936 res = UINT8_MAX; 1937 env->vxsat = 0x1; 1938 } 1939 return res; 1940 } 1941 1942 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1943 uint16_t b) 1944 { 1945 uint16_t res = a + b; 1946 if (res < a) { 1947 res = UINT16_MAX; 1948 env->vxsat = 0x1; 1949 } 1950 return res; 1951 } 1952 1953 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1954 uint32_t b) 1955 { 1956 uint32_t res = a + b; 1957 if (res < a) { 1958 res = UINT32_MAX; 1959 env->vxsat = 0x1; 1960 } 1961 return res; 1962 } 1963 1964 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1965 uint64_t b) 1966 { 1967 uint64_t res = a + b; 1968 if (res < a) { 1969 res = UINT64_MAX; 1970 env->vxsat = 0x1; 1971 } 1972 return res; 1973 } 1974 1975 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 1976 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 1977 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 1978 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 1979 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) 1980 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) 1981 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) 1982 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) 1983 1984 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 1985 CPURISCVState *env, int vxrm); 1986 1987 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1988 static inline void \ 1989 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 1990 CPURISCVState *env, int vxrm) \ 1991 { \ 1992 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1993 *((TD *)vd + HD(i)) = OP(env, 
vxrm, s2, (TX1)(T1)s1); \ 1994 } 1995 1996 static inline void 1997 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 1998 CPURISCVState *env, 1999 uint32_t vl, uint32_t vm, int vxrm, 2000 opivx2_rm_fn *fn) 2001 { 2002 for (uint32_t i = 0; i < vl; i++) { 2003 if (!vm && !vext_elem_mask(v0, i)) { 2004 continue; 2005 } 2006 fn(vd, s1, vs2, i, env, vxrm); 2007 } 2008 } 2009 2010 static inline void 2011 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2012 CPURISCVState *env, 2013 uint32_t desc, uint32_t esz, uint32_t dsz, 2014 opivx2_rm_fn *fn) 2015 { 2016 uint32_t vm = vext_vm(desc); 2017 uint32_t vl = env->vl; 2018 2019 switch (env->vxrm) { 2020 case 0: /* rnu */ 2021 vext_vx_rm_1(vd, v0, s1, vs2, 2022 env, vl, vm, 0, fn); 2023 break; 2024 case 1: /* rne */ 2025 vext_vx_rm_1(vd, v0, s1, vs2, 2026 env, vl, vm, 1, fn); 2027 break; 2028 case 2: /* rdn */ 2029 vext_vx_rm_1(vd, v0, s1, vs2, 2030 env, vl, vm, 2, fn); 2031 break; 2032 default: /* rod */ 2033 vext_vx_rm_1(vd, v0, s1, vs2, 2034 env, vl, vm, 3, fn); 2035 break; 2036 } 2037 } 2038 2039 /* generate helpers for fixed point instructions with OPIVX format */ 2040 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ 2041 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2042 void *vs2, CPURISCVState *env, uint32_t desc) \ 2043 { \ 2044 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 2045 do_##NAME); \ 2046 } 2047 2048 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2049 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2050 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2051 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2052 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 2053 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) 2054 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) 2055 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) 2056 2057 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2058 { 2059 int8_t res = a + b; 2060 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2061 res = a > 0 ? INT8_MAX : INT8_MIN; 2062 env->vxsat = 0x1; 2063 } 2064 return res; 2065 } 2066 2067 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2068 { 2069 int16_t res = a + b; 2070 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2071 res = a > 0 ? INT16_MAX : INT16_MIN; 2072 env->vxsat = 0x1; 2073 } 2074 return res; 2075 } 2076 2077 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2078 { 2079 int32_t res = a + b; 2080 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2081 res = a > 0 ? INT32_MAX : INT32_MIN; 2082 env->vxsat = 0x1; 2083 } 2084 return res; 2085 } 2086 2087 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2088 { 2089 int64_t res = a + b; 2090 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2091 res = a > 0 ? 
INT64_MAX : INT64_MIN; 2092 env->vxsat = 0x1; 2093 } 2094 return res; 2095 } 2096 2097 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2098 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2099 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2100 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2101 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) 2102 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) 2103 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) 2104 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) 2105 2106 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2107 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2108 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2109 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2110 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) 2111 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) 2112 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) 2113 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) 2114 2115 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2116 { 2117 uint8_t res = a - b; 2118 if (res > a) { 2119 res = 0; 2120 env->vxsat = 0x1; 2121 } 2122 return res; 2123 } 2124 2125 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2126 uint16_t b) 2127 { 2128 uint16_t res = a - b; 2129 if (res > a) { 2130 res = 0; 2131 env->vxsat = 0x1; 2132 } 2133 return res; 2134 } 2135 2136 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2137 uint32_t b) 2138 { 2139 uint32_t res = a - b; 2140 if (res > a) { 2141 res = 0; 2142 env->vxsat = 0x1; 2143 } 2144 return res; 2145 } 2146 2147 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2148 uint64_t b) 2149 { 2150 uint64_t res = a - b; 2151 if (res > a) { 2152 res = 0; 2153 env->vxsat = 0x1; 2154 } 2155 return res; 2156 } 2157 2158 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2159 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2160 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2161 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2162 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) 2163 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) 2164 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) 2165 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) 2166 2167 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2168 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2169 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2170 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2171 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) 2172 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) 2173 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) 2174 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) 2175 2176 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2177 { 2178 int8_t res = a - b; 2179 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2180 res = a >= 0 ? INT8_MAX : INT8_MIN; 2181 env->vxsat = 0x1; 2182 } 2183 return res; 2184 } 2185 2186 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2187 { 2188 int16_t res = a - b; 2189 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2190 res = a >= 0 ? INT16_MAX : INT16_MIN; 2191 env->vxsat = 0x1; 2192 } 2193 return res; 2194 } 2195 2196 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2197 { 2198 int32_t res = a - b; 2199 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2200 res = a >= 0 ? 
INT32_MAX : INT32_MIN; 2201 env->vxsat = 0x1; 2202 } 2203 return res; 2204 } 2205 2206 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2207 { 2208 int64_t res = a - b; 2209 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2210 res = a >= 0 ? INT64_MAX : INT64_MIN; 2211 env->vxsat = 0x1; 2212 } 2213 return res; 2214 } 2215 2216 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2217 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2218 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2219 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2220 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) 2221 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) 2222 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) 2223 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) 2224 2225 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2226 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2227 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2228 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2229 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) 2230 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) 2231 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) 2232 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) 2233 2234 /* Vector Single-Width Averaging Add and Subtract */ 2235 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2236 { 2237 uint8_t d = extract64(v, shift, 1); 2238 uint8_t d1; 2239 uint64_t D1, D2; 2240 2241 if (shift == 0 || shift > 64) { 2242 return 0; 2243 } 2244 2245 d1 = extract64(v, shift - 1, 1); 2246 D1 = extract64(v, 0, shift); 2247 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2248 return d1; 2249 } else if (vxrm == 1) { /* round-to-nearest-even */ 2250 if (shift > 1) { 2251 D2 = extract64(v, 0, shift - 1); 2252 return d1 & ((D2 != 0) | d); 2253 } else { 2254 return d1 & d; 2255 } 2256 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2257 return !d & (D1 != 0); 2258 } 2259 return 0; /* round-down (truncate) */ 2260 } 2261 2262 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2263 { 2264 int64_t res = (int64_t)a + b; 2265 uint8_t round = get_round(vxrm, res, 1); 2266 2267 return (res >> 1) + round; 2268 } 2269 2270 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2271 { 2272 int64_t res = a + b; 2273 uint8_t round = get_round(vxrm, res, 1); 2274 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2275 2276 /* With signed overflow, bit 64 is inverse of bit 63. 
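 * 'over' is INT64_MIN exactly when the 64-bit addition overflowed, and
 * res >> 1 sign-extends the (then incorrect) bit 63 into the MSB, so
 * XOR-ing 'over' back in recovers bit 64 of the true 65-bit sum.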
*/ 2277 return ((res >> 1) ^ over) + round; 2278 } 2279 2280 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2281 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2282 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2283 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2284 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) 2285 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 2286 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) 2287 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) 2288 2289 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2290 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2291 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2292 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2293 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) 2294 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) 2295 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) 2296 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) 2297 2298 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2299 { 2300 int64_t res = (int64_t)a - b; 2301 uint8_t round = get_round(vxrm, res, 1); 2302 2303 return (res >> 1) + round; 2304 } 2305 2306 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2307 { 2308 int64_t res = (int64_t)a - b; 2309 uint8_t round = get_round(vxrm, res, 1); 2310 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2311 2312 /* With signed overflow, bit 64 is inverse of bit 63. */ 2313 return ((res >> 1) ^ over) + round; 2314 } 2315 2316 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2317 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2318 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2319 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2320 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) 2321 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) 2322 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) 2323 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) 2324 2325 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2326 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2327 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2328 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2329 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) 2330 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) 2331 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) 2332 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) 2333 2334 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2335 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2336 { 2337 uint8_t round; 2338 int16_t res; 2339 2340 res = (int16_t)a * (int16_t)b; 2341 round = get_round(vxrm, res, 7); 2342 res = (res >> 7) + round; 2343 2344 if (res > INT8_MAX) { 2345 env->vxsat = 0x1; 2346 return INT8_MAX; 2347 } else if (res < INT8_MIN) { 2348 env->vxsat = 0x1; 2349 return INT8_MIN; 2350 } else { 2351 return res; 2352 } 2353 } 2354 2355 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2356 { 2357 uint8_t round; 2358 int32_t res; 2359 2360 res = (int32_t)a * (int32_t)b; 2361 round = get_round(vxrm, res, 15); 2362 res = (res >> 15) + round; 2363 2364 if (res > INT16_MAX) { 2365 env->vxsat = 0x1; 2366 return INT16_MAX; 2367 } else if (res < INT16_MIN) { 2368 env->vxsat = 0x1; 2369 return INT16_MIN; 2370 } else { 2371 return res; 2372 } 2373 } 2374 2375 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2376 { 2377 uint8_t round; 2378 int64_t res; 2379 2380 res = (int64_t)a * (int64_t)b; 2381 round = get_round(vxrm, res, 31); 2382 res = (res >> 31) + round; 2383 2384 if (res > INT32_MAX) { 2385 env->vxsat = 0x1; 2386 return 
INT32_MAX; 2387 } else if (res < INT32_MIN) { 2388 env->vxsat = 0x1; 2389 return INT32_MIN; 2390 } else { 2391 return res; 2392 } 2393 } 2394 2395 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2396 { 2397 uint8_t round; 2398 uint64_t hi_64, lo_64; 2399 int64_t res; 2400 2401 if (a == INT64_MIN && b == INT64_MIN) { 2402 env->vxsat = 1; 2403 return INT64_MAX; 2404 } 2405 2406 muls64(&lo_64, &hi_64, a, b); 2407 round = get_round(vxrm, lo_64, 63); 2408 /* 2409 * Cannot overflow, as there are always 2410 * 2 sign bits after multiply. 2411 */ 2412 res = (hi_64 << 1) | (lo_64 >> 63); 2413 if (round) { 2414 if (res == INT64_MAX) { 2415 env->vxsat = 1; 2416 } else { 2417 res += 1; 2418 } 2419 } 2420 return res; 2421 } 2422 2423 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2424 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2425 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2426 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2427 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) 2428 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) 2429 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) 2430 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) 2431 2432 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2433 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2434 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2435 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2436 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) 2437 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) 2438 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) 2439 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) 2440 2441 /* Vector Widening Saturating Scaled Multiply-Add */ 2442 static inline uint16_t 2443 vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, 2444 uint16_t c) 2445 { 2446 uint8_t round; 2447 uint16_t res = (uint16_t)a * b; 2448 2449 round = get_round(vxrm, res, 4); 2450 res = (res >> 4) + round; 2451 return saddu16(env, vxrm, c, res); 2452 } 2453 2454 static inline uint32_t 2455 vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, 2456 uint32_t c) 2457 { 2458 uint8_t round; 2459 uint32_t res = (uint32_t)a * b; 2460 2461 round = get_round(vxrm, res, 8); 2462 res = (res >> 8) + round; 2463 return saddu32(env, vxrm, c, res); 2464 } 2465 2466 static inline uint64_t 2467 vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, 2468 uint64_t c) 2469 { 2470 uint8_t round; 2471 uint64_t res = (uint64_t)a * b; 2472 2473 round = get_round(vxrm, res, 16); 2474 res = (res >> 16) + round; 2475 return saddu64(env, vxrm, c, res); 2476 } 2477 2478 #define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2479 static inline void \ 2480 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2481 CPURISCVState *env, int vxrm) \ 2482 { \ 2483 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2484 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2485 TD d = *((TD *)vd + HD(i)); \ 2486 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ 2487 } 2488 2489 RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) 2490 RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) 2491 RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) 2492 GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2) 2493 GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4) 2494 GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8) 2495 2496 #define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2497 static inline void \ 2498 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2499 CPURISCVState *env, int vxrm) \ 2500 { \ 2501 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2502 TD d = *((TD *)vd + 
HD(i)); \ 2503 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ 2504 } 2505 2506 RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) 2507 RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) 2508 RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) 2509 GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2) 2510 GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4) 2511 GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8) 2512 2513 static inline int16_t 2514 vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) 2515 { 2516 uint8_t round; 2517 int16_t res = (int16_t)a * b; 2518 2519 round = get_round(vxrm, res, 4); 2520 res = (res >> 4) + round; 2521 return sadd16(env, vxrm, c, res); 2522 } 2523 2524 static inline int32_t 2525 vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) 2526 { 2527 uint8_t round; 2528 int32_t res = (int32_t)a * b; 2529 2530 round = get_round(vxrm, res, 8); 2531 res = (res >> 8) + round; 2532 return sadd32(env, vxrm, c, res); 2533 2534 } 2535 2536 static inline int64_t 2537 vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) 2538 { 2539 uint8_t round; 2540 int64_t res = (int64_t)a * b; 2541 2542 round = get_round(vxrm, res, 16); 2543 res = (res >> 16) + round; 2544 return sadd64(env, vxrm, c, res); 2545 } 2546 2547 RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) 2548 RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) 2549 RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) 2550 GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2) 2551 GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4) 2552 GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8) 2553 RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) 2554 RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) 2555 RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) 2556 GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2) 2557 GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4) 2558 GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8) 2559 2560 static inline int16_t 2561 vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) 2562 { 2563 uint8_t round; 2564 int16_t res = a * (int16_t)b; 2565 2566 round = get_round(vxrm, res, 4); 2567 res = (res >> 4) + round; 2568 return ssub16(env, vxrm, c, res); 2569 } 2570 2571 static inline int32_t 2572 vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) 2573 { 2574 uint8_t round; 2575 int32_t res = a * (int32_t)b; 2576 2577 round = get_round(vxrm, res, 8); 2578 res = (res >> 8) + round; 2579 return ssub32(env, vxrm, c, res); 2580 } 2581 2582 static inline int64_t 2583 vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) 2584 { 2585 uint8_t round; 2586 int64_t res = a * (int64_t)b; 2587 2588 round = get_round(vxrm, res, 16); 2589 res = (res >> 16) + round; 2590 return ssub64(env, vxrm, c, res); 2591 } 2592 2593 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) 2594 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) 2595 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) 2596 GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2) 2597 GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4) 2598 GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8) 2599 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) 2600 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) 2601 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) 2602 GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2) 2603 GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4) 2604 GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8) 2605 2606 
static inline int16_t 2607 vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) 2608 { 2609 uint8_t round; 2610 int16_t res = (int16_t)a * b; 2611 2612 round = get_round(vxrm, res, 4); 2613 res = (res >> 4) + round; 2614 return ssub16(env, vxrm, c, res); 2615 } 2616 2617 static inline int32_t 2618 vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) 2619 { 2620 uint8_t round; 2621 int32_t res = (int32_t)a * b; 2622 2623 round = get_round(vxrm, res, 8); 2624 res = (res >> 8) + round; 2625 return ssub32(env, vxrm, c, res); 2626 } 2627 2628 static inline int64_t 2629 vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) 2630 { 2631 uint8_t round; 2632 int64_t res = (int64_t)a * b; 2633 2634 round = get_round(vxrm, res, 16); 2635 res = (res >> 16) + round; 2636 return ssub64(env, vxrm, c, res); 2637 } 2638 2639 RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8) 2640 RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) 2641 RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) 2642 GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2) 2643 GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4) 2644 GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8) 2645 2646 /* Vector Single-Width Scaling Shift Instructions */ 2647 static inline uint8_t 2648 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2649 { 2650 uint8_t round, shift = b & 0x7; 2651 uint8_t res; 2652 2653 round = get_round(vxrm, a, shift); 2654 res = (a >> shift) + round; 2655 return res; 2656 } 2657 static inline uint16_t 2658 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2659 { 2660 uint8_t round, shift = b & 0xf; 2661 uint16_t res; 2662 2663 round = get_round(vxrm, a, shift); 2664 res = (a >> shift) + round; 2665 return res; 2666 } 2667 static inline uint32_t 2668 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2669 { 2670 uint8_t round, shift = b & 0x1f; 2671 uint32_t res; 2672 2673 round = get_round(vxrm, a, shift); 2674 res = (a >> shift) + round; 2675 return res; 2676 } 2677 static inline uint64_t 2678 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2679 { 2680 uint8_t round, shift = b & 0x3f; 2681 uint64_t res; 2682 2683 round = get_round(vxrm, a, shift); 2684 res = (a >> shift) + round; 2685 return res; 2686 } 2687 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2688 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2689 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2690 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2691 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) 2692 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) 2693 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) 2694 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) 2695 2696 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2697 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2698 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2699 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2700 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) 2701 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) 2702 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) 2703 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) 2704 2705 static inline int8_t 2706 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2707 { 2708 uint8_t round, shift = b & 0x7; 2709 int8_t res; 2710 2711 round = get_round(vxrm, a, shift); 2712 res = (a >> shift) + round; 2713 return res; 2714 } 2715 static inline int16_t 2716 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2717 { 2718 uint8_t round, shift = b & 0xf; 2719 int16_t 
res; 2720 2721 round = get_round(vxrm, a, shift); 2722 res = (a >> shift) + round; 2723 return res; 2724 } 2725 static inline int32_t 2726 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2727 { 2728 uint8_t round, shift = b & 0x1f; 2729 int32_t res; 2730 2731 round = get_round(vxrm, a, shift); 2732 res = (a >> shift) + round; 2733 return res; 2734 } 2735 static inline int64_t 2736 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2737 { 2738 uint8_t round, shift = b & 0x3f; 2739 int64_t res; 2740 2741 round = get_round(vxrm, a, shift); 2742 res = (a >> shift) + round; 2743 return res; 2744 } 2745 2746 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2747 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2748 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2749 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2750 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) 2751 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) 2752 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) 2753 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) 2754 2755 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2756 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2757 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2758 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2759 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) 2760 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) 2761 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) 2762 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) 2763 2764 /* Vector Narrowing Fixed-Point Clip Instructions */ 2765 static inline int8_t 2766 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2767 { 2768 uint8_t round, shift = b & 0xf; 2769 int16_t res; 2770 2771 round = get_round(vxrm, a, shift); 2772 res = (a >> shift) + round; 2773 if (res > INT8_MAX) { 2774 env->vxsat = 0x1; 2775 return INT8_MAX; 2776 } else if (res < INT8_MIN) { 2777 env->vxsat = 0x1; 2778 return INT8_MIN; 2779 } else { 2780 return res; 2781 } 2782 } 2783 2784 static inline int16_t 2785 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2786 { 2787 uint8_t round, shift = b & 0x1f; 2788 int32_t res; 2789 2790 round = get_round(vxrm, a, shift); 2791 res = (a >> shift) + round; 2792 if (res > INT16_MAX) { 2793 env->vxsat = 0x1; 2794 return INT16_MAX; 2795 } else if (res < INT16_MIN) { 2796 env->vxsat = 0x1; 2797 return INT16_MIN; 2798 } else { 2799 return res; 2800 } 2801 } 2802 2803 static inline int32_t 2804 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2805 { 2806 uint8_t round, shift = b & 0x3f; 2807 int64_t res; 2808 2809 round = get_round(vxrm, a, shift); 2810 res = (a >> shift) + round; 2811 if (res > INT32_MAX) { 2812 env->vxsat = 0x1; 2813 return INT32_MAX; 2814 } else if (res < INT32_MIN) { 2815 env->vxsat = 0x1; 2816 return INT32_MIN; 2817 } else { 2818 return res; 2819 } 2820 } 2821 2822 RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2823 RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2824 RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2825 GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1) 2826 GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2) 2827 GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4) 2828 2829 RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8) 2830 RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16) 2831 RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32) 2832 GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1) 2833 GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2) 2834 GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4) 2835 2836 static inline uint8_t 2837 vnclipu8(CPURISCVState *env, int vxrm, uint16_t 
a, uint8_t b) 2838 { 2839 uint8_t round, shift = b & 0xf; 2840 uint16_t res; 2841 2842 round = get_round(vxrm, a, shift); 2843 res = (a >> shift) + round; 2844 if (res > UINT8_MAX) { 2845 env->vxsat = 0x1; 2846 return UINT8_MAX; 2847 } else { 2848 return res; 2849 } 2850 } 2851 2852 static inline uint16_t 2853 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2854 { 2855 uint8_t round, shift = b & 0x1f; 2856 uint32_t res; 2857 2858 round = get_round(vxrm, a, shift); 2859 res = (a >> shift) + round; 2860 if (res > UINT16_MAX) { 2861 env->vxsat = 0x1; 2862 return UINT16_MAX; 2863 } else { 2864 return res; 2865 } 2866 } 2867 2868 static inline uint32_t 2869 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2870 { 2871 uint8_t round, shift = b & 0x3f; 2872 int64_t res; 2873 2874 round = get_round(vxrm, a, shift); 2875 res = (a >> shift) + round; 2876 if (res > UINT32_MAX) { 2877 env->vxsat = 0x1; 2878 return UINT32_MAX; 2879 } else { 2880 return res; 2881 } 2882 } 2883 2884 RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2885 RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2886 RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2887 GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1) 2888 GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2) 2889 GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4) 2890 2891 RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8) 2892 RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16) 2893 RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32) 2894 GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1) 2895 GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2) 2896 GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4) 2897 2898 /* 2899 *** Vector Float Point Arithmetic Instructions 2900 */ 2901 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2902 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2903 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2904 CPURISCVState *env) \ 2905 { \ 2906 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2907 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2908 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2909 } 2910 2911 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \ 2912 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2913 void *vs2, CPURISCVState *env, \ 2914 uint32_t desc) \ 2915 { \ 2916 uint32_t vm = vext_vm(desc); \ 2917 uint32_t vl = env->vl; \ 2918 uint32_t i; \ 2919 \ 2920 for (i = 0; i < vl; i++) { \ 2921 if (!vm && !vext_elem_mask(v0, i)) { \ 2922 continue; \ 2923 } \ 2924 do_##NAME(vd, vs1, vs2, i, env); \ 2925 } \ 2926 } 2927 2928 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2929 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2930 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2931 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) 2932 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4) 2933 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8) 2934 2935 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2936 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2937 CPURISCVState *env) \ 2938 { \ 2939 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2940 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 2941 } 2942 2943 #define GEN_VEXT_VF(NAME, ESZ, DSZ) \ 2944 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2945 void *vs2, CPURISCVState *env, \ 2946 uint32_t desc) \ 2947 { \ 2948 uint32_t vm = vext_vm(desc); \ 2949 uint32_t vl = env->vl; \ 2950 uint32_t i; \ 2951 \ 2952 for (i = 0; i < vl; i++) { \ 2953 if (!vm && !vext_elem_mask(v0, i)) { \ 2954 continue; \ 2955 } \ 2956 
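            /* apply the scalar float operand to each unmasked element */ \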
do_##NAME(vd, s1, vs2, i, env); \ 2957 } \ 2958 } 2959 2960 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2961 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2962 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2963 GEN_VEXT_VF(vfadd_vf_h, 2, 2) 2964 GEN_VEXT_VF(vfadd_vf_w, 4, 4) 2965 GEN_VEXT_VF(vfadd_vf_d, 8, 8) 2966 2967 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2968 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2969 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2970 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) 2971 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) 2972 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) 2973 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2974 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2975 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2976 GEN_VEXT_VF(vfsub_vf_h, 2, 2) 2977 GEN_VEXT_VF(vfsub_vf_w, 4, 4) 2978 GEN_VEXT_VF(vfsub_vf_d, 8, 8) 2979 2980 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2981 { 2982 return float16_sub(b, a, s); 2983 } 2984 2985 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2986 { 2987 return float32_sub(b, a, s); 2988 } 2989 2990 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2991 { 2992 return float64_sub(b, a, s); 2993 } 2994 2995 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2996 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2997 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2998 GEN_VEXT_VF(vfrsub_vf_h, 2, 2) 2999 GEN_VEXT_VF(vfrsub_vf_w, 4, 4) 3000 GEN_VEXT_VF(vfrsub_vf_d, 8, 8) 3001 3002 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3003 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3004 { 3005 return float32_add(float16_to_float32(a, true, s), 3006 float16_to_float32(b, true, s), s); 3007 } 3008 3009 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3010 { 3011 return float64_add(float32_to_float64(a, s), 3012 float32_to_float64(b, s), s); 3013 3014 } 3015 3016 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3017 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3018 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) 3019 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) 3020 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3021 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3022 GEN_VEXT_VF(vfwadd_vf_h, 2, 4) 3023 GEN_VEXT_VF(vfwadd_vf_w, 4, 8) 3024 3025 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3026 { 3027 return float32_sub(float16_to_float32(a, true, s), 3028 float16_to_float32(b, true, s), s); 3029 } 3030 3031 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3032 { 3033 return float64_sub(float32_to_float64(a, s), 3034 float32_to_float64(b, s), s); 3035 3036 } 3037 3038 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3039 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3040 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) 3041 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) 3042 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3043 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3044 GEN_VEXT_VF(vfwsub_vf_h, 2, 4) 3045 GEN_VEXT_VF(vfwsub_vf_w, 4, 8) 3046 3047 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3048 { 3049 return float32_add(a, float16_to_float32(b, true, s), s); 3050 } 3051 3052 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3053 { 3054 
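    /* .wv/.wf form: 'a' already has the wide type, only 'b' is converted up */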
return float64_add(a, float32_to_float64(b, s), s); 3055 } 3056 3057 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3058 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3059 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) 3060 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 3061 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3062 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3063 GEN_VEXT_VF(vfwadd_wf_h, 2, 4) 3064 GEN_VEXT_VF(vfwadd_wf_w, 4, 8) 3065 3066 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3067 { 3068 return float32_sub(a, float16_to_float32(b, true, s), s); 3069 } 3070 3071 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3072 { 3073 return float64_sub(a, float32_to_float64(b, s), s); 3074 } 3075 3076 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3077 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3078 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) 3079 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) 3080 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3081 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3082 GEN_VEXT_VF(vfwsub_wf_h, 2, 4) 3083 GEN_VEXT_VF(vfwsub_wf_w, 4, 8) 3084 3085 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3086 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3087 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3088 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3089 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) 3090 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) 3091 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) 3092 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3093 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3094 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3095 GEN_VEXT_VF(vfmul_vf_h, 2, 2) 3096 GEN_VEXT_VF(vfmul_vf_w, 4, 4) 3097 GEN_VEXT_VF(vfmul_vf_d, 8, 8) 3098 3099 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3100 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3101 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3102 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 3103 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) 3104 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) 3105 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3106 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3107 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3108 GEN_VEXT_VF(vfdiv_vf_h, 2, 2) 3109 GEN_VEXT_VF(vfdiv_vf_w, 4, 4) 3110 GEN_VEXT_VF(vfdiv_vf_d, 8, 8) 3111 3112 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3113 { 3114 return float16_div(b, a, s); 3115 } 3116 3117 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3118 { 3119 return float32_div(b, a, s); 3120 } 3121 3122 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3123 { 3124 return float64_div(b, a, s); 3125 } 3126 3127 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3128 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3129 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3130 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) 3131 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) 3132 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) 3133 3134 /* Vector Widening Floating-Point Multiply */ 3135 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3136 { 3137 return float32_mul(float16_to_float32(a, true, s), 3138 float16_to_float32(b, true, s), s); 3139 } 3140 3141 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3142 { 3143 
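    /* convert both single-precision inputs to double, then multiply once */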
return float64_mul(float32_to_float64(a, s), 3144 float32_to_float64(b, s), s); 3145 3146 } 3147 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3148 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3149 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) 3150 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) 3151 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3152 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3153 GEN_VEXT_VF(vfwmul_vf_h, 2, 4) 3154 GEN_VEXT_VF(vfwmul_vf_w, 4, 8) 3155 3156 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3157 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3158 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3159 CPURISCVState *env) \ 3160 { \ 3161 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3162 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3163 TD d = *((TD *)vd + HD(i)); \ 3164 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3165 } 3166 3167 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3168 { 3169 return float16_muladd(a, b, d, 0, s); 3170 } 3171 3172 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3173 { 3174 return float32_muladd(a, b, d, 0, s); 3175 } 3176 3177 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3178 { 3179 return float64_muladd(a, b, d, 0, s); 3180 } 3181 3182 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3183 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3184 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3185 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) 3186 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) 3187 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) 3188 3189 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3190 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3191 CPURISCVState *env) \ 3192 { \ 3193 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3194 TD d = *((TD *)vd + HD(i)); \ 3195 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3196 } 3197 3198 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3199 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3200 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3201 GEN_VEXT_VF(vfmacc_vf_h, 2, 2) 3202 GEN_VEXT_VF(vfmacc_vf_w, 4, 4) 3203 GEN_VEXT_VF(vfmacc_vf_d, 8, 8) 3204 3205 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3206 { 3207 return float16_muladd(a, b, d, 3208 float_muladd_negate_c | float_muladd_negate_product, s); 3209 } 3210 3211 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3212 { 3213 return float32_muladd(a, b, d, 3214 float_muladd_negate_c | float_muladd_negate_product, s); 3215 } 3216 3217 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3218 { 3219 return float64_muladd(a, b, d, 3220 float_muladd_negate_c | float_muladd_negate_product, s); 3221 } 3222 3223 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3224 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3225 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3226 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) 3227 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) 3228 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) 3229 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3230 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3231 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3232 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) 3233 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) 3234 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) 3235 3236 
static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3237 { 3238 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3239 } 3240 3241 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3242 { 3243 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3244 } 3245 3246 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3247 { 3248 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3249 } 3250 3251 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3252 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3253 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3254 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) 3255 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) 3256 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) 3257 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3258 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3259 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3260 GEN_VEXT_VF(vfmsac_vf_h, 2, 2) 3261 GEN_VEXT_VF(vfmsac_vf_w, 4, 4) 3262 GEN_VEXT_VF(vfmsac_vf_d, 8, 8) 3263 3264 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3265 { 3266 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3267 } 3268 3269 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3270 { 3271 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3272 } 3273 3274 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3275 { 3276 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3277 } 3278 3279 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3280 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3281 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3282 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) 3283 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) 3284 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) 3285 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3286 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3287 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3288 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) 3289 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) 3290 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) 3291 3292 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3293 { 3294 return float16_muladd(d, b, a, 0, s); 3295 } 3296 3297 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3298 { 3299 return float32_muladd(d, b, a, 0, s); 3300 } 3301 3302 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3303 { 3304 return float64_muladd(d, b, a, 0, s); 3305 } 3306 3307 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3308 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3309 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3310 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) 3311 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) 3312 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) 3313 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3314 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3315 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3316 GEN_VEXT_VF(vfmadd_vf_h, 2, 2) 3317 GEN_VEXT_VF(vfmadd_vf_w, 4, 4) 3318 GEN_VEXT_VF(vfmadd_vf_d, 8, 8) 3319 3320 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3321 { 3322 return float16_muladd(d, b, a, 3323 float_muladd_negate_c | float_muladd_negate_product, s); 3324 } 3325 3326 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, 
float_status *s) 3327 { 3328 return float32_muladd(d, b, a, 3329 float_muladd_negate_c | float_muladd_negate_product, s); 3330 } 3331 3332 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3333 { 3334 return float64_muladd(d, b, a, 3335 float_muladd_negate_c | float_muladd_negate_product, s); 3336 } 3337 3338 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3339 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3340 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3341 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) 3342 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) 3343 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) 3344 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3345 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3346 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3347 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) 3348 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) 3349 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) 3350 3351 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3352 { 3353 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3354 } 3355 3356 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3357 { 3358 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3359 } 3360 3361 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3362 { 3363 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3364 } 3365 3366 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3367 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3368 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3369 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) 3370 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 3371 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) 3372 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3373 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3374 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3375 GEN_VEXT_VF(vfmsub_vf_h, 2, 2) 3376 GEN_VEXT_VF(vfmsub_vf_w, 4, 4) 3377 GEN_VEXT_VF(vfmsub_vf_d, 8, 8) 3378 3379 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3380 { 3381 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3382 } 3383 3384 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3385 { 3386 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3387 } 3388 3389 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3390 { 3391 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3392 } 3393 3394 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3395 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3396 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3397 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) 3398 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) 3399 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) 3400 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3401 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3402 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3403 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) 3404 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) 3405 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) 3406 3407 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3408 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3409 { 3410 return float32_muladd(float16_to_float32(a, true, s), 3411 float16_to_float32(b, true, s), d, 0, s); 3412 } 3413 3414 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t 
d, float_status *s) 3415 { 3416 return float64_muladd(float32_to_float64(a, s), 3417 float32_to_float64(b, s), d, 0, s); 3418 } 3419 3420 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3421 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3422 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) 3423 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) 3424 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3425 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3426 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) 3427 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) 3428 3429 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3430 { 3431 return float32_muladd(float16_to_float32(a, true, s), 3432 float16_to_float32(b, true, s), d, 3433 float_muladd_negate_c | float_muladd_negate_product, s); 3434 } 3435 3436 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3437 { 3438 return float64_muladd(float32_to_float64(a, s), 3439 float32_to_float64(b, s), d, 3440 float_muladd_negate_c | float_muladd_negate_product, s); 3441 } 3442 3443 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3444 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3445 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) 3446 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) 3447 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3448 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3449 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) 3450 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) 3451 3452 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3453 { 3454 return float32_muladd(float16_to_float32(a, true, s), 3455 float16_to_float32(b, true, s), d, 3456 float_muladd_negate_c, s); 3457 } 3458 3459 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3460 { 3461 return float64_muladd(float32_to_float64(a, s), 3462 float32_to_float64(b, s), d, 3463 float_muladd_negate_c, s); 3464 } 3465 3466 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3467 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3468 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) 3469 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) 3470 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3471 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3472 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) 3473 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) 3474 3475 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3476 { 3477 return float32_muladd(float16_to_float32(a, true, s), 3478 float16_to_float32(b, true, s), d, 3479 float_muladd_negate_product, s); 3480 } 3481 3482 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3483 { 3484 return float64_muladd(float32_to_float64(a, s), 3485 float32_to_float64(b, s), d, 3486 float_muladd_negate_product, s); 3487 } 3488 3489 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3490 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3491 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) 3492 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 3493 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3494 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3495 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) 3496 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) 3497 3498 /* Vector Floating-Point Square-Root Instruction */ 3499 /* (TD, T2, TX2) */ 3500 #define OP_UU_H uint16_t, uint16_t, uint16_t 3501 #define OP_UU_W uint32_t, uint32_t, uint32_t 3502 #define OP_UU_D uint64_t, uint64_t, uint64_t 3503 3504 #define 
OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3505 static void do_##NAME(void *vd, void *vs2, int i, \ 3506 CPURISCVState *env) \ 3507 { \ 3508 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3509 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3510 } 3511 3512 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ 3513 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3514 CPURISCVState *env, uint32_t desc) \ 3515 { \ 3516 uint32_t vm = vext_vm(desc); \ 3517 uint32_t vl = env->vl; \ 3518 uint32_t i; \ 3519 \ 3520 if (vl == 0) { \ 3521 return; \ 3522 } \ 3523 for (i = 0; i < vl; i++) { \ 3524 if (!vm && !vext_elem_mask(v0, i)) { \ 3525 continue; \ 3526 } \ 3527 do_##NAME(vd, vs2, i, env); \ 3528 } \ 3529 } 3530 3531 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3532 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3533 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3534 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) 3535 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) 3536 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) 3537 3538 /* Vector Floating-Point MIN/MAX Instructions */ 3539 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum) 3540 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum) 3541 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum) 3542 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) 3543 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) 3544 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) 3545 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum) 3546 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum) 3547 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum) 3548 GEN_VEXT_VF(vfmin_vf_h, 2, 2) 3549 GEN_VEXT_VF(vfmin_vf_w, 4, 4) 3550 GEN_VEXT_VF(vfmin_vf_d, 8, 8) 3551 3552 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum) 3553 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum) 3554 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum) 3555 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) 3556 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) 3557 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) 3558 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum) 3559 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum) 3560 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) 3561 GEN_VEXT_VF(vfmax_vf_h, 2, 2) 3562 GEN_VEXT_VF(vfmax_vf_w, 4, 4) 3563 GEN_VEXT_VF(vfmax_vf_d, 8, 8) 3564 3565 /* Vector Floating-Point Sign-Injection Instructions */ 3566 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3567 { 3568 return deposit64(b, 0, 15, a); 3569 } 3570 3571 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3572 { 3573 return deposit64(b, 0, 31, a); 3574 } 3575 3576 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3577 { 3578 return deposit64(b, 0, 63, a); 3579 } 3580 3581 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3582 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3583 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3584 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) 3585 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) 3586 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) 3587 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3588 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3589 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3590 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) 3591 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) 3592 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) 3593 3594 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3595 { 3596 return deposit64(~b, 0, 15, a); 3597 } 3598 3599 static uint32_t fsgnjn32(uint32_t a, 
uint32_t b, float_status *s) 3600 { 3601 return deposit64(~b, 0, 31, a); 3602 } 3603 3604 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3605 { 3606 return deposit64(~b, 0, 63, a); 3607 } 3608 3609 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3610 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3611 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3612 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) 3613 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) 3614 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) 3615 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3616 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3617 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3618 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) 3619 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) 3620 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) 3621 3622 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3623 { 3624 return deposit64(b ^ a, 0, 15, a); 3625 } 3626 3627 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3628 { 3629 return deposit64(b ^ a, 0, 31, a); 3630 } 3631 3632 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3633 { 3634 return deposit64(b ^ a, 0, 63, a); 3635 } 3636 3637 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3638 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3639 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3640 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) 3641 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) 3642 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) 3643 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3644 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3645 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3646 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) 3647 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) 3648 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) 3649 3650 /* Vector Floating-Point Compare Instructions */ 3651 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3652 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3653 CPURISCVState *env, uint32_t desc) \ 3654 { \ 3655 uint32_t vm = vext_vm(desc); \ 3656 uint32_t vl = env->vl; \ 3657 uint32_t vlmax = vext_max_elems(desc, \ 3658 ctzl(sizeof(ETYPE))); \ 3659 uint32_t i; \ 3660 \ 3661 for (i = 0; i < vl; i++) { \ 3662 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3663 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3664 if (!vm && !vext_elem_mask(v0, i)) { \ 3665 continue; \ 3666 } \ 3667 vext_set_elem_mask(vd, i, \ 3668 DO_OP(s2, s1, &env->fp_status)); \ 3669 } \ 3670 for (; i < vlmax; i++) { \ 3671 vext_set_elem_mask(vd, i, 0); \ 3672 } \ 3673 } 3674 3675 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3676 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3677 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3678 3679 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3680 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3681 CPURISCVState *env, uint32_t desc) \ 3682 { \ 3683 uint32_t vm = vext_vm(desc); \ 3684 uint32_t vl = env->vl; \ 3685 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 3686 uint32_t i; \ 3687 \ 3688 for (i = 0; i < vl; i++) { \ 3689 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3690 if (!vm && !vext_elem_mask(v0, i)) { \ 3691 continue; \ 3692 } \ 3693 vext_set_elem_mask(vd, i, \ 3694 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3695 } \ 3696 for (; i < vlmax; i++) { \ 3697 vext_set_elem_mask(vd, i, 0); \ 3698 } \ 3699 } 3700 3701 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 
3702 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3703 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3704 3705 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3706 { 3707 FloatRelation compare = float16_compare_quiet(a, b, s); 3708 return compare != float_relation_equal; 3709 } 3710 3711 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 3712 { 3713 FloatRelation compare = float32_compare_quiet(a, b, s); 3714 return compare != float_relation_equal; 3715 } 3716 3717 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 3718 { 3719 FloatRelation compare = float64_compare_quiet(a, b, s); 3720 return compare != float_relation_equal; 3721 } 3722 3723 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 3724 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 3725 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 3726 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 3727 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 3728 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 3729 3730 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 3731 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 3732 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 3733 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 3734 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 3735 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 3736 3737 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 3738 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 3739 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 3740 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 3741 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 3742 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 3743 3744 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 3745 { 3746 FloatRelation compare = float16_compare(a, b, s); 3747 return compare == float_relation_greater; 3748 } 3749 3750 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 3751 { 3752 FloatRelation compare = float32_compare(a, b, s); 3753 return compare == float_relation_greater; 3754 } 3755 3756 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 3757 { 3758 FloatRelation compare = float64_compare(a, b, s); 3759 return compare == float_relation_greater; 3760 } 3761 3762 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 3763 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 3764 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 3765 3766 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 3767 { 3768 FloatRelation compare = float16_compare(a, b, s); 3769 return compare == float_relation_greater || 3770 compare == float_relation_equal; 3771 } 3772 3773 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 3774 { 3775 FloatRelation compare = float32_compare(a, b, s); 3776 return compare == float_relation_greater || 3777 compare == float_relation_equal; 3778 } 3779 3780 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 3781 { 3782 FloatRelation compare = float64_compare(a, b, s); 3783 return compare == float_relation_greater || 3784 compare == float_relation_equal; 3785 } 3786 3787 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 3788 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 3789 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 3790 3791 GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet) 3792 GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet) 3793 
GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet) 3794 GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet) 3795 GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet) 3796 GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet) 3797 3798 /* Vector Floating-Point Classify Instruction */ 3799 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3800 static void do_##NAME(void *vd, void *vs2, int i) \ 3801 { \ 3802 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3803 *((TD *)vd + HD(i)) = OP(s2); \ 3804 } 3805 3806 #define GEN_VEXT_V(NAME, ESZ, DSZ) \ 3807 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3808 CPURISCVState *env, uint32_t desc) \ 3809 { \ 3810 uint32_t vm = vext_vm(desc); \ 3811 uint32_t vl = env->vl; \ 3812 uint32_t i; \ 3813 \ 3814 for (i = 0; i < vl; i++) { \ 3815 if (!vm && !vext_elem_mask(v0, i)) { \ 3816 continue; \ 3817 } \ 3818 do_##NAME(vd, vs2, i); \ 3819 } \ 3820 } 3821 3822 target_ulong fclass_h(uint64_t frs1) 3823 { 3824 float16 f = frs1; 3825 bool sign = float16_is_neg(f); 3826 3827 if (float16_is_infinity(f)) { 3828 return sign ? 1 << 0 : 1 << 7; 3829 } else if (float16_is_zero(f)) { 3830 return sign ? 1 << 3 : 1 << 4; 3831 } else if (float16_is_zero_or_denormal(f)) { 3832 return sign ? 1 << 2 : 1 << 5; 3833 } else if (float16_is_any_nan(f)) { 3834 float_status s = { }; /* for snan_bit_is_one */ 3835 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3836 } else { 3837 return sign ? 1 << 1 : 1 << 6; 3838 } 3839 } 3840 3841 target_ulong fclass_s(uint64_t frs1) 3842 { 3843 float32 f = frs1; 3844 bool sign = float32_is_neg(f); 3845 3846 if (float32_is_infinity(f)) { 3847 return sign ? 1 << 0 : 1 << 7; 3848 } else if (float32_is_zero(f)) { 3849 return sign ? 1 << 3 : 1 << 4; 3850 } else if (float32_is_zero_or_denormal(f)) { 3851 return sign ? 1 << 2 : 1 << 5; 3852 } else if (float32_is_any_nan(f)) { 3853 float_status s = { }; /* for snan_bit_is_one */ 3854 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3855 } else { 3856 return sign ? 1 << 1 : 1 << 6; 3857 } 3858 } 3859 3860 target_ulong fclass_d(uint64_t frs1) 3861 { 3862 float64 f = frs1; 3863 bool sign = float64_is_neg(f); 3864 3865 if (float64_is_infinity(f)) { 3866 return sign ? 1 << 0 : 1 << 7; 3867 } else if (float64_is_zero(f)) { 3868 return sign ? 1 << 3 : 1 << 4; 3869 } else if (float64_is_zero_or_denormal(f)) { 3870 return sign ? 1 << 2 : 1 << 5; 3871 } else if (float64_is_any_nan(f)) { 3872 float_status s = { }; /* for snan_bit_is_one */ 3873 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3874 } else { 3875 return sign ? 1 << 1 : 1 << 6; 3876 } 3877 } 3878 3879 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 3880 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 3881 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 3882 GEN_VEXT_V(vfclass_v_h, 2, 2) 3883 GEN_VEXT_V(vfclass_v_w, 4, 4) 3884 GEN_VEXT_V(vfclass_v_d, 8, 8) 3885 3886 /* Vector Floating-Point Merge Instruction */ 3887 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 3888 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3889 CPURISCVState *env, uint32_t desc) \ 3890 { \ 3891 uint32_t vm = vext_vm(desc); \ 3892 uint32_t vl = env->vl; \ 3893 uint32_t i; \ 3894 \ 3895 for (i = 0; i < vl; i++) { \ 3896 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3897 *((ETYPE *)vd + H(i)) \ 3898 = (!vm && !vext_elem_mask(v0, i) ? 
s2 : s1); \ 3899 } \ 3900 } 3901 3902 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 3903 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 3904 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 3905 3906 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 3907 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 3908 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 3909 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 3910 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 3911 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) 3912 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) 3913 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) 3914 3915 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 3916 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 3917 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 3918 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 3919 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) 3920 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) 3921 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) 3922 3923 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 3924 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 3925 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 3926 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 3927 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) 3928 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) 3929 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) 3930 3931 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 3932 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 3933 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 3934 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 3935 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) 3936 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) 3937 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) 3938 3939 /* Widening Floating-Point/Integer Type-Convert Instructions */ 3940 /* (TD, T2, TX2) */ 3941 #define WOP_UU_H uint32_t, uint16_t, uint16_t 3942 #define WOP_UU_W uint64_t, uint32_t, uint32_t 3943 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 3944 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 3945 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 3946 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) 3947 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) 3948 3949 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 3950 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 3951 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 3952 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) 3953 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) 3954 3955 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 3956 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 3957 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 3958 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) 3959 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) 3960 3961 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 3962 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 3963 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 3964 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) 3965 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) 3966 3967 /* 3968 * vfwcvt.f.f.v vd, vs2, vm # 3969 * Convert single-width float to double-width float. 
 */
static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
{
    return float16_to_float32(a, true, s);
}

RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)

/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4)

/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4)

/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. */
RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4)

/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4)

/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float.
*/ 4010 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4011 { 4012 return float32_to_float16(a, true, s); 4013 } 4014 4015 RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) 4016 RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) 4017 GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2) 4018 GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4) 4019 4020 /* 4021 *** Vector Reduction Operations 4022 */ 4023 /* Vector Single-Width Integer Reduction Instructions */ 4024 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4025 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4026 void *vs2, CPURISCVState *env, uint32_t desc) \ 4027 { \ 4028 uint32_t vm = vext_vm(desc); \ 4029 uint32_t vl = env->vl; \ 4030 uint32_t i; \ 4031 TD s1 = *((TD *)vs1 + HD(0)); \ 4032 \ 4033 for (i = 0; i < vl; i++) { \ 4034 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4035 if (!vm && !vext_elem_mask(v0, i)) { \ 4036 continue; \ 4037 } \ 4038 s1 = OP(s1, (TD)s2); \ 4039 } \ 4040 *((TD *)vd + HD(0)) = s1; \ 4041 } 4042 4043 /* vd[0] = sum(vs1[0], vs2[*]) */ 4044 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4045 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4046 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4047 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4048 4049 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4050 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4051 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4052 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4053 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4054 4055 /* vd[0] = max(vs1[0], vs2[*]) */ 4056 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4057 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4058 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4059 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4060 4061 /* vd[0] = minu(vs1[0], vs2[*]) */ 4062 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4063 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4064 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4065 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4066 4067 /* vd[0] = min(vs1[0], vs2[*]) */ 4068 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4069 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4070 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4071 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4072 4073 /* vd[0] = and(vs1[0], vs2[*]) */ 4074 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4075 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4076 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4077 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4078 4079 /* vd[0] = or(vs1[0], vs2[*]) */ 4080 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4081 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4082 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4083 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4084 4085 /* vd[0] = xor(vs1[0], vs2[*]) */ 4086 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4087 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4088 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4089 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4090 4091 /* Vector Widening Integer Reduction Instructions */ 4092 /* signed sum 
reduction into double-width accumulator */ 4093 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4094 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4095 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4096 4097 /* Unsigned sum reduction into double-width accumulator */ 4098 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4099 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4100 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4101 4102 /* Vector Single-Width Floating-Point Reduction Instructions */ 4103 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4104 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4105 void *vs2, CPURISCVState *env, \ 4106 uint32_t desc) \ 4107 { \ 4108 uint32_t vm = vext_vm(desc); \ 4109 uint32_t vl = env->vl; \ 4110 uint32_t i; \ 4111 TD s1 = *((TD *)vs1 + HD(0)); \ 4112 \ 4113 for (i = 0; i < vl; i++) { \ 4114 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4115 if (!vm && !vext_elem_mask(v0, i)) { \ 4116 continue; \ 4117 } \ 4118 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4119 } \ 4120 *((TD *)vd + HD(0)) = s1; \ 4121 } 4122 4123 /* Unordered sum */ 4124 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4125 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4126 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4127 4128 /* Maximum value */ 4129 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum) 4130 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum) 4131 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum) 4132 4133 /* Minimum value */ 4134 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum) 4135 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum) 4136 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum) 4137 4138 /* Vector Widening Floating-Point Reduction Instructions */ 4139 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4140 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4141 void *vs2, CPURISCVState *env, uint32_t desc) 4142 { 4143 uint32_t vm = vext_vm(desc); 4144 uint32_t vl = env->vl; 4145 uint32_t i; 4146 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4147 4148 for (i = 0; i < vl; i++) { 4149 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4150 if (!vm && !vext_elem_mask(v0, i)) { 4151 continue; 4152 } 4153 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4154 &env->fp_status); 4155 } 4156 *((uint32_t *)vd + H4(0)) = s1; 4157 } 4158 4159 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4160 void *vs2, CPURISCVState *env, uint32_t desc) 4161 { 4162 uint32_t vm = vext_vm(desc); 4163 uint32_t vl = env->vl; 4164 uint32_t i; 4165 uint64_t s1 = *((uint64_t *)vs1); 4166 4167 for (i = 0; i < vl; i++) { 4168 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4169 if (!vm && !vext_elem_mask(v0, i)) { 4170 continue; 4171 } 4172 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4173 &env->fp_status); 4174 } 4175 *((uint64_t *)vd) = s1; 4176 } 4177 4178 /* 4179 *** Vector Mask Operations 4180 */ 4181 /* Vector Mask-Register Logical Instructions */ 4182 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4183 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4184 void *vs2, CPURISCVState *env, \ 4185 uint32_t desc) \ 4186 { \ 4187 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4188 uint32_t vl = env->vl; \ 4189 uint32_t i; \ 4190 int a, b; \ 4191 \ 4192 for (i = 0; i < vl; 
i++) { \ 4193 a = vext_elem_mask(vs1, i); \ 4194 b = vext_elem_mask(vs2, i); \ 4195 vext_set_elem_mask(vd, i, OP(b, a)); \ 4196 } \ 4197 for (; i < vlmax; i++) { \ 4198 vext_set_elem_mask(vd, i, 0); \ 4199 } \ 4200 } 4201 4202 #define DO_NAND(N, M) (!(N & M)) 4203 #define DO_ANDNOT(N, M) (N & !M) 4204 #define DO_NOR(N, M) (!(N | M)) 4205 #define DO_ORNOT(N, M) (N | !M) 4206 #define DO_XNOR(N, M) (!(N ^ M)) 4207 4208 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4209 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4210 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) 4211 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4212 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4213 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4214 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) 4215 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4216 4217 /* Vector count population in mask vcpop */ 4218 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4219 uint32_t desc) 4220 { 4221 target_ulong cnt = 0; 4222 uint32_t vm = vext_vm(desc); 4223 uint32_t vl = env->vl; 4224 int i; 4225 4226 for (i = 0; i < vl; i++) { 4227 if (vm || vext_elem_mask(v0, i)) { 4228 if (vext_elem_mask(vs2, i)) { 4229 cnt++; 4230 } 4231 } 4232 } 4233 return cnt; 4234 } 4235 4236 /* vfirst find-first-set mask bit*/ 4237 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4238 uint32_t desc) 4239 { 4240 uint32_t vm = vext_vm(desc); 4241 uint32_t vl = env->vl; 4242 int i; 4243 4244 for (i = 0; i < vl; i++) { 4245 if (vm || vext_elem_mask(v0, i)) { 4246 if (vext_elem_mask(vs2, i)) { 4247 return i; 4248 } 4249 } 4250 } 4251 return -1LL; 4252 } 4253 4254 enum set_mask_type { 4255 ONLY_FIRST = 1, 4256 INCLUDE_FIRST, 4257 BEFORE_FIRST, 4258 }; 4259 4260 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4261 uint32_t desc, enum set_mask_type type) 4262 { 4263 uint32_t vm = vext_vm(desc); 4264 uint32_t vl = env->vl; 4265 int i; 4266 bool first_mask_bit = false; 4267 4268 for (i = 0; i < vl; i++) { 4269 if (!vm && !vext_elem_mask(v0, i)) { 4270 continue; 4271 } 4272 /* write a zero to all following active elements */ 4273 if (first_mask_bit) { 4274 vext_set_elem_mask(vd, i, 0); 4275 continue; 4276 } 4277 if (vext_elem_mask(vs2, i)) { 4278 first_mask_bit = true; 4279 if (type == BEFORE_FIRST) { 4280 vext_set_elem_mask(vd, i, 0); 4281 } else { 4282 vext_set_elem_mask(vd, i, 1); 4283 } 4284 } else { 4285 if (type == ONLY_FIRST) { 4286 vext_set_elem_mask(vd, i, 0); 4287 } else { 4288 vext_set_elem_mask(vd, i, 1); 4289 } 4290 } 4291 } 4292 } 4293 4294 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4295 uint32_t desc) 4296 { 4297 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4298 } 4299 4300 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4301 uint32_t desc) 4302 { 4303 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4304 } 4305 4306 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4307 uint32_t desc) 4308 { 4309 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4310 } 4311 4312 /* Vector Iota Instruction */ 4313 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4314 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4315 uint32_t desc) \ 4316 { \ 4317 uint32_t vm = vext_vm(desc); \ 4318 uint32_t vl = env->vl; \ 4319 uint32_t sum = 0; \ 4320 int i; \ 4321 \ 4322 for (i = 0; i < vl; i++) { \ 4323 if (!vm && !vext_elem_mask(v0, i)) { \ 4324 continue; \ 4325 } \ 4326 *((ETYPE *)vd + H(i)) = sum; \ 4327 if (vext_elem_mask(vs2, i)) { \ 4328 sum++; \ 4329 } \ 4330 } \ 4331 } 4332 4333 GEN_VEXT_VIOTA_M(viota_m_b, 
uint8_t, H1) 4334 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4335 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4336 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4337 4338 /* Vector Element Index Instruction */ 4339 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4340 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4341 { \ 4342 uint32_t vm = vext_vm(desc); \ 4343 uint32_t vl = env->vl; \ 4344 int i; \ 4345 \ 4346 for (i = 0; i < vl; i++) { \ 4347 if (!vm && !vext_elem_mask(v0, i)) { \ 4348 continue; \ 4349 } \ 4350 *((ETYPE *)vd + H(i)) = i; \ 4351 } \ 4352 } 4353 4354 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4355 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4356 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4357 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4358 4359 /* 4360 *** Vector Permutation Instructions 4361 */ 4362 4363 /* Vector Slide Instructions */ 4364 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4365 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4366 CPURISCVState *env, uint32_t desc) \ 4367 { \ 4368 uint32_t vm = vext_vm(desc); \ 4369 uint32_t vl = env->vl; \ 4370 target_ulong offset = s1, i; \ 4371 \ 4372 for (i = offset; i < vl; i++) { \ 4373 if (!vm && !vext_elem_mask(v0, i)) { \ 4374 continue; \ 4375 } \ 4376 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4377 } \ 4378 } 4379 4380 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4381 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4382 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4383 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4384 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4385 4386 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4387 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4388 CPURISCVState *env, uint32_t desc) \ 4389 { \ 4390 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4391 uint32_t vm = vext_vm(desc); \ 4392 uint32_t vl = env->vl; \ 4393 target_ulong offset = s1, i; \ 4394 \ 4395 for (i = 0; i < vl; ++i) { \ 4396 target_ulong j = i + offset; \ 4397 if (!vm && !vext_elem_mask(v0, i)) { \ 4398 continue; \ 4399 } \ 4400 *((ETYPE *)vd + H(i)) = j >= vlmax ? 
0 : *((ETYPE *)vs2 + H(j)); \ 4401 } \ 4402 } 4403 4404 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4405 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4406 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4407 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4408 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4409 4410 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H) \ 4411 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4412 CPURISCVState *env, uint32_t desc) \ 4413 { \ 4414 uint32_t vm = vext_vm(desc); \ 4415 uint32_t vl = env->vl; \ 4416 uint32_t i; \ 4417 \ 4418 for (i = 0; i < vl; i++) { \ 4419 if (!vm && !vext_elem_mask(v0, i)) { \ 4420 continue; \ 4421 } \ 4422 if (i == 0) { \ 4423 *((ETYPE *)vd + H(i)) = s1; \ 4424 } else { \ 4425 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4426 } \ 4427 } \ 4428 } 4429 4430 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4431 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1) 4432 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2) 4433 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4) 4434 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8) 4435 4436 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H) \ 4437 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4438 CPURISCVState *env, uint32_t desc) \ 4439 { \ 4440 uint32_t vm = vext_vm(desc); \ 4441 uint32_t vl = env->vl; \ 4442 uint32_t i; \ 4443 \ 4444 for (i = 0; i < vl; i++) { \ 4445 if (!vm && !vext_elem_mask(v0, i)) { \ 4446 continue; \ 4447 } \ 4448 if (i == vl - 1) { \ 4449 *((ETYPE *)vd + H(i)) = s1; \ 4450 } else { \ 4451 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4452 } \ 4453 } \ 4454 } 4455 4456 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4457 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1) 4458 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2) 4459 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4) 4460 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8) 4461 4462 /* Vector Register Gather Instruction */ 4463 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4464 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4465 CPURISCVState *env, uint32_t desc) \ 4466 { \ 4467 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS1))); \ 4468 uint32_t vm = vext_vm(desc); \ 4469 uint32_t vl = env->vl; \ 4470 uint64_t index; \ 4471 uint32_t i; \ 4472 \ 4473 for (i = 0; i < vl; i++) { \ 4474 if (!vm && !vext_elem_mask(v0, i)) { \ 4475 continue; \ 4476 } \ 4477 index = *((TS1 *)vs1 + HS1(i)); \ 4478 if (index >= vlmax) { \ 4479 *((TS2 *)vd + HS2(i)) = 0; \ 4480 } else { \ 4481 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4482 } \ 4483 } \ 4484 } 4485 4486 /* vd[i] = (vs1[i] >= VLMAX) ? 
0 : vs2[vs1[i]]; */ 4487 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4488 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4489 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4490 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4491 4492 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4493 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4494 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4495 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4496 4497 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4498 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4499 CPURISCVState *env, uint32_t desc) \ 4500 { \ 4501 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4502 uint32_t vm = vext_vm(desc); \ 4503 uint32_t vl = env->vl; \ 4504 uint64_t index = s1; \ 4505 uint32_t i; \ 4506 \ 4507 for (i = 0; i < vl; i++) { \ 4508 if (!vm && !vext_elem_mask(v0, i)) { \ 4509 continue; \ 4510 } \ 4511 if (index >= vlmax) { \ 4512 *((ETYPE *)vd + H(i)) = 0; \ 4513 } else { \ 4514 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4515 } \ 4516 } \ 4517 } 4518 4519 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ 4520 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4521 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4522 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4523 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4524 4525 /* Vector Compress Instruction */ 4526 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4527 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4528 CPURISCVState *env, uint32_t desc) \ 4529 { \ 4530 uint32_t vl = env->vl; \ 4531 uint32_t num = 0, i; \ 4532 \ 4533 for (i = 0; i < vl; i++) { \ 4534 if (!vext_elem_mask(vs1, i)) { \ 4535 continue; \ 4536 } \ 4537 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4538 num++; \ 4539 } \ 4540 } 4541 4542 /* Compress into vd elements of vs2 where vs1 is enabled */ 4543 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4544 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4545 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4546 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4547
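/*
 * Illustrative note on GEN_VEXT_VCOMPRESS_VM above (a sketch of what the
 * helper does, not a restatement of the spec): v0 is passed in but never
 * consulted, only the mask in vs1 selects elements, and only the first
 * `num` destination elements are written; later elements of vd keep
 * whatever they previously held.
 *
 * Worked example with vl = 8 and symbolic element values:
 *
 *   vs1 mask bits: 1 0 1 1 0 0 1 0     (element indices 0..7)
 *   vs2 elements:  a b c d e f g h
 *
 * yields vd[0..3] = a, c, d, g while vd[4..7] are left untouched by the
 * helper.
 */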
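/*
 * A further sketch, this time of the register gather and slide-down
 * helpers defined earlier in this file: whenever the source index reaches
 * the vlmax bound computed in the respective helper, the destination
 * element is written as zero instead of being read out of range.  For
 * example, with vlmax = 4 and an index vector vs1 = {1, 3, 7, 2},
 * vrgather.vv produces, for the active elements,
 *
 *   vd = { vs2[1], vs2[3], 0, vs2[2] }
 *
 * because index 7 >= vlmax; vslidedown.vx applies the same rule to
 * i + x[rs1].
 */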