1 /* 2 * RISC-V Vector Extension Helpers for QEMU. 3 * 4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2 or later, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "qemu/host-utils.h" 21 #include "cpu.h" 22 #include "exec/memop.h" 23 #include "exec/exec-all.h" 24 #include "exec/helper-proto.h" 25 #include "fpu/softfloat.h" 26 #include "tcg/tcg-gvec-desc.h" 27 #include "internals.h" 28 #include <math.h> 29 30 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, 31 target_ulong s2) 32 { 33 int vlmax, vl; 34 RISCVCPU *cpu = env_archcpu(env); 35 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL); 36 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW); 37 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); 38 bool vill = FIELD_EX64(s2, VTYPE, VILL); 39 target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED); 40 41 if (lmul & 4) { 42 /* Fractional LMUL. */ 43 if (lmul == 4 || 44 cpu->cfg.elen >> (8 - lmul) < sew) { 45 vill = true; 46 } 47 } 48 49 if ((sew > cpu->cfg.elen) 50 || vill 51 || (ediv != 0) 52 || (reserved != 0)) { 53 /* only set vill bit. */ 54 env->vtype = FIELD_DP64(0, VTYPE, VILL, 1); 55 env->vl = 0; 56 env->vstart = 0; 57 return 0; 58 } 59 60 vlmax = vext_get_vlmax(cpu, s2); 61 if (s1 <= vlmax) { 62 vl = s1; 63 } else { 64 vl = vlmax; 65 } 66 env->vl = vl; 67 env->vtype = s2; 68 env->vstart = 0; 69 return vl; 70 } 71 72 /* 73 * Note that vector data is stored in host-endian 64-bit chunks, 74 * so addressing units smaller than that need a host-endian fixup. 75 */ 76 #ifdef HOST_WORDS_BIGENDIAN 77 #define H1(x) ((x) ^ 7) 78 #define H1_2(x) ((x) ^ 6) 79 #define H1_4(x) ((x) ^ 4) 80 #define H2(x) ((x) ^ 3) 81 #define H4(x) ((x) ^ 1) 82 #define H8(x) ((x)) 83 #else 84 #define H1(x) (x) 85 #define H1_2(x) (x) 86 #define H1_4(x) (x) 87 #define H2(x) (x) 88 #define H4(x) (x) 89 #define H8(x) (x) 90 #endif 91 92 static inline uint32_t vext_nf(uint32_t desc) 93 { 94 return FIELD_EX32(simd_data(desc), VDATA, NF); 95 } 96 97 static inline uint32_t vext_vm(uint32_t desc) 98 { 99 return FIELD_EX32(simd_data(desc), VDATA, VM); 100 } 101 102 /* 103 * Encode LMUL to lmul as follows: 104 * LMUL vlmul lmul 105 * 1 000 0 106 * 2 001 1 107 * 4 010 2 108 * 8 011 3 109 * - 100 - 110 * 1/8 101 -3 111 * 1/4 110 -2 112 * 1/2 111 -1 113 */ 114 static inline int32_t vext_lmul(uint32_t desc) 115 { 116 return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); 117 } 118 119 /* 120 * Get the maximum number of elements that can be operated on. 121 * 122 * esz: log2 of element size in bytes. 123 */ 124 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz) 125 { 126 /* 127 * As simd_desc supports at most 256 bytes, the max vlen is 256 bits. 128 * So vlen in bytes (vlenb) is encoded as maxsz. 129 */ 130 uint32_t vlenb = simd_maxsz(desc); 131 132 /* Return VLMAX, e.g. VLEN=128, LMUL=1, SEW=8 gives VLMAX = 16 */ 133 int scale = vext_lmul(desc) - esz; 134 return scale < 0 ? 
vlenb >> -scale : vlenb << scale; 135 } 136 137 /* 138 * This function checks watchpoints before the real load operation. 139 * 140 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check. 141 * In user mode, there is no watchpoint support now. 142 * 143 * It will trigger an exception if there is no mapping in the TLB 144 * and the page table walk can't fill the TLB entry. The guest 145 * software can then return here after processing the exception, or never return. 146 */ 147 static void probe_pages(CPURISCVState *env, target_ulong addr, 148 target_ulong len, uintptr_t ra, 149 MMUAccessType access_type) 150 { 151 target_ulong pagelen = -(addr | TARGET_PAGE_MASK); 152 target_ulong curlen = MIN(pagelen, len); 153 154 probe_access(env, addr, curlen, access_type, 155 cpu_mmu_index(env, false), ra); 156 if (len > curlen) { 157 addr += curlen; 158 curlen = len - curlen; 159 probe_access(env, addr, curlen, access_type, 160 cpu_mmu_index(env, false), ra); 161 } 162 } 163 164 static inline void vext_set_elem_mask(void *v0, int index, 165 uint8_t value) 166 { 167 int idx = index / 64; 168 int pos = index % 64; 169 uint64_t old = ((uint64_t *)v0)[idx]; 170 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); 171 } 172 173 /* 174 * Earlier designs (pre-0.9) had a varying number of bits 175 * per mask value (MLEN). In the 0.9 design, MLEN=1. 176 * (Section 4.5) 177 */ 178 static inline int vext_elem_mask(void *v0, int index) 179 { 180 int idx = index / 64; 181 int pos = index % 64; 182 return (((uint64_t *)v0)[idx] >> pos) & 1; 183 } 184 185 /* element operations for load and store */ 186 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, 187 uint32_t idx, void *vd, uintptr_t retaddr); 188 189 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 190 static void NAME(CPURISCVState *env, abi_ptr addr, \ 191 uint32_t idx, void *vd, uintptr_t retaddr)\ 192 { \ 193 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 194 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 195 } \ 196 197 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) 198 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw) 199 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) 200 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq) 201 202 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 203 static void NAME(CPURISCVState *env, abi_ptr addr, \ 204 uint32_t idx, void *vd, uintptr_t retaddr)\ 205 { \ 206 ETYPE data = *((ETYPE *)vd + H(idx)); \ 207 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 208 } 209 210 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) 211 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) 212 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) 213 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) 214 215 /* 216 *** stride: access vector element from strided memory 217 */ 218 static void 219 vext_ldst_stride(void *vd, void *v0, target_ulong base, 220 target_ulong stride, CPURISCVState *env, 221 uint32_t desc, uint32_t vm, 222 vext_ldst_elem_fn *ldst_elem, 223 uint32_t esz, uintptr_t ra, MMUAccessType access_type) 224 { 225 uint32_t i, k; 226 uint32_t nf = vext_nf(desc); 227 uint32_t max_elems = vext_max_elems(desc, esz); 228 229 /* probe every access */ 230 for (i = 0; i < env->vl; i++) { 231 if (!vm && !vext_elem_mask(v0, i)) { 232 continue; 233 } 234 probe_pages(env, base + stride * i, nf << esz, ra, access_type); 235 } 236 /* do real access */ 237 for (i = 0; i < env->vl; i++) { 238 k = 0; 239 if (!vm && !vext_elem_mask(v0, i)) { 240 continue; 241 } 242 while (k < nf) { 243 target_ulong addr = base + stride * i + (k << esz); 244 ldst_elem(env, addr, i + k * max_elems, vd, 
ra); 245 k++; 246 } 247 } 248 } 249 250 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 251 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 252 target_ulong stride, CPURISCVState *env, \ 253 uint32_t desc) \ 254 { \ 255 uint32_t vm = vext_vm(desc); \ 256 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 257 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 258 } 259 260 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 261 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 262 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 263 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 264 265 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 266 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 267 target_ulong stride, CPURISCVState *env, \ 268 uint32_t desc) \ 269 { \ 270 uint32_t vm = vext_vm(desc); \ 271 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 272 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \ 273 } 274 275 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 276 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 277 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 278 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 279 280 /* 281 *** unit-stride: access elements stored contiguously in memory 282 */ 283 284 /* unmasked unit-stride load and store operation*/ 285 static void 286 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 287 vext_ldst_elem_fn *ldst_elem, 288 uint32_t esz, uintptr_t ra, MMUAccessType access_type) 289 { 290 uint32_t i, k; 291 uint32_t nf = vext_nf(desc); 292 uint32_t max_elems = vext_max_elems(desc, esz); 293 294 /* probe every access */ 295 probe_pages(env, base, env->vl * (nf << esz), ra, access_type); 296 /* load bytes from guest memory */ 297 for (i = 0; i < env->vl; i++) { 298 k = 0; 299 while (k < nf) { 300 target_ulong addr = base + ((i * nf + k) << esz); 301 ldst_elem(env, addr, i + k * max_elems, vd, ra); 302 k++; 303 } 304 } 305 } 306 307 /* 308 * masked unit-stride load and store operation will be a special case of stride, 309 * stride = NF * sizeof (MTYPE) 310 */ 311 312 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 313 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 314 CPURISCVState *env, uint32_t desc) \ 315 { \ 316 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 317 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 318 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 319 } \ 320 \ 321 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 322 CPURISCVState *env, uint32_t desc) \ 323 { \ 324 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 325 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 326 } 327 328 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 329 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 330 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 331 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 332 333 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 334 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 335 CPURISCVState *env, uint32_t desc) \ 336 { \ 337 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 338 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 339 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \ 340 } \ 341 \ 342 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 343 CPURISCVState *env, uint32_t desc) \ 344 { \ 345 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 346 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \ 347 } 348 349 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 350 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 351 
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 352 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 353 354 /* 355 *** index: access vector element from indexed memory 356 */ 357 typedef target_ulong vext_get_index_addr(target_ulong base, 358 uint32_t idx, void *vs2); 359 360 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 361 static target_ulong NAME(target_ulong base, \ 362 uint32_t idx, void *vs2) \ 363 { \ 364 return (base + *((ETYPE *)vs2 + H(idx))); \ 365 } 366 367 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 368 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 369 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 370 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 371 372 static inline void 373 vext_ldst_index(void *vd, void *v0, target_ulong base, 374 void *vs2, CPURISCVState *env, uint32_t desc, 375 vext_get_index_addr get_index_addr, 376 vext_ldst_elem_fn *ldst_elem, 377 uint32_t esz, uintptr_t ra, MMUAccessType access_type) 378 { 379 uint32_t i, k; 380 uint32_t nf = vext_nf(desc); 381 uint32_t vm = vext_vm(desc); 382 uint32_t max_elems = vext_max_elems(desc, esz); 383 384 /* probe every access*/ 385 for (i = 0; i < env->vl; i++) { 386 if (!vm && !vext_elem_mask(v0, i)) { 387 continue; 388 } 389 probe_pages(env, get_index_addr(base, i, vs2), nf << esz, ra, 390 access_type); 391 } 392 /* load bytes from guest memory */ 393 for (i = 0; i < env->vl; i++) { 394 k = 0; 395 if (!vm && !vext_elem_mask(v0, i)) { 396 continue; 397 } 398 while (k < nf) { 399 abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz); 400 ldst_elem(env, addr, i + k * max_elems, vd, ra); 401 k++; 402 } 403 } 404 } 405 406 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 407 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 408 void *vs2, CPURISCVState *env, uint32_t desc) \ 409 { \ 410 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 411 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 412 } 413 414 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 415 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 416 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 417 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 418 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 419 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 420 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 421 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 422 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 423 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 424 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 425 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 426 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 427 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 428 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 429 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 430 431 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 432 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 433 void *vs2, CPURISCVState *env, uint32_t desc) \ 434 { \ 435 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 436 STORE_FN, ctzl(sizeof(ETYPE)), \ 437 GETPC(), MMU_DATA_STORE); \ 438 } 439 440 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b) 441 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h) 442 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w) 443 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d) 444 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b) 445 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h) 446 
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w) 447 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d) 448 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b) 449 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h) 450 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w) 451 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d) 452 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b) 453 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h) 454 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w) 455 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d) 456 457 /* 458 *** unit-stride fault-only-first load instructions 459 */ 460 static inline void 461 vext_ldff(void *vd, void *v0, target_ulong base, 462 CPURISCVState *env, uint32_t desc, 463 vext_ldst_elem_fn *ldst_elem, 464 uint32_t esz, uintptr_t ra) 465 { 466 void *host; 467 uint32_t i, k, vl = 0; 468 uint32_t nf = vext_nf(desc); 469 uint32_t vm = vext_vm(desc); 470 uint32_t max_elems = vext_max_elems(desc, esz); 471 target_ulong addr, offset, remain; 472 473 /* probe every access */ 474 for (i = 0; i < env->vl; i++) { 475 if (!vm && !vext_elem_mask(v0, i)) { 476 continue; 477 } 478 addr = base + i * (nf << esz); 479 if (i == 0) { 480 probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD); 481 } else { 482 /* if it triggers an exception, no need to check watchpoint */ 483 remain = nf << esz; 484 while (remain > 0) { 485 offset = -(addr | TARGET_PAGE_MASK); 486 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, 487 cpu_mmu_index(env, false)); 488 if (host) { 489 #ifdef CONFIG_USER_ONLY 490 if (page_check_range(addr, nf << esz, PAGE_READ) < 0) { 491 vl = i; 492 goto ProbeSuccess; 493 } 494 #else 495 probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD); 496 #endif 497 } else { 498 vl = i; 499 goto ProbeSuccess; 500 } 501 if (remain <= offset) { 502 break; 503 } 504 remain -= offset; 505 addr += offset; 506 } 507 } 508 } 509 ProbeSuccess: 510 /* load bytes from guest memory */ 511 if (vl != 0) { 512 env->vl = vl; 513 } 514 for (i = 0; i < env->vl; i++) { 515 k = 0; 516 if (!vm && !vext_elem_mask(v0, i)) { 517 continue; 518 } 519 while (k < nf) { 520 target_ulong addr = base + ((i * nf + k) << esz); 521 ldst_elem(env, addr, i + k * max_elems, vd, ra); 522 k++; 523 } 524 } 525 } 526 527 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ 528 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 529 CPURISCVState *env, uint32_t desc) \ 530 { \ 531 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \ 532 ctzl(sizeof(ETYPE)), GETPC()); \ 533 } 534 535 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b) 536 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h) 537 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w) 538 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) 539 540 #define DO_SWAP(N, M) (M) 541 #define DO_AND(N, M) (N & M) 542 #define DO_XOR(N, M) (N ^ M) 543 #define DO_OR(N, M) (N | M) 544 #define DO_ADD(N, M) (N + M) 545 546 /* Signed min/max */ 547 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 548 #define DO_MIN(N, M) ((N) >= (M) ? 
(M) : (N)) 549 550 /* Unsigned min/max */ 551 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 552 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 553 554 /* 555 *** load and store whole register instructions 556 */ 557 static void 558 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 559 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra, 560 MMUAccessType access_type) 561 { 562 uint32_t i, k; 563 uint32_t nf = vext_nf(desc); 564 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 565 uint32_t max_elems = vlenb >> esz; 566 567 /* probe every access */ 568 probe_pages(env, base, vlenb * nf, ra, access_type); 569 570 /* load bytes from guest memory */ 571 for (k = 0; k < nf; k++) { 572 for (i = 0; i < max_elems; i++) { 573 target_ulong addr = base + ((i + k * max_elems) << esz); 574 ldst_elem(env, addr, i + k * max_elems, vd, ra); 575 } 576 } 577 } 578 579 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 580 void HELPER(NAME)(void *vd, target_ulong base, \ 581 CPURISCVState *env, uint32_t desc) \ 582 { \ 583 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 584 ctzl(sizeof(ETYPE)), GETPC(), \ 585 MMU_DATA_LOAD); \ 586 } 587 588 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 589 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 590 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 591 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 592 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 593 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 594 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 595 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 596 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 597 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 598 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 599 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 600 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 601 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 602 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 603 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 604 605 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 606 void HELPER(NAME)(void *vd, target_ulong base, \ 607 CPURISCVState *env, uint32_t desc) \ 608 { \ 609 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 610 ctzl(sizeof(ETYPE)), GETPC(), \ 611 MMU_DATA_STORE); \ 612 } 613 614 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 615 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 616 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 617 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 618 619 /* 620 *** Vector Integer Arithmetic Instructions 621 */ 622 623 /* expand macro args before macro */ 624 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 625 626 /* (TD, T1, T2, TX1, TX2) */ 627 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 628 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 629 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 630 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 631 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 632 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 633 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 634 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 635 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 636 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 637 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 638 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 639 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 640 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 641 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 642 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 643 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 644 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 645 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 646 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 647 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 648 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 649 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 650 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 651 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 652 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 653 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 654 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 655 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 656 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 657 658 /* operation of two vector elements */ 659 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 660 661 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 662 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 663 { \ 664 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 665 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 666 *((TD *)vd + HD(i)) = OP(s2, s1); \ 667 } 668 #define DO_SUB(N, M) (N - M) 669 #define DO_RSUB(N, M) (M - N) 670 671 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 672 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 673 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 674 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 675 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 676 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 677 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 678 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 679 680 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 681 CPURISCVState *env, uint32_t desc, 682 uint32_t esz, uint32_t dsz, 683 opivv2_fn *fn) 684 { 685 uint32_t vm = vext_vm(desc); 686 uint32_t vl = env->vl; 687 uint32_t i; 688 689 for (i = 0; i < vl; i++) { 690 if (!vm && !vext_elem_mask(v0, i)) { 691 continue; 692 } 693 fn(vd, vs1, vs2, i); 694 } 695 } 696 697 /* generate the helpers for OPIVV */ 698 #define GEN_VEXT_VV(NAME, ESZ, DSZ) \ 699 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 700 void *vs2, CPURISCVState 
*env, \ 701 uint32_t desc) \ 702 { \ 703 do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 704 do_##NAME); \ 705 } 706 707 GEN_VEXT_VV(vadd_vv_b, 1, 1) 708 GEN_VEXT_VV(vadd_vv_h, 2, 2) 709 GEN_VEXT_VV(vadd_vv_w, 4, 4) 710 GEN_VEXT_VV(vadd_vv_d, 8, 8) 711 GEN_VEXT_VV(vsub_vv_b, 1, 1) 712 GEN_VEXT_VV(vsub_vv_h, 2, 2) 713 GEN_VEXT_VV(vsub_vv_w, 4, 4) 714 GEN_VEXT_VV(vsub_vv_d, 8, 8) 715 716 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 717 718 /* 719 * (T1)s1 gives the real operator type. 720 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 721 */ 722 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 723 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 724 { \ 725 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 726 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 727 } 728 729 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 730 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 731 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 732 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 733 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 734 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 735 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 736 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 737 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 738 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 739 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 740 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 741 742 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 743 CPURISCVState *env, uint32_t desc, 744 uint32_t esz, uint32_t dsz, 745 opivx2_fn fn) 746 { 747 uint32_t vm = vext_vm(desc); 748 uint32_t vl = env->vl; 749 uint32_t i; 750 751 for (i = 0; i < vl; i++) { 752 if (!vm && !vext_elem_mask(v0, i)) { 753 continue; 754 } 755 fn(vd, s1, vs2, i); 756 } 757 } 758 759 /* generate the helpers for OPIVX */ 760 #define GEN_VEXT_VX(NAME, ESZ, DSZ) \ 761 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 762 void *vs2, CPURISCVState *env, \ 763 uint32_t desc) \ 764 { \ 765 do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 766 do_##NAME); \ 767 } 768 769 GEN_VEXT_VX(vadd_vx_b, 1, 1) 770 GEN_VEXT_VX(vadd_vx_h, 2, 2) 771 GEN_VEXT_VX(vadd_vx_w, 4, 4) 772 GEN_VEXT_VX(vadd_vx_d, 8, 8) 773 GEN_VEXT_VX(vsub_vx_b, 1, 1) 774 GEN_VEXT_VX(vsub_vx_h, 2, 2) 775 GEN_VEXT_VX(vsub_vx_w, 4, 4) 776 GEN_VEXT_VX(vsub_vx_d, 8, 8) 777 GEN_VEXT_VX(vrsub_vx_b, 1, 1) 778 GEN_VEXT_VX(vrsub_vx_h, 2, 2) 779 GEN_VEXT_VX(vrsub_vx_w, 4, 4) 780 GEN_VEXT_VX(vrsub_vx_d, 8, 8) 781 782 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 783 { 784 intptr_t oprsz = simd_oprsz(desc); 785 intptr_t i; 786 787 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 788 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 789 } 790 } 791 792 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 793 { 794 intptr_t oprsz = simd_oprsz(desc); 795 intptr_t i; 796 797 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 798 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 799 } 800 } 801 802 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 803 { 804 intptr_t oprsz = simd_oprsz(desc); 805 intptr_t i; 806 807 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 808 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 809 } 810 } 811 812 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 813 { 814 intptr_t oprsz = simd_oprsz(desc); 815 intptr_t i; 816 
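/* d[i] = b - a[i] for each 64-bit element across the operation size. */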
817 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 818 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 819 } 820 } 821 822 /* Vector Widening Integer Add/Subtract */ 823 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 824 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 825 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 826 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 827 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 828 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 829 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 830 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 831 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 832 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 833 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 834 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 835 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 836 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 837 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 838 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 839 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 840 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 841 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 842 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 843 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 844 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 845 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 846 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 847 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 848 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 849 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 850 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 851 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 852 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 853 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 854 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 855 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 856 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 857 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 858 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 859 GEN_VEXT_VV(vwaddu_vv_b, 1, 2) 860 GEN_VEXT_VV(vwaddu_vv_h, 2, 4) 861 GEN_VEXT_VV(vwaddu_vv_w, 4, 8) 862 GEN_VEXT_VV(vwsubu_vv_b, 1, 2) 863 GEN_VEXT_VV(vwsubu_vv_h, 2, 4) 864 GEN_VEXT_VV(vwsubu_vv_w, 4, 8) 865 GEN_VEXT_VV(vwadd_vv_b, 1, 2) 866 GEN_VEXT_VV(vwadd_vv_h, 2, 4) 867 GEN_VEXT_VV(vwadd_vv_w, 4, 8) 868 GEN_VEXT_VV(vwsub_vv_b, 1, 2) 869 GEN_VEXT_VV(vwsub_vv_h, 2, 4) 870 GEN_VEXT_VV(vwsub_vv_w, 4, 8) 871 GEN_VEXT_VV(vwaddu_wv_b, 1, 2) 872 GEN_VEXT_VV(vwaddu_wv_h, 2, 4) 873 GEN_VEXT_VV(vwaddu_wv_w, 4, 8) 874 GEN_VEXT_VV(vwsubu_wv_b, 1, 2) 875 GEN_VEXT_VV(vwsubu_wv_h, 2, 4) 876 GEN_VEXT_VV(vwsubu_wv_w, 4, 8) 877 GEN_VEXT_VV(vwadd_wv_b, 1, 2) 878 GEN_VEXT_VV(vwadd_wv_h, 2, 4) 879 GEN_VEXT_VV(vwadd_wv_w, 4, 8) 880 GEN_VEXT_VV(vwsub_wv_b, 1, 2) 881 GEN_VEXT_VV(vwsub_wv_h, 2, 4) 882 GEN_VEXT_VV(vwsub_wv_w, 4, 8) 883 884 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 885 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 886 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 887 
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 888 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 889 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 890 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 891 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 892 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 893 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 894 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 895 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 896 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 897 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 898 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 899 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 900 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 901 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 902 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 903 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 904 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 905 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 906 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 907 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 908 GEN_VEXT_VX(vwaddu_vx_b, 1, 2) 909 GEN_VEXT_VX(vwaddu_vx_h, 2, 4) 910 GEN_VEXT_VX(vwaddu_vx_w, 4, 8) 911 GEN_VEXT_VX(vwsubu_vx_b, 1, 2) 912 GEN_VEXT_VX(vwsubu_vx_h, 2, 4) 913 GEN_VEXT_VX(vwsubu_vx_w, 4, 8) 914 GEN_VEXT_VX(vwadd_vx_b, 1, 2) 915 GEN_VEXT_VX(vwadd_vx_h, 2, 4) 916 GEN_VEXT_VX(vwadd_vx_w, 4, 8) 917 GEN_VEXT_VX(vwsub_vx_b, 1, 2) 918 GEN_VEXT_VX(vwsub_vx_h, 2, 4) 919 GEN_VEXT_VX(vwsub_vx_w, 4, 8) 920 GEN_VEXT_VX(vwaddu_wx_b, 1, 2) 921 GEN_VEXT_VX(vwaddu_wx_h, 2, 4) 922 GEN_VEXT_VX(vwaddu_wx_w, 4, 8) 923 GEN_VEXT_VX(vwsubu_wx_b, 1, 2) 924 GEN_VEXT_VX(vwsubu_wx_h, 2, 4) 925 GEN_VEXT_VX(vwsubu_wx_w, 4, 8) 926 GEN_VEXT_VX(vwadd_wx_b, 1, 2) 927 GEN_VEXT_VX(vwadd_wx_h, 2, 4) 928 GEN_VEXT_VX(vwadd_wx_w, 4, 8) 929 GEN_VEXT_VX(vwsub_wx_b, 1, 2) 930 GEN_VEXT_VX(vwsub_wx_h, 2, 4) 931 GEN_VEXT_VX(vwsub_wx_w, 4, 8) 932 933 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 934 #define DO_VADC(N, M, C) (N + M + C) 935 #define DO_VSBC(N, M, C) (N - M - C) 936 937 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 938 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 939 CPURISCVState *env, uint32_t desc) \ 940 { \ 941 uint32_t vl = env->vl; \ 942 uint32_t i; \ 943 \ 944 for (i = 0; i < vl; i++) { \ 945 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 946 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 947 ETYPE carry = vext_elem_mask(v0, i); \ 948 \ 949 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 950 } \ 951 } 952 953 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 954 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 955 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 956 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 957 958 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 959 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 960 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 961 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 962 963 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 964 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 965 CPURISCVState *env, uint32_t desc) \ 966 { \ 967 uint32_t vl = env->vl; \ 968 uint32_t i; \ 969 \ 970 for (i = 0; i < vl; i++) { \ 971 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 972 ETYPE carry = vext_elem_mask(v0, i); \ 973 \ 974 *((ETYPE *)vd + H(i)) = 
DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 975 } \ 976 } 977 978 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 979 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 980 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 981 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 982 983 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 984 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 985 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 986 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 987 988 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 989 (__typeof(N))(N + M) < N) 990 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 991 992 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 993 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 994 CPURISCVState *env, uint32_t desc) \ 995 { \ 996 uint32_t vl = env->vl; \ 997 uint32_t vm = vext_vm(desc); \ 998 uint32_t i; \ 999 \ 1000 for (i = 0; i < vl; i++) { \ 1001 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1002 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1003 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1004 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1005 } \ 1006 } 1007 1008 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1009 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1010 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1011 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1012 1013 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1014 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1015 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1016 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1017 1018 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1019 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1020 void *vs2, CPURISCVState *env, uint32_t desc) \ 1021 { \ 1022 uint32_t vl = env->vl; \ 1023 uint32_t vm = vext_vm(desc); \ 1024 uint32_t i; \ 1025 \ 1026 for (i = 0; i < vl; i++) { \ 1027 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1028 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1029 vext_set_elem_mask(vd, i, \ 1030 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1031 } \ 1032 } 1033 1034 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1035 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1036 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1037 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1038 1039 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1040 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1041 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1042 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1043 1044 /* Vector Bitwise Logical Instructions */ 1045 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1046 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1047 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1048 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1049 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1050 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1051 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1052 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1053 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1054 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1055 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1056 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1057 GEN_VEXT_VV(vand_vv_b, 1, 1) 1058 GEN_VEXT_VV(vand_vv_h, 2, 2) 1059 GEN_VEXT_VV(vand_vv_w, 4, 4) 1060 
GEN_VEXT_VV(vand_vv_d, 8, 8) 1061 GEN_VEXT_VV(vor_vv_b, 1, 1) 1062 GEN_VEXT_VV(vor_vv_h, 2, 2) 1063 GEN_VEXT_VV(vor_vv_w, 4, 4) 1064 GEN_VEXT_VV(vor_vv_d, 8, 8) 1065 GEN_VEXT_VV(vxor_vv_b, 1, 1) 1066 GEN_VEXT_VV(vxor_vv_h, 2, 2) 1067 GEN_VEXT_VV(vxor_vv_w, 4, 4) 1068 GEN_VEXT_VV(vxor_vv_d, 8, 8) 1069 1070 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1071 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1072 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1073 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1074 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1075 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1076 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1077 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1078 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1079 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1080 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1081 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1082 GEN_VEXT_VX(vand_vx_b, 1, 1) 1083 GEN_VEXT_VX(vand_vx_h, 2, 2) 1084 GEN_VEXT_VX(vand_vx_w, 4, 4) 1085 GEN_VEXT_VX(vand_vx_d, 8, 8) 1086 GEN_VEXT_VX(vor_vx_b, 1, 1) 1087 GEN_VEXT_VX(vor_vx_h, 2, 2) 1088 GEN_VEXT_VX(vor_vx_w, 4, 4) 1089 GEN_VEXT_VX(vor_vx_d, 8, 8) 1090 GEN_VEXT_VX(vxor_vx_b, 1, 1) 1091 GEN_VEXT_VX(vxor_vx_h, 2, 2) 1092 GEN_VEXT_VX(vxor_vx_w, 4, 4) 1093 GEN_VEXT_VX(vxor_vx_d, 8, 8) 1094 1095 /* Vector Single-Width Bit Shift Instructions */ 1096 #define DO_SLL(N, M) (N << (M)) 1097 #define DO_SRL(N, M) (N >> (M)) 1098 1099 /* generate the helpers for shift instructions with two vector operators */ 1100 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1101 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1102 void *vs2, CPURISCVState *env, uint32_t desc) \ 1103 { \ 1104 uint32_t vm = vext_vm(desc); \ 1105 uint32_t vl = env->vl; \ 1106 uint32_t i; \ 1107 \ 1108 for (i = 0; i < vl; i++) { \ 1109 if (!vm && !vext_elem_mask(v0, i)) { \ 1110 continue; \ 1111 } \ 1112 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1113 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1114 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1115 } \ 1116 } 1117 1118 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1119 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1120 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1121 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1122 1123 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1124 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1125 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1126 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1127 1128 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1129 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1130 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1131 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1132 1133 /* generate the helpers for shift instructions with one vector and one scalar */ 1134 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1135 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1136 void *vs2, CPURISCVState *env, uint32_t desc) \ 1137 { \ 1138 uint32_t vm = vext_vm(desc); \ 1139 uint32_t vl = env->vl; \ 1140 uint32_t i; \ 1141 \ 1142 for (i = 0; i < vl; i++) { \ 1143 if (!vm && !vext_elem_mask(v0, i)) { \ 1144 continue; \ 1145 } \ 1146 TS2 s2 = *((TS2 
*)vs2 + HS2(i)); \ 1147 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1148 } \ 1149 } 1150 1151 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1152 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1153 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1154 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1155 1156 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1157 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1158 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1159 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1160 1161 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1162 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1163 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1164 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1165 1166 /* Vector Narrowing Integer Right Shift Instructions */ 1167 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1168 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1169 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1170 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1171 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1172 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1173 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1174 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1175 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1176 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1177 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1178 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1179 1180 /* Vector Integer Comparison Instructions */ 1181 #define DO_MSEQ(N, M) (N == M) 1182 #define DO_MSNE(N, M) (N != M) 1183 #define DO_MSLT(N, M) (N < M) 1184 #define DO_MSLE(N, M) (N <= M) 1185 #define DO_MSGT(N, M) (N > M) 1186 1187 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1188 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1189 CPURISCVState *env, uint32_t desc) \ 1190 { \ 1191 uint32_t vm = vext_vm(desc); \ 1192 uint32_t vl = env->vl; \ 1193 uint32_t i; \ 1194 \ 1195 for (i = 0; i < vl; i++) { \ 1196 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1197 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1198 if (!vm && !vext_elem_mask(v0, i)) { \ 1199 continue; \ 1200 } \ 1201 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1202 } \ 1203 } 1204 1205 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1206 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1207 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1208 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1209 1210 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1211 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1212 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1213 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1214 1215 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1216 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1217 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1218 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1219 1220 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1221 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1222 GEN_VEXT_CMP_VV(vmslt_vv_w, 
int32_t, H4, DO_MSLT) 1223 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1224 1225 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1226 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1227 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1228 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1229 1230 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1231 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1232 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1233 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1234 1235 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1236 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1237 CPURISCVState *env, uint32_t desc) \ 1238 { \ 1239 uint32_t vm = vext_vm(desc); \ 1240 uint32_t vl = env->vl; \ 1241 uint32_t i; \ 1242 \ 1243 for (i = 0; i < vl; i++) { \ 1244 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1245 if (!vm && !vext_elem_mask(v0, i)) { \ 1246 continue; \ 1247 } \ 1248 vext_set_elem_mask(vd, i, \ 1249 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1250 } \ 1251 } 1252 1253 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1254 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1255 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1256 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1257 1258 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1259 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1260 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1261 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1262 1263 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1264 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1265 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1266 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1267 1268 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1269 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1270 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1271 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1272 1273 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1274 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1275 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1276 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1277 1278 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1279 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1280 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1281 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1282 1283 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1284 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1285 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1286 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1287 1288 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1289 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1290 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1291 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1292 1293 /* Vector Integer Min/Max Instructions */ 1294 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1295 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1296 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1297 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1298 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1299 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1300 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1301 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1302 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1303 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1304 
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1305 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1306 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1307 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1308 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1309 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1310 GEN_VEXT_VV(vminu_vv_b, 1, 1) 1311 GEN_VEXT_VV(vminu_vv_h, 2, 2) 1312 GEN_VEXT_VV(vminu_vv_w, 4, 4) 1313 GEN_VEXT_VV(vminu_vv_d, 8, 8) 1314 GEN_VEXT_VV(vmin_vv_b, 1, 1) 1315 GEN_VEXT_VV(vmin_vv_h, 2, 2) 1316 GEN_VEXT_VV(vmin_vv_w, 4, 4) 1317 GEN_VEXT_VV(vmin_vv_d, 8, 8) 1318 GEN_VEXT_VV(vmaxu_vv_b, 1, 1) 1319 GEN_VEXT_VV(vmaxu_vv_h, 2, 2) 1320 GEN_VEXT_VV(vmaxu_vv_w, 4, 4) 1321 GEN_VEXT_VV(vmaxu_vv_d, 8, 8) 1322 GEN_VEXT_VV(vmax_vv_b, 1, 1) 1323 GEN_VEXT_VV(vmax_vv_h, 2, 2) 1324 GEN_VEXT_VV(vmax_vv_w, 4, 4) 1325 GEN_VEXT_VV(vmax_vv_d, 8, 8) 1326 1327 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1328 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1329 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1330 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1331 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1332 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1333 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1334 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1335 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1336 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1337 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1338 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1339 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1340 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1341 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1342 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1343 GEN_VEXT_VX(vminu_vx_b, 1, 1) 1344 GEN_VEXT_VX(vminu_vx_h, 2, 2) 1345 GEN_VEXT_VX(vminu_vx_w, 4, 4) 1346 GEN_VEXT_VX(vminu_vx_d, 8, 8) 1347 GEN_VEXT_VX(vmin_vx_b, 1, 1) 1348 GEN_VEXT_VX(vmin_vx_h, 2, 2) 1349 GEN_VEXT_VX(vmin_vx_w, 4, 4) 1350 GEN_VEXT_VX(vmin_vx_d, 8, 8) 1351 GEN_VEXT_VX(vmaxu_vx_b, 1, 1) 1352 GEN_VEXT_VX(vmaxu_vx_h, 2, 2) 1353 GEN_VEXT_VX(vmaxu_vx_w, 4, 4) 1354 GEN_VEXT_VX(vmaxu_vx_d, 8, 8) 1355 GEN_VEXT_VX(vmax_vx_b, 1, 1) 1356 GEN_VEXT_VX(vmax_vx_h, 2, 2) 1357 GEN_VEXT_VX(vmax_vx_w, 4, 4) 1358 GEN_VEXT_VX(vmax_vx_d, 8, 8) 1359 1360 /* Vector Single-Width Integer Multiply Instructions */ 1361 #define DO_MUL(N, M) (N * M) 1362 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1363 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1364 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1365 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1366 GEN_VEXT_VV(vmul_vv_b, 1, 1) 1367 GEN_VEXT_VV(vmul_vv_h, 2, 2) 1368 GEN_VEXT_VV(vmul_vv_w, 4, 4) 1369 GEN_VEXT_VV(vmul_vv_d, 8, 8) 1370 1371 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1372 { 1373 return (int16_t)s2 * (int16_t)s1 >> 8; 1374 } 1375 1376 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1377 { 1378 return (int32_t)s2 * (int32_t)s1 >> 16; 1379 } 1380 1381 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1382 { 1383 return (int64_t)s2 * (int64_t)s1 >> 32; 1384 } 1385 1386 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1387 { 1388 uint64_t hi_64, lo_64; 1389 1390 muls64(&lo_64, &hi_64, s1, s2); 1391 return hi_64; 1392 } 1393 1394 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1395 { 1396 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1397 } 1398 1399 static 
uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1400 { 1401 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1402 } 1403 1404 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1405 { 1406 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1407 } 1408 1409 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1410 { 1411 uint64_t hi_64, lo_64; 1412 1413 mulu64(&lo_64, &hi_64, s2, s1); 1414 return hi_64; 1415 } 1416 1417 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1418 { 1419 return (int16_t)s2 * (uint16_t)s1 >> 8; 1420 } 1421 1422 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1423 { 1424 return (int32_t)s2 * (uint32_t)s1 >> 16; 1425 } 1426 1427 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1428 { 1429 return (int64_t)s2 * (uint64_t)s1 >> 32; 1430 } 1431 1432 /* 1433 * Let A = signed operand, 1434 * B = unsigned operand 1435 * P = mulu64(A, B), unsigned product 1436 * 1437 * LET X = 2 ** 64 - A, 2's complement of A 1438 * SP = signed product 1439 * THEN 1440 * IF A < 0 1441 * SP = -X * B 1442 * = -(2 ** 64 - A) * B 1443 * = A * B - 2 ** 64 * B 1444 * = P - 2 ** 64 * B 1445 * ELSE 1446 * SP = P 1447 * THEN 1448 * HI_P -= (A < 0 ? B : 0) 1449 */ 1450 1451 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1452 { 1453 uint64_t hi_64, lo_64; 1454 1455 mulu64(&lo_64, &hi_64, s2, s1); 1456 1457 hi_64 -= s2 < 0 ? s1 : 0; 1458 return hi_64; 1459 } 1460 1461 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1462 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1463 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1464 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1465 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1466 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1467 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1468 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1469 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1470 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1471 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1472 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1473 GEN_VEXT_VV(vmulh_vv_b, 1, 1) 1474 GEN_VEXT_VV(vmulh_vv_h, 2, 2) 1475 GEN_VEXT_VV(vmulh_vv_w, 4, 4) 1476 GEN_VEXT_VV(vmulh_vv_d, 8, 8) 1477 GEN_VEXT_VV(vmulhu_vv_b, 1, 1) 1478 GEN_VEXT_VV(vmulhu_vv_h, 2, 2) 1479 GEN_VEXT_VV(vmulhu_vv_w, 4, 4) 1480 GEN_VEXT_VV(vmulhu_vv_d, 8, 8) 1481 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) 1482 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) 1483 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) 1484 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) 1485 1486 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1487 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1488 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1489 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1490 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1491 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1492 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1493 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1494 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1495 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1496 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1497 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1498 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1499 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1500 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 
1501 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1502 GEN_VEXT_VX(vmul_vx_b, 1, 1) 1503 GEN_VEXT_VX(vmul_vx_h, 2, 2) 1504 GEN_VEXT_VX(vmul_vx_w, 4, 4) 1505 GEN_VEXT_VX(vmul_vx_d, 8, 8) 1506 GEN_VEXT_VX(vmulh_vx_b, 1, 1) 1507 GEN_VEXT_VX(vmulh_vx_h, 2, 2) 1508 GEN_VEXT_VX(vmulh_vx_w, 4, 4) 1509 GEN_VEXT_VX(vmulh_vx_d, 8, 8) 1510 GEN_VEXT_VX(vmulhu_vx_b, 1, 1) 1511 GEN_VEXT_VX(vmulhu_vx_h, 2, 2) 1512 GEN_VEXT_VX(vmulhu_vx_w, 4, 4) 1513 GEN_VEXT_VX(vmulhu_vx_d, 8, 8) 1514 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) 1515 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) 1516 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) 1517 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) 1518 1519 /* Vector Integer Divide Instructions */ 1520 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1521 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1522 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1523 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1524 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1525 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1526 1527 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1528 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1529 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1530 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1531 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1532 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1533 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1534 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1535 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1536 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1537 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1538 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1539 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1540 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1541 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1542 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1543 GEN_VEXT_VV(vdivu_vv_b, 1, 1) 1544 GEN_VEXT_VV(vdivu_vv_h, 2, 2) 1545 GEN_VEXT_VV(vdivu_vv_w, 4, 4) 1546 GEN_VEXT_VV(vdivu_vv_d, 8, 8) 1547 GEN_VEXT_VV(vdiv_vv_b, 1, 1) 1548 GEN_VEXT_VV(vdiv_vv_h, 2, 2) 1549 GEN_VEXT_VV(vdiv_vv_w, 4, 4) 1550 GEN_VEXT_VV(vdiv_vv_d, 8, 8) 1551 GEN_VEXT_VV(vremu_vv_b, 1, 1) 1552 GEN_VEXT_VV(vremu_vv_h, 2, 2) 1553 GEN_VEXT_VV(vremu_vv_w, 4, 4) 1554 GEN_VEXT_VV(vremu_vv_d, 8, 8) 1555 GEN_VEXT_VV(vrem_vv_b, 1, 1) 1556 GEN_VEXT_VV(vrem_vv_h, 2, 2) 1557 GEN_VEXT_VV(vrem_vv_w, 4, 4) 1558 GEN_VEXT_VV(vrem_vv_d, 8, 8) 1559 1560 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1561 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1562 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1563 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1564 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1565 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1566 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1567 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1568 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1569 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1570 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1571 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1572 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1573 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1574 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1575 RVVCALL(OPIVX2, vrem_vx_d, 
OP_SSS_D, H8, H8, DO_REM) 1576 GEN_VEXT_VX(vdivu_vx_b, 1, 1) 1577 GEN_VEXT_VX(vdivu_vx_h, 2, 2) 1578 GEN_VEXT_VX(vdivu_vx_w, 4, 4) 1579 GEN_VEXT_VX(vdivu_vx_d, 8, 8) 1580 GEN_VEXT_VX(vdiv_vx_b, 1, 1) 1581 GEN_VEXT_VX(vdiv_vx_h, 2, 2) 1582 GEN_VEXT_VX(vdiv_vx_w, 4, 4) 1583 GEN_VEXT_VX(vdiv_vx_d, 8, 8) 1584 GEN_VEXT_VX(vremu_vx_b, 1, 1) 1585 GEN_VEXT_VX(vremu_vx_h, 2, 2) 1586 GEN_VEXT_VX(vremu_vx_w, 4, 4) 1587 GEN_VEXT_VX(vremu_vx_d, 8, 8) 1588 GEN_VEXT_VX(vrem_vx_b, 1, 1) 1589 GEN_VEXT_VX(vrem_vx_h, 2, 2) 1590 GEN_VEXT_VX(vrem_vx_w, 4, 4) 1591 GEN_VEXT_VX(vrem_vx_d, 8, 8) 1592 1593 /* Vector Widening Integer Multiply Instructions */ 1594 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1595 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1596 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1597 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1598 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1599 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1600 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1601 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1602 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1603 GEN_VEXT_VV(vwmul_vv_b, 1, 2) 1604 GEN_VEXT_VV(vwmul_vv_h, 2, 4) 1605 GEN_VEXT_VV(vwmul_vv_w, 4, 8) 1606 GEN_VEXT_VV(vwmulu_vv_b, 1, 2) 1607 GEN_VEXT_VV(vwmulu_vv_h, 2, 4) 1608 GEN_VEXT_VV(vwmulu_vv_w, 4, 8) 1609 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) 1610 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) 1611 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) 1612 1613 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1614 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1615 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1616 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1617 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1618 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1619 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1620 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1621 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1622 GEN_VEXT_VX(vwmul_vx_b, 1, 2) 1623 GEN_VEXT_VX(vwmul_vx_h, 2, 4) 1624 GEN_VEXT_VX(vwmul_vx_w, 4, 8) 1625 GEN_VEXT_VX(vwmulu_vx_b, 1, 2) 1626 GEN_VEXT_VX(vwmulu_vx_h, 2, 4) 1627 GEN_VEXT_VX(vwmulu_vx_w, 4, 8) 1628 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) 1629 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) 1630 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) 1631 1632 /* Vector Single-Width Integer Multiply-Add Instructions */ 1633 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1634 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1635 { \ 1636 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1637 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1638 TD d = *((TD *)vd + HD(i)); \ 1639 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1640 } 1641 1642 #define DO_MACC(N, M, D) (M * N + D) 1643 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1644 #define DO_MADD(N, M, D) (M * D + N) 1645 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1646 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1647 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1648 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1649 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1650 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1651 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1652 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1653 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1654 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, 
H1, H1, DO_MADD) 1655 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1656 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1657 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1658 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1659 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1660 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1661 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1662 GEN_VEXT_VV(vmacc_vv_b, 1, 1) 1663 GEN_VEXT_VV(vmacc_vv_h, 2, 2) 1664 GEN_VEXT_VV(vmacc_vv_w, 4, 4) 1665 GEN_VEXT_VV(vmacc_vv_d, 8, 8) 1666 GEN_VEXT_VV(vnmsac_vv_b, 1, 1) 1667 GEN_VEXT_VV(vnmsac_vv_h, 2, 2) 1668 GEN_VEXT_VV(vnmsac_vv_w, 4, 4) 1669 GEN_VEXT_VV(vnmsac_vv_d, 8, 8) 1670 GEN_VEXT_VV(vmadd_vv_b, 1, 1) 1671 GEN_VEXT_VV(vmadd_vv_h, 2, 2) 1672 GEN_VEXT_VV(vmadd_vv_w, 4, 4) 1673 GEN_VEXT_VV(vmadd_vv_d, 8, 8) 1674 GEN_VEXT_VV(vnmsub_vv_b, 1, 1) 1675 GEN_VEXT_VV(vnmsub_vv_h, 2, 2) 1676 GEN_VEXT_VV(vnmsub_vv_w, 4, 4) 1677 GEN_VEXT_VV(vnmsub_vv_d, 8, 8) 1678 1679 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1680 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1681 { \ 1682 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1683 TD d = *((TD *)vd + HD(i)); \ 1684 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1685 } 1686 1687 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1688 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1689 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1690 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1691 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1692 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1693 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1694 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1695 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1696 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1697 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1698 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1699 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1700 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1701 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1702 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1703 GEN_VEXT_VX(vmacc_vx_b, 1, 1) 1704 GEN_VEXT_VX(vmacc_vx_h, 2, 2) 1705 GEN_VEXT_VX(vmacc_vx_w, 4, 4) 1706 GEN_VEXT_VX(vmacc_vx_d, 8, 8) 1707 GEN_VEXT_VX(vnmsac_vx_b, 1, 1) 1708 GEN_VEXT_VX(vnmsac_vx_h, 2, 2) 1709 GEN_VEXT_VX(vnmsac_vx_w, 4, 4) 1710 GEN_VEXT_VX(vnmsac_vx_d, 8, 8) 1711 GEN_VEXT_VX(vmadd_vx_b, 1, 1) 1712 GEN_VEXT_VX(vmadd_vx_h, 2, 2) 1713 GEN_VEXT_VX(vmadd_vx_w, 4, 4) 1714 GEN_VEXT_VX(vmadd_vx_d, 8, 8) 1715 GEN_VEXT_VX(vnmsub_vx_b, 1, 1) 1716 GEN_VEXT_VX(vnmsub_vx_h, 2, 2) 1717 GEN_VEXT_VX(vnmsub_vx_w, 4, 4) 1718 GEN_VEXT_VX(vnmsub_vx_d, 8, 8) 1719 1720 /* Vector Widening Integer Multiply-Add Instructions */ 1721 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1722 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1723 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1724 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1725 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1726 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1727 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1728 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1729 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1730 
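/*
 * Editor's illustrative sketch, not part of the build: the per-element
 * arithmetic performed by the widening multiply-add group defined just
 * above, shown for vwmaccu.vv at SEW=8.  Both sources are widened to
 * 2*SEW before the multiply, and the product accumulates into the
 * already-wide vd element with ordinary modulo arithmetic (no
 * saturation).  The function name is hypothetical.
 */
static inline uint16_t vwmaccu8_example(uint8_t vs2, uint8_t vs1, uint16_t vd)
{
    /* widen first so the full 16-bit product is retained */
    return (uint16_t)((uint16_t)vs2 * (uint16_t)vs1 + vd);
}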
GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) 1731 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) 1732 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) 1733 GEN_VEXT_VV(vwmacc_vv_b, 1, 2) 1734 GEN_VEXT_VV(vwmacc_vv_h, 2, 4) 1735 GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 1736 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) 1737 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) 1738 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) 1739 1740 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1741 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1742 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1743 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1744 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1745 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1746 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1747 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1748 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1749 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1750 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1751 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1752 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) 1753 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) 1754 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) 1755 GEN_VEXT_VX(vwmacc_vx_b, 1, 2) 1756 GEN_VEXT_VX(vwmacc_vx_h, 2, 4) 1757 GEN_VEXT_VX(vwmacc_vx_w, 4, 8) 1758 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) 1759 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) 1760 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) 1761 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) 1762 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) 1763 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) 1764 1765 /* Vector Integer Merge and Move Instructions */ 1766 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1767 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1768 uint32_t desc) \ 1769 { \ 1770 uint32_t vl = env->vl; \ 1771 uint32_t i; \ 1772 \ 1773 for (i = 0; i < vl; i++) { \ 1774 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1775 *((ETYPE *)vd + H(i)) = s1; \ 1776 } \ 1777 } 1778 1779 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1780 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1781 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1782 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1783 1784 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1785 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1786 uint32_t desc) \ 1787 { \ 1788 uint32_t vl = env->vl; \ 1789 uint32_t i; \ 1790 \ 1791 for (i = 0; i < vl; i++) { \ 1792 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1793 } \ 1794 } 1795 1796 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1797 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1798 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1799 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1800 1801 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1802 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1803 CPURISCVState *env, uint32_t desc) \ 1804 { \ 1805 uint32_t vl = env->vl; \ 1806 uint32_t i; \ 1807 \ 1808 for (i = 0; i < vl; i++) { \ 1809 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 1810 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1811 } \ 1812 } 1813 1814 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1815 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1816 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1817 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1818 1819 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1820 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1821 void *vs2, CPURISCVState *env, uint32_t desc) \ 1822 { \ 1823 uint32_t vl = env->vl; \ 1824 uint32_t i; \ 1825 \ 1826 for (i = 0; i < vl; i++) { \ 1827 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1828 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 1829 (ETYPE)(target_long)s1); \ 1830 *((ETYPE *)vd + H(i)) = d; \ 1831 } \ 1832 } 1833 1834 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1835 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1836 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1837 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1838 1839 /* 1840 *** Vector Fixed-Point Arithmetic Instructions 1841 */ 1842 1843 /* Vector Single-Width Saturating Add and Subtract */ 1844 1845 /* 1846 * As fixed point instructions probably have round mode and saturation, 1847 * define common macros for fixed point here. 1848 */ 1849 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1850 CPURISCVState *env, int vxrm); 1851 1852 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1853 static inline void \ 1854 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1855 CPURISCVState *env, int vxrm) \ 1856 { \ 1857 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1858 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1859 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1860 } 1861 1862 static inline void 1863 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1864 CPURISCVState *env, 1865 uint32_t vl, uint32_t vm, int vxrm, 1866 opivv2_rm_fn *fn) 1867 { 1868 for (uint32_t i = 0; i < vl; i++) { 1869 if (!vm && !vext_elem_mask(v0, i)) { 1870 continue; 1871 } 1872 fn(vd, vs1, vs2, i, env, vxrm); 1873 } 1874 } 1875 1876 static inline void 1877 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1878 CPURISCVState *env, 1879 uint32_t desc, uint32_t esz, uint32_t dsz, 1880 opivv2_rm_fn *fn) 1881 { 1882 uint32_t vm = vext_vm(desc); 1883 uint32_t vl = env->vl; 1884 1885 switch (env->vxrm) { 1886 case 0: /* rnu */ 1887 vext_vv_rm_1(vd, v0, vs1, vs2, 1888 env, vl, vm, 0, fn); 1889 break; 1890 case 1: /* rne */ 1891 vext_vv_rm_1(vd, v0, vs1, vs2, 1892 env, vl, vm, 1, fn); 1893 break; 1894 case 2: /* rdn */ 1895 vext_vv_rm_1(vd, v0, vs1, vs2, 1896 env, vl, vm, 2, fn); 1897 break; 1898 default: /* rod */ 1899 vext_vv_rm_1(vd, v0, vs1, vs2, 1900 env, vl, vm, 3, fn); 1901 break; 1902 } 1903 } 1904 1905 /* generate helpers for fixed point instructions with OPIVV format */ 1906 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ 1907 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1908 CPURISCVState *env, uint32_t desc) \ 1909 { \ 1910 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 1911 do_##NAME); \ 1912 } 1913 1914 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 1915 { 1916 uint8_t res = a + b; 1917 if (res < a) { 1918 res = UINT8_MAX; 1919 env->vxsat = 0x1; 1920 } 1921 return res; 1922 } 1923 1924 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1925 uint16_t b) 1926 { 1927 uint16_t res = a + b; 1928 if (res < a) { 1929 res = UINT16_MAX; 1930 env->vxsat = 0x1; 1931 } 1932 return res; 1933 } 1934 1935 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1936 uint32_t b) 1937 { 1938 uint32_t res = a + b; 1939 if (res < a) { 1940 res = UINT32_MAX; 1941 env->vxsat = 0x1; 1942 } 1943 return res; 1944 } 1945 1946 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1947 uint64_t b) 1948 { 1949 uint64_t res = a + b; 1950 if (res < a) { 1951 res = UINT64_MAX; 1952 env->vxsat = 0x1; 1953 } 1954 return res; 1955 } 1956 1957 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 1958 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 1959 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 1960 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, 
H8, H8, H8, saddu64) 1961 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) 1962 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) 1963 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) 1964 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) 1965 1966 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 1967 CPURISCVState *env, int vxrm); 1968 1969 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1970 static inline void \ 1971 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 1972 CPURISCVState *env, int vxrm) \ 1973 { \ 1974 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1975 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 1976 } 1977 1978 static inline void 1979 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 1980 CPURISCVState *env, 1981 uint32_t vl, uint32_t vm, int vxrm, 1982 opivx2_rm_fn *fn) 1983 { 1984 for (uint32_t i = 0; i < vl; i++) { 1985 if (!vm && !vext_elem_mask(v0, i)) { 1986 continue; 1987 } 1988 fn(vd, s1, vs2, i, env, vxrm); 1989 } 1990 } 1991 1992 static inline void 1993 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 1994 CPURISCVState *env, 1995 uint32_t desc, uint32_t esz, uint32_t dsz, 1996 opivx2_rm_fn *fn) 1997 { 1998 uint32_t vm = vext_vm(desc); 1999 uint32_t vl = env->vl; 2000 2001 switch (env->vxrm) { 2002 case 0: /* rnu */ 2003 vext_vx_rm_1(vd, v0, s1, vs2, 2004 env, vl, vm, 0, fn); 2005 break; 2006 case 1: /* rne */ 2007 vext_vx_rm_1(vd, v0, s1, vs2, 2008 env, vl, vm, 1, fn); 2009 break; 2010 case 2: /* rdn */ 2011 vext_vx_rm_1(vd, v0, s1, vs2, 2012 env, vl, vm, 2, fn); 2013 break; 2014 default: /* rod */ 2015 vext_vx_rm_1(vd, v0, s1, vs2, 2016 env, vl, vm, 3, fn); 2017 break; 2018 } 2019 } 2020 2021 /* generate helpers for fixed point instructions with OPIVX format */ 2022 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ 2023 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2024 void *vs2, CPURISCVState *env, uint32_t desc) \ 2025 { \ 2026 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 2027 do_##NAME); \ 2028 } 2029 2030 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2031 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2032 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2033 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2034 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 2035 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) 2036 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) 2037 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) 2038 2039 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2040 { 2041 int8_t res = a + b; 2042 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2043 res = a > 0 ? INT8_MAX : INT8_MIN; 2044 env->vxsat = 0x1; 2045 } 2046 return res; 2047 } 2048 2049 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2050 { 2051 int16_t res = a + b; 2052 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2053 res = a > 0 ? INT16_MAX : INT16_MIN; 2054 env->vxsat = 0x1; 2055 } 2056 return res; 2057 } 2058 2059 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2060 { 2061 int32_t res = a + b; 2062 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2063 res = a > 0 ? INT32_MAX : INT32_MIN; 2064 env->vxsat = 0x1; 2065 } 2066 return res; 2067 } 2068 2069 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2070 { 2071 int64_t res = a + b; 2072 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2073 res = a > 0 ? 
INT64_MAX : INT64_MIN; 2074 env->vxsat = 0x1; 2075 } 2076 return res; 2077 } 2078 2079 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2080 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2081 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2082 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2083 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) 2084 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) 2085 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) 2086 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) 2087 2088 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2089 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2090 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2091 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2092 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) 2093 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) 2094 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) 2095 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) 2096 2097 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2098 { 2099 uint8_t res = a - b; 2100 if (res > a) { 2101 res = 0; 2102 env->vxsat = 0x1; 2103 } 2104 return res; 2105 } 2106 2107 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2108 uint16_t b) 2109 { 2110 uint16_t res = a - b; 2111 if (res > a) { 2112 res = 0; 2113 env->vxsat = 0x1; 2114 } 2115 return res; 2116 } 2117 2118 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2119 uint32_t b) 2120 { 2121 uint32_t res = a - b; 2122 if (res > a) { 2123 res = 0; 2124 env->vxsat = 0x1; 2125 } 2126 return res; 2127 } 2128 2129 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2130 uint64_t b) 2131 { 2132 uint64_t res = a - b; 2133 if (res > a) { 2134 res = 0; 2135 env->vxsat = 0x1; 2136 } 2137 return res; 2138 } 2139 2140 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2141 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2142 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2143 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2144 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) 2145 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) 2146 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) 2147 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) 2148 2149 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2150 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2151 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2152 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2153 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) 2154 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) 2155 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) 2156 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) 2157 2158 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2159 { 2160 int8_t res = a - b; 2161 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2162 res = a >= 0 ? INT8_MAX : INT8_MIN; 2163 env->vxsat = 0x1; 2164 } 2165 return res; 2166 } 2167 2168 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2169 { 2170 int16_t res = a - b; 2171 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2172 res = a >= 0 ? INT16_MAX : INT16_MIN; 2173 env->vxsat = 0x1; 2174 } 2175 return res; 2176 } 2177 2178 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2179 { 2180 int32_t res = a - b; 2181 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2182 res = a >= 0 ? 
INT32_MAX : INT32_MIN; 2183 env->vxsat = 0x1; 2184 } 2185 return res; 2186 } 2187 2188 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2189 { 2190 int64_t res = a - b; 2191 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2192 res = a >= 0 ? INT64_MAX : INT64_MIN; 2193 env->vxsat = 0x1; 2194 } 2195 return res; 2196 } 2197 2198 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2199 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2200 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2201 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2202 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) 2203 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) 2204 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) 2205 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) 2206 2207 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2208 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2209 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2210 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2211 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) 2212 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) 2213 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) 2214 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) 2215 2216 /* Vector Single-Width Averaging Add and Subtract */ 2217 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2218 { 2219 uint8_t d = extract64(v, shift, 1); 2220 uint8_t d1; 2221 uint64_t D1, D2; 2222 2223 if (shift == 0 || shift > 64) { 2224 return 0; 2225 } 2226 2227 d1 = extract64(v, shift - 1, 1); 2228 D1 = extract64(v, 0, shift); 2229 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2230 return d1; 2231 } else if (vxrm == 1) { /* round-to-nearest-even */ 2232 if (shift > 1) { 2233 D2 = extract64(v, 0, shift - 1); 2234 return d1 & ((D2 != 0) | d); 2235 } else { 2236 return d1 & d; 2237 } 2238 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2239 return !d & (D1 != 0); 2240 } 2241 return 0; /* round-down (truncate) */ 2242 } 2243 2244 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2245 { 2246 int64_t res = (int64_t)a + b; 2247 uint8_t round = get_round(vxrm, res, 1); 2248 2249 return (res >> 1) + round; 2250 } 2251 2252 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2253 { 2254 int64_t res = a + b; 2255 uint8_t round = get_round(vxrm, res, 1); 2256 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2257 2258 /* With signed overflow, bit 64 is inverse of bit 63. 
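     * The true sum is a 65-bit value; res >> 1 shifts in the wrapped (and
     * therefore wrong) sign bit, and XOR-ing with 'over' flips bit 63 back
     * to the real bit 64 of the sum, so no wider intermediate is needed.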
*/ 2259 return ((res >> 1) ^ over) + round; 2260 } 2261 2262 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2263 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2264 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2265 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2266 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) 2267 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 2268 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) 2269 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) 2270 2271 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2272 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2273 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2274 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2275 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) 2276 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) 2277 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) 2278 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) 2279 2280 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2281 uint32_t a, uint32_t b) 2282 { 2283 uint64_t res = (uint64_t)a + b; 2284 uint8_t round = get_round(vxrm, res, 1); 2285 2286 return (res >> 1) + round; 2287 } 2288 2289 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2290 uint64_t a, uint64_t b) 2291 { 2292 uint64_t res = a + b; 2293 uint8_t round = get_round(vxrm, res, 1); 2294 uint64_t over = (uint64_t)(res < a) << 63; 2295 2296 return ((res >> 1) | over) + round; 2297 } 2298 2299 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2300 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2301 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2302 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2303 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1) 2304 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2) 2305 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4) 2306 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8) 2307 2308 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2309 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2310 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2311 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2312 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1) 2313 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2) 2314 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4) 2315 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8) 2316 2317 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2318 { 2319 int64_t res = (int64_t)a - b; 2320 uint8_t round = get_round(vxrm, res, 1); 2321 2322 return (res >> 1) + round; 2323 } 2324 2325 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2326 { 2327 int64_t res = (int64_t)a - b; 2328 uint8_t round = get_round(vxrm, res, 1); 2329 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2330 2331 /* With signed overflow, bit 64 is inverse of bit 63. 
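     * As in aadd64 above, XOR-ing with 'over' recovers the true bit 64 of
     * the 65-bit difference before the rounding increment is added.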
*/ 2332 return ((res >> 1) ^ over) + round; 2333 } 2334 2335 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2336 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2337 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2338 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2339 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) 2340 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) 2341 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) 2342 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) 2343 2344 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2345 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2346 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2347 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2348 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) 2349 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) 2350 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) 2351 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) 2352 2353 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2354 uint32_t a, uint32_t b) 2355 { 2356 int64_t res = (int64_t)a - b; 2357 uint8_t round = get_round(vxrm, res, 1); 2358 2359 return (res >> 1) + round; 2360 } 2361 2362 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2363 uint64_t a, uint64_t b) 2364 { 2365 uint64_t res = (uint64_t)a - b; 2366 uint8_t round = get_round(vxrm, res, 1); 2367 uint64_t over = (uint64_t)(res > a) << 63; 2368 2369 return ((res >> 1) | over) + round; 2370 } 2371 2372 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2373 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2374 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2375 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2376 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1) 2377 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2) 2378 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4) 2379 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8) 2380 2381 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2382 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2383 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2384 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2385 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1) 2386 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2) 2387 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4) 2388 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8) 2389 2390 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2391 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2392 { 2393 uint8_t round; 2394 int16_t res; 2395 2396 res = (int16_t)a * (int16_t)b; 2397 round = get_round(vxrm, res, 7); 2398 res = (res >> 7) + round; 2399 2400 if (res > INT8_MAX) { 2401 env->vxsat = 0x1; 2402 return INT8_MAX; 2403 } else if (res < INT8_MIN) { 2404 env->vxsat = 0x1; 2405 return INT8_MIN; 2406 } else { 2407 return res; 2408 } 2409 } 2410 2411 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2412 { 2413 uint8_t round; 2414 int32_t res; 2415 2416 res = (int32_t)a * (int32_t)b; 2417 round = get_round(vxrm, res, 15); 2418 res = (res >> 15) + round; 2419 2420 if (res > INT16_MAX) { 2421 env->vxsat = 0x1; 2422 return INT16_MAX; 2423 } else if (res < INT16_MIN) { 2424 env->vxsat = 0x1; 2425 return INT16_MIN; 2426 } else { 2427 return res; 2428 } 2429 } 2430 2431 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2432 { 2433 uint8_t round; 2434 int64_t res; 2435 2436 res = (int64_t)a * (int64_t)b; 2437 round = get_round(vxrm, res, 31); 2438 res = (res >> 31) + round; 2439 2440 if (res > INT32_MAX) { 2441 env->vxsat = 0x1; 2442 return INT32_MAX; 2443 } else 
if (res < INT32_MIN) { 2444 env->vxsat = 0x1; 2445 return INT32_MIN; 2446 } else { 2447 return res; 2448 } 2449 } 2450 2451 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2452 { 2453 uint8_t round; 2454 uint64_t hi_64, lo_64; 2455 int64_t res; 2456 2457 if (a == INT64_MIN && b == INT64_MIN) { 2458 env->vxsat = 1; 2459 return INT64_MAX; 2460 } 2461 2462 muls64(&lo_64, &hi_64, a, b); 2463 round = get_round(vxrm, lo_64, 63); 2464 /* 2465 * Cannot overflow, as there are always 2466 * 2 sign bits after multiply. 2467 */ 2468 res = (hi_64 << 1) | (lo_64 >> 63); 2469 if (round) { 2470 if (res == INT64_MAX) { 2471 env->vxsat = 1; 2472 } else { 2473 res += 1; 2474 } 2475 } 2476 return res; 2477 } 2478 2479 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2480 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2481 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2482 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2483 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) 2484 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) 2485 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) 2486 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) 2487 2488 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2489 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2490 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2491 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2492 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) 2493 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) 2494 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) 2495 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) 2496 2497 /* Vector Widening Saturating Scaled Multiply-Add */ 2498 static inline uint16_t 2499 vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, 2500 uint16_t c) 2501 { 2502 uint8_t round; 2503 uint16_t res = (uint16_t)a * b; 2504 2505 round = get_round(vxrm, res, 4); 2506 res = (res >> 4) + round; 2507 return saddu16(env, vxrm, c, res); 2508 } 2509 2510 static inline uint32_t 2511 vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, 2512 uint32_t c) 2513 { 2514 uint8_t round; 2515 uint32_t res = (uint32_t)a * b; 2516 2517 round = get_round(vxrm, res, 8); 2518 res = (res >> 8) + round; 2519 return saddu32(env, vxrm, c, res); 2520 } 2521 2522 static inline uint64_t 2523 vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, 2524 uint64_t c) 2525 { 2526 uint8_t round; 2527 uint64_t res = (uint64_t)a * b; 2528 2529 round = get_round(vxrm, res, 16); 2530 res = (res >> 16) + round; 2531 return saddu64(env, vxrm, c, res); 2532 } 2533 2534 #define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2535 static inline void \ 2536 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2537 CPURISCVState *env, int vxrm) \ 2538 { \ 2539 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2540 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2541 TD d = *((TD *)vd + HD(i)); \ 2542 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ 2543 } 2544 2545 RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) 2546 RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) 2547 RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) 2548 GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2) 2549 GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4) 2550 GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8) 2551 2552 #define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2553 static inline void \ 2554 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2555 CPURISCVState *env, int vxrm) \ 2556 { \ 2557 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2558 TD d = *((TD *)vd + HD(i)); \ 2559 *((TD *)vd + 
HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ 2560 } 2561 2562 RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) 2563 RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) 2564 RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) 2565 GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2) 2566 GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4) 2567 GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8) 2568 2569 static inline int16_t 2570 vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) 2571 { 2572 uint8_t round; 2573 int16_t res = (int16_t)a * b; 2574 2575 round = get_round(vxrm, res, 4); 2576 res = (res >> 4) + round; 2577 return sadd16(env, vxrm, c, res); 2578 } 2579 2580 static inline int32_t 2581 vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) 2582 { 2583 uint8_t round; 2584 int32_t res = (int32_t)a * b; 2585 2586 round = get_round(vxrm, res, 8); 2587 res = (res >> 8) + round; 2588 return sadd32(env, vxrm, c, res); 2589 2590 } 2591 2592 static inline int64_t 2593 vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) 2594 { 2595 uint8_t round; 2596 int64_t res = (int64_t)a * b; 2597 2598 round = get_round(vxrm, res, 16); 2599 res = (res >> 16) + round; 2600 return sadd64(env, vxrm, c, res); 2601 } 2602 2603 RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) 2604 RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) 2605 RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) 2606 GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2) 2607 GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4) 2608 GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8) 2609 RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) 2610 RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) 2611 RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) 2612 GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2) 2613 GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4) 2614 GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8) 2615 2616 static inline int16_t 2617 vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) 2618 { 2619 uint8_t round; 2620 int16_t res = a * (int16_t)b; 2621 2622 round = get_round(vxrm, res, 4); 2623 res = (res >> 4) + round; 2624 return ssub16(env, vxrm, c, res); 2625 } 2626 2627 static inline int32_t 2628 vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) 2629 { 2630 uint8_t round; 2631 int32_t res = a * (int32_t)b; 2632 2633 round = get_round(vxrm, res, 8); 2634 res = (res >> 8) + round; 2635 return ssub32(env, vxrm, c, res); 2636 } 2637 2638 static inline int64_t 2639 vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) 2640 { 2641 uint8_t round; 2642 int64_t res = a * (int64_t)b; 2643 2644 round = get_round(vxrm, res, 16); 2645 res = (res >> 16) + round; 2646 return ssub64(env, vxrm, c, res); 2647 } 2648 2649 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) 2650 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) 2651 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) 2652 GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2) 2653 GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4) 2654 GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8) 2655 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) 2656 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) 2657 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) 2658 GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2) 2659 GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4) 2660 GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8) 2661 2662 static inline int16_t 2663 
vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) 2664 { 2665 uint8_t round; 2666 int16_t res = (int16_t)a * b; 2667 2668 round = get_round(vxrm, res, 4); 2669 res = (res >> 4) + round; 2670 return ssub16(env, vxrm, c, res); 2671 } 2672 2673 static inline int32_t 2674 vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) 2675 { 2676 uint8_t round; 2677 int32_t res = (int32_t)a * b; 2678 2679 round = get_round(vxrm, res, 8); 2680 res = (res >> 8) + round; 2681 return ssub32(env, vxrm, c, res); 2682 } 2683 2684 static inline int64_t 2685 vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) 2686 { 2687 uint8_t round; 2688 int64_t res = (int64_t)a * b; 2689 2690 round = get_round(vxrm, res, 16); 2691 res = (res >> 16) + round; 2692 return ssub64(env, vxrm, c, res); 2693 } 2694 2695 RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8) 2696 RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) 2697 RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) 2698 GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2) 2699 GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4) 2700 GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8) 2701 2702 /* Vector Single-Width Scaling Shift Instructions */ 2703 static inline uint8_t 2704 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2705 { 2706 uint8_t round, shift = b & 0x7; 2707 uint8_t res; 2708 2709 round = get_round(vxrm, a, shift); 2710 res = (a >> shift) + round; 2711 return res; 2712 } 2713 static inline uint16_t 2714 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2715 { 2716 uint8_t round, shift = b & 0xf; 2717 uint16_t res; 2718 2719 round = get_round(vxrm, a, shift); 2720 res = (a >> shift) + round; 2721 return res; 2722 } 2723 static inline uint32_t 2724 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2725 { 2726 uint8_t round, shift = b & 0x1f; 2727 uint32_t res; 2728 2729 round = get_round(vxrm, a, shift); 2730 res = (a >> shift) + round; 2731 return res; 2732 } 2733 static inline uint64_t 2734 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2735 { 2736 uint8_t round, shift = b & 0x3f; 2737 uint64_t res; 2738 2739 round = get_round(vxrm, a, shift); 2740 res = (a >> shift) + round; 2741 return res; 2742 } 2743 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2744 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2745 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2746 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2747 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) 2748 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) 2749 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) 2750 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) 2751 2752 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2753 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2754 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2755 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2756 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) 2757 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) 2758 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) 2759 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) 2760 2761 static inline int8_t 2762 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2763 { 2764 uint8_t round, shift = b & 0x7; 2765 int8_t res; 2766 2767 round = get_round(vxrm, a, shift); 2768 res = (a >> shift) + round; 2769 return res; 2770 } 2771 static inline int16_t 2772 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2773 { 2774 uint8_t round, shift = b & 0xf; 2775 int16_t res; 2776 2777 round = 
get_round(vxrm, a, shift); 2778 res = (a >> shift) + round; 2779 return res; 2780 } 2781 static inline int32_t 2782 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2783 { 2784 uint8_t round, shift = b & 0x1f; 2785 int32_t res; 2786 2787 round = get_round(vxrm, a, shift); 2788 res = (a >> shift) + round; 2789 return res; 2790 } 2791 static inline int64_t 2792 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2793 { 2794 uint8_t round, shift = b & 0x3f; 2795 int64_t res; 2796 2797 round = get_round(vxrm, a, shift); 2798 res = (a >> shift) + round; 2799 return res; 2800 } 2801 2802 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2803 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2804 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2805 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2806 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) 2807 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) 2808 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) 2809 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) 2810 2811 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2812 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2813 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2814 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2815 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) 2816 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) 2817 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) 2818 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) 2819 2820 /* Vector Narrowing Fixed-Point Clip Instructions */ 2821 static inline int8_t 2822 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2823 { 2824 uint8_t round, shift = b & 0xf; 2825 int16_t res; 2826 2827 round = get_round(vxrm, a, shift); 2828 res = (a >> shift) + round; 2829 if (res > INT8_MAX) { 2830 env->vxsat = 0x1; 2831 return INT8_MAX; 2832 } else if (res < INT8_MIN) { 2833 env->vxsat = 0x1; 2834 return INT8_MIN; 2835 } else { 2836 return res; 2837 } 2838 } 2839 2840 static inline int16_t 2841 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2842 { 2843 uint8_t round, shift = b & 0x1f; 2844 int32_t res; 2845 2846 round = get_round(vxrm, a, shift); 2847 res = (a >> shift) + round; 2848 if (res > INT16_MAX) { 2849 env->vxsat = 0x1; 2850 return INT16_MAX; 2851 } else if (res < INT16_MIN) { 2852 env->vxsat = 0x1; 2853 return INT16_MIN; 2854 } else { 2855 return res; 2856 } 2857 } 2858 2859 static inline int32_t 2860 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2861 { 2862 uint8_t round, shift = b & 0x3f; 2863 int64_t res; 2864 2865 round = get_round(vxrm, a, shift); 2866 res = (a >> shift) + round; 2867 if (res > INT32_MAX) { 2868 env->vxsat = 0x1; 2869 return INT32_MAX; 2870 } else if (res < INT32_MIN) { 2871 env->vxsat = 0x1; 2872 return INT32_MIN; 2873 } else { 2874 return res; 2875 } 2876 } 2877 2878 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2879 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2880 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2881 GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1) 2882 GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2) 2883 GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4) 2884 2885 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2886 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2887 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2888 GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1) 2889 GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2) 2890 GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4) 2891 2892 static inline uint8_t 2893 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2894 { 
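    /*
     * Unsigned narrowing clip: shift the 16-bit source right by the 4-bit
     * shift amount with rounding, then saturate to the 8-bit range.  For
     * example, with vxrm = rnu, a = 0xff80 and shift = 8 the rounded value
     * is 0xff + 1 = 0x100, so the result clips to UINT8_MAX and vxsat is set.
     */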
2895 uint8_t round, shift = b & 0xf; 2896 uint16_t res; 2897 2898 round = get_round(vxrm, a, shift); 2899 res = (a >> shift) + round; 2900 if (res > UINT8_MAX) { 2901 env->vxsat = 0x1; 2902 return UINT8_MAX; 2903 } else { 2904 return res; 2905 } 2906 } 2907 2908 static inline uint16_t 2909 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2910 { 2911 uint8_t round, shift = b & 0x1f; 2912 uint32_t res; 2913 2914 round = get_round(vxrm, a, shift); 2915 res = (a >> shift) + round; 2916 if (res > UINT16_MAX) { 2917 env->vxsat = 0x1; 2918 return UINT16_MAX; 2919 } else { 2920 return res; 2921 } 2922 } 2923 2924 static inline uint32_t 2925 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2926 { 2927 uint8_t round, shift = b & 0x3f; 2928 uint64_t res; 2929 2930 round = get_round(vxrm, a, shift); 2931 res = (a >> shift) + round; 2932 if (res > UINT32_MAX) { 2933 env->vxsat = 0x1; 2934 return UINT32_MAX; 2935 } else { 2936 return res; 2937 } 2938 } 2939 2940 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2941 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2942 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2943 GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1) 2944 GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2) 2945 GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4) 2946 2947 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2948 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2949 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 2950 GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1) 2951 GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2) 2952 GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4) 2953 2954 /* 2955 *** Vector Float Point Arithmetic Instructions 2956 */ 2957 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2958 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2959 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2960 CPURISCVState *env) \ 2961 { \ 2962 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2963 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2964 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2965 } 2966 2967 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \ 2968 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2969 void *vs2, CPURISCVState *env, \ 2970 uint32_t desc) \ 2971 { \ 2972 uint32_t vm = vext_vm(desc); \ 2973 uint32_t vl = env->vl; \ 2974 uint32_t i; \ 2975 \ 2976 for (i = 0; i < vl; i++) { \ 2977 if (!vm && !vext_elem_mask(v0, i)) { \ 2978 continue; \ 2979 } \ 2980 do_##NAME(vd, vs1, vs2, i, env); \ 2981 } \ 2982 } 2983 2984 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2985 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2986 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2987 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) 2988 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4) 2989 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8) 2990 2991 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2992 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2993 CPURISCVState *env) \ 2994 { \ 2995 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2996 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 2997 } 2998 2999 #define GEN_VEXT_VF(NAME, ESZ, DSZ) \ 3000 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3001 void *vs2, CPURISCVState *env, \ 3002 uint32_t desc) \ 3003 { \ 3004 uint32_t vm = vext_vm(desc); \ 3005 uint32_t vl = env->vl; \ 3006 uint32_t i; \ 3007 \ 3008 for (i = 0; i < vl; i++) { \ 3009 if (!vm && !vext_elem_mask(v0, i)) { \ 3010 continue; \ 3011 } \ 3012 do_##NAME(vd, s1, vs2, i, 
env); \ 3013 } \ 3014 } 3015 3016 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3017 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3018 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3019 GEN_VEXT_VF(vfadd_vf_h, 2, 2) 3020 GEN_VEXT_VF(vfadd_vf_w, 4, 4) 3021 GEN_VEXT_VF(vfadd_vf_d, 8, 8) 3022 3023 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3024 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3025 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3026 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) 3027 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) 3028 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) 3029 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3030 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3031 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3032 GEN_VEXT_VF(vfsub_vf_h, 2, 2) 3033 GEN_VEXT_VF(vfsub_vf_w, 4, 4) 3034 GEN_VEXT_VF(vfsub_vf_d, 8, 8) 3035 3036 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3037 { 3038 return float16_sub(b, a, s); 3039 } 3040 3041 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3042 { 3043 return float32_sub(b, a, s); 3044 } 3045 3046 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3047 { 3048 return float64_sub(b, a, s); 3049 } 3050 3051 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3052 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3053 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3054 GEN_VEXT_VF(vfrsub_vf_h, 2, 2) 3055 GEN_VEXT_VF(vfrsub_vf_w, 4, 4) 3056 GEN_VEXT_VF(vfrsub_vf_d, 8, 8) 3057 3058 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3059 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3060 { 3061 return float32_add(float16_to_float32(a, true, s), 3062 float16_to_float32(b, true, s), s); 3063 } 3064 3065 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3066 { 3067 return float64_add(float32_to_float64(a, s), 3068 float32_to_float64(b, s), s); 3069 3070 } 3071 3072 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3073 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3074 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) 3075 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) 3076 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3077 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3078 GEN_VEXT_VF(vfwadd_vf_h, 2, 4) 3079 GEN_VEXT_VF(vfwadd_vf_w, 4, 8) 3080 3081 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3082 { 3083 return float32_sub(float16_to_float32(a, true, s), 3084 float16_to_float32(b, true, s), s); 3085 } 3086 3087 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3088 { 3089 return float64_sub(float32_to_float64(a, s), 3090 float32_to_float64(b, s), s); 3091 3092 } 3093 3094 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3095 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3096 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) 3097 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) 3098 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3099 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3100 GEN_VEXT_VF(vfwsub_vf_h, 2, 4) 3101 GEN_VEXT_VF(vfwsub_vf_w, 4, 8) 3102 3103 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3104 { 3105 return float32_add(a, float16_to_float32(b, true, s), s); 3106 } 3107 3108 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3109 { 3110 return float64_add(a, 
float32_to_float64(b, s), s); 3111 } 3112 3113 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3114 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3115 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) 3116 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 3117 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3118 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3119 GEN_VEXT_VF(vfwadd_wf_h, 2, 4) 3120 GEN_VEXT_VF(vfwadd_wf_w, 4, 8) 3121 3122 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3123 { 3124 return float32_sub(a, float16_to_float32(b, true, s), s); 3125 } 3126 3127 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3128 { 3129 return float64_sub(a, float32_to_float64(b, s), s); 3130 } 3131 3132 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3133 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3134 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) 3135 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) 3136 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3137 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3138 GEN_VEXT_VF(vfwsub_wf_h, 2, 4) 3139 GEN_VEXT_VF(vfwsub_wf_w, 4, 8) 3140 3141 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3142 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3143 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3144 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3145 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) 3146 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) 3147 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) 3148 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3149 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3150 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3151 GEN_VEXT_VF(vfmul_vf_h, 2, 2) 3152 GEN_VEXT_VF(vfmul_vf_w, 4, 4) 3153 GEN_VEXT_VF(vfmul_vf_d, 8, 8) 3154 3155 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3156 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3157 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3158 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 3159 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) 3160 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) 3161 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3162 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3163 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3164 GEN_VEXT_VF(vfdiv_vf_h, 2, 2) 3165 GEN_VEXT_VF(vfdiv_vf_w, 4, 4) 3166 GEN_VEXT_VF(vfdiv_vf_d, 8, 8) 3167 3168 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3169 { 3170 return float16_div(b, a, s); 3171 } 3172 3173 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3174 { 3175 return float32_div(b, a, s); 3176 } 3177 3178 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3179 { 3180 return float64_div(b, a, s); 3181 } 3182 3183 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3184 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3185 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3186 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) 3187 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) 3188 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) 3189 3190 /* Vector Widening Floating-Point Multiply */ 3191 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3192 { 3193 return float32_mul(float16_to_float32(a, true, s), 3194 float16_to_float32(b, true, s), s); 3195 } 3196 3197 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3198 { 3199 return 
float64_mul(float32_to_float64(a, s), 3200 float32_to_float64(b, s), s); 3201 3202 } 3203 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3204 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3205 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) 3206 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) 3207 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3208 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3209 GEN_VEXT_VF(vfwmul_vf_h, 2, 4) 3210 GEN_VEXT_VF(vfwmul_vf_w, 4, 8) 3211 3212 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3213 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3214 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3215 CPURISCVState *env) \ 3216 { \ 3217 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3218 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3219 TD d = *((TD *)vd + HD(i)); \ 3220 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3221 } 3222 3223 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3224 { 3225 return float16_muladd(a, b, d, 0, s); 3226 } 3227 3228 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3229 { 3230 return float32_muladd(a, b, d, 0, s); 3231 } 3232 3233 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3234 { 3235 return float64_muladd(a, b, d, 0, s); 3236 } 3237 3238 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3239 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3240 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3241 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) 3242 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) 3243 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) 3244 3245 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3246 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3247 CPURISCVState *env) \ 3248 { \ 3249 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3250 TD d = *((TD *)vd + HD(i)); \ 3251 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3252 } 3253 3254 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3255 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3256 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3257 GEN_VEXT_VF(vfmacc_vf_h, 2, 2) 3258 GEN_VEXT_VF(vfmacc_vf_w, 4, 4) 3259 GEN_VEXT_VF(vfmacc_vf_d, 8, 8) 3260 3261 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3262 { 3263 return float16_muladd(a, b, d, 3264 float_muladd_negate_c | float_muladd_negate_product, s); 3265 } 3266 3267 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3268 { 3269 return float32_muladd(a, b, d, 3270 float_muladd_negate_c | float_muladd_negate_product, s); 3271 } 3272 3273 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3274 { 3275 return float64_muladd(a, b, d, 3276 float_muladd_negate_c | float_muladd_negate_product, s); 3277 } 3278 3279 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3280 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3281 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3282 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) 3283 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) 3284 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) 3285 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3286 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3287 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3288 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) 3289 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) 3290 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) 3291 3292 static 
uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3293 { 3294 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3295 } 3296 3297 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3298 { 3299 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3300 } 3301 3302 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3303 { 3304 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3305 } 3306 3307 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3308 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3309 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3310 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) 3311 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) 3312 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) 3313 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3314 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3315 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3316 GEN_VEXT_VF(vfmsac_vf_h, 2, 2) 3317 GEN_VEXT_VF(vfmsac_vf_w, 4, 4) 3318 GEN_VEXT_VF(vfmsac_vf_d, 8, 8) 3319 3320 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3321 { 3322 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3323 } 3324 3325 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3326 { 3327 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3328 } 3329 3330 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3331 { 3332 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3333 } 3334 3335 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3336 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3337 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3338 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) 3339 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) 3340 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) 3341 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3342 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3343 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3344 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) 3345 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) 3346 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) 3347 3348 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3349 { 3350 return float16_muladd(d, b, a, 0, s); 3351 } 3352 3353 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3354 { 3355 return float32_muladd(d, b, a, 0, s); 3356 } 3357 3358 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3359 { 3360 return float64_muladd(d, b, a, 0, s); 3361 } 3362 3363 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3364 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3365 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3366 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) 3367 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) 3368 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) 3369 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3370 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3371 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3372 GEN_VEXT_VF(vfmadd_vf_h, 2, 2) 3373 GEN_VEXT_VF(vfmadd_vf_w, 4, 4) 3374 GEN_VEXT_VF(vfmadd_vf_d, 8, 8) 3375 3376 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3377 { 3378 return float16_muladd(d, b, a, 3379 float_muladd_negate_c | float_muladd_negate_product, s); 3380 } 3381 3382 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status 
*s) 3383 { 3384 return float32_muladd(d, b, a, 3385 float_muladd_negate_c | float_muladd_negate_product, s); 3386 } 3387 3388 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3389 { 3390 return float64_muladd(d, b, a, 3391 float_muladd_negate_c | float_muladd_negate_product, s); 3392 } 3393 3394 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3395 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3396 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3397 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) 3398 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) 3399 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) 3400 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3401 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3402 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3403 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) 3404 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) 3405 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) 3406 3407 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3408 { 3409 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3410 } 3411 3412 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3413 { 3414 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3415 } 3416 3417 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3418 { 3419 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3420 } 3421 3422 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3423 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3424 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3425 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) 3426 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 3427 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) 3428 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3429 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3430 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3431 GEN_VEXT_VF(vfmsub_vf_h, 2, 2) 3432 GEN_VEXT_VF(vfmsub_vf_w, 4, 4) 3433 GEN_VEXT_VF(vfmsub_vf_d, 8, 8) 3434 3435 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3436 { 3437 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3438 } 3439 3440 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3441 { 3442 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3443 } 3444 3445 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3446 { 3447 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3448 } 3449 3450 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3451 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3452 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3453 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) 3454 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) 3455 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) 3456 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3457 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3458 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3459 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) 3460 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) 3461 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) 3462 3463 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3464 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3465 { 3466 return float32_muladd(float16_to_float32(a, true, s), 3467 float16_to_float32(b, true, s), d, 0, s); 3468 } 3469 3470 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, 
float_status *s) 3471 { 3472 return float64_muladd(float32_to_float64(a, s), 3473 float32_to_float64(b, s), d, 0, s); 3474 } 3475 3476 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3477 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3478 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) 3479 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) 3480 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3481 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3482 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) 3483 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) 3484 3485 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3486 { 3487 return float32_muladd(float16_to_float32(a, true, s), 3488 float16_to_float32(b, true, s), d, 3489 float_muladd_negate_c | float_muladd_negate_product, s); 3490 } 3491 3492 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3493 { 3494 return float64_muladd(float32_to_float64(a, s), 3495 float32_to_float64(b, s), d, 3496 float_muladd_negate_c | float_muladd_negate_product, s); 3497 } 3498 3499 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3500 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3501 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) 3502 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) 3503 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3504 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3505 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) 3506 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) 3507 3508 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3509 { 3510 return float32_muladd(float16_to_float32(a, true, s), 3511 float16_to_float32(b, true, s), d, 3512 float_muladd_negate_c, s); 3513 } 3514 3515 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3516 { 3517 return float64_muladd(float32_to_float64(a, s), 3518 float32_to_float64(b, s), d, 3519 float_muladd_negate_c, s); 3520 } 3521 3522 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3523 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3524 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) 3525 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) 3526 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3527 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3528 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) 3529 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) 3530 3531 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3532 { 3533 return float32_muladd(float16_to_float32(a, true, s), 3534 float16_to_float32(b, true, s), d, 3535 float_muladd_negate_product, s); 3536 } 3537 3538 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3539 { 3540 return float64_muladd(float32_to_float64(a, s), 3541 float32_to_float64(b, s), d, 3542 float_muladd_negate_product, s); 3543 } 3544 3545 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3546 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3547 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) 3548 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 3549 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3550 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3551 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) 3552 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) 3553 3554 /* Vector Floating-Point Square-Root Instruction */ 3555 /* (TD, T2, TX2) */ 3556 #define OP_UU_H uint16_t, uint16_t, uint16_t 3557 #define OP_UU_W uint32_t, uint32_t, uint32_t 3558 #define OP_UU_D uint64_t, uint64_t, uint64_t 3559 3560 #define OPFVV1(NAME, 
TD, T2, TX2, HD, HS2, OP) \ 3561 static void do_##NAME(void *vd, void *vs2, int i, \ 3562 CPURISCVState *env) \ 3563 { \ 3564 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3565 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3566 } 3567 3568 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ 3569 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3570 CPURISCVState *env, uint32_t desc) \ 3571 { \ 3572 uint32_t vm = vext_vm(desc); \ 3573 uint32_t vl = env->vl; \ 3574 uint32_t i; \ 3575 \ 3576 if (vl == 0) { \ 3577 return; \ 3578 } \ 3579 for (i = 0; i < vl; i++) { \ 3580 if (!vm && !vext_elem_mask(v0, i)) { \ 3581 continue; \ 3582 } \ 3583 do_##NAME(vd, vs2, i, env); \ 3584 } \ 3585 } 3586 3587 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3588 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3589 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3590 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) 3591 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) 3592 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) 3593 3594 /* Vector Floating-Point MIN/MAX Instructions */ 3595 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum) 3596 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum) 3597 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum) 3598 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) 3599 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) 3600 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) 3601 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum) 3602 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum) 3603 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum) 3604 GEN_VEXT_VF(vfmin_vf_h, 2, 2) 3605 GEN_VEXT_VF(vfmin_vf_w, 4, 4) 3606 GEN_VEXT_VF(vfmin_vf_d, 8, 8) 3607 3608 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum) 3609 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum) 3610 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum) 3611 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) 3612 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) 3613 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) 3614 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum) 3615 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum) 3616 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) 3617 GEN_VEXT_VF(vfmax_vf_h, 2, 2) 3618 GEN_VEXT_VF(vfmax_vf_w, 4, 4) 3619 GEN_VEXT_VF(vfmax_vf_d, 8, 8) 3620 3621 /* Vector Floating-Point Sign-Injection Instructions */ 3622 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3623 { 3624 return deposit64(b, 0, 15, a); 3625 } 3626 3627 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3628 { 3629 return deposit64(b, 0, 31, a); 3630 } 3631 3632 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3633 { 3634 return deposit64(b, 0, 63, a); 3635 } 3636 3637 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3638 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3639 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3640 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) 3641 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) 3642 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) 3643 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3644 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3645 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3646 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) 3647 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) 3648 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) 3649 3650 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3651 { 3652 return deposit64(~b, 0, 15, a); 3653 } 3654 3655 static uint32_t fsgnjn32(uint32_t a, uint32_t b, 
float_status *s) 3656 { 3657 return deposit64(~b, 0, 31, a); 3658 } 3659 3660 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3661 { 3662 return deposit64(~b, 0, 63, a); 3663 } 3664 3665 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3666 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3667 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3668 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) 3669 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) 3670 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) 3671 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3672 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3673 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3674 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) 3675 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) 3676 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) 3677 3678 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3679 { 3680 return deposit64(b ^ a, 0, 15, a); 3681 } 3682 3683 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3684 { 3685 return deposit64(b ^ a, 0, 31, a); 3686 } 3687 3688 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3689 { 3690 return deposit64(b ^ a, 0, 63, a); 3691 } 3692 3693 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3694 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3695 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3696 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) 3697 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) 3698 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) 3699 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3700 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3701 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3702 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) 3703 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) 3704 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) 3705 3706 /* Vector Floating-Point Compare Instructions */ 3707 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3708 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3709 CPURISCVState *env, uint32_t desc) \ 3710 { \ 3711 uint32_t vm = vext_vm(desc); \ 3712 uint32_t vl = env->vl; \ 3713 uint32_t i; \ 3714 \ 3715 for (i = 0; i < vl; i++) { \ 3716 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3717 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3718 if (!vm && !vext_elem_mask(v0, i)) { \ 3719 continue; \ 3720 } \ 3721 vext_set_elem_mask(vd, i, \ 3722 DO_OP(s2, s1, &env->fp_status)); \ 3723 } \ 3724 } 3725 3726 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3727 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3728 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3729 3730 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3731 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3732 CPURISCVState *env, uint32_t desc) \ 3733 { \ 3734 uint32_t vm = vext_vm(desc); \ 3735 uint32_t vl = env->vl; \ 3736 uint32_t i; \ 3737 \ 3738 for (i = 0; i < vl; i++) { \ 3739 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3740 if (!vm && !vext_elem_mask(v0, i)) { \ 3741 continue; \ 3742 } \ 3743 vext_set_elem_mask(vd, i, \ 3744 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3745 } \ 3746 } 3747 3748 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3749 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3750 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3751 3752 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3753 { 3754 FloatRelation compare = float16_compare_quiet(a, b, s); 3755 return compare != 
float_relation_equal;
}

static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)

GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)

GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)

static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare(a, b, s);
    return compare == float_relation_greater;
}

static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare(a, b, s);
    return compare == float_relation_greater;
}

static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare(a, b, s);
    return compare == float_relation_greater;
}

GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)

static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)

GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet)
GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet)
GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet)
GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet)
GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet)
GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet)
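
/*
 * Note on the compare helpers above: vmfeq/vmfne and vmford use the quiet
 * comparison predicates (no invalid-operation flag for quiet NaNs), while
 * vmflt/vmfle/vmfgt/vmfge use the signaling ones, as IEEE 754 and the
 * vector spec require.  Masked-off elements are skipped entirely, so their
 * destination mask bits keep whatever value they had before.
 *
 * The sketch below is purely illustrative (example_mask_bit is not a QEMU
 * API and is not used elsewhere in this file): it shows how a mask produced
 * by these helpers is laid out, namely one bit per element at bit (i % 64)
 * of the (i / 64)-th host-endian 64-bit chunk, which is exactly what
 * vext_set_elem_mask()/vext_elem_mask() encode.
 */
static inline bool example_mask_bit(const void *v0, int i)
{
    /* Same addressing scheme as vext_elem_mask(): one mask bit per element. */
    return (((const uint64_t *)v0)[i / 64] >> (i % 64)) & 1;
}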
3844 3845 /* Vector Floating-Point Classify Instruction */ 3846 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3847 static void do_##NAME(void *vd, void *vs2, int i) \ 3848 { \ 3849 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3850 *((TD *)vd + HD(i)) = OP(s2); \ 3851 } 3852 3853 #define GEN_VEXT_V(NAME, ESZ, DSZ) \ 3854 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3855 CPURISCVState *env, uint32_t desc) \ 3856 { \ 3857 uint32_t vm = vext_vm(desc); \ 3858 uint32_t vl = env->vl; \ 3859 uint32_t i; \ 3860 \ 3861 for (i = 0; i < vl; i++) { \ 3862 if (!vm && !vext_elem_mask(v0, i)) { \ 3863 continue; \ 3864 } \ 3865 do_##NAME(vd, vs2, i); \ 3866 } \ 3867 } 3868 3869 target_ulong fclass_h(uint64_t frs1) 3870 { 3871 float16 f = frs1; 3872 bool sign = float16_is_neg(f); 3873 3874 if (float16_is_infinity(f)) { 3875 return sign ? 1 << 0 : 1 << 7; 3876 } else if (float16_is_zero(f)) { 3877 return sign ? 1 << 3 : 1 << 4; 3878 } else if (float16_is_zero_or_denormal(f)) { 3879 return sign ? 1 << 2 : 1 << 5; 3880 } else if (float16_is_any_nan(f)) { 3881 float_status s = { }; /* for snan_bit_is_one */ 3882 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3883 } else { 3884 return sign ? 1 << 1 : 1 << 6; 3885 } 3886 } 3887 3888 target_ulong fclass_s(uint64_t frs1) 3889 { 3890 float32 f = frs1; 3891 bool sign = float32_is_neg(f); 3892 3893 if (float32_is_infinity(f)) { 3894 return sign ? 1 << 0 : 1 << 7; 3895 } else if (float32_is_zero(f)) { 3896 return sign ? 1 << 3 : 1 << 4; 3897 } else if (float32_is_zero_or_denormal(f)) { 3898 return sign ? 1 << 2 : 1 << 5; 3899 } else if (float32_is_any_nan(f)) { 3900 float_status s = { }; /* for snan_bit_is_one */ 3901 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3902 } else { 3903 return sign ? 1 << 1 : 1 << 6; 3904 } 3905 } 3906 3907 target_ulong fclass_d(uint64_t frs1) 3908 { 3909 float64 f = frs1; 3910 bool sign = float64_is_neg(f); 3911 3912 if (float64_is_infinity(f)) { 3913 return sign ? 1 << 0 : 1 << 7; 3914 } else if (float64_is_zero(f)) { 3915 return sign ? 1 << 3 : 1 << 4; 3916 } else if (float64_is_zero_or_denormal(f)) { 3917 return sign ? 1 << 2 : 1 << 5; 3918 } else if (float64_is_any_nan(f)) { 3919 float_status s = { }; /* for snan_bit_is_one */ 3920 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3921 } else { 3922 return sign ? 1 << 1 : 1 << 6; 3923 } 3924 } 3925 3926 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 3927 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 3928 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 3929 GEN_VEXT_V(vfclass_v_h, 2, 2) 3930 GEN_VEXT_V(vfclass_v_w, 4, 4) 3931 GEN_VEXT_V(vfclass_v_d, 8, 8) 3932 3933 /* Vector Floating-Point Merge Instruction */ 3934 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 3935 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3936 CPURISCVState *env, uint32_t desc) \ 3937 { \ 3938 uint32_t vm = vext_vm(desc); \ 3939 uint32_t vl = env->vl; \ 3940 uint32_t i; \ 3941 \ 3942 for (i = 0; i < vl; i++) { \ 3943 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3944 *((ETYPE *)vd + H(i)) \ 3945 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 3946 } \ 3947 } 3948 3949 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 3950 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 3951 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 3952 3953 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 3954 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
*/ 3955 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 3956 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 3957 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 3958 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) 3959 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) 3960 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) 3961 3962 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 3963 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 3964 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 3965 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 3966 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) 3967 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) 3968 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) 3969 3970 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 3971 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 3972 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 3973 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 3974 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) 3975 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) 3976 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) 3977 3978 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 3979 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 3980 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 3981 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 3982 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) 3983 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) 3984 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) 3985 3986 /* Widening Floating-Point/Integer Type-Convert Instructions */ 3987 /* (TD, T2, TX2) */ 3988 #define WOP_UU_H uint32_t, uint16_t, uint16_t 3989 #define WOP_UU_W uint64_t, uint32_t, uint32_t 3990 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 3991 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 3992 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 3993 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) 3994 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) 3995 3996 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 3997 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 3998 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 3999 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) 4000 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) 4001 4002 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4003 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4004 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4005 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) 4006 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) 4007 4008 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4009 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4010 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4011 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) 4012 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) 4013 4014 /* 4015 * vfwcvt.f.f.v vd, vs2, vm # 4016 * Convert single-width float to double-width float. 
4017 */ 4018 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4019 { 4020 return float16_to_float32(a, true, s); 4021 } 4022 4023 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4024 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4025 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4) 4026 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) 4027 4028 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4029 /* (TD, T2, TX2) */ 4030 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4031 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4032 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4033 RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16) 4034 RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32) 4035 GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2) 4036 GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4) 4037 4038 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4039 RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16) 4040 RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32) 4041 GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2) 4042 GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4) 4043 4044 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4045 RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16) 4046 RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32) 4047 GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2) 4048 GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4) 4049 4050 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4051 RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16) 4052 RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32) 4053 GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2) 4054 GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4) 4055 4056 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4057 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4058 { 4059 return float32_to_float16(a, true, s); 4060 } 4061 4062 RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) 4063 RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) 4064 GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2) 4065 GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4) 4066 4067 /* 4068 *** Vector Reduction Operations 4069 */ 4070 /* Vector Single-Width Integer Reduction Instructions */ 4071 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4072 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4073 void *vs2, CPURISCVState *env, uint32_t desc) \ 4074 { \ 4075 uint32_t vm = vext_vm(desc); \ 4076 uint32_t vl = env->vl; \ 4077 uint32_t i; \ 4078 TD s1 = *((TD *)vs1 + HD(0)); \ 4079 \ 4080 for (i = 0; i < vl; i++) { \ 4081 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4082 if (!vm && !vext_elem_mask(v0, i)) { \ 4083 continue; \ 4084 } \ 4085 s1 = OP(s1, (TD)s2); \ 4086 } \ 4087 *((TD *)vd + HD(0)) = s1; \ 4088 } 4089 4090 /* vd[0] = sum(vs1[0], vs2[*]) */ 4091 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4092 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4093 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4094 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4095 4096 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4097 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4098 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4099 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4100 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4101 4102 /* vd[0] = max(vs1[0], vs2[*]) */ 4103 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4104 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4105 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4106 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4107 4108 /* vd[0] = minu(vs1[0], vs2[*]) */ 4109 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4110 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4111 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4112 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4113 4114 /* vd[0] = min(vs1[0], vs2[*]) */ 4115 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4116 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4117 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4118 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4119 4120 /* vd[0] = and(vs1[0], vs2[*]) */ 4121 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4122 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4123 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4124 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4125 4126 /* vd[0] = or(vs1[0], vs2[*]) */ 4127 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4128 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4129 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4130 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4131 4132 /* vd[0] = xor(vs1[0], vs2[*]) */ 4133 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4134 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4135 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4136 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4137 4138 /* Vector Widening Integer Reduction Instructions */ 4139 /* signed sum 
reduction into double-width accumulator */ 4140 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4141 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4142 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4143 4144 /* Unsigned sum reduction into double-width accumulator */ 4145 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4146 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4147 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4148 4149 /* Vector Single-Width Floating-Point Reduction Instructions */ 4150 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4151 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4152 void *vs2, CPURISCVState *env, \ 4153 uint32_t desc) \ 4154 { \ 4155 uint32_t vm = vext_vm(desc); \ 4156 uint32_t vl = env->vl; \ 4157 uint32_t i; \ 4158 TD s1 = *((TD *)vs1 + HD(0)); \ 4159 \ 4160 for (i = 0; i < vl; i++) { \ 4161 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4162 if (!vm && !vext_elem_mask(v0, i)) { \ 4163 continue; \ 4164 } \ 4165 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4166 } \ 4167 *((TD *)vd + HD(0)) = s1; \ 4168 } 4169 4170 /* Unordered sum */ 4171 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4172 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4173 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4174 4175 /* Maximum value */ 4176 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4177 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4178 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4179 4180 /* Minimum value */ 4181 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4182 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4183 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4184 4185 /* Vector Widening Floating-Point Reduction Instructions */ 4186 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4187 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4188 void *vs2, CPURISCVState *env, uint32_t desc) 4189 { 4190 uint32_t vm = vext_vm(desc); 4191 uint32_t vl = env->vl; 4192 uint32_t i; 4193 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4194 4195 for (i = 0; i < vl; i++) { 4196 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4197 if (!vm && !vext_elem_mask(v0, i)) { 4198 continue; 4199 } 4200 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4201 &env->fp_status); 4202 } 4203 *((uint32_t *)vd + H4(0)) = s1; 4204 } 4205 4206 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4207 void *vs2, CPURISCVState *env, uint32_t desc) 4208 { 4209 uint32_t vm = vext_vm(desc); 4210 uint32_t vl = env->vl; 4211 uint32_t i; 4212 uint64_t s1 = *((uint64_t *)vs1); 4213 4214 for (i = 0; i < vl; i++) { 4215 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4216 if (!vm && !vext_elem_mask(v0, i)) { 4217 continue; 4218 } 4219 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4220 &env->fp_status); 4221 } 4222 *((uint64_t *)vd) = s1; 4223 } 4224 4225 /* 4226 *** Vector Mask Operations 4227 */ 4228 /* Vector Mask-Register Logical Instructions */ 4229 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4230 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4231 void *vs2, CPURISCVState *env, \ 4232 uint32_t desc) \ 4233 { \ 4234 uint32_t vl = env->vl; \ 4235 uint32_t i; \ 4236 int a, b; \ 4237 \ 4238 for (i = 0; i < vl; i++) { 
\ 4239 a = vext_elem_mask(vs1, i); \ 4240 b = vext_elem_mask(vs2, i); \ 4241 vext_set_elem_mask(vd, i, OP(b, a)); \ 4242 } \ 4243 } 4244 4245 #define DO_NAND(N, M) (!(N & M)) 4246 #define DO_ANDNOT(N, M) (N & !M) 4247 #define DO_NOR(N, M) (!(N | M)) 4248 #define DO_ORNOT(N, M) (N | !M) 4249 #define DO_XNOR(N, M) (!(N ^ M)) 4250 4251 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4252 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4253 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) 4254 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4255 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4256 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4257 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) 4258 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4259 4260 /* Vector count population in mask vcpop */ 4261 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4262 uint32_t desc) 4263 { 4264 target_ulong cnt = 0; 4265 uint32_t vm = vext_vm(desc); 4266 uint32_t vl = env->vl; 4267 int i; 4268 4269 for (i = 0; i < vl; i++) { 4270 if (vm || vext_elem_mask(v0, i)) { 4271 if (vext_elem_mask(vs2, i)) { 4272 cnt++; 4273 } 4274 } 4275 } 4276 return cnt; 4277 } 4278 4279 /* vfirst find-first-set mask bit*/ 4280 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4281 uint32_t desc) 4282 { 4283 uint32_t vm = vext_vm(desc); 4284 uint32_t vl = env->vl; 4285 int i; 4286 4287 for (i = 0; i < vl; i++) { 4288 if (vm || vext_elem_mask(v0, i)) { 4289 if (vext_elem_mask(vs2, i)) { 4290 return i; 4291 } 4292 } 4293 } 4294 return -1LL; 4295 } 4296 4297 enum set_mask_type { 4298 ONLY_FIRST = 1, 4299 INCLUDE_FIRST, 4300 BEFORE_FIRST, 4301 }; 4302 4303 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4304 uint32_t desc, enum set_mask_type type) 4305 { 4306 uint32_t vm = vext_vm(desc); 4307 uint32_t vl = env->vl; 4308 int i; 4309 bool first_mask_bit = false; 4310 4311 for (i = 0; i < vl; i++) { 4312 if (!vm && !vext_elem_mask(v0, i)) { 4313 continue; 4314 } 4315 /* write a zero to all following active elements */ 4316 if (first_mask_bit) { 4317 vext_set_elem_mask(vd, i, 0); 4318 continue; 4319 } 4320 if (vext_elem_mask(vs2, i)) { 4321 first_mask_bit = true; 4322 if (type == BEFORE_FIRST) { 4323 vext_set_elem_mask(vd, i, 0); 4324 } else { 4325 vext_set_elem_mask(vd, i, 1); 4326 } 4327 } else { 4328 if (type == ONLY_FIRST) { 4329 vext_set_elem_mask(vd, i, 0); 4330 } else { 4331 vext_set_elem_mask(vd, i, 1); 4332 } 4333 } 4334 } 4335 } 4336 4337 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4338 uint32_t desc) 4339 { 4340 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4341 } 4342 4343 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4344 uint32_t desc) 4345 { 4346 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4347 } 4348 4349 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4350 uint32_t desc) 4351 { 4352 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4353 } 4354 4355 /* Vector Iota Instruction */ 4356 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4357 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4358 uint32_t desc) \ 4359 { \ 4360 uint32_t vm = vext_vm(desc); \ 4361 uint32_t vl = env->vl; \ 4362 uint32_t sum = 0; \ 4363 int i; \ 4364 \ 4365 for (i = 0; i < vl; i++) { \ 4366 if (!vm && !vext_elem_mask(v0, i)) { \ 4367 continue; \ 4368 } \ 4369 *((ETYPE *)vd + H(i)) = sum; \ 4370 if (vext_elem_mask(vs2, i)) { \ 4371 sum++; \ 4372 } \ 4373 } \ 4374 } 4375 4376 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4377 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4378 
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4379 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4380 4381 /* Vector Element Index Instruction */ 4382 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4383 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4384 { \ 4385 uint32_t vm = vext_vm(desc); \ 4386 uint32_t vl = env->vl; \ 4387 int i; \ 4388 \ 4389 for (i = 0; i < vl; i++) { \ 4390 if (!vm && !vext_elem_mask(v0, i)) { \ 4391 continue; \ 4392 } \ 4393 *((ETYPE *)vd + H(i)) = i; \ 4394 } \ 4395 } 4396 4397 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4398 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4399 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4400 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4401 4402 /* 4403 *** Vector Permutation Instructions 4404 */ 4405 4406 /* Vector Slide Instructions */ 4407 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4408 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4409 CPURISCVState *env, uint32_t desc) \ 4410 { \ 4411 uint32_t vm = vext_vm(desc); \ 4412 uint32_t vl = env->vl; \ 4413 target_ulong offset = s1, i; \ 4414 \ 4415 for (i = offset; i < vl; i++) { \ 4416 if (!vm && !vext_elem_mask(v0, i)) { \ 4417 continue; \ 4418 } \ 4419 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4420 } \ 4421 } 4422 4423 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4424 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4425 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4426 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4427 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4428 4429 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4430 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4431 CPURISCVState *env, uint32_t desc) \ 4432 { \ 4433 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4434 uint32_t vm = vext_vm(desc); \ 4435 uint32_t vl = env->vl; \ 4436 target_ulong i_max, i; \ 4437 \ 4438 i_max = MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl); \ 4439 for (i = 0; i < i_max; ++i) { \ 4440 if (vm || vext_elem_mask(v0, i)) { \ 4441 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4442 } \ 4443 } \ 4444 \ 4445 for (i = i_max; i < vl; ++i) { \ 4446 if (vm || vext_elem_mask(v0, i)) { \ 4447 *((ETYPE *)vd + H(i)) = 0; \ 4448 } \ 4449 } \ 4450 } 4451 4452 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4453 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4454 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4455 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4456 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4457 4458 #define GEN_VEXT_VSLIE1UP(ESZ, H) \ 4459 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4460 CPURISCVState *env, uint32_t desc) \ 4461 { \ 4462 typedef uint##ESZ##_t ETYPE; \ 4463 uint32_t vm = vext_vm(desc); \ 4464 uint32_t vl = env->vl; \ 4465 uint32_t i; \ 4466 \ 4467 for (i = 0; i < vl; i++) { \ 4468 if (!vm && !vext_elem_mask(v0, i)) { \ 4469 continue; \ 4470 } \ 4471 if (i == 0) { \ 4472 *((ETYPE *)vd + H(i)) = s1; \ 4473 } else { \ 4474 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4475 } \ 4476 } \ 4477 } 4478 4479 GEN_VEXT_VSLIE1UP(8, H1) 4480 GEN_VEXT_VSLIE1UP(16, H2) 4481 GEN_VEXT_VSLIE1UP(32, H4) 4482 GEN_VEXT_VSLIE1UP(64, H8) 4483 4484 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \ 4485 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4486 CPURISCVState *env, uint32_t desc) \ 4487 { \ 4488 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4489 } 4490 4491 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4492 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4493 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4494 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4495 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4496 4497 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \ 4498 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4499 CPURISCVState *env, uint32_t desc) \ 4500 { \ 4501 typedef uint##ESZ##_t ETYPE; \ 4502 uint32_t vm = vext_vm(desc); \ 4503 uint32_t vl = env->vl; \ 4504 uint32_t i; \ 4505 \ 4506 for (i = 0; i < vl; i++) { \ 4507 if (!vm && !vext_elem_mask(v0, i)) { \ 4508 continue; \ 4509 } \ 4510 if (i == vl - 1) { \ 4511 *((ETYPE *)vd + H(i)) = s1; \ 4512 } else { \ 4513 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4514 } \ 4515 } \ 4516 } 4517 4518 GEN_VEXT_VSLIDE1DOWN(8, H1) 4519 GEN_VEXT_VSLIDE1DOWN(16, H2) 4520 GEN_VEXT_VSLIDE1DOWN(32, H4) 4521 GEN_VEXT_VSLIDE1DOWN(64, H8) 4522 4523 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \ 4524 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4525 CPURISCVState *env, uint32_t desc) \ 4526 { \ 4527 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4528 } 4529 4530 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4531 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4532 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4533 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4534 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4535 4536 /* Vector Floating-Point Slide Instructions */ 4537 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \ 4538 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4539 CPURISCVState *env, uint32_t desc) \ 4540 { \ 4541 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4542 } 4543 4544 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */ 4545 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4546 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4547 
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4548 4549 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \ 4550 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4551 CPURISCVState *env, uint32_t desc) \ 4552 { \ 4553 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4554 } 4555 4556 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4557 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4558 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4559 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4560 4561 /* Vector Register Gather Instruction */ 4562 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4563 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4564 CPURISCVState *env, uint32_t desc) \ 4565 { \ 4566 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS1))); \ 4567 uint32_t vm = vext_vm(desc); \ 4568 uint32_t vl = env->vl; \ 4569 uint64_t index; \ 4570 uint32_t i; \ 4571 \ 4572 for (i = 0; i < vl; i++) { \ 4573 if (!vm && !vext_elem_mask(v0, i)) { \ 4574 continue; \ 4575 } \ 4576 index = *((TS1 *)vs1 + HS1(i)); \ 4577 if (index >= vlmax) { \ 4578 *((TS2 *)vd + HS2(i)) = 0; \ 4579 } else { \ 4580 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4581 } \ 4582 } \ 4583 } 4584 4585 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 4586 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4587 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4588 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4589 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4590 4591 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4592 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4593 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4594 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4595 4596 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4597 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4598 CPURISCVState *env, uint32_t desc) \ 4599 { \ 4600 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4601 uint32_t vm = vext_vm(desc); \ 4602 uint32_t vl = env->vl; \ 4603 uint64_t index = s1; \ 4604 uint32_t i; \ 4605 \ 4606 for (i = 0; i < vl; i++) { \ 4607 if (!vm && !vext_elem_mask(v0, i)) { \ 4608 continue; \ 4609 } \ 4610 if (index >= vlmax) { \ 4611 *((ETYPE *)vd + H(i)) = 0; \ 4612 } else { \ 4613 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4614 } \ 4615 } \ 4616 } 4617 4618 /* vd[i] = (x[rs1] >= VLMAX) ? 
0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = 0; i < vl; i++) {                                            \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
                  CPURISCVState *env, uint32_t desc)             \
{                                                                \
    uint32_t vl = env->vl;                                       \
    uint32_t vm = vext_vm(desc);                                 \
    uint32_t i;                                                  \
                                                                 \
    for (i = 0; i < vl; i++) {                                   \
        if (!vm && !vext_elem_mask(v0, i)) {                     \
            continue;                                            \
        }                                                        \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
    }                                                            \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
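
/*
 * For reference, a minimal sketch of what one of the instantiations above
 * expands to (kept in a comment so the helper is not defined twice).
 * vzext_vf2_h performs a zero-extension whose destination elements are
 * twice as wide as the source elements (vf2 = source EEW is SEW/2):
 *
 *   void HELPER(vzext_vf2_h)(void *vd, void *v0, void *vs2,
 *                            CPURISCVState *env, uint32_t desc)
 *   {
 *       uint32_t vl = env->vl;
 *       uint32_t vm = vext_vm(desc);
 *       uint32_t i;
 *
 *       for (i = 0; i < vl; i++) {
 *           if (!vm && !vext_elem_mask(v0, i)) {
 *               continue;
 *           }
 *           *((uint16_t *)vd + H2(i)) = *((uint8_t *)vs2 + H1(i));
 *       }
 *   }
 *
 * The vsext_* variants differ only in using signed element types, so the
 * implicit conversion in the assignment performs the sign extension.
 */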