/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    bool vill = FIELD_EX64(s2, VTYPE, VILL);
    target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * As simd_desc supports at most 256 bytes, the max vlen is 256 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check. In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, addr, curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, addr, curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        probe_pages(env, base + stride * i, nf << esz, ra, access_type);
    }
    /* do real access */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + stride * i + (k << esz);
            ldst_elem(env, addr, i + k * max_elems, vd,
ra); 245 k++; 246 } 247 } 248 } 249 250 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 251 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 252 target_ulong stride, CPURISCVState *env, \ 253 uint32_t desc) \ 254 { \ 255 uint32_t vm = vext_vm(desc); \ 256 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 257 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 258 } 259 260 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 261 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 262 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 263 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 264 265 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 266 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 267 target_ulong stride, CPURISCVState *env, \ 268 uint32_t desc) \ 269 { \ 270 uint32_t vm = vext_vm(desc); \ 271 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 272 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \ 273 } 274 275 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 276 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 277 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 278 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 279 280 /* 281 *** unit-stride: access elements stored contiguously in memory 282 */ 283 284 /* unmasked unit-stride load and store operation*/ 285 static void 286 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 287 vext_ldst_elem_fn *ldst_elem, 288 uint32_t esz, uintptr_t ra, MMUAccessType access_type) 289 { 290 uint32_t i, k; 291 uint32_t nf = vext_nf(desc); 292 uint32_t max_elems = vext_max_elems(desc, esz); 293 294 /* probe every access */ 295 probe_pages(env, base, env->vl * (nf << esz), ra, access_type); 296 /* load bytes from guest memory */ 297 for (i = 0; i < env->vl; i++) { 298 k = 0; 299 while (k < nf) { 300 target_ulong addr = base + ((i * nf + k) << esz); 301 ldst_elem(env, addr, i + k * max_elems, vd, ra); 302 k++; 303 } 304 } 305 } 306 307 /* 308 * masked unit-stride load and store operation will be a special case of stride, 309 * stride = NF * sizeof (MTYPE) 310 */ 311 312 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 313 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 314 CPURISCVState *env, uint32_t desc) \ 315 { \ 316 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 317 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 318 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 319 } \ 320 \ 321 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 322 CPURISCVState *env, uint32_t desc) \ 323 { \ 324 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 325 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 326 } 327 328 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 329 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 330 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 331 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 332 333 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 334 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 335 CPURISCVState *env, uint32_t desc) \ 336 { \ 337 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 338 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 339 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \ 340 } \ 341 \ 342 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 343 CPURISCVState *env, uint32_t desc) \ 344 { \ 345 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 346 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \ 347 } 348 349 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 350 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 351 
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 352 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 353 354 /* 355 *** index: access vector element from indexed memory 356 */ 357 typedef target_ulong vext_get_index_addr(target_ulong base, 358 uint32_t idx, void *vs2); 359 360 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 361 static target_ulong NAME(target_ulong base, \ 362 uint32_t idx, void *vs2) \ 363 { \ 364 return (base + *((ETYPE *)vs2 + H(idx))); \ 365 } 366 367 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 368 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 369 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 370 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 371 372 static inline void 373 vext_ldst_index(void *vd, void *v0, target_ulong base, 374 void *vs2, CPURISCVState *env, uint32_t desc, 375 vext_get_index_addr get_index_addr, 376 vext_ldst_elem_fn *ldst_elem, 377 uint32_t esz, uintptr_t ra, MMUAccessType access_type) 378 { 379 uint32_t i, k; 380 uint32_t nf = vext_nf(desc); 381 uint32_t vm = vext_vm(desc); 382 uint32_t max_elems = vext_max_elems(desc, esz); 383 384 /* probe every access*/ 385 for (i = 0; i < env->vl; i++) { 386 if (!vm && !vext_elem_mask(v0, i)) { 387 continue; 388 } 389 probe_pages(env, get_index_addr(base, i, vs2), nf << esz, ra, 390 access_type); 391 } 392 /* load bytes from guest memory */ 393 for (i = 0; i < env->vl; i++) { 394 k = 0; 395 if (!vm && !vext_elem_mask(v0, i)) { 396 continue; 397 } 398 while (k < nf) { 399 abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz); 400 ldst_elem(env, addr, i + k * max_elems, vd, ra); 401 k++; 402 } 403 } 404 } 405 406 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 407 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 408 void *vs2, CPURISCVState *env, uint32_t desc) \ 409 { \ 410 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 411 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 412 } 413 414 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 415 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 416 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 417 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 418 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 419 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 420 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 421 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 422 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 423 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 424 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 425 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 426 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 427 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 428 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 429 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 430 431 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 432 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 433 void *vs2, CPURISCVState *env, uint32_t desc) \ 434 { \ 435 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 436 STORE_FN, ctzl(sizeof(ETYPE)), \ 437 GETPC(), MMU_DATA_STORE); \ 438 } 439 440 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b) 441 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h) 442 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w) 443 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d) 444 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b) 445 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h) 446 
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = base + i * (nf << esz);
        if (i == 0) {
            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr += offset;
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ?
(M) : (N)) 549 550 /* Unsigned min/max */ 551 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 552 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 553 554 /* 555 *** load and store whole register instructions 556 */ 557 static void 558 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 559 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra, 560 MMUAccessType access_type) 561 { 562 uint32_t i, k; 563 uint32_t nf = vext_nf(desc); 564 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 565 uint32_t max_elems = vlenb >> esz; 566 567 /* probe every access */ 568 probe_pages(env, base, vlenb * nf, ra, access_type); 569 570 /* load bytes from guest memory */ 571 for (k = 0; k < nf; k++) { 572 for (i = 0; i < max_elems; i++) { 573 target_ulong addr = base + ((i + k * max_elems) << esz); 574 ldst_elem(env, addr, i + k * max_elems, vd, ra); 575 } 576 } 577 } 578 579 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 580 void HELPER(NAME)(void *vd, target_ulong base, \ 581 CPURISCVState *env, uint32_t desc) \ 582 { \ 583 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 584 ctzl(sizeof(ETYPE)), GETPC(), \ 585 MMU_DATA_LOAD); \ 586 } 587 588 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 589 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 590 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 591 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 592 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 593 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 594 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 595 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 596 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 597 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 598 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 599 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 600 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 601 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 602 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 603 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 604 605 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 606 void HELPER(NAME)(void *vd, target_ulong base, \ 607 CPURISCVState *env, uint32_t desc) \ 608 { \ 609 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 610 ctzl(sizeof(ETYPE)), GETPC(), \ 611 MMU_DATA_STORE); \ 612 } 613 614 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 615 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 616 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 617 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 618 619 /* 620 *** Vector Integer Arithmetic Instructions 621 */ 622 623 /* expand macro args before macro */ 624 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 625 626 /* (TD, T1, T2, TX1, TX2) */ 627 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 628 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 629 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 630 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 631 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 632 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 633 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 634 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 635 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 636 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 637 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 638 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 639 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 640 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 641 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 642 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 643 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 644 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 645 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 646 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 647 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 648 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 649 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 650 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 651 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 652 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 653 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 654 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 655 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 656 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 657 658 /* operation of two vector elements */ 659 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 660 661 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 662 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 663 { \ 664 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 665 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 666 *((TD *)vd + HD(i)) = OP(s2, s1); \ 667 } 668 #define DO_SUB(N, M) (N - M) 669 #define DO_RSUB(N, M) (M - N) 670 671 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 672 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 673 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 674 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 675 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 676 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 677 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 678 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 679 680 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 681 CPURISCVState *env, uint32_t desc, 682 uint32_t esz, uint32_t dsz, 683 opivv2_fn *fn) 684 { 685 uint32_t vm = vext_vm(desc); 686 uint32_t vl = env->vl; 687 uint32_t i; 688 689 for (i = 0; i < vl; i++) { 690 if (!vm && !vext_elem_mask(v0, i)) { 691 continue; 692 } 693 fn(vd, vs1, vs2, i); 694 } 695 } 696 697 /* generate the helpers for OPIVV */ 698 #define GEN_VEXT_VV(NAME, ESZ, DSZ) \ 699 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 700 void *vs2, CPURISCVState 
*env, \ 701 uint32_t desc) \ 702 { \ 703 do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 704 do_##NAME); \ 705 } 706 707 GEN_VEXT_VV(vadd_vv_b, 1, 1) 708 GEN_VEXT_VV(vadd_vv_h, 2, 2) 709 GEN_VEXT_VV(vadd_vv_w, 4, 4) 710 GEN_VEXT_VV(vadd_vv_d, 8, 8) 711 GEN_VEXT_VV(vsub_vv_b, 1, 1) 712 GEN_VEXT_VV(vsub_vv_h, 2, 2) 713 GEN_VEXT_VV(vsub_vv_w, 4, 4) 714 GEN_VEXT_VV(vsub_vv_d, 8, 8) 715 716 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 717 718 /* 719 * (T1)s1 gives the real operator type. 720 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 721 */ 722 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 723 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 724 { \ 725 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 726 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 727 } 728 729 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 730 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 731 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 732 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 733 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 734 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 735 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 736 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 737 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 738 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 739 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 740 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 741 742 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 743 CPURISCVState *env, uint32_t desc, 744 uint32_t esz, uint32_t dsz, 745 opivx2_fn fn) 746 { 747 uint32_t vm = vext_vm(desc); 748 uint32_t vl = env->vl; 749 uint32_t i; 750 751 for (i = 0; i < vl; i++) { 752 if (!vm && !vext_elem_mask(v0, i)) { 753 continue; 754 } 755 fn(vd, s1, vs2, i); 756 } 757 } 758 759 /* generate the helpers for OPIVX */ 760 #define GEN_VEXT_VX(NAME, ESZ, DSZ) \ 761 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 762 void *vs2, CPURISCVState *env, \ 763 uint32_t desc) \ 764 { \ 765 do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 766 do_##NAME); \ 767 } 768 769 GEN_VEXT_VX(vadd_vx_b, 1, 1) 770 GEN_VEXT_VX(vadd_vx_h, 2, 2) 771 GEN_VEXT_VX(vadd_vx_w, 4, 4) 772 GEN_VEXT_VX(vadd_vx_d, 8, 8) 773 GEN_VEXT_VX(vsub_vx_b, 1, 1) 774 GEN_VEXT_VX(vsub_vx_h, 2, 2) 775 GEN_VEXT_VX(vsub_vx_w, 4, 4) 776 GEN_VEXT_VX(vsub_vx_d, 8, 8) 777 GEN_VEXT_VX(vrsub_vx_b, 1, 1) 778 GEN_VEXT_VX(vrsub_vx_h, 2, 2) 779 GEN_VEXT_VX(vrsub_vx_w, 4, 4) 780 GEN_VEXT_VX(vrsub_vx_d, 8, 8) 781 782 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 783 { 784 intptr_t oprsz = simd_oprsz(desc); 785 intptr_t i; 786 787 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 788 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 789 } 790 } 791 792 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 793 { 794 intptr_t oprsz = simd_oprsz(desc); 795 intptr_t i; 796 797 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 798 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 799 } 800 } 801 802 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 803 { 804 intptr_t oprsz = simd_oprsz(desc); 805 intptr_t i; 806 807 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 808 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 809 } 810 } 811 812 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 813 { 814 intptr_t oprsz = simd_oprsz(desc); 815 intptr_t i; 816 
817 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 818 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 819 } 820 } 821 822 /* Vector Widening Integer Add/Subtract */ 823 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 824 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 825 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 826 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 827 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 828 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 829 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 830 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 831 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 832 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 833 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 834 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 835 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 836 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 837 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 838 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 839 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 840 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 841 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 842 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 843 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 844 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 845 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 846 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 847 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 848 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 849 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 850 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 851 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 852 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 853 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 854 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 855 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 856 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 857 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 858 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 859 GEN_VEXT_VV(vwaddu_vv_b, 1, 2) 860 GEN_VEXT_VV(vwaddu_vv_h, 2, 4) 861 GEN_VEXT_VV(vwaddu_vv_w, 4, 8) 862 GEN_VEXT_VV(vwsubu_vv_b, 1, 2) 863 GEN_VEXT_VV(vwsubu_vv_h, 2, 4) 864 GEN_VEXT_VV(vwsubu_vv_w, 4, 8) 865 GEN_VEXT_VV(vwadd_vv_b, 1, 2) 866 GEN_VEXT_VV(vwadd_vv_h, 2, 4) 867 GEN_VEXT_VV(vwadd_vv_w, 4, 8) 868 GEN_VEXT_VV(vwsub_vv_b, 1, 2) 869 GEN_VEXT_VV(vwsub_vv_h, 2, 4) 870 GEN_VEXT_VV(vwsub_vv_w, 4, 8) 871 GEN_VEXT_VV(vwaddu_wv_b, 1, 2) 872 GEN_VEXT_VV(vwaddu_wv_h, 2, 4) 873 GEN_VEXT_VV(vwaddu_wv_w, 4, 8) 874 GEN_VEXT_VV(vwsubu_wv_b, 1, 2) 875 GEN_VEXT_VV(vwsubu_wv_h, 2, 4) 876 GEN_VEXT_VV(vwsubu_wv_w, 4, 8) 877 GEN_VEXT_VV(vwadd_wv_b, 1, 2) 878 GEN_VEXT_VV(vwadd_wv_h, 2, 4) 879 GEN_VEXT_VV(vwadd_wv_w, 4, 8) 880 GEN_VEXT_VV(vwsub_wv_b, 1, 2) 881 GEN_VEXT_VV(vwsub_wv_h, 2, 4) 882 GEN_VEXT_VV(vwsub_wv_w, 4, 8) 883 884 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 885 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 886 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 887 
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 888 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 889 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 890 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 891 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 892 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 893 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 894 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 895 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 896 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 897 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 898 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 899 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 900 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 901 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 902 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 903 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 904 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 905 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 906 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 907 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 908 GEN_VEXT_VX(vwaddu_vx_b, 1, 2) 909 GEN_VEXT_VX(vwaddu_vx_h, 2, 4) 910 GEN_VEXT_VX(vwaddu_vx_w, 4, 8) 911 GEN_VEXT_VX(vwsubu_vx_b, 1, 2) 912 GEN_VEXT_VX(vwsubu_vx_h, 2, 4) 913 GEN_VEXT_VX(vwsubu_vx_w, 4, 8) 914 GEN_VEXT_VX(vwadd_vx_b, 1, 2) 915 GEN_VEXT_VX(vwadd_vx_h, 2, 4) 916 GEN_VEXT_VX(vwadd_vx_w, 4, 8) 917 GEN_VEXT_VX(vwsub_vx_b, 1, 2) 918 GEN_VEXT_VX(vwsub_vx_h, 2, 4) 919 GEN_VEXT_VX(vwsub_vx_w, 4, 8) 920 GEN_VEXT_VX(vwaddu_wx_b, 1, 2) 921 GEN_VEXT_VX(vwaddu_wx_h, 2, 4) 922 GEN_VEXT_VX(vwaddu_wx_w, 4, 8) 923 GEN_VEXT_VX(vwsubu_wx_b, 1, 2) 924 GEN_VEXT_VX(vwsubu_wx_h, 2, 4) 925 GEN_VEXT_VX(vwsubu_wx_w, 4, 8) 926 GEN_VEXT_VX(vwadd_wx_b, 1, 2) 927 GEN_VEXT_VX(vwadd_wx_h, 2, 4) 928 GEN_VEXT_VX(vwadd_wx_w, 4, 8) 929 GEN_VEXT_VX(vwsub_wx_b, 1, 2) 930 GEN_VEXT_VX(vwsub_wx_h, 2, 4) 931 GEN_VEXT_VX(vwsub_wx_w, 4, 8) 932 933 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 934 #define DO_VADC(N, M, C) (N + M + C) 935 #define DO_VSBC(N, M, C) (N - M - C) 936 937 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 938 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 939 CPURISCVState *env, uint32_t desc) \ 940 { \ 941 uint32_t vl = env->vl; \ 942 uint32_t i; \ 943 \ 944 for (i = 0; i < vl; i++) { \ 945 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 946 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 947 ETYPE carry = vext_elem_mask(v0, i); \ 948 \ 949 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 950 } \ 951 } 952 953 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 954 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 955 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 956 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 957 958 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 959 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 960 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 961 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 962 963 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 964 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 965 CPURISCVState *env, uint32_t desc) \ 966 { \ 967 uint32_t vl = env->vl; \ 968 uint32_t i; \ 969 \ 970 for (i = 0; i < vl; i++) { \ 971 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 972 ETYPE carry = vext_elem_mask(v0, i); \ 973 \ 974 *((ETYPE *)vd + H(i)) = 
DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 975 } \ 976 } 977 978 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 979 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 980 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 981 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 982 983 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 984 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 985 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 986 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 987 988 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 989 (__typeof(N))(N + M) < N) 990 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 991 992 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 993 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 994 CPURISCVState *env, uint32_t desc) \ 995 { \ 996 uint32_t vl = env->vl; \ 997 uint32_t vm = vext_vm(desc); \ 998 uint32_t i; \ 999 \ 1000 for (i = 0; i < vl; i++) { \ 1001 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1002 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1003 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1004 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1005 } \ 1006 } 1007 1008 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1009 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1010 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1011 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1012 1013 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1014 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1015 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1016 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1017 1018 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1019 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1020 void *vs2, CPURISCVState *env, uint32_t desc) \ 1021 { \ 1022 uint32_t vl = env->vl; \ 1023 uint32_t vm = vext_vm(desc); \ 1024 uint32_t i; \ 1025 \ 1026 for (i = 0; i < vl; i++) { \ 1027 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1028 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1029 vext_set_elem_mask(vd, i, \ 1030 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1031 } \ 1032 } 1033 1034 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1035 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1036 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1037 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1038 1039 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1040 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1041 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1042 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1043 1044 /* Vector Bitwise Logical Instructions */ 1045 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1046 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1047 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1048 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1049 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1050 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1051 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1052 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1053 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1054 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1055 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1056 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1057 GEN_VEXT_VV(vand_vv_b, 1, 1) 1058 GEN_VEXT_VV(vand_vv_h, 2, 2) 1059 GEN_VEXT_VV(vand_vv_w, 4, 4) 1060 
GEN_VEXT_VV(vand_vv_d, 8, 8) 1061 GEN_VEXT_VV(vor_vv_b, 1, 1) 1062 GEN_VEXT_VV(vor_vv_h, 2, 2) 1063 GEN_VEXT_VV(vor_vv_w, 4, 4) 1064 GEN_VEXT_VV(vor_vv_d, 8, 8) 1065 GEN_VEXT_VV(vxor_vv_b, 1, 1) 1066 GEN_VEXT_VV(vxor_vv_h, 2, 2) 1067 GEN_VEXT_VV(vxor_vv_w, 4, 4) 1068 GEN_VEXT_VV(vxor_vv_d, 8, 8) 1069 1070 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1071 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1072 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1073 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1074 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1075 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1076 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1077 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1078 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1079 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1080 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1081 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1082 GEN_VEXT_VX(vand_vx_b, 1, 1) 1083 GEN_VEXT_VX(vand_vx_h, 2, 2) 1084 GEN_VEXT_VX(vand_vx_w, 4, 4) 1085 GEN_VEXT_VX(vand_vx_d, 8, 8) 1086 GEN_VEXT_VX(vor_vx_b, 1, 1) 1087 GEN_VEXT_VX(vor_vx_h, 2, 2) 1088 GEN_VEXT_VX(vor_vx_w, 4, 4) 1089 GEN_VEXT_VX(vor_vx_d, 8, 8) 1090 GEN_VEXT_VX(vxor_vx_b, 1, 1) 1091 GEN_VEXT_VX(vxor_vx_h, 2, 2) 1092 GEN_VEXT_VX(vxor_vx_w, 4, 4) 1093 GEN_VEXT_VX(vxor_vx_d, 8, 8) 1094 1095 /* Vector Single-Width Bit Shift Instructions */ 1096 #define DO_SLL(N, M) (N << (M)) 1097 #define DO_SRL(N, M) (N >> (M)) 1098 1099 /* generate the helpers for shift instructions with two vector operators */ 1100 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1101 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1102 void *vs2, CPURISCVState *env, uint32_t desc) \ 1103 { \ 1104 uint32_t vm = vext_vm(desc); \ 1105 uint32_t vl = env->vl; \ 1106 uint32_t i; \ 1107 \ 1108 for (i = 0; i < vl; i++) { \ 1109 if (!vm && !vext_elem_mask(v0, i)) { \ 1110 continue; \ 1111 } \ 1112 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1113 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1114 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1115 } \ 1116 } 1117 1118 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1119 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1120 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1121 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1122 1123 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1124 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1125 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1126 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1127 1128 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1129 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1130 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1131 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1132 1133 /* generate the helpers for shift instructions with one vector and one scalar */ 1134 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1135 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1136 void *vs2, CPURISCVState *env, uint32_t desc) \ 1137 { \ 1138 uint32_t vm = vext_vm(desc); \ 1139 uint32_t vl = env->vl; \ 1140 uint32_t i; \ 1141 \ 1142 for (i = 0; i < vl; i++) { \ 1143 if (!vm && !vext_elem_mask(v0, i)) { \ 1144 continue; \ 1145 } \ 1146 TS2 s2 = *((TS2 
*)vs2 + HS2(i)); \ 1147 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1148 } \ 1149 } 1150 1151 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1152 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1153 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1154 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1155 1156 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1157 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1158 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1159 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1160 1161 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1162 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1163 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1164 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1165 1166 /* Vector Narrowing Integer Right Shift Instructions */ 1167 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1168 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1169 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1170 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1171 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1172 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1173 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1174 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1175 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1176 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1177 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1178 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1179 1180 /* Vector Integer Comparison Instructions */ 1181 #define DO_MSEQ(N, M) (N == M) 1182 #define DO_MSNE(N, M) (N != M) 1183 #define DO_MSLT(N, M) (N < M) 1184 #define DO_MSLE(N, M) (N <= M) 1185 #define DO_MSGT(N, M) (N > M) 1186 1187 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1188 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1189 CPURISCVState *env, uint32_t desc) \ 1190 { \ 1191 uint32_t vm = vext_vm(desc); \ 1192 uint32_t vl = env->vl; \ 1193 uint32_t vlmax = vext_max_elems(desc, \ 1194 ctzl(sizeof(ETYPE))); \ 1195 uint32_t i; \ 1196 \ 1197 for (i = 0; i < vl; i++) { \ 1198 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1199 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1200 if (!vm && !vext_elem_mask(v0, i)) { \ 1201 continue; \ 1202 } \ 1203 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1204 } \ 1205 for (; i < vlmax; i++) { \ 1206 vext_set_elem_mask(vd, i, 0); \ 1207 } \ 1208 } 1209 1210 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1211 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1212 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1213 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1214 1215 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1216 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1217 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1218 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1219 1220 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1221 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1222 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1223 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 
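/*
 * The unsigned and signed compares share DO_MSLT/DO_MSLE; the signedness
 * of the comparison comes entirely from the ETYPE used in the expansion
 * (uintN_t above for vmsltu, intN_t below for vmslt).
 */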
1224 1225 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1226 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1227 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1228 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1229 1230 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1231 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1232 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1233 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1234 1235 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1236 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1237 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1238 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1239 1240 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1241 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1242 CPURISCVState *env, uint32_t desc) \ 1243 { \ 1244 uint32_t vm = vext_vm(desc); \ 1245 uint32_t vl = env->vl; \ 1246 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 1247 uint32_t i; \ 1248 \ 1249 for (i = 0; i < vl; i++) { \ 1250 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1251 if (!vm && !vext_elem_mask(v0, i)) { \ 1252 continue; \ 1253 } \ 1254 vext_set_elem_mask(vd, i, \ 1255 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1256 } \ 1257 for (; i < vlmax; i++) { \ 1258 vext_set_elem_mask(vd, i, 0); \ 1259 } \ 1260 } 1261 1262 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1263 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1264 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1265 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1266 1267 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1268 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1269 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1270 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1271 1272 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1273 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1274 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1275 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1276 1277 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1278 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1279 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1280 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1281 1282 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1283 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1284 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1285 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1286 1287 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1288 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1289 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1290 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1291 1292 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1293 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1294 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1295 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1296 1297 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1298 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1299 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1300 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1301 1302 /* Vector Integer Min/Max Instructions */ 1303 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1304 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1305 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1306 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1307 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1308 RVVCALL(OPIVV2, 
vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1309 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1310 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1311 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1312 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1313 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1314 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1315 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1316 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1317 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1318 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1319 GEN_VEXT_VV(vminu_vv_b, 1, 1) 1320 GEN_VEXT_VV(vminu_vv_h, 2, 2) 1321 GEN_VEXT_VV(vminu_vv_w, 4, 4) 1322 GEN_VEXT_VV(vminu_vv_d, 8, 8) 1323 GEN_VEXT_VV(vmin_vv_b, 1, 1) 1324 GEN_VEXT_VV(vmin_vv_h, 2, 2) 1325 GEN_VEXT_VV(vmin_vv_w, 4, 4) 1326 GEN_VEXT_VV(vmin_vv_d, 8, 8) 1327 GEN_VEXT_VV(vmaxu_vv_b, 1, 1) 1328 GEN_VEXT_VV(vmaxu_vv_h, 2, 2) 1329 GEN_VEXT_VV(vmaxu_vv_w, 4, 4) 1330 GEN_VEXT_VV(vmaxu_vv_d, 8, 8) 1331 GEN_VEXT_VV(vmax_vv_b, 1, 1) 1332 GEN_VEXT_VV(vmax_vv_h, 2, 2) 1333 GEN_VEXT_VV(vmax_vv_w, 4, 4) 1334 GEN_VEXT_VV(vmax_vv_d, 8, 8) 1335 1336 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1337 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1338 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1339 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1340 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1341 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1342 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1343 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1344 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1345 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1346 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1347 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1348 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1349 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1350 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1351 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1352 GEN_VEXT_VX(vminu_vx_b, 1, 1) 1353 GEN_VEXT_VX(vminu_vx_h, 2, 2) 1354 GEN_VEXT_VX(vminu_vx_w, 4, 4) 1355 GEN_VEXT_VX(vminu_vx_d, 8, 8) 1356 GEN_VEXT_VX(vmin_vx_b, 1, 1) 1357 GEN_VEXT_VX(vmin_vx_h, 2, 2) 1358 GEN_VEXT_VX(vmin_vx_w, 4, 4) 1359 GEN_VEXT_VX(vmin_vx_d, 8, 8) 1360 GEN_VEXT_VX(vmaxu_vx_b, 1, 1) 1361 GEN_VEXT_VX(vmaxu_vx_h, 2, 2) 1362 GEN_VEXT_VX(vmaxu_vx_w, 4, 4) 1363 GEN_VEXT_VX(vmaxu_vx_d, 8, 8) 1364 GEN_VEXT_VX(vmax_vx_b, 1, 1) 1365 GEN_VEXT_VX(vmax_vx_h, 2, 2) 1366 GEN_VEXT_VX(vmax_vx_w, 4, 4) 1367 GEN_VEXT_VX(vmax_vx_d, 8, 8) 1368 1369 /* Vector Single-Width Integer Multiply Instructions */ 1370 #define DO_MUL(N, M) (N * M) 1371 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1372 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1373 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1374 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1375 GEN_VEXT_VV(vmul_vv_b, 1, 1) 1376 GEN_VEXT_VV(vmul_vv_h, 2, 2) 1377 GEN_VEXT_VV(vmul_vv_w, 4, 4) 1378 GEN_VEXT_VV(vmul_vv_d, 8, 8) 1379 1380 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1381 { 1382 return (int16_t)s2 * (int16_t)s1 >> 8; 1383 } 1384 1385 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1386 { 1387 return (int32_t)s2 * (int32_t)s1 >> 16; 1388 } 1389 1390 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1391 { 1392 return (int64_t)s2 * (int64_t)s1 >> 32; 1393 } 1394 1395 
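/*
 * For 8/16/32-bit elements the high half of the product is obtained by
 * widening to the next element size and shifting, as above. For 64-bit
 * elements the product no longer fits in a standard integer type, so the
 * 64-bit helpers below use muls64()/mulu64() from "qemu/host-utils.h" to
 * compute the full 128-bit product and return its high half.
 */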
static int64_t do_mulh_d(int64_t s2, int64_t s1) 1396 { 1397 uint64_t hi_64, lo_64; 1398 1399 muls64(&lo_64, &hi_64, s1, s2); 1400 return hi_64; 1401 } 1402 1403 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1404 { 1405 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1406 } 1407 1408 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1409 { 1410 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1411 } 1412 1413 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1414 { 1415 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1416 } 1417 1418 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1419 { 1420 uint64_t hi_64, lo_64; 1421 1422 mulu64(&lo_64, &hi_64, s2, s1); 1423 return hi_64; 1424 } 1425 1426 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1427 { 1428 return (int16_t)s2 * (uint16_t)s1 >> 8; 1429 } 1430 1431 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1432 { 1433 return (int32_t)s2 * (uint32_t)s1 >> 16; 1434 } 1435 1436 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1437 { 1438 return (int64_t)s2 * (uint64_t)s1 >> 32; 1439 } 1440 1441 /* 1442 * Let A = signed operand, 1443 * B = unsigned operand 1444 * P = mulu64(A, B), unsigned product 1445 * 1446 * LET X = 2 ** 64 - A, 2's complement of A 1447 * SP = signed product 1448 * THEN 1449 * IF A < 0 1450 * SP = -X * B 1451 * = -(2 ** 64 - A) * B 1452 * = A * B - 2 ** 64 * B 1453 * = P - 2 ** 64 * B 1454 * ELSE 1455 * SP = P 1456 * THEN 1457 * HI_P -= (A < 0 ? B : 0) 1458 */ 1459 1460 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1461 { 1462 uint64_t hi_64, lo_64; 1463 1464 mulu64(&lo_64, &hi_64, s2, s1); 1465 1466 hi_64 -= s2 < 0 ? s1 : 0; 1467 return hi_64; 1468 } 1469 1470 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1471 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1472 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1473 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1474 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1475 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1476 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1477 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1478 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1479 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1480 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1481 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1482 GEN_VEXT_VV(vmulh_vv_b, 1, 1) 1483 GEN_VEXT_VV(vmulh_vv_h, 2, 2) 1484 GEN_VEXT_VV(vmulh_vv_w, 4, 4) 1485 GEN_VEXT_VV(vmulh_vv_d, 8, 8) 1486 GEN_VEXT_VV(vmulhu_vv_b, 1, 1) 1487 GEN_VEXT_VV(vmulhu_vv_h, 2, 2) 1488 GEN_VEXT_VV(vmulhu_vv_w, 4, 4) 1489 GEN_VEXT_VV(vmulhu_vv_d, 8, 8) 1490 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) 1491 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) 1492 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) 1493 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) 1494 1495 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1496 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1497 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1498 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1499 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1500 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1501 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1502 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1503 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1504 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1505 RVVCALL(OPIVX2, vmulhu_vx_w, 
OP_UUU_W, H4, H4, do_mulhu_w) 1506 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1507 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1508 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1509 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1510 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1511 GEN_VEXT_VX(vmul_vx_b, 1, 1) 1512 GEN_VEXT_VX(vmul_vx_h, 2, 2) 1513 GEN_VEXT_VX(vmul_vx_w, 4, 4) 1514 GEN_VEXT_VX(vmul_vx_d, 8, 8) 1515 GEN_VEXT_VX(vmulh_vx_b, 1, 1) 1516 GEN_VEXT_VX(vmulh_vx_h, 2, 2) 1517 GEN_VEXT_VX(vmulh_vx_w, 4, 4) 1518 GEN_VEXT_VX(vmulh_vx_d, 8, 8) 1519 GEN_VEXT_VX(vmulhu_vx_b, 1, 1) 1520 GEN_VEXT_VX(vmulhu_vx_h, 2, 2) 1521 GEN_VEXT_VX(vmulhu_vx_w, 4, 4) 1522 GEN_VEXT_VX(vmulhu_vx_d, 8, 8) 1523 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) 1524 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) 1525 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) 1526 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) 1527 1528 /* Vector Integer Divide Instructions */ 1529 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1530 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1531 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1532 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1533 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1534 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1535 1536 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1537 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1538 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1539 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1540 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1541 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1542 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1543 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1544 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1545 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1546 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1547 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1548 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1549 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1550 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1551 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1552 GEN_VEXT_VV(vdivu_vv_b, 1, 1) 1553 GEN_VEXT_VV(vdivu_vv_h, 2, 2) 1554 GEN_VEXT_VV(vdivu_vv_w, 4, 4) 1555 GEN_VEXT_VV(vdivu_vv_d, 8, 8) 1556 GEN_VEXT_VV(vdiv_vv_b, 1, 1) 1557 GEN_VEXT_VV(vdiv_vv_h, 2, 2) 1558 GEN_VEXT_VV(vdiv_vv_w, 4, 4) 1559 GEN_VEXT_VV(vdiv_vv_d, 8, 8) 1560 GEN_VEXT_VV(vremu_vv_b, 1, 1) 1561 GEN_VEXT_VV(vremu_vv_h, 2, 2) 1562 GEN_VEXT_VV(vremu_vv_w, 4, 4) 1563 GEN_VEXT_VV(vremu_vv_d, 8, 8) 1564 GEN_VEXT_VV(vrem_vv_b, 1, 1) 1565 GEN_VEXT_VV(vrem_vv_h, 2, 2) 1566 GEN_VEXT_VV(vrem_vv_w, 4, 4) 1567 GEN_VEXT_VV(vrem_vv_d, 8, 8) 1568 1569 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1570 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1571 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1572 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1573 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1574 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1575 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1576 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1577 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1578 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1579 RVVCALL(OPIVX2, vremu_vx_w, 
OP_UUU_W, H4, H4, DO_REMU) 1580 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1581 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1582 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1583 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1584 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1585 GEN_VEXT_VX(vdivu_vx_b, 1, 1) 1586 GEN_VEXT_VX(vdivu_vx_h, 2, 2) 1587 GEN_VEXT_VX(vdivu_vx_w, 4, 4) 1588 GEN_VEXT_VX(vdivu_vx_d, 8, 8) 1589 GEN_VEXT_VX(vdiv_vx_b, 1, 1) 1590 GEN_VEXT_VX(vdiv_vx_h, 2, 2) 1591 GEN_VEXT_VX(vdiv_vx_w, 4, 4) 1592 GEN_VEXT_VX(vdiv_vx_d, 8, 8) 1593 GEN_VEXT_VX(vremu_vx_b, 1, 1) 1594 GEN_VEXT_VX(vremu_vx_h, 2, 2) 1595 GEN_VEXT_VX(vremu_vx_w, 4, 4) 1596 GEN_VEXT_VX(vremu_vx_d, 8, 8) 1597 GEN_VEXT_VX(vrem_vx_b, 1, 1) 1598 GEN_VEXT_VX(vrem_vx_h, 2, 2) 1599 GEN_VEXT_VX(vrem_vx_w, 4, 4) 1600 GEN_VEXT_VX(vrem_vx_d, 8, 8) 1601 1602 /* Vector Widening Integer Multiply Instructions */ 1603 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1604 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1605 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1606 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1607 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1608 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1609 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1610 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1611 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1612 GEN_VEXT_VV(vwmul_vv_b, 1, 2) 1613 GEN_VEXT_VV(vwmul_vv_h, 2, 4) 1614 GEN_VEXT_VV(vwmul_vv_w, 4, 8) 1615 GEN_VEXT_VV(vwmulu_vv_b, 1, 2) 1616 GEN_VEXT_VV(vwmulu_vv_h, 2, 4) 1617 GEN_VEXT_VV(vwmulu_vv_w, 4, 8) 1618 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) 1619 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) 1620 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) 1621 1622 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1623 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1624 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1625 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1626 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1627 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1628 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1629 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1630 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1631 GEN_VEXT_VX(vwmul_vx_b, 1, 2) 1632 GEN_VEXT_VX(vwmul_vx_h, 2, 4) 1633 GEN_VEXT_VX(vwmul_vx_w, 4, 8) 1634 GEN_VEXT_VX(vwmulu_vx_b, 1, 2) 1635 GEN_VEXT_VX(vwmulu_vx_h, 2, 4) 1636 GEN_VEXT_VX(vwmulu_vx_w, 4, 8) 1637 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) 1638 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) 1639 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) 1640 1641 /* Vector Single-Width Integer Multiply-Add Instructions */ 1642 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1643 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1644 { \ 1645 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1646 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1647 TD d = *((TD *)vd + HD(i)); \ 1648 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1649 } 1650 1651 #define DO_MACC(N, M, D) (M * N + D) 1652 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1653 #define DO_MADD(N, M, D) (M * D + N) 1654 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1655 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1656 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1657 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1658 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1659 
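/*
 * For reference, a sketch of what one RVVCALL line above generates: assuming
 * OP_SSS_B supplies int8_t for every type parameter (as the OP_* type lists
 * elsewhere in this file do), RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1,
 * H1, DO_MACC) expands to approximately
 *
 *     static void do_vmacc_vv_b(void *vd, void *vs1, void *vs2, int i)
 *     {
 *         int8_t s1 = *((int8_t *)vs1 + H1(i));
 *         int8_t s2 = *((int8_t *)vs2 + H1(i));
 *         int8_t d = *((int8_t *)vd + H1(i));
 *         *((int8_t *)vd + H1(i)) = DO_MACC(s2, s1, d);
 *     }
 *
 * with DO_MACC(s2, s1, d) evaluating to s1 * s2 + d.  GEN_VEXT_VV(vmacc_vv_b,
 * 1, 1) below then wraps this per-element helper in the usual masked loop
 * over vl to produce the exported helper.
 */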
RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1660 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1661 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1662 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1663 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1664 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1665 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1666 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1667 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1668 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1669 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1670 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1671 GEN_VEXT_VV(vmacc_vv_b, 1, 1) 1672 GEN_VEXT_VV(vmacc_vv_h, 2, 2) 1673 GEN_VEXT_VV(vmacc_vv_w, 4, 4) 1674 GEN_VEXT_VV(vmacc_vv_d, 8, 8) 1675 GEN_VEXT_VV(vnmsac_vv_b, 1, 1) 1676 GEN_VEXT_VV(vnmsac_vv_h, 2, 2) 1677 GEN_VEXT_VV(vnmsac_vv_w, 4, 4) 1678 GEN_VEXT_VV(vnmsac_vv_d, 8, 8) 1679 GEN_VEXT_VV(vmadd_vv_b, 1, 1) 1680 GEN_VEXT_VV(vmadd_vv_h, 2, 2) 1681 GEN_VEXT_VV(vmadd_vv_w, 4, 4) 1682 GEN_VEXT_VV(vmadd_vv_d, 8, 8) 1683 GEN_VEXT_VV(vnmsub_vv_b, 1, 1) 1684 GEN_VEXT_VV(vnmsub_vv_h, 2, 2) 1685 GEN_VEXT_VV(vnmsub_vv_w, 4, 4) 1686 GEN_VEXT_VV(vnmsub_vv_d, 8, 8) 1687 1688 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1689 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1690 { \ 1691 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1692 TD d = *((TD *)vd + HD(i)); \ 1693 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1694 } 1695 1696 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1697 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1698 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1699 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1700 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1701 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1702 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1703 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1704 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1705 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1706 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1707 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1708 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1709 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1710 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1711 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1712 GEN_VEXT_VX(vmacc_vx_b, 1, 1) 1713 GEN_VEXT_VX(vmacc_vx_h, 2, 2) 1714 GEN_VEXT_VX(vmacc_vx_w, 4, 4) 1715 GEN_VEXT_VX(vmacc_vx_d, 8, 8) 1716 GEN_VEXT_VX(vnmsac_vx_b, 1, 1) 1717 GEN_VEXT_VX(vnmsac_vx_h, 2, 2) 1718 GEN_VEXT_VX(vnmsac_vx_w, 4, 4) 1719 GEN_VEXT_VX(vnmsac_vx_d, 8, 8) 1720 GEN_VEXT_VX(vmadd_vx_b, 1, 1) 1721 GEN_VEXT_VX(vmadd_vx_h, 2, 2) 1722 GEN_VEXT_VX(vmadd_vx_w, 4, 4) 1723 GEN_VEXT_VX(vmadd_vx_d, 8, 8) 1724 GEN_VEXT_VX(vnmsub_vx_b, 1, 1) 1725 GEN_VEXT_VX(vnmsub_vx_h, 2, 2) 1726 GEN_VEXT_VX(vnmsub_vx_w, 4, 4) 1727 GEN_VEXT_VX(vnmsub_vx_d, 8, 8) 1728 1729 /* Vector Widening Integer Multiply-Add Instructions */ 1730 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1731 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1732 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1733 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1734 RVVCALL(OPIVV3, vwmacc_vv_h, 
WOP_SSS_H, H4, H2, H2, DO_MACC) 1735 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1736 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1737 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1738 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1739 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) 1740 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) 1741 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) 1742 GEN_VEXT_VV(vwmacc_vv_b, 1, 2) 1743 GEN_VEXT_VV(vwmacc_vv_h, 2, 4) 1744 GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 1745 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) 1746 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) 1747 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) 1748 1749 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1750 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1751 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1752 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1753 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1754 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1755 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1756 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1757 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1758 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1759 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1760 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1761 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) 1762 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) 1763 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) 1764 GEN_VEXT_VX(vwmacc_vx_b, 1, 2) 1765 GEN_VEXT_VX(vwmacc_vx_h, 2, 4) 1766 GEN_VEXT_VX(vwmacc_vx_w, 4, 8) 1767 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) 1768 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) 1769 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) 1770 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) 1771 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) 1772 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) 1773 1774 /* Vector Integer Merge and Move Instructions */ 1775 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1776 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1777 uint32_t desc) \ 1778 { \ 1779 uint32_t vl = env->vl; \ 1780 uint32_t i; \ 1781 \ 1782 for (i = 0; i < vl; i++) { \ 1783 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1784 *((ETYPE *)vd + H(i)) = s1; \ 1785 } \ 1786 } 1787 1788 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1789 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1790 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1791 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1792 1793 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1794 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1795 uint32_t desc) \ 1796 { \ 1797 uint32_t vl = env->vl; \ 1798 uint32_t i; \ 1799 \ 1800 for (i = 0; i < vl; i++) { \ 1801 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1802 } \ 1803 } 1804 1805 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1806 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1807 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1808 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1809 1810 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1811 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1812 CPURISCVState *env, uint32_t desc) \ 1813 { \ 1814 uint32_t vl = env->vl; \ 1815 uint32_t i; \ 1816 \ 1817 for (i = 0; i < vl; i++) { \ 1818 ETYPE *vt = (!vext_elem_mask(v0, i) ? 
vs2 : vs1); \ 1819 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1820 } \ 1821 } 1822 1823 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1824 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1825 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1826 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1827 1828 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1829 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1830 void *vs2, CPURISCVState *env, uint32_t desc) \ 1831 { \ 1832 uint32_t vl = env->vl; \ 1833 uint32_t i; \ 1834 \ 1835 for (i = 0; i < vl; i++) { \ 1836 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1837 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 1838 (ETYPE)(target_long)s1); \ 1839 *((ETYPE *)vd + H(i)) = d; \ 1840 } \ 1841 } 1842 1843 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1844 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1845 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1846 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1847 1848 /* 1849 *** Vector Fixed-Point Arithmetic Instructions 1850 */ 1851 1852 /* Vector Single-Width Saturating Add and Subtract */ 1853 1854 /* 1855 * As fixed point instructions probably have round mode and saturation, 1856 * define common macros for fixed point here. 1857 */ 1858 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1859 CPURISCVState *env, int vxrm); 1860 1861 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1862 static inline void \ 1863 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1864 CPURISCVState *env, int vxrm) \ 1865 { \ 1866 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1867 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1868 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1869 } 1870 1871 static inline void 1872 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1873 CPURISCVState *env, 1874 uint32_t vl, uint32_t vm, int vxrm, 1875 opivv2_rm_fn *fn) 1876 { 1877 for (uint32_t i = 0; i < vl; i++) { 1878 if (!vm && !vext_elem_mask(v0, i)) { 1879 continue; 1880 } 1881 fn(vd, vs1, vs2, i, env, vxrm); 1882 } 1883 } 1884 1885 static inline void 1886 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1887 CPURISCVState *env, 1888 uint32_t desc, uint32_t esz, uint32_t dsz, 1889 opivv2_rm_fn *fn) 1890 { 1891 uint32_t vm = vext_vm(desc); 1892 uint32_t vl = env->vl; 1893 1894 switch (env->vxrm) { 1895 case 0: /* rnu */ 1896 vext_vv_rm_1(vd, v0, vs1, vs2, 1897 env, vl, vm, 0, fn); 1898 break; 1899 case 1: /* rne */ 1900 vext_vv_rm_1(vd, v0, vs1, vs2, 1901 env, vl, vm, 1, fn); 1902 break; 1903 case 2: /* rdn */ 1904 vext_vv_rm_1(vd, v0, vs1, vs2, 1905 env, vl, vm, 2, fn); 1906 break; 1907 default: /* rod */ 1908 vext_vv_rm_1(vd, v0, vs1, vs2, 1909 env, vl, vm, 3, fn); 1910 break; 1911 } 1912 } 1913 1914 /* generate helpers for fixed point instructions with OPIVV format */ 1915 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ 1916 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1917 CPURISCVState *env, uint32_t desc) \ 1918 { \ 1919 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 1920 do_##NAME); \ 1921 } 1922 1923 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 1924 { 1925 uint8_t res = a + b; 1926 if (res < a) { 1927 res = UINT8_MAX; 1928 env->vxsat = 0x1; 1929 } 1930 return res; 1931 } 1932 1933 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1934 uint16_t b) 1935 { 1936 uint16_t res = a + b; 1937 if (res < a) { 1938 res = UINT16_MAX; 1939 env->vxsat = 0x1; 1940 } 1941 return res; 1942 } 1943 1944 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 
1945 uint32_t b) 1946 { 1947 uint32_t res = a + b; 1948 if (res < a) { 1949 res = UINT32_MAX; 1950 env->vxsat = 0x1; 1951 } 1952 return res; 1953 } 1954 1955 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1956 uint64_t b) 1957 { 1958 uint64_t res = a + b; 1959 if (res < a) { 1960 res = UINT64_MAX; 1961 env->vxsat = 0x1; 1962 } 1963 return res; 1964 } 1965 1966 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 1967 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 1968 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 1969 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 1970 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) 1971 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) 1972 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) 1973 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) 1974 1975 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 1976 CPURISCVState *env, int vxrm); 1977 1978 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1979 static inline void \ 1980 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 1981 CPURISCVState *env, int vxrm) \ 1982 { \ 1983 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1984 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 1985 } 1986 1987 static inline void 1988 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 1989 CPURISCVState *env, 1990 uint32_t vl, uint32_t vm, int vxrm, 1991 opivx2_rm_fn *fn) 1992 { 1993 for (uint32_t i = 0; i < vl; i++) { 1994 if (!vm && !vext_elem_mask(v0, i)) { 1995 continue; 1996 } 1997 fn(vd, s1, vs2, i, env, vxrm); 1998 } 1999 } 2000 2001 static inline void 2002 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2003 CPURISCVState *env, 2004 uint32_t desc, uint32_t esz, uint32_t dsz, 2005 opivx2_rm_fn *fn) 2006 { 2007 uint32_t vm = vext_vm(desc); 2008 uint32_t vl = env->vl; 2009 2010 switch (env->vxrm) { 2011 case 0: /* rnu */ 2012 vext_vx_rm_1(vd, v0, s1, vs2, 2013 env, vl, vm, 0, fn); 2014 break; 2015 case 1: /* rne */ 2016 vext_vx_rm_1(vd, v0, s1, vs2, 2017 env, vl, vm, 1, fn); 2018 break; 2019 case 2: /* rdn */ 2020 vext_vx_rm_1(vd, v0, s1, vs2, 2021 env, vl, vm, 2, fn); 2022 break; 2023 default: /* rod */ 2024 vext_vx_rm_1(vd, v0, s1, vs2, 2025 env, vl, vm, 3, fn); 2026 break; 2027 } 2028 } 2029 2030 /* generate helpers for fixed point instructions with OPIVX format */ 2031 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ 2032 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2033 void *vs2, CPURISCVState *env, uint32_t desc) \ 2034 { \ 2035 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 2036 do_##NAME); \ 2037 } 2038 2039 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2040 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2041 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2042 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2043 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 2044 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) 2045 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) 2046 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) 2047 2048 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2049 { 2050 int8_t res = a + b; 2051 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2052 res = a > 0 ? INT8_MAX : INT8_MIN; 2053 env->vxsat = 0x1; 2054 } 2055 return res; 2056 } 2057 2058 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2059 { 2060 int16_t res = a + b; 2061 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2062 res = a > 0 ? 
INT16_MAX : INT16_MIN; 2063 env->vxsat = 0x1; 2064 } 2065 return res; 2066 } 2067 2068 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2069 { 2070 int32_t res = a + b; 2071 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2072 res = a > 0 ? INT32_MAX : INT32_MIN; 2073 env->vxsat = 0x1; 2074 } 2075 return res; 2076 } 2077 2078 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2079 { 2080 int64_t res = a + b; 2081 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2082 res = a > 0 ? INT64_MAX : INT64_MIN; 2083 env->vxsat = 0x1; 2084 } 2085 return res; 2086 } 2087 2088 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2089 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2090 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2091 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2092 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) 2093 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) 2094 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) 2095 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) 2096 2097 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2098 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2099 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2100 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2101 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) 2102 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) 2103 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) 2104 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) 2105 2106 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2107 { 2108 uint8_t res = a - b; 2109 if (res > a) { 2110 res = 0; 2111 env->vxsat = 0x1; 2112 } 2113 return res; 2114 } 2115 2116 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2117 uint16_t b) 2118 { 2119 uint16_t res = a - b; 2120 if (res > a) { 2121 res = 0; 2122 env->vxsat = 0x1; 2123 } 2124 return res; 2125 } 2126 2127 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2128 uint32_t b) 2129 { 2130 uint32_t res = a - b; 2131 if (res > a) { 2132 res = 0; 2133 env->vxsat = 0x1; 2134 } 2135 return res; 2136 } 2137 2138 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2139 uint64_t b) 2140 { 2141 uint64_t res = a - b; 2142 if (res > a) { 2143 res = 0; 2144 env->vxsat = 0x1; 2145 } 2146 return res; 2147 } 2148 2149 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2150 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2151 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2152 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2153 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) 2154 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) 2155 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) 2156 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) 2157 2158 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2159 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2160 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2161 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2162 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) 2163 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) 2164 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) 2165 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) 2166 2167 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2168 { 2169 int8_t res = a - b; 2170 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2171 res = a >= 0 ? 
INT8_MAX : INT8_MIN; 2172 env->vxsat = 0x1; 2173 } 2174 return res; 2175 } 2176 2177 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2178 { 2179 int16_t res = a - b; 2180 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2181 res = a >= 0 ? INT16_MAX : INT16_MIN; 2182 env->vxsat = 0x1; 2183 } 2184 return res; 2185 } 2186 2187 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2188 { 2189 int32_t res = a - b; 2190 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2191 res = a >= 0 ? INT32_MAX : INT32_MIN; 2192 env->vxsat = 0x1; 2193 } 2194 return res; 2195 } 2196 2197 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2198 { 2199 int64_t res = a - b; 2200 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2201 res = a >= 0 ? INT64_MAX : INT64_MIN; 2202 env->vxsat = 0x1; 2203 } 2204 return res; 2205 } 2206 2207 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2208 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2209 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2210 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2211 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) 2212 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) 2213 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) 2214 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) 2215 2216 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2217 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2218 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2219 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2220 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) 2221 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) 2222 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) 2223 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) 2224 2225 /* Vector Single-Width Averaging Add and Subtract */ 2226 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2227 { 2228 uint8_t d = extract64(v, shift, 1); 2229 uint8_t d1; 2230 uint64_t D1, D2; 2231 2232 if (shift == 0 || shift > 64) { 2233 return 0; 2234 } 2235 2236 d1 = extract64(v, shift - 1, 1); 2237 D1 = extract64(v, 0, shift); 2238 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2239 return d1; 2240 } else if (vxrm == 1) { /* round-to-nearest-even */ 2241 if (shift > 1) { 2242 D2 = extract64(v, 0, shift - 1); 2243 return d1 & ((D2 != 0) | d); 2244 } else { 2245 return d1 & d; 2246 } 2247 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2248 return !d & (D1 != 0); 2249 } 2250 return 0; /* round-down (truncate) */ 2251 } 2252 2253 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2254 { 2255 int64_t res = (int64_t)a + b; 2256 uint8_t round = get_round(vxrm, res, 1); 2257 2258 return (res >> 1) + round; 2259 } 2260 2261 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2262 { 2263 int64_t res = a + b; 2264 uint8_t round = get_round(vxrm, res, 1); 2265 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2266 2267 /* With signed overflow, bit 64 is inverse of bit 63. 
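 * 'over' has only bit 63 set, and only when the wrapped sum lost that 65th
 * bit, so xor-ing it into res >> 1 restores the correct sign of the halved
 * sum before the rounding increment is added.  For example, with
 * round-to-nearest-up: a = b = INT64_MAX wraps to res = -2 with over set,
 * and ((res >> 1) ^ over) + 0 == INT64_MAX, the exact average.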
*/ 2268 return ((res >> 1) ^ over) + round; 2269 } 2270 2271 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2272 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2273 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2274 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2275 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) 2276 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 2277 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) 2278 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) 2279 2280 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2281 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2282 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2283 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2284 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) 2285 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) 2286 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) 2287 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) 2288 2289 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2290 uint32_t a, uint32_t b) 2291 { 2292 uint64_t res = (uint64_t)a + b; 2293 uint8_t round = get_round(vxrm, res, 1); 2294 2295 return (res >> 1) + round; 2296 } 2297 2298 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2299 uint64_t a, uint64_t b) 2300 { 2301 uint64_t res = a + b; 2302 uint8_t round = get_round(vxrm, res, 1); 2303 uint64_t over = (uint64_t)(res < a) << 63; 2304 2305 return ((res >> 1) | over) + round; 2306 } 2307 2308 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2309 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2310 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2311 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2312 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1) 2313 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2) 2314 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4) 2315 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8) 2316 2317 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2318 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2319 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2320 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2321 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1) 2322 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2) 2323 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4) 2324 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8) 2325 2326 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2327 { 2328 int64_t res = (int64_t)a - b; 2329 uint8_t round = get_round(vxrm, res, 1); 2330 2331 return (res >> 1) + round; 2332 } 2333 2334 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2335 { 2336 int64_t res = (int64_t)a - b; 2337 uint8_t round = get_round(vxrm, res, 1); 2338 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2339 2340 /* With signed overflow, bit 64 is inverse of bit 63. 
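 * Same fixup as in aadd64 above; for subtraction the overflow predicate is
 * (res ^ a) & (a ^ b), but the bit-63 correction of the halved result is
 * identical.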
*/ 2341 return ((res >> 1) ^ over) + round; 2342 } 2343 2344 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2345 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2346 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2347 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2348 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) 2349 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) 2350 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) 2351 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) 2352 2353 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2354 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2355 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2356 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2357 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) 2358 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) 2359 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) 2360 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) 2361 2362 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2363 uint32_t a, uint32_t b) 2364 { 2365 int64_t res = (int64_t)a - b; 2366 uint8_t round = get_round(vxrm, res, 1); 2367 2368 return (res >> 1) + round; 2369 } 2370 2371 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2372 uint64_t a, uint64_t b) 2373 { 2374 uint64_t res = (uint64_t)a - b; 2375 uint8_t round = get_round(vxrm, res, 1); 2376 uint64_t over = (uint64_t)(res > a) << 63; 2377 2378 return ((res >> 1) | over) + round; 2379 } 2380 2381 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2382 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2383 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2384 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2385 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1) 2386 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2) 2387 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4) 2388 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8) 2389 2390 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2391 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2392 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2393 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2394 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1) 2395 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2) 2396 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4) 2397 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8) 2398 2399 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2400 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2401 { 2402 uint8_t round; 2403 int16_t res; 2404 2405 res = (int16_t)a * (int16_t)b; 2406 round = get_round(vxrm, res, 7); 2407 res = (res >> 7) + round; 2408 2409 if (res > INT8_MAX) { 2410 env->vxsat = 0x1; 2411 return INT8_MAX; 2412 } else if (res < INT8_MIN) { 2413 env->vxsat = 0x1; 2414 return INT8_MIN; 2415 } else { 2416 return res; 2417 } 2418 } 2419 2420 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2421 { 2422 uint8_t round; 2423 int32_t res; 2424 2425 res = (int32_t)a * (int32_t)b; 2426 round = get_round(vxrm, res, 15); 2427 res = (res >> 15) + round; 2428 2429 if (res > INT16_MAX) { 2430 env->vxsat = 0x1; 2431 return INT16_MAX; 2432 } else if (res < INT16_MIN) { 2433 env->vxsat = 0x1; 2434 return INT16_MIN; 2435 } else { 2436 return res; 2437 } 2438 } 2439 2440 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2441 { 2442 uint8_t round; 2443 int64_t res; 2444 2445 res = (int64_t)a * (int64_t)b; 2446 round = get_round(vxrm, res, 31); 2447 res = (res >> 31) + round; 2448 2449 if (res > INT32_MAX) { 2450 env->vxsat = 0x1; 2451 return INT32_MAX; 2452 } else 
if (res < INT32_MIN) { 2453 env->vxsat = 0x1; 2454 return INT32_MIN; 2455 } else { 2456 return res; 2457 } 2458 } 2459 2460 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2461 { 2462 uint8_t round; 2463 uint64_t hi_64, lo_64; 2464 int64_t res; 2465 2466 if (a == INT64_MIN && b == INT64_MIN) { 2467 env->vxsat = 1; 2468 return INT64_MAX; 2469 } 2470 2471 muls64(&lo_64, &hi_64, a, b); 2472 round = get_round(vxrm, lo_64, 63); 2473 /* 2474 * Cannot overflow, as there are always 2475 * 2 sign bits after multiply. 2476 */ 2477 res = (hi_64 << 1) | (lo_64 >> 63); 2478 if (round) { 2479 if (res == INT64_MAX) { 2480 env->vxsat = 1; 2481 } else { 2482 res += 1; 2483 } 2484 } 2485 return res; 2486 } 2487 2488 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2489 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2490 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2491 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2492 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) 2493 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) 2494 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) 2495 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) 2496 2497 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2498 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2499 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2500 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2501 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) 2502 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) 2503 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) 2504 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) 2505 2506 /* Vector Widening Saturating Scaled Multiply-Add */ 2507 static inline uint16_t 2508 vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, 2509 uint16_t c) 2510 { 2511 uint8_t round; 2512 uint16_t res = (uint16_t)a * b; 2513 2514 round = get_round(vxrm, res, 4); 2515 res = (res >> 4) + round; 2516 return saddu16(env, vxrm, c, res); 2517 } 2518 2519 static inline uint32_t 2520 vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, 2521 uint32_t c) 2522 { 2523 uint8_t round; 2524 uint32_t res = (uint32_t)a * b; 2525 2526 round = get_round(vxrm, res, 8); 2527 res = (res >> 8) + round; 2528 return saddu32(env, vxrm, c, res); 2529 } 2530 2531 static inline uint64_t 2532 vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, 2533 uint64_t c) 2534 { 2535 uint8_t round; 2536 uint64_t res = (uint64_t)a * b; 2537 2538 round = get_round(vxrm, res, 16); 2539 res = (res >> 16) + round; 2540 return saddu64(env, vxrm, c, res); 2541 } 2542 2543 #define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2544 static inline void \ 2545 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2546 CPURISCVState *env, int vxrm) \ 2547 { \ 2548 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2549 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2550 TD d = *((TD *)vd + HD(i)); \ 2551 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ 2552 } 2553 2554 RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) 2555 RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) 2556 RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) 2557 GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2) 2558 GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4) 2559 GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8) 2560 2561 #define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2562 static inline void \ 2563 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2564 CPURISCVState *env, int vxrm) \ 2565 { \ 2566 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2567 TD d = *((TD *)vd + HD(i)); \ 2568 *((TD *)vd + 
HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ 2569 } 2570 2571 RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) 2572 RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) 2573 RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) 2574 GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2) 2575 GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4) 2576 GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8) 2577 2578 static inline int16_t 2579 vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) 2580 { 2581 uint8_t round; 2582 int16_t res = (int16_t)a * b; 2583 2584 round = get_round(vxrm, res, 4); 2585 res = (res >> 4) + round; 2586 return sadd16(env, vxrm, c, res); 2587 } 2588 2589 static inline int32_t 2590 vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) 2591 { 2592 uint8_t round; 2593 int32_t res = (int32_t)a * b; 2594 2595 round = get_round(vxrm, res, 8); 2596 res = (res >> 8) + round; 2597 return sadd32(env, vxrm, c, res); 2598 2599 } 2600 2601 static inline int64_t 2602 vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) 2603 { 2604 uint8_t round; 2605 int64_t res = (int64_t)a * b; 2606 2607 round = get_round(vxrm, res, 16); 2608 res = (res >> 16) + round; 2609 return sadd64(env, vxrm, c, res); 2610 } 2611 2612 RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) 2613 RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) 2614 RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) 2615 GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2) 2616 GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4) 2617 GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8) 2618 RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) 2619 RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) 2620 RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) 2621 GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2) 2622 GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4) 2623 GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8) 2624 2625 static inline int16_t 2626 vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) 2627 { 2628 uint8_t round; 2629 int16_t res = a * (int16_t)b; 2630 2631 round = get_round(vxrm, res, 4); 2632 res = (res >> 4) + round; 2633 return ssub16(env, vxrm, c, res); 2634 } 2635 2636 static inline int32_t 2637 vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) 2638 { 2639 uint8_t round; 2640 int32_t res = a * (int32_t)b; 2641 2642 round = get_round(vxrm, res, 8); 2643 res = (res >> 8) + round; 2644 return ssub32(env, vxrm, c, res); 2645 } 2646 2647 static inline int64_t 2648 vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) 2649 { 2650 uint8_t round; 2651 int64_t res = a * (int64_t)b; 2652 2653 round = get_round(vxrm, res, 16); 2654 res = (res >> 16) + round; 2655 return ssub64(env, vxrm, c, res); 2656 } 2657 2658 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) 2659 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) 2660 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) 2661 GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2) 2662 GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4) 2663 GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8) 2664 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) 2665 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) 2666 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) 2667 GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2) 2668 GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4) 2669 GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8) 2670 2671 static inline int16_t 2672 
vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) 2673 { 2674 uint8_t round; 2675 int16_t res = (int16_t)a * b; 2676 2677 round = get_round(vxrm, res, 4); 2678 res = (res >> 4) + round; 2679 return ssub16(env, vxrm, c, res); 2680 } 2681 2682 static inline int32_t 2683 vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) 2684 { 2685 uint8_t round; 2686 int32_t res = (int32_t)a * b; 2687 2688 round = get_round(vxrm, res, 8); 2689 res = (res >> 8) + round; 2690 return ssub32(env, vxrm, c, res); 2691 } 2692 2693 static inline int64_t 2694 vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) 2695 { 2696 uint8_t round; 2697 int64_t res = (int64_t)a * b; 2698 2699 round = get_round(vxrm, res, 16); 2700 res = (res >> 16) + round; 2701 return ssub64(env, vxrm, c, res); 2702 } 2703 2704 RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8) 2705 RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) 2706 RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) 2707 GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2) 2708 GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4) 2709 GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8) 2710 2711 /* Vector Single-Width Scaling Shift Instructions */ 2712 static inline uint8_t 2713 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2714 { 2715 uint8_t round, shift = b & 0x7; 2716 uint8_t res; 2717 2718 round = get_round(vxrm, a, shift); 2719 res = (a >> shift) + round; 2720 return res; 2721 } 2722 static inline uint16_t 2723 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2724 { 2725 uint8_t round, shift = b & 0xf; 2726 uint16_t res; 2727 2728 round = get_round(vxrm, a, shift); 2729 res = (a >> shift) + round; 2730 return res; 2731 } 2732 static inline uint32_t 2733 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2734 { 2735 uint8_t round, shift = b & 0x1f; 2736 uint32_t res; 2737 2738 round = get_round(vxrm, a, shift); 2739 res = (a >> shift) + round; 2740 return res; 2741 } 2742 static inline uint64_t 2743 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2744 { 2745 uint8_t round, shift = b & 0x3f; 2746 uint64_t res; 2747 2748 round = get_round(vxrm, a, shift); 2749 res = (a >> shift) + round; 2750 return res; 2751 } 2752 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2753 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2754 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2755 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2756 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) 2757 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) 2758 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) 2759 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) 2760 2761 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2762 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2763 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2764 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2765 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) 2766 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) 2767 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) 2768 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) 2769 2770 static inline int8_t 2771 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2772 { 2773 uint8_t round, shift = b & 0x7; 2774 int8_t res; 2775 2776 round = get_round(vxrm, a, shift); 2777 res = (a >> shift) + round; 2778 return res; 2779 } 2780 static inline int16_t 2781 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2782 { 2783 uint8_t round, shift = b & 0xf; 2784 int16_t res; 2785 2786 round = 
get_round(vxrm, a, shift); 2787 res = (a >> shift) + round; 2788 return res; 2789 } 2790 static inline int32_t 2791 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2792 { 2793 uint8_t round, shift = b & 0x1f; 2794 int32_t res; 2795 2796 round = get_round(vxrm, a, shift); 2797 res = (a >> shift) + round; 2798 return res; 2799 } 2800 static inline int64_t 2801 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2802 { 2803 uint8_t round, shift = b & 0x3f; 2804 int64_t res; 2805 2806 round = get_round(vxrm, a, shift); 2807 res = (a >> shift) + round; 2808 return res; 2809 } 2810 2811 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2812 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2813 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2814 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2815 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) 2816 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) 2817 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) 2818 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) 2819 2820 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2821 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2822 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2823 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2824 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) 2825 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) 2826 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) 2827 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) 2828 2829 /* Vector Narrowing Fixed-Point Clip Instructions */ 2830 static inline int8_t 2831 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2832 { 2833 uint8_t round, shift = b & 0xf; 2834 int16_t res; 2835 2836 round = get_round(vxrm, a, shift); 2837 res = (a >> shift) + round; 2838 if (res > INT8_MAX) { 2839 env->vxsat = 0x1; 2840 return INT8_MAX; 2841 } else if (res < INT8_MIN) { 2842 env->vxsat = 0x1; 2843 return INT8_MIN; 2844 } else { 2845 return res; 2846 } 2847 } 2848 2849 static inline int16_t 2850 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2851 { 2852 uint8_t round, shift = b & 0x1f; 2853 int32_t res; 2854 2855 round = get_round(vxrm, a, shift); 2856 res = (a >> shift) + round; 2857 if (res > INT16_MAX) { 2858 env->vxsat = 0x1; 2859 return INT16_MAX; 2860 } else if (res < INT16_MIN) { 2861 env->vxsat = 0x1; 2862 return INT16_MIN; 2863 } else { 2864 return res; 2865 } 2866 } 2867 2868 static inline int32_t 2869 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2870 { 2871 uint8_t round, shift = b & 0x3f; 2872 int64_t res; 2873 2874 round = get_round(vxrm, a, shift); 2875 res = (a >> shift) + round; 2876 if (res > INT32_MAX) { 2877 env->vxsat = 0x1; 2878 return INT32_MAX; 2879 } else if (res < INT32_MIN) { 2880 env->vxsat = 0x1; 2881 return INT32_MIN; 2882 } else { 2883 return res; 2884 } 2885 } 2886 2887 RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2888 RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2889 RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2890 GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1) 2891 GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2) 2892 GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4) 2893 2894 RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8) 2895 RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16) 2896 RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32) 2897 GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1) 2898 GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2) 2899 GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4) 2900 2901 static inline uint8_t 2902 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2903 { 
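    /*
     * Unsigned narrowing clip from 2*SEW to SEW (here 16 -> 8 bits): shift
     * the wide source right by the low 4 bits of b, add the rounding
     * increment selected by get_round() for the current vxrm, and saturate
     * to UINT8_MAX, setting vxsat when the result is clipped.
     */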
2904 uint8_t round, shift = b & 0xf; 2905 uint16_t res; 2906 2907 round = get_round(vxrm, a, shift); 2908 res = (a >> shift) + round; 2909 if (res > UINT8_MAX) { 2910 env->vxsat = 0x1; 2911 return UINT8_MAX; 2912 } else { 2913 return res; 2914 } 2915 } 2916 2917 static inline uint16_t 2918 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2919 { 2920 uint8_t round, shift = b & 0x1f; 2921 uint32_t res; 2922 2923 round = get_round(vxrm, a, shift); 2924 res = (a >> shift) + round; 2925 if (res > UINT16_MAX) { 2926 env->vxsat = 0x1; 2927 return UINT16_MAX; 2928 } else { 2929 return res; 2930 } 2931 } 2932 2933 static inline uint32_t 2934 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2935 { 2936 uint8_t round, shift = b & 0x3f; 2937 int64_t res; 2938 2939 round = get_round(vxrm, a, shift); 2940 res = (a >> shift) + round; 2941 if (res > UINT32_MAX) { 2942 env->vxsat = 0x1; 2943 return UINT32_MAX; 2944 } else { 2945 return res; 2946 } 2947 } 2948 2949 RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2950 RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2951 RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2952 GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1) 2953 GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2) 2954 GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4) 2955 2956 RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8) 2957 RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16) 2958 RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32) 2959 GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1) 2960 GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2) 2961 GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4) 2962 2963 /* 2964 *** Vector Float Point Arithmetic Instructions 2965 */ 2966 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2967 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2968 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2969 CPURISCVState *env) \ 2970 { \ 2971 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2972 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2973 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2974 } 2975 2976 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \ 2977 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2978 void *vs2, CPURISCVState *env, \ 2979 uint32_t desc) \ 2980 { \ 2981 uint32_t vm = vext_vm(desc); \ 2982 uint32_t vl = env->vl; \ 2983 uint32_t i; \ 2984 \ 2985 for (i = 0; i < vl; i++) { \ 2986 if (!vm && !vext_elem_mask(v0, i)) { \ 2987 continue; \ 2988 } \ 2989 do_##NAME(vd, vs1, vs2, i, env); \ 2990 } \ 2991 } 2992 2993 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2994 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2995 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2996 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) 2997 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4) 2998 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8) 2999 3000 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3001 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3002 CPURISCVState *env) \ 3003 { \ 3004 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3005 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 3006 } 3007 3008 #define GEN_VEXT_VF(NAME, ESZ, DSZ) \ 3009 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3010 void *vs2, CPURISCVState *env, \ 3011 uint32_t desc) \ 3012 { \ 3013 uint32_t vm = vext_vm(desc); \ 3014 uint32_t vl = env->vl; \ 3015 uint32_t i; \ 3016 \ 3017 for (i = 0; i < vl; i++) { \ 3018 if (!vm && !vext_elem_mask(v0, i)) { \ 3019 continue; \ 3020 } \ 3021 do_##NAME(vd, s1, vs2, i, 
env); \ 3022 } \ 3023 } 3024 3025 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3026 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3027 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3028 GEN_VEXT_VF(vfadd_vf_h, 2, 2) 3029 GEN_VEXT_VF(vfadd_vf_w, 4, 4) 3030 GEN_VEXT_VF(vfadd_vf_d, 8, 8) 3031 3032 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3033 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3034 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3035 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) 3036 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) 3037 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) 3038 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3039 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3040 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3041 GEN_VEXT_VF(vfsub_vf_h, 2, 2) 3042 GEN_VEXT_VF(vfsub_vf_w, 4, 4) 3043 GEN_VEXT_VF(vfsub_vf_d, 8, 8) 3044 3045 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3046 { 3047 return float16_sub(b, a, s); 3048 } 3049 3050 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3051 { 3052 return float32_sub(b, a, s); 3053 } 3054 3055 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3056 { 3057 return float64_sub(b, a, s); 3058 } 3059 3060 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3061 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3062 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3063 GEN_VEXT_VF(vfrsub_vf_h, 2, 2) 3064 GEN_VEXT_VF(vfrsub_vf_w, 4, 4) 3065 GEN_VEXT_VF(vfrsub_vf_d, 8, 8) 3066 3067 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3068 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3069 { 3070 return float32_add(float16_to_float32(a, true, s), 3071 float16_to_float32(b, true, s), s); 3072 } 3073 3074 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3075 { 3076 return float64_add(float32_to_float64(a, s), 3077 float32_to_float64(b, s), s); 3078 3079 } 3080 3081 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3082 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3083 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) 3084 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) 3085 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3086 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3087 GEN_VEXT_VF(vfwadd_vf_h, 2, 4) 3088 GEN_VEXT_VF(vfwadd_vf_w, 4, 8) 3089 3090 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3091 { 3092 return float32_sub(float16_to_float32(a, true, s), 3093 float16_to_float32(b, true, s), s); 3094 } 3095 3096 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3097 { 3098 return float64_sub(float32_to_float64(a, s), 3099 float32_to_float64(b, s), s); 3100 3101 } 3102 3103 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3104 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3105 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) 3106 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) 3107 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3108 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3109 GEN_VEXT_VF(vfwsub_vf_h, 2, 4) 3110 GEN_VEXT_VF(vfwsub_vf_w, 4, 8) 3111 3112 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3113 { 3114 return float32_add(a, float16_to_float32(b, true, s), s); 3115 } 3116 3117 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3118 { 3119 return float64_add(a, 
float32_to_float64(b, s), s); 3120 } 3121 3122 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3123 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3124 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) 3125 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 3126 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3127 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3128 GEN_VEXT_VF(vfwadd_wf_h, 2, 4) 3129 GEN_VEXT_VF(vfwadd_wf_w, 4, 8) 3130 3131 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3132 { 3133 return float32_sub(a, float16_to_float32(b, true, s), s); 3134 } 3135 3136 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3137 { 3138 return float64_sub(a, float32_to_float64(b, s), s); 3139 } 3140 3141 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3142 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3143 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) 3144 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) 3145 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3146 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3147 GEN_VEXT_VF(vfwsub_wf_h, 2, 4) 3148 GEN_VEXT_VF(vfwsub_wf_w, 4, 8) 3149 3150 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3151 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3152 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3153 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3154 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) 3155 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) 3156 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) 3157 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3158 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3159 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3160 GEN_VEXT_VF(vfmul_vf_h, 2, 2) 3161 GEN_VEXT_VF(vfmul_vf_w, 4, 4) 3162 GEN_VEXT_VF(vfmul_vf_d, 8, 8) 3163 3164 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3165 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3166 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3167 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 3168 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) 3169 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) 3170 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3171 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3172 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3173 GEN_VEXT_VF(vfdiv_vf_h, 2, 2) 3174 GEN_VEXT_VF(vfdiv_vf_w, 4, 4) 3175 GEN_VEXT_VF(vfdiv_vf_d, 8, 8) 3176 3177 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3178 { 3179 return float16_div(b, a, s); 3180 } 3181 3182 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3183 { 3184 return float32_div(b, a, s); 3185 } 3186 3187 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3188 { 3189 return float64_div(b, a, s); 3190 } 3191 3192 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3193 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3194 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3195 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) 3196 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) 3197 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) 3198 3199 /* Vector Widening Floating-Point Multiply */ 3200 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3201 { 3202 return float32_mul(float16_to_float32(a, true, s), 3203 float16_to_float32(b, true, s), s); 3204 } 3205 3206 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3207 { 3208 return 
float64_mul(float32_to_float64(a, s), 3209 float32_to_float64(b, s), s); 3210 3211 } 3212 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3213 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3214 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) 3215 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) 3216 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3217 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3218 GEN_VEXT_VF(vfwmul_vf_h, 2, 4) 3219 GEN_VEXT_VF(vfwmul_vf_w, 4, 8) 3220 3221 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3222 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3223 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3224 CPURISCVState *env) \ 3225 { \ 3226 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3227 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3228 TD d = *((TD *)vd + HD(i)); \ 3229 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3230 } 3231 3232 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3233 { 3234 return float16_muladd(a, b, d, 0, s); 3235 } 3236 3237 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3238 { 3239 return float32_muladd(a, b, d, 0, s); 3240 } 3241 3242 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3243 { 3244 return float64_muladd(a, b, d, 0, s); 3245 } 3246 3247 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3248 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3249 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3250 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) 3251 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) 3252 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) 3253 3254 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3255 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3256 CPURISCVState *env) \ 3257 { \ 3258 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3259 TD d = *((TD *)vd + HD(i)); \ 3260 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3261 } 3262 3263 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3264 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3265 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3266 GEN_VEXT_VF(vfmacc_vf_h, 2, 2) 3267 GEN_VEXT_VF(vfmacc_vf_w, 4, 4) 3268 GEN_VEXT_VF(vfmacc_vf_d, 8, 8) 3269 3270 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3271 { 3272 return float16_muladd(a, b, d, 3273 float_muladd_negate_c | float_muladd_negate_product, s); 3274 } 3275 3276 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3277 { 3278 return float32_muladd(a, b, d, 3279 float_muladd_negate_c | float_muladd_negate_product, s); 3280 } 3281 3282 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3283 { 3284 return float64_muladd(a, b, d, 3285 float_muladd_negate_c | float_muladd_negate_product, s); 3286 } 3287 3288 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3289 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3290 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3291 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) 3292 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) 3293 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) 3294 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3295 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3296 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3297 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) 3298 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) 3299 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) 3300 3301 static 
uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3302 { 3303 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3304 } 3305 3306 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3307 { 3308 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3309 } 3310 3311 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3312 { 3313 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3314 } 3315 3316 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3317 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3318 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3319 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) 3320 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) 3321 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) 3322 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3323 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3324 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3325 GEN_VEXT_VF(vfmsac_vf_h, 2, 2) 3326 GEN_VEXT_VF(vfmsac_vf_w, 4, 4) 3327 GEN_VEXT_VF(vfmsac_vf_d, 8, 8) 3328 3329 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3330 { 3331 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3332 } 3333 3334 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3335 { 3336 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3337 } 3338 3339 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3340 { 3341 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3342 } 3343 3344 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3345 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3346 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3347 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) 3348 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) 3349 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) 3350 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3351 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3352 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3353 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) 3354 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) 3355 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) 3356 3357 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3358 { 3359 return float16_muladd(d, b, a, 0, s); 3360 } 3361 3362 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3363 { 3364 return float32_muladd(d, b, a, 0, s); 3365 } 3366 3367 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3368 { 3369 return float64_muladd(d, b, a, 0, s); 3370 } 3371 3372 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3373 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3374 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3375 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) 3376 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) 3377 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) 3378 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3379 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3380 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3381 GEN_VEXT_VF(vfmadd_vf_h, 2, 2) 3382 GEN_VEXT_VF(vfmadd_vf_w, 4, 4) 3383 GEN_VEXT_VF(vfmadd_vf_d, 8, 8) 3384 3385 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3386 { 3387 return float16_muladd(d, b, a, 3388 float_muladd_negate_c | float_muladd_negate_product, s); 3389 } 3390 3391 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status 
*s) 3392 { 3393 return float32_muladd(d, b, a, 3394 float_muladd_negate_c | float_muladd_negate_product, s); 3395 } 3396 3397 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3398 { 3399 return float64_muladd(d, b, a, 3400 float_muladd_negate_c | float_muladd_negate_product, s); 3401 } 3402 3403 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3404 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3405 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3406 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) 3407 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) 3408 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) 3409 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3410 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3411 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3412 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) 3413 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) 3414 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) 3415 3416 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3417 { 3418 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3419 } 3420 3421 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3422 { 3423 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3424 } 3425 3426 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3427 { 3428 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3429 } 3430 3431 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3432 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3433 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3434 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) 3435 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 3436 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) 3437 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3438 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3439 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3440 GEN_VEXT_VF(vfmsub_vf_h, 2, 2) 3441 GEN_VEXT_VF(vfmsub_vf_w, 4, 4) 3442 GEN_VEXT_VF(vfmsub_vf_d, 8, 8) 3443 3444 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3445 { 3446 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3447 } 3448 3449 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3450 { 3451 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3452 } 3453 3454 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3455 { 3456 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3457 } 3458 3459 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3460 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3461 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3462 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) 3463 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) 3464 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) 3465 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3466 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3467 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3468 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) 3469 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) 3470 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) 3471 3472 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3473 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3474 { 3475 return float32_muladd(float16_to_float32(a, true, s), 3476 float16_to_float32(b, true, s), d, 0, s); 3477 } 3478 3479 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, 
float_status *s) 3480 { 3481 return float64_muladd(float32_to_float64(a, s), 3482 float32_to_float64(b, s), d, 0, s); 3483 } 3484 3485 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3486 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3487 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) 3488 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) 3489 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3490 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3491 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) 3492 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) 3493 3494 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3495 { 3496 return float32_muladd(float16_to_float32(a, true, s), 3497 float16_to_float32(b, true, s), d, 3498 float_muladd_negate_c | float_muladd_negate_product, s); 3499 } 3500 3501 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3502 { 3503 return float64_muladd(float32_to_float64(a, s), 3504 float32_to_float64(b, s), d, 3505 float_muladd_negate_c | float_muladd_negate_product, s); 3506 } 3507 3508 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3509 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3510 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) 3511 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) 3512 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3513 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3514 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) 3515 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) 3516 3517 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3518 { 3519 return float32_muladd(float16_to_float32(a, true, s), 3520 float16_to_float32(b, true, s), d, 3521 float_muladd_negate_c, s); 3522 } 3523 3524 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3525 { 3526 return float64_muladd(float32_to_float64(a, s), 3527 float32_to_float64(b, s), d, 3528 float_muladd_negate_c, s); 3529 } 3530 3531 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3532 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3533 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) 3534 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) 3535 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3536 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3537 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) 3538 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) 3539 3540 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3541 { 3542 return float32_muladd(float16_to_float32(a, true, s), 3543 float16_to_float32(b, true, s), d, 3544 float_muladd_negate_product, s); 3545 } 3546 3547 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3548 { 3549 return float64_muladd(float32_to_float64(a, s), 3550 float32_to_float64(b, s), d, 3551 float_muladd_negate_product, s); 3552 } 3553 3554 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3555 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3556 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) 3557 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 3558 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3559 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3560 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) 3561 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) 3562 3563 /* Vector Floating-Point Square-Root Instruction */ 3564 /* (TD, T2, TX2) */ 3565 #define OP_UU_H uint16_t, uint16_t, uint16_t 3566 #define OP_UU_W uint32_t, uint32_t, uint32_t 3567 #define OP_UU_D uint64_t, uint64_t, uint64_t 3568 3569 #define OPFVV1(NAME, 
TD, T2, TX2, HD, HS2, OP) \ 3570 static void do_##NAME(void *vd, void *vs2, int i, \ 3571 CPURISCVState *env) \ 3572 { \ 3573 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3574 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3575 } 3576 3577 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ 3578 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3579 CPURISCVState *env, uint32_t desc) \ 3580 { \ 3581 uint32_t vm = vext_vm(desc); \ 3582 uint32_t vl = env->vl; \ 3583 uint32_t i; \ 3584 \ 3585 if (vl == 0) { \ 3586 return; \ 3587 } \ 3588 for (i = 0; i < vl; i++) { \ 3589 if (!vm && !vext_elem_mask(v0, i)) { \ 3590 continue; \ 3591 } \ 3592 do_##NAME(vd, vs2, i, env); \ 3593 } \ 3594 } 3595 3596 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3597 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3598 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3599 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) 3600 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) 3601 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) 3602 3603 /* Vector Floating-Point MIN/MAX Instructions */ 3604 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum) 3605 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum) 3606 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum) 3607 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) 3608 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) 3609 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) 3610 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum) 3611 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum) 3612 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum) 3613 GEN_VEXT_VF(vfmin_vf_h, 2, 2) 3614 GEN_VEXT_VF(vfmin_vf_w, 4, 4) 3615 GEN_VEXT_VF(vfmin_vf_d, 8, 8) 3616 3617 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum) 3618 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum) 3619 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum) 3620 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) 3621 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) 3622 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) 3623 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum) 3624 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum) 3625 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) 3626 GEN_VEXT_VF(vfmax_vf_h, 2, 2) 3627 GEN_VEXT_VF(vfmax_vf_w, 4, 4) 3628 GEN_VEXT_VF(vfmax_vf_d, 8, 8) 3629 3630 /* Vector Floating-Point Sign-Injection Instructions */ 3631 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3632 { 3633 return deposit64(b, 0, 15, a); 3634 } 3635 3636 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3637 { 3638 return deposit64(b, 0, 31, a); 3639 } 3640 3641 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3642 { 3643 return deposit64(b, 0, 63, a); 3644 } 3645 3646 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3647 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3648 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3649 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) 3650 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) 3651 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) 3652 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3653 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3654 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3655 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) 3656 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) 3657 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) 3658 3659 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3660 { 3661 return deposit64(~b, 0, 15, a); 3662 } 3663 3664 static uint32_t fsgnjn32(uint32_t a, uint32_t b, 
float_status *s) 3665 { 3666 return deposit64(~b, 0, 31, a); 3667 } 3668 3669 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3670 { 3671 return deposit64(~b, 0, 63, a); 3672 } 3673 3674 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3675 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3676 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3677 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) 3678 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) 3679 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) 3680 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3681 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3682 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3683 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) 3684 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) 3685 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) 3686 3687 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3688 { 3689 return deposit64(b ^ a, 0, 15, a); 3690 } 3691 3692 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3693 { 3694 return deposit64(b ^ a, 0, 31, a); 3695 } 3696 3697 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3698 { 3699 return deposit64(b ^ a, 0, 63, a); 3700 } 3701 3702 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3703 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3704 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3705 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) 3706 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) 3707 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) 3708 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3709 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3710 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3711 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) 3712 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) 3713 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) 3714 3715 /* Vector Floating-Point Compare Instructions */ 3716 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3717 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3718 CPURISCVState *env, uint32_t desc) \ 3719 { \ 3720 uint32_t vm = vext_vm(desc); \ 3721 uint32_t vl = env->vl; \ 3722 uint32_t vlmax = vext_max_elems(desc, \ 3723 ctzl(sizeof(ETYPE))); \ 3724 uint32_t i; \ 3725 \ 3726 for (i = 0; i < vl; i++) { \ 3727 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3728 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3729 if (!vm && !vext_elem_mask(v0, i)) { \ 3730 continue; \ 3731 } \ 3732 vext_set_elem_mask(vd, i, \ 3733 DO_OP(s2, s1, &env->fp_status)); \ 3734 } \ 3735 for (; i < vlmax; i++) { \ 3736 vext_set_elem_mask(vd, i, 0); \ 3737 } \ 3738 } 3739 3740 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3741 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3742 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3743 3744 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3745 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3746 CPURISCVState *env, uint32_t desc) \ 3747 { \ 3748 uint32_t vm = vext_vm(desc); \ 3749 uint32_t vl = env->vl; \ 3750 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 3751 uint32_t i; \ 3752 \ 3753 for (i = 0; i < vl; i++) { \ 3754 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3755 if (!vm && !vext_elem_mask(v0, i)) { \ 3756 continue; \ 3757 } \ 3758 vext_set_elem_mask(vd, i, \ 3759 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3760 } \ 3761 for (; i < vlmax; i++) { \ 3762 vext_set_elem_mask(vd, i, 0); \ 3763 } \ 3764 } 3765 3766 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3767 
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3768 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3769 3770 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3771 { 3772 FloatRelation compare = float16_compare_quiet(a, b, s); 3773 return compare != float_relation_equal; 3774 } 3775 3776 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 3777 { 3778 FloatRelation compare = float32_compare_quiet(a, b, s); 3779 return compare != float_relation_equal; 3780 } 3781 3782 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 3783 { 3784 FloatRelation compare = float64_compare_quiet(a, b, s); 3785 return compare != float_relation_equal; 3786 } 3787 3788 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 3789 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 3790 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 3791 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 3792 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 3793 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 3794 3795 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 3796 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 3797 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 3798 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 3799 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 3800 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 3801 3802 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 3803 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 3804 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 3805 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 3806 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 3807 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 3808 3809 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 3810 { 3811 FloatRelation compare = float16_compare(a, b, s); 3812 return compare == float_relation_greater; 3813 } 3814 3815 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 3816 { 3817 FloatRelation compare = float32_compare(a, b, s); 3818 return compare == float_relation_greater; 3819 } 3820 3821 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 3822 { 3823 FloatRelation compare = float64_compare(a, b, s); 3824 return compare == float_relation_greater; 3825 } 3826 3827 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 3828 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 3829 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 3830 3831 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 3832 { 3833 FloatRelation compare = float16_compare(a, b, s); 3834 return compare == float_relation_greater || 3835 compare == float_relation_equal; 3836 } 3837 3838 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 3839 { 3840 FloatRelation compare = float32_compare(a, b, s); 3841 return compare == float_relation_greater || 3842 compare == float_relation_equal; 3843 } 3844 3845 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 3846 { 3847 FloatRelation compare = float64_compare(a, b, s); 3848 return compare == float_relation_greater || 3849 compare == float_relation_equal; 3850 } 3851 3852 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 3853 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 3854 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 3855 3856 GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet) 3857 GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet) 3858 
GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet) 3859 GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet) 3860 GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet) 3861 GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet) 3862 3863 /* Vector Floating-Point Classify Instruction */ 3864 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3865 static void do_##NAME(void *vd, void *vs2, int i) \ 3866 { \ 3867 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3868 *((TD *)vd + HD(i)) = OP(s2); \ 3869 } 3870 3871 #define GEN_VEXT_V(NAME, ESZ, DSZ) \ 3872 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3873 CPURISCVState *env, uint32_t desc) \ 3874 { \ 3875 uint32_t vm = vext_vm(desc); \ 3876 uint32_t vl = env->vl; \ 3877 uint32_t i; \ 3878 \ 3879 for (i = 0; i < vl; i++) { \ 3880 if (!vm && !vext_elem_mask(v0, i)) { \ 3881 continue; \ 3882 } \ 3883 do_##NAME(vd, vs2, i); \ 3884 } \ 3885 } 3886 3887 target_ulong fclass_h(uint64_t frs1) 3888 { 3889 float16 f = frs1; 3890 bool sign = float16_is_neg(f); 3891 3892 if (float16_is_infinity(f)) { 3893 return sign ? 1 << 0 : 1 << 7; 3894 } else if (float16_is_zero(f)) { 3895 return sign ? 1 << 3 : 1 << 4; 3896 } else if (float16_is_zero_or_denormal(f)) { 3897 return sign ? 1 << 2 : 1 << 5; 3898 } else if (float16_is_any_nan(f)) { 3899 float_status s = { }; /* for snan_bit_is_one */ 3900 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3901 } else { 3902 return sign ? 1 << 1 : 1 << 6; 3903 } 3904 } 3905 3906 target_ulong fclass_s(uint64_t frs1) 3907 { 3908 float32 f = frs1; 3909 bool sign = float32_is_neg(f); 3910 3911 if (float32_is_infinity(f)) { 3912 return sign ? 1 << 0 : 1 << 7; 3913 } else if (float32_is_zero(f)) { 3914 return sign ? 1 << 3 : 1 << 4; 3915 } else if (float32_is_zero_or_denormal(f)) { 3916 return sign ? 1 << 2 : 1 << 5; 3917 } else if (float32_is_any_nan(f)) { 3918 float_status s = { }; /* for snan_bit_is_one */ 3919 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3920 } else { 3921 return sign ? 1 << 1 : 1 << 6; 3922 } 3923 } 3924 3925 target_ulong fclass_d(uint64_t frs1) 3926 { 3927 float64 f = frs1; 3928 bool sign = float64_is_neg(f); 3929 3930 if (float64_is_infinity(f)) { 3931 return sign ? 1 << 0 : 1 << 7; 3932 } else if (float64_is_zero(f)) { 3933 return sign ? 1 << 3 : 1 << 4; 3934 } else if (float64_is_zero_or_denormal(f)) { 3935 return sign ? 1 << 2 : 1 << 5; 3936 } else if (float64_is_any_nan(f)) { 3937 float_status s = { }; /* for snan_bit_is_one */ 3938 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3939 } else { 3940 return sign ? 1 << 1 : 1 << 6; 3941 } 3942 } 3943 3944 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 3945 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 3946 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 3947 GEN_VEXT_V(vfclass_v_h, 2, 2) 3948 GEN_VEXT_V(vfclass_v_w, 4, 4) 3949 GEN_VEXT_V(vfclass_v_d, 8, 8) 3950 3951 /* Vector Floating-Point Merge Instruction */ 3952 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 3953 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3954 CPURISCVState *env, uint32_t desc) \ 3955 { \ 3956 uint32_t vm = vext_vm(desc); \ 3957 uint32_t vl = env->vl; \ 3958 uint32_t i; \ 3959 \ 3960 for (i = 0; i < vl; i++) { \ 3961 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3962 *((ETYPE *)vd + H(i)) \ 3963 = (!vm && !vext_elem_mask(v0, i) ? 
s2 : s1); \ 3964 } \ 3965 } 3966 3967 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 3968 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 3969 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 3970 3971 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 3972 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 3973 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 3974 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 3975 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 3976 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) 3977 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) 3978 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) 3979 3980 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 3981 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 3982 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 3983 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 3984 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) 3985 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) 3986 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) 3987 3988 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 3989 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 3990 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 3991 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 3992 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) 3993 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) 3994 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) 3995 3996 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 3997 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 3998 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 3999 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4000 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) 4001 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) 4002 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) 4003 4004 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4005 /* (TD, T2, TX2) */ 4006 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4007 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4008 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4009 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4010 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4011 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) 4012 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) 4013 4014 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4015 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4016 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4017 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) 4018 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) 4019 4020 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4021 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4022 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4023 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) 4024 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) 4025 4026 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4027 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4028 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4029 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) 4030 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) 4031 4032 /* 4033 * vfwcvt.f.f.v vd, vs2, vm # 4034 * Convert single-width float to double-width float. 
4035  */
4036 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4037 {
4038     return float16_to_float32(a, true, s);
4039 }
4040
4041 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4042 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4043 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
4044 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)
4045
4046 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4047 /* (TD, T2, TX2) */
4048 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4049 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4050 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
4051 RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16)
4052 RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32)
4053 GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2)
4054 GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4)
4055
4056 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
4057 RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16)
4058 RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32)
4059 GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2)
4060 GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4)
4061
4062 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
4063 RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16)
4064 RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32)
4065 GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2)
4066 GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4)
4067
4068 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
4069 RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16)
4070 RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32)
4071 GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2)
4072 GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4)
4073
4074 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float.
*/ 4075 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4076 { 4077 return float32_to_float16(a, true, s); 4078 } 4079 4080 RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) 4081 RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) 4082 GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2) 4083 GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4) 4084 4085 /* 4086 *** Vector Reduction Operations 4087 */ 4088 /* Vector Single-Width Integer Reduction Instructions */ 4089 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4090 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4091 void *vs2, CPURISCVState *env, uint32_t desc) \ 4092 { \ 4093 uint32_t vm = vext_vm(desc); \ 4094 uint32_t vl = env->vl; \ 4095 uint32_t i; \ 4096 TD s1 = *((TD *)vs1 + HD(0)); \ 4097 \ 4098 for (i = 0; i < vl; i++) { \ 4099 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4100 if (!vm && !vext_elem_mask(v0, i)) { \ 4101 continue; \ 4102 } \ 4103 s1 = OP(s1, (TD)s2); \ 4104 } \ 4105 *((TD *)vd + HD(0)) = s1; \ 4106 } 4107 4108 /* vd[0] = sum(vs1[0], vs2[*]) */ 4109 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4110 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4111 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4112 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4113 4114 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4115 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4116 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4117 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4118 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4119 4120 /* vd[0] = max(vs1[0], vs2[*]) */ 4121 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4122 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4123 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4124 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4125 4126 /* vd[0] = minu(vs1[0], vs2[*]) */ 4127 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4128 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4129 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4130 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4131 4132 /* vd[0] = min(vs1[0], vs2[*]) */ 4133 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4134 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4135 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4136 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4137 4138 /* vd[0] = and(vs1[0], vs2[*]) */ 4139 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4140 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4141 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4142 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4143 4144 /* vd[0] = or(vs1[0], vs2[*]) */ 4145 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4146 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4147 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4148 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4149 4150 /* vd[0] = xor(vs1[0], vs2[*]) */ 4151 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4152 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4153 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4154 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4155 4156 /* Vector Widening Integer Reduction Instructions */ 4157 /* signed sum 
reduction into double-width accumulator */ 4158 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4159 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4160 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4161 4162 /* Unsigned sum reduction into double-width accumulator */ 4163 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4164 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4165 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4166 4167 /* Vector Single-Width Floating-Point Reduction Instructions */ 4168 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4169 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4170 void *vs2, CPURISCVState *env, \ 4171 uint32_t desc) \ 4172 { \ 4173 uint32_t vm = vext_vm(desc); \ 4174 uint32_t vl = env->vl; \ 4175 uint32_t i; \ 4176 TD s1 = *((TD *)vs1 + HD(0)); \ 4177 \ 4178 for (i = 0; i < vl; i++) { \ 4179 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4180 if (!vm && !vext_elem_mask(v0, i)) { \ 4181 continue; \ 4182 } \ 4183 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4184 } \ 4185 *((TD *)vd + HD(0)) = s1; \ 4186 } 4187 4188 /* Unordered sum */ 4189 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4190 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4191 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4192 4193 /* Maximum value */ 4194 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum) 4195 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum) 4196 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum) 4197 4198 /* Minimum value */ 4199 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum) 4200 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum) 4201 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum) 4202 4203 /* Vector Widening Floating-Point Reduction Instructions */ 4204 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4205 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4206 void *vs2, CPURISCVState *env, uint32_t desc) 4207 { 4208 uint32_t vm = vext_vm(desc); 4209 uint32_t vl = env->vl; 4210 uint32_t i; 4211 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4212 4213 for (i = 0; i < vl; i++) { 4214 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4215 if (!vm && !vext_elem_mask(v0, i)) { 4216 continue; 4217 } 4218 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4219 &env->fp_status); 4220 } 4221 *((uint32_t *)vd + H4(0)) = s1; 4222 } 4223 4224 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4225 void *vs2, CPURISCVState *env, uint32_t desc) 4226 { 4227 uint32_t vm = vext_vm(desc); 4228 uint32_t vl = env->vl; 4229 uint32_t i; 4230 uint64_t s1 = *((uint64_t *)vs1); 4231 4232 for (i = 0; i < vl; i++) { 4233 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4234 if (!vm && !vext_elem_mask(v0, i)) { 4235 continue; 4236 } 4237 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4238 &env->fp_status); 4239 } 4240 *((uint64_t *)vd) = s1; 4241 } 4242 4243 /* 4244 *** Vector Mask Operations 4245 */ 4246 /* Vector Mask-Register Logical Instructions */ 4247 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4248 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4249 void *vs2, CPURISCVState *env, \ 4250 uint32_t desc) \ 4251 { \ 4252 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4253 uint32_t vl = env->vl; \ 4254 uint32_t i; \ 4255 int a, b; \ 4256 \ 4257 for (i = 0; i < vl; 
i++) { \ 4258 a = vext_elem_mask(vs1, i); \ 4259 b = vext_elem_mask(vs2, i); \ 4260 vext_set_elem_mask(vd, i, OP(b, a)); \ 4261 } \ 4262 for (; i < vlmax; i++) { \ 4263 vext_set_elem_mask(vd, i, 0); \ 4264 } \ 4265 } 4266 4267 #define DO_NAND(N, M) (!(N & M)) 4268 #define DO_ANDNOT(N, M) (N & !M) 4269 #define DO_NOR(N, M) (!(N | M)) 4270 #define DO_ORNOT(N, M) (N | !M) 4271 #define DO_XNOR(N, M) (!(N ^ M)) 4272 4273 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4274 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4275 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) 4276 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4277 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4278 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4279 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) 4280 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4281 4282 /* Vector count population in mask vcpop */ 4283 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4284 uint32_t desc) 4285 { 4286 target_ulong cnt = 0; 4287 uint32_t vm = vext_vm(desc); 4288 uint32_t vl = env->vl; 4289 int i; 4290 4291 for (i = 0; i < vl; i++) { 4292 if (vm || vext_elem_mask(v0, i)) { 4293 if (vext_elem_mask(vs2, i)) { 4294 cnt++; 4295 } 4296 } 4297 } 4298 return cnt; 4299 } 4300 4301 /* vfirst find-first-set mask bit*/ 4302 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4303 uint32_t desc) 4304 { 4305 uint32_t vm = vext_vm(desc); 4306 uint32_t vl = env->vl; 4307 int i; 4308 4309 for (i = 0; i < vl; i++) { 4310 if (vm || vext_elem_mask(v0, i)) { 4311 if (vext_elem_mask(vs2, i)) { 4312 return i; 4313 } 4314 } 4315 } 4316 return -1LL; 4317 } 4318 4319 enum set_mask_type { 4320 ONLY_FIRST = 1, 4321 INCLUDE_FIRST, 4322 BEFORE_FIRST, 4323 }; 4324 4325 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4326 uint32_t desc, enum set_mask_type type) 4327 { 4328 uint32_t vm = vext_vm(desc); 4329 uint32_t vl = env->vl; 4330 int i; 4331 bool first_mask_bit = false; 4332 4333 for (i = 0; i < vl; i++) { 4334 if (!vm && !vext_elem_mask(v0, i)) { 4335 continue; 4336 } 4337 /* write a zero to all following active elements */ 4338 if (first_mask_bit) { 4339 vext_set_elem_mask(vd, i, 0); 4340 continue; 4341 } 4342 if (vext_elem_mask(vs2, i)) { 4343 first_mask_bit = true; 4344 if (type == BEFORE_FIRST) { 4345 vext_set_elem_mask(vd, i, 0); 4346 } else { 4347 vext_set_elem_mask(vd, i, 1); 4348 } 4349 } else { 4350 if (type == ONLY_FIRST) { 4351 vext_set_elem_mask(vd, i, 0); 4352 } else { 4353 vext_set_elem_mask(vd, i, 1); 4354 } 4355 } 4356 } 4357 } 4358 4359 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4360 uint32_t desc) 4361 { 4362 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4363 } 4364 4365 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4366 uint32_t desc) 4367 { 4368 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4369 } 4370 4371 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4372 uint32_t desc) 4373 { 4374 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4375 } 4376 4377 /* Vector Iota Instruction */ 4378 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4379 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4380 uint32_t desc) \ 4381 { \ 4382 uint32_t vm = vext_vm(desc); \ 4383 uint32_t vl = env->vl; \ 4384 uint32_t sum = 0; \ 4385 int i; \ 4386 \ 4387 for (i = 0; i < vl; i++) { \ 4388 if (!vm && !vext_elem_mask(v0, i)) { \ 4389 continue; \ 4390 } \ 4391 *((ETYPE *)vd + H(i)) = sum; \ 4392 if (vext_elem_mask(vs2, i)) { \ 4393 sum++; \ 4394 } \ 4395 } \ 4396 } 4397 4398 GEN_VEXT_VIOTA_M(viota_m_b, 
uint8_t, H1)
4399 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4400 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4401 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4402
4403 /* Vector Element Index Instruction */
4404 #define GEN_VEXT_VID_V(NAME, ETYPE, H)                                    \
4405 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4406 {                                                                         \
4407     uint32_t vm = vext_vm(desc);                                          \
4408     uint32_t vl = env->vl;                                                \
4409     int i;                                                                \
4410                                                                           \
4411     for (i = 0; i < vl; i++) {                                            \
4412         if (!vm && !vext_elem_mask(v0, i)) {                              \
4413             continue;                                                     \
4414         }                                                                 \
4415         *((ETYPE *)vd + H(i)) = i;                                        \
4416     }                                                                     \
4417 }
4418
4419 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
4420 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4421 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4422 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4423
4424 /*
4425  *** Vector Permutation Instructions
4426  */
4427
4428 /* Vector Slide Instructions */
4429 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H)                              \
4430 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4431                   CPURISCVState *env, uint32_t desc)                      \
4432 {                                                                         \
4433     uint32_t vm = vext_vm(desc);                                          \
4434     uint32_t vl = env->vl;                                                \
4435     target_ulong offset = s1, i;                                          \
4436                                                                           \
4437     for (i = offset; i < vl; i++) {                                       \
4438         if (!vm && !vext_elem_mask(v0, i)) {                              \
4439             continue;                                                     \
4440         }                                                                 \
4441         *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
4442     }                                                                     \
4443 }
4444
4445 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4446 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
4447 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4448 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4449 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4450
4451 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H)                            \
4452 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
4453                   CPURISCVState *env, uint32_t desc)                      \
4454 {                                                                         \
4455     uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
4456     uint32_t vm = vext_vm(desc);                                          \
4457     uint32_t vl = env->vl;                                                \
4458     target_ulong offset = s1, i;                                          \
4459                                                                           \
4460     for (i = 0; i < vl; ++i) {                                            \
4461         target_ulong j = i + offset;                                      \
4462         if (!vm && !vext_elem_mask(v0, i)) {                              \
4463             continue;                                                     \
4464         }                                                                 \
4465         *((ETYPE *)vd + H(i)) = j >= vlmax ?
0 : *((ETYPE *)vs2 + H(j)); \ 4466 } \ 4467 } 4468 4469 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4470 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4471 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4472 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4473 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4474 4475 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H) \ 4476 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4477 CPURISCVState *env, uint32_t desc) \ 4478 { \ 4479 uint32_t vm = vext_vm(desc); \ 4480 uint32_t vl = env->vl; \ 4481 uint32_t i; \ 4482 \ 4483 for (i = 0; i < vl; i++) { \ 4484 if (!vm && !vext_elem_mask(v0, i)) { \ 4485 continue; \ 4486 } \ 4487 if (i == 0) { \ 4488 *((ETYPE *)vd + H(i)) = s1; \ 4489 } else { \ 4490 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4491 } \ 4492 } \ 4493 } 4494 4495 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4496 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1) 4497 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2) 4498 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4) 4499 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8) 4500 4501 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H) \ 4502 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4503 CPURISCVState *env, uint32_t desc) \ 4504 { \ 4505 uint32_t vm = vext_vm(desc); \ 4506 uint32_t vl = env->vl; \ 4507 uint32_t i; \ 4508 \ 4509 for (i = 0; i < vl; i++) { \ 4510 if (!vm && !vext_elem_mask(v0, i)) { \ 4511 continue; \ 4512 } \ 4513 if (i == vl - 1) { \ 4514 *((ETYPE *)vd + H(i)) = s1; \ 4515 } else { \ 4516 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4517 } \ 4518 } \ 4519 } 4520 4521 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4522 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1) 4523 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2) 4524 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4) 4525 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8) 4526 4527 /* Vector Register Gather Instruction */ 4528 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4529 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4530 CPURISCVState *env, uint32_t desc) \ 4531 { \ 4532 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS1))); \ 4533 uint32_t vm = vext_vm(desc); \ 4534 uint32_t vl = env->vl; \ 4535 uint64_t index; \ 4536 uint32_t i; \ 4537 \ 4538 for (i = 0; i < vl; i++) { \ 4539 if (!vm && !vext_elem_mask(v0, i)) { \ 4540 continue; \ 4541 } \ 4542 index = *((TS1 *)vs1 + HS1(i)); \ 4543 if (index >= vlmax) { \ 4544 *((TS2 *)vd + HS2(i)) = 0; \ 4545 } else { \ 4546 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4547 } \ 4548 } \ 4549 } 4550 4551 /* vd[i] = (vs1[i] >= VLMAX) ? 
0 : vs2[vs1[i]]; */ 4552 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4553 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4554 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4555 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4556 4557 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4558 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4559 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4560 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4561 4562 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4563 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4564 CPURISCVState *env, uint32_t desc) \ 4565 { \ 4566 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4567 uint32_t vm = vext_vm(desc); \ 4568 uint32_t vl = env->vl; \ 4569 uint64_t index = s1; \ 4570 uint32_t i; \ 4571 \ 4572 for (i = 0; i < vl; i++) { \ 4573 if (!vm && !vext_elem_mask(v0, i)) { \ 4574 continue; \ 4575 } \ 4576 if (index >= vlmax) { \ 4577 *((ETYPE *)vd + H(i)) = 0; \ 4578 } else { \ 4579 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4580 } \ 4581 } \ 4582 } 4583 4584 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ 4585 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4586 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4587 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4588 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4589 4590 /* Vector Compress Instruction */ 4591 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4592 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4593 CPURISCVState *env, uint32_t desc) \ 4594 { \ 4595 uint32_t vl = env->vl; \ 4596 uint32_t num = 0, i; \ 4597 \ 4598 for (i = 0; i < vl; i++) { \ 4599 if (!vext_elem_mask(vs1, i)) { \ 4600 continue; \ 4601 } \ 4602 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4603 num++; \ 4604 } \ 4605 } 4606 4607 /* Compress into vd elements of vs2 where vs1 is enabled */ 4608 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4609 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4610 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4611 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4612 4613 /* Vector Integer Extension */ 4614 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \ 4615 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4616 CPURISCVState *env, uint32_t desc) \ 4617 { \ 4618 uint32_t vl = env->vl; \ 4619 uint32_t vm = vext_vm(desc); \ 4620 uint32_t i; \ 4621 \ 4622 for (i = 0; i < vl; i++) { \ 4623 if (!vm && !vext_elem_mask(v0, i)) { \ 4624 continue; \ 4625 } \ 4626 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ 4627 } \ 4628 } 4629 4630 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) 4631 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2) 4632 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4) 4633 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1) 4634 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2) 4635 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1) 4636 4637 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1) 4638 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2) 4639 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) 4640 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) 4641 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) 4642 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) 4643
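/*
 * The GEN_VEXT_INT_EXT expansions above implement vzext.vf2/vf4/vf8 and
 * vsext.vf2/vf4/vf8 purely through C integer conversions: DTYPE is the
 * narrow source element type and ETYPE the widened destination type, so
 * unsigned DTYPEs give zero-extension and signed DTYPEs give sign-extension.
 * As a rough, hand-expanded sketch (not generated output),
 * GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) yields a helper
 * along these lines:
 *
 *   void HELPER(vzext_vf2_h)(void *vd, void *v0, void *vs2,
 *                            CPURISCVState *env, uint32_t desc)
 *   {
 *       uint32_t vl = env->vl;
 *       uint32_t vm = vext_vm(desc);
 *       uint32_t i;
 *
 *       for (i = 0; i < vl; i++) {
 *           if (!vm && !vext_elem_mask(v0, i)) {
 *               continue;    // inactive element: vd[i] is left unchanged
 *           }
 *           // the uint8_t source implicitly zero-extends into the uint16_t slot
 *           *((uint16_t *)vd + H2(i)) = *((uint8_t *)vs2 + H1(i));
 *       }
 *   }
 */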