/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that needs a host-endian fixup.
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

static inline uint32_t vext_vta(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA);
}

static inline uint32_t vext_vta_all_1s(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits.
     * So vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * Get the total number of elements, including prestart, body and tail
 * elements. Note that when LMUL < 1, the tail includes the elements past
 * VLMAX that are held in the same vector register.
 */
static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
                                            uint32_t esz)
{
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
    return (vlenb << emul) / esz;
}

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check. In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

/* set agnostic elements to 1s */
static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
                              uint32_t tot)
{
    if (is_agnostic == 0) {
        /* policy undisturbed */
        return;
    }
    if (tot - cnt == 0) {
        return;
    }
    memset(base + cnt, -1, tot - cnt);
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 *
226 * (Section 4.5) 227 */ 228 static inline int vext_elem_mask(void *v0, int index) 229 { 230 int idx = index / 64; 231 int pos = index % 64; 232 return (((uint64_t *)v0)[idx] >> pos) & 1; 233 } 234 235 /* elements operations for load and store */ 236 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, 237 uint32_t idx, void *vd, uintptr_t retaddr); 238 239 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 240 static void NAME(CPURISCVState *env, abi_ptr addr, \ 241 uint32_t idx, void *vd, uintptr_t retaddr)\ 242 { \ 243 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 244 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 245 } \ 246 247 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) 248 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw) 249 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) 250 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq) 251 252 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 253 static void NAME(CPURISCVState *env, abi_ptr addr, \ 254 uint32_t idx, void *vd, uintptr_t retaddr)\ 255 { \ 256 ETYPE data = *((ETYPE *)vd + H(idx)); \ 257 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 258 } 259 260 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) 261 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) 262 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) 263 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) 264 265 /* 266 *** stride: access vector element from strided memory 267 */ 268 static void 269 vext_ldst_stride(void *vd, void *v0, target_ulong base, 270 target_ulong stride, CPURISCVState *env, 271 uint32_t desc, uint32_t vm, 272 vext_ldst_elem_fn *ldst_elem, 273 uint32_t log2_esz, uintptr_t ra) 274 { 275 uint32_t i, k; 276 uint32_t nf = vext_nf(desc); 277 uint32_t max_elems = vext_max_elems(desc, log2_esz); 278 uint32_t esz = 1 << log2_esz; 279 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 280 uint32_t vta = vext_vta(desc); 281 282 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 283 if (!vm && !vext_elem_mask(v0, i)) { 284 continue; 285 } 286 287 k = 0; 288 while (k < nf) { 289 target_ulong addr = base + stride * i + (k << log2_esz); 290 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 291 k++; 292 } 293 } 294 env->vstart = 0; 295 /* set tail elements to 1s */ 296 for (k = 0; k < nf; ++k) { 297 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, 298 (k * max_elems + max_elems) * esz); 299 } 300 if (nf * max_elems % total_elems != 0) { 301 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 302 uint32_t registers_used = 303 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 304 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 305 registers_used * vlenb); 306 } 307 } 308 309 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 310 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 311 target_ulong stride, CPURISCVState *env, \ 312 uint32_t desc) \ 313 { \ 314 uint32_t vm = vext_vm(desc); \ 315 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 316 ctzl(sizeof(ETYPE)), GETPC()); \ 317 } 318 319 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 320 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 321 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 322 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 323 324 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 325 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 326 target_ulong stride, CPURISCVState *env, \ 327 uint32_t desc) \ 328 { \ 329 uint32_t vm = vext_vm(desc); \ 330 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 331 ctzl(sizeof(ETYPE)), GETPC()); \ 332 } 333 334 
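/*
 * Illustration only (a sketch, not additional compiled code): each
 * GEN_VEXT_ST_STRIDE() invocation below expands to a thin helper that
 * forwards to vext_ldst_stride() with the per-element store callback
 * and log2 of the element size. For example,
 * GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) expands to roughly:
 *
 *   void HELPER(vsse8_v)(void *vd, void *v0, target_ulong base,
 *                        target_ulong stride, CPURISCVState *env,
 *                        uint32_t desc)
 *   {
 *       uint32_t vm = vext_vm(desc);
 *       vext_ldst_stride(vd, v0, base, stride, env, desc, vm, ste_b,
 *                        ctzl(sizeof(int8_t)), GETPC());
 *   }
 */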
GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operations */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
             uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
    /* set tail elements to 1s */
    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
    if (nf * max_elems % total_elems != 0) {
        uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
        uint32_t registers_used =
            ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
        vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
                          registers_used * vlenb);
    }
}

/*
 * A masked unit-stride load or store is a special case of a strided access
 * with stride = NF * sizeof(MTYPE).
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 *** unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC());
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState
*env, uint32_t desc) 441 { 442 /* evl = ceil(vl/8) */ 443 uint8_t evl = (env->vl + 7) >> 3; 444 vext_ldst_us(vd, base, env, desc, ste_b, 445 0, evl, GETPC()); 446 } 447 448 /* 449 *** index: access vector element from indexed memory 450 */ 451 typedef target_ulong vext_get_index_addr(target_ulong base, 452 uint32_t idx, void *vs2); 453 454 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 455 static target_ulong NAME(target_ulong base, \ 456 uint32_t idx, void *vs2) \ 457 { \ 458 return (base + *((ETYPE *)vs2 + H(idx))); \ 459 } 460 461 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 462 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 463 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 464 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 465 466 static inline void 467 vext_ldst_index(void *vd, void *v0, target_ulong base, 468 void *vs2, CPURISCVState *env, uint32_t desc, 469 vext_get_index_addr get_index_addr, 470 vext_ldst_elem_fn *ldst_elem, 471 uint32_t log2_esz, uintptr_t ra) 472 { 473 uint32_t i, k; 474 uint32_t nf = vext_nf(desc); 475 uint32_t vm = vext_vm(desc); 476 uint32_t max_elems = vext_max_elems(desc, log2_esz); 477 uint32_t esz = 1 << log2_esz; 478 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 479 uint32_t vta = vext_vta(desc); 480 481 /* load bytes from guest memory */ 482 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 483 if (!vm && !vext_elem_mask(v0, i)) { 484 continue; 485 } 486 487 k = 0; 488 while (k < nf) { 489 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); 490 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 491 k++; 492 } 493 } 494 env->vstart = 0; 495 /* set tail elements to 1s */ 496 for (k = 0; k < nf; ++k) { 497 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, 498 (k * max_elems + max_elems) * esz); 499 } 500 if (nf * max_elems % total_elems != 0) { 501 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 502 uint32_t registers_used = 503 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 504 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 505 registers_used * vlenb); 506 } 507 } 508 509 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 510 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 511 void *vs2, CPURISCVState *env, uint32_t desc) \ 512 { \ 513 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 514 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ 515 } 516 517 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 518 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 519 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 520 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 521 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 522 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 523 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 524 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 525 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 526 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 527 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 528 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 529 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 530 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 531 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 532 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 533 534 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 535 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 536 void *vs2, CPURISCVState *env, uint32_t desc) \ 537 { \ 538 vext_ldst_index(vd, 
v0, base, vs2, env, desc, INDEX_FN, \ 539 STORE_FN, ctzl(sizeof(ETYPE)), \ 540 GETPC()); \ 541 } 542 543 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b) 544 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h) 545 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w) 546 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d) 547 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b) 548 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h) 549 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w) 550 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d) 551 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b) 552 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h) 553 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w) 554 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d) 555 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b) 556 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h) 557 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w) 558 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d) 559 560 /* 561 *** unit-stride fault-only-fisrt load instructions 562 */ 563 static inline void 564 vext_ldff(void *vd, void *v0, target_ulong base, 565 CPURISCVState *env, uint32_t desc, 566 vext_ldst_elem_fn *ldst_elem, 567 uint32_t log2_esz, uintptr_t ra) 568 { 569 void *host; 570 uint32_t i, k, vl = 0; 571 uint32_t nf = vext_nf(desc); 572 uint32_t vm = vext_vm(desc); 573 uint32_t max_elems = vext_max_elems(desc, log2_esz); 574 uint32_t esz = 1 << log2_esz; 575 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 576 uint32_t vta = vext_vta(desc); 577 target_ulong addr, offset, remain; 578 579 /* probe every access*/ 580 for (i = env->vstart; i < env->vl; i++) { 581 if (!vm && !vext_elem_mask(v0, i)) { 582 continue; 583 } 584 addr = adjust_addr(env, base + i * (nf << log2_esz)); 585 if (i == 0) { 586 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD); 587 } else { 588 /* if it triggers an exception, no need to check watchpoint */ 589 remain = nf << log2_esz; 590 while (remain > 0) { 591 offset = -(addr | TARGET_PAGE_MASK); 592 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, 593 cpu_mmu_index(env, false)); 594 if (host) { 595 #ifdef CONFIG_USER_ONLY 596 if (page_check_range(addr, offset, PAGE_READ) < 0) { 597 vl = i; 598 goto ProbeSuccess; 599 } 600 #else 601 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD); 602 #endif 603 } else { 604 vl = i; 605 goto ProbeSuccess; 606 } 607 if (remain <= offset) { 608 break; 609 } 610 remain -= offset; 611 addr = adjust_addr(env, addr + offset); 612 } 613 } 614 } 615 ProbeSuccess: 616 /* load bytes from guest memory */ 617 if (vl != 0) { 618 env->vl = vl; 619 } 620 for (i = env->vstart; i < env->vl; i++) { 621 k = 0; 622 if (!vm && !vext_elem_mask(v0, i)) { 623 continue; 624 } 625 while (k < nf) { 626 target_ulong addr = base + ((i * nf + k) << log2_esz); 627 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 628 k++; 629 } 630 } 631 env->vstart = 0; 632 /* set tail elements to 1s */ 633 for (k = 0; k < nf; ++k) { 634 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz, 635 (k * max_elems + max_elems) * esz); 636 } 637 if (nf * max_elems % total_elems != 0) { 638 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 639 uint32_t registers_used = 640 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb; 641 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz, 642 registers_used * vlenb); 643 } 644 } 645 646 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ 647 void HELPER(NAME)(void *vd, void *v0, target_ulong 
base, \ 648 CPURISCVState *env, uint32_t desc) \ 649 { \ 650 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \ 651 ctzl(sizeof(ETYPE)), GETPC()); \ 652 } 653 654 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b) 655 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h) 656 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w) 657 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) 658 659 #define DO_SWAP(N, M) (M) 660 #define DO_AND(N, M) (N & M) 661 #define DO_XOR(N, M) (N ^ M) 662 #define DO_OR(N, M) (N | M) 663 #define DO_ADD(N, M) (N + M) 664 665 /* Signed min/max */ 666 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 667 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 668 669 /* Unsigned min/max */ 670 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 671 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 672 673 /* 674 *** load and store whole register instructions 675 */ 676 static void 677 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 678 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra) 679 { 680 uint32_t i, k, off, pos; 681 uint32_t nf = vext_nf(desc); 682 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 683 uint32_t max_elems = vlenb >> log2_esz; 684 685 k = env->vstart / max_elems; 686 off = env->vstart % max_elems; 687 688 if (off) { 689 /* load/store rest of elements of current segment pointed by vstart */ 690 for (pos = off; pos < max_elems; pos++, env->vstart++) { 691 target_ulong addr = base + ((pos + k * max_elems) << log2_esz); 692 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra); 693 } 694 k++; 695 } 696 697 /* load/store elements for rest of segments */ 698 for (; k < nf; k++) { 699 for (i = 0; i < max_elems; i++, env->vstart++) { 700 target_ulong addr = base + ((i + k * max_elems) << log2_esz); 701 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 702 } 703 } 704 705 env->vstart = 0; 706 } 707 708 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 709 void HELPER(NAME)(void *vd, target_ulong base, \ 710 CPURISCVState *env, uint32_t desc) \ 711 { \ 712 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 713 ctzl(sizeof(ETYPE)), GETPC()); \ 714 } 715 716 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 717 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 718 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 719 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 720 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 721 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 722 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 723 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 724 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 725 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 726 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 727 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 728 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 729 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 730 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 731 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 732 733 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 734 void HELPER(NAME)(void *vd, target_ulong base, \ 735 CPURISCVState *env, uint32_t desc) \ 736 { \ 737 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 738 ctzl(sizeof(ETYPE)), GETPC()); \ 739 } 740 741 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 742 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 743 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 744 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 745 746 /* 747 *** Vector Integer Arithmetic Instructions 748 */ 749 750 /* expand macro args before macro */ 751 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 752 753 /* (TD, T1, T2, TX1, TX2) */ 754 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 755 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 756 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 757 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 758 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 759 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 760 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 761 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 762 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 763 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 764 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 765 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 766 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 767 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 768 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 769 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 770 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 771 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 772 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 773 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 774 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 775 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 776 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 777 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 778 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 779 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 780 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 781 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 782 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 783 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 784 785 /* operation of two vector elements */ 786 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 787 788 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 789 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 790 { \ 791 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 792 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 793 *((TD *)vd + HD(i)) = OP(s2, s1); \ 794 } 795 #define DO_SUB(N, M) (N - M) 796 #define DO_RSUB(N, M) (M - N) 797 798 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 799 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 800 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 801 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 802 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 803 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 804 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 805 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 806 807 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 808 CPURISCVState *env, uint32_t desc, 809 opivv2_fn *fn, uint32_t esz) 810 { 811 uint32_t vm = vext_vm(desc); 812 uint32_t vl = env->vl; 813 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 814 uint32_t vta = vext_vta(desc); 815 uint32_t i; 816 817 for (i = env->vstart; i < vl; i++) { 818 if (!vm && !vext_elem_mask(v0, i)) { 819 continue; 820 } 821 fn(vd, vs1, vs2, i); 822 } 823 env->vstart = 0; 824 /* set tail elements to 1s */ 825 vext_set_elems_1s(vd, 
vta, vl * esz, total_elems * esz); 826 } 827 828 /* generate the helpers for OPIVV */ 829 #define GEN_VEXT_VV(NAME, ESZ) \ 830 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 831 void *vs2, CPURISCVState *env, \ 832 uint32_t desc) \ 833 { \ 834 do_vext_vv(vd, v0, vs1, vs2, env, desc, \ 835 do_##NAME, ESZ); \ 836 } 837 838 GEN_VEXT_VV(vadd_vv_b, 1) 839 GEN_VEXT_VV(vadd_vv_h, 2) 840 GEN_VEXT_VV(vadd_vv_w, 4) 841 GEN_VEXT_VV(vadd_vv_d, 8) 842 GEN_VEXT_VV(vsub_vv_b, 1) 843 GEN_VEXT_VV(vsub_vv_h, 2) 844 GEN_VEXT_VV(vsub_vv_w, 4) 845 GEN_VEXT_VV(vsub_vv_d, 8) 846 847 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 848 849 /* 850 * (T1)s1 gives the real operator type. 851 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 852 */ 853 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 854 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 855 { \ 856 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 857 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 858 } 859 860 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 861 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 862 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 863 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 864 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 865 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 866 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 867 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 868 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 869 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 870 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 871 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 872 873 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 874 CPURISCVState *env, uint32_t desc, 875 opivx2_fn fn, uint32_t esz) 876 { 877 uint32_t vm = vext_vm(desc); 878 uint32_t vl = env->vl; 879 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 880 uint32_t vta = vext_vta(desc); 881 uint32_t i; 882 883 for (i = env->vstart; i < vl; i++) { 884 if (!vm && !vext_elem_mask(v0, i)) { 885 continue; 886 } 887 fn(vd, s1, vs2, i); 888 } 889 env->vstart = 0; 890 /* set tail elements to 1s */ 891 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 892 } 893 894 /* generate the helpers for OPIVX */ 895 #define GEN_VEXT_VX(NAME, ESZ) \ 896 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 897 void *vs2, CPURISCVState *env, \ 898 uint32_t desc) \ 899 { \ 900 do_vext_vx(vd, v0, s1, vs2, env, desc, \ 901 do_##NAME, ESZ); \ 902 } 903 904 GEN_VEXT_VX(vadd_vx_b, 1) 905 GEN_VEXT_VX(vadd_vx_h, 2) 906 GEN_VEXT_VX(vadd_vx_w, 4) 907 GEN_VEXT_VX(vadd_vx_d, 8) 908 GEN_VEXT_VX(vsub_vx_b, 1) 909 GEN_VEXT_VX(vsub_vx_h, 2) 910 GEN_VEXT_VX(vsub_vx_w, 4) 911 GEN_VEXT_VX(vsub_vx_d, 8) 912 GEN_VEXT_VX(vrsub_vx_b, 1) 913 GEN_VEXT_VX(vrsub_vx_h, 2) 914 GEN_VEXT_VX(vrsub_vx_w, 4) 915 GEN_VEXT_VX(vrsub_vx_d, 8) 916 917 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 918 { 919 intptr_t oprsz = simd_oprsz(desc); 920 intptr_t i; 921 922 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 923 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 924 } 925 } 926 927 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 928 { 929 intptr_t oprsz = simd_oprsz(desc); 930 intptr_t i; 931 932 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 933 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 934 } 935 } 936 937 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t 
desc) 938 { 939 intptr_t oprsz = simd_oprsz(desc); 940 intptr_t i; 941 942 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 943 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 944 } 945 } 946 947 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 948 { 949 intptr_t oprsz = simd_oprsz(desc); 950 intptr_t i; 951 952 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 953 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 954 } 955 } 956 957 /* Vector Widening Integer Add/Subtract */ 958 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 959 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 960 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 961 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 962 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 963 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 964 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 965 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 966 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 967 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 968 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 969 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 970 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 971 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 972 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 973 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 974 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 975 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 976 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 977 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 978 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 979 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 980 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 981 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 982 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 983 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 984 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 985 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 986 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 987 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 988 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 989 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 990 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 991 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 992 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 993 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 994 GEN_VEXT_VV(vwaddu_vv_b, 2) 995 GEN_VEXT_VV(vwaddu_vv_h, 4) 996 GEN_VEXT_VV(vwaddu_vv_w, 8) 997 GEN_VEXT_VV(vwsubu_vv_b, 2) 998 GEN_VEXT_VV(vwsubu_vv_h, 4) 999 GEN_VEXT_VV(vwsubu_vv_w, 8) 1000 GEN_VEXT_VV(vwadd_vv_b, 2) 1001 GEN_VEXT_VV(vwadd_vv_h, 4) 1002 GEN_VEXT_VV(vwadd_vv_w, 8) 1003 GEN_VEXT_VV(vwsub_vv_b, 2) 1004 GEN_VEXT_VV(vwsub_vv_h, 4) 1005 GEN_VEXT_VV(vwsub_vv_w, 8) 1006 GEN_VEXT_VV(vwaddu_wv_b, 2) 1007 GEN_VEXT_VV(vwaddu_wv_h, 4) 1008 GEN_VEXT_VV(vwaddu_wv_w, 8) 1009 GEN_VEXT_VV(vwsubu_wv_b, 2) 1010 GEN_VEXT_VV(vwsubu_wv_h, 4) 1011 GEN_VEXT_VV(vwsubu_wv_w, 8) 1012 GEN_VEXT_VV(vwadd_wv_b, 2) 1013 GEN_VEXT_VV(vwadd_wv_h, 4) 1014 GEN_VEXT_VV(vwadd_wv_w, 8) 1015 
GEN_VEXT_VV(vwsub_wv_b, 2) 1016 GEN_VEXT_VV(vwsub_wv_h, 4) 1017 GEN_VEXT_VV(vwsub_wv_w, 8) 1018 1019 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 1020 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 1021 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 1022 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 1023 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 1024 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 1025 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 1026 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 1027 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 1028 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 1029 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 1030 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 1031 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 1032 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 1033 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 1034 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 1035 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 1036 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 1037 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 1038 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 1039 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 1040 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 1041 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 1042 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 1043 GEN_VEXT_VX(vwaddu_vx_b, 2) 1044 GEN_VEXT_VX(vwaddu_vx_h, 4) 1045 GEN_VEXT_VX(vwaddu_vx_w, 8) 1046 GEN_VEXT_VX(vwsubu_vx_b, 2) 1047 GEN_VEXT_VX(vwsubu_vx_h, 4) 1048 GEN_VEXT_VX(vwsubu_vx_w, 8) 1049 GEN_VEXT_VX(vwadd_vx_b, 2) 1050 GEN_VEXT_VX(vwadd_vx_h, 4) 1051 GEN_VEXT_VX(vwadd_vx_w, 8) 1052 GEN_VEXT_VX(vwsub_vx_b, 2) 1053 GEN_VEXT_VX(vwsub_vx_h, 4) 1054 GEN_VEXT_VX(vwsub_vx_w, 8) 1055 GEN_VEXT_VX(vwaddu_wx_b, 2) 1056 GEN_VEXT_VX(vwaddu_wx_h, 4) 1057 GEN_VEXT_VX(vwaddu_wx_w, 8) 1058 GEN_VEXT_VX(vwsubu_wx_b, 2) 1059 GEN_VEXT_VX(vwsubu_wx_h, 4) 1060 GEN_VEXT_VX(vwsubu_wx_w, 8) 1061 GEN_VEXT_VX(vwadd_wx_b, 2) 1062 GEN_VEXT_VX(vwadd_wx_h, 4) 1063 GEN_VEXT_VX(vwadd_wx_w, 8) 1064 GEN_VEXT_VX(vwsub_wx_b, 2) 1065 GEN_VEXT_VX(vwsub_wx_h, 4) 1066 GEN_VEXT_VX(vwsub_wx_w, 8) 1067 1068 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 1069 #define DO_VADC(N, M, C) (N + M + C) 1070 #define DO_VSBC(N, M, C) (N - M - C) 1071 1072 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 1073 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1074 CPURISCVState *env, uint32_t desc) \ 1075 { \ 1076 uint32_t vl = env->vl; \ 1077 uint32_t esz = sizeof(ETYPE); \ 1078 uint32_t total_elems = \ 1079 vext_get_total_elems(env, desc, esz); \ 1080 uint32_t vta = vext_vta(desc); \ 1081 uint32_t i; \ 1082 \ 1083 for (i = env->vstart; i < vl; i++) { \ 1084 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1085 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1086 ETYPE carry = vext_elem_mask(v0, i); \ 1087 \ 1088 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 1089 } \ 1090 env->vstart = 0; \ 1091 /* set tail elements to 1s */ \ 1092 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1093 } 1094 1095 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 1096 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 1097 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 1098 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 1099 1100 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, 
DO_VSBC) 1101 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 1102 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 1103 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 1104 1105 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 1106 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1107 CPURISCVState *env, uint32_t desc) \ 1108 { \ 1109 uint32_t vl = env->vl; \ 1110 uint32_t esz = sizeof(ETYPE); \ 1111 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1112 uint32_t vta = vext_vta(desc); \ 1113 uint32_t i; \ 1114 \ 1115 for (i = env->vstart; i < vl; i++) { \ 1116 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1117 ETYPE carry = vext_elem_mask(v0, i); \ 1118 \ 1119 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1120 } \ 1121 env->vstart = 0; \ 1122 /* set tail elements to 1s */ \ 1123 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1124 } 1125 1126 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1127 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1128 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1129 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1130 1131 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1132 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1133 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1134 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1135 1136 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1137 (__typeof(N))(N + M) < N) 1138 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 1139 1140 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1141 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1142 CPURISCVState *env, uint32_t desc) \ 1143 { \ 1144 uint32_t vl = env->vl; \ 1145 uint32_t vm = vext_vm(desc); \ 1146 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1147 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1148 uint32_t i; \ 1149 \ 1150 for (i = env->vstart; i < vl; i++) { \ 1151 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1152 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1153 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1154 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1155 } \ 1156 env->vstart = 0; \ 1157 /* mask destination register are always tail-agnostic */ \ 1158 /* set tail elements to 1s */ \ 1159 if (vta_all_1s) { \ 1160 for (; i < total_elems; i++) { \ 1161 vext_set_elem_mask(vd, i, 1); \ 1162 } \ 1163 } \ 1164 } 1165 1166 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1167 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1168 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1169 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1170 1171 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1172 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1173 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1174 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1175 1176 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1177 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1178 void *vs2, CPURISCVState *env, uint32_t desc) \ 1179 { \ 1180 uint32_t vl = env->vl; \ 1181 uint32_t vm = vext_vm(desc); \ 1182 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \ 1183 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1184 uint32_t i; \ 1185 \ 1186 for (i = env->vstart; i < vl; i++) { \ 1187 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1188 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1189 vext_set_elem_mask(vd, i, \ 1190 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1191 } \ 1192 env->vstart = 0; \ 1193 
/* mask destination register are always tail-agnostic */ \ 1194 /* set tail elements to 1s */ \ 1195 if (vta_all_1s) { \ 1196 for (; i < total_elems; i++) { \ 1197 vext_set_elem_mask(vd, i, 1); \ 1198 } \ 1199 } \ 1200 } 1201 1202 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1203 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1204 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1205 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1206 1207 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1208 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1209 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1210 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1211 1212 /* Vector Bitwise Logical Instructions */ 1213 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1214 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1215 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1216 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1217 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1218 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1219 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1220 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1221 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1222 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1223 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1224 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1225 GEN_VEXT_VV(vand_vv_b, 1) 1226 GEN_VEXT_VV(vand_vv_h, 2) 1227 GEN_VEXT_VV(vand_vv_w, 4) 1228 GEN_VEXT_VV(vand_vv_d, 8) 1229 GEN_VEXT_VV(vor_vv_b, 1) 1230 GEN_VEXT_VV(vor_vv_h, 2) 1231 GEN_VEXT_VV(vor_vv_w, 4) 1232 GEN_VEXT_VV(vor_vv_d, 8) 1233 GEN_VEXT_VV(vxor_vv_b, 1) 1234 GEN_VEXT_VV(vxor_vv_h, 2) 1235 GEN_VEXT_VV(vxor_vv_w, 4) 1236 GEN_VEXT_VV(vxor_vv_d, 8) 1237 1238 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1239 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1240 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1241 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1242 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1243 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1244 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1245 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1246 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1247 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1248 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1249 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1250 GEN_VEXT_VX(vand_vx_b, 1) 1251 GEN_VEXT_VX(vand_vx_h, 2) 1252 GEN_VEXT_VX(vand_vx_w, 4) 1253 GEN_VEXT_VX(vand_vx_d, 8) 1254 GEN_VEXT_VX(vor_vx_b, 1) 1255 GEN_VEXT_VX(vor_vx_h, 2) 1256 GEN_VEXT_VX(vor_vx_w, 4) 1257 GEN_VEXT_VX(vor_vx_d, 8) 1258 GEN_VEXT_VX(vxor_vx_b, 1) 1259 GEN_VEXT_VX(vxor_vx_h, 2) 1260 GEN_VEXT_VX(vxor_vx_w, 4) 1261 GEN_VEXT_VX(vxor_vx_d, 8) 1262 1263 /* Vector Single-Width Bit Shift Instructions */ 1264 #define DO_SLL(N, M) (N << (M)) 1265 #define DO_SRL(N, M) (N >> (M)) 1266 1267 /* generate the helpers for shift instructions with two vector operators */ 1268 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1269 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1270 void *vs2, CPURISCVState *env, uint32_t desc) \ 1271 { \ 1272 uint32_t vm = vext_vm(desc); \ 1273 uint32_t vl = env->vl; \ 1274 uint32_t i; \ 1275 \ 1276 for (i = env->vstart; i < vl; i++) { \ 1277 if (!vm && !vext_elem_mask(v0, i)) { \ 1278 continue; \ 1279 } \ 1280 TS1 s1 = 
*((TS1 *)vs1 + HS1(i)); \ 1281 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1282 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1283 } \ 1284 env->vstart = 0; \ 1285 } 1286 1287 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1288 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1289 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1290 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1291 1292 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1293 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1294 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1295 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1296 1297 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1298 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1299 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1300 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1301 1302 /* generate the helpers for shift instructions with one vector and one scalar */ 1303 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1304 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1305 void *vs2, CPURISCVState *env, uint32_t desc) \ 1306 { \ 1307 uint32_t vm = vext_vm(desc); \ 1308 uint32_t vl = env->vl; \ 1309 uint32_t i; \ 1310 \ 1311 for (i = env->vstart; i < vl; i++) { \ 1312 if (!vm && !vext_elem_mask(v0, i)) { \ 1313 continue; \ 1314 } \ 1315 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1316 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1317 } \ 1318 env->vstart = 0; \ 1319 } 1320 1321 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1322 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1323 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1324 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1325 1326 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1327 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1328 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1329 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1330 1331 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1332 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1333 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1334 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1335 1336 /* Vector Narrowing Integer Right Shift Instructions */ 1337 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1338 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1339 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1340 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1341 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1342 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1343 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1344 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1345 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1346 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1347 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1348 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 
0x3f) 1349 1350 /* Vector Integer Comparison Instructions */ 1351 #define DO_MSEQ(N, M) (N == M) 1352 #define DO_MSNE(N, M) (N != M) 1353 #define DO_MSLT(N, M) (N < M) 1354 #define DO_MSLE(N, M) (N <= M) 1355 #define DO_MSGT(N, M) (N > M) 1356 1357 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1358 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1359 CPURISCVState *env, uint32_t desc) \ 1360 { \ 1361 uint32_t vm = vext_vm(desc); \ 1362 uint32_t vl = env->vl; \ 1363 uint32_t i; \ 1364 \ 1365 for (i = env->vstart; i < vl; i++) { \ 1366 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1367 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1368 if (!vm && !vext_elem_mask(v0, i)) { \ 1369 continue; \ 1370 } \ 1371 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1372 } \ 1373 env->vstart = 0; \ 1374 } 1375 1376 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1377 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1378 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1379 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1380 1381 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1382 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1383 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1384 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1385 1386 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1387 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1388 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1389 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1390 1391 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1392 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1393 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1394 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1395 1396 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1397 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1398 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1399 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1400 1401 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1402 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1403 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1404 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1405 1406 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1407 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1408 CPURISCVState *env, uint32_t desc) \ 1409 { \ 1410 uint32_t vm = vext_vm(desc); \ 1411 uint32_t vl = env->vl; \ 1412 uint32_t i; \ 1413 \ 1414 for (i = env->vstart; i < vl; i++) { \ 1415 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1416 if (!vm && !vext_elem_mask(v0, i)) { \ 1417 continue; \ 1418 } \ 1419 vext_set_elem_mask(vd, i, \ 1420 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1421 } \ 1422 env->vstart = 0; \ 1423 } 1424 1425 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1426 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1427 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1428 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1429 1430 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1431 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1432 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1433 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1434 1435 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1436 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1437 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1438 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1439 1440 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1441 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1442 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, 
H4, DO_MSLT) 1443 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1444 1445 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1446 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1447 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1448 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1449 1450 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1451 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1452 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1453 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1454 1455 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1456 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1457 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1458 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1459 1460 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1461 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1462 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1463 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1464 1465 /* Vector Integer Min/Max Instructions */ 1466 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1467 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1468 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1469 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1470 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1471 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1472 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1473 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1474 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1475 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1476 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1477 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1478 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1479 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1480 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1481 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1482 GEN_VEXT_VV(vminu_vv_b, 1) 1483 GEN_VEXT_VV(vminu_vv_h, 2) 1484 GEN_VEXT_VV(vminu_vv_w, 4) 1485 GEN_VEXT_VV(vminu_vv_d, 8) 1486 GEN_VEXT_VV(vmin_vv_b, 1) 1487 GEN_VEXT_VV(vmin_vv_h, 2) 1488 GEN_VEXT_VV(vmin_vv_w, 4) 1489 GEN_VEXT_VV(vmin_vv_d, 8) 1490 GEN_VEXT_VV(vmaxu_vv_b, 1) 1491 GEN_VEXT_VV(vmaxu_vv_h, 2) 1492 GEN_VEXT_VV(vmaxu_vv_w, 4) 1493 GEN_VEXT_VV(vmaxu_vv_d, 8) 1494 GEN_VEXT_VV(vmax_vv_b, 1) 1495 GEN_VEXT_VV(vmax_vv_h, 2) 1496 GEN_VEXT_VV(vmax_vv_w, 4) 1497 GEN_VEXT_VV(vmax_vv_d, 8) 1498 1499 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1500 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1501 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1502 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1503 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1504 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1505 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1506 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1507 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1508 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1509 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1510 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1511 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1512 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1513 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1514 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1515 GEN_VEXT_VX(vminu_vx_b, 1) 1516 GEN_VEXT_VX(vminu_vx_h, 2) 1517 
GEN_VEXT_VX(vminu_vx_w, 4) 1518 GEN_VEXT_VX(vminu_vx_d, 8) 1519 GEN_VEXT_VX(vmin_vx_b, 1) 1520 GEN_VEXT_VX(vmin_vx_h, 2) 1521 GEN_VEXT_VX(vmin_vx_w, 4) 1522 GEN_VEXT_VX(vmin_vx_d, 8) 1523 GEN_VEXT_VX(vmaxu_vx_b, 1) 1524 GEN_VEXT_VX(vmaxu_vx_h, 2) 1525 GEN_VEXT_VX(vmaxu_vx_w, 4) 1526 GEN_VEXT_VX(vmaxu_vx_d, 8) 1527 GEN_VEXT_VX(vmax_vx_b, 1) 1528 GEN_VEXT_VX(vmax_vx_h, 2) 1529 GEN_VEXT_VX(vmax_vx_w, 4) 1530 GEN_VEXT_VX(vmax_vx_d, 8) 1531 1532 /* Vector Single-Width Integer Multiply Instructions */ 1533 #define DO_MUL(N, M) (N * M) 1534 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1535 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1536 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1537 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1538 GEN_VEXT_VV(vmul_vv_b, 1) 1539 GEN_VEXT_VV(vmul_vv_h, 2) 1540 GEN_VEXT_VV(vmul_vv_w, 4) 1541 GEN_VEXT_VV(vmul_vv_d, 8) 1542 1543 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1544 { 1545 return (int16_t)s2 * (int16_t)s1 >> 8; 1546 } 1547 1548 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1549 { 1550 return (int32_t)s2 * (int32_t)s1 >> 16; 1551 } 1552 1553 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1554 { 1555 return (int64_t)s2 * (int64_t)s1 >> 32; 1556 } 1557 1558 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1559 { 1560 uint64_t hi_64, lo_64; 1561 1562 muls64(&lo_64, &hi_64, s1, s2); 1563 return hi_64; 1564 } 1565 1566 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1567 { 1568 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1569 } 1570 1571 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1572 { 1573 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1574 } 1575 1576 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1577 { 1578 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1579 } 1580 1581 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1582 { 1583 uint64_t hi_64, lo_64; 1584 1585 mulu64(&lo_64, &hi_64, s2, s1); 1586 return hi_64; 1587 } 1588 1589 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1590 { 1591 return (int16_t)s2 * (uint16_t)s1 >> 8; 1592 } 1593 1594 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1595 { 1596 return (int32_t)s2 * (uint32_t)s1 >> 16; 1597 } 1598 1599 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1600 { 1601 return (int64_t)s2 * (uint64_t)s1 >> 32; 1602 } 1603 1604 /* 1605 * Let A = signed operand, 1606 * B = unsigned operand 1607 * P = mulu64(A, B), unsigned product 1608 * 1609 * LET X = 2 ** 64 - A, 2's complement of A 1610 * SP = signed product 1611 * THEN 1612 * IF A < 0 1613 * SP = -X * B 1614 * = -(2 ** 64 - A) * B 1615 * = A * B - 2 ** 64 * B 1616 * = P - 2 ** 64 * B 1617 * ELSE 1618 * SP = P 1619 * THEN 1620 * HI_P -= (A < 0 ? B : 0) 1621 */ 1622 1623 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1624 { 1625 uint64_t hi_64, lo_64; 1626 1627 mulu64(&lo_64, &hi_64, s2, s1); 1628 1629 hi_64 -= s2 < 0 ? 
s1 : 0; 1630 return hi_64; 1631 } 1632 1633 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1634 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1635 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1636 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1637 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1638 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1639 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1640 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1641 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1642 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1643 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1644 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1645 GEN_VEXT_VV(vmulh_vv_b, 1) 1646 GEN_VEXT_VV(vmulh_vv_h, 2) 1647 GEN_VEXT_VV(vmulh_vv_w, 4) 1648 GEN_VEXT_VV(vmulh_vv_d, 8) 1649 GEN_VEXT_VV(vmulhu_vv_b, 1) 1650 GEN_VEXT_VV(vmulhu_vv_h, 2) 1651 GEN_VEXT_VV(vmulhu_vv_w, 4) 1652 GEN_VEXT_VV(vmulhu_vv_d, 8) 1653 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1654 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1655 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1656 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1657 1658 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1659 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1660 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1661 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1662 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1663 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1664 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1665 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1666 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1667 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1668 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1669 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1670 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1671 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1672 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1673 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1674 GEN_VEXT_VX(vmul_vx_b, 1) 1675 GEN_VEXT_VX(vmul_vx_h, 2) 1676 GEN_VEXT_VX(vmul_vx_w, 4) 1677 GEN_VEXT_VX(vmul_vx_d, 8) 1678 GEN_VEXT_VX(vmulh_vx_b, 1) 1679 GEN_VEXT_VX(vmulh_vx_h, 2) 1680 GEN_VEXT_VX(vmulh_vx_w, 4) 1681 GEN_VEXT_VX(vmulh_vx_d, 8) 1682 GEN_VEXT_VX(vmulhu_vx_b, 1) 1683 GEN_VEXT_VX(vmulhu_vx_h, 2) 1684 GEN_VEXT_VX(vmulhu_vx_w, 4) 1685 GEN_VEXT_VX(vmulhu_vx_d, 8) 1686 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1687 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1688 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1689 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1690 1691 /* Vector Integer Divide Instructions */ 1692 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1693 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1694 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1695 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1696 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1697 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1698 1699 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1700 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1701 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1702 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1703 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1704 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1705 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1706 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1707 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1708 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1709 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1710 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1711 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1712 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1713 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1714 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1715 GEN_VEXT_VV(vdivu_vv_b, 1) 1716 GEN_VEXT_VV(vdivu_vv_h, 2) 1717 GEN_VEXT_VV(vdivu_vv_w, 4) 1718 GEN_VEXT_VV(vdivu_vv_d, 8) 1719 GEN_VEXT_VV(vdiv_vv_b, 1) 1720 GEN_VEXT_VV(vdiv_vv_h, 2) 1721 GEN_VEXT_VV(vdiv_vv_w, 4) 1722 GEN_VEXT_VV(vdiv_vv_d, 8) 1723 GEN_VEXT_VV(vremu_vv_b, 1) 1724 GEN_VEXT_VV(vremu_vv_h, 2) 1725 GEN_VEXT_VV(vremu_vv_w, 4) 1726 GEN_VEXT_VV(vremu_vv_d, 8) 1727 GEN_VEXT_VV(vrem_vv_b, 1) 1728 GEN_VEXT_VV(vrem_vv_h, 2) 1729 GEN_VEXT_VV(vrem_vv_w, 4) 1730 GEN_VEXT_VV(vrem_vv_d, 8) 1731 1732 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1733 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1734 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1735 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1736 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1737 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1738 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1739 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1740 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1741 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1742 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1743 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1744 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1745 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1746 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1747 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1748 GEN_VEXT_VX(vdivu_vx_b, 1) 1749 GEN_VEXT_VX(vdivu_vx_h, 2) 1750 GEN_VEXT_VX(vdivu_vx_w, 4) 1751 GEN_VEXT_VX(vdivu_vx_d, 8) 1752 GEN_VEXT_VX(vdiv_vx_b, 1) 1753 GEN_VEXT_VX(vdiv_vx_h, 2) 1754 GEN_VEXT_VX(vdiv_vx_w, 4) 1755 GEN_VEXT_VX(vdiv_vx_d, 8) 1756 GEN_VEXT_VX(vremu_vx_b, 1) 1757 GEN_VEXT_VX(vremu_vx_h, 2) 1758 GEN_VEXT_VX(vremu_vx_w, 4) 1759 GEN_VEXT_VX(vremu_vx_d, 8) 1760 GEN_VEXT_VX(vrem_vx_b, 1) 1761 GEN_VEXT_VX(vrem_vx_h, 2) 1762 GEN_VEXT_VX(vrem_vx_w, 4) 1763 GEN_VEXT_VX(vrem_vx_d, 8) 1764 1765 /* Vector Widening Integer Multiply Instructions */ 1766 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1767 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1768 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1769 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1770 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1771 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1772 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1773 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, 
DO_MUL) 1774 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1775 GEN_VEXT_VV(vwmul_vv_b, 2) 1776 GEN_VEXT_VV(vwmul_vv_h, 4) 1777 GEN_VEXT_VV(vwmul_vv_w, 8) 1778 GEN_VEXT_VV(vwmulu_vv_b, 2) 1779 GEN_VEXT_VV(vwmulu_vv_h, 4) 1780 GEN_VEXT_VV(vwmulu_vv_w, 8) 1781 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1782 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1783 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1784 1785 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1786 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1787 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1788 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1789 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1790 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1791 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1792 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1793 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1794 GEN_VEXT_VX(vwmul_vx_b, 2) 1795 GEN_VEXT_VX(vwmul_vx_h, 4) 1796 GEN_VEXT_VX(vwmul_vx_w, 8) 1797 GEN_VEXT_VX(vwmulu_vx_b, 2) 1798 GEN_VEXT_VX(vwmulu_vx_h, 4) 1799 GEN_VEXT_VX(vwmulu_vx_w, 8) 1800 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1801 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1802 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1803 1804 /* Vector Single-Width Integer Multiply-Add Instructions */ 1805 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1806 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1807 { \ 1808 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1809 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1810 TD d = *((TD *)vd + HD(i)); \ 1811 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1812 } 1813 1814 #define DO_MACC(N, M, D) (M * N + D) 1815 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1816 #define DO_MADD(N, M, D) (M * D + N) 1817 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1818 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1819 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1820 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1821 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1822 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1823 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1824 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1825 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1826 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1827 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1828 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1829 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1830 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1831 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1832 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1833 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1834 GEN_VEXT_VV(vmacc_vv_b, 1) 1835 GEN_VEXT_VV(vmacc_vv_h, 2) 1836 GEN_VEXT_VV(vmacc_vv_w, 4) 1837 GEN_VEXT_VV(vmacc_vv_d, 8) 1838 GEN_VEXT_VV(vnmsac_vv_b, 1) 1839 GEN_VEXT_VV(vnmsac_vv_h, 2) 1840 GEN_VEXT_VV(vnmsac_vv_w, 4) 1841 GEN_VEXT_VV(vnmsac_vv_d, 8) 1842 GEN_VEXT_VV(vmadd_vv_b, 1) 1843 GEN_VEXT_VV(vmadd_vv_h, 2) 1844 GEN_VEXT_VV(vmadd_vv_w, 4) 1845 GEN_VEXT_VV(vmadd_vv_d, 8) 1846 GEN_VEXT_VV(vnmsub_vv_b, 1) 1847 GEN_VEXT_VV(vnmsub_vv_h, 2) 1848 GEN_VEXT_VV(vnmsub_vv_w, 4) 1849 GEN_VEXT_VV(vnmsub_vv_d, 8) 1850 1851 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1852 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1853 { \ 1854 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1855 TD d = *((TD *)vd 
+ HD(i)); \ 1856 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1857 } 1858 1859 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1860 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1861 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1862 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1863 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1864 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1865 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1866 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1867 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1868 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1869 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1870 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1871 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1872 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1873 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1874 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1875 GEN_VEXT_VX(vmacc_vx_b, 1) 1876 GEN_VEXT_VX(vmacc_vx_h, 2) 1877 GEN_VEXT_VX(vmacc_vx_w, 4) 1878 GEN_VEXT_VX(vmacc_vx_d, 8) 1879 GEN_VEXT_VX(vnmsac_vx_b, 1) 1880 GEN_VEXT_VX(vnmsac_vx_h, 2) 1881 GEN_VEXT_VX(vnmsac_vx_w, 4) 1882 GEN_VEXT_VX(vnmsac_vx_d, 8) 1883 GEN_VEXT_VX(vmadd_vx_b, 1) 1884 GEN_VEXT_VX(vmadd_vx_h, 2) 1885 GEN_VEXT_VX(vmadd_vx_w, 4) 1886 GEN_VEXT_VX(vmadd_vx_d, 8) 1887 GEN_VEXT_VX(vnmsub_vx_b, 1) 1888 GEN_VEXT_VX(vnmsub_vx_h, 2) 1889 GEN_VEXT_VX(vnmsub_vx_w, 4) 1890 GEN_VEXT_VX(vnmsub_vx_d, 8) 1891 1892 /* Vector Widening Integer Multiply-Add Instructions */ 1893 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1894 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1895 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1896 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1897 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1898 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1899 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1900 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1901 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1902 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1903 GEN_VEXT_VV(vwmaccu_vv_h, 4) 1904 GEN_VEXT_VV(vwmaccu_vv_w, 8) 1905 GEN_VEXT_VV(vwmacc_vv_b, 2) 1906 GEN_VEXT_VV(vwmacc_vv_h, 4) 1907 GEN_VEXT_VV(vwmacc_vv_w, 8) 1908 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 1909 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 1910 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 1911 1912 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1913 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1914 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1915 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1916 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1917 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1918 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1919 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1920 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1921 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1922 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1923 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1924 GEN_VEXT_VX(vwmaccu_vx_b, 2) 1925 GEN_VEXT_VX(vwmaccu_vx_h, 4) 1926 GEN_VEXT_VX(vwmaccu_vx_w, 8) 1927 GEN_VEXT_VX(vwmacc_vx_b, 2) 1928 GEN_VEXT_VX(vwmacc_vx_h, 4) 1929 GEN_VEXT_VX(vwmacc_vx_w, 8) 1930 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 1931 
GEN_VEXT_VX(vwmaccsu_vx_h, 4) 1932 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 1933 GEN_VEXT_VX(vwmaccus_vx_b, 2) 1934 GEN_VEXT_VX(vwmaccus_vx_h, 4) 1935 GEN_VEXT_VX(vwmaccus_vx_w, 8) 1936 1937 /* Vector Integer Merge and Move Instructions */ 1938 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1939 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1940 uint32_t desc) \ 1941 { \ 1942 uint32_t vl = env->vl; \ 1943 uint32_t i; \ 1944 \ 1945 for (i = env->vstart; i < vl; i++) { \ 1946 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1947 *((ETYPE *)vd + H(i)) = s1; \ 1948 } \ 1949 env->vstart = 0; \ 1950 } 1951 1952 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1953 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1954 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1955 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1956 1957 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1958 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1959 uint32_t desc) \ 1960 { \ 1961 uint32_t vl = env->vl; \ 1962 uint32_t i; \ 1963 \ 1964 for (i = env->vstart; i < vl; i++) { \ 1965 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1966 } \ 1967 env->vstart = 0; \ 1968 } 1969 1970 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1971 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1972 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1973 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1974 1975 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1976 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1977 CPURISCVState *env, uint32_t desc) \ 1978 { \ 1979 uint32_t vl = env->vl; \ 1980 uint32_t i; \ 1981 \ 1982 for (i = env->vstart; i < vl; i++) { \ 1983 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 1984 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1985 } \ 1986 env->vstart = 0; \ 1987 } 1988 1989 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1990 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1991 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1992 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1993 1994 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1995 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1996 void *vs2, CPURISCVState *env, uint32_t desc) \ 1997 { \ 1998 uint32_t vl = env->vl; \ 1999 uint32_t i; \ 2000 \ 2001 for (i = env->vstart; i < vl; i++) { \ 2002 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2003 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 2004 (ETYPE)(target_long)s1); \ 2005 *((ETYPE *)vd + H(i)) = d; \ 2006 } \ 2007 env->vstart = 0; \ 2008 } 2009 2010 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2011 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2012 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2013 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2014 2015 /* 2016 *** Vector Fixed-Point Arithmetic Instructions 2017 */ 2018 2019 /* Vector Single-Width Saturating Add and Subtract */ 2020 2021 /* 2022 * As fixed point instructions probably have round mode and saturation, 2023 * define common macros for fixed point here. 
2024 */ 2025 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2026 CPURISCVState *env, int vxrm); 2027 2028 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2029 static inline void \ 2030 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2031 CPURISCVState *env, int vxrm) \ 2032 { \ 2033 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2034 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2035 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2036 } 2037 2038 static inline void 2039 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2040 CPURISCVState *env, 2041 uint32_t vl, uint32_t vm, int vxrm, 2042 opivv2_rm_fn *fn) 2043 { 2044 for (uint32_t i = env->vstart; i < vl; i++) { 2045 if (!vm && !vext_elem_mask(v0, i)) { 2046 continue; 2047 } 2048 fn(vd, vs1, vs2, i, env, vxrm); 2049 } 2050 env->vstart = 0; 2051 } 2052 2053 static inline void 2054 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2055 CPURISCVState *env, 2056 uint32_t desc, 2057 opivv2_rm_fn *fn) 2058 { 2059 uint32_t vm = vext_vm(desc); 2060 uint32_t vl = env->vl; 2061 2062 switch (env->vxrm) { 2063 case 0: /* rnu */ 2064 vext_vv_rm_1(vd, v0, vs1, vs2, 2065 env, vl, vm, 0, fn); 2066 break; 2067 case 1: /* rne */ 2068 vext_vv_rm_1(vd, v0, vs1, vs2, 2069 env, vl, vm, 1, fn); 2070 break; 2071 case 2: /* rdn */ 2072 vext_vv_rm_1(vd, v0, vs1, vs2, 2073 env, vl, vm, 2, fn); 2074 break; 2075 default: /* rod */ 2076 vext_vv_rm_1(vd, v0, vs1, vs2, 2077 env, vl, vm, 3, fn); 2078 break; 2079 } 2080 } 2081 2082 /* generate helpers for fixed point instructions with OPIVV format */ 2083 #define GEN_VEXT_VV_RM(NAME) \ 2084 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2085 CPURISCVState *env, uint32_t desc) \ 2086 { \ 2087 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2088 do_##NAME); \ 2089 } 2090 2091 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2092 { 2093 uint8_t res = a + b; 2094 if (res < a) { 2095 res = UINT8_MAX; 2096 env->vxsat = 0x1; 2097 } 2098 return res; 2099 } 2100 2101 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2102 uint16_t b) 2103 { 2104 uint16_t res = a + b; 2105 if (res < a) { 2106 res = UINT16_MAX; 2107 env->vxsat = 0x1; 2108 } 2109 return res; 2110 } 2111 2112 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2113 uint32_t b) 2114 { 2115 uint32_t res = a + b; 2116 if (res < a) { 2117 res = UINT32_MAX; 2118 env->vxsat = 0x1; 2119 } 2120 return res; 2121 } 2122 2123 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2124 uint64_t b) 2125 { 2126 uint64_t res = a + b; 2127 if (res < a) { 2128 res = UINT64_MAX; 2129 env->vxsat = 0x1; 2130 } 2131 return res; 2132 } 2133 2134 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2135 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2136 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2137 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2138 GEN_VEXT_VV_RM(vsaddu_vv_b) 2139 GEN_VEXT_VV_RM(vsaddu_vv_h) 2140 GEN_VEXT_VV_RM(vsaddu_vv_w) 2141 GEN_VEXT_VV_RM(vsaddu_vv_d) 2142 2143 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2144 CPURISCVState *env, int vxrm); 2145 2146 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2147 static inline void \ 2148 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2149 CPURISCVState *env, int vxrm) \ 2150 { \ 2151 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2152 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2153 } 2154 2155 
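/*
 * Note (illustrative): the OPIVX_RM helpers below mirror the OPIVV_RM path
 * above.  The per-element load from vs1 is replaced by the scalar s1, cast
 * to the source element type via (TX1)(T1)s1, and vext_vx_rm_2() performs
 * the same dispatch on env->vxrm (0 = rnu, 1 = rne, 2 = rdn, 3 = rod) as
 * vext_vv_rm_2().
 *
 * Worked example for the unsigned saturating add defined above:
 *   saddu8(0xf0, 0x20): the 8-bit sum wraps to 0x10; since 0x10 < 0xf0 the
 *   carry-out is detected, so the result saturates to UINT8_MAX and
 *   env->vxsat is set to 1.
 */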
static inline void 2156 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2157 CPURISCVState *env, 2158 uint32_t vl, uint32_t vm, int vxrm, 2159 opivx2_rm_fn *fn) 2160 { 2161 for (uint32_t i = env->vstart; i < vl; i++) { 2162 if (!vm && !vext_elem_mask(v0, i)) { 2163 continue; 2164 } 2165 fn(vd, s1, vs2, i, env, vxrm); 2166 } 2167 env->vstart = 0; 2168 } 2169 2170 static inline void 2171 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2172 CPURISCVState *env, 2173 uint32_t desc, 2174 opivx2_rm_fn *fn) 2175 { 2176 uint32_t vm = vext_vm(desc); 2177 uint32_t vl = env->vl; 2178 2179 switch (env->vxrm) { 2180 case 0: /* rnu */ 2181 vext_vx_rm_1(vd, v0, s1, vs2, 2182 env, vl, vm, 0, fn); 2183 break; 2184 case 1: /* rne */ 2185 vext_vx_rm_1(vd, v0, s1, vs2, 2186 env, vl, vm, 1, fn); 2187 break; 2188 case 2: /* rdn */ 2189 vext_vx_rm_1(vd, v0, s1, vs2, 2190 env, vl, vm, 2, fn); 2191 break; 2192 default: /* rod */ 2193 vext_vx_rm_1(vd, v0, s1, vs2, 2194 env, vl, vm, 3, fn); 2195 break; 2196 } 2197 } 2198 2199 /* generate helpers for fixed point instructions with OPIVX format */ 2200 #define GEN_VEXT_VX_RM(NAME) \ 2201 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2202 void *vs2, CPURISCVState *env, uint32_t desc) \ 2203 { \ 2204 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2205 do_##NAME); \ 2206 } 2207 2208 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2209 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2210 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2211 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2212 GEN_VEXT_VX_RM(vsaddu_vx_b) 2213 GEN_VEXT_VX_RM(vsaddu_vx_h) 2214 GEN_VEXT_VX_RM(vsaddu_vx_w) 2215 GEN_VEXT_VX_RM(vsaddu_vx_d) 2216 2217 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2218 { 2219 int8_t res = a + b; 2220 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2221 res = a > 0 ? INT8_MAX : INT8_MIN; 2222 env->vxsat = 0x1; 2223 } 2224 return res; 2225 } 2226 2227 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2228 { 2229 int16_t res = a + b; 2230 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2231 res = a > 0 ? INT16_MAX : INT16_MIN; 2232 env->vxsat = 0x1; 2233 } 2234 return res; 2235 } 2236 2237 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2238 { 2239 int32_t res = a + b; 2240 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2241 res = a > 0 ? INT32_MAX : INT32_MIN; 2242 env->vxsat = 0x1; 2243 } 2244 return res; 2245 } 2246 2247 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2248 { 2249 int64_t res = a + b; 2250 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2251 res = a > 0 ? 
INT64_MAX : INT64_MIN; 2252 env->vxsat = 0x1; 2253 } 2254 return res; 2255 } 2256 2257 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2258 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2259 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2260 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2261 GEN_VEXT_VV_RM(vsadd_vv_b) 2262 GEN_VEXT_VV_RM(vsadd_vv_h) 2263 GEN_VEXT_VV_RM(vsadd_vv_w) 2264 GEN_VEXT_VV_RM(vsadd_vv_d) 2265 2266 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2267 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2268 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2269 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2270 GEN_VEXT_VX_RM(vsadd_vx_b) 2271 GEN_VEXT_VX_RM(vsadd_vx_h) 2272 GEN_VEXT_VX_RM(vsadd_vx_w) 2273 GEN_VEXT_VX_RM(vsadd_vx_d) 2274 2275 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2276 { 2277 uint8_t res = a - b; 2278 if (res > a) { 2279 res = 0; 2280 env->vxsat = 0x1; 2281 } 2282 return res; 2283 } 2284 2285 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2286 uint16_t b) 2287 { 2288 uint16_t res = a - b; 2289 if (res > a) { 2290 res = 0; 2291 env->vxsat = 0x1; 2292 } 2293 return res; 2294 } 2295 2296 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2297 uint32_t b) 2298 { 2299 uint32_t res = a - b; 2300 if (res > a) { 2301 res = 0; 2302 env->vxsat = 0x1; 2303 } 2304 return res; 2305 } 2306 2307 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2308 uint64_t b) 2309 { 2310 uint64_t res = a - b; 2311 if (res > a) { 2312 res = 0; 2313 env->vxsat = 0x1; 2314 } 2315 return res; 2316 } 2317 2318 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2319 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2320 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2321 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2322 GEN_VEXT_VV_RM(vssubu_vv_b) 2323 GEN_VEXT_VV_RM(vssubu_vv_h) 2324 GEN_VEXT_VV_RM(vssubu_vv_w) 2325 GEN_VEXT_VV_RM(vssubu_vv_d) 2326 2327 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2328 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2329 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2330 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2331 GEN_VEXT_VX_RM(vssubu_vx_b) 2332 GEN_VEXT_VX_RM(vssubu_vx_h) 2333 GEN_VEXT_VX_RM(vssubu_vx_w) 2334 GEN_VEXT_VX_RM(vssubu_vx_d) 2335 2336 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2337 { 2338 int8_t res = a - b; 2339 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2340 res = a >= 0 ? INT8_MAX : INT8_MIN; 2341 env->vxsat = 0x1; 2342 } 2343 return res; 2344 } 2345 2346 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2347 { 2348 int16_t res = a - b; 2349 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2350 res = a >= 0 ? INT16_MAX : INT16_MIN; 2351 env->vxsat = 0x1; 2352 } 2353 return res; 2354 } 2355 2356 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2357 { 2358 int32_t res = a - b; 2359 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2360 res = a >= 0 ? INT32_MAX : INT32_MIN; 2361 env->vxsat = 0x1; 2362 } 2363 return res; 2364 } 2365 2366 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2367 { 2368 int64_t res = a - b; 2369 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2370 res = a >= 0 ? 
                  INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
GEN_VEXT_VV_RM(vssub_vv_b)
GEN_VEXT_VV_RM(vssub_vv_h)
GEN_VEXT_VV_RM(vssub_vv_w)
GEN_VEXT_VV_RM(vssub_vv_d)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
GEN_VEXT_VX_RM(vssub_vx_b)
GEN_VEXT_VX_RM(vssub_vx_h)
GEN_VEXT_VX_RM(vssub_vx_w)
GEN_VEXT_VX_RM(vssub_vx_d)

/* Vector Single-Width Averaging Add and Subtract */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d = extract64(v, shift, 1);
    uint8_t d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    d1 = extract64(v, shift - 1, 1);
    D1 = extract64(v, 0, shift);
    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            D2 = extract64(v, 0, shift - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}

static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    int64_t res = (int64_t)a + b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    int64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63.
*/ 2437 return ((res >> 1) ^ over) + round; 2438 } 2439 2440 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2441 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2442 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2443 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2444 GEN_VEXT_VV_RM(vaadd_vv_b) 2445 GEN_VEXT_VV_RM(vaadd_vv_h) 2446 GEN_VEXT_VV_RM(vaadd_vv_w) 2447 GEN_VEXT_VV_RM(vaadd_vv_d) 2448 2449 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2450 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2451 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2452 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2453 GEN_VEXT_VX_RM(vaadd_vx_b) 2454 GEN_VEXT_VX_RM(vaadd_vx_h) 2455 GEN_VEXT_VX_RM(vaadd_vx_w) 2456 GEN_VEXT_VX_RM(vaadd_vx_d) 2457 2458 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2459 uint32_t a, uint32_t b) 2460 { 2461 uint64_t res = (uint64_t)a + b; 2462 uint8_t round = get_round(vxrm, res, 1); 2463 2464 return (res >> 1) + round; 2465 } 2466 2467 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2468 uint64_t a, uint64_t b) 2469 { 2470 uint64_t res = a + b; 2471 uint8_t round = get_round(vxrm, res, 1); 2472 uint64_t over = (uint64_t)(res < a) << 63; 2473 2474 return ((res >> 1) | over) + round; 2475 } 2476 2477 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2478 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2479 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2480 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2481 GEN_VEXT_VV_RM(vaaddu_vv_b) 2482 GEN_VEXT_VV_RM(vaaddu_vv_h) 2483 GEN_VEXT_VV_RM(vaaddu_vv_w) 2484 GEN_VEXT_VV_RM(vaaddu_vv_d) 2485 2486 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2487 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2488 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2489 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2490 GEN_VEXT_VX_RM(vaaddu_vx_b) 2491 GEN_VEXT_VX_RM(vaaddu_vx_h) 2492 GEN_VEXT_VX_RM(vaaddu_vx_w) 2493 GEN_VEXT_VX_RM(vaaddu_vx_d) 2494 2495 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2496 { 2497 int64_t res = (int64_t)a - b; 2498 uint8_t round = get_round(vxrm, res, 1); 2499 2500 return (res >> 1) + round; 2501 } 2502 2503 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2504 { 2505 int64_t res = (int64_t)a - b; 2506 uint8_t round = get_round(vxrm, res, 1); 2507 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2508 2509 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2510 return ((res >> 1) ^ over) + round; 2511 } 2512 2513 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2514 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2515 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2516 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2517 GEN_VEXT_VV_RM(vasub_vv_b) 2518 GEN_VEXT_VV_RM(vasub_vv_h) 2519 GEN_VEXT_VV_RM(vasub_vv_w) 2520 GEN_VEXT_VV_RM(vasub_vv_d) 2521 2522 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2523 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2524 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2525 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2526 GEN_VEXT_VX_RM(vasub_vx_b) 2527 GEN_VEXT_VX_RM(vasub_vx_h) 2528 GEN_VEXT_VX_RM(vasub_vx_w) 2529 GEN_VEXT_VX_RM(vasub_vx_d) 2530 2531 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2532 uint32_t a, uint32_t b) 2533 { 2534 int64_t res = (int64_t)a - b; 2535 uint8_t round = get_round(vxrm, res, 1); 2536 2537 return (res >> 1) + round; 2538 } 2539 2540 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2541 uint64_t a, uint64_t b) 2542 { 2543 uint64_t res = (uint64_t)a - b; 2544 uint8_t round = get_round(vxrm, res, 1); 2545 uint64_t over = (uint64_t)(res > a) << 63; 2546 2547 return ((res >> 1) | over) + round; 2548 } 2549 2550 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2551 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2552 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2553 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2554 GEN_VEXT_VV_RM(vasubu_vv_b) 2555 GEN_VEXT_VV_RM(vasubu_vv_h) 2556 GEN_VEXT_VV_RM(vasubu_vv_w) 2557 GEN_VEXT_VV_RM(vasubu_vv_d) 2558 2559 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2560 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2561 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2562 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2563 GEN_VEXT_VX_RM(vasubu_vx_b) 2564 GEN_VEXT_VX_RM(vasubu_vx_h) 2565 GEN_VEXT_VX_RM(vasubu_vx_w) 2566 GEN_VEXT_VX_RM(vasubu_vx_d) 2567 2568 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2569 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2570 { 2571 uint8_t round; 2572 int16_t res; 2573 2574 res = (int16_t)a * (int16_t)b; 2575 round = get_round(vxrm, res, 7); 2576 res = (res >> 7) + round; 2577 2578 if (res > INT8_MAX) { 2579 env->vxsat = 0x1; 2580 return INT8_MAX; 2581 } else if (res < INT8_MIN) { 2582 env->vxsat = 0x1; 2583 return INT8_MIN; 2584 } else { 2585 return res; 2586 } 2587 } 2588 2589 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2590 { 2591 uint8_t round; 2592 int32_t res; 2593 2594 res = (int32_t)a * (int32_t)b; 2595 round = get_round(vxrm, res, 15); 2596 res = (res >> 15) + round; 2597 2598 if (res > INT16_MAX) { 2599 env->vxsat = 0x1; 2600 return INT16_MAX; 2601 } else if (res < INT16_MIN) { 2602 env->vxsat = 0x1; 2603 return INT16_MIN; 2604 } else { 2605 return res; 2606 } 2607 } 2608 2609 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2610 { 2611 uint8_t round; 2612 int64_t res; 2613 2614 res = (int64_t)a * (int64_t)b; 2615 round = get_round(vxrm, res, 31); 2616 res = (res >> 31) + round; 2617 2618 if (res > INT32_MAX) { 2619 env->vxsat = 0x1; 2620 return INT32_MAX; 2621 } else if (res < INT32_MIN) { 2622 env->vxsat = 0x1; 2623 return INT32_MIN; 2624 } else { 2625 return 
res; 2626 } 2627 } 2628 2629 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2630 { 2631 uint8_t round; 2632 uint64_t hi_64, lo_64; 2633 int64_t res; 2634 2635 if (a == INT64_MIN && b == INT64_MIN) { 2636 env->vxsat = 1; 2637 return INT64_MAX; 2638 } 2639 2640 muls64(&lo_64, &hi_64, a, b); 2641 round = get_round(vxrm, lo_64, 63); 2642 /* 2643 * Cannot overflow, as there are always 2644 * 2 sign bits after multiply. 2645 */ 2646 res = (hi_64 << 1) | (lo_64 >> 63); 2647 if (round) { 2648 if (res == INT64_MAX) { 2649 env->vxsat = 1; 2650 } else { 2651 res += 1; 2652 } 2653 } 2654 return res; 2655 } 2656 2657 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2658 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2659 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2660 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2661 GEN_VEXT_VV_RM(vsmul_vv_b) 2662 GEN_VEXT_VV_RM(vsmul_vv_h) 2663 GEN_VEXT_VV_RM(vsmul_vv_w) 2664 GEN_VEXT_VV_RM(vsmul_vv_d) 2665 2666 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2667 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2668 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2669 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2670 GEN_VEXT_VX_RM(vsmul_vx_b) 2671 GEN_VEXT_VX_RM(vsmul_vx_h) 2672 GEN_VEXT_VX_RM(vsmul_vx_w) 2673 GEN_VEXT_VX_RM(vsmul_vx_d) 2674 2675 /* Vector Single-Width Scaling Shift Instructions */ 2676 static inline uint8_t 2677 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2678 { 2679 uint8_t round, shift = b & 0x7; 2680 uint8_t res; 2681 2682 round = get_round(vxrm, a, shift); 2683 res = (a >> shift) + round; 2684 return res; 2685 } 2686 static inline uint16_t 2687 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2688 { 2689 uint8_t round, shift = b & 0xf; 2690 uint16_t res; 2691 2692 round = get_round(vxrm, a, shift); 2693 res = (a >> shift) + round; 2694 return res; 2695 } 2696 static inline uint32_t 2697 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2698 { 2699 uint8_t round, shift = b & 0x1f; 2700 uint32_t res; 2701 2702 round = get_round(vxrm, a, shift); 2703 res = (a >> shift) + round; 2704 return res; 2705 } 2706 static inline uint64_t 2707 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2708 { 2709 uint8_t round, shift = b & 0x3f; 2710 uint64_t res; 2711 2712 round = get_round(vxrm, a, shift); 2713 res = (a >> shift) + round; 2714 return res; 2715 } 2716 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2717 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2718 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2719 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2720 GEN_VEXT_VV_RM(vssrl_vv_b) 2721 GEN_VEXT_VV_RM(vssrl_vv_h) 2722 GEN_VEXT_VV_RM(vssrl_vv_w) 2723 GEN_VEXT_VV_RM(vssrl_vv_d) 2724 2725 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2726 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2727 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2728 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2729 GEN_VEXT_VX_RM(vssrl_vx_b) 2730 GEN_VEXT_VX_RM(vssrl_vx_h) 2731 GEN_VEXT_VX_RM(vssrl_vx_w) 2732 GEN_VEXT_VX_RM(vssrl_vx_d) 2733 2734 static inline int8_t 2735 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2736 { 2737 uint8_t round, shift = b & 0x7; 2738 int8_t res; 2739 2740 round = get_round(vxrm, a, shift); 2741 res = (a >> shift) + round; 2742 return res; 2743 } 2744 
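/*
 * Illustrative example of the scaling-shift helpers (vssrl*/vssra*) above:
 * take a = 44 (0b101100) and a shift amount of 3, so the discarded bits are
 * 0b100, i.e. exactly half an LSB of the result.  get_round() then gives:
 *   rnu (vxrm = 0): round bit = bit 2 of a = 1    -> (44 >> 3) + 1 = 6
 *   rne (vxrm = 1): tie rounds to the even value  -> 6
 *   rdn (vxrm = 2): truncate                      -> 44 >> 3 = 5
 *   rod (vxrm = 3): 5 is already odd, no change   -> 5
 * The same rounding helper is reused by the averaging, fractional-multiply
 * and narrowing-clip helpers in this file.
 */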
static inline int16_t 2745 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2746 { 2747 uint8_t round, shift = b & 0xf; 2748 int16_t res; 2749 2750 round = get_round(vxrm, a, shift); 2751 res = (a >> shift) + round; 2752 return res; 2753 } 2754 static inline int32_t 2755 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2756 { 2757 uint8_t round, shift = b & 0x1f; 2758 int32_t res; 2759 2760 round = get_round(vxrm, a, shift); 2761 res = (a >> shift) + round; 2762 return res; 2763 } 2764 static inline int64_t 2765 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2766 { 2767 uint8_t round, shift = b & 0x3f; 2768 int64_t res; 2769 2770 round = get_round(vxrm, a, shift); 2771 res = (a >> shift) + round; 2772 return res; 2773 } 2774 2775 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2776 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2777 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2778 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2779 GEN_VEXT_VV_RM(vssra_vv_b) 2780 GEN_VEXT_VV_RM(vssra_vv_h) 2781 GEN_VEXT_VV_RM(vssra_vv_w) 2782 GEN_VEXT_VV_RM(vssra_vv_d) 2783 2784 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2785 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2786 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2787 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2788 GEN_VEXT_VX_RM(vssra_vx_b) 2789 GEN_VEXT_VX_RM(vssra_vx_h) 2790 GEN_VEXT_VX_RM(vssra_vx_w) 2791 GEN_VEXT_VX_RM(vssra_vx_d) 2792 2793 /* Vector Narrowing Fixed-Point Clip Instructions */ 2794 static inline int8_t 2795 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2796 { 2797 uint8_t round, shift = b & 0xf; 2798 int16_t res; 2799 2800 round = get_round(vxrm, a, shift); 2801 res = (a >> shift) + round; 2802 if (res > INT8_MAX) { 2803 env->vxsat = 0x1; 2804 return INT8_MAX; 2805 } else if (res < INT8_MIN) { 2806 env->vxsat = 0x1; 2807 return INT8_MIN; 2808 } else { 2809 return res; 2810 } 2811 } 2812 2813 static inline int16_t 2814 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2815 { 2816 uint8_t round, shift = b & 0x1f; 2817 int32_t res; 2818 2819 round = get_round(vxrm, a, shift); 2820 res = (a >> shift) + round; 2821 if (res > INT16_MAX) { 2822 env->vxsat = 0x1; 2823 return INT16_MAX; 2824 } else if (res < INT16_MIN) { 2825 env->vxsat = 0x1; 2826 return INT16_MIN; 2827 } else { 2828 return res; 2829 } 2830 } 2831 2832 static inline int32_t 2833 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2834 { 2835 uint8_t round, shift = b & 0x3f; 2836 int64_t res; 2837 2838 round = get_round(vxrm, a, shift); 2839 res = (a >> shift) + round; 2840 if (res > INT32_MAX) { 2841 env->vxsat = 0x1; 2842 return INT32_MAX; 2843 } else if (res < INT32_MIN) { 2844 env->vxsat = 0x1; 2845 return INT32_MIN; 2846 } else { 2847 return res; 2848 } 2849 } 2850 2851 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2852 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2853 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2854 GEN_VEXT_VV_RM(vnclip_wv_b) 2855 GEN_VEXT_VV_RM(vnclip_wv_h) 2856 GEN_VEXT_VV_RM(vnclip_wv_w) 2857 2858 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2859 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2860 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2861 GEN_VEXT_VX_RM(vnclip_wx_b) 2862 GEN_VEXT_VX_RM(vnclip_wx_h) 2863 GEN_VEXT_VX_RM(vnclip_wx_w) 2864 2865 static inline 
uint8_t 2866 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2867 { 2868 uint8_t round, shift = b & 0xf; 2869 uint16_t res; 2870 2871 round = get_round(vxrm, a, shift); 2872 res = (a >> shift) + round; 2873 if (res > UINT8_MAX) { 2874 env->vxsat = 0x1; 2875 return UINT8_MAX; 2876 } else { 2877 return res; 2878 } 2879 } 2880 2881 static inline uint16_t 2882 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2883 { 2884 uint8_t round, shift = b & 0x1f; 2885 uint32_t res; 2886 2887 round = get_round(vxrm, a, shift); 2888 res = (a >> shift) + round; 2889 if (res > UINT16_MAX) { 2890 env->vxsat = 0x1; 2891 return UINT16_MAX; 2892 } else { 2893 return res; 2894 } 2895 } 2896 2897 static inline uint32_t 2898 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2899 { 2900 uint8_t round, shift = b & 0x3f; 2901 uint64_t res; 2902 2903 round = get_round(vxrm, a, shift); 2904 res = (a >> shift) + round; 2905 if (res > UINT32_MAX) { 2906 env->vxsat = 0x1; 2907 return UINT32_MAX; 2908 } else { 2909 return res; 2910 } 2911 } 2912 2913 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2914 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2915 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2916 GEN_VEXT_VV_RM(vnclipu_wv_b) 2917 GEN_VEXT_VV_RM(vnclipu_wv_h) 2918 GEN_VEXT_VV_RM(vnclipu_wv_w) 2919 2920 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2921 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2922 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 2923 GEN_VEXT_VX_RM(vnclipu_wx_b) 2924 GEN_VEXT_VX_RM(vnclipu_wx_h) 2925 GEN_VEXT_VX_RM(vnclipu_wx_w) 2926 2927 /* 2928 *** Vector Float Point Arithmetic Instructions 2929 */ 2930 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2931 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2932 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2933 CPURISCVState *env) \ 2934 { \ 2935 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2936 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2937 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2938 } 2939 2940 #define GEN_VEXT_VV_ENV(NAME) \ 2941 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2942 void *vs2, CPURISCVState *env, \ 2943 uint32_t desc) \ 2944 { \ 2945 uint32_t vm = vext_vm(desc); \ 2946 uint32_t vl = env->vl; \ 2947 uint32_t i; \ 2948 \ 2949 for (i = env->vstart; i < vl; i++) { \ 2950 if (!vm && !vext_elem_mask(v0, i)) { \ 2951 continue; \ 2952 } \ 2953 do_##NAME(vd, vs1, vs2, i, env); \ 2954 } \ 2955 env->vstart = 0; \ 2956 } 2957 2958 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2959 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2960 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2961 GEN_VEXT_VV_ENV(vfadd_vv_h) 2962 GEN_VEXT_VV_ENV(vfadd_vv_w) 2963 GEN_VEXT_VV_ENV(vfadd_vv_d) 2964 2965 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2966 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2967 CPURISCVState *env) \ 2968 { \ 2969 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2970 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 2971 } 2972 2973 #define GEN_VEXT_VF(NAME) \ 2974 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2975 void *vs2, CPURISCVState *env, \ 2976 uint32_t desc) \ 2977 { \ 2978 uint32_t vm = vext_vm(desc); \ 2979 uint32_t vl = env->vl; \ 2980 uint32_t i; \ 2981 \ 2982 for (i = env->vstart; i < vl; i++) { \ 2983 if (!vm && !vext_elem_mask(v0, i)) { \ 2984 
continue; \ 2985 } \ 2986 do_##NAME(vd, s1, vs2, i, env); \ 2987 } \ 2988 env->vstart = 0; \ 2989 } 2990 2991 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2992 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2993 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2994 GEN_VEXT_VF(vfadd_vf_h) 2995 GEN_VEXT_VF(vfadd_vf_w) 2996 GEN_VEXT_VF(vfadd_vf_d) 2997 2998 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2999 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3000 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3001 GEN_VEXT_VV_ENV(vfsub_vv_h) 3002 GEN_VEXT_VV_ENV(vfsub_vv_w) 3003 GEN_VEXT_VV_ENV(vfsub_vv_d) 3004 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3005 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3006 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3007 GEN_VEXT_VF(vfsub_vf_h) 3008 GEN_VEXT_VF(vfsub_vf_w) 3009 GEN_VEXT_VF(vfsub_vf_d) 3010 3011 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3012 { 3013 return float16_sub(b, a, s); 3014 } 3015 3016 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3017 { 3018 return float32_sub(b, a, s); 3019 } 3020 3021 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3022 { 3023 return float64_sub(b, a, s); 3024 } 3025 3026 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3027 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3028 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3029 GEN_VEXT_VF(vfrsub_vf_h) 3030 GEN_VEXT_VF(vfrsub_vf_w) 3031 GEN_VEXT_VF(vfrsub_vf_d) 3032 3033 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3034 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3035 { 3036 return float32_add(float16_to_float32(a, true, s), 3037 float16_to_float32(b, true, s), s); 3038 } 3039 3040 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3041 { 3042 return float64_add(float32_to_float64(a, s), 3043 float32_to_float64(b, s), s); 3044 3045 } 3046 3047 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3048 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3049 GEN_VEXT_VV_ENV(vfwadd_vv_h) 3050 GEN_VEXT_VV_ENV(vfwadd_vv_w) 3051 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3052 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3053 GEN_VEXT_VF(vfwadd_vf_h) 3054 GEN_VEXT_VF(vfwadd_vf_w) 3055 3056 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3057 { 3058 return float32_sub(float16_to_float32(a, true, s), 3059 float16_to_float32(b, true, s), s); 3060 } 3061 3062 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3063 { 3064 return float64_sub(float32_to_float64(a, s), 3065 float32_to_float64(b, s), s); 3066 3067 } 3068 3069 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3070 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3071 GEN_VEXT_VV_ENV(vfwsub_vv_h) 3072 GEN_VEXT_VV_ENV(vfwsub_vv_w) 3073 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3074 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3075 GEN_VEXT_VF(vfwsub_vf_h) 3076 GEN_VEXT_VF(vfwsub_vf_w) 3077 3078 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3079 { 3080 return float32_add(a, float16_to_float32(b, true, s), s); 3081 } 3082 3083 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3084 { 3085 return float64_add(a, float32_to_float64(b, s), s); 3086 } 3087 3088 
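/*
 * Note on the "wv"/"wf" widening forms registered below: in contrast to
 * vfwadd16/vfwadd32, the vfwaddw16/vfwaddw32 helpers take their first
 * operand (the wide source of the .wv/.wf encodings) already at the doubled
 * element width, so only the second operand is converted up before the
 * float32/float64 add.  The `true` argument to float16_to_float32() appears
 * to select the IEEE half-precision interpretation of the 16-bit input
 * rather than the alternative half format softfloat also supports; that
 * reading of the flag is an assumption, not something stated in this file.
 */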
RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3089 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3090 GEN_VEXT_VV_ENV(vfwadd_wv_h) 3091 GEN_VEXT_VV_ENV(vfwadd_wv_w) 3092 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3093 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3094 GEN_VEXT_VF(vfwadd_wf_h) 3095 GEN_VEXT_VF(vfwadd_wf_w) 3096 3097 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3098 { 3099 return float32_sub(a, float16_to_float32(b, true, s), s); 3100 } 3101 3102 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3103 { 3104 return float64_sub(a, float32_to_float64(b, s), s); 3105 } 3106 3107 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3108 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3109 GEN_VEXT_VV_ENV(vfwsub_wv_h) 3110 GEN_VEXT_VV_ENV(vfwsub_wv_w) 3111 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3112 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3113 GEN_VEXT_VF(vfwsub_wf_h) 3114 GEN_VEXT_VF(vfwsub_wf_w) 3115 3116 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3117 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3118 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3119 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3120 GEN_VEXT_VV_ENV(vfmul_vv_h) 3121 GEN_VEXT_VV_ENV(vfmul_vv_w) 3122 GEN_VEXT_VV_ENV(vfmul_vv_d) 3123 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3124 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3125 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3126 GEN_VEXT_VF(vfmul_vf_h) 3127 GEN_VEXT_VF(vfmul_vf_w) 3128 GEN_VEXT_VF(vfmul_vf_d) 3129 3130 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3131 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3132 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3133 GEN_VEXT_VV_ENV(vfdiv_vv_h) 3134 GEN_VEXT_VV_ENV(vfdiv_vv_w) 3135 GEN_VEXT_VV_ENV(vfdiv_vv_d) 3136 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3137 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3138 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3139 GEN_VEXT_VF(vfdiv_vf_h) 3140 GEN_VEXT_VF(vfdiv_vf_w) 3141 GEN_VEXT_VF(vfdiv_vf_d) 3142 3143 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3144 { 3145 return float16_div(b, a, s); 3146 } 3147 3148 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3149 { 3150 return float32_div(b, a, s); 3151 } 3152 3153 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3154 { 3155 return float64_div(b, a, s); 3156 } 3157 3158 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3159 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3160 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3161 GEN_VEXT_VF(vfrdiv_vf_h) 3162 GEN_VEXT_VF(vfrdiv_vf_w) 3163 GEN_VEXT_VF(vfrdiv_vf_d) 3164 3165 /* Vector Widening Floating-Point Multiply */ 3166 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3167 { 3168 return float32_mul(float16_to_float32(a, true, s), 3169 float16_to_float32(b, true, s), s); 3170 } 3171 3172 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3173 { 3174 return float64_mul(float32_to_float64(a, s), 3175 float32_to_float64(b, s), s); 3176 3177 } 3178 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3179 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, 
H4, H4, vfwmul32) 3180 GEN_VEXT_VV_ENV(vfwmul_vv_h) 3181 GEN_VEXT_VV_ENV(vfwmul_vv_w) 3182 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3183 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3184 GEN_VEXT_VF(vfwmul_vf_h) 3185 GEN_VEXT_VF(vfwmul_vf_w) 3186 3187 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3188 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3189 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3190 CPURISCVState *env) \ 3191 { \ 3192 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3193 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3194 TD d = *((TD *)vd + HD(i)); \ 3195 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3196 } 3197 3198 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3199 { 3200 return float16_muladd(a, b, d, 0, s); 3201 } 3202 3203 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3204 { 3205 return float32_muladd(a, b, d, 0, s); 3206 } 3207 3208 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3209 { 3210 return float64_muladd(a, b, d, 0, s); 3211 } 3212 3213 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3214 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3215 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3216 GEN_VEXT_VV_ENV(vfmacc_vv_h) 3217 GEN_VEXT_VV_ENV(vfmacc_vv_w) 3218 GEN_VEXT_VV_ENV(vfmacc_vv_d) 3219 3220 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3221 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3222 CPURISCVState *env) \ 3223 { \ 3224 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3225 TD d = *((TD *)vd + HD(i)); \ 3226 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3227 } 3228 3229 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3230 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3231 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3232 GEN_VEXT_VF(vfmacc_vf_h) 3233 GEN_VEXT_VF(vfmacc_vf_w) 3234 GEN_VEXT_VF(vfmacc_vf_d) 3235 3236 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3237 { 3238 return float16_muladd(a, b, d, 3239 float_muladd_negate_c | float_muladd_negate_product, s); 3240 } 3241 3242 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3243 { 3244 return float32_muladd(a, b, d, 3245 float_muladd_negate_c | float_muladd_negate_product, s); 3246 } 3247 3248 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3249 { 3250 return float64_muladd(a, b, d, 3251 float_muladd_negate_c | float_muladd_negate_product, s); 3252 } 3253 3254 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3255 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3256 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3257 GEN_VEXT_VV_ENV(vfnmacc_vv_h) 3258 GEN_VEXT_VV_ENV(vfnmacc_vv_w) 3259 GEN_VEXT_VV_ENV(vfnmacc_vv_d) 3260 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3261 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3262 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3263 GEN_VEXT_VF(vfnmacc_vf_h) 3264 GEN_VEXT_VF(vfnmacc_vf_w) 3265 GEN_VEXT_VF(vfnmacc_vf_d) 3266 3267 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3268 { 3269 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3270 } 3271 3272 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3273 { 3274 return float32_muladd(a, b, d, float_muladd_negate_c, 
s); 3275 } 3276 3277 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3278 { 3279 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3280 } 3281 3282 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3283 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3284 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3285 GEN_VEXT_VV_ENV(vfmsac_vv_h) 3286 GEN_VEXT_VV_ENV(vfmsac_vv_w) 3287 GEN_VEXT_VV_ENV(vfmsac_vv_d) 3288 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3289 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3290 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3291 GEN_VEXT_VF(vfmsac_vf_h) 3292 GEN_VEXT_VF(vfmsac_vf_w) 3293 GEN_VEXT_VF(vfmsac_vf_d) 3294 3295 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3296 { 3297 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3298 } 3299 3300 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3301 { 3302 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3303 } 3304 3305 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3306 { 3307 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3308 } 3309 3310 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3311 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3312 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3313 GEN_VEXT_VV_ENV(vfnmsac_vv_h) 3314 GEN_VEXT_VV_ENV(vfnmsac_vv_w) 3315 GEN_VEXT_VV_ENV(vfnmsac_vv_d) 3316 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3317 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3318 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3319 GEN_VEXT_VF(vfnmsac_vf_h) 3320 GEN_VEXT_VF(vfnmsac_vf_w) 3321 GEN_VEXT_VF(vfnmsac_vf_d) 3322 3323 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3324 { 3325 return float16_muladd(d, b, a, 0, s); 3326 } 3327 3328 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3329 { 3330 return float32_muladd(d, b, a, 0, s); 3331 } 3332 3333 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3334 { 3335 return float64_muladd(d, b, a, 0, s); 3336 } 3337 3338 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3339 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3340 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3341 GEN_VEXT_VV_ENV(vfmadd_vv_h) 3342 GEN_VEXT_VV_ENV(vfmadd_vv_w) 3343 GEN_VEXT_VV_ENV(vfmadd_vv_d) 3344 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3345 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3346 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3347 GEN_VEXT_VF(vfmadd_vf_h) 3348 GEN_VEXT_VF(vfmadd_vf_w) 3349 GEN_VEXT_VF(vfmadd_vf_d) 3350 3351 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3352 { 3353 return float16_muladd(d, b, a, 3354 float_muladd_negate_c | float_muladd_negate_product, s); 3355 } 3356 3357 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3358 { 3359 return float32_muladd(d, b, a, 3360 float_muladd_negate_c | float_muladd_negate_product, s); 3361 } 3362 3363 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3364 { 3365 return float64_muladd(d, b, a, 3366 float_muladd_negate_c | float_muladd_negate_product, s); 3367 } 3368 3369 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3370 
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3371 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3372 GEN_VEXT_VV_ENV(vfnmadd_vv_h) 3373 GEN_VEXT_VV_ENV(vfnmadd_vv_w) 3374 GEN_VEXT_VV_ENV(vfnmadd_vv_d) 3375 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3376 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3377 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3378 GEN_VEXT_VF(vfnmadd_vf_h) 3379 GEN_VEXT_VF(vfnmadd_vf_w) 3380 GEN_VEXT_VF(vfnmadd_vf_d) 3381 3382 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3383 { 3384 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3385 } 3386 3387 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3388 { 3389 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3390 } 3391 3392 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3393 { 3394 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3395 } 3396 3397 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3398 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3399 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3400 GEN_VEXT_VV_ENV(vfmsub_vv_h) 3401 GEN_VEXT_VV_ENV(vfmsub_vv_w) 3402 GEN_VEXT_VV_ENV(vfmsub_vv_d) 3403 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3404 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3405 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3406 GEN_VEXT_VF(vfmsub_vf_h) 3407 GEN_VEXT_VF(vfmsub_vf_w) 3408 GEN_VEXT_VF(vfmsub_vf_d) 3409 3410 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3411 { 3412 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3413 } 3414 3415 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3416 { 3417 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3418 } 3419 3420 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3421 { 3422 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3423 } 3424 3425 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3426 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3427 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3428 GEN_VEXT_VV_ENV(vfnmsub_vv_h) 3429 GEN_VEXT_VV_ENV(vfnmsub_vv_w) 3430 GEN_VEXT_VV_ENV(vfnmsub_vv_d) 3431 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3432 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3433 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3434 GEN_VEXT_VF(vfnmsub_vf_h) 3435 GEN_VEXT_VF(vfnmsub_vf_w) 3436 GEN_VEXT_VF(vfnmsub_vf_d) 3437 3438 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3439 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3440 { 3441 return float32_muladd(float16_to_float32(a, true, s), 3442 float16_to_float32(b, true, s), d, 0, s); 3443 } 3444 3445 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3446 { 3447 return float64_muladd(float32_to_float64(a, s), 3448 float32_to_float64(b, s), d, 0, s); 3449 } 3450 3451 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3452 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3453 GEN_VEXT_VV_ENV(vfwmacc_vv_h) 3454 GEN_VEXT_VV_ENV(vfwmacc_vv_w) 3455 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3456 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3457 GEN_VEXT_VF(vfwmacc_vf_h) 3458 
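/*
 * Editorial note (sketch, not from the original source): the widening FMA
 * ops above convert both narrow sources up front and then perform a single
 * fused floatN_muladd at the wider precision, so the whole a*b+c is rounded
 * only once, at 2*SEW.  The 'true' argument to float16_to_float32() selects
 * IEEE half-precision input.  Element-wise, vfwmacc.vv with SEW=16 computes
 *
 *     vd[i] = float32(vs2[i]) * float32(vs1[i]) + vd[i]
 *
 * with vd[i] read and written as a 32-bit element, hence H4 for the
 * destination index and H2 for the sources in the RVVCALLs above.
 */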
GEN_VEXT_VF(vfwmacc_vf_w) 3459 3460 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3461 { 3462 return float32_muladd(float16_to_float32(a, true, s), 3463 float16_to_float32(b, true, s), d, 3464 float_muladd_negate_c | float_muladd_negate_product, s); 3465 } 3466 3467 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3468 { 3469 return float64_muladd(float32_to_float64(a, s), 3470 float32_to_float64(b, s), d, 3471 float_muladd_negate_c | float_muladd_negate_product, s); 3472 } 3473 3474 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3475 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3476 GEN_VEXT_VV_ENV(vfwnmacc_vv_h) 3477 GEN_VEXT_VV_ENV(vfwnmacc_vv_w) 3478 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3479 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3480 GEN_VEXT_VF(vfwnmacc_vf_h) 3481 GEN_VEXT_VF(vfwnmacc_vf_w) 3482 3483 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3484 { 3485 return float32_muladd(float16_to_float32(a, true, s), 3486 float16_to_float32(b, true, s), d, 3487 float_muladd_negate_c, s); 3488 } 3489 3490 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3491 { 3492 return float64_muladd(float32_to_float64(a, s), 3493 float32_to_float64(b, s), d, 3494 float_muladd_negate_c, s); 3495 } 3496 3497 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3498 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3499 GEN_VEXT_VV_ENV(vfwmsac_vv_h) 3500 GEN_VEXT_VV_ENV(vfwmsac_vv_w) 3501 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3502 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3503 GEN_VEXT_VF(vfwmsac_vf_h) 3504 GEN_VEXT_VF(vfwmsac_vf_w) 3505 3506 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3507 { 3508 return float32_muladd(float16_to_float32(a, true, s), 3509 float16_to_float32(b, true, s), d, 3510 float_muladd_negate_product, s); 3511 } 3512 3513 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3514 { 3515 return float64_muladd(float32_to_float64(a, s), 3516 float32_to_float64(b, s), d, 3517 float_muladd_negate_product, s); 3518 } 3519 3520 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3521 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3522 GEN_VEXT_VV_ENV(vfwnmsac_vv_h) 3523 GEN_VEXT_VV_ENV(vfwnmsac_vv_w) 3524 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3525 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3526 GEN_VEXT_VF(vfwnmsac_vf_h) 3527 GEN_VEXT_VF(vfwnmsac_vf_w) 3528 3529 /* Vector Floating-Point Square-Root Instruction */ 3530 /* (TD, T2, TX2) */ 3531 #define OP_UU_H uint16_t, uint16_t, uint16_t 3532 #define OP_UU_W uint32_t, uint32_t, uint32_t 3533 #define OP_UU_D uint64_t, uint64_t, uint64_t 3534 3535 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3536 static void do_##NAME(void *vd, void *vs2, int i, \ 3537 CPURISCVState *env) \ 3538 { \ 3539 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3540 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3541 } 3542 3543 #define GEN_VEXT_V_ENV(NAME) \ 3544 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3545 CPURISCVState *env, uint32_t desc) \ 3546 { \ 3547 uint32_t vm = vext_vm(desc); \ 3548 uint32_t vl = env->vl; \ 3549 uint32_t i; \ 3550 \ 3551 if (vl == 0) { \ 3552 return; \ 3553 } \ 3554 for (i = env->vstart; i < vl; i++) { \ 3555 if (!vm && !vext_elem_mask(v0, i)) { \ 
3556 continue; \ 3557 } \ 3558 do_##NAME(vd, vs2, i, env); \ 3559 } \ 3560 env->vstart = 0; \ 3561 } 3562 3563 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3564 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3565 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3566 GEN_VEXT_V_ENV(vfsqrt_v_h) 3567 GEN_VEXT_V_ENV(vfsqrt_v_w) 3568 GEN_VEXT_V_ENV(vfsqrt_v_d) 3569 3570 /* 3571 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3572 * 3573 * Adapted from riscv-v-spec recip.c: 3574 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3575 */ 3576 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3577 { 3578 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3579 uint64_t exp = extract64(f, frac_size, exp_size); 3580 uint64_t frac = extract64(f, 0, frac_size); 3581 3582 const uint8_t lookup_table[] = { 3583 52, 51, 50, 48, 47, 46, 44, 43, 3584 42, 41, 40, 39, 38, 36, 35, 34, 3585 33, 32, 31, 30, 30, 29, 28, 27, 3586 26, 25, 24, 23, 23, 22, 21, 20, 3587 19, 19, 18, 17, 16, 16, 15, 14, 3588 14, 13, 12, 12, 11, 10, 10, 9, 3589 9, 8, 7, 7, 6, 6, 5, 4, 3590 4, 3, 3, 2, 2, 1, 1, 0, 3591 127, 125, 123, 121, 119, 118, 116, 114, 3592 113, 111, 109, 108, 106, 105, 103, 102, 3593 100, 99, 97, 96, 95, 93, 92, 91, 3594 90, 88, 87, 86, 85, 84, 83, 82, 3595 80, 79, 78, 77, 76, 75, 74, 73, 3596 72, 71, 70, 70, 69, 68, 67, 66, 3597 65, 64, 63, 63, 62, 61, 60, 59, 3598 59, 58, 57, 56, 56, 55, 54, 53 3599 }; 3600 const int precision = 7; 3601 3602 if (exp == 0 && frac != 0) { /* subnormal */ 3603 /* Normalize the subnormal. */ 3604 while (extract64(frac, frac_size - 1, 1) == 0) { 3605 exp--; 3606 frac <<= 1; 3607 } 3608 3609 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3610 } 3611 3612 int idx = ((exp & 1) << (precision - 1)) | 3613 (frac >> (frac_size - precision + 1)); 3614 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3615 (frac_size - precision); 3616 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3617 3618 uint64_t val = 0; 3619 val = deposit64(val, 0, frac_size, out_frac); 3620 val = deposit64(val, frac_size, exp_size, out_exp); 3621 val = deposit64(val, frac_size + exp_size, 1, sign); 3622 return val; 3623 } 3624 3625 static float16 frsqrt7_h(float16 f, float_status *s) 3626 { 3627 int exp_size = 5, frac_size = 10; 3628 bool sign = float16_is_neg(f); 3629 3630 /* 3631 * frsqrt7(sNaN) = canonical NaN 3632 * frsqrt7(-inf) = canonical NaN 3633 * frsqrt7(-normal) = canonical NaN 3634 * frsqrt7(-subnormal) = canonical NaN 3635 */ 3636 if (float16_is_signaling_nan(f, s) || 3637 (float16_is_infinity(f) && sign) || 3638 (float16_is_normal(f) && sign) || 3639 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3640 s->float_exception_flags |= float_flag_invalid; 3641 return float16_default_nan(s); 3642 } 3643 3644 /* frsqrt7(qNaN) = canonical NaN */ 3645 if (float16_is_quiet_nan(f, s)) { 3646 return float16_default_nan(s); 3647 } 3648 3649 /* frsqrt7(+-0) = +-inf */ 3650 if (float16_is_zero(f)) { 3651 s->float_exception_flags |= float_flag_divbyzero; 3652 return float16_set_sign(float16_infinity, sign); 3653 } 3654 3655 /* frsqrt7(+inf) = +0 */ 3656 if (float16_is_infinity(f) && !sign) { 3657 return float16_set_sign(float16_zero, sign); 3658 } 3659 3660 /* +normal, +subnormal */ 3661 uint64_t val = frsqrt7(f, exp_size, frac_size); 3662 return make_float16(val); 3663 } 3664 3665 static float32 frsqrt7_s(float32 f, float_status *s) 3666 { 3667 int exp_size = 8, frac_size = 23; 3668 bool 
sign = float32_is_neg(f); 3669 3670 /* 3671 * frsqrt7(sNaN) = canonical NaN 3672 * frsqrt7(-inf) = canonical NaN 3673 * frsqrt7(-normal) = canonical NaN 3674 * frsqrt7(-subnormal) = canonical NaN 3675 */ 3676 if (float32_is_signaling_nan(f, s) || 3677 (float32_is_infinity(f) && sign) || 3678 (float32_is_normal(f) && sign) || 3679 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3680 s->float_exception_flags |= float_flag_invalid; 3681 return float32_default_nan(s); 3682 } 3683 3684 /* frsqrt7(qNaN) = canonical NaN */ 3685 if (float32_is_quiet_nan(f, s)) { 3686 return float32_default_nan(s); 3687 } 3688 3689 /* frsqrt7(+-0) = +-inf */ 3690 if (float32_is_zero(f)) { 3691 s->float_exception_flags |= float_flag_divbyzero; 3692 return float32_set_sign(float32_infinity, sign); 3693 } 3694 3695 /* frsqrt7(+inf) = +0 */ 3696 if (float32_is_infinity(f) && !sign) { 3697 return float32_set_sign(float32_zero, sign); 3698 } 3699 3700 /* +normal, +subnormal */ 3701 uint64_t val = frsqrt7(f, exp_size, frac_size); 3702 return make_float32(val); 3703 } 3704 3705 static float64 frsqrt7_d(float64 f, float_status *s) 3706 { 3707 int exp_size = 11, frac_size = 52; 3708 bool sign = float64_is_neg(f); 3709 3710 /* 3711 * frsqrt7(sNaN) = canonical NaN 3712 * frsqrt7(-inf) = canonical NaN 3713 * frsqrt7(-normal) = canonical NaN 3714 * frsqrt7(-subnormal) = canonical NaN 3715 */ 3716 if (float64_is_signaling_nan(f, s) || 3717 (float64_is_infinity(f) && sign) || 3718 (float64_is_normal(f) && sign) || 3719 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3720 s->float_exception_flags |= float_flag_invalid; 3721 return float64_default_nan(s); 3722 } 3723 3724 /* frsqrt7(qNaN) = canonical NaN */ 3725 if (float64_is_quiet_nan(f, s)) { 3726 return float64_default_nan(s); 3727 } 3728 3729 /* frsqrt7(+-0) = +-inf */ 3730 if (float64_is_zero(f)) { 3731 s->float_exception_flags |= float_flag_divbyzero; 3732 return float64_set_sign(float64_infinity, sign); 3733 } 3734 3735 /* frsqrt7(+inf) = +0 */ 3736 if (float64_is_infinity(f) && !sign) { 3737 return float64_set_sign(float64_zero, sign); 3738 } 3739 3740 /* +normal, +subnormal */ 3741 uint64_t val = frsqrt7(f, exp_size, frac_size); 3742 return make_float64(val); 3743 } 3744 3745 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3746 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3747 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3748 GEN_VEXT_V_ENV(vfrsqrt7_v_h) 3749 GEN_VEXT_V_ENV(vfrsqrt7_v_w) 3750 GEN_VEXT_V_ENV(vfrsqrt7_v_d) 3751 3752 /* 3753 * Vector Floating-Point Reciprocal Estimate Instruction 3754 * 3755 * Adapted from riscv-v-spec recip.c: 3756 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3757 */ 3758 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3759 float_status *s) 3760 { 3761 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3762 uint64_t exp = extract64(f, frac_size, exp_size); 3763 uint64_t frac = extract64(f, 0, frac_size); 3764 3765 const uint8_t lookup_table[] = { 3766 127, 125, 123, 121, 119, 117, 116, 114, 3767 112, 110, 109, 107, 105, 104, 102, 100, 3768 99, 97, 96, 94, 93, 91, 90, 88, 3769 87, 85, 84, 83, 81, 80, 79, 77, 3770 76, 75, 74, 72, 71, 70, 69, 68, 3771 66, 65, 64, 63, 62, 61, 60, 59, 3772 58, 57, 56, 55, 54, 53, 52, 51, 3773 50, 49, 48, 47, 46, 45, 44, 43, 3774 42, 41, 40, 40, 39, 38, 37, 36, 3775 35, 35, 34, 33, 32, 31, 31, 30, 3776 29, 28, 28, 27, 26, 25, 25, 24, 3777 23, 23, 22, 21, 21, 20, 19, 19, 3778 18, 17, 17, 16, 15, 15, 14, 14, 
3779 13, 12, 12, 11, 11, 10, 9, 9, 3780 8, 8, 7, 7, 6, 5, 5, 4, 3781 4, 3, 3, 2, 2, 1, 1, 0 3782 }; 3783 const int precision = 7; 3784 3785 if (exp == 0 && frac != 0) { /* subnormal */ 3786 /* Normalize the subnormal. */ 3787 while (extract64(frac, frac_size - 1, 1) == 0) { 3788 exp--; 3789 frac <<= 1; 3790 } 3791 3792 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3793 3794 if (exp != 0 && exp != UINT64_MAX) { 3795 /* 3796 * Overflow to inf or max value of same sign, 3797 * depending on sign and rounding mode. 3798 */ 3799 s->float_exception_flags |= (float_flag_inexact | 3800 float_flag_overflow); 3801 3802 if ((s->float_rounding_mode == float_round_to_zero) || 3803 ((s->float_rounding_mode == float_round_down) && !sign) || 3804 ((s->float_rounding_mode == float_round_up) && sign)) { 3805 /* Return greatest/negative finite value. */ 3806 return (sign << (exp_size + frac_size)) | 3807 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3808 } else { 3809 /* Return +-inf. */ 3810 return (sign << (exp_size + frac_size)) | 3811 MAKE_64BIT_MASK(frac_size, exp_size); 3812 } 3813 } 3814 } 3815 3816 int idx = frac >> (frac_size - precision); 3817 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3818 (frac_size - precision); 3819 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3820 3821 if (out_exp == 0 || out_exp == UINT64_MAX) { 3822 /* 3823 * The result is subnormal, but don't raise the underflow exception, 3824 * because there's no additional loss of precision. 3825 */ 3826 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3827 if (out_exp == UINT64_MAX) { 3828 out_frac >>= 1; 3829 out_exp = 0; 3830 } 3831 } 3832 3833 uint64_t val = 0; 3834 val = deposit64(val, 0, frac_size, out_frac); 3835 val = deposit64(val, frac_size, exp_size, out_exp); 3836 val = deposit64(val, frac_size + exp_size, 1, sign); 3837 return val; 3838 } 3839 3840 static float16 frec7_h(float16 f, float_status *s) 3841 { 3842 int exp_size = 5, frac_size = 10; 3843 bool sign = float16_is_neg(f); 3844 3845 /* frec7(+-inf) = +-0 */ 3846 if (float16_is_infinity(f)) { 3847 return float16_set_sign(float16_zero, sign); 3848 } 3849 3850 /* frec7(+-0) = +-inf */ 3851 if (float16_is_zero(f)) { 3852 s->float_exception_flags |= float_flag_divbyzero; 3853 return float16_set_sign(float16_infinity, sign); 3854 } 3855 3856 /* frec7(sNaN) = canonical NaN */ 3857 if (float16_is_signaling_nan(f, s)) { 3858 s->float_exception_flags |= float_flag_invalid; 3859 return float16_default_nan(s); 3860 } 3861 3862 /* frec7(qNaN) = canonical NaN */ 3863 if (float16_is_quiet_nan(f, s)) { 3864 return float16_default_nan(s); 3865 } 3866 3867 /* +-normal, +-subnormal */ 3868 uint64_t val = frec7(f, exp_size, frac_size, s); 3869 return make_float16(val); 3870 } 3871 3872 static float32 frec7_s(float32 f, float_status *s) 3873 { 3874 int exp_size = 8, frac_size = 23; 3875 bool sign = float32_is_neg(f); 3876 3877 /* frec7(+-inf) = +-0 */ 3878 if (float32_is_infinity(f)) { 3879 return float32_set_sign(float32_zero, sign); 3880 } 3881 3882 /* frec7(+-0) = +-inf */ 3883 if (float32_is_zero(f)) { 3884 s->float_exception_flags |= float_flag_divbyzero; 3885 return float32_set_sign(float32_infinity, sign); 3886 } 3887 3888 /* frec7(sNaN) = canonical NaN */ 3889 if (float32_is_signaling_nan(f, s)) { 3890 s->float_exception_flags |= float_flag_invalid; 3891 return float32_default_nan(s); 3892 } 3893 3894 /* frec7(qNaN) = canonical NaN */ 3895 if (float32_is_quiet_nan(f, s)) { 3896 return float32_default_nan(s); 3897 } 3898 3899 /* 
+-normal, +-subnormal */ 3900 uint64_t val = frec7(f, exp_size, frac_size, s); 3901 return make_float32(val); 3902 } 3903 3904 static float64 frec7_d(float64 f, float_status *s) 3905 { 3906 int exp_size = 11, frac_size = 52; 3907 bool sign = float64_is_neg(f); 3908 3909 /* frec7(+-inf) = +-0 */ 3910 if (float64_is_infinity(f)) { 3911 return float64_set_sign(float64_zero, sign); 3912 } 3913 3914 /* frec7(+-0) = +-inf */ 3915 if (float64_is_zero(f)) { 3916 s->float_exception_flags |= float_flag_divbyzero; 3917 return float64_set_sign(float64_infinity, sign); 3918 } 3919 3920 /* frec7(sNaN) = canonical NaN */ 3921 if (float64_is_signaling_nan(f, s)) { 3922 s->float_exception_flags |= float_flag_invalid; 3923 return float64_default_nan(s); 3924 } 3925 3926 /* frec7(qNaN) = canonical NaN */ 3927 if (float64_is_quiet_nan(f, s)) { 3928 return float64_default_nan(s); 3929 } 3930 3931 /* +-normal, +-subnormal */ 3932 uint64_t val = frec7(f, exp_size, frac_size, s); 3933 return make_float64(val); 3934 } 3935 3936 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3937 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3938 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3939 GEN_VEXT_V_ENV(vfrec7_v_h) 3940 GEN_VEXT_V_ENV(vfrec7_v_w) 3941 GEN_VEXT_V_ENV(vfrec7_v_d) 3942 3943 /* Vector Floating-Point MIN/MAX Instructions */ 3944 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3945 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) 3946 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3947 GEN_VEXT_VV_ENV(vfmin_vv_h) 3948 GEN_VEXT_VV_ENV(vfmin_vv_w) 3949 GEN_VEXT_VV_ENV(vfmin_vv_d) 3950 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3951 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3952 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3953 GEN_VEXT_VF(vfmin_vf_h) 3954 GEN_VEXT_VF(vfmin_vf_w) 3955 GEN_VEXT_VF(vfmin_vf_d) 3956 3957 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3958 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3959 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3960 GEN_VEXT_VV_ENV(vfmax_vv_h) 3961 GEN_VEXT_VV_ENV(vfmax_vv_w) 3962 GEN_VEXT_VV_ENV(vfmax_vv_d) 3963 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3964 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3965 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3966 GEN_VEXT_VF(vfmax_vf_h) 3967 GEN_VEXT_VF(vfmax_vf_w) 3968 GEN_VEXT_VF(vfmax_vf_d) 3969 3970 /* Vector Floating-Point Sign-Injection Instructions */ 3971 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3972 { 3973 return deposit64(b, 0, 15, a); 3974 } 3975 3976 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3977 { 3978 return deposit64(b, 0, 31, a); 3979 } 3980 3981 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3982 { 3983 return deposit64(b, 0, 63, a); 3984 } 3985 3986 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3987 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3988 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3989 GEN_VEXT_VV_ENV(vfsgnj_vv_h) 3990 GEN_VEXT_VV_ENV(vfsgnj_vv_w) 3991 GEN_VEXT_VV_ENV(vfsgnj_vv_d) 3992 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3993 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3994 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, 
H8, fsgnj64) 3995 GEN_VEXT_VF(vfsgnj_vf_h) 3996 GEN_VEXT_VF(vfsgnj_vf_w) 3997 GEN_VEXT_VF(vfsgnj_vf_d) 3998 3999 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 4000 { 4001 return deposit64(~b, 0, 15, a); 4002 } 4003 4004 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 4005 { 4006 return deposit64(~b, 0, 31, a); 4007 } 4008 4009 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 4010 { 4011 return deposit64(~b, 0, 63, a); 4012 } 4013 4014 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 4015 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 4016 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 4017 GEN_VEXT_VV_ENV(vfsgnjn_vv_h) 4018 GEN_VEXT_VV_ENV(vfsgnjn_vv_w) 4019 GEN_VEXT_VV_ENV(vfsgnjn_vv_d) 4020 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 4021 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 4022 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 4023 GEN_VEXT_VF(vfsgnjn_vf_h) 4024 GEN_VEXT_VF(vfsgnjn_vf_w) 4025 GEN_VEXT_VF(vfsgnjn_vf_d) 4026 4027 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4028 { 4029 return deposit64(b ^ a, 0, 15, a); 4030 } 4031 4032 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4033 { 4034 return deposit64(b ^ a, 0, 31, a); 4035 } 4036 4037 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 4038 { 4039 return deposit64(b ^ a, 0, 63, a); 4040 } 4041 4042 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4043 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4044 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4045 GEN_VEXT_VV_ENV(vfsgnjx_vv_h) 4046 GEN_VEXT_VV_ENV(vfsgnjx_vv_w) 4047 GEN_VEXT_VV_ENV(vfsgnjx_vv_d) 4048 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4049 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4050 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4051 GEN_VEXT_VF(vfsgnjx_vf_h) 4052 GEN_VEXT_VF(vfsgnjx_vf_w) 4053 GEN_VEXT_VF(vfsgnjx_vf_d) 4054 4055 /* Vector Floating-Point Compare Instructions */ 4056 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4057 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4058 CPURISCVState *env, uint32_t desc) \ 4059 { \ 4060 uint32_t vm = vext_vm(desc); \ 4061 uint32_t vl = env->vl; \ 4062 uint32_t i; \ 4063 \ 4064 for (i = env->vstart; i < vl; i++) { \ 4065 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4066 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4067 if (!vm && !vext_elem_mask(v0, i)) { \ 4068 continue; \ 4069 } \ 4070 vext_set_elem_mask(vd, i, \ 4071 DO_OP(s2, s1, &env->fp_status)); \ 4072 } \ 4073 env->vstart = 0; \ 4074 } 4075 4076 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4077 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4078 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4079 4080 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4081 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4082 CPURISCVState *env, uint32_t desc) \ 4083 { \ 4084 uint32_t vm = vext_vm(desc); \ 4085 uint32_t vl = env->vl; \ 4086 uint32_t i; \ 4087 \ 4088 for (i = env->vstart; i < vl; i++) { \ 4089 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4090 if (!vm && !vext_elem_mask(v0, i)) { \ 4091 continue; \ 4092 } \ 4093 vext_set_elem_mask(vd, i, \ 4094 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4095 } \ 4096 env->vstart = 0; \ 4097 } 4098 4099 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4100 
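/*
 * Editorial note (sketch, not from the original source): unlike the
 * arithmetic helpers, the compare generators above produce one mask bit per
 * element through vext_set_elem_mask(); inactive elements are skipped by
 * the 'continue', so their previous mask bits are left undisturbed.  The
 * choice of softfloat predicate also encodes the required NaN behaviour:
 * vmfeq/vmfne use the quiet compares, which do not raise the invalid flag
 * for quiet NaN inputs, while the ordered compares further below
 * (vmflt/vmfle/vmfgt/vmfge) use the signaling variants.  A worked example
 * with SEW=32:
 *
 *     vs2[i] = 1.0f,  s1 = 1.0f  -> vmfeq.vf sets bit i, no flags
 *     vs2[i] = qNaN,  s1 = 1.0f  -> vmfeq.vf clears bit i, no flags
 *     vs2[i] = qNaN,  s1 = 1.0f  -> vmflt.vf clears bit i and raises
 *                                   float_flag_invalid
 */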
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4101 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4102 4103 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4104 { 4105 FloatRelation compare = float16_compare_quiet(a, b, s); 4106 return compare != float_relation_equal; 4107 } 4108 4109 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4110 { 4111 FloatRelation compare = float32_compare_quiet(a, b, s); 4112 return compare != float_relation_equal; 4113 } 4114 4115 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4116 { 4117 FloatRelation compare = float64_compare_quiet(a, b, s); 4118 return compare != float_relation_equal; 4119 } 4120 4121 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4122 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4123 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4124 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4125 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4126 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4127 4128 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4129 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4130 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4131 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4132 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4133 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4134 4135 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4136 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4137 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4138 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4139 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4140 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4141 4142 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4143 { 4144 FloatRelation compare = float16_compare(a, b, s); 4145 return compare == float_relation_greater; 4146 } 4147 4148 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4149 { 4150 FloatRelation compare = float32_compare(a, b, s); 4151 return compare == float_relation_greater; 4152 } 4153 4154 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4155 { 4156 FloatRelation compare = float64_compare(a, b, s); 4157 return compare == float_relation_greater; 4158 } 4159 4160 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4161 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4162 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4163 4164 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4165 { 4166 FloatRelation compare = float16_compare(a, b, s); 4167 return compare == float_relation_greater || 4168 compare == float_relation_equal; 4169 } 4170 4171 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4172 { 4173 FloatRelation compare = float32_compare(a, b, s); 4174 return compare == float_relation_greater || 4175 compare == float_relation_equal; 4176 } 4177 4178 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4179 { 4180 FloatRelation compare = float64_compare(a, b, s); 4181 return compare == float_relation_greater || 4182 compare == float_relation_equal; 4183 } 4184 4185 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4186 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4187 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4188 4189 /* Vector Floating-Point Classify Instruction */ 4190 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4191 static void do_##NAME(void *vd, void *vs2, int i) \ 
4192 { \ 4193 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4194 *((TD *)vd + HD(i)) = OP(s2); \ 4195 } 4196 4197 #define GEN_VEXT_V(NAME) \ 4198 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4199 CPURISCVState *env, uint32_t desc) \ 4200 { \ 4201 uint32_t vm = vext_vm(desc); \ 4202 uint32_t vl = env->vl; \ 4203 uint32_t i; \ 4204 \ 4205 for (i = env->vstart; i < vl; i++) { \ 4206 if (!vm && !vext_elem_mask(v0, i)) { \ 4207 continue; \ 4208 } \ 4209 do_##NAME(vd, vs2, i); \ 4210 } \ 4211 env->vstart = 0; \ 4212 } 4213 4214 target_ulong fclass_h(uint64_t frs1) 4215 { 4216 float16 f = frs1; 4217 bool sign = float16_is_neg(f); 4218 4219 if (float16_is_infinity(f)) { 4220 return sign ? 1 << 0 : 1 << 7; 4221 } else if (float16_is_zero(f)) { 4222 return sign ? 1 << 3 : 1 << 4; 4223 } else if (float16_is_zero_or_denormal(f)) { 4224 return sign ? 1 << 2 : 1 << 5; 4225 } else if (float16_is_any_nan(f)) { 4226 float_status s = { }; /* for snan_bit_is_one */ 4227 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4228 } else { 4229 return sign ? 1 << 1 : 1 << 6; 4230 } 4231 } 4232 4233 target_ulong fclass_s(uint64_t frs1) 4234 { 4235 float32 f = frs1; 4236 bool sign = float32_is_neg(f); 4237 4238 if (float32_is_infinity(f)) { 4239 return sign ? 1 << 0 : 1 << 7; 4240 } else if (float32_is_zero(f)) { 4241 return sign ? 1 << 3 : 1 << 4; 4242 } else if (float32_is_zero_or_denormal(f)) { 4243 return sign ? 1 << 2 : 1 << 5; 4244 } else if (float32_is_any_nan(f)) { 4245 float_status s = { }; /* for snan_bit_is_one */ 4246 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4247 } else { 4248 return sign ? 1 << 1 : 1 << 6; 4249 } 4250 } 4251 4252 target_ulong fclass_d(uint64_t frs1) 4253 { 4254 float64 f = frs1; 4255 bool sign = float64_is_neg(f); 4256 4257 if (float64_is_infinity(f)) { 4258 return sign ? 1 << 0 : 1 << 7; 4259 } else if (float64_is_zero(f)) { 4260 return sign ? 1 << 3 : 1 << 4; 4261 } else if (float64_is_zero_or_denormal(f)) { 4262 return sign ? 1 << 2 : 1 << 5; 4263 } else if (float64_is_any_nan(f)) { 4264 float_status s = { }; /* for snan_bit_is_one */ 4265 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4266 } else { 4267 return sign ? 1 << 1 : 1 << 6; 4268 } 4269 } 4270 4271 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4272 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4273 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4274 GEN_VEXT_V(vfclass_v_h) 4275 GEN_VEXT_V(vfclass_v_w) 4276 GEN_VEXT_V(vfclass_v_d) 4277 4278 /* Vector Floating-Point Merge Instruction */ 4279 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4280 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4281 CPURISCVState *env, uint32_t desc) \ 4282 { \ 4283 uint32_t vm = vext_vm(desc); \ 4284 uint32_t vl = env->vl; \ 4285 uint32_t i; \ 4286 \ 4287 for (i = env->vstart; i < vl; i++) { \ 4288 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4289 *((ETYPE *)vd + H(i)) \ 4290 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4291 } \ 4292 env->vstart = 0; \ 4293 } 4294 4295 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4296 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4297 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4298 4299 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4300 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
*/ 4301 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4302 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4303 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4304 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) 4305 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) 4306 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) 4307 4308 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4309 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4310 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4311 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4312 GEN_VEXT_V_ENV(vfcvt_x_f_v_h) 4313 GEN_VEXT_V_ENV(vfcvt_x_f_v_w) 4314 GEN_VEXT_V_ENV(vfcvt_x_f_v_d) 4315 4316 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4317 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4318 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4319 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4320 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) 4321 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) 4322 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) 4323 4324 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4325 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4326 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4327 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4328 GEN_VEXT_V_ENV(vfcvt_f_x_v_h) 4329 GEN_VEXT_V_ENV(vfcvt_f_x_v_w) 4330 GEN_VEXT_V_ENV(vfcvt_f_x_v_d) 4331 4332 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4333 /* (TD, T2, TX2) */ 4334 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4335 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4336 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4337 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4338 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4339 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4340 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) 4341 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) 4342 4343 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4344 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4345 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4346 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) 4347 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) 4348 4349 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4350 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4351 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4352 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4353 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) 4354 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) 4355 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) 4356 4357 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4358 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4359 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4360 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4361 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) 4362 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) 4363 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) 4364 4365 /* 4366 * vfwcvt.f.f.v vd, vs2, vm 4367 * Convert single-width float to double-width float. 
4368 */ 4369 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4370 { 4371 return float16_to_float32(a, true, s); 4372 } 4373 4374 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4375 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4376 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h) 4377 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w) 4378 4379 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4380 /* (TD, T2, TX2) */ 4381 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4382 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4383 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4384 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4385 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4386 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4387 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4388 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b) 4389 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h) 4390 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w) 4391 4392 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4393 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4394 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4395 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4396 GEN_VEXT_V_ENV(vfncvt_x_f_w_b) 4397 GEN_VEXT_V_ENV(vfncvt_x_f_w_h) 4398 GEN_VEXT_V_ENV(vfncvt_x_f_w_w) 4399 4400 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4401 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4402 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4403 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h) 4404 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w) 4405 4406 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4407 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4408 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4409 GEN_VEXT_V_ENV(vfncvt_f_x_w_h) 4410 GEN_VEXT_V_ENV(vfncvt_f_x_w_w) 4411 4412 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4413 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4414 { 4415 return float32_to_float16(a, true, s); 4416 } 4417 4418 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4419 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4420 GEN_VEXT_V_ENV(vfncvt_f_f_w_h) 4421 GEN_VEXT_V_ENV(vfncvt_f_f_w_w) 4422 4423 /* 4424 *** Vector Reduction Operations 4425 */ 4426 /* Vector Single-Width Integer Reduction Instructions */ 4427 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4428 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4429 void *vs2, CPURISCVState *env, uint32_t desc) \ 4430 { \ 4431 uint32_t vm = vext_vm(desc); \ 4432 uint32_t vl = env->vl; \ 4433 uint32_t i; \ 4434 TD s1 = *((TD *)vs1 + HD(0)); \ 4435 \ 4436 for (i = env->vstart; i < vl; i++) { \ 4437 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4438 if (!vm && !vext_elem_mask(v0, i)) { \ 4439 continue; \ 4440 } \ 4441 s1 = OP(s1, (TD)s2); \ 4442 } \ 4443 *((TD *)vd + HD(0)) = s1; \ 4444 env->vstart = 0; \ 4445 } 4446 4447 /* vd[0] = sum(vs1[0], vs2[*]) */ 4448 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4449 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4450 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4451 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4452 4453 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4454 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4455 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4456 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4457 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4458 4459 /* vd[0] = max(vs1[0], vs2[*]) */ 4460 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4461 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4462 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4463 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4464 4465 /* vd[0] = minu(vs1[0], vs2[*]) */ 4466 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4467 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4468 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4469 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4470 4471 /* vd[0] = min(vs1[0], vs2[*]) */ 4472 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4473 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4474 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4475 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4476 4477 /* vd[0] = and(vs1[0], vs2[*]) */ 4478 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4479 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4480 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4481 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4482 4483 /* vd[0] = or(vs1[0], vs2[*]) */ 4484 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4485 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4486 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4487 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4488 4489 /* vd[0] = xor(vs1[0], vs2[*]) */ 4490 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4491 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4492 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4493 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4494 4495 /* Vector Widening Integer Reduction Instructions */ 4496 
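/*
 * Editorial sketch (not from the original source): the widening reductions
 * below reuse GEN_VEXT_RED with a destination/accumulator type TD that is
 * twice as wide as the source type TS2.  The (TD)s2 cast inside the macro
 * is what performs the widening: int8_t to int16_t sign-extends for
 * vwredsum, uint8_t to uint16_t zero-extends for vwredsumu.  For example,
 * the code generated for vwredsum_vs_b effectively does
 *
 *     int16_t s1 = *((int16_t *)vs1 + H2(0));
 *     ...
 *     s1 = DO_ADD(s1, (int16_t)s2);      (s2 is an int8_t element of vs2)
 *
 * so both the scalar operand vs1[0] and the result vd[0] are already held
 * at the double width.
 */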
/* signed sum reduction into double-width accumulator */ 4497 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4498 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4499 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4500 4501 /* Unsigned sum reduction into double-width accumulator */ 4502 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4503 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4504 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4505 4506 /* Vector Single-Width Floating-Point Reduction Instructions */ 4507 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4508 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4509 void *vs2, CPURISCVState *env, \ 4510 uint32_t desc) \ 4511 { \ 4512 uint32_t vm = vext_vm(desc); \ 4513 uint32_t vl = env->vl; \ 4514 uint32_t i; \ 4515 TD s1 = *((TD *)vs1 + HD(0)); \ 4516 \ 4517 for (i = env->vstart; i < vl; i++) { \ 4518 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4519 if (!vm && !vext_elem_mask(v0, i)) { \ 4520 continue; \ 4521 } \ 4522 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4523 } \ 4524 *((TD *)vd + HD(0)) = s1; \ 4525 env->vstart = 0; \ 4526 } 4527 4528 /* Unordered sum */ 4529 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4530 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4531 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4532 4533 /* Maximum value */ 4534 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4535 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4536 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4537 4538 /* Minimum value */ 4539 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4540 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4541 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4542 4543 /* Vector Widening Floating-Point Reduction Instructions */ 4544 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4545 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4546 void *vs2, CPURISCVState *env, uint32_t desc) 4547 { 4548 uint32_t vm = vext_vm(desc); 4549 uint32_t vl = env->vl; 4550 uint32_t i; 4551 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4552 4553 for (i = env->vstart; i < vl; i++) { 4554 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4555 if (!vm && !vext_elem_mask(v0, i)) { 4556 continue; 4557 } 4558 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4559 &env->fp_status); 4560 } 4561 *((uint32_t *)vd + H4(0)) = s1; 4562 env->vstart = 0; 4563 } 4564 4565 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4566 void *vs2, CPURISCVState *env, uint32_t desc) 4567 { 4568 uint32_t vm = vext_vm(desc); 4569 uint32_t vl = env->vl; 4570 uint32_t i; 4571 uint64_t s1 = *((uint64_t *)vs1); 4572 4573 for (i = env->vstart; i < vl; i++) { 4574 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4575 if (!vm && !vext_elem_mask(v0, i)) { 4576 continue; 4577 } 4578 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4579 &env->fp_status); 4580 } 4581 *((uint64_t *)vd) = s1; 4582 env->vstart = 0; 4583 } 4584 4585 /* 4586 *** Vector Mask Operations 4587 */ 4588 /* Vector Mask-Register Logical Instructions */ 4589 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4590 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4591 void *vs2, CPURISCVState *env, \ 4592 uint32_t desc) \ 
4593 { \ 4594 uint32_t vl = env->vl; \ 4595 uint32_t i; \ 4596 int a, b; \ 4597 \ 4598 for (i = env->vstart; i < vl; i++) { \ 4599 a = vext_elem_mask(vs1, i); \ 4600 b = vext_elem_mask(vs2, i); \ 4601 vext_set_elem_mask(vd, i, OP(b, a)); \ 4602 } \ 4603 env->vstart = 0; \ 4604 } 4605 4606 #define DO_NAND(N, M) (!(N & M)) 4607 #define DO_ANDNOT(N, M) (N & !M) 4608 #define DO_NOR(N, M) (!(N | M)) 4609 #define DO_ORNOT(N, M) (N | !M) 4610 #define DO_XNOR(N, M) (!(N ^ M)) 4611 4612 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4613 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4614 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4615 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4616 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4617 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4618 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4619 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4620 4621 /* Vector count population in mask vcpop */ 4622 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4623 uint32_t desc) 4624 { 4625 target_ulong cnt = 0; 4626 uint32_t vm = vext_vm(desc); 4627 uint32_t vl = env->vl; 4628 int i; 4629 4630 for (i = env->vstart; i < vl; i++) { 4631 if (vm || vext_elem_mask(v0, i)) { 4632 if (vext_elem_mask(vs2, i)) { 4633 cnt++; 4634 } 4635 } 4636 } 4637 env->vstart = 0; 4638 return cnt; 4639 } 4640 4641 /* vfirst find-first-set mask bit*/ 4642 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4643 uint32_t desc) 4644 { 4645 uint32_t vm = vext_vm(desc); 4646 uint32_t vl = env->vl; 4647 int i; 4648 4649 for (i = env->vstart; i < vl; i++) { 4650 if (vm || vext_elem_mask(v0, i)) { 4651 if (vext_elem_mask(vs2, i)) { 4652 return i; 4653 } 4654 } 4655 } 4656 env->vstart = 0; 4657 return -1LL; 4658 } 4659 4660 enum set_mask_type { 4661 ONLY_FIRST = 1, 4662 INCLUDE_FIRST, 4663 BEFORE_FIRST, 4664 }; 4665 4666 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4667 uint32_t desc, enum set_mask_type type) 4668 { 4669 uint32_t vm = vext_vm(desc); 4670 uint32_t vl = env->vl; 4671 int i; 4672 bool first_mask_bit = false; 4673 4674 for (i = env->vstart; i < vl; i++) { 4675 if (!vm && !vext_elem_mask(v0, i)) { 4676 continue; 4677 } 4678 /* write a zero to all following active elements */ 4679 if (first_mask_bit) { 4680 vext_set_elem_mask(vd, i, 0); 4681 continue; 4682 } 4683 if (vext_elem_mask(vs2, i)) { 4684 first_mask_bit = true; 4685 if (type == BEFORE_FIRST) { 4686 vext_set_elem_mask(vd, i, 0); 4687 } else { 4688 vext_set_elem_mask(vd, i, 1); 4689 } 4690 } else { 4691 if (type == ONLY_FIRST) { 4692 vext_set_elem_mask(vd, i, 0); 4693 } else { 4694 vext_set_elem_mask(vd, i, 1); 4695 } 4696 } 4697 } 4698 env->vstart = 0; 4699 } 4700 4701 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4702 uint32_t desc) 4703 { 4704 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4705 } 4706 4707 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4708 uint32_t desc) 4709 { 4710 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4711 } 4712 4713 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4714 uint32_t desc) 4715 { 4716 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4717 } 4718 4719 /* Vector Iota Instruction */ 4720 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4721 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4722 uint32_t desc) \ 4723 { \ 4724 uint32_t vm = vext_vm(desc); \ 4725 uint32_t vl = env->vl; \ 4726 uint32_t sum = 0; \ 4727 int i; \ 4728 \ 4729 for (i = env->vstart; i < vl; i++) { \ 4730 if (!vm && !vext_elem_mask(v0, i)) { \ 4731 
continue; \ 4732 } \ 4733 *((ETYPE *)vd + H(i)) = sum; \ 4734 if (vext_elem_mask(vs2, i)) { \ 4735 sum++; \ 4736 } \ 4737 } \ 4738 env->vstart = 0; \ 4739 } 4740 4741 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4742 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4743 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4744 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4745 4746 /* Vector Element Index Instruction */ 4747 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4748 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4749 { \ 4750 uint32_t vm = vext_vm(desc); \ 4751 uint32_t vl = env->vl; \ 4752 int i; \ 4753 \ 4754 for (i = env->vstart; i < vl; i++) { \ 4755 if (!vm && !vext_elem_mask(v0, i)) { \ 4756 continue; \ 4757 } \ 4758 *((ETYPE *)vd + H(i)) = i; \ 4759 } \ 4760 env->vstart = 0; \ 4761 } 4762 4763 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4764 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4765 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4766 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4767 4768 /* 4769 *** Vector Permutation Instructions 4770 */ 4771 4772 /* Vector Slide Instructions */ 4773 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4774 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4775 CPURISCVState *env, uint32_t desc) \ 4776 { \ 4777 uint32_t vm = vext_vm(desc); \ 4778 uint32_t vl = env->vl; \ 4779 target_ulong offset = s1, i_min, i; \ 4780 \ 4781 i_min = MAX(env->vstart, offset); \ 4782 for (i = i_min; i < vl; i++) { \ 4783 if (!vm && !vext_elem_mask(v0, i)) { \ 4784 continue; \ 4785 } \ 4786 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4787 } \ 4788 } 4789 4790 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4791 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4792 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4793 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4794 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4795 4796 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4797 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4798 CPURISCVState *env, uint32_t desc) \ 4799 { \ 4800 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4801 uint32_t vm = vext_vm(desc); \ 4802 uint32_t vl = env->vl; \ 4803 target_ulong i_max, i; \ 4804 \ 4805 i_max = MAX(MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl), env->vstart); \ 4806 for (i = env->vstart; i < i_max; ++i) { \ 4807 if (vm || vext_elem_mask(v0, i)) { \ 4808 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4809 } \ 4810 } \ 4811 \ 4812 for (i = i_max; i < vl; ++i) { \ 4813 if (vm || vext_elem_mask(v0, i)) { \ 4814 *((ETYPE *)vd + H(i)) = 0; \ 4815 } \ 4816 } \ 4817 \ 4818 env->vstart = 0; \ 4819 } 4820 4821 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4822 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4823 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4824 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4825 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4826 4827 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ 4828 static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ 4829 void *vs2, CPURISCVState *env, uint32_t desc) \ 4830 { \ 4831 typedef uint##BITWIDTH##_t ETYPE; \ 4832 uint32_t vm = vext_vm(desc); \ 4833 uint32_t vl = env->vl; \ 4834 uint32_t i; \ 4835 \ 4836 for (i = env->vstart; i < vl; i++) { \ 4837 if (!vm && !vext_elem_mask(v0, i)) { \ 4838 continue; \ 4839 } \ 4840 if (i == 0) { \ 4841 *((ETYPE *)vd + H(i)) = s1; \ 4842 } else { \ 4843 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4844 } \ 4845 } \ 4846 env->vstart = 0; \ 4847 } 4848 4849 GEN_VEXT_VSLIE1UP(8, H1) 4850 GEN_VEXT_VSLIE1UP(16, H2) 4851 GEN_VEXT_VSLIE1UP(32, H4) 4852 GEN_VEXT_VSLIE1UP(64, H8) 4853 4854 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ 4855 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4856 CPURISCVState *env, uint32_t desc) \ 4857 { \ 4858 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4859 } 4860 4861 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4862 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4863 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4864 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4865 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4866 4867 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \ 4868 static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ 4869 void *vs2, CPURISCVState *env, uint32_t desc) \ 4870 { \ 4871 typedef uint##BITWIDTH##_t ETYPE; \ 4872 uint32_t vm = vext_vm(desc); \ 4873 uint32_t vl = env->vl; \ 4874 uint32_t i; \ 4875 \ 4876 for (i = env->vstart; i < vl; i++) { \ 4877 if (!vm && !vext_elem_mask(v0, i)) { \ 4878 continue; \ 4879 } \ 4880 if (i == vl - 1) { \ 4881 *((ETYPE *)vd + H(i)) = s1; \ 4882 } else { \ 4883 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4884 } \ 4885 } \ 4886 env->vstart = 0; \ 4887 } 4888 4889 GEN_VEXT_VSLIDE1DOWN(8, H1) 4890 GEN_VEXT_VSLIDE1DOWN(16, H2) 4891 GEN_VEXT_VSLIDE1DOWN(32, H4) 4892 GEN_VEXT_VSLIDE1DOWN(64, H8) 4893 4894 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ 4895 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4896 CPURISCVState *env, uint32_t desc) \ 4897 { \ 4898 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4899 } 4900 4901 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4902 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4903 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4904 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4905 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4906 4907 /* Vector Floating-Point Slide Instructions */ 4908 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \ 4909 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4910 CPURISCVState *env, uint32_t desc) \ 4911 { \ 4912 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4913 } 4914 4915 /* 
vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */ 4916 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4917 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4918 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4919 4920 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \ 4921 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4922 CPURISCVState *env, uint32_t desc) \ 4923 { \ 4924 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4925 } 4926 4927 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4928 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4929 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4930 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4931 4932 /* Vector Register Gather Instruction */ 4933 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4934 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4935 CPURISCVState *env, uint32_t desc) \ 4936 { \ 4937 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4938 uint32_t vm = vext_vm(desc); \ 4939 uint32_t vl = env->vl; \ 4940 uint64_t index; \ 4941 uint32_t i; \ 4942 \ 4943 for (i = env->vstart; i < vl; i++) { \ 4944 if (!vm && !vext_elem_mask(v0, i)) { \ 4945 continue; \ 4946 } \ 4947 index = *((TS1 *)vs1 + HS1(i)); \ 4948 if (index >= vlmax) { \ 4949 *((TS2 *)vd + HS2(i)) = 0; \ 4950 } else { \ 4951 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4952 } \ 4953 } \ 4954 env->vstart = 0; \ 4955 } 4956 4957 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 4958 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4959 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4960 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4961 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4962 4963 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4964 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4965 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4966 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4967 4968 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4969 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4970 CPURISCVState *env, uint32_t desc) \ 4971 { \ 4972 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4973 uint32_t vm = vext_vm(desc); \ 4974 uint32_t vl = env->vl; \ 4975 uint64_t index = s1; \ 4976 uint32_t i; \ 4977 \ 4978 for (i = env->vstart; i < vl; i++) { \ 4979 if (!vm && !vext_elem_mask(v0, i)) { \ 4980 continue; \ 4981 } \ 4982 if (index >= vlmax) { \ 4983 *((ETYPE *)vd + H(i)) = 0; \ 4984 } else { \ 4985 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4986 } \ 4987 } \ 4988 env->vstart = 0; \ 4989 } 4990 4991 /* vd[i] = (x[rs1] >= VLMAX) ? 
0 : vs2[rs1] */ 4992 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4993 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4994 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4995 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4996 4997 /* Vector Compress Instruction */ 4998 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4999 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 5000 CPURISCVState *env, uint32_t desc) \ 5001 { \ 5002 uint32_t vl = env->vl; \ 5003 uint32_t num = 0, i; \ 5004 \ 5005 for (i = env->vstart; i < vl; i++) { \ 5006 if (!vext_elem_mask(vs1, i)) { \ 5007 continue; \ 5008 } \ 5009 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 5010 num++; \ 5011 } \ 5012 env->vstart = 0; \ 5013 } 5014 5015 /* Compress into vd elements of vs2 where vs1 is enabled */ 5016 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 5017 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 5018 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 5019 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 5020 5021 /* Vector Whole Register Move */ 5022 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) 5023 { 5024 /* EEW = SEW */ 5025 uint32_t maxsz = simd_maxsz(desc); 5026 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); 5027 uint32_t startb = env->vstart * sewb; 5028 uint32_t i = startb; 5029 5030 memcpy((uint8_t *)vd + H1(i), 5031 (uint8_t *)vs2 + H1(i), 5032 maxsz - startb); 5033 5034 env->vstart = 0; 5035 } 5036 5037 /* Vector Integer Extension */ 5038 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \ 5039 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 5040 CPURISCVState *env, uint32_t desc) \ 5041 { \ 5042 uint32_t vl = env->vl; \ 5043 uint32_t vm = vext_vm(desc); \ 5044 uint32_t i; \ 5045 \ 5046 for (i = env->vstart; i < vl; i++) { \ 5047 if (!vm && !vext_elem_mask(v0, i)) { \ 5048 continue; \ 5049 } \ 5050 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ 5051 } \ 5052 env->vstart = 0; \ 5053 } 5054 5055 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) 5056 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2) 5057 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4) 5058 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1) 5059 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2) 5060 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1) 5061 5062 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1) 5063 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2) 5064 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) 5065 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) 5066 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) 5067 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) 5068
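/*
 * Editorial sketch (not from the original source): in GEN_VEXT_INT_EXT the
 * actual extension is done by the plain C assignment
 *
 *     *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));
 *
 * so whether an element is zero- or sign-extended follows from the
 * signedness of the types: the vzext_* instantiations pair unsigned
 * DTYPE/ETYPE, the vsext_* ones pair signed types.  For instance,
 * vsext_vf2_h loads an int8_t source element and stores it through an
 * int16_t pointer, which sign-extends: an input byte of 0x80 (-128)
 * becomes 0xff80 (-128) in the destination, whereas vzext_vf2_h would
 * produce 0x0080 (128) for the same bit pattern.
 */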