/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that needs a host-endian fixup.
 */
#if HOST_BIG_ENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

static inline uint32_t vext_vta(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VTA);
}
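
/*
 * Illustrative worked example (an editorial note, assuming VLEN = 128 bits,
 * i.e. vlenb = 16; nothing here is implied by the code itself): vext_lmul()
 * above sign-extends the 3-bit vlmul field, so vlmul = 0b111 (LMUL = 1/2)
 * decodes to lmul = -1 and vlmul = 0b101 (LMUL = 1/8) decodes to lmul = -3.
 * vext_max_elems() below then computes VLMAX = vlenb << (lmul - log2_esz):
 *     SEW = 16, LMUL = 2   -> scale =  1 - 1 =  0, VLMAX = 16
 *     SEW = 8,  LMUL = 1/4 -> scale = -2 - 0 = -2, VLMAX = 16 >> 2 = 4
 * which matches VLMAX = LMUL * VLEN / SEW from the vector spec.
 */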

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * Get the total number of elements, including prestart, body and tail
 * elements. Note that when LMUL < 1, the tail includes the elements past
 * VLMAX that are held in the same vector register.
 */
static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
                                            uint32_t esz)
{
    uint32_t vlenb = simd_maxsz(desc);
    uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
                  ctzl(esz) - ctzl(sew) + vext_lmul(desc);
    return (vlenb << emul) / esz;
}

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check. In user mode, there is no watchpoint support for now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. The guest software
 * can then return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

/* set agnostic elements to 1s */
static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
                              uint32_t tot)
{
    if (is_agnostic == 0) {
        /* policy undisturbed */
        return;
    }
    if (tot - cnt == 0) {
        return;
    }
    memset(base + cnt, -1, tot - cnt);
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
221 * (Section 4.5) 222 */ 223 static inline int vext_elem_mask(void *v0, int index) 224 { 225 int idx = index / 64; 226 int pos = index % 64; 227 return (((uint64_t *)v0)[idx] >> pos) & 1; 228 } 229 230 /* elements operations for load and store */ 231 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, 232 uint32_t idx, void *vd, uintptr_t retaddr); 233 234 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 235 static void NAME(CPURISCVState *env, abi_ptr addr, \ 236 uint32_t idx, void *vd, uintptr_t retaddr)\ 237 { \ 238 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 239 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 240 } \ 241 242 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) 243 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw) 244 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) 245 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq) 246 247 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 248 static void NAME(CPURISCVState *env, abi_ptr addr, \ 249 uint32_t idx, void *vd, uintptr_t retaddr)\ 250 { \ 251 ETYPE data = *((ETYPE *)vd + H(idx)); \ 252 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 253 } 254 255 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) 256 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) 257 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) 258 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) 259 260 /* 261 *** stride: access vector element from strided memory 262 */ 263 static void 264 vext_ldst_stride(void *vd, void *v0, target_ulong base, 265 target_ulong stride, CPURISCVState *env, 266 uint32_t desc, uint32_t vm, 267 vext_ldst_elem_fn *ldst_elem, 268 uint32_t log2_esz, uintptr_t ra) 269 { 270 uint32_t i, k; 271 uint32_t nf = vext_nf(desc); 272 uint32_t max_elems = vext_max_elems(desc, log2_esz); 273 274 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 275 if (!vm && !vext_elem_mask(v0, i)) { 276 continue; 277 } 278 279 k = 0; 280 while (k < nf) { 281 target_ulong addr = base + stride * i + (k << log2_esz); 282 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 283 k++; 284 } 285 } 286 env->vstart = 0; 287 } 288 289 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 290 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 291 target_ulong stride, CPURISCVState *env, \ 292 uint32_t desc) \ 293 { \ 294 uint32_t vm = vext_vm(desc); \ 295 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 296 ctzl(sizeof(ETYPE)), GETPC()); \ 297 } 298 299 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 300 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 301 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 302 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 303 304 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 305 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 306 target_ulong stride, CPURISCVState *env, \ 307 uint32_t desc) \ 308 { \ 309 uint32_t vm = vext_vm(desc); \ 310 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 311 ctzl(sizeof(ETYPE)), GETPC()); \ 312 } 313 314 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 315 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 316 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 317 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 318 319 /* 320 *** unit-stride: access elements stored contiguously in memory 321 */ 322 323 /* unmasked unit-stride load and store operation*/ 324 static void 325 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 326 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, 327 uintptr_t ra) 328 { 329 uint32_t i, k; 330 uint32_t nf = vext_nf(desc); 331 uint32_t max_elems 
= vext_max_elems(desc, log2_esz); 332 333 /* load bytes from guest memory */ 334 for (i = env->vstart; i < evl; i++, env->vstart++) { 335 k = 0; 336 while (k < nf) { 337 target_ulong addr = base + ((i * nf + k) << log2_esz); 338 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 339 k++; 340 } 341 } 342 env->vstart = 0; 343 } 344 345 /* 346 * masked unit-stride load and store operation will be a special case of stride, 347 * stride = NF * sizeof (MTYPE) 348 */ 349 350 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 351 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 352 CPURISCVState *env, uint32_t desc) \ 353 { \ 354 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 355 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 356 ctzl(sizeof(ETYPE)), GETPC()); \ 357 } \ 358 \ 359 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 360 CPURISCVState *env, uint32_t desc) \ 361 { \ 362 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 363 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 364 } 365 366 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 367 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 368 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 369 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 370 371 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 372 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 373 CPURISCVState *env, uint32_t desc) \ 374 { \ 375 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 376 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 377 ctzl(sizeof(ETYPE)), GETPC()); \ 378 } \ 379 \ 380 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 381 CPURISCVState *env, uint32_t desc) \ 382 { \ 383 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 384 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 385 } 386 387 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 388 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 389 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 390 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 391 392 /* 393 *** unit stride mask load and store, EEW = 1 394 */ 395 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 396 CPURISCVState *env, uint32_t desc) 397 { 398 /* evl = ceil(vl/8) */ 399 uint8_t evl = (env->vl + 7) >> 3; 400 vext_ldst_us(vd, base, env, desc, lde_b, 401 0, evl, GETPC()); 402 } 403 404 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 405 CPURISCVState *env, uint32_t desc) 406 { 407 /* evl = ceil(vl/8) */ 408 uint8_t evl = (env->vl + 7) >> 3; 409 vext_ldst_us(vd, base, env, desc, ste_b, 410 0, evl, GETPC()); 411 } 412 413 /* 414 *** index: access vector element from indexed memory 415 */ 416 typedef target_ulong vext_get_index_addr(target_ulong base, 417 uint32_t idx, void *vs2); 418 419 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 420 static target_ulong NAME(target_ulong base, \ 421 uint32_t idx, void *vs2) \ 422 { \ 423 return (base + *((ETYPE *)vs2 + H(idx))); \ 424 } 425 426 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 427 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 428 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 429 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 430 431 static inline void 432 vext_ldst_index(void *vd, void *v0, target_ulong base, 433 void *vs2, CPURISCVState *env, uint32_t desc, 434 vext_get_index_addr get_index_addr, 435 vext_ldst_elem_fn *ldst_elem, 436 uint32_t log2_esz, uintptr_t ra) 437 { 438 uint32_t i, k; 439 uint32_t nf = vext_nf(desc); 440 uint32_t vm = vext_vm(desc); 441 uint32_t max_elems = vext_max_elems(desc, log2_esz); 442 443 /* load bytes from guest memory */ 
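    /*
     * A brief sketch of the indexed (gather/scatter) address computation
     * in the loop below, using hypothetical numbers rather than anything
     * taken from the spec: with base = 0x1000, an index element
     * vs2[i] = 0x20 and esz = 4, segment field k of element i is accessed
     * at
     *     addr = base + vs2[i] + k * esz
     * i.e. 0x1020 for k = 0 and 0x1024 for k = 1; get_index_addr()
     * supplies the base + vs2[i] part for the EEW of the index vector.
     */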
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                  \
                  void *vs2, CPURISCVState *env, uint32_t desc)            \
{                                                                          \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());                \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)                 \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                  \
                  void *vs2, CPURISCVState *env, uint32_t desc)            \
{                                                                          \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
                    STORE_FN, ctzl(sizeof(ETYPE)),                         \
                    GETPC());                                              \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            probe_pages(env, addr, nf <<
log2_esz, ra, MMU_DATA_LOAD); 534 } else { 535 /* if it triggers an exception, no need to check watchpoint */ 536 remain = nf << log2_esz; 537 while (remain > 0) { 538 offset = -(addr | TARGET_PAGE_MASK); 539 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, 540 cpu_mmu_index(env, false)); 541 if (host) { 542 #ifdef CONFIG_USER_ONLY 543 if (page_check_range(addr, offset, PAGE_READ) < 0) { 544 vl = i; 545 goto ProbeSuccess; 546 } 547 #else 548 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD); 549 #endif 550 } else { 551 vl = i; 552 goto ProbeSuccess; 553 } 554 if (remain <= offset) { 555 break; 556 } 557 remain -= offset; 558 addr = adjust_addr(env, addr + offset); 559 } 560 } 561 } 562 ProbeSuccess: 563 /* load bytes from guest memory */ 564 if (vl != 0) { 565 env->vl = vl; 566 } 567 for (i = env->vstart; i < env->vl; i++) { 568 k = 0; 569 if (!vm && !vext_elem_mask(v0, i)) { 570 continue; 571 } 572 while (k < nf) { 573 target_ulong addr = base + ((i * nf + k) << log2_esz); 574 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 575 k++; 576 } 577 } 578 env->vstart = 0; 579 } 580 581 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ 582 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 583 CPURISCVState *env, uint32_t desc) \ 584 { \ 585 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \ 586 ctzl(sizeof(ETYPE)), GETPC()); \ 587 } 588 589 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b) 590 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h) 591 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w) 592 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) 593 594 #define DO_SWAP(N, M) (M) 595 #define DO_AND(N, M) (N & M) 596 #define DO_XOR(N, M) (N ^ M) 597 #define DO_OR(N, M) (N | M) 598 #define DO_ADD(N, M) (N + M) 599 600 /* Signed min/max */ 601 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 602 #define DO_MIN(N, M) ((N) >= (M) ? 
(M) : (N)) 603 604 /* Unsigned min/max */ 605 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 606 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 607 608 /* 609 *** load and store whole register instructions 610 */ 611 static void 612 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 613 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra) 614 { 615 uint32_t i, k, off, pos; 616 uint32_t nf = vext_nf(desc); 617 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 618 uint32_t max_elems = vlenb >> log2_esz; 619 620 k = env->vstart / max_elems; 621 off = env->vstart % max_elems; 622 623 if (off) { 624 /* load/store rest of elements of current segment pointed by vstart */ 625 for (pos = off; pos < max_elems; pos++, env->vstart++) { 626 target_ulong addr = base + ((pos + k * max_elems) << log2_esz); 627 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra); 628 } 629 k++; 630 } 631 632 /* load/store elements for rest of segments */ 633 for (; k < nf; k++) { 634 for (i = 0; i < max_elems; i++, env->vstart++) { 635 target_ulong addr = base + ((i + k * max_elems) << log2_esz); 636 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 637 } 638 } 639 640 env->vstart = 0; 641 } 642 643 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 644 void HELPER(NAME)(void *vd, target_ulong base, \ 645 CPURISCVState *env, uint32_t desc) \ 646 { \ 647 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 648 ctzl(sizeof(ETYPE)), GETPC()); \ 649 } 650 651 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 652 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 653 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 654 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 655 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 656 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 657 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 658 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 659 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 660 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 661 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 662 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 663 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 664 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 665 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 666 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 667 668 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 669 void HELPER(NAME)(void *vd, target_ulong base, \ 670 CPURISCVState *env, uint32_t desc) \ 671 { \ 672 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 673 ctzl(sizeof(ETYPE)), GETPC()); \ 674 } 675 676 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 677 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 678 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 679 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 680 681 /* 682 *** Vector Integer Arithmetic Instructions 683 */ 684 685 /* expand macro args before macro */ 686 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 687 688 /* (TD, T1, T2, TX1, TX2) */ 689 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 690 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 691 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 692 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 693 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 694 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 695 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 696 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 697 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 698 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 699 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 700 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 701 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 702 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 703 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 704 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 705 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 706 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 707 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 708 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 709 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 710 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 711 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 712 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 713 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 714 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 715 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 716 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 717 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 718 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 719 720 /* operation of two vector elements */ 721 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 722 723 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 724 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 725 { \ 726 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 727 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 728 *((TD *)vd + HD(i)) = OP(s2, s1); \ 729 } 730 #define DO_SUB(N, M) (N - M) 731 #define DO_RSUB(N, M) (M - N) 732 733 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 734 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 735 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 736 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 737 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 738 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 739 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 740 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 741 742 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 743 CPURISCVState *env, uint32_t desc, 744 opivv2_fn *fn, uint32_t esz) 745 { 746 uint32_t vm = vext_vm(desc); 747 uint32_t vl = env->vl; 748 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 749 uint32_t vta = vext_vta(desc); 750 uint32_t i; 751 752 for (i = env->vstart; i < vl; i++) { 753 if (!vm && !vext_elem_mask(v0, i)) { 754 continue; 755 } 756 fn(vd, vs1, vs2, i); 757 } 758 env->vstart = 0; 759 /* set tail elements to 1s */ 760 vext_set_elems_1s(vd, 
vta, vl * esz, total_elems * esz); 761 } 762 763 /* generate the helpers for OPIVV */ 764 #define GEN_VEXT_VV(NAME, ESZ) \ 765 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 766 void *vs2, CPURISCVState *env, \ 767 uint32_t desc) \ 768 { \ 769 do_vext_vv(vd, v0, vs1, vs2, env, desc, \ 770 do_##NAME, ESZ); \ 771 } 772 773 GEN_VEXT_VV(vadd_vv_b, 1) 774 GEN_VEXT_VV(vadd_vv_h, 2) 775 GEN_VEXT_VV(vadd_vv_w, 4) 776 GEN_VEXT_VV(vadd_vv_d, 8) 777 GEN_VEXT_VV(vsub_vv_b, 1) 778 GEN_VEXT_VV(vsub_vv_h, 2) 779 GEN_VEXT_VV(vsub_vv_w, 4) 780 GEN_VEXT_VV(vsub_vv_d, 8) 781 782 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 783 784 /* 785 * (T1)s1 gives the real operator type. 786 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 787 */ 788 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 789 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 790 { \ 791 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 792 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 793 } 794 795 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 796 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 797 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 798 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 799 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 800 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 801 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 802 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 803 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 804 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 805 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 806 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 807 808 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 809 CPURISCVState *env, uint32_t desc, 810 opivx2_fn fn) 811 { 812 uint32_t vm = vext_vm(desc); 813 uint32_t vl = env->vl; 814 uint32_t i; 815 816 for (i = env->vstart; i < vl; i++) { 817 if (!vm && !vext_elem_mask(v0, i)) { 818 continue; 819 } 820 fn(vd, s1, vs2, i); 821 } 822 env->vstart = 0; 823 } 824 825 /* generate the helpers for OPIVX */ 826 #define GEN_VEXT_VX(NAME) \ 827 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 828 void *vs2, CPURISCVState *env, \ 829 uint32_t desc) \ 830 { \ 831 do_vext_vx(vd, v0, s1, vs2, env, desc, \ 832 do_##NAME); \ 833 } 834 835 GEN_VEXT_VX(vadd_vx_b) 836 GEN_VEXT_VX(vadd_vx_h) 837 GEN_VEXT_VX(vadd_vx_w) 838 GEN_VEXT_VX(vadd_vx_d) 839 GEN_VEXT_VX(vsub_vx_b) 840 GEN_VEXT_VX(vsub_vx_h) 841 GEN_VEXT_VX(vsub_vx_w) 842 GEN_VEXT_VX(vsub_vx_d) 843 GEN_VEXT_VX(vrsub_vx_b) 844 GEN_VEXT_VX(vrsub_vx_h) 845 GEN_VEXT_VX(vrsub_vx_w) 846 GEN_VEXT_VX(vrsub_vx_d) 847 848 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 849 { 850 intptr_t oprsz = simd_oprsz(desc); 851 intptr_t i; 852 853 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 854 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 855 } 856 } 857 858 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 859 { 860 intptr_t oprsz = simd_oprsz(desc); 861 intptr_t i; 862 863 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 864 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 865 } 866 } 867 868 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 869 { 870 intptr_t oprsz = simd_oprsz(desc); 871 intptr_t i; 872 873 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 874 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 875 } 876 } 877 878 void HELPER(vec_rsubs64)(void *d, void *a, 
uint64_t b, uint32_t desc) 879 { 880 intptr_t oprsz = simd_oprsz(desc); 881 intptr_t i; 882 883 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 884 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 885 } 886 } 887 888 /* Vector Widening Integer Add/Subtract */ 889 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 890 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 891 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 892 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 893 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 894 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 895 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 896 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 897 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 898 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 899 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 900 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 901 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 902 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 903 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 904 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 905 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 906 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 907 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 908 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 909 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 910 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 911 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 912 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 913 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 914 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 915 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 916 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 917 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 918 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 919 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 920 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 921 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 922 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 923 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 924 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 925 GEN_VEXT_VV(vwaddu_vv_b, 2) 926 GEN_VEXT_VV(vwaddu_vv_h, 4) 927 GEN_VEXT_VV(vwaddu_vv_w, 8) 928 GEN_VEXT_VV(vwsubu_vv_b, 2) 929 GEN_VEXT_VV(vwsubu_vv_h, 4) 930 GEN_VEXT_VV(vwsubu_vv_w, 8) 931 GEN_VEXT_VV(vwadd_vv_b, 2) 932 GEN_VEXT_VV(vwadd_vv_h, 4) 933 GEN_VEXT_VV(vwadd_vv_w, 8) 934 GEN_VEXT_VV(vwsub_vv_b, 2) 935 GEN_VEXT_VV(vwsub_vv_h, 4) 936 GEN_VEXT_VV(vwsub_vv_w, 8) 937 GEN_VEXT_VV(vwaddu_wv_b, 2) 938 GEN_VEXT_VV(vwaddu_wv_h, 4) 939 GEN_VEXT_VV(vwaddu_wv_w, 8) 940 GEN_VEXT_VV(vwsubu_wv_b, 2) 941 GEN_VEXT_VV(vwsubu_wv_h, 4) 942 GEN_VEXT_VV(vwsubu_wv_w, 8) 943 GEN_VEXT_VV(vwadd_wv_b, 2) 944 GEN_VEXT_VV(vwadd_wv_h, 4) 945 GEN_VEXT_VV(vwadd_wv_w, 8) 946 GEN_VEXT_VV(vwsub_wv_b, 2) 947 GEN_VEXT_VV(vwsub_wv_h, 4) 948 GEN_VEXT_VV(vwsub_wv_w, 8) 949 950 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 951 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 952 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, 
DO_ADD) 953 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 954 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 955 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 956 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 957 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 958 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 959 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 960 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 961 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 962 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 963 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 964 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 965 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 966 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 967 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 968 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 969 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 970 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 971 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 972 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 973 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 974 GEN_VEXT_VX(vwaddu_vx_b) 975 GEN_VEXT_VX(vwaddu_vx_h) 976 GEN_VEXT_VX(vwaddu_vx_w) 977 GEN_VEXT_VX(vwsubu_vx_b) 978 GEN_VEXT_VX(vwsubu_vx_h) 979 GEN_VEXT_VX(vwsubu_vx_w) 980 GEN_VEXT_VX(vwadd_vx_b) 981 GEN_VEXT_VX(vwadd_vx_h) 982 GEN_VEXT_VX(vwadd_vx_w) 983 GEN_VEXT_VX(vwsub_vx_b) 984 GEN_VEXT_VX(vwsub_vx_h) 985 GEN_VEXT_VX(vwsub_vx_w) 986 GEN_VEXT_VX(vwaddu_wx_b) 987 GEN_VEXT_VX(vwaddu_wx_h) 988 GEN_VEXT_VX(vwaddu_wx_w) 989 GEN_VEXT_VX(vwsubu_wx_b) 990 GEN_VEXT_VX(vwsubu_wx_h) 991 GEN_VEXT_VX(vwsubu_wx_w) 992 GEN_VEXT_VX(vwadd_wx_b) 993 GEN_VEXT_VX(vwadd_wx_h) 994 GEN_VEXT_VX(vwadd_wx_w) 995 GEN_VEXT_VX(vwsub_wx_b) 996 GEN_VEXT_VX(vwsub_wx_h) 997 GEN_VEXT_VX(vwsub_wx_w) 998 999 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 1000 #define DO_VADC(N, M, C) (N + M + C) 1001 #define DO_VSBC(N, M, C) (N - M - C) 1002 1003 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 1004 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1005 CPURISCVState *env, uint32_t desc) \ 1006 { \ 1007 uint32_t vl = env->vl; \ 1008 uint32_t i; \ 1009 \ 1010 for (i = env->vstart; i < vl; i++) { \ 1011 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1012 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1013 ETYPE carry = vext_elem_mask(v0, i); \ 1014 \ 1015 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 1016 } \ 1017 env->vstart = 0; \ 1018 } 1019 1020 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 1021 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 1022 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 1023 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 1024 1025 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 1026 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 1027 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 1028 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 1029 1030 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 1031 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1032 CPURISCVState *env, uint32_t desc) \ 1033 { \ 1034 uint32_t vl = env->vl; \ 1035 uint32_t i; \ 1036 \ 1037 for (i = env->vstart; i < vl; i++) { \ 1038 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1039 ETYPE carry = vext_elem_mask(v0, i); \ 1040 \ 1041 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1042 } \ 
1043 env->vstart = 0; \ 1044 } 1045 1046 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1047 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1048 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1049 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1050 1051 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1052 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1053 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1054 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1055 1056 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1057 (__typeof(N))(N + M) < N) 1058 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 1059 1060 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1061 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1062 CPURISCVState *env, uint32_t desc) \ 1063 { \ 1064 uint32_t vl = env->vl; \ 1065 uint32_t vm = vext_vm(desc); \ 1066 uint32_t i; \ 1067 \ 1068 for (i = env->vstart; i < vl; i++) { \ 1069 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1070 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1071 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1072 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1073 } \ 1074 env->vstart = 0; \ 1075 } 1076 1077 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1078 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1079 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1080 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1081 1082 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1083 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1084 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1085 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1086 1087 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1088 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1089 void *vs2, CPURISCVState *env, uint32_t desc) \ 1090 { \ 1091 uint32_t vl = env->vl; \ 1092 uint32_t vm = vext_vm(desc); \ 1093 uint32_t i; \ 1094 \ 1095 for (i = env->vstart; i < vl; i++) { \ 1096 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1097 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1098 vext_set_elem_mask(vd, i, \ 1099 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1100 } \ 1101 env->vstart = 0; \ 1102 } 1103 1104 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1105 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1106 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1107 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1108 1109 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1110 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1111 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1112 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1113 1114 /* Vector Bitwise Logical Instructions */ 1115 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1116 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1117 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1118 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1119 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1120 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1121 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1122 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1123 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1124 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1125 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1126 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1127 GEN_VEXT_VV(vand_vv_b, 1) 1128 
GEN_VEXT_VV(vand_vv_h, 2) 1129 GEN_VEXT_VV(vand_vv_w, 4) 1130 GEN_VEXT_VV(vand_vv_d, 8) 1131 GEN_VEXT_VV(vor_vv_b, 1) 1132 GEN_VEXT_VV(vor_vv_h, 2) 1133 GEN_VEXT_VV(vor_vv_w, 4) 1134 GEN_VEXT_VV(vor_vv_d, 8) 1135 GEN_VEXT_VV(vxor_vv_b, 1) 1136 GEN_VEXT_VV(vxor_vv_h, 2) 1137 GEN_VEXT_VV(vxor_vv_w, 4) 1138 GEN_VEXT_VV(vxor_vv_d, 8) 1139 1140 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1141 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1142 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1143 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1144 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1145 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1146 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1147 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1148 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1149 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1150 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1151 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1152 GEN_VEXT_VX(vand_vx_b) 1153 GEN_VEXT_VX(vand_vx_h) 1154 GEN_VEXT_VX(vand_vx_w) 1155 GEN_VEXT_VX(vand_vx_d) 1156 GEN_VEXT_VX(vor_vx_b) 1157 GEN_VEXT_VX(vor_vx_h) 1158 GEN_VEXT_VX(vor_vx_w) 1159 GEN_VEXT_VX(vor_vx_d) 1160 GEN_VEXT_VX(vxor_vx_b) 1161 GEN_VEXT_VX(vxor_vx_h) 1162 GEN_VEXT_VX(vxor_vx_w) 1163 GEN_VEXT_VX(vxor_vx_d) 1164 1165 /* Vector Single-Width Bit Shift Instructions */ 1166 #define DO_SLL(N, M) (N << (M)) 1167 #define DO_SRL(N, M) (N >> (M)) 1168 1169 /* generate the helpers for shift instructions with two vector operators */ 1170 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1171 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1172 void *vs2, CPURISCVState *env, uint32_t desc) \ 1173 { \ 1174 uint32_t vm = vext_vm(desc); \ 1175 uint32_t vl = env->vl; \ 1176 uint32_t i; \ 1177 \ 1178 for (i = env->vstart; i < vl; i++) { \ 1179 if (!vm && !vext_elem_mask(v0, i)) { \ 1180 continue; \ 1181 } \ 1182 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1183 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1184 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1185 } \ 1186 env->vstart = 0; \ 1187 } 1188 1189 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1190 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1191 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1192 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1193 1194 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1195 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1196 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1197 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1198 1199 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1200 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1201 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1202 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1203 1204 /* generate the helpers for shift instructions with one vector and one scalar */ 1205 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1206 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1207 void *vs2, CPURISCVState *env, uint32_t desc) \ 1208 { \ 1209 uint32_t vm = vext_vm(desc); \ 1210 uint32_t vl = env->vl; \ 1211 uint32_t i; \ 1212 \ 1213 for (i = env->vstart; i < vl; i++) { \ 1214 if (!vm && !vext_elem_mask(v0, i)) { \ 1215 continue; \ 1216 } \ 1217 TS2 s2 = 
*((TS2 *)vs2 + HS2(i)); \ 1218 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1219 } \ 1220 env->vstart = 0; \ 1221 } 1222 1223 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1224 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1225 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1226 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1227 1228 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1229 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1230 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1231 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1232 1233 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1234 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1235 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1236 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1237 1238 /* Vector Narrowing Integer Right Shift Instructions */ 1239 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1240 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1241 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1242 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1243 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1244 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1245 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1246 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1247 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1248 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1249 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1250 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1251 1252 /* Vector Integer Comparison Instructions */ 1253 #define DO_MSEQ(N, M) (N == M) 1254 #define DO_MSNE(N, M) (N != M) 1255 #define DO_MSLT(N, M) (N < M) 1256 #define DO_MSLE(N, M) (N <= M) 1257 #define DO_MSGT(N, M) (N > M) 1258 1259 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1260 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1261 CPURISCVState *env, uint32_t desc) \ 1262 { \ 1263 uint32_t vm = vext_vm(desc); \ 1264 uint32_t vl = env->vl; \ 1265 uint32_t i; \ 1266 \ 1267 for (i = env->vstart; i < vl; i++) { \ 1268 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1269 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1270 if (!vm && !vext_elem_mask(v0, i)) { \ 1271 continue; \ 1272 } \ 1273 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1274 } \ 1275 env->vstart = 0; \ 1276 } 1277 1278 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1279 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1280 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1281 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1282 1283 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1284 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1285 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1286 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1287 1288 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1289 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1290 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1291 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1292 1293 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1294 
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1295 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1296 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1297 1298 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1299 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1300 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1301 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1302 1303 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1304 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1305 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1306 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1307 1308 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1309 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1310 CPURISCVState *env, uint32_t desc) \ 1311 { \ 1312 uint32_t vm = vext_vm(desc); \ 1313 uint32_t vl = env->vl; \ 1314 uint32_t i; \ 1315 \ 1316 for (i = env->vstart; i < vl; i++) { \ 1317 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1318 if (!vm && !vext_elem_mask(v0, i)) { \ 1319 continue; \ 1320 } \ 1321 vext_set_elem_mask(vd, i, \ 1322 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1323 } \ 1324 env->vstart = 0; \ 1325 } 1326 1327 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1328 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1329 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1330 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1331 1332 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1333 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1334 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1335 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1336 1337 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1338 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1339 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1340 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1341 1342 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1343 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1344 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1345 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1346 1347 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1348 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1349 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1350 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1351 1352 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1353 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1354 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1355 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1356 1357 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1358 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1359 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1360 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1361 1362 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1363 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1364 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1365 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1366 1367 /* Vector Integer Min/Max Instructions */ 1368 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1369 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1370 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1371 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1372 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1373 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1374 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1375 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1376 
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1377 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1378 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1379 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1380 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1381 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1382 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1383 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1384 GEN_VEXT_VV(vminu_vv_b, 1) 1385 GEN_VEXT_VV(vminu_vv_h, 2) 1386 GEN_VEXT_VV(vminu_vv_w, 4) 1387 GEN_VEXT_VV(vminu_vv_d, 8) 1388 GEN_VEXT_VV(vmin_vv_b, 1) 1389 GEN_VEXT_VV(vmin_vv_h, 2) 1390 GEN_VEXT_VV(vmin_vv_w, 4) 1391 GEN_VEXT_VV(vmin_vv_d, 8) 1392 GEN_VEXT_VV(vmaxu_vv_b, 1) 1393 GEN_VEXT_VV(vmaxu_vv_h, 2) 1394 GEN_VEXT_VV(vmaxu_vv_w, 4) 1395 GEN_VEXT_VV(vmaxu_vv_d, 8) 1396 GEN_VEXT_VV(vmax_vv_b, 1) 1397 GEN_VEXT_VV(vmax_vv_h, 2) 1398 GEN_VEXT_VV(vmax_vv_w, 4) 1399 GEN_VEXT_VV(vmax_vv_d, 8) 1400 1401 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1402 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1403 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1404 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1405 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1406 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1407 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1408 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1409 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1410 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1411 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1412 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1413 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1414 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1415 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1416 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1417 GEN_VEXT_VX(vminu_vx_b) 1418 GEN_VEXT_VX(vminu_vx_h) 1419 GEN_VEXT_VX(vminu_vx_w) 1420 GEN_VEXT_VX(vminu_vx_d) 1421 GEN_VEXT_VX(vmin_vx_b) 1422 GEN_VEXT_VX(vmin_vx_h) 1423 GEN_VEXT_VX(vmin_vx_w) 1424 GEN_VEXT_VX(vmin_vx_d) 1425 GEN_VEXT_VX(vmaxu_vx_b) 1426 GEN_VEXT_VX(vmaxu_vx_h) 1427 GEN_VEXT_VX(vmaxu_vx_w) 1428 GEN_VEXT_VX(vmaxu_vx_d) 1429 GEN_VEXT_VX(vmax_vx_b) 1430 GEN_VEXT_VX(vmax_vx_h) 1431 GEN_VEXT_VX(vmax_vx_w) 1432 GEN_VEXT_VX(vmax_vx_d) 1433 1434 /* Vector Single-Width Integer Multiply Instructions */ 1435 #define DO_MUL(N, M) (N * M) 1436 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1437 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1438 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1439 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1440 GEN_VEXT_VV(vmul_vv_b, 1) 1441 GEN_VEXT_VV(vmul_vv_h, 2) 1442 GEN_VEXT_VV(vmul_vv_w, 4) 1443 GEN_VEXT_VV(vmul_vv_d, 8) 1444 1445 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1446 { 1447 return (int16_t)s2 * (int16_t)s1 >> 8; 1448 } 1449 1450 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1451 { 1452 return (int32_t)s2 * (int32_t)s1 >> 16; 1453 } 1454 1455 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1456 { 1457 return (int64_t)s2 * (int64_t)s1 >> 32; 1458 } 1459 1460 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1461 { 1462 uint64_t hi_64, lo_64; 1463 1464 muls64(&lo_64, &hi_64, s1, s2); 1465 return hi_64; 1466 } 1467 1468 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1469 { 1470 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1471 } 1472 1473 static uint16_t do_mulhu_h(uint16_t s2, 
uint16_t s1) 1474 { 1475 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1476 } 1477 1478 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1479 { 1480 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1481 } 1482 1483 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1484 { 1485 uint64_t hi_64, lo_64; 1486 1487 mulu64(&lo_64, &hi_64, s2, s1); 1488 return hi_64; 1489 } 1490 1491 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1492 { 1493 return (int16_t)s2 * (uint16_t)s1 >> 8; 1494 } 1495 1496 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1497 { 1498 return (int32_t)s2 * (uint32_t)s1 >> 16; 1499 } 1500 1501 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1502 { 1503 return (int64_t)s2 * (uint64_t)s1 >> 32; 1504 } 1505 1506 /* 1507 * Let A = signed operand, 1508 * B = unsigned operand 1509 * P = mulu64(A, B), unsigned product 1510 * 1511 * LET X = 2 ** 64 - A, 2's complement of A 1512 * SP = signed product 1513 * THEN 1514 * IF A < 0 1515 * SP = -X * B 1516 * = -(2 ** 64 - A) * B 1517 * = A * B - 2 ** 64 * B 1518 * = P - 2 ** 64 * B 1519 * ELSE 1520 * SP = P 1521 * THEN 1522 * HI_P -= (A < 0 ? B : 0) 1523 */ 1524 1525 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1526 { 1527 uint64_t hi_64, lo_64; 1528 1529 mulu64(&lo_64, &hi_64, s2, s1); 1530 1531 hi_64 -= s2 < 0 ? s1 : 0; 1532 return hi_64; 1533 } 1534 1535 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1536 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1537 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1538 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1539 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1540 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1541 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1542 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1543 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1544 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1545 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1546 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1547 GEN_VEXT_VV(vmulh_vv_b, 1) 1548 GEN_VEXT_VV(vmulh_vv_h, 2) 1549 GEN_VEXT_VV(vmulh_vv_w, 4) 1550 GEN_VEXT_VV(vmulh_vv_d, 8) 1551 GEN_VEXT_VV(vmulhu_vv_b, 1) 1552 GEN_VEXT_VV(vmulhu_vv_h, 2) 1553 GEN_VEXT_VV(vmulhu_vv_w, 4) 1554 GEN_VEXT_VV(vmulhu_vv_d, 8) 1555 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1556 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1557 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1558 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1559 1560 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1561 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1562 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1563 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1564 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1565 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1566 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1567 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1568 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1569 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1570 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1571 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1572 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1573 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1574 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1575 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 
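
/*
 * Quick sanity check of the do_mulhsu_* identity derived above, shown here
 * on an 8-bit analogue (illustrative only): for s2 = -1 (0xff) and s1 = 255,
 * the unsigned product is 0xff * 0xff = 0xfe01, whose high byte is 0xfe;
 * subtracting s1 because s2 < 0 gives 0xfe - 0xff = 0xff, which is indeed
 * the high byte of the true signed * unsigned product -255 = 0xff01.
 */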
1576 GEN_VEXT_VX(vmul_vx_b) 1577 GEN_VEXT_VX(vmul_vx_h) 1578 GEN_VEXT_VX(vmul_vx_w) 1579 GEN_VEXT_VX(vmul_vx_d) 1580 GEN_VEXT_VX(vmulh_vx_b) 1581 GEN_VEXT_VX(vmulh_vx_h) 1582 GEN_VEXT_VX(vmulh_vx_w) 1583 GEN_VEXT_VX(vmulh_vx_d) 1584 GEN_VEXT_VX(vmulhu_vx_b) 1585 GEN_VEXT_VX(vmulhu_vx_h) 1586 GEN_VEXT_VX(vmulhu_vx_w) 1587 GEN_VEXT_VX(vmulhu_vx_d) 1588 GEN_VEXT_VX(vmulhsu_vx_b) 1589 GEN_VEXT_VX(vmulhsu_vx_h) 1590 GEN_VEXT_VX(vmulhsu_vx_w) 1591 GEN_VEXT_VX(vmulhsu_vx_d) 1592 1593 /* Vector Integer Divide Instructions */ 1594 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1595 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1596 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1597 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1598 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1599 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1600 1601 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1602 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1603 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1604 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1605 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1606 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1607 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1608 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1609 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1610 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1611 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1612 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1613 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1614 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1615 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1616 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1617 GEN_VEXT_VV(vdivu_vv_b, 1) 1618 GEN_VEXT_VV(vdivu_vv_h, 2) 1619 GEN_VEXT_VV(vdivu_vv_w, 4) 1620 GEN_VEXT_VV(vdivu_vv_d, 8) 1621 GEN_VEXT_VV(vdiv_vv_b, 1) 1622 GEN_VEXT_VV(vdiv_vv_h, 2) 1623 GEN_VEXT_VV(vdiv_vv_w, 4) 1624 GEN_VEXT_VV(vdiv_vv_d, 8) 1625 GEN_VEXT_VV(vremu_vv_b, 1) 1626 GEN_VEXT_VV(vremu_vv_h, 2) 1627 GEN_VEXT_VV(vremu_vv_w, 4) 1628 GEN_VEXT_VV(vremu_vv_d, 8) 1629 GEN_VEXT_VV(vrem_vv_b, 1) 1630 GEN_VEXT_VV(vrem_vv_h, 2) 1631 GEN_VEXT_VV(vrem_vv_w, 4) 1632 GEN_VEXT_VV(vrem_vv_d, 8) 1633 1634 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1635 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1636 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1637 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1638 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1639 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1640 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1641 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1642 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1643 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1644 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1645 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1646 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1647 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1648 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1649 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1650 GEN_VEXT_VX(vdivu_vx_b) 1651 GEN_VEXT_VX(vdivu_vx_h) 1652 GEN_VEXT_VX(vdivu_vx_w) 1653 GEN_VEXT_VX(vdivu_vx_d) 1654 GEN_VEXT_VX(vdiv_vx_b) 1655 GEN_VEXT_VX(vdiv_vx_h) 1656 
GEN_VEXT_VX(vdiv_vx_w) 1657 GEN_VEXT_VX(vdiv_vx_d) 1658 GEN_VEXT_VX(vremu_vx_b) 1659 GEN_VEXT_VX(vremu_vx_h) 1660 GEN_VEXT_VX(vremu_vx_w) 1661 GEN_VEXT_VX(vremu_vx_d) 1662 GEN_VEXT_VX(vrem_vx_b) 1663 GEN_VEXT_VX(vrem_vx_h) 1664 GEN_VEXT_VX(vrem_vx_w) 1665 GEN_VEXT_VX(vrem_vx_d) 1666 1667 /* Vector Widening Integer Multiply Instructions */ 1668 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1669 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1670 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1671 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1672 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1673 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1674 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1675 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1676 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1677 GEN_VEXT_VV(vwmul_vv_b, 2) 1678 GEN_VEXT_VV(vwmul_vv_h, 4) 1679 GEN_VEXT_VV(vwmul_vv_w, 8) 1680 GEN_VEXT_VV(vwmulu_vv_b, 2) 1681 GEN_VEXT_VV(vwmulu_vv_h, 4) 1682 GEN_VEXT_VV(vwmulu_vv_w, 8) 1683 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1684 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1685 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1686 1687 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1688 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1689 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1690 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1691 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1692 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1693 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1694 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1695 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1696 GEN_VEXT_VX(vwmul_vx_b) 1697 GEN_VEXT_VX(vwmul_vx_h) 1698 GEN_VEXT_VX(vwmul_vx_w) 1699 GEN_VEXT_VX(vwmulu_vx_b) 1700 GEN_VEXT_VX(vwmulu_vx_h) 1701 GEN_VEXT_VX(vwmulu_vx_w) 1702 GEN_VEXT_VX(vwmulsu_vx_b) 1703 GEN_VEXT_VX(vwmulsu_vx_h) 1704 GEN_VEXT_VX(vwmulsu_vx_w) 1705 1706 /* Vector Single-Width Integer Multiply-Add Instructions */ 1707 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1708 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1709 { \ 1710 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1711 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1712 TD d = *((TD *)vd + HD(i)); \ 1713 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1714 } 1715 1716 #define DO_MACC(N, M, D) (M * N + D) 1717 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1718 #define DO_MADD(N, M, D) (M * D + N) 1719 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1720 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1721 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1722 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1723 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1724 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1725 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1726 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1727 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1728 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1729 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1730 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1731 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1732 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1733 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1734 RVVCALL(OPIVV3, vnmsub_vv_w, 
OP_SSS_W, H4, H4, H4, DO_NMSUB) 1735 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1736 GEN_VEXT_VV(vmacc_vv_b, 1) 1737 GEN_VEXT_VV(vmacc_vv_h, 2) 1738 GEN_VEXT_VV(vmacc_vv_w, 4) 1739 GEN_VEXT_VV(vmacc_vv_d, 8) 1740 GEN_VEXT_VV(vnmsac_vv_b, 1) 1741 GEN_VEXT_VV(vnmsac_vv_h, 2) 1742 GEN_VEXT_VV(vnmsac_vv_w, 4) 1743 GEN_VEXT_VV(vnmsac_vv_d, 8) 1744 GEN_VEXT_VV(vmadd_vv_b, 1) 1745 GEN_VEXT_VV(vmadd_vv_h, 2) 1746 GEN_VEXT_VV(vmadd_vv_w, 4) 1747 GEN_VEXT_VV(vmadd_vv_d, 8) 1748 GEN_VEXT_VV(vnmsub_vv_b, 1) 1749 GEN_VEXT_VV(vnmsub_vv_h, 2) 1750 GEN_VEXT_VV(vnmsub_vv_w, 4) 1751 GEN_VEXT_VV(vnmsub_vv_d, 8) 1752 1753 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1754 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1755 { \ 1756 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1757 TD d = *((TD *)vd + HD(i)); \ 1758 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1759 } 1760 1761 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1762 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1763 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1764 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1765 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1766 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1767 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1768 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1769 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1770 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1771 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1772 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1773 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1774 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1775 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1776 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1777 GEN_VEXT_VX(vmacc_vx_b) 1778 GEN_VEXT_VX(vmacc_vx_h) 1779 GEN_VEXT_VX(vmacc_vx_w) 1780 GEN_VEXT_VX(vmacc_vx_d) 1781 GEN_VEXT_VX(vnmsac_vx_b) 1782 GEN_VEXT_VX(vnmsac_vx_h) 1783 GEN_VEXT_VX(vnmsac_vx_w) 1784 GEN_VEXT_VX(vnmsac_vx_d) 1785 GEN_VEXT_VX(vmadd_vx_b) 1786 GEN_VEXT_VX(vmadd_vx_h) 1787 GEN_VEXT_VX(vmadd_vx_w) 1788 GEN_VEXT_VX(vmadd_vx_d) 1789 GEN_VEXT_VX(vnmsub_vx_b) 1790 GEN_VEXT_VX(vnmsub_vx_h) 1791 GEN_VEXT_VX(vnmsub_vx_w) 1792 GEN_VEXT_VX(vnmsub_vx_d) 1793 1794 /* Vector Widening Integer Multiply-Add Instructions */ 1795 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1796 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1797 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1798 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1799 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1800 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1801 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1802 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1803 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1804 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1805 GEN_VEXT_VV(vwmaccu_vv_h, 4) 1806 GEN_VEXT_VV(vwmaccu_vv_w, 8) 1807 GEN_VEXT_VV(vwmacc_vv_b, 2) 1808 GEN_VEXT_VV(vwmacc_vv_h, 4) 1809 GEN_VEXT_VV(vwmacc_vv_w, 8) 1810 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 1811 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 1812 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 1813 1814 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1815 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1816 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1817 RVVCALL(OPIVX3, 
vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1818 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1819 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1820 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1821 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1822 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1823 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1824 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1825 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1826 GEN_VEXT_VX(vwmaccu_vx_b) 1827 GEN_VEXT_VX(vwmaccu_vx_h) 1828 GEN_VEXT_VX(vwmaccu_vx_w) 1829 GEN_VEXT_VX(vwmacc_vx_b) 1830 GEN_VEXT_VX(vwmacc_vx_h) 1831 GEN_VEXT_VX(vwmacc_vx_w) 1832 GEN_VEXT_VX(vwmaccsu_vx_b) 1833 GEN_VEXT_VX(vwmaccsu_vx_h) 1834 GEN_VEXT_VX(vwmaccsu_vx_w) 1835 GEN_VEXT_VX(vwmaccus_vx_b) 1836 GEN_VEXT_VX(vwmaccus_vx_h) 1837 GEN_VEXT_VX(vwmaccus_vx_w) 1838 1839 /* Vector Integer Merge and Move Instructions */ 1840 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1841 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1842 uint32_t desc) \ 1843 { \ 1844 uint32_t vl = env->vl; \ 1845 uint32_t i; \ 1846 \ 1847 for (i = env->vstart; i < vl; i++) { \ 1848 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1849 *((ETYPE *)vd + H(i)) = s1; \ 1850 } \ 1851 env->vstart = 0; \ 1852 } 1853 1854 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1855 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1856 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1857 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1858 1859 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1860 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1861 uint32_t desc) \ 1862 { \ 1863 uint32_t vl = env->vl; \ 1864 uint32_t i; \ 1865 \ 1866 for (i = env->vstart; i < vl; i++) { \ 1867 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1868 } \ 1869 env->vstart = 0; \ 1870 } 1871 1872 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1873 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1874 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1875 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1876 1877 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1878 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1879 CPURISCVState *env, uint32_t desc) \ 1880 { \ 1881 uint32_t vl = env->vl; \ 1882 uint32_t i; \ 1883 \ 1884 for (i = env->vstart; i < vl; i++) { \ 1885 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 1886 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1887 } \ 1888 env->vstart = 0; \ 1889 } 1890 1891 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1892 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1893 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1894 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1895 1896 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1897 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1898 void *vs2, CPURISCVState *env, uint32_t desc) \ 1899 { \ 1900 uint32_t vl = env->vl; \ 1901 uint32_t i; \ 1902 \ 1903 for (i = env->vstart; i < vl; i++) { \ 1904 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1905 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 1906 (ETYPE)(target_long)s1); \ 1907 *((ETYPE *)vd + H(i)) = d; \ 1908 } \ 1909 env->vstart = 0; \ 1910 } 1911 1912 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1913 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1914 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1915 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1916 1917 /* 1918 *** Vector Fixed-Point Arithmetic Instructions 1919 */ 1920 1921 /* Vector Single-Width Saturating Add and Subtract */ 1922 1923 /* 1924 * As fixed point instructions probably have round mode and saturation, 1925 * define common macros for fixed point here. 1926 */ 1927 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1928 CPURISCVState *env, int vxrm); 1929 1930 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1931 static inline void \ 1932 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1933 CPURISCVState *env, int vxrm) \ 1934 { \ 1935 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1936 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1937 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1938 } 1939 1940 static inline void 1941 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1942 CPURISCVState *env, 1943 uint32_t vl, uint32_t vm, int vxrm, 1944 opivv2_rm_fn *fn) 1945 { 1946 for (uint32_t i = env->vstart; i < vl; i++) { 1947 if (!vm && !vext_elem_mask(v0, i)) { 1948 continue; 1949 } 1950 fn(vd, vs1, vs2, i, env, vxrm); 1951 } 1952 env->vstart = 0; 1953 } 1954 1955 static inline void 1956 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1957 CPURISCVState *env, 1958 uint32_t desc, 1959 opivv2_rm_fn *fn) 1960 { 1961 uint32_t vm = vext_vm(desc); 1962 uint32_t vl = env->vl; 1963 1964 switch (env->vxrm) { 1965 case 0: /* rnu */ 1966 vext_vv_rm_1(vd, v0, vs1, vs2, 1967 env, vl, vm, 0, fn); 1968 break; 1969 case 1: /* rne */ 1970 vext_vv_rm_1(vd, v0, vs1, vs2, 1971 env, vl, vm, 1, fn); 1972 break; 1973 case 2: /* rdn */ 1974 vext_vv_rm_1(vd, v0, vs1, vs2, 1975 env, vl, vm, 2, fn); 1976 break; 1977 default: /* rod */ 1978 vext_vv_rm_1(vd, v0, vs1, vs2, 1979 env, vl, vm, 3, fn); 1980 break; 1981 } 1982 } 1983 1984 /* generate helpers for fixed point instructions with OPIVV format */ 1985 #define GEN_VEXT_VV_RM(NAME) \ 1986 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1987 CPURISCVState *env, uint32_t desc) \ 1988 { \ 1989 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 1990 do_##NAME); \ 1991 } 1992 1993 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 1994 { 1995 uint8_t res = a + b; 1996 if (res < a) { 1997 res = UINT8_MAX; 1998 env->vxsat = 0x1; 1999 } 2000 return res; 2001 } 2002 2003 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2004 uint16_t b) 2005 { 2006 uint16_t res = a + b; 2007 if (res < a) { 2008 res = UINT16_MAX; 2009 env->vxsat = 0x1; 2010 } 2011 return res; 2012 } 2013 2014 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2015 uint32_t b) 2016 { 2017 uint32_t res = a + b; 2018 if (res < a) { 2019 res = UINT32_MAX; 2020 env->vxsat = 0x1; 2021 } 2022 return res; 2023 } 2024 2025 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2026 uint64_t b) 2027 { 2028 uint64_t res = a + b; 2029 if (res < a) { 2030 res = UINT64_MAX; 2031 env->vxsat = 0x1; 2032 } 2033 return res; 2034 } 2035 2036 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2037 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2038 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2039 RVVCALL(OPIVV2_RM, vsaddu_vv_d, 
OP_UUU_D, H8, H8, H8, saddu64) 2040 GEN_VEXT_VV_RM(vsaddu_vv_b) 2041 GEN_VEXT_VV_RM(vsaddu_vv_h) 2042 GEN_VEXT_VV_RM(vsaddu_vv_w) 2043 GEN_VEXT_VV_RM(vsaddu_vv_d) 2044 2045 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2046 CPURISCVState *env, int vxrm); 2047 2048 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2049 static inline void \ 2050 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2051 CPURISCVState *env, int vxrm) \ 2052 { \ 2053 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2054 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2055 } 2056 2057 static inline void 2058 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2059 CPURISCVState *env, 2060 uint32_t vl, uint32_t vm, int vxrm, 2061 opivx2_rm_fn *fn) 2062 { 2063 for (uint32_t i = env->vstart; i < vl; i++) { 2064 if (!vm && !vext_elem_mask(v0, i)) { 2065 continue; 2066 } 2067 fn(vd, s1, vs2, i, env, vxrm); 2068 } 2069 env->vstart = 0; 2070 } 2071 2072 static inline void 2073 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2074 CPURISCVState *env, 2075 uint32_t desc, 2076 opivx2_rm_fn *fn) 2077 { 2078 uint32_t vm = vext_vm(desc); 2079 uint32_t vl = env->vl; 2080 2081 switch (env->vxrm) { 2082 case 0: /* rnu */ 2083 vext_vx_rm_1(vd, v0, s1, vs2, 2084 env, vl, vm, 0, fn); 2085 break; 2086 case 1: /* rne */ 2087 vext_vx_rm_1(vd, v0, s1, vs2, 2088 env, vl, vm, 1, fn); 2089 break; 2090 case 2: /* rdn */ 2091 vext_vx_rm_1(vd, v0, s1, vs2, 2092 env, vl, vm, 2, fn); 2093 break; 2094 default: /* rod */ 2095 vext_vx_rm_1(vd, v0, s1, vs2, 2096 env, vl, vm, 3, fn); 2097 break; 2098 } 2099 } 2100 2101 /* generate helpers for fixed point instructions with OPIVX format */ 2102 #define GEN_VEXT_VX_RM(NAME) \ 2103 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2104 void *vs2, CPURISCVState *env, uint32_t desc) \ 2105 { \ 2106 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2107 do_##NAME); \ 2108 } 2109 2110 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2111 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2112 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2113 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2114 GEN_VEXT_VX_RM(vsaddu_vx_b) 2115 GEN_VEXT_VX_RM(vsaddu_vx_h) 2116 GEN_VEXT_VX_RM(vsaddu_vx_w) 2117 GEN_VEXT_VX_RM(vsaddu_vx_d) 2118 2119 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2120 { 2121 int8_t res = a + b; 2122 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2123 res = a > 0 ? INT8_MAX : INT8_MIN; 2124 env->vxsat = 0x1; 2125 } 2126 return res; 2127 } 2128 2129 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2130 { 2131 int16_t res = a + b; 2132 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2133 res = a > 0 ? INT16_MAX : INT16_MIN; 2134 env->vxsat = 0x1; 2135 } 2136 return res; 2137 } 2138 2139 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2140 { 2141 int32_t res = a + b; 2142 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2143 res = a > 0 ? INT32_MAX : INT32_MIN; 2144 env->vxsat = 0x1; 2145 } 2146 return res; 2147 } 2148 2149 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2150 { 2151 int64_t res = a + b; 2152 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2153 res = a > 0 ? 
INT64_MAX : INT64_MIN; 2154 env->vxsat = 0x1; 2155 } 2156 return res; 2157 } 2158 2159 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2160 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2161 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2162 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2163 GEN_VEXT_VV_RM(vsadd_vv_b) 2164 GEN_VEXT_VV_RM(vsadd_vv_h) 2165 GEN_VEXT_VV_RM(vsadd_vv_w) 2166 GEN_VEXT_VV_RM(vsadd_vv_d) 2167 2168 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2169 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2170 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2171 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2172 GEN_VEXT_VX_RM(vsadd_vx_b) 2173 GEN_VEXT_VX_RM(vsadd_vx_h) 2174 GEN_VEXT_VX_RM(vsadd_vx_w) 2175 GEN_VEXT_VX_RM(vsadd_vx_d) 2176 2177 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2178 { 2179 uint8_t res = a - b; 2180 if (res > a) { 2181 res = 0; 2182 env->vxsat = 0x1; 2183 } 2184 return res; 2185 } 2186 2187 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2188 uint16_t b) 2189 { 2190 uint16_t res = a - b; 2191 if (res > a) { 2192 res = 0; 2193 env->vxsat = 0x1; 2194 } 2195 return res; 2196 } 2197 2198 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2199 uint32_t b) 2200 { 2201 uint32_t res = a - b; 2202 if (res > a) { 2203 res = 0; 2204 env->vxsat = 0x1; 2205 } 2206 return res; 2207 } 2208 2209 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2210 uint64_t b) 2211 { 2212 uint64_t res = a - b; 2213 if (res > a) { 2214 res = 0; 2215 env->vxsat = 0x1; 2216 } 2217 return res; 2218 } 2219 2220 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2221 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2222 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2223 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2224 GEN_VEXT_VV_RM(vssubu_vv_b) 2225 GEN_VEXT_VV_RM(vssubu_vv_h) 2226 GEN_VEXT_VV_RM(vssubu_vv_w) 2227 GEN_VEXT_VV_RM(vssubu_vv_d) 2228 2229 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2230 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2231 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2232 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2233 GEN_VEXT_VX_RM(vssubu_vx_b) 2234 GEN_VEXT_VX_RM(vssubu_vx_h) 2235 GEN_VEXT_VX_RM(vssubu_vx_w) 2236 GEN_VEXT_VX_RM(vssubu_vx_d) 2237 2238 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2239 { 2240 int8_t res = a - b; 2241 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2242 res = a >= 0 ? INT8_MAX : INT8_MIN; 2243 env->vxsat = 0x1; 2244 } 2245 return res; 2246 } 2247 2248 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2249 { 2250 int16_t res = a - b; 2251 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2252 res = a >= 0 ? INT16_MAX : INT16_MIN; 2253 env->vxsat = 0x1; 2254 } 2255 return res; 2256 } 2257 2258 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2259 { 2260 int32_t res = a - b; 2261 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2262 res = a >= 0 ? INT32_MAX : INT32_MIN; 2263 env->vxsat = 0x1; 2264 } 2265 return res; 2266 } 2267 2268 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2269 { 2270 int64_t res = a - b; 2271 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2272 res = a >= 0 ? 
INT64_MAX : INT64_MIN; 2273 env->vxsat = 0x1; 2274 } 2275 return res; 2276 } 2277 2278 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2279 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2280 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2281 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2282 GEN_VEXT_VV_RM(vssub_vv_b) 2283 GEN_VEXT_VV_RM(vssub_vv_h) 2284 GEN_VEXT_VV_RM(vssub_vv_w) 2285 GEN_VEXT_VV_RM(vssub_vv_d) 2286 2287 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2288 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2289 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2290 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2291 GEN_VEXT_VX_RM(vssub_vx_b) 2292 GEN_VEXT_VX_RM(vssub_vx_h) 2293 GEN_VEXT_VX_RM(vssub_vx_w) 2294 GEN_VEXT_VX_RM(vssub_vx_d) 2295 2296 /* Vector Single-Width Averaging Add and Subtract */ 2297 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2298 { 2299 uint8_t d = extract64(v, shift, 1); 2300 uint8_t d1; 2301 uint64_t D1, D2; 2302 2303 if (shift == 0 || shift > 64) { 2304 return 0; 2305 } 2306 2307 d1 = extract64(v, shift - 1, 1); 2308 D1 = extract64(v, 0, shift); 2309 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2310 return d1; 2311 } else if (vxrm == 1) { /* round-to-nearest-even */ 2312 if (shift > 1) { 2313 D2 = extract64(v, 0, shift - 1); 2314 return d1 & ((D2 != 0) | d); 2315 } else { 2316 return d1 & d; 2317 } 2318 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2319 return !d & (D1 != 0); 2320 } 2321 return 0; /* round-down (truncate) */ 2322 } 2323 2324 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2325 { 2326 int64_t res = (int64_t)a + b; 2327 uint8_t round = get_round(vxrm, res, 1); 2328 2329 return (res >> 1) + round; 2330 } 2331 2332 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2333 { 2334 int64_t res = a + b; 2335 uint8_t round = get_round(vxrm, res, 1); 2336 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2337 2338 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2339 return ((res >> 1) ^ over) + round; 2340 } 2341 2342 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2343 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2344 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2345 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2346 GEN_VEXT_VV_RM(vaadd_vv_b) 2347 GEN_VEXT_VV_RM(vaadd_vv_h) 2348 GEN_VEXT_VV_RM(vaadd_vv_w) 2349 GEN_VEXT_VV_RM(vaadd_vv_d) 2350 2351 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2352 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2353 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2354 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2355 GEN_VEXT_VX_RM(vaadd_vx_b) 2356 GEN_VEXT_VX_RM(vaadd_vx_h) 2357 GEN_VEXT_VX_RM(vaadd_vx_w) 2358 GEN_VEXT_VX_RM(vaadd_vx_d) 2359 2360 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2361 uint32_t a, uint32_t b) 2362 { 2363 uint64_t res = (uint64_t)a + b; 2364 uint8_t round = get_round(vxrm, res, 1); 2365 2366 return (res >> 1) + round; 2367 } 2368 2369 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2370 uint64_t a, uint64_t b) 2371 { 2372 uint64_t res = a + b; 2373 uint8_t round = get_round(vxrm, res, 1); 2374 uint64_t over = (uint64_t)(res < a) << 63; 2375 2376 return ((res >> 1) | over) + round; 2377 } 2378 2379 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2380 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2381 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2382 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2383 GEN_VEXT_VV_RM(vaaddu_vv_b) 2384 GEN_VEXT_VV_RM(vaaddu_vv_h) 2385 GEN_VEXT_VV_RM(vaaddu_vv_w) 2386 GEN_VEXT_VV_RM(vaaddu_vv_d) 2387 2388 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2389 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2390 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2391 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2392 GEN_VEXT_VX_RM(vaaddu_vx_b) 2393 GEN_VEXT_VX_RM(vaaddu_vx_h) 2394 GEN_VEXT_VX_RM(vaaddu_vx_w) 2395 GEN_VEXT_VX_RM(vaaddu_vx_d) 2396 2397 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2398 { 2399 int64_t res = (int64_t)a - b; 2400 uint8_t round = get_round(vxrm, res, 1); 2401 2402 return (res >> 1) + round; 2403 } 2404 2405 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2406 { 2407 int64_t res = (int64_t)a - b; 2408 uint8_t round = get_round(vxrm, res, 1); 2409 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2410 2411 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2412 return ((res >> 1) ^ over) + round; 2413 } 2414 2415 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2416 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2417 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2418 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2419 GEN_VEXT_VV_RM(vasub_vv_b) 2420 GEN_VEXT_VV_RM(vasub_vv_h) 2421 GEN_VEXT_VV_RM(vasub_vv_w) 2422 GEN_VEXT_VV_RM(vasub_vv_d) 2423 2424 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2425 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2426 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2427 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2428 GEN_VEXT_VX_RM(vasub_vx_b) 2429 GEN_VEXT_VX_RM(vasub_vx_h) 2430 GEN_VEXT_VX_RM(vasub_vx_w) 2431 GEN_VEXT_VX_RM(vasub_vx_d) 2432 2433 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2434 uint32_t a, uint32_t b) 2435 { 2436 int64_t res = (int64_t)a - b; 2437 uint8_t round = get_round(vxrm, res, 1); 2438 2439 return (res >> 1) + round; 2440 } 2441 2442 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2443 uint64_t a, uint64_t b) 2444 { 2445 uint64_t res = (uint64_t)a - b; 2446 uint8_t round = get_round(vxrm, res, 1); 2447 uint64_t over = (uint64_t)(res > a) << 63; 2448 2449 return ((res >> 1) | over) + round; 2450 } 2451 2452 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2453 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2454 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2455 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2456 GEN_VEXT_VV_RM(vasubu_vv_b) 2457 GEN_VEXT_VV_RM(vasubu_vv_h) 2458 GEN_VEXT_VV_RM(vasubu_vv_w) 2459 GEN_VEXT_VV_RM(vasubu_vv_d) 2460 2461 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2462 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2463 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2464 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2465 GEN_VEXT_VX_RM(vasubu_vx_b) 2466 GEN_VEXT_VX_RM(vasubu_vx_h) 2467 GEN_VEXT_VX_RM(vasubu_vx_w) 2468 GEN_VEXT_VX_RM(vasubu_vx_d) 2469 2470 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2471 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2472 { 2473 uint8_t round; 2474 int16_t res; 2475 2476 res = (int16_t)a * (int16_t)b; 2477 round = get_round(vxrm, res, 7); 2478 res = (res >> 7) + round; 2479 2480 if (res > INT8_MAX) { 2481 env->vxsat = 0x1; 2482 return INT8_MAX; 2483 } else if (res < INT8_MIN) { 2484 env->vxsat = 0x1; 2485 return INT8_MIN; 2486 } else { 2487 return res; 2488 } 2489 } 2490 2491 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2492 { 2493 uint8_t round; 2494 int32_t res; 2495 2496 res = (int32_t)a * (int32_t)b; 2497 round = get_round(vxrm, res, 15); 2498 res = (res >> 15) + round; 2499 2500 if (res > INT16_MAX) { 2501 env->vxsat = 0x1; 2502 return INT16_MAX; 2503 } else if (res < INT16_MIN) { 2504 env->vxsat = 0x1; 2505 return INT16_MIN; 2506 } else { 2507 return res; 2508 } 2509 } 2510 2511 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2512 { 2513 uint8_t round; 2514 int64_t res; 2515 2516 res = (int64_t)a * (int64_t)b; 2517 round = get_round(vxrm, res, 31); 2518 res = (res >> 31) + round; 2519 2520 if (res > INT32_MAX) { 2521 env->vxsat = 0x1; 2522 return INT32_MAX; 2523 } else if (res < INT32_MIN) { 2524 env->vxsat = 0x1; 2525 return INT32_MIN; 2526 } else { 2527 return 
res; 2528 } 2529 } 2530 2531 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2532 { 2533 uint8_t round; 2534 uint64_t hi_64, lo_64; 2535 int64_t res; 2536 2537 if (a == INT64_MIN && b == INT64_MIN) { 2538 env->vxsat = 1; 2539 return INT64_MAX; 2540 } 2541 2542 muls64(&lo_64, &hi_64, a, b); 2543 round = get_round(vxrm, lo_64, 63); 2544 /* 2545 * Cannot overflow, as there are always 2546 * 2 sign bits after multiply. 2547 */ 2548 res = (hi_64 << 1) | (lo_64 >> 63); 2549 if (round) { 2550 if (res == INT64_MAX) { 2551 env->vxsat = 1; 2552 } else { 2553 res += 1; 2554 } 2555 } 2556 return res; 2557 } 2558 2559 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2560 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2561 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2562 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2563 GEN_VEXT_VV_RM(vsmul_vv_b) 2564 GEN_VEXT_VV_RM(vsmul_vv_h) 2565 GEN_VEXT_VV_RM(vsmul_vv_w) 2566 GEN_VEXT_VV_RM(vsmul_vv_d) 2567 2568 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2569 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2570 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2571 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2572 GEN_VEXT_VX_RM(vsmul_vx_b) 2573 GEN_VEXT_VX_RM(vsmul_vx_h) 2574 GEN_VEXT_VX_RM(vsmul_vx_w) 2575 GEN_VEXT_VX_RM(vsmul_vx_d) 2576 2577 /* Vector Single-Width Scaling Shift Instructions */ 2578 static inline uint8_t 2579 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2580 { 2581 uint8_t round, shift = b & 0x7; 2582 uint8_t res; 2583 2584 round = get_round(vxrm, a, shift); 2585 res = (a >> shift) + round; 2586 return res; 2587 } 2588 static inline uint16_t 2589 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2590 { 2591 uint8_t round, shift = b & 0xf; 2592 uint16_t res; 2593 2594 round = get_round(vxrm, a, shift); 2595 res = (a >> shift) + round; 2596 return res; 2597 } 2598 static inline uint32_t 2599 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2600 { 2601 uint8_t round, shift = b & 0x1f; 2602 uint32_t res; 2603 2604 round = get_round(vxrm, a, shift); 2605 res = (a >> shift) + round; 2606 return res; 2607 } 2608 static inline uint64_t 2609 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2610 { 2611 uint8_t round, shift = b & 0x3f; 2612 uint64_t res; 2613 2614 round = get_round(vxrm, a, shift); 2615 res = (a >> shift) + round; 2616 return res; 2617 } 2618 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2619 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2620 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2621 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2622 GEN_VEXT_VV_RM(vssrl_vv_b) 2623 GEN_VEXT_VV_RM(vssrl_vv_h) 2624 GEN_VEXT_VV_RM(vssrl_vv_w) 2625 GEN_VEXT_VV_RM(vssrl_vv_d) 2626 2627 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2628 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2629 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2630 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2631 GEN_VEXT_VX_RM(vssrl_vx_b) 2632 GEN_VEXT_VX_RM(vssrl_vx_h) 2633 GEN_VEXT_VX_RM(vssrl_vx_w) 2634 GEN_VEXT_VX_RM(vssrl_vx_d) 2635 2636 static inline int8_t 2637 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2638 { 2639 uint8_t round, shift = b & 0x7; 2640 int8_t res; 2641 2642 round = get_round(vxrm, a, shift); 2643 res = (a >> shift) + round; 2644 return res; 2645 } 2646 
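/*
 * Illustrative sketch only, not used by the helpers: how get_round()
 * realises the four vxrm rounding modes for the scaling shifts in this
 * section.  The name scaled_shift_example is hypothetical.
 */
static inline uint64_t scaled_shift_example(int vxrm, uint64_t a, uint8_t shift)
{
    /*
     * Example: a = 0b1011 (11), shift = 2, so a >> shift = 2 and the
     * discarded bits are 0b11 (the exact result is 2.75):
     *   rnu (0): add the most significant discarded bit     -> 2 + 1 = 3
     *   rne (1): 2.75 is not a tie, round to nearest        -> 3
     *   rdn (2): truncate                                   -> 2
     *   rod (3): discarded bits non-zero, force odd result  -> 3
     */
    return (a >> shift) + get_round(vxrm, a, shift);
}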
static inline int16_t 2647 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2648 { 2649 uint8_t round, shift = b & 0xf; 2650 int16_t res; 2651 2652 round = get_round(vxrm, a, shift); 2653 res = (a >> shift) + round; 2654 return res; 2655 } 2656 static inline int32_t 2657 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2658 { 2659 uint8_t round, shift = b & 0x1f; 2660 int32_t res; 2661 2662 round = get_round(vxrm, a, shift); 2663 res = (a >> shift) + round; 2664 return res; 2665 } 2666 static inline int64_t 2667 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2668 { 2669 uint8_t round, shift = b & 0x3f; 2670 int64_t res; 2671 2672 round = get_round(vxrm, a, shift); 2673 res = (a >> shift) + round; 2674 return res; 2675 } 2676 2677 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2678 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2679 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2680 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2681 GEN_VEXT_VV_RM(vssra_vv_b) 2682 GEN_VEXT_VV_RM(vssra_vv_h) 2683 GEN_VEXT_VV_RM(vssra_vv_w) 2684 GEN_VEXT_VV_RM(vssra_vv_d) 2685 2686 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2687 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2688 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2689 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2690 GEN_VEXT_VX_RM(vssra_vx_b) 2691 GEN_VEXT_VX_RM(vssra_vx_h) 2692 GEN_VEXT_VX_RM(vssra_vx_w) 2693 GEN_VEXT_VX_RM(vssra_vx_d) 2694 2695 /* Vector Narrowing Fixed-Point Clip Instructions */ 2696 static inline int8_t 2697 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2698 { 2699 uint8_t round, shift = b & 0xf; 2700 int16_t res; 2701 2702 round = get_round(vxrm, a, shift); 2703 res = (a >> shift) + round; 2704 if (res > INT8_MAX) { 2705 env->vxsat = 0x1; 2706 return INT8_MAX; 2707 } else if (res < INT8_MIN) { 2708 env->vxsat = 0x1; 2709 return INT8_MIN; 2710 } else { 2711 return res; 2712 } 2713 } 2714 2715 static inline int16_t 2716 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2717 { 2718 uint8_t round, shift = b & 0x1f; 2719 int32_t res; 2720 2721 round = get_round(vxrm, a, shift); 2722 res = (a >> shift) + round; 2723 if (res > INT16_MAX) { 2724 env->vxsat = 0x1; 2725 return INT16_MAX; 2726 } else if (res < INT16_MIN) { 2727 env->vxsat = 0x1; 2728 return INT16_MIN; 2729 } else { 2730 return res; 2731 } 2732 } 2733 2734 static inline int32_t 2735 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2736 { 2737 uint8_t round, shift = b & 0x3f; 2738 int64_t res; 2739 2740 round = get_round(vxrm, a, shift); 2741 res = (a >> shift) + round; 2742 if (res > INT32_MAX) { 2743 env->vxsat = 0x1; 2744 return INT32_MAX; 2745 } else if (res < INT32_MIN) { 2746 env->vxsat = 0x1; 2747 return INT32_MIN; 2748 } else { 2749 return res; 2750 } 2751 } 2752 2753 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2754 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2755 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2756 GEN_VEXT_VV_RM(vnclip_wv_b) 2757 GEN_VEXT_VV_RM(vnclip_wv_h) 2758 GEN_VEXT_VV_RM(vnclip_wv_w) 2759 2760 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2761 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2762 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2763 GEN_VEXT_VX_RM(vnclip_wx_b) 2764 GEN_VEXT_VX_RM(vnclip_wx_h) 2765 GEN_VEXT_VX_RM(vnclip_wx_w) 2766 2767 static inline 
uint8_t 2768 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2769 { 2770 uint8_t round, shift = b & 0xf; 2771 uint16_t res; 2772 2773 round = get_round(vxrm, a, shift); 2774 res = (a >> shift) + round; 2775 if (res > UINT8_MAX) { 2776 env->vxsat = 0x1; 2777 return UINT8_MAX; 2778 } else { 2779 return res; 2780 } 2781 } 2782 2783 static inline uint16_t 2784 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2785 { 2786 uint8_t round, shift = b & 0x1f; 2787 uint32_t res; 2788 2789 round = get_round(vxrm, a, shift); 2790 res = (a >> shift) + round; 2791 if (res > UINT16_MAX) { 2792 env->vxsat = 0x1; 2793 return UINT16_MAX; 2794 } else { 2795 return res; 2796 } 2797 } 2798 2799 static inline uint32_t 2800 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2801 { 2802 uint8_t round, shift = b & 0x3f; 2803 uint64_t res; 2804 2805 round = get_round(vxrm, a, shift); 2806 res = (a >> shift) + round; 2807 if (res > UINT32_MAX) { 2808 env->vxsat = 0x1; 2809 return UINT32_MAX; 2810 } else { 2811 return res; 2812 } 2813 } 2814 2815 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2816 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2817 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2818 GEN_VEXT_VV_RM(vnclipu_wv_b) 2819 GEN_VEXT_VV_RM(vnclipu_wv_h) 2820 GEN_VEXT_VV_RM(vnclipu_wv_w) 2821 2822 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2823 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2824 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 2825 GEN_VEXT_VX_RM(vnclipu_wx_b) 2826 GEN_VEXT_VX_RM(vnclipu_wx_h) 2827 GEN_VEXT_VX_RM(vnclipu_wx_w) 2828 2829 /* 2830 *** Vector Float Point Arithmetic Instructions 2831 */ 2832 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2833 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2834 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2835 CPURISCVState *env) \ 2836 { \ 2837 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2838 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2839 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2840 } 2841 2842 #define GEN_VEXT_VV_ENV(NAME) \ 2843 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2844 void *vs2, CPURISCVState *env, \ 2845 uint32_t desc) \ 2846 { \ 2847 uint32_t vm = vext_vm(desc); \ 2848 uint32_t vl = env->vl; \ 2849 uint32_t i; \ 2850 \ 2851 for (i = env->vstart; i < vl; i++) { \ 2852 if (!vm && !vext_elem_mask(v0, i)) { \ 2853 continue; \ 2854 } \ 2855 do_##NAME(vd, vs1, vs2, i, env); \ 2856 } \ 2857 env->vstart = 0; \ 2858 } 2859 2860 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2861 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2862 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2863 GEN_VEXT_VV_ENV(vfadd_vv_h) 2864 GEN_VEXT_VV_ENV(vfadd_vv_w) 2865 GEN_VEXT_VV_ENV(vfadd_vv_d) 2866 2867 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2868 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2869 CPURISCVState *env) \ 2870 { \ 2871 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2872 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 2873 } 2874 2875 #define GEN_VEXT_VF(NAME) \ 2876 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2877 void *vs2, CPURISCVState *env, \ 2878 uint32_t desc) \ 2879 { \ 2880 uint32_t vm = vext_vm(desc); \ 2881 uint32_t vl = env->vl; \ 2882 uint32_t i; \ 2883 \ 2884 for (i = env->vstart; i < vl; i++) { \ 2885 if (!vm && !vext_elem_mask(v0, i)) { \ 2886 
continue; \ 2887 } \ 2888 do_##NAME(vd, s1, vs2, i, env); \ 2889 } \ 2890 env->vstart = 0; \ 2891 } 2892 2893 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2894 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2895 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2896 GEN_VEXT_VF(vfadd_vf_h) 2897 GEN_VEXT_VF(vfadd_vf_w) 2898 GEN_VEXT_VF(vfadd_vf_d) 2899 2900 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2901 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2902 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2903 GEN_VEXT_VV_ENV(vfsub_vv_h) 2904 GEN_VEXT_VV_ENV(vfsub_vv_w) 2905 GEN_VEXT_VV_ENV(vfsub_vv_d) 2906 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2907 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2908 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2909 GEN_VEXT_VF(vfsub_vf_h) 2910 GEN_VEXT_VF(vfsub_vf_w) 2911 GEN_VEXT_VF(vfsub_vf_d) 2912 2913 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2914 { 2915 return float16_sub(b, a, s); 2916 } 2917 2918 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2919 { 2920 return float32_sub(b, a, s); 2921 } 2922 2923 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2924 { 2925 return float64_sub(b, a, s); 2926 } 2927 2928 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2929 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2930 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2931 GEN_VEXT_VF(vfrsub_vf_h) 2932 GEN_VEXT_VF(vfrsub_vf_w) 2933 GEN_VEXT_VF(vfrsub_vf_d) 2934 2935 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2936 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2937 { 2938 return float32_add(float16_to_float32(a, true, s), 2939 float16_to_float32(b, true, s), s); 2940 } 2941 2942 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2943 { 2944 return float64_add(float32_to_float64(a, s), 2945 float32_to_float64(b, s), s); 2946 2947 } 2948 2949 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2950 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2951 GEN_VEXT_VV_ENV(vfwadd_vv_h) 2952 GEN_VEXT_VV_ENV(vfwadd_vv_w) 2953 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2954 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2955 GEN_VEXT_VF(vfwadd_vf_h) 2956 GEN_VEXT_VF(vfwadd_vf_w) 2957 2958 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2959 { 2960 return float32_sub(float16_to_float32(a, true, s), 2961 float16_to_float32(b, true, s), s); 2962 } 2963 2964 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2965 { 2966 return float64_sub(float32_to_float64(a, s), 2967 float32_to_float64(b, s), s); 2968 2969 } 2970 2971 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 2972 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 2973 GEN_VEXT_VV_ENV(vfwsub_vv_h) 2974 GEN_VEXT_VV_ENV(vfwsub_vv_w) 2975 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 2976 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 2977 GEN_VEXT_VF(vfwsub_vf_h) 2978 GEN_VEXT_VF(vfwsub_vf_w) 2979 2980 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 2981 { 2982 return float32_add(a, float16_to_float32(b, true, s), s); 2983 } 2984 2985 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 2986 { 2987 return float64_add(a, float32_to_float64(b, s), s); 2988 } 2989 2990 
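/*
 * Reference sketch (the name do_vfwadd_vv_h_sketch is hypothetical):
 * roughly what RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2,
 * vfwadd16) expands to.  Each 16-bit source element is read with the H2
 * fixup and the widened 32-bit result is stored with the H4 fixup, which
 * is why the destination register group uses twice the EMUL of the
 * sources.
 */
static inline void do_vfwadd_vv_h_sketch(void *vd, void *vs1, void *vs2,
                                         int i, CPURISCVState *env)
{
    uint16_t s1 = *((uint16_t *)vs1 + H2(i));
    uint16_t s2 = *((uint16_t *)vs2 + H2(i));

    /* Promote both halves to single precision, then one rounded add. */
    *((uint32_t *)vd + H4(i)) = vfwadd16(s2, s1, &env->fp_status);
}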
RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 2991 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 2992 GEN_VEXT_VV_ENV(vfwadd_wv_h) 2993 GEN_VEXT_VV_ENV(vfwadd_wv_w) 2994 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 2995 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 2996 GEN_VEXT_VF(vfwadd_wf_h) 2997 GEN_VEXT_VF(vfwadd_wf_w) 2998 2999 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3000 { 3001 return float32_sub(a, float16_to_float32(b, true, s), s); 3002 } 3003 3004 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3005 { 3006 return float64_sub(a, float32_to_float64(b, s), s); 3007 } 3008 3009 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3010 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3011 GEN_VEXT_VV_ENV(vfwsub_wv_h) 3012 GEN_VEXT_VV_ENV(vfwsub_wv_w) 3013 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3014 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3015 GEN_VEXT_VF(vfwsub_wf_h) 3016 GEN_VEXT_VF(vfwsub_wf_w) 3017 3018 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3019 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3020 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3021 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3022 GEN_VEXT_VV_ENV(vfmul_vv_h) 3023 GEN_VEXT_VV_ENV(vfmul_vv_w) 3024 GEN_VEXT_VV_ENV(vfmul_vv_d) 3025 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3026 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3027 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3028 GEN_VEXT_VF(vfmul_vf_h) 3029 GEN_VEXT_VF(vfmul_vf_w) 3030 GEN_VEXT_VF(vfmul_vf_d) 3031 3032 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3033 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3034 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3035 GEN_VEXT_VV_ENV(vfdiv_vv_h) 3036 GEN_VEXT_VV_ENV(vfdiv_vv_w) 3037 GEN_VEXT_VV_ENV(vfdiv_vv_d) 3038 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3039 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3040 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3041 GEN_VEXT_VF(vfdiv_vf_h) 3042 GEN_VEXT_VF(vfdiv_vf_w) 3043 GEN_VEXT_VF(vfdiv_vf_d) 3044 3045 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3046 { 3047 return float16_div(b, a, s); 3048 } 3049 3050 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3051 { 3052 return float32_div(b, a, s); 3053 } 3054 3055 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3056 { 3057 return float64_div(b, a, s); 3058 } 3059 3060 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3061 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3062 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3063 GEN_VEXT_VF(vfrdiv_vf_h) 3064 GEN_VEXT_VF(vfrdiv_vf_w) 3065 GEN_VEXT_VF(vfrdiv_vf_d) 3066 3067 /* Vector Widening Floating-Point Multiply */ 3068 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3069 { 3070 return float32_mul(float16_to_float32(a, true, s), 3071 float16_to_float32(b, true, s), s); 3072 } 3073 3074 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3075 { 3076 return float64_mul(float32_to_float64(a, s), 3077 float32_to_float64(b, s), s); 3078 3079 } 3080 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3081 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, 
H4, H4, vfwmul32) 3082 GEN_VEXT_VV_ENV(vfwmul_vv_h) 3083 GEN_VEXT_VV_ENV(vfwmul_vv_w) 3084 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3085 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3086 GEN_VEXT_VF(vfwmul_vf_h) 3087 GEN_VEXT_VF(vfwmul_vf_w) 3088 3089 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3090 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3091 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3092 CPURISCVState *env) \ 3093 { \ 3094 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3095 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3096 TD d = *((TD *)vd + HD(i)); \ 3097 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3098 } 3099 3100 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3101 { 3102 return float16_muladd(a, b, d, 0, s); 3103 } 3104 3105 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3106 { 3107 return float32_muladd(a, b, d, 0, s); 3108 } 3109 3110 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3111 { 3112 return float64_muladd(a, b, d, 0, s); 3113 } 3114 3115 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3116 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3117 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3118 GEN_VEXT_VV_ENV(vfmacc_vv_h) 3119 GEN_VEXT_VV_ENV(vfmacc_vv_w) 3120 GEN_VEXT_VV_ENV(vfmacc_vv_d) 3121 3122 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3123 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3124 CPURISCVState *env) \ 3125 { \ 3126 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3127 TD d = *((TD *)vd + HD(i)); \ 3128 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3129 } 3130 3131 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3132 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3133 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3134 GEN_VEXT_VF(vfmacc_vf_h) 3135 GEN_VEXT_VF(vfmacc_vf_w) 3136 GEN_VEXT_VF(vfmacc_vf_d) 3137 3138 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3139 { 3140 return float16_muladd(a, b, d, 3141 float_muladd_negate_c | float_muladd_negate_product, s); 3142 } 3143 3144 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3145 { 3146 return float32_muladd(a, b, d, 3147 float_muladd_negate_c | float_muladd_negate_product, s); 3148 } 3149 3150 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3151 { 3152 return float64_muladd(a, b, d, 3153 float_muladd_negate_c | float_muladd_negate_product, s); 3154 } 3155 3156 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3157 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3158 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3159 GEN_VEXT_VV_ENV(vfnmacc_vv_h) 3160 GEN_VEXT_VV_ENV(vfnmacc_vv_w) 3161 GEN_VEXT_VV_ENV(vfnmacc_vv_d) 3162 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3163 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3164 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3165 GEN_VEXT_VF(vfnmacc_vf_h) 3166 GEN_VEXT_VF(vfnmacc_vf_w) 3167 GEN_VEXT_VF(vfnmacc_vf_d) 3168 3169 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3170 { 3171 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3172 } 3173 3174 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3175 { 3176 return float32_muladd(a, b, d, float_muladd_negate_c, 
s); 3177 } 3178 3179 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3180 { 3181 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3182 } 3183 3184 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3185 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3186 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3187 GEN_VEXT_VV_ENV(vfmsac_vv_h) 3188 GEN_VEXT_VV_ENV(vfmsac_vv_w) 3189 GEN_VEXT_VV_ENV(vfmsac_vv_d) 3190 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3191 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3192 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3193 GEN_VEXT_VF(vfmsac_vf_h) 3194 GEN_VEXT_VF(vfmsac_vf_w) 3195 GEN_VEXT_VF(vfmsac_vf_d) 3196 3197 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3198 { 3199 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3200 } 3201 3202 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3203 { 3204 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3205 } 3206 3207 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3208 { 3209 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3210 } 3211 3212 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3213 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3214 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3215 GEN_VEXT_VV_ENV(vfnmsac_vv_h) 3216 GEN_VEXT_VV_ENV(vfnmsac_vv_w) 3217 GEN_VEXT_VV_ENV(vfnmsac_vv_d) 3218 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3219 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3220 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3221 GEN_VEXT_VF(vfnmsac_vf_h) 3222 GEN_VEXT_VF(vfnmsac_vf_w) 3223 GEN_VEXT_VF(vfnmsac_vf_d) 3224 3225 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3226 { 3227 return float16_muladd(d, b, a, 0, s); 3228 } 3229 3230 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3231 { 3232 return float32_muladd(d, b, a, 0, s); 3233 } 3234 3235 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3236 { 3237 return float64_muladd(d, b, a, 0, s); 3238 } 3239 3240 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3241 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3242 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3243 GEN_VEXT_VV_ENV(vfmadd_vv_h) 3244 GEN_VEXT_VV_ENV(vfmadd_vv_w) 3245 GEN_VEXT_VV_ENV(vfmadd_vv_d) 3246 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3247 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3248 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3249 GEN_VEXT_VF(vfmadd_vf_h) 3250 GEN_VEXT_VF(vfmadd_vf_w) 3251 GEN_VEXT_VF(vfmadd_vf_d) 3252 3253 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3254 { 3255 return float16_muladd(d, b, a, 3256 float_muladd_negate_c | float_muladd_negate_product, s); 3257 } 3258 3259 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3260 { 3261 return float32_muladd(d, b, a, 3262 float_muladd_negate_c | float_muladd_negate_product, s); 3263 } 3264 3265 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3266 { 3267 return float64_muladd(d, b, a, 3268 float_muladd_negate_c | float_muladd_negate_product, s); 3269 } 3270 3271 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3272 
RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3273 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3274 GEN_VEXT_VV_ENV(vfnmadd_vv_h) 3275 GEN_VEXT_VV_ENV(vfnmadd_vv_w) 3276 GEN_VEXT_VV_ENV(vfnmadd_vv_d) 3277 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3278 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3279 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3280 GEN_VEXT_VF(vfnmadd_vf_h) 3281 GEN_VEXT_VF(vfnmadd_vf_w) 3282 GEN_VEXT_VF(vfnmadd_vf_d) 3283 3284 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3285 { 3286 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3287 } 3288 3289 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3290 { 3291 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3292 } 3293 3294 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3295 { 3296 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3297 } 3298 3299 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3300 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3301 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3302 GEN_VEXT_VV_ENV(vfmsub_vv_h) 3303 GEN_VEXT_VV_ENV(vfmsub_vv_w) 3304 GEN_VEXT_VV_ENV(vfmsub_vv_d) 3305 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3306 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3307 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3308 GEN_VEXT_VF(vfmsub_vf_h) 3309 GEN_VEXT_VF(vfmsub_vf_w) 3310 GEN_VEXT_VF(vfmsub_vf_d) 3311 3312 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3313 { 3314 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3315 } 3316 3317 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3318 { 3319 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3320 } 3321 3322 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3323 { 3324 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3325 } 3326 3327 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3328 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3329 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3330 GEN_VEXT_VV_ENV(vfnmsub_vv_h) 3331 GEN_VEXT_VV_ENV(vfnmsub_vv_w) 3332 GEN_VEXT_VV_ENV(vfnmsub_vv_d) 3333 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3334 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3335 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3336 GEN_VEXT_VF(vfnmsub_vf_h) 3337 GEN_VEXT_VF(vfnmsub_vf_w) 3338 GEN_VEXT_VF(vfnmsub_vf_d) 3339 3340 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3341 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3342 { 3343 return float32_muladd(float16_to_float32(a, true, s), 3344 float16_to_float32(b, true, s), d, 0, s); 3345 } 3346 3347 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3348 { 3349 return float64_muladd(float32_to_float64(a, s), 3350 float32_to_float64(b, s), d, 0, s); 3351 } 3352 3353 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3354 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3355 GEN_VEXT_VV_ENV(vfwmacc_vv_h) 3356 GEN_VEXT_VV_ENV(vfwmacc_vv_w) 3357 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3358 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3359 GEN_VEXT_VF(vfwmacc_vf_h) 3360 
GEN_VEXT_VF(vfwmacc_vf_w) 3361 3362 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3363 { 3364 return float32_muladd(float16_to_float32(a, true, s), 3365 float16_to_float32(b, true, s), d, 3366 float_muladd_negate_c | float_muladd_negate_product, s); 3367 } 3368 3369 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3370 { 3371 return float64_muladd(float32_to_float64(a, s), 3372 float32_to_float64(b, s), d, 3373 float_muladd_negate_c | float_muladd_negate_product, s); 3374 } 3375 3376 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3377 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3378 GEN_VEXT_VV_ENV(vfwnmacc_vv_h) 3379 GEN_VEXT_VV_ENV(vfwnmacc_vv_w) 3380 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3381 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3382 GEN_VEXT_VF(vfwnmacc_vf_h) 3383 GEN_VEXT_VF(vfwnmacc_vf_w) 3384 3385 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3386 { 3387 return float32_muladd(float16_to_float32(a, true, s), 3388 float16_to_float32(b, true, s), d, 3389 float_muladd_negate_c, s); 3390 } 3391 3392 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3393 { 3394 return float64_muladd(float32_to_float64(a, s), 3395 float32_to_float64(b, s), d, 3396 float_muladd_negate_c, s); 3397 } 3398 3399 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3400 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3401 GEN_VEXT_VV_ENV(vfwmsac_vv_h) 3402 GEN_VEXT_VV_ENV(vfwmsac_vv_w) 3403 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3404 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3405 GEN_VEXT_VF(vfwmsac_vf_h) 3406 GEN_VEXT_VF(vfwmsac_vf_w) 3407 3408 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3409 { 3410 return float32_muladd(float16_to_float32(a, true, s), 3411 float16_to_float32(b, true, s), d, 3412 float_muladd_negate_product, s); 3413 } 3414 3415 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3416 { 3417 return float64_muladd(float32_to_float64(a, s), 3418 float32_to_float64(b, s), d, 3419 float_muladd_negate_product, s); 3420 } 3421 3422 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3423 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3424 GEN_VEXT_VV_ENV(vfwnmsac_vv_h) 3425 GEN_VEXT_VV_ENV(vfwnmsac_vv_w) 3426 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3427 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3428 GEN_VEXT_VF(vfwnmsac_vf_h) 3429 GEN_VEXT_VF(vfwnmsac_vf_w) 3430 3431 /* Vector Floating-Point Square-Root Instruction */ 3432 /* (TD, T2, TX2) */ 3433 #define OP_UU_H uint16_t, uint16_t, uint16_t 3434 #define OP_UU_W uint32_t, uint32_t, uint32_t 3435 #define OP_UU_D uint64_t, uint64_t, uint64_t 3436 3437 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3438 static void do_##NAME(void *vd, void *vs2, int i, \ 3439 CPURISCVState *env) \ 3440 { \ 3441 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3442 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3443 } 3444 3445 #define GEN_VEXT_V_ENV(NAME) \ 3446 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3447 CPURISCVState *env, uint32_t desc) \ 3448 { \ 3449 uint32_t vm = vext_vm(desc); \ 3450 uint32_t vl = env->vl; \ 3451 uint32_t i; \ 3452 \ 3453 if (vl == 0) { \ 3454 return; \ 3455 } \ 3456 for (i = env->vstart; i < vl; i++) { \ 3457 if (!vm && !vext_elem_mask(v0, i)) { \ 
3458 continue; \ 3459 } \ 3460 do_##NAME(vd, vs2, i, env); \ 3461 } \ 3462 env->vstart = 0; \ 3463 } 3464 3465 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3466 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3467 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3468 GEN_VEXT_V_ENV(vfsqrt_v_h) 3469 GEN_VEXT_V_ENV(vfsqrt_v_w) 3470 GEN_VEXT_V_ENV(vfsqrt_v_d) 3471 3472 /* 3473 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3474 * 3475 * Adapted from riscv-v-spec recip.c: 3476 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3477 */ 3478 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3479 { 3480 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3481 uint64_t exp = extract64(f, frac_size, exp_size); 3482 uint64_t frac = extract64(f, 0, frac_size); 3483 3484 const uint8_t lookup_table[] = { 3485 52, 51, 50, 48, 47, 46, 44, 43, 3486 42, 41, 40, 39, 38, 36, 35, 34, 3487 33, 32, 31, 30, 30, 29, 28, 27, 3488 26, 25, 24, 23, 23, 22, 21, 20, 3489 19, 19, 18, 17, 16, 16, 15, 14, 3490 14, 13, 12, 12, 11, 10, 10, 9, 3491 9, 8, 7, 7, 6, 6, 5, 4, 3492 4, 3, 3, 2, 2, 1, 1, 0, 3493 127, 125, 123, 121, 119, 118, 116, 114, 3494 113, 111, 109, 108, 106, 105, 103, 102, 3495 100, 99, 97, 96, 95, 93, 92, 91, 3496 90, 88, 87, 86, 85, 84, 83, 82, 3497 80, 79, 78, 77, 76, 75, 74, 73, 3498 72, 71, 70, 70, 69, 68, 67, 66, 3499 65, 64, 63, 63, 62, 61, 60, 59, 3500 59, 58, 57, 56, 56, 55, 54, 53 3501 }; 3502 const int precision = 7; 3503 3504 if (exp == 0 && frac != 0) { /* subnormal */ 3505 /* Normalize the subnormal. */ 3506 while (extract64(frac, frac_size - 1, 1) == 0) { 3507 exp--; 3508 frac <<= 1; 3509 } 3510 3511 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3512 } 3513 3514 int idx = ((exp & 1) << (precision - 1)) | 3515 (frac >> (frac_size - precision + 1)); 3516 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3517 (frac_size - precision); 3518 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3519 3520 uint64_t val = 0; 3521 val = deposit64(val, 0, frac_size, out_frac); 3522 val = deposit64(val, frac_size, exp_size, out_exp); 3523 val = deposit64(val, frac_size + exp_size, 1, sign); 3524 return val; 3525 } 3526 3527 static float16 frsqrt7_h(float16 f, float_status *s) 3528 { 3529 int exp_size = 5, frac_size = 10; 3530 bool sign = float16_is_neg(f); 3531 3532 /* 3533 * frsqrt7(sNaN) = canonical NaN 3534 * frsqrt7(-inf) = canonical NaN 3535 * frsqrt7(-normal) = canonical NaN 3536 * frsqrt7(-subnormal) = canonical NaN 3537 */ 3538 if (float16_is_signaling_nan(f, s) || 3539 (float16_is_infinity(f) && sign) || 3540 (float16_is_normal(f) && sign) || 3541 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3542 s->float_exception_flags |= float_flag_invalid; 3543 return float16_default_nan(s); 3544 } 3545 3546 /* frsqrt7(qNaN) = canonical NaN */ 3547 if (float16_is_quiet_nan(f, s)) { 3548 return float16_default_nan(s); 3549 } 3550 3551 /* frsqrt7(+-0) = +-inf */ 3552 if (float16_is_zero(f)) { 3553 s->float_exception_flags |= float_flag_divbyzero; 3554 return float16_set_sign(float16_infinity, sign); 3555 } 3556 3557 /* frsqrt7(+inf) = +0 */ 3558 if (float16_is_infinity(f) && !sign) { 3559 return float16_set_sign(float16_zero, sign); 3560 } 3561 3562 /* +normal, +subnormal */ 3563 uint64_t val = frsqrt7(f, exp_size, frac_size); 3564 return make_float16(val); 3565 } 3566 3567 static float32 frsqrt7_s(float32 f, float_status *s) 3568 { 3569 int exp_size = 8, frac_size = 23; 3570 bool 
sign = float32_is_neg(f); 3571 3572 /* 3573 * frsqrt7(sNaN) = canonical NaN 3574 * frsqrt7(-inf) = canonical NaN 3575 * frsqrt7(-normal) = canonical NaN 3576 * frsqrt7(-subnormal) = canonical NaN 3577 */ 3578 if (float32_is_signaling_nan(f, s) || 3579 (float32_is_infinity(f) && sign) || 3580 (float32_is_normal(f) && sign) || 3581 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3582 s->float_exception_flags |= float_flag_invalid; 3583 return float32_default_nan(s); 3584 } 3585 3586 /* frsqrt7(qNaN) = canonical NaN */ 3587 if (float32_is_quiet_nan(f, s)) { 3588 return float32_default_nan(s); 3589 } 3590 3591 /* frsqrt7(+-0) = +-inf */ 3592 if (float32_is_zero(f)) { 3593 s->float_exception_flags |= float_flag_divbyzero; 3594 return float32_set_sign(float32_infinity, sign); 3595 } 3596 3597 /* frsqrt7(+inf) = +0 */ 3598 if (float32_is_infinity(f) && !sign) { 3599 return float32_set_sign(float32_zero, sign); 3600 } 3601 3602 /* +normal, +subnormal */ 3603 uint64_t val = frsqrt7(f, exp_size, frac_size); 3604 return make_float32(val); 3605 } 3606 3607 static float64 frsqrt7_d(float64 f, float_status *s) 3608 { 3609 int exp_size = 11, frac_size = 52; 3610 bool sign = float64_is_neg(f); 3611 3612 /* 3613 * frsqrt7(sNaN) = canonical NaN 3614 * frsqrt7(-inf) = canonical NaN 3615 * frsqrt7(-normal) = canonical NaN 3616 * frsqrt7(-subnormal) = canonical NaN 3617 */ 3618 if (float64_is_signaling_nan(f, s) || 3619 (float64_is_infinity(f) && sign) || 3620 (float64_is_normal(f) && sign) || 3621 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3622 s->float_exception_flags |= float_flag_invalid; 3623 return float64_default_nan(s); 3624 } 3625 3626 /* frsqrt7(qNaN) = canonical NaN */ 3627 if (float64_is_quiet_nan(f, s)) { 3628 return float64_default_nan(s); 3629 } 3630 3631 /* frsqrt7(+-0) = +-inf */ 3632 if (float64_is_zero(f)) { 3633 s->float_exception_flags |= float_flag_divbyzero; 3634 return float64_set_sign(float64_infinity, sign); 3635 } 3636 3637 /* frsqrt7(+inf) = +0 */ 3638 if (float64_is_infinity(f) && !sign) { 3639 return float64_set_sign(float64_zero, sign); 3640 } 3641 3642 /* +normal, +subnormal */ 3643 uint64_t val = frsqrt7(f, exp_size, frac_size); 3644 return make_float64(val); 3645 } 3646 3647 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3648 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3649 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3650 GEN_VEXT_V_ENV(vfrsqrt7_v_h) 3651 GEN_VEXT_V_ENV(vfrsqrt7_v_w) 3652 GEN_VEXT_V_ENV(vfrsqrt7_v_d) 3653 3654 /* 3655 * Vector Floating-Point Reciprocal Estimate Instruction 3656 * 3657 * Adapted from riscv-v-spec recip.c: 3658 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3659 */ 3660 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3661 float_status *s) 3662 { 3663 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3664 uint64_t exp = extract64(f, frac_size, exp_size); 3665 uint64_t frac = extract64(f, 0, frac_size); 3666 3667 const uint8_t lookup_table[] = { 3668 127, 125, 123, 121, 119, 117, 116, 114, 3669 112, 110, 109, 107, 105, 104, 102, 100, 3670 99, 97, 96, 94, 93, 91, 90, 88, 3671 87, 85, 84, 83, 81, 80, 79, 77, 3672 76, 75, 74, 72, 71, 70, 69, 68, 3673 66, 65, 64, 63, 62, 61, 60, 59, 3674 58, 57, 56, 55, 54, 53, 52, 51, 3675 50, 49, 48, 47, 46, 45, 44, 43, 3676 42, 41, 40, 40, 39, 38, 37, 36, 3677 35, 35, 34, 33, 32, 31, 31, 30, 3678 29, 28, 28, 27, 26, 25, 25, 24, 3679 23, 23, 22, 21, 21, 20, 19, 19, 3680 18, 17, 17, 16, 15, 15, 14, 14, 
3681 13, 12, 12, 11, 11, 10, 9, 9, 3682 8, 8, 7, 7, 6, 5, 5, 4, 3683 4, 3, 3, 2, 2, 1, 1, 0 3684 }; 3685 const int precision = 7; 3686 3687 if (exp == 0 && frac != 0) { /* subnormal */ 3688 /* Normalize the subnormal. */ 3689 while (extract64(frac, frac_size - 1, 1) == 0) { 3690 exp--; 3691 frac <<= 1; 3692 } 3693 3694 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3695 3696 if (exp != 0 && exp != UINT64_MAX) { 3697 /* 3698 * Overflow to inf or max value of same sign, 3699 * depending on sign and rounding mode. 3700 */ 3701 s->float_exception_flags |= (float_flag_inexact | 3702 float_flag_overflow); 3703 3704 if ((s->float_rounding_mode == float_round_to_zero) || 3705 ((s->float_rounding_mode == float_round_down) && !sign) || 3706 ((s->float_rounding_mode == float_round_up) && sign)) { 3707 /* Return greatest/negative finite value. */ 3708 return (sign << (exp_size + frac_size)) | 3709 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3710 } else { 3711 /* Return +-inf. */ 3712 return (sign << (exp_size + frac_size)) | 3713 MAKE_64BIT_MASK(frac_size, exp_size); 3714 } 3715 } 3716 } 3717 3718 int idx = frac >> (frac_size - precision); 3719 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3720 (frac_size - precision); 3721 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3722 3723 if (out_exp == 0 || out_exp == UINT64_MAX) { 3724 /* 3725 * The result is subnormal, but don't raise the underflow exception, 3726 * because there's no additional loss of precision. 3727 */ 3728 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3729 if (out_exp == UINT64_MAX) { 3730 out_frac >>= 1; 3731 out_exp = 0; 3732 } 3733 } 3734 3735 uint64_t val = 0; 3736 val = deposit64(val, 0, frac_size, out_frac); 3737 val = deposit64(val, frac_size, exp_size, out_exp); 3738 val = deposit64(val, frac_size + exp_size, 1, sign); 3739 return val; 3740 } 3741 3742 static float16 frec7_h(float16 f, float_status *s) 3743 { 3744 int exp_size = 5, frac_size = 10; 3745 bool sign = float16_is_neg(f); 3746 3747 /* frec7(+-inf) = +-0 */ 3748 if (float16_is_infinity(f)) { 3749 return float16_set_sign(float16_zero, sign); 3750 } 3751 3752 /* frec7(+-0) = +-inf */ 3753 if (float16_is_zero(f)) { 3754 s->float_exception_flags |= float_flag_divbyzero; 3755 return float16_set_sign(float16_infinity, sign); 3756 } 3757 3758 /* frec7(sNaN) = canonical NaN */ 3759 if (float16_is_signaling_nan(f, s)) { 3760 s->float_exception_flags |= float_flag_invalid; 3761 return float16_default_nan(s); 3762 } 3763 3764 /* frec7(qNaN) = canonical NaN */ 3765 if (float16_is_quiet_nan(f, s)) { 3766 return float16_default_nan(s); 3767 } 3768 3769 /* +-normal, +-subnormal */ 3770 uint64_t val = frec7(f, exp_size, frac_size, s); 3771 return make_float16(val); 3772 } 3773 3774 static float32 frec7_s(float32 f, float_status *s) 3775 { 3776 int exp_size = 8, frac_size = 23; 3777 bool sign = float32_is_neg(f); 3778 3779 /* frec7(+-inf) = +-0 */ 3780 if (float32_is_infinity(f)) { 3781 return float32_set_sign(float32_zero, sign); 3782 } 3783 3784 /* frec7(+-0) = +-inf */ 3785 if (float32_is_zero(f)) { 3786 s->float_exception_flags |= float_flag_divbyzero; 3787 return float32_set_sign(float32_infinity, sign); 3788 } 3789 3790 /* frec7(sNaN) = canonical NaN */ 3791 if (float32_is_signaling_nan(f, s)) { 3792 s->float_exception_flags |= float_flag_invalid; 3793 return float32_default_nan(s); 3794 } 3795 3796 /* frec7(qNaN) = canonical NaN */ 3797 if (float32_is_quiet_nan(f, s)) { 3798 return float32_default_nan(s); 3799 } 3800 3801 /* 
+-normal, +-subnormal */ 3802 uint64_t val = frec7(f, exp_size, frac_size, s); 3803 return make_float32(val); 3804 } 3805 3806 static float64 frec7_d(float64 f, float_status *s) 3807 { 3808 int exp_size = 11, frac_size = 52; 3809 bool sign = float64_is_neg(f); 3810 3811 /* frec7(+-inf) = +-0 */ 3812 if (float64_is_infinity(f)) { 3813 return float64_set_sign(float64_zero, sign); 3814 } 3815 3816 /* frec7(+-0) = +-inf */ 3817 if (float64_is_zero(f)) { 3818 s->float_exception_flags |= float_flag_divbyzero; 3819 return float64_set_sign(float64_infinity, sign); 3820 } 3821 3822 /* frec7(sNaN) = canonical NaN */ 3823 if (float64_is_signaling_nan(f, s)) { 3824 s->float_exception_flags |= float_flag_invalid; 3825 return float64_default_nan(s); 3826 } 3827 3828 /* frec7(qNaN) = canonical NaN */ 3829 if (float64_is_quiet_nan(f, s)) { 3830 return float64_default_nan(s); 3831 } 3832 3833 /* +-normal, +-subnormal */ 3834 uint64_t val = frec7(f, exp_size, frac_size, s); 3835 return make_float64(val); 3836 } 3837 3838 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3839 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3840 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3841 GEN_VEXT_V_ENV(vfrec7_v_h) 3842 GEN_VEXT_V_ENV(vfrec7_v_w) 3843 GEN_VEXT_V_ENV(vfrec7_v_d) 3844 3845 /* Vector Floating-Point MIN/MAX Instructions */ 3846 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3847 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) 3848 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3849 GEN_VEXT_VV_ENV(vfmin_vv_h) 3850 GEN_VEXT_VV_ENV(vfmin_vv_w) 3851 GEN_VEXT_VV_ENV(vfmin_vv_d) 3852 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3853 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3854 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3855 GEN_VEXT_VF(vfmin_vf_h) 3856 GEN_VEXT_VF(vfmin_vf_w) 3857 GEN_VEXT_VF(vfmin_vf_d) 3858 3859 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3860 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3861 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3862 GEN_VEXT_VV_ENV(vfmax_vv_h) 3863 GEN_VEXT_VV_ENV(vfmax_vv_w) 3864 GEN_VEXT_VV_ENV(vfmax_vv_d) 3865 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3866 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3867 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3868 GEN_VEXT_VF(vfmax_vf_h) 3869 GEN_VEXT_VF(vfmax_vf_w) 3870 GEN_VEXT_VF(vfmax_vf_d) 3871 3872 /* Vector Floating-Point Sign-Injection Instructions */ 3873 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3874 { 3875 return deposit64(b, 0, 15, a); 3876 } 3877 3878 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3879 { 3880 return deposit64(b, 0, 31, a); 3881 } 3882 3883 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3884 { 3885 return deposit64(b, 0, 63, a); 3886 } 3887 3888 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3889 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3890 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3891 GEN_VEXT_VV_ENV(vfsgnj_vv_h) 3892 GEN_VEXT_VV_ENV(vfsgnj_vv_w) 3893 GEN_VEXT_VV_ENV(vfsgnj_vv_d) 3894 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3895 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3896 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, 
H8, fsgnj64) 3897 GEN_VEXT_VF(vfsgnj_vf_h) 3898 GEN_VEXT_VF(vfsgnj_vf_w) 3899 GEN_VEXT_VF(vfsgnj_vf_d) 3900 3901 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3902 { 3903 return deposit64(~b, 0, 15, a); 3904 } 3905 3906 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3907 { 3908 return deposit64(~b, 0, 31, a); 3909 } 3910 3911 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3912 { 3913 return deposit64(~b, 0, 63, a); 3914 } 3915 3916 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3917 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3918 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3919 GEN_VEXT_VV_ENV(vfsgnjn_vv_h) 3920 GEN_VEXT_VV_ENV(vfsgnjn_vv_w) 3921 GEN_VEXT_VV_ENV(vfsgnjn_vv_d) 3922 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3923 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3924 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3925 GEN_VEXT_VF(vfsgnjn_vf_h) 3926 GEN_VEXT_VF(vfsgnjn_vf_w) 3927 GEN_VEXT_VF(vfsgnjn_vf_d) 3928 3929 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3930 { 3931 return deposit64(b ^ a, 0, 15, a); 3932 } 3933 3934 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3935 { 3936 return deposit64(b ^ a, 0, 31, a); 3937 } 3938 3939 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3940 { 3941 return deposit64(b ^ a, 0, 63, a); 3942 } 3943 3944 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3945 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3946 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3947 GEN_VEXT_VV_ENV(vfsgnjx_vv_h) 3948 GEN_VEXT_VV_ENV(vfsgnjx_vv_w) 3949 GEN_VEXT_VV_ENV(vfsgnjx_vv_d) 3950 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3951 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3952 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3953 GEN_VEXT_VF(vfsgnjx_vf_h) 3954 GEN_VEXT_VF(vfsgnjx_vf_w) 3955 GEN_VEXT_VF(vfsgnjx_vf_d) 3956 3957 /* Vector Floating-Point Compare Instructions */ 3958 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3959 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3960 CPURISCVState *env, uint32_t desc) \ 3961 { \ 3962 uint32_t vm = vext_vm(desc); \ 3963 uint32_t vl = env->vl; \ 3964 uint32_t i; \ 3965 \ 3966 for (i = env->vstart; i < vl; i++) { \ 3967 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3968 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3969 if (!vm && !vext_elem_mask(v0, i)) { \ 3970 continue; \ 3971 } \ 3972 vext_set_elem_mask(vd, i, \ 3973 DO_OP(s2, s1, &env->fp_status)); \ 3974 } \ 3975 env->vstart = 0; \ 3976 } 3977 3978 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3979 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3980 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3981 3982 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3983 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3984 CPURISCVState *env, uint32_t desc) \ 3985 { \ 3986 uint32_t vm = vext_vm(desc); \ 3987 uint32_t vl = env->vl; \ 3988 uint32_t i; \ 3989 \ 3990 for (i = env->vstart; i < vl; i++) { \ 3991 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3992 if (!vm && !vext_elem_mask(v0, i)) { \ 3993 continue; \ 3994 } \ 3995 vext_set_elem_mask(vd, i, \ 3996 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3997 } \ 3998 env->vstart = 0; \ 3999 } 4000 4001 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4002 
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4003 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4004 4005 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4006 { 4007 FloatRelation compare = float16_compare_quiet(a, b, s); 4008 return compare != float_relation_equal; 4009 } 4010 4011 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4012 { 4013 FloatRelation compare = float32_compare_quiet(a, b, s); 4014 return compare != float_relation_equal; 4015 } 4016 4017 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4018 { 4019 FloatRelation compare = float64_compare_quiet(a, b, s); 4020 return compare != float_relation_equal; 4021 } 4022 4023 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4024 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4025 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4026 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4027 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4028 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4029 4030 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4031 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4032 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4033 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4034 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4035 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4036 4037 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4038 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4039 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4040 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4041 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4042 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4043 4044 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4045 { 4046 FloatRelation compare = float16_compare(a, b, s); 4047 return compare == float_relation_greater; 4048 } 4049 4050 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4051 { 4052 FloatRelation compare = float32_compare(a, b, s); 4053 return compare == float_relation_greater; 4054 } 4055 4056 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4057 { 4058 FloatRelation compare = float64_compare(a, b, s); 4059 return compare == float_relation_greater; 4060 } 4061 4062 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4063 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4064 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4065 4066 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4067 { 4068 FloatRelation compare = float16_compare(a, b, s); 4069 return compare == float_relation_greater || 4070 compare == float_relation_equal; 4071 } 4072 4073 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4074 { 4075 FloatRelation compare = float32_compare(a, b, s); 4076 return compare == float_relation_greater || 4077 compare == float_relation_equal; 4078 } 4079 4080 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4081 { 4082 FloatRelation compare = float64_compare(a, b, s); 4083 return compare == float_relation_greater || 4084 compare == float_relation_equal; 4085 } 4086 4087 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4088 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4089 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4090 4091 /* Vector Floating-Point Classify Instruction */ 4092 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4093 static void do_##NAME(void *vd, void *vs2, int i) \ 
4094 { \ 4095 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4096 *((TD *)vd + HD(i)) = OP(s2); \ 4097 } 4098 4099 #define GEN_VEXT_V(NAME) \ 4100 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4101 CPURISCVState *env, uint32_t desc) \ 4102 { \ 4103 uint32_t vm = vext_vm(desc); \ 4104 uint32_t vl = env->vl; \ 4105 uint32_t i; \ 4106 \ 4107 for (i = env->vstart; i < vl; i++) { \ 4108 if (!vm && !vext_elem_mask(v0, i)) { \ 4109 continue; \ 4110 } \ 4111 do_##NAME(vd, vs2, i); \ 4112 } \ 4113 env->vstart = 0; \ 4114 } 4115 4116 target_ulong fclass_h(uint64_t frs1) 4117 { 4118 float16 f = frs1; 4119 bool sign = float16_is_neg(f); 4120 4121 if (float16_is_infinity(f)) { 4122 return sign ? 1 << 0 : 1 << 7; 4123 } else if (float16_is_zero(f)) { 4124 return sign ? 1 << 3 : 1 << 4; 4125 } else if (float16_is_zero_or_denormal(f)) { 4126 return sign ? 1 << 2 : 1 << 5; 4127 } else if (float16_is_any_nan(f)) { 4128 float_status s = { }; /* for snan_bit_is_one */ 4129 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4130 } else { 4131 return sign ? 1 << 1 : 1 << 6; 4132 } 4133 } 4134 4135 target_ulong fclass_s(uint64_t frs1) 4136 { 4137 float32 f = frs1; 4138 bool sign = float32_is_neg(f); 4139 4140 if (float32_is_infinity(f)) { 4141 return sign ? 1 << 0 : 1 << 7; 4142 } else if (float32_is_zero(f)) { 4143 return sign ? 1 << 3 : 1 << 4; 4144 } else if (float32_is_zero_or_denormal(f)) { 4145 return sign ? 1 << 2 : 1 << 5; 4146 } else if (float32_is_any_nan(f)) { 4147 float_status s = { }; /* for snan_bit_is_one */ 4148 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4149 } else { 4150 return sign ? 1 << 1 : 1 << 6; 4151 } 4152 } 4153 4154 target_ulong fclass_d(uint64_t frs1) 4155 { 4156 float64 f = frs1; 4157 bool sign = float64_is_neg(f); 4158 4159 if (float64_is_infinity(f)) { 4160 return sign ? 1 << 0 : 1 << 7; 4161 } else if (float64_is_zero(f)) { 4162 return sign ? 1 << 3 : 1 << 4; 4163 } else if (float64_is_zero_or_denormal(f)) { 4164 return sign ? 1 << 2 : 1 << 5; 4165 } else if (float64_is_any_nan(f)) { 4166 float_status s = { }; /* for snan_bit_is_one */ 4167 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4168 } else { 4169 return sign ? 1 << 1 : 1 << 6; 4170 } 4171 } 4172 4173 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4174 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4175 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4176 GEN_VEXT_V(vfclass_v_h) 4177 GEN_VEXT_V(vfclass_v_w) 4178 GEN_VEXT_V(vfclass_v_d) 4179 4180 /* Vector Floating-Point Merge Instruction */ 4181 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4182 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4183 CPURISCVState *env, uint32_t desc) \ 4184 { \ 4185 uint32_t vm = vext_vm(desc); \ 4186 uint32_t vl = env->vl; \ 4187 uint32_t i; \ 4188 \ 4189 for (i = env->vstart; i < vl; i++) { \ 4190 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4191 *((ETYPE *)vd + H(i)) \ 4192 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4193 } \ 4194 env->vstart = 0; \ 4195 } 4196 4197 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4198 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4199 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4200 4201 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4202 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
*/ 4203 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4204 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4205 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4206 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h) 4207 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w) 4208 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d) 4209 4210 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4211 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4212 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4213 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4214 GEN_VEXT_V_ENV(vfcvt_x_f_v_h) 4215 GEN_VEXT_V_ENV(vfcvt_x_f_v_w) 4216 GEN_VEXT_V_ENV(vfcvt_x_f_v_d) 4217 4218 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4219 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4220 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4221 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4222 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h) 4223 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w) 4224 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d) 4225 4226 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4227 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4228 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4229 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4230 GEN_VEXT_V_ENV(vfcvt_f_x_v_h) 4231 GEN_VEXT_V_ENV(vfcvt_f_x_v_w) 4232 GEN_VEXT_V_ENV(vfcvt_f_x_v_d) 4233 4234 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4235 /* (TD, T2, TX2) */ 4236 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4237 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4238 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4239 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4240 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4241 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4242 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h) 4243 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w) 4244 4245 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4246 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4247 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4248 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h) 4249 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w) 4250 4251 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4252 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4253 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4254 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4255 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b) 4256 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h) 4257 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w) 4258 4259 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4260 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4261 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4262 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4263 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b) 4264 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h) 4265 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w) 4266 4267 /* 4268 * vfwcvt.f.f.v vd, vs2, vm 4269 * Convert single-width float to double-width float. 
 */
static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
{
    return float16_to_float32(a, true, s);
}

RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w)

/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
#define NOP_UU_B uint8_t, uint16_t, uint32_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_b)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_h)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_w)

/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_w_b)
GEN_VEXT_V_ENV(vfncvt_x_f_w_h)
GEN_VEXT_V_ENV(vfncvt_x_f_w_w)

/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_h)
GEN_VEXT_V_ENV(vfncvt_f_xu_w_w)

/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_w_h)
GEN_VEXT_V_ENV(vfncvt_f_x_w_w)

/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float.
*/ 4315 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4316 { 4317 return float32_to_float16(a, true, s); 4318 } 4319 4320 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4321 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4322 GEN_VEXT_V_ENV(vfncvt_f_f_w_h) 4323 GEN_VEXT_V_ENV(vfncvt_f_f_w_w) 4324 4325 /* 4326 *** Vector Reduction Operations 4327 */ 4328 /* Vector Single-Width Integer Reduction Instructions */ 4329 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4330 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4331 void *vs2, CPURISCVState *env, uint32_t desc) \ 4332 { \ 4333 uint32_t vm = vext_vm(desc); \ 4334 uint32_t vl = env->vl; \ 4335 uint32_t i; \ 4336 TD s1 = *((TD *)vs1 + HD(0)); \ 4337 \ 4338 for (i = env->vstart; i < vl; i++) { \ 4339 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4340 if (!vm && !vext_elem_mask(v0, i)) { \ 4341 continue; \ 4342 } \ 4343 s1 = OP(s1, (TD)s2); \ 4344 } \ 4345 *((TD *)vd + HD(0)) = s1; \ 4346 env->vstart = 0; \ 4347 } 4348 4349 /* vd[0] = sum(vs1[0], vs2[*]) */ 4350 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4351 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4352 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4353 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4354 4355 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4356 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4357 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4358 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4359 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4360 4361 /* vd[0] = max(vs1[0], vs2[*]) */ 4362 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4363 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4364 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4365 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4366 4367 /* vd[0] = minu(vs1[0], vs2[*]) */ 4368 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4369 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4370 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4371 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4372 4373 /* vd[0] = min(vs1[0], vs2[*]) */ 4374 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4375 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4376 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4377 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4378 4379 /* vd[0] = and(vs1[0], vs2[*]) */ 4380 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4381 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4382 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4383 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4384 4385 /* vd[0] = or(vs1[0], vs2[*]) */ 4386 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4387 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4388 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4389 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4390 4391 /* vd[0] = xor(vs1[0], vs2[*]) */ 4392 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4393 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4394 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4395 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4396 4397 /* Vector Widening Integer Reduction Instructions */ 4398 
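/*
 * The widening reductions below reuse GEN_VEXT_RED with TD twice as wide as
 * TS2; the (TD)s2 cast in the macro body performs the sign- or zero-extension
 * before each add.  As an illustrative sketch only (this function is not
 * referenced by any helper, and its name plus the unmasked, vstart == 0
 * assumptions are ours), the following is roughly what
 * GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) computes,
 * with the H*() host-endian fixups omitted:
 */
static int16_t __attribute__((unused))
sketch_vwredsum_vs_b(const int16_t *vs1, const int8_t *vs2, uint32_t vl)
{
    /* vd[0] = vs1[0] + vs2[0] + ... + vs2[vl - 1], accumulated at 2 * SEW */
    int16_t s1 = vs1[0];
    uint32_t i;

    for (i = 0; i < vl; i++) {
        /*
         * The (TD)s2 cast sign-extends each byte; the vwredsumu_vs_*
         * variants use unsigned types and therefore zero-extend instead.
         */
        s1 = (int16_t)(s1 + (int16_t)vs2[i]);
    }
    return s1;
}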
/* signed sum reduction into double-width accumulator */ 4399 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4400 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4401 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4402 4403 /* Unsigned sum reduction into double-width accumulator */ 4404 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4405 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4406 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4407 4408 /* Vector Single-Width Floating-Point Reduction Instructions */ 4409 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4410 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4411 void *vs2, CPURISCVState *env, \ 4412 uint32_t desc) \ 4413 { \ 4414 uint32_t vm = vext_vm(desc); \ 4415 uint32_t vl = env->vl; \ 4416 uint32_t i; \ 4417 TD s1 = *((TD *)vs1 + HD(0)); \ 4418 \ 4419 for (i = env->vstart; i < vl; i++) { \ 4420 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4421 if (!vm && !vext_elem_mask(v0, i)) { \ 4422 continue; \ 4423 } \ 4424 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4425 } \ 4426 *((TD *)vd + HD(0)) = s1; \ 4427 env->vstart = 0; \ 4428 } 4429 4430 /* Unordered sum */ 4431 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4432 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4433 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4434 4435 /* Maximum value */ 4436 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4437 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4438 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4439 4440 /* Minimum value */ 4441 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4442 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4443 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4444 4445 /* Vector Widening Floating-Point Reduction Instructions */ 4446 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4447 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4448 void *vs2, CPURISCVState *env, uint32_t desc) 4449 { 4450 uint32_t vm = vext_vm(desc); 4451 uint32_t vl = env->vl; 4452 uint32_t i; 4453 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4454 4455 for (i = env->vstart; i < vl; i++) { 4456 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4457 if (!vm && !vext_elem_mask(v0, i)) { 4458 continue; 4459 } 4460 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4461 &env->fp_status); 4462 } 4463 *((uint32_t *)vd + H4(0)) = s1; 4464 env->vstart = 0; 4465 } 4466 4467 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4468 void *vs2, CPURISCVState *env, uint32_t desc) 4469 { 4470 uint32_t vm = vext_vm(desc); 4471 uint32_t vl = env->vl; 4472 uint32_t i; 4473 uint64_t s1 = *((uint64_t *)vs1); 4474 4475 for (i = env->vstart; i < vl; i++) { 4476 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4477 if (!vm && !vext_elem_mask(v0, i)) { 4478 continue; 4479 } 4480 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4481 &env->fp_status); 4482 } 4483 *((uint64_t *)vd) = s1; 4484 env->vstart = 0; 4485 } 4486 4487 /* 4488 *** Vector Mask Operations 4489 */ 4490 /* Vector Mask-Register Logical Instructions */ 4491 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4492 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4493 void *vs2, CPURISCVState *env, \ 4494 uint32_t desc) \ 
4495 { \ 4496 uint32_t vl = env->vl; \ 4497 uint32_t i; \ 4498 int a, b; \ 4499 \ 4500 for (i = env->vstart; i < vl; i++) { \ 4501 a = vext_elem_mask(vs1, i); \ 4502 b = vext_elem_mask(vs2, i); \ 4503 vext_set_elem_mask(vd, i, OP(b, a)); \ 4504 } \ 4505 env->vstart = 0; \ 4506 } 4507 4508 #define DO_NAND(N, M) (!(N & M)) 4509 #define DO_ANDNOT(N, M) (N & !M) 4510 #define DO_NOR(N, M) (!(N | M)) 4511 #define DO_ORNOT(N, M) (N | !M) 4512 #define DO_XNOR(N, M) (!(N ^ M)) 4513 4514 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4515 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4516 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4517 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4518 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4519 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4520 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4521 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4522 4523 /* Vector count population in mask vcpop */ 4524 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4525 uint32_t desc) 4526 { 4527 target_ulong cnt = 0; 4528 uint32_t vm = vext_vm(desc); 4529 uint32_t vl = env->vl; 4530 int i; 4531 4532 for (i = env->vstart; i < vl; i++) { 4533 if (vm || vext_elem_mask(v0, i)) { 4534 if (vext_elem_mask(vs2, i)) { 4535 cnt++; 4536 } 4537 } 4538 } 4539 env->vstart = 0; 4540 return cnt; 4541 } 4542 4543 /* vfirst find-first-set mask bit*/ 4544 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4545 uint32_t desc) 4546 { 4547 uint32_t vm = vext_vm(desc); 4548 uint32_t vl = env->vl; 4549 int i; 4550 4551 for (i = env->vstart; i < vl; i++) { 4552 if (vm || vext_elem_mask(v0, i)) { 4553 if (vext_elem_mask(vs2, i)) { 4554 return i; 4555 } 4556 } 4557 } 4558 env->vstart = 0; 4559 return -1LL; 4560 } 4561 4562 enum set_mask_type { 4563 ONLY_FIRST = 1, 4564 INCLUDE_FIRST, 4565 BEFORE_FIRST, 4566 }; 4567 4568 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4569 uint32_t desc, enum set_mask_type type) 4570 { 4571 uint32_t vm = vext_vm(desc); 4572 uint32_t vl = env->vl; 4573 int i; 4574 bool first_mask_bit = false; 4575 4576 for (i = env->vstart; i < vl; i++) { 4577 if (!vm && !vext_elem_mask(v0, i)) { 4578 continue; 4579 } 4580 /* write a zero to all following active elements */ 4581 if (first_mask_bit) { 4582 vext_set_elem_mask(vd, i, 0); 4583 continue; 4584 } 4585 if (vext_elem_mask(vs2, i)) { 4586 first_mask_bit = true; 4587 if (type == BEFORE_FIRST) { 4588 vext_set_elem_mask(vd, i, 0); 4589 } else { 4590 vext_set_elem_mask(vd, i, 1); 4591 } 4592 } else { 4593 if (type == ONLY_FIRST) { 4594 vext_set_elem_mask(vd, i, 0); 4595 } else { 4596 vext_set_elem_mask(vd, i, 1); 4597 } 4598 } 4599 } 4600 env->vstart = 0; 4601 } 4602 4603 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4604 uint32_t desc) 4605 { 4606 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4607 } 4608 4609 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4610 uint32_t desc) 4611 { 4612 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4613 } 4614 4615 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4616 uint32_t desc) 4617 { 4618 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4619 } 4620 4621 /* Vector Iota Instruction */ 4622 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4623 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4624 uint32_t desc) \ 4625 { \ 4626 uint32_t vm = vext_vm(desc); \ 4627 uint32_t vl = env->vl; \ 4628 uint32_t sum = 0; \ 4629 int i; \ 4630 \ 4631 for (i = env->vstart; i < vl; i++) { \ 4632 if (!vm && !vext_elem_mask(v0, i)) { \ 4633 
continue; \ 4634 } \ 4635 *((ETYPE *)vd + H(i)) = sum; \ 4636 if (vext_elem_mask(vs2, i)) { \ 4637 sum++; \ 4638 } \ 4639 } \ 4640 env->vstart = 0; \ 4641 } 4642 4643 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4644 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4645 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4646 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4647 4648 /* Vector Element Index Instruction */ 4649 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4650 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4651 { \ 4652 uint32_t vm = vext_vm(desc); \ 4653 uint32_t vl = env->vl; \ 4654 int i; \ 4655 \ 4656 for (i = env->vstart; i < vl; i++) { \ 4657 if (!vm && !vext_elem_mask(v0, i)) { \ 4658 continue; \ 4659 } \ 4660 *((ETYPE *)vd + H(i)) = i; \ 4661 } \ 4662 env->vstart = 0; \ 4663 } 4664 4665 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4666 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4667 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4668 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4669 4670 /* 4671 *** Vector Permutation Instructions 4672 */ 4673 4674 /* Vector Slide Instructions */ 4675 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4676 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4677 CPURISCVState *env, uint32_t desc) \ 4678 { \ 4679 uint32_t vm = vext_vm(desc); \ 4680 uint32_t vl = env->vl; \ 4681 target_ulong offset = s1, i_min, i; \ 4682 \ 4683 i_min = MAX(env->vstart, offset); \ 4684 for (i = i_min; i < vl; i++) { \ 4685 if (!vm && !vext_elem_mask(v0, i)) { \ 4686 continue; \ 4687 } \ 4688 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4689 } \ 4690 } 4691 4692 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4693 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4694 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4695 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4696 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4697 4698 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4699 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4700 CPURISCVState *env, uint32_t desc) \ 4701 { \ 4702 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4703 uint32_t vm = vext_vm(desc); \ 4704 uint32_t vl = env->vl; \ 4705 target_ulong i_max, i; \ 4706 \ 4707 i_max = MAX(MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl), env->vstart); \ 4708 for (i = env->vstart; i < i_max; ++i) { \ 4709 if (vm || vext_elem_mask(v0, i)) { \ 4710 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4711 } \ 4712 } \ 4713 \ 4714 for (i = i_max; i < vl; ++i) { \ 4715 if (vm || vext_elem_mask(v0, i)) { \ 4716 *((ETYPE *)vd + H(i)) = 0; \ 4717 } \ 4718 } \ 4719 \ 4720 env->vstart = 0; \ 4721 } 4722 4723 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4724 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4725 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4726 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4727 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4728 4729 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ 4730 static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ 4731 void *vs2, CPURISCVState *env, uint32_t desc) \ 4732 { \ 4733 typedef uint##BITWIDTH##_t ETYPE; \ 4734 uint32_t vm = vext_vm(desc); \ 4735 uint32_t vl = env->vl; \ 4736 uint32_t i; \ 4737 \ 4738 for (i = env->vstart; i < vl; i++) { \ 4739 if (!vm && !vext_elem_mask(v0, i)) { \ 4740 continue; \ 4741 } \ 4742 if (i == 0) { \ 4743 *((ETYPE *)vd + H(i)) = s1; \ 4744 } else { \ 4745 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4746 } \ 4747 } \ 4748 env->vstart = 0; \ 4749 } 4750 4751 GEN_VEXT_VSLIE1UP(8, H1) 4752 GEN_VEXT_VSLIE1UP(16, H2) 4753 GEN_VEXT_VSLIE1UP(32, H4) 4754 GEN_VEXT_VSLIE1UP(64, H8) 4755 4756 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ 4757 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4758 CPURISCVState *env, uint32_t desc) \ 4759 { \ 4760 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4761 } 4762 4763 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4764 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4765 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4766 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4767 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4768 4769 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \ 4770 static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \ 4771 void *vs2, CPURISCVState *env, uint32_t desc) \ 4772 { \ 4773 typedef uint##BITWIDTH##_t ETYPE; \ 4774 uint32_t vm = vext_vm(desc); \ 4775 uint32_t vl = env->vl; \ 4776 uint32_t i; \ 4777 \ 4778 for (i = env->vstart; i < vl; i++) { \ 4779 if (!vm && !vext_elem_mask(v0, i)) { \ 4780 continue; \ 4781 } \ 4782 if (i == vl - 1) { \ 4783 *((ETYPE *)vd + H(i)) = s1; \ 4784 } else { \ 4785 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4786 } \ 4787 } \ 4788 env->vstart = 0; \ 4789 } 4790 4791 GEN_VEXT_VSLIDE1DOWN(8, H1) 4792 GEN_VEXT_VSLIDE1DOWN(16, H2) 4793 GEN_VEXT_VSLIDE1DOWN(32, H4) 4794 GEN_VEXT_VSLIDE1DOWN(64, H8) 4795 4796 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ 4797 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4798 CPURISCVState *env, uint32_t desc) \ 4799 { \ 4800 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4801 } 4802 4803 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4804 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4805 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4806 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4807 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4808 4809 /* Vector Floating-Point Slide Instructions */ 4810 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \ 4811 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4812 CPURISCVState *env, uint32_t desc) \ 4813 { \ 4814 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4815 } 4816 4817 /* 
vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */ 4818 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4819 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4820 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4821 4822 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \ 4823 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4824 CPURISCVState *env, uint32_t desc) \ 4825 { \ 4826 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4827 } 4828 4829 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4830 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4831 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4832 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4833 4834 /* Vector Register Gather Instruction */ 4835 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4836 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4837 CPURISCVState *env, uint32_t desc) \ 4838 { \ 4839 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4840 uint32_t vm = vext_vm(desc); \ 4841 uint32_t vl = env->vl; \ 4842 uint64_t index; \ 4843 uint32_t i; \ 4844 \ 4845 for (i = env->vstart; i < vl; i++) { \ 4846 if (!vm && !vext_elem_mask(v0, i)) { \ 4847 continue; \ 4848 } \ 4849 index = *((TS1 *)vs1 + HS1(i)); \ 4850 if (index >= vlmax) { \ 4851 *((TS2 *)vd + HS2(i)) = 0; \ 4852 } else { \ 4853 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4854 } \ 4855 } \ 4856 env->vstart = 0; \ 4857 } 4858 4859 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 4860 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4861 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4862 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4863 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4864 4865 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4866 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4867 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4868 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4869 4870 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4871 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4872 CPURISCVState *env, uint32_t desc) \ 4873 { \ 4874 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4875 uint32_t vm = vext_vm(desc); \ 4876 uint32_t vl = env->vl; \ 4877 uint64_t index = s1; \ 4878 uint32_t i; \ 4879 \ 4880 for (i = env->vstart; i < vl; i++) { \ 4881 if (!vm && !vext_elem_mask(v0, i)) { \ 4882 continue; \ 4883 } \ 4884 if (index >= vlmax) { \ 4885 *((ETYPE *)vd + H(i)) = 0; \ 4886 } else { \ 4887 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4888 } \ 4889 } \ 4890 env->vstart = 0; \ 4891 } 4892 4893 /* vd[i] = (x[rs1] >= VLMAX) ? 
0 : vs2[rs1] */ 4894 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4895 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4896 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4897 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4898 4899 /* Vector Compress Instruction */ 4900 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4901 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4902 CPURISCVState *env, uint32_t desc) \ 4903 { \ 4904 uint32_t vl = env->vl; \ 4905 uint32_t num = 0, i; \ 4906 \ 4907 for (i = env->vstart; i < vl; i++) { \ 4908 if (!vext_elem_mask(vs1, i)) { \ 4909 continue; \ 4910 } \ 4911 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4912 num++; \ 4913 } \ 4914 env->vstart = 0; \ 4915 } 4916 4917 /* Compress into vd elements of vs2 where vs1 is enabled */ 4918 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4919 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4920 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4921 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4922 4923 /* Vector Whole Register Move */ 4924 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) 4925 { 4926 /* EEW = SEW */ 4927 uint32_t maxsz = simd_maxsz(desc); 4928 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); 4929 uint32_t startb = env->vstart * sewb; 4930 uint32_t i = startb; 4931 4932 memcpy((uint8_t *)vd + H1(i), 4933 (uint8_t *)vs2 + H1(i), 4934 maxsz - startb); 4935 4936 env->vstart = 0; 4937 } 4938 4939 /* Vector Integer Extension */ 4940 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \ 4941 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4942 CPURISCVState *env, uint32_t desc) \ 4943 { \ 4944 uint32_t vl = env->vl; \ 4945 uint32_t vm = vext_vm(desc); \ 4946 uint32_t i; \ 4947 \ 4948 for (i = env->vstart; i < vl; i++) { \ 4949 if (!vm && !vext_elem_mask(v0, i)) { \ 4950 continue; \ 4951 } \ 4952 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ 4953 } \ 4954 env->vstart = 0; \ 4955 } 4956 4957 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) 4958 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2) 4959 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4) 4960 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1) 4961 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2) 4962 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1) 4963 4964 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1) 4965 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2) 4966 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) 4967 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) 4968 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) 4969 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) 4970
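/*
 * GEN_VEXT_INT_EXT reads each active source element at the narrower DTYPE and
 * stores it into the wider ETYPE destination, so the widening itself is just
 * the C assignment: the vsext_* variants use signed types and sign-extend,
 * while the vzext_* variants use unsigned types and zero-extend.  As an
 * illustrative sketch only (this function is not referenced by any helper,
 * and its name plus the unmasked, vstart == 0 assumptions are ours), this is
 * roughly what vsext_vf2_h does (destination SEW = 16, source EEW = 8),
 * without the H*() host-endian fixups:
 */
static void __attribute__((unused))
sketch_vsext_vf2_h(int16_t *vd, const int8_t *vs2, uint32_t vl)
{
    uint32_t i;

    for (i = 0; i < vl; i++) {
        /* A plain C assignment sign-extends int8_t into int16_t. */
        vd[i] = vs2[i];
    }
}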