/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    bool vill = FIELD_EX64(s2, VTYPE, VILL);
    target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
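
/*
 * Illustrative only (not part of the original helpers): two worked VLMAX
 * examples for vext_max_elems(), assuming VLEN = 128 bits (vlenb = 16):
 *
 *   SEW = 16 (esz = 1), LMUL = 2   (lmul =  1):
 *       scale = 1 - 1 = 0,  VLMAX = 16 << 0 = 16 = LMUL * VLEN / SEW
 *   SEW = 8  (esz = 0), LMUL = 1/4 (lmul = -2):
 *       scale = -2 - 0 = -2, VLMAX = 16 >> 2 = 4
 */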

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint
 * check. In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. The guest software
 * can then return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, addr, curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, addr, curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* elements operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
static void NAME(CPURISCVState *env, abi_ptr addr, \
                 uint32_t idx, void *vd, uintptr_t retaddr) \
{ \
    ETYPE *cur = ((ETYPE *)vd + H(idx)); \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
} \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
static void NAME(CPURISCVState *env, abi_ptr addr, \
                 uint32_t idx, void *vd, uintptr_t retaddr) \
{ \
    ETYPE data = *((ETYPE *)vd + H(idx)); \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            target_ulong addr = base + stride * i + (k << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}
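
/*
 * Illustrative only: a worked address pattern for vext_ldst_stride(),
 * assuming a two-field segment (nf = 2) of 16-bit elements (esz = 1)
 * with stride = 8 bytes:
 *
 *   element i, field k -> guest address  base + 8 * i + (k << 1)
 *                         register slot  i + k * max_elems of vd
 *
 * i.e. field 0 of every element lands in the first register group and
 * field 1 in the next one, while memory is walked one segment per stride.
 */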

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  target_ulong stride, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  target_ulong stride, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operations */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
             uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

/*
 * A masked unit-stride load or store is handled as a special case of the
 * strided access, with stride = NF * sizeof(ETYPE).
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
                         CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
} \
 \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_us(vd, base, env, desc, LOAD_FN, \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
                         CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \
} \
 \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_us(vd, base, env, desc, STORE_FN, \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
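
/*
 * Illustrative only: for the unmasked fast path above, a three-field
 * segment load of 32-bit elements (nf = 3, esz = 2) reads element i as
 *
 *   field k at base + ((i * 3 + k) << 2)
 *
 * which is exactly the strided layout with stride = nf << esz = 12 bytes,
 * so the masked variants can simply reuse vext_ldst_stride().
 */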

/*
 *** unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC(), MMU_DATA_LOAD);
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC(), MMU_DATA_STORE);
}

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
static target_ulong NAME(target_ulong base, \
                         uint32_t idx, void *vs2) \
{ \
    return (base + *((ETYPE *)vs2 + H(idx))); \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
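
/*
 * Illustrative only: for an indexed access the byte offset comes from vs2
 * and its width is the index EEW of the instruction, not the data EEW.
 * As implemented above the index is added as an unsigned value, e.g.
 * vlxei16_32_v with vs2[i] = 0x8000 reads the 32-bit element from
 * base + 0x8000, and segment field k is placed at +(k << esz) past that.
 */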

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
                    STORE_FN, ctzl(sizeof(ETYPE)), \
                    GETPC(), MMU_DATA_STORE); \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = base + i * (nf << esz);
        if (i == 0) {
            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr += offset;
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
              ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
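
/*
 * Illustrative only: with vle32ff_v, vl = 8 and the page containing
 * element 5 unreachable via the TLB, the probe loop above stops at i = 5,
 * sets env->vl = 5 and returns normally after loading elements 0..4;
 * only a fault on element 0 is allowed to raise a guest exception.
 */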

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/* Unsigned min/max */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)

/*
 *** load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra,
                MMUAccessType access_type)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
    uint32_t max_elems = vlenb >> esz;

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /* load/store the rest of the current segment pointed to by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << esz);
            ldst_elem(env, addr, pos + k * max_elems, vd, ra);
        }
        k++;
    }

    /* load/store elements for rest of segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}

#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME)(void *vd, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
                    ctzl(sizeof(ETYPE)), GETPC(), \
                    MMU_DATA_LOAD); \
}

GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
void HELPER(NAME)(void *vd, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_whole(vd, base, env, desc, STORE_FN, \
                    ctzl(sizeof(ETYPE)), GETPC(), \
                    MMU_DATA_STORE); \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
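
/*
 * Illustrative only: vext_ldst_whole() resumes from env->vstart.  With
 * VLEN = 128 (vlenb = 16), vl2re32_v (nf = 2, esz = 2, max_elems = 4) and
 * vstart = 5, it first finishes the remaining elements of the second
 * register (k = 1, pos = 1..3, i.e. overall element indices 5..7) and
 * then, since k == nf, clears vstart and returns.
 */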

/*
 *** Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...) macro(__VA_ARGS__)

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
{ \
    TX1 s1 = *((T1 *)vs1 + HS1(i)); \
    TX2 s2 = *((T2 *)vs2 + HS2(i)); \
    *((TD *)vd + HD(i)) = OP(s2, s1); \
}
#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       uint32_t esz, uint32_t dsz,
                       opivv2_fn *fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
    env->vstart = 0;
}
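
/*
 * Illustrative only: RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
 * expands (after OP_SSS_B is substituted) to roughly
 *
 *   static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *   {
 *       int8_t s1 = *((int8_t *)vs1 + H1(i));
 *       int8_t s2 = *((int8_t *)vs2 + H1(i));
 *       *((int8_t *)vd + H1(i)) = s2 + s1;
 *   }
 *
 * which do_vext_vv() then calls per active element under the vl/vm loop.
 */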

/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME, ESZ, DSZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                  void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
               do_##NAME); \
}

GEN_VEXT_VV(vadd_vv_b, 1, 1)
GEN_VEXT_VV(vadd_vv_h, 2, 2)
GEN_VEXT_VV(vadd_vv_w, 4, 4)
GEN_VEXT_VV(vadd_vv_d, 8, 8)
GEN_VEXT_VV(vsub_vv_b, 1, 1)
GEN_VEXT_VV(vsub_vv_h, 2, 2)
GEN_VEXT_VV(vsub_vv_w, 4, 4)
GEN_VEXT_VV(vsub_vv_d, 8, 8)

typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operand type.
 * (TX1)(T1)s1 expands the operand type of widen or narrow operations.
 */
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
{ \
    TX2 s2 = *((T2 *)vs2 + HS2(i)); \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
}

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       uint32_t esz, uint32_t dsz,
                       opivx2_fn fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, s1, vs2, i);
    }
    env->vstart = 0;
}

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME, ESZ, DSZ) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \
               do_##NAME); \
}

GEN_VEXT_VX(vadd_vx_b, 1, 1)
GEN_VEXT_VX(vadd_vx_h, 2, 2)
GEN_VEXT_VX(vadd_vx_w, 4, 4)
GEN_VEXT_VX(vadd_vx_d, 8, 8)
GEN_VEXT_VX(vsub_vx_b, 1, 1)
GEN_VEXT_VX(vsub_vx_h, 2, 2)
GEN_VEXT_VX(vsub_vx_w, 4, 4)
GEN_VEXT_VX(vsub_vx_d, 8, 8)
GEN_VEXT_VX(vrsub_vx_b, 1, 1)
GEN_VEXT_VX(vrsub_vx_h, 2, 2)
GEN_VEXT_VX(vrsub_vx_w, 4, 4)
GEN_VEXT_VX(vrsub_vx_d, 8, 8)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
GEN_VEXT_VV(vwadd_vv_b, 1, 2)
GEN_VEXT_VV(vwadd_vv_h, 2, 4)
GEN_VEXT_VV(vwadd_vv_w, 4, 8)
GEN_VEXT_VV(vwsub_vv_b, 1, 2)
GEN_VEXT_VV(vwsub_vv_h, 2, 4)
GEN_VEXT_VV(vwsub_vv_w, 4, 8)
GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
GEN_VEXT_VV(vwadd_wv_b, 1, 2)
GEN_VEXT_VV(vwadd_wv_h, 2, 4)
GEN_VEXT_VV(vwadd_wv_w, 4, 8)
GEN_VEXT_VV(vwsub_wv_b, 1, 2)
GEN_VEXT_VV(vwsub_wv_h, 2, 4)
GEN_VEXT_VV(vwsub_wv_w, 4, 8)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
GEN_VEXT_VX(vwadd_vx_b, 1, 2)
GEN_VEXT_VX(vwadd_vx_h, 2, 4)
GEN_VEXT_VX(vwadd_vx_w, 4, 8)
GEN_VEXT_VX(vwsub_vx_b, 1, 2)
GEN_VEXT_VX(vwsub_vx_h, 2, 4)
GEN_VEXT_VX(vwsub_vx_w, 4, 8)
GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
GEN_VEXT_VX(vwadd_wx_b, 1, 2)
GEN_VEXT_VX(vwadd_wx_h, 2, 4)
GEN_VEXT_VX(vwadd_wx_w, 4, 8)
GEN_VEXT_VX(vwsub_wx_b, 1, 2)
GEN_VEXT_VX(vwsub_wx_h, 2, 4)
GEN_VEXT_VX(vwsub_wx_w, 4, 8)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = vext_elem_mask(v0, i); \
 \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
    } \
    env->vstart = 0; \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = vext_elem_mask(v0, i); \
 \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry); \
    } \
    env->vstart = 0; \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
                          (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)

#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = !vm && vext_elem_mask(v0, i); \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
    } \
    env->vstart = 0; \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = !vm && vext_elem_mask(v0, i); \
        vext_set_elem_mask(vd, i, \
                DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
    } \
    env->vstart = 0; \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
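
/*
 * Illustrative only: DO_MADC reports the unsigned carry-out by checking
 * whether the truncated sum wrapped.  With 8-bit elements and no carry-in,
 * N = 200, M = 100: (uint8_t)(200 + 100) = 44 < 200, so the result bit is 1.
 * With carry-in the comparison is <= so that M = 0xff (where the truncated
 * sum N + M + 1 equals N exactly) is still flagged as a carry.
 */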

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1, 1)
GEN_VEXT_VV(vand_vv_h, 2, 2)
GEN_VEXT_VV(vand_vv_w, 4, 4)
GEN_VEXT_VV(vand_vv_d, 8, 8)
GEN_VEXT_VV(vor_vv_b, 1, 1)
GEN_VEXT_VV(vor_vv_h, 2, 2)
GEN_VEXT_VV(vor_vv_w, 4, 4)
GEN_VEXT_VV(vor_vv_d, 8, 8)
GEN_VEXT_VV(vxor_vv_b, 1, 1)
GEN_VEXT_VV(vxor_vv_h, 2, 2)
GEN_VEXT_VV(vxor_vv_w, 4, 4)
GEN_VEXT_VV(vxor_vv_d, 8, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1, 1)
GEN_VEXT_VX(vand_vx_h, 2, 2)
GEN_VEXT_VX(vand_vx_w, 4, 4)
GEN_VEXT_VX(vand_vx_d, 8, 8)
GEN_VEXT_VX(vor_vx_b, 1, 1)
GEN_VEXT_VX(vor_vx_h, 2, 2)
GEN_VEXT_VX(vor_vx_w, 4, 4)
GEN_VEXT_VX(vor_vx_d, 8, 8)
GEN_VEXT_VX(vxor_vx_b, 1, 1)
GEN_VEXT_VX(vxor_vx_h, 2, 2)
GEN_VEXT_VX(vxor_vx_w, 4, 4)
GEN_VEXT_VX(vxor_vx_d, 8, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M)  (N << (M))
#define DO_SRL(N, M)  (N >> (M))

/* generate the helpers for shift instructions with two vector operands */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
        TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
    } \
    env->vstart = 0; \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
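
/*
 * Illustrative only: for vsra_vv_b the destination type TS1 is uint8_t but
 * the source type TS2 is int8_t, so DO_SRL on s2 is an arithmetic shift;
 * the shift amount is s1 & 0x7.  The same macro also builds the narrowing
 * shifts below, e.g. vnsrl_wv_b shifts a 16-bit source right by s1 & 0xf
 * and stores the low 8 bits of the result.
 */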

/* generate the helpers for shift instructions with one vector and one scalar */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
    } \
    env->vstart = 0; \
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
    } \
    env->vstart = 0; \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        vext_set_elem_mask(vd, i, \
                DO_OP(s2, (ETYPE)(target_long)s1)); \
    } \
    env->vstart = 0; \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1, 1)
GEN_VEXT_VV(vminu_vv_h, 2, 2)
GEN_VEXT_VV(vminu_vv_w, 4, 4)
GEN_VEXT_VV(vminu_vv_d, 8, 8)
GEN_VEXT_VV(vmin_vv_b, 1, 1)
GEN_VEXT_VV(vmin_vv_h, 2, 2)
GEN_VEXT_VV(vmin_vv_w, 4, 4)
GEN_VEXT_VV(vmin_vv_d, 8, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
GEN_VEXT_VV(vmax_vv_b, 1, 1)
GEN_VEXT_VV(vmax_vv_h, 2, 2)
GEN_VEXT_VV(vmax_vv_w, 4, 4)
GEN_VEXT_VV(vmax_vv_d, 8, 8)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1, 1)
GEN_VEXT_VX(vminu_vx_h, 2, 2)
GEN_VEXT_VX(vminu_vx_w, 4, 4)
GEN_VEXT_VX(vminu_vx_d, 8, 8)
GEN_VEXT_VX(vmin_vx_b, 1, 1)
GEN_VEXT_VX(vmin_vx_h, 2, 2)
GEN_VEXT_VX(vmin_vx_w, 4, 4)
GEN_VEXT_VX(vmin_vx_d, 8, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
GEN_VEXT_VX(vmax_vx_b, 1, 1)
GEN_VEXT_VX(vmax_vx_h, 2, 2)
GEN_VEXT_VX(vmax_vx_w, 4, 4)
GEN_VEXT_VX(vmax_vx_d, 8, 8)

/* Vector Single-Width Integer Multiply Instructions */
#define DO_MUL(N, M) (N * M)
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b, 1, 1)
GEN_VEXT_VV(vmul_vv_h, 2, 2)
GEN_VEXT_VV(vmul_vv_w, 4, 4)
GEN_VEXT_VV(vmul_vv_d, 8, 8)

static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    return (int16_t)s2 * (int16_t)s1 >> 8;
}

static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    return (int32_t)s2 * (int32_t)s1 >> 16;
}

static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    return (int64_t)s2 * (int64_t)s1 >> 32;
}

static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t hi_64, lo_64;

    muls64(&lo_64, &hi_64, s1, s2);
    return hi_64;
}

static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    return (uint16_t)s2 * (uint16_t)s1 >> 8;
}

static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    return (uint32_t)s2 * (uint32_t)s1 >> 16;
}

static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    return (uint64_t)s2 * (uint64_t)s1 >> 32;
}

static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);
    return hi_64;
}

static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    return (int16_t)s2 * (uint16_t)s1 >> 8;
}

static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    return (int32_t)s2 * (uint32_t)s1 >> 16;
}

static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    return (int64_t)s2 * (uint64_t)s1 >> 32;
}

/*
 * Let  A = signed operand,
 *      B = unsigned operand
 *      P = mulu64(A, B), unsigned product
 *
 * LET  X = 2 ** 64 - A, 2's complement of A
 *      SP = signed product
 * THEN
 *      IF A < 0
 *          SP = -X * B
 *             = -(2 ** 64 - A) * B
 *             = A * B - 2 ** 64 * B
 *             = P - 2 ** 64 * B
 *      ELSE
 *          SP = P
 * THEN
 *      HI_P -= (A < 0 ? B : 0)
 */

static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);

    hi_64 -= s2 < 0 ? s1 : 0;
    return hi_64;
}

RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
GEN_VEXT_VV(vmulh_vv_b, 1, 1)
GEN_VEXT_VV(vmulh_vv_h, 2, 2)
GEN_VEXT_VV(vmulh_vv_w, 4, 4)
GEN_VEXT_VV(vmulh_vv_d, 8, 8)
GEN_VEXT_VV(vmulhu_vv_b, 1, 1)
GEN_VEXT_VV(vmulhu_vv_h, 2, 2)
GEN_VEXT_VV(vmulhu_vv_w, 4, 4)
GEN_VEXT_VV(vmulhu_vv_d, 8, 8)
GEN_VEXT_VV(vmulhsu_vv_b, 1, 1)
GEN_VEXT_VV(vmulhsu_vv_h, 2, 2)
GEN_VEXT_VV(vmulhsu_vv_w, 4, 4)
GEN_VEXT_VV(vmulhsu_vv_d, 8, 8)
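
/*
 * Illustrative only: a numeric check of do_mulhsu_d().  For s2 = -1 and
 * s1 = 2 the exact product is -2, whose high 64 bits are all ones.
 * mulu64() sees s2 as 2^64 - 1 and returns hi_64 = 1; subtracting s1 = 2
 * (because s2 < 0) yields hi_64 = 0xffffffffffffffff, as expected.
 */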

RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
GEN_VEXT_VX(vmul_vx_b, 1, 1)
GEN_VEXT_VX(vmul_vx_h, 2, 2)
GEN_VEXT_VX(vmul_vx_w, 4, 4)
GEN_VEXT_VX(vmul_vx_d, 8, 8)
GEN_VEXT_VX(vmulh_vx_b, 1, 1)
GEN_VEXT_VX(vmulh_vx_h, 2, 2)
GEN_VEXT_VX(vmulh_vx_w, 4, 4)
GEN_VEXT_VX(vmulh_vx_d, 8, 8)
GEN_VEXT_VX(vmulhu_vx_b, 1, 1)
GEN_VEXT_VX(vmulhu_vx_h, 2, 2)
GEN_VEXT_VX(vmulhu_vx_w, 4, 4)
GEN_VEXT_VX(vmulhu_vx_d, 8, 8)
GEN_VEXT_VX(vmulhsu_vx_b, 1, 1)
GEN_VEXT_VX(vmulhsu_vx_h, 2, 2)
GEN_VEXT_VX(vmulhsu_vx_w, 4, 4)
GEN_VEXT_VX(vmulhsu_vx_d, 8, 8)

/* Vector Integer Divide Instructions */
#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
#define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) : \
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
#define DO_REM(N, M)  (unlikely(M == 0) ? N : \
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)

RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
GEN_VEXT_VV(vdivu_vv_b, 1, 1)
GEN_VEXT_VV(vdivu_vv_h, 2, 2)
GEN_VEXT_VV(vdivu_vv_w, 4, 4)
GEN_VEXT_VV(vdivu_vv_d, 8, 8)
GEN_VEXT_VV(vdiv_vv_b, 1, 1)
GEN_VEXT_VV(vdiv_vv_h, 2, 2)
GEN_VEXT_VV(vdiv_vv_w, 4, 4)
GEN_VEXT_VV(vdiv_vv_d, 8, 8)
GEN_VEXT_VV(vremu_vv_b, 1, 1)
GEN_VEXT_VV(vremu_vv_h, 2, 2)
GEN_VEXT_VV(vremu_vv_w, 4, 4)
GEN_VEXT_VV(vremu_vv_d, 8, 8)
GEN_VEXT_VV(vrem_vv_b, 1, 1)
GEN_VEXT_VV(vrem_vv_h, 2, 2)
GEN_VEXT_VV(vrem_vv_w, 4, 4)
GEN_VEXT_VV(vrem_vv_d, 8, 8)
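
/*
 * Illustrative only: the divide helpers follow the RISC-V convention of
 * never trapping.  Division by zero returns all ones (DO_DIVU) or -1
 * (DO_DIV) and leaves the dividend unchanged for the remainder; the
 * (N == -N) && (M == -1) test is intended to catch signed overflow,
 * e.g. INT64_MIN / -1, which returns the dividend with remainder 0.
 */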
RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1602 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1603 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1604 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1605 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1606 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1607 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1608 GEN_VEXT_VX(vdivu_vx_b, 1, 1) 1609 GEN_VEXT_VX(vdivu_vx_h, 2, 2) 1610 GEN_VEXT_VX(vdivu_vx_w, 4, 4) 1611 GEN_VEXT_VX(vdivu_vx_d, 8, 8) 1612 GEN_VEXT_VX(vdiv_vx_b, 1, 1) 1613 GEN_VEXT_VX(vdiv_vx_h, 2, 2) 1614 GEN_VEXT_VX(vdiv_vx_w, 4, 4) 1615 GEN_VEXT_VX(vdiv_vx_d, 8, 8) 1616 GEN_VEXT_VX(vremu_vx_b, 1, 1) 1617 GEN_VEXT_VX(vremu_vx_h, 2, 2) 1618 GEN_VEXT_VX(vremu_vx_w, 4, 4) 1619 GEN_VEXT_VX(vremu_vx_d, 8, 8) 1620 GEN_VEXT_VX(vrem_vx_b, 1, 1) 1621 GEN_VEXT_VX(vrem_vx_h, 2, 2) 1622 GEN_VEXT_VX(vrem_vx_w, 4, 4) 1623 GEN_VEXT_VX(vrem_vx_d, 8, 8) 1624 1625 /* Vector Widening Integer Multiply Instructions */ 1626 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1627 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1628 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1629 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1630 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1631 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1632 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1633 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1634 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1635 GEN_VEXT_VV(vwmul_vv_b, 1, 2) 1636 GEN_VEXT_VV(vwmul_vv_h, 2, 4) 1637 GEN_VEXT_VV(vwmul_vv_w, 4, 8) 1638 GEN_VEXT_VV(vwmulu_vv_b, 1, 2) 1639 GEN_VEXT_VV(vwmulu_vv_h, 2, 4) 1640 GEN_VEXT_VV(vwmulu_vv_w, 4, 8) 1641 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) 1642 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) 1643 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) 1644 1645 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1646 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1647 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1648 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1649 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1650 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1651 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1652 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1653 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1654 GEN_VEXT_VX(vwmul_vx_b, 1, 2) 1655 GEN_VEXT_VX(vwmul_vx_h, 2, 4) 1656 GEN_VEXT_VX(vwmul_vx_w, 4, 8) 1657 GEN_VEXT_VX(vwmulu_vx_b, 1, 2) 1658 GEN_VEXT_VX(vwmulu_vx_h, 2, 4) 1659 GEN_VEXT_VX(vwmulu_vx_w, 4, 8) 1660 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) 1661 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) 1662 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) 1663 1664 /* Vector Single-Width Integer Multiply-Add Instructions */ 1665 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1666 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1667 { \ 1668 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1669 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1670 TD d = *((TD *)vd + HD(i)); \ 1671 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1672 } 1673 1674 #define DO_MACC(N, M, D) (M * N + D) 1675 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1676 #define DO_MADD(N, M, D) (M * D + N) 1677 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1678 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1679 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1680 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, 
DO_MACC) 1681 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1682 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1683 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1684 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1685 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1686 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1687 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1688 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1689 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1690 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1691 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1692 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1693 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1694 GEN_VEXT_VV(vmacc_vv_b, 1, 1) 1695 GEN_VEXT_VV(vmacc_vv_h, 2, 2) 1696 GEN_VEXT_VV(vmacc_vv_w, 4, 4) 1697 GEN_VEXT_VV(vmacc_vv_d, 8, 8) 1698 GEN_VEXT_VV(vnmsac_vv_b, 1, 1) 1699 GEN_VEXT_VV(vnmsac_vv_h, 2, 2) 1700 GEN_VEXT_VV(vnmsac_vv_w, 4, 4) 1701 GEN_VEXT_VV(vnmsac_vv_d, 8, 8) 1702 GEN_VEXT_VV(vmadd_vv_b, 1, 1) 1703 GEN_VEXT_VV(vmadd_vv_h, 2, 2) 1704 GEN_VEXT_VV(vmadd_vv_w, 4, 4) 1705 GEN_VEXT_VV(vmadd_vv_d, 8, 8) 1706 GEN_VEXT_VV(vnmsub_vv_b, 1, 1) 1707 GEN_VEXT_VV(vnmsub_vv_h, 2, 2) 1708 GEN_VEXT_VV(vnmsub_vv_w, 4, 4) 1709 GEN_VEXT_VV(vnmsub_vv_d, 8, 8) 1710 1711 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1712 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1713 { \ 1714 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1715 TD d = *((TD *)vd + HD(i)); \ 1716 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1717 } 1718 1719 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1720 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1721 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1722 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1723 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1724 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1725 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1726 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1727 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1728 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1729 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1730 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1731 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1732 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1733 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1734 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1735 GEN_VEXT_VX(vmacc_vx_b, 1, 1) 1736 GEN_VEXT_VX(vmacc_vx_h, 2, 2) 1737 GEN_VEXT_VX(vmacc_vx_w, 4, 4) 1738 GEN_VEXT_VX(vmacc_vx_d, 8, 8) 1739 GEN_VEXT_VX(vnmsac_vx_b, 1, 1) 1740 GEN_VEXT_VX(vnmsac_vx_h, 2, 2) 1741 GEN_VEXT_VX(vnmsac_vx_w, 4, 4) 1742 GEN_VEXT_VX(vnmsac_vx_d, 8, 8) 1743 GEN_VEXT_VX(vmadd_vx_b, 1, 1) 1744 GEN_VEXT_VX(vmadd_vx_h, 2, 2) 1745 GEN_VEXT_VX(vmadd_vx_w, 4, 4) 1746 GEN_VEXT_VX(vmadd_vx_d, 8, 8) 1747 GEN_VEXT_VX(vnmsub_vx_b, 1, 1) 1748 GEN_VEXT_VX(vnmsub_vx_h, 2, 2) 1749 GEN_VEXT_VX(vnmsub_vx_w, 4, 4) 1750 GEN_VEXT_VX(vnmsub_vx_d, 8, 8) 1751 1752 /* Vector Widening Integer Multiply-Add Instructions */ 1753 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1754 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1755 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1756 RVVCALL(OPIVV3, 
vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1757 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1758 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1759 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1760 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1761 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1762 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) 1763 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) 1764 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) 1765 GEN_VEXT_VV(vwmacc_vv_b, 1, 2) 1766 GEN_VEXT_VV(vwmacc_vv_h, 2, 4) 1767 GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 1768 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) 1769 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) 1770 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) 1771 1772 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1773 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1774 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1775 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1776 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1777 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1778 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1779 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1780 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1781 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1782 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1783 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1784 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) 1785 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) 1786 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) 1787 GEN_VEXT_VX(vwmacc_vx_b, 1, 2) 1788 GEN_VEXT_VX(vwmacc_vx_h, 2, 4) 1789 GEN_VEXT_VX(vwmacc_vx_w, 4, 8) 1790 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) 1791 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) 1792 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) 1793 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) 1794 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) 1795 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) 1796 1797 /* Vector Integer Merge and Move Instructions */ 1798 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1799 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1800 uint32_t desc) \ 1801 { \ 1802 uint32_t vl = env->vl; \ 1803 uint32_t i; \ 1804 \ 1805 for (i = env->vstart; i < vl; i++) { \ 1806 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1807 *((ETYPE *)vd + H(i)) = s1; \ 1808 } \ 1809 env->vstart = 0; \ 1810 } 1811 1812 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1813 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1814 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1815 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1816 1817 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1818 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1819 uint32_t desc) \ 1820 { \ 1821 uint32_t vl = env->vl; \ 1822 uint32_t i; \ 1823 \ 1824 for (i = env->vstart; i < vl; i++) { \ 1825 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1826 } \ 1827 env->vstart = 0; \ 1828 } 1829 1830 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1831 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1832 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1833 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1834 1835 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1836 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1837 CPURISCVState *env, uint32_t desc) \ 1838 { \ 1839 uint32_t vl = env->vl; \ 1840 uint32_t i; \ 1841 \ 1842 for (i = env->vstart; i < vl; i++) { \ 1843 ETYPE *vt = (!vext_elem_mask(v0, i) ? 
vs2 : vs1); \ 1844 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1845 } \ 1846 env->vstart = 0; \ 1847 } 1848 1849 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1850 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1851 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1852 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1853 1854 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1855 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1856 void *vs2, CPURISCVState *env, uint32_t desc) \ 1857 { \ 1858 uint32_t vl = env->vl; \ 1859 uint32_t i; \ 1860 \ 1861 for (i = env->vstart; i < vl; i++) { \ 1862 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1863 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 1864 (ETYPE)(target_long)s1); \ 1865 *((ETYPE *)vd + H(i)) = d; \ 1866 } \ 1867 env->vstart = 0; \ 1868 } 1869 1870 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1871 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1872 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1873 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1874 1875 /* 1876 *** Vector Fixed-Point Arithmetic Instructions 1877 */ 1878 1879 /* Vector Single-Width Saturating Add and Subtract */ 1880 1881 /* 1882 * As fixed point instructions probably have round mode and saturation, 1883 * define common macros for fixed point here. 1884 */ 1885 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1886 CPURISCVState *env, int vxrm); 1887 1888 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1889 static inline void \ 1890 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1891 CPURISCVState *env, int vxrm) \ 1892 { \ 1893 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1894 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1895 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1896 } 1897 1898 static inline void 1899 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1900 CPURISCVState *env, 1901 uint32_t vl, uint32_t vm, int vxrm, 1902 opivv2_rm_fn *fn) 1903 { 1904 for (uint32_t i = env->vstart; i < vl; i++) { 1905 if (!vm && !vext_elem_mask(v0, i)) { 1906 continue; 1907 } 1908 fn(vd, vs1, vs2, i, env, vxrm); 1909 } 1910 env->vstart = 0; 1911 } 1912 1913 static inline void 1914 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1915 CPURISCVState *env, 1916 uint32_t desc, uint32_t esz, uint32_t dsz, 1917 opivv2_rm_fn *fn) 1918 { 1919 uint32_t vm = vext_vm(desc); 1920 uint32_t vl = env->vl; 1921 1922 switch (env->vxrm) { 1923 case 0: /* rnu */ 1924 vext_vv_rm_1(vd, v0, vs1, vs2, 1925 env, vl, vm, 0, fn); 1926 break; 1927 case 1: /* rne */ 1928 vext_vv_rm_1(vd, v0, vs1, vs2, 1929 env, vl, vm, 1, fn); 1930 break; 1931 case 2: /* rdn */ 1932 vext_vv_rm_1(vd, v0, vs1, vs2, 1933 env, vl, vm, 2, fn); 1934 break; 1935 default: /* rod */ 1936 vext_vv_rm_1(vd, v0, vs1, vs2, 1937 env, vl, vm, 3, fn); 1938 break; 1939 } 1940 } 1941 1942 /* generate helpers for fixed point instructions with OPIVV format */ 1943 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ 1944 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1945 CPURISCVState *env, uint32_t desc) \ 1946 { \ 1947 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 1948 do_##NAME); \ 1949 } 1950 1951 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 1952 { 1953 uint8_t res = a + b; 1954 if (res < a) { 1955 res = UINT8_MAX; 1956 env->vxsat = 0x1; 1957 } 1958 return res; 1959 } 1960 1961 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1962 uint16_t b) 1963 { 1964 uint16_t res = a + b; 1965 if (res < a) { 1966 res = UINT16_MAX; 1967 env->vxsat = 0x1; 1968 } 1969 return res; 
1970 } 1971 1972 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1973 uint32_t b) 1974 { 1975 uint32_t res = a + b; 1976 if (res < a) { 1977 res = UINT32_MAX; 1978 env->vxsat = 0x1; 1979 } 1980 return res; 1981 } 1982 1983 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1984 uint64_t b) 1985 { 1986 uint64_t res = a + b; 1987 if (res < a) { 1988 res = UINT64_MAX; 1989 env->vxsat = 0x1; 1990 } 1991 return res; 1992 } 1993 1994 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 1995 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 1996 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 1997 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 1998 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) 1999 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) 2000 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) 2001 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) 2002 2003 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2004 CPURISCVState *env, int vxrm); 2005 2006 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2007 static inline void \ 2008 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2009 CPURISCVState *env, int vxrm) \ 2010 { \ 2011 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2012 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2013 } 2014 2015 static inline void 2016 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2017 CPURISCVState *env, 2018 uint32_t vl, uint32_t vm, int vxrm, 2019 opivx2_rm_fn *fn) 2020 { 2021 for (uint32_t i = env->vstart; i < vl; i++) { 2022 if (!vm && !vext_elem_mask(v0, i)) { 2023 continue; 2024 } 2025 fn(vd, s1, vs2, i, env, vxrm); 2026 } 2027 env->vstart = 0; 2028 } 2029 2030 static inline void 2031 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2032 CPURISCVState *env, 2033 uint32_t desc, uint32_t esz, uint32_t dsz, 2034 opivx2_rm_fn *fn) 2035 { 2036 uint32_t vm = vext_vm(desc); 2037 uint32_t vl = env->vl; 2038 2039 switch (env->vxrm) { 2040 case 0: /* rnu */ 2041 vext_vx_rm_1(vd, v0, s1, vs2, 2042 env, vl, vm, 0, fn); 2043 break; 2044 case 1: /* rne */ 2045 vext_vx_rm_1(vd, v0, s1, vs2, 2046 env, vl, vm, 1, fn); 2047 break; 2048 case 2: /* rdn */ 2049 vext_vx_rm_1(vd, v0, s1, vs2, 2050 env, vl, vm, 2, fn); 2051 break; 2052 default: /* rod */ 2053 vext_vx_rm_1(vd, v0, s1, vs2, 2054 env, vl, vm, 3, fn); 2055 break; 2056 } 2057 } 2058 2059 /* generate helpers for fixed point instructions with OPIVX format */ 2060 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ 2061 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2062 void *vs2, CPURISCVState *env, uint32_t desc) \ 2063 { \ 2064 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 2065 do_##NAME); \ 2066 } 2067 2068 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2069 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2070 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2071 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2072 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 2073 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) 2074 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) 2075 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) 2076 2077 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2078 { 2079 int8_t res = a + b; 2080 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2081 res = a > 0 ? 
INT8_MAX : INT8_MIN; 2082 env->vxsat = 0x1; 2083 } 2084 return res; 2085 } 2086 2087 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2088 { 2089 int16_t res = a + b; 2090 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2091 res = a > 0 ? INT16_MAX : INT16_MIN; 2092 env->vxsat = 0x1; 2093 } 2094 return res; 2095 } 2096 2097 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2098 { 2099 int32_t res = a + b; 2100 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2101 res = a > 0 ? INT32_MAX : INT32_MIN; 2102 env->vxsat = 0x1; 2103 } 2104 return res; 2105 } 2106 2107 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2108 { 2109 int64_t res = a + b; 2110 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2111 res = a > 0 ? INT64_MAX : INT64_MIN; 2112 env->vxsat = 0x1; 2113 } 2114 return res; 2115 } 2116 2117 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2118 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2119 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2120 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2121 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) 2122 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) 2123 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) 2124 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) 2125 2126 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2127 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2128 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2129 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2130 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) 2131 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) 2132 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) 2133 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) 2134 2135 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2136 { 2137 uint8_t res = a - b; 2138 if (res > a) { 2139 res = 0; 2140 env->vxsat = 0x1; 2141 } 2142 return res; 2143 } 2144 2145 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2146 uint16_t b) 2147 { 2148 uint16_t res = a - b; 2149 if (res > a) { 2150 res = 0; 2151 env->vxsat = 0x1; 2152 } 2153 return res; 2154 } 2155 2156 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2157 uint32_t b) 2158 { 2159 uint32_t res = a - b; 2160 if (res > a) { 2161 res = 0; 2162 env->vxsat = 0x1; 2163 } 2164 return res; 2165 } 2166 2167 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2168 uint64_t b) 2169 { 2170 uint64_t res = a - b; 2171 if (res > a) { 2172 res = 0; 2173 env->vxsat = 0x1; 2174 } 2175 return res; 2176 } 2177 2178 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2179 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2180 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2181 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2182 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) 2183 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) 2184 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) 2185 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) 2186 2187 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2188 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2189 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2190 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2191 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) 2192 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) 2193 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) 2194 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) 2195 2196 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2197 { 2198 int8_t res = a - b; 2199 if 
((res ^ a) & (a ^ b) & INT8_MIN) { 2200 res = a >= 0 ? INT8_MAX : INT8_MIN; 2201 env->vxsat = 0x1; 2202 } 2203 return res; 2204 } 2205 2206 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2207 { 2208 int16_t res = a - b; 2209 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2210 res = a >= 0 ? INT16_MAX : INT16_MIN; 2211 env->vxsat = 0x1; 2212 } 2213 return res; 2214 } 2215 2216 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2217 { 2218 int32_t res = a - b; 2219 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2220 res = a >= 0 ? INT32_MAX : INT32_MIN; 2221 env->vxsat = 0x1; 2222 } 2223 return res; 2224 } 2225 2226 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2227 { 2228 int64_t res = a - b; 2229 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2230 res = a >= 0 ? INT64_MAX : INT64_MIN; 2231 env->vxsat = 0x1; 2232 } 2233 return res; 2234 } 2235 2236 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2237 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2238 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2239 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2240 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) 2241 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) 2242 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) 2243 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) 2244 2245 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2246 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2247 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2248 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2249 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) 2250 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) 2251 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) 2252 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) 2253 2254 /* Vector Single-Width Averaging Add and Subtract */ 2255 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2256 { 2257 uint8_t d = extract64(v, shift, 1); 2258 uint8_t d1; 2259 uint64_t D1, D2; 2260 2261 if (shift == 0 || shift > 64) { 2262 return 0; 2263 } 2264 2265 d1 = extract64(v, shift - 1, 1); 2266 D1 = extract64(v, 0, shift); 2267 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2268 return d1; 2269 } else if (vxrm == 1) { /* round-to-nearest-even */ 2270 if (shift > 1) { 2271 D2 = extract64(v, 0, shift - 1); 2272 return d1 & ((D2 != 0) | d); 2273 } else { 2274 return d1 & d; 2275 } 2276 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2277 return !d & (D1 != 0); 2278 } 2279 return 0; /* round-down (truncate) */ 2280 } 2281 2282 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2283 { 2284 int64_t res = (int64_t)a + b; 2285 uint8_t round = get_round(vxrm, res, 1); 2286 2287 return (res >> 1) + round; 2288 } 2289 2290 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2291 { 2292 int64_t res = a + b; 2293 uint8_t round = get_round(vxrm, res, 1); 2294 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2295 2296 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2297 return ((res >> 1) ^ over) + round; 2298 } 2299 2300 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2301 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2302 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2303 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2304 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) 2305 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 2306 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) 2307 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) 2308 2309 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2310 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2311 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2312 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2313 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) 2314 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) 2315 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) 2316 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) 2317 2318 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2319 uint32_t a, uint32_t b) 2320 { 2321 uint64_t res = (uint64_t)a + b; 2322 uint8_t round = get_round(vxrm, res, 1); 2323 2324 return (res >> 1) + round; 2325 } 2326 2327 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2328 uint64_t a, uint64_t b) 2329 { 2330 uint64_t res = a + b; 2331 uint8_t round = get_round(vxrm, res, 1); 2332 uint64_t over = (uint64_t)(res < a) << 63; 2333 2334 return ((res >> 1) | over) + round; 2335 } 2336 2337 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2338 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2339 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2340 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2341 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1) 2342 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2) 2343 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4) 2344 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8) 2345 2346 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2347 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2348 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2349 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2350 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1) 2351 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2) 2352 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4) 2353 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8) 2354 2355 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2356 { 2357 int64_t res = (int64_t)a - b; 2358 uint8_t round = get_round(vxrm, res, 1); 2359 2360 return (res >> 1) + round; 2361 } 2362 2363 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2364 { 2365 int64_t res = (int64_t)a - b; 2366 uint8_t round = get_round(vxrm, res, 1); 2367 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2368 2369 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2370 return ((res >> 1) ^ over) + round; 2371 } 2372 2373 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2374 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2375 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2376 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2377 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) 2378 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) 2379 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) 2380 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) 2381 2382 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2383 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2384 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2385 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2386 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) 2387 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) 2388 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) 2389 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) 2390 2391 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2392 uint32_t a, uint32_t b) 2393 { 2394 int64_t res = (int64_t)a - b; 2395 uint8_t round = get_round(vxrm, res, 1); 2396 2397 return (res >> 1) + round; 2398 } 2399 2400 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2401 uint64_t a, uint64_t b) 2402 { 2403 uint64_t res = (uint64_t)a - b; 2404 uint8_t round = get_round(vxrm, res, 1); 2405 uint64_t over = (uint64_t)(res > a) << 63; 2406 2407 return ((res >> 1) | over) + round; 2408 } 2409 2410 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2411 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2412 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2413 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2414 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1) 2415 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2) 2416 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4) 2417 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8) 2418 2419 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2420 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2421 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2422 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2423 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1) 2424 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2) 2425 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4) 2426 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8) 2427 2428 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2429 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2430 { 2431 uint8_t round; 2432 int16_t res; 2433 2434 res = (int16_t)a * (int16_t)b; 2435 round = get_round(vxrm, res, 7); 2436 res = (res >> 7) + round; 2437 2438 if (res > INT8_MAX) { 2439 env->vxsat = 0x1; 2440 return INT8_MAX; 2441 } else if (res < INT8_MIN) { 2442 env->vxsat = 0x1; 2443 return INT8_MIN; 2444 } else { 2445 return res; 2446 } 2447 } 2448 2449 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2450 { 2451 uint8_t round; 2452 int32_t res; 2453 2454 res = (int32_t)a * (int32_t)b; 2455 round = get_round(vxrm, res, 15); 2456 res = (res >> 15) + round; 2457 2458 if (res > INT16_MAX) { 2459 env->vxsat = 0x1; 2460 return INT16_MAX; 2461 } else if (res < INT16_MIN) { 2462 env->vxsat = 0x1; 2463 return INT16_MIN; 2464 } else { 2465 return res; 2466 } 2467 } 2468 2469 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2470 { 2471 uint8_t round; 2472 int64_t res; 2473 2474 res = (int64_t)a * (int64_t)b; 2475 round = get_round(vxrm, res, 31); 2476 res = (res >> 31) + round; 2477 2478 if (res > INT32_MAX) { 2479 env->vxsat = 0x1; 2480 return INT32_MAX; 2481 } else 
if (res < INT32_MIN) { 2482 env->vxsat = 0x1; 2483 return INT32_MIN; 2484 } else { 2485 return res; 2486 } 2487 } 2488 2489 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2490 { 2491 uint8_t round; 2492 uint64_t hi_64, lo_64; 2493 int64_t res; 2494 2495 if (a == INT64_MIN && b == INT64_MIN) { 2496 env->vxsat = 1; 2497 return INT64_MAX; 2498 } 2499 2500 muls64(&lo_64, &hi_64, a, b); 2501 round = get_round(vxrm, lo_64, 63); 2502 /* 2503 * Cannot overflow, as there are always 2504 * 2 sign bits after multiply. 2505 */ 2506 res = (hi_64 << 1) | (lo_64 >> 63); 2507 if (round) { 2508 if (res == INT64_MAX) { 2509 env->vxsat = 1; 2510 } else { 2511 res += 1; 2512 } 2513 } 2514 return res; 2515 } 2516 2517 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2518 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2519 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2520 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2521 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) 2522 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) 2523 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) 2524 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) 2525 2526 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2527 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2528 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2529 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2530 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) 2531 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) 2532 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) 2533 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) 2534 2535 /* Vector Single-Width Scaling Shift Instructions */ 2536 static inline uint8_t 2537 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2538 { 2539 uint8_t round, shift = b & 0x7; 2540 uint8_t res; 2541 2542 round = get_round(vxrm, a, shift); 2543 res = (a >> shift) + round; 2544 return res; 2545 } 2546 static inline uint16_t 2547 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2548 { 2549 uint8_t round, shift = b & 0xf; 2550 uint16_t res; 2551 2552 round = get_round(vxrm, a, shift); 2553 res = (a >> shift) + round; 2554 return res; 2555 } 2556 static inline uint32_t 2557 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2558 { 2559 uint8_t round, shift = b & 0x1f; 2560 uint32_t res; 2561 2562 round = get_round(vxrm, a, shift); 2563 res = (a >> shift) + round; 2564 return res; 2565 } 2566 static inline uint64_t 2567 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2568 { 2569 uint8_t round, shift = b & 0x3f; 2570 uint64_t res; 2571 2572 round = get_round(vxrm, a, shift); 2573 res = (a >> shift) + round; 2574 return res; 2575 } 2576 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2577 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2578 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2579 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2580 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) 2581 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) 2582 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) 2583 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) 2584 2585 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2586 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2587 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2588 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2589 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) 2590 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) 2591 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) 2592 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) 2593 2594 static inline int8_t 2595 vssra8(CPURISCVState *env, int 
vxrm, int8_t a, int8_t b) 2596 { 2597 uint8_t round, shift = b & 0x7; 2598 int8_t res; 2599 2600 round = get_round(vxrm, a, shift); 2601 res = (a >> shift) + round; 2602 return res; 2603 } 2604 static inline int16_t 2605 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2606 { 2607 uint8_t round, shift = b & 0xf; 2608 int16_t res; 2609 2610 round = get_round(vxrm, a, shift); 2611 res = (a >> shift) + round; 2612 return res; 2613 } 2614 static inline int32_t 2615 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2616 { 2617 uint8_t round, shift = b & 0x1f; 2618 int32_t res; 2619 2620 round = get_round(vxrm, a, shift); 2621 res = (a >> shift) + round; 2622 return res; 2623 } 2624 static inline int64_t 2625 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2626 { 2627 uint8_t round, shift = b & 0x3f; 2628 int64_t res; 2629 2630 round = get_round(vxrm, a, shift); 2631 res = (a >> shift) + round; 2632 return res; 2633 } 2634 2635 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2636 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2637 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2638 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2639 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) 2640 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) 2641 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) 2642 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) 2643 2644 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2645 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2646 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2647 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2648 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) 2649 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) 2650 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) 2651 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) 2652 2653 /* Vector Narrowing Fixed-Point Clip Instructions */ 2654 static inline int8_t 2655 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2656 { 2657 uint8_t round, shift = b & 0xf; 2658 int16_t res; 2659 2660 round = get_round(vxrm, a, shift); 2661 res = (a >> shift) + round; 2662 if (res > INT8_MAX) { 2663 env->vxsat = 0x1; 2664 return INT8_MAX; 2665 } else if (res < INT8_MIN) { 2666 env->vxsat = 0x1; 2667 return INT8_MIN; 2668 } else { 2669 return res; 2670 } 2671 } 2672 2673 static inline int16_t 2674 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2675 { 2676 uint8_t round, shift = b & 0x1f; 2677 int32_t res; 2678 2679 round = get_round(vxrm, a, shift); 2680 res = (a >> shift) + round; 2681 if (res > INT16_MAX) { 2682 env->vxsat = 0x1; 2683 return INT16_MAX; 2684 } else if (res < INT16_MIN) { 2685 env->vxsat = 0x1; 2686 return INT16_MIN; 2687 } else { 2688 return res; 2689 } 2690 } 2691 2692 static inline int32_t 2693 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2694 { 2695 uint8_t round, shift = b & 0x3f; 2696 int64_t res; 2697 2698 round = get_round(vxrm, a, shift); 2699 res = (a >> shift) + round; 2700 if (res > INT32_MAX) { 2701 env->vxsat = 0x1; 2702 return INT32_MAX; 2703 } else if (res < INT32_MIN) { 2704 env->vxsat = 0x1; 2705 return INT32_MIN; 2706 } else { 2707 return res; 2708 } 2709 } 2710 2711 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2712 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2713 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2714 GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1) 2715 GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2) 2716 GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4) 2717 2718 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, 
vnclip8)
RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)

static inline uint8_t
vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
{
    uint8_t round, shift = b & 0xf;
    uint16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT8_MAX) {
        env->vxsat = 0x1;
        return UINT8_MAX;
    } else {
        return res;
    }
}

static inline uint16_t
vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
{
    uint8_t round, shift = b & 0x1f;
    uint32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT16_MAX) {
        env->vxsat = 0x1;
        return UINT16_MAX;
    } else {
        return res;
    }
}

static inline uint32_t
vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x3f;
    uint64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT32_MAX) {
        env->vxsat = 0x1;
        return UINT32_MAX;
    } else {
        return res;
    }
}

RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)

RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)

/*
 *** Vector Floating-Point Arithmetic Instructions
 */
/* Vector Single-Width Floating-Point Add/Subtract Instructions */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
}

#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t i;                                           \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, vs1, vs2, i, env);                  \
    }                                                     \
    env->vstart = 0;                                      \
}

RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)

#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}
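/*
 * Editor's note: an illustrative sketch, not part of the original helpers.
 * After the OPFVV2/GEN_VEXT_VV_ENV macros expand, each generated
 * floating-point helper is just a masked element loop over [vstart, vl).
 * Written out by hand for a hypothetical masked half-precision add it is
 * roughly:
 */
static inline void example_vfadd_h_expanded(void *vd, void *v0, void *vs1,
                                            void *vs2, CPURISCVState *env)
{
    uint32_t i;

    for (i = env->vstart; i < env->vl; i++) {
        if (!vext_elem_mask(v0, i)) {      /* assumes vm == 0 (masked) */
            continue;
        }
        *((uint16_t *)vd + H2(i)) =
            float16_add(*((uint16_t *)vs2 + H2(i)),
                        *((uint16_t *)vs1 + H2(i)), &env->fp_status);
    }
    env->vstart = 0;
}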
2833 #define GEN_VEXT_VF(NAME, ESZ, DSZ) \ 2834 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2835 void *vs2, CPURISCVState *env, \ 2836 uint32_t desc) \ 2837 { \ 2838 uint32_t vm = vext_vm(desc); \ 2839 uint32_t vl = env->vl; \ 2840 uint32_t i; \ 2841 \ 2842 for (i = env->vstart; i < vl; i++) { \ 2843 if (!vm && !vext_elem_mask(v0, i)) { \ 2844 continue; \ 2845 } \ 2846 do_##NAME(vd, s1, vs2, i, env); \ 2847 } \ 2848 env->vstart = 0; \ 2849 } 2850 2851 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2852 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2853 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2854 GEN_VEXT_VF(vfadd_vf_h, 2, 2) 2855 GEN_VEXT_VF(vfadd_vf_w, 4, 4) 2856 GEN_VEXT_VF(vfadd_vf_d, 8, 8) 2857 2858 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2859 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2860 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2861 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) 2862 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) 2863 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) 2864 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2865 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2866 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2867 GEN_VEXT_VF(vfsub_vf_h, 2, 2) 2868 GEN_VEXT_VF(vfsub_vf_w, 4, 4) 2869 GEN_VEXT_VF(vfsub_vf_d, 8, 8) 2870 2871 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2872 { 2873 return float16_sub(b, a, s); 2874 } 2875 2876 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2877 { 2878 return float32_sub(b, a, s); 2879 } 2880 2881 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2882 { 2883 return float64_sub(b, a, s); 2884 } 2885 2886 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2887 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2888 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2889 GEN_VEXT_VF(vfrsub_vf_h, 2, 2) 2890 GEN_VEXT_VF(vfrsub_vf_w, 4, 4) 2891 GEN_VEXT_VF(vfrsub_vf_d, 8, 8) 2892 2893 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2894 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2895 { 2896 return float32_add(float16_to_float32(a, true, s), 2897 float16_to_float32(b, true, s), s); 2898 } 2899 2900 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2901 { 2902 return float64_add(float32_to_float64(a, s), 2903 float32_to_float64(b, s), s); 2904 2905 } 2906 2907 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2908 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2909 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) 2910 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) 2911 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2912 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2913 GEN_VEXT_VF(vfwadd_vf_h, 2, 4) 2914 GEN_VEXT_VF(vfwadd_vf_w, 4, 8) 2915 2916 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2917 { 2918 return float32_sub(float16_to_float32(a, true, s), 2919 float16_to_float32(b, true, s), s); 2920 } 2921 2922 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2923 { 2924 return float64_sub(float32_to_float64(a, s), 2925 float32_to_float64(b, s), s); 2926 2927 } 2928 2929 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 2930 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 2931 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) 2932 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) 2933 RVVCALL(OPFVF2, vfwsub_vf_h, 
WOP_UUU_H, H4, H2, vfwsub16) 2934 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 2935 GEN_VEXT_VF(vfwsub_vf_h, 2, 4) 2936 GEN_VEXT_VF(vfwsub_vf_w, 4, 8) 2937 2938 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 2939 { 2940 return float32_add(a, float16_to_float32(b, true, s), s); 2941 } 2942 2943 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 2944 { 2945 return float64_add(a, float32_to_float64(b, s), s); 2946 } 2947 2948 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 2949 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 2950 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) 2951 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 2952 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 2953 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 2954 GEN_VEXT_VF(vfwadd_wf_h, 2, 4) 2955 GEN_VEXT_VF(vfwadd_wf_w, 4, 8) 2956 2957 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 2958 { 2959 return float32_sub(a, float16_to_float32(b, true, s), s); 2960 } 2961 2962 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 2963 { 2964 return float64_sub(a, float32_to_float64(b, s), s); 2965 } 2966 2967 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 2968 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 2969 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) 2970 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) 2971 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 2972 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 2973 GEN_VEXT_VF(vfwsub_wf_h, 2, 4) 2974 GEN_VEXT_VF(vfwsub_wf_w, 4, 8) 2975 2976 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 2977 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 2978 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 2979 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 2980 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) 2981 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) 2982 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) 2983 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 2984 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 2985 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 2986 GEN_VEXT_VF(vfmul_vf_h, 2, 2) 2987 GEN_VEXT_VF(vfmul_vf_w, 4, 4) 2988 GEN_VEXT_VF(vfmul_vf_d, 8, 8) 2989 2990 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 2991 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 2992 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 2993 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 2994 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) 2995 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) 2996 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 2997 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 2998 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 2999 GEN_VEXT_VF(vfdiv_vf_h, 2, 2) 3000 GEN_VEXT_VF(vfdiv_vf_w, 4, 4) 3001 GEN_VEXT_VF(vfdiv_vf_d, 8, 8) 3002 3003 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3004 { 3005 return float16_div(b, a, s); 3006 } 3007 3008 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3009 { 3010 return float32_div(b, a, s); 3011 } 3012 3013 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3014 { 3015 return float64_div(b, a, s); 3016 } 3017 3018 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3019 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3020 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3021 
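/*
 * Editor's note: an illustrative sketch, not part of the original helpers.
 * The *_rsub/*_rdiv wrappers above only swap the operand order, so that for
 * vfrsub.vf and vfrdiv.vf the scalar f[rs1] ends up as the minuend/dividend:
 * the element result is f[rs1] - vs2[i] and f[rs1] / vs2[i].  For one
 * 32-bit element (hypothetical name, for illustration only):
 */
static inline uint32_t example_vfrdiv_element(uint32_t vs2_elem,
                                              uint32_t rs1_bits,
                                              float_status *s)
{
    /* float32_rdiv(a, b) is float32_div(b, a), i.e. rs1 / vs2[i] */
    return float32_rdiv(vs2_elem, rs1_bits, s);
}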
GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) 3022 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) 3023 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) 3024 3025 /* Vector Widening Floating-Point Multiply */ 3026 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3027 { 3028 return float32_mul(float16_to_float32(a, true, s), 3029 float16_to_float32(b, true, s), s); 3030 } 3031 3032 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3033 { 3034 return float64_mul(float32_to_float64(a, s), 3035 float32_to_float64(b, s), s); 3036 3037 } 3038 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3039 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3040 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) 3041 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) 3042 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3043 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3044 GEN_VEXT_VF(vfwmul_vf_h, 2, 4) 3045 GEN_VEXT_VF(vfwmul_vf_w, 4, 8) 3046 3047 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3048 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3049 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3050 CPURISCVState *env) \ 3051 { \ 3052 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3053 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3054 TD d = *((TD *)vd + HD(i)); \ 3055 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3056 } 3057 3058 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3059 { 3060 return float16_muladd(a, b, d, 0, s); 3061 } 3062 3063 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3064 { 3065 return float32_muladd(a, b, d, 0, s); 3066 } 3067 3068 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3069 { 3070 return float64_muladd(a, b, d, 0, s); 3071 } 3072 3073 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3074 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3075 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3076 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) 3077 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) 3078 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) 3079 3080 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3081 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3082 CPURISCVState *env) \ 3083 { \ 3084 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3085 TD d = *((TD *)vd + HD(i)); \ 3086 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3087 } 3088 3089 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3090 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3091 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3092 GEN_VEXT_VF(vfmacc_vf_h, 2, 2) 3093 GEN_VEXT_VF(vfmacc_vf_w, 4, 4) 3094 GEN_VEXT_VF(vfmacc_vf_d, 8, 8) 3095 3096 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3097 { 3098 return float16_muladd(a, b, d, 3099 float_muladd_negate_c | float_muladd_negate_product, s); 3100 } 3101 3102 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3103 { 3104 return float32_muladd(a, b, d, 3105 float_muladd_negate_c | float_muladd_negate_product, s); 3106 } 3107 3108 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3109 { 3110 return float64_muladd(a, b, d, 3111 float_muladd_negate_c | float_muladd_negate_product, s); 3112 } 3113 3114 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3115 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3116 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3117 
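/*
 * Editor's note: an illustrative sketch, not part of the original helpers.
 * Composing the softfloat muladd flags yields the different accumulate
 * variants; fnmacc* above sets both negate_product and negate_c, so
 * vfnmacc computes -(vs1[i] * vs2[i]) - vd[i] with a single rounding.
 * For one 32-bit element (hypothetical name, for illustration only):
 */
static inline uint32_t example_vfnmacc_element(uint32_t vs1, uint32_t vs2,
                                               uint32_t vd, float_status *s)
{
    /* vd[i] = -(vs2[i] * vs1[i]) - vd[i], fused, one rounding step */
    return float32_muladd(vs2, vs1, vd,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}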
GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) 3118 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) 3119 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) 3120 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3121 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3122 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3123 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) 3124 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) 3125 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) 3126 3127 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3128 { 3129 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3130 } 3131 3132 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3133 { 3134 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3135 } 3136 3137 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3138 { 3139 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3140 } 3141 3142 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3143 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3144 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3145 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) 3146 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) 3147 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) 3148 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3149 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3150 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3151 GEN_VEXT_VF(vfmsac_vf_h, 2, 2) 3152 GEN_VEXT_VF(vfmsac_vf_w, 4, 4) 3153 GEN_VEXT_VF(vfmsac_vf_d, 8, 8) 3154 3155 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3156 { 3157 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3158 } 3159 3160 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3161 { 3162 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3163 } 3164 3165 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3166 { 3167 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3168 } 3169 3170 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3171 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3172 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3173 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) 3174 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) 3175 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) 3176 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3177 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3178 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3179 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) 3180 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) 3181 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) 3182 3183 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3184 { 3185 return float16_muladd(d, b, a, 0, s); 3186 } 3187 3188 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3189 { 3190 return float32_muladd(d, b, a, 0, s); 3191 } 3192 3193 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3194 { 3195 return float64_muladd(d, b, a, 0, s); 3196 } 3197 3198 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3199 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3200 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3201 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) 3202 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) 3203 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) 3204 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3205 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3206 
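/*
 * Editor's note: an illustrative sketch, not part of the original helpers.
 * The "macc/msac" forms multiply the two sources and accumulate into the
 * destination (vd = +/-(vs1 * vs2) +/- vd), while the "madd/msub" forms
 * multiply the destination and add/subtract a source
 * (vd = +/-(vs1 * vd) +/- vs2); this is why fmadd16/32/64 above pass d as
 * the first multiplicand.  For one 32-bit vfmadd element (hypothetical
 * name, for illustration only):
 */
static inline uint32_t example_vfmadd_element(uint32_t vs1, uint32_t vs2,
                                              uint32_t vd, float_status *s)
{
    /* vd[i] = (vs1[i] * vd[i]) + vs2[i], fused, one rounding step */
    return float32_muladd(vd, vs1, vs2, 0, s);
}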
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3207 GEN_VEXT_VF(vfmadd_vf_h, 2, 2) 3208 GEN_VEXT_VF(vfmadd_vf_w, 4, 4) 3209 GEN_VEXT_VF(vfmadd_vf_d, 8, 8) 3210 3211 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3212 { 3213 return float16_muladd(d, b, a, 3214 float_muladd_negate_c | float_muladd_negate_product, s); 3215 } 3216 3217 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3218 { 3219 return float32_muladd(d, b, a, 3220 float_muladd_negate_c | float_muladd_negate_product, s); 3221 } 3222 3223 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3224 { 3225 return float64_muladd(d, b, a, 3226 float_muladd_negate_c | float_muladd_negate_product, s); 3227 } 3228 3229 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3230 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3231 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3232 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) 3233 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) 3234 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) 3235 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3236 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3237 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3238 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) 3239 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) 3240 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) 3241 3242 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3243 { 3244 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3245 } 3246 3247 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3248 { 3249 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3250 } 3251 3252 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3253 { 3254 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3255 } 3256 3257 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3258 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3259 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3260 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) 3261 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 3262 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) 3263 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3264 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3265 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3266 GEN_VEXT_VF(vfmsub_vf_h, 2, 2) 3267 GEN_VEXT_VF(vfmsub_vf_w, 4, 4) 3268 GEN_VEXT_VF(vfmsub_vf_d, 8, 8) 3269 3270 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3271 { 3272 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3273 } 3274 3275 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3276 { 3277 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3278 } 3279 3280 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3281 { 3282 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3283 } 3284 3285 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3286 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3287 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3288 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) 3289 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) 3290 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) 3291 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3292 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3293 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3294 
GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) 3295 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) 3296 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) 3297 3298 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3299 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3300 { 3301 return float32_muladd(float16_to_float32(a, true, s), 3302 float16_to_float32(b, true, s), d, 0, s); 3303 } 3304 3305 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3306 { 3307 return float64_muladd(float32_to_float64(a, s), 3308 float32_to_float64(b, s), d, 0, s); 3309 } 3310 3311 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3312 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3313 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) 3314 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) 3315 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3316 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3317 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) 3318 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) 3319 3320 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3321 { 3322 return float32_muladd(float16_to_float32(a, true, s), 3323 float16_to_float32(b, true, s), d, 3324 float_muladd_negate_c | float_muladd_negate_product, s); 3325 } 3326 3327 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3328 { 3329 return float64_muladd(float32_to_float64(a, s), 3330 float32_to_float64(b, s), d, 3331 float_muladd_negate_c | float_muladd_negate_product, s); 3332 } 3333 3334 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3335 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3336 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) 3337 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) 3338 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3339 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3340 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) 3341 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) 3342 3343 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3344 { 3345 return float32_muladd(float16_to_float32(a, true, s), 3346 float16_to_float32(b, true, s), d, 3347 float_muladd_negate_c, s); 3348 } 3349 3350 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3351 { 3352 return float64_muladd(float32_to_float64(a, s), 3353 float32_to_float64(b, s), d, 3354 float_muladd_negate_c, s); 3355 } 3356 3357 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3358 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3359 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) 3360 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) 3361 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3362 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3363 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) 3364 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) 3365 3366 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3367 { 3368 return float32_muladd(float16_to_float32(a, true, s), 3369 float16_to_float32(b, true, s), d, 3370 float_muladd_negate_product, s); 3371 } 3372 3373 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3374 { 3375 return float64_muladd(float32_to_float64(a, s), 3376 float32_to_float64(b, s), d, 3377 float_muladd_negate_product, s); 3378 } 3379 3380 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3381 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3382 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) 3383 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 3384 
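/*
 * The widening forms above first convert both SEW-wide sources up to 2*SEW
 * (the 'true' argument selects IEEE half-precision semantics for fp16
 * inputs); these conversions are exact, so the single fused muladd that
 * follows rounds the whole operation only once.  The scalar-operand (.vf)
 * variants reuse the same fw* op functions through the OPFVF3 adapter.
 */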
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3385 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3386 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) 3387 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) 3388 3389 /* Vector Floating-Point Square-Root Instruction */ 3390 /* (TD, T2, TX2) */ 3391 #define OP_UU_H uint16_t, uint16_t, uint16_t 3392 #define OP_UU_W uint32_t, uint32_t, uint32_t 3393 #define OP_UU_D uint64_t, uint64_t, uint64_t 3394 3395 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3396 static void do_##NAME(void *vd, void *vs2, int i, \ 3397 CPURISCVState *env) \ 3398 { \ 3399 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3400 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3401 } 3402 3403 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ 3404 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3405 CPURISCVState *env, uint32_t desc) \ 3406 { \ 3407 uint32_t vm = vext_vm(desc); \ 3408 uint32_t vl = env->vl; \ 3409 uint32_t i; \ 3410 \ 3411 if (vl == 0) { \ 3412 return; \ 3413 } \ 3414 for (i = env->vstart; i < vl; i++) { \ 3415 if (!vm && !vext_elem_mask(v0, i)) { \ 3416 continue; \ 3417 } \ 3418 do_##NAME(vd, vs2, i, env); \ 3419 } \ 3420 env->vstart = 0; \ 3421 } 3422 3423 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3424 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3425 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3426 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) 3427 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) 3428 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) 3429 3430 /* 3431 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3432 * 3433 * Adapted from riscv-v-spec recip.c: 3434 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3435 */ 3436 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3437 { 3438 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3439 uint64_t exp = extract64(f, frac_size, exp_size); 3440 uint64_t frac = extract64(f, 0, frac_size); 3441 3442 const uint8_t lookup_table[] = { 3443 52, 51, 50, 48, 47, 46, 44, 43, 3444 42, 41, 40, 39, 38, 36, 35, 34, 3445 33, 32, 31, 30, 30, 29, 28, 27, 3446 26, 25, 24, 23, 23, 22, 21, 20, 3447 19, 19, 18, 17, 16, 16, 15, 14, 3448 14, 13, 12, 12, 11, 10, 10, 9, 3449 9, 8, 7, 7, 6, 6, 5, 4, 3450 4, 3, 3, 2, 2, 1, 1, 0, 3451 127, 125, 123, 121, 119, 118, 116, 114, 3452 113, 111, 109, 108, 106, 105, 103, 102, 3453 100, 99, 97, 96, 95, 93, 92, 91, 3454 90, 88, 87, 86, 85, 84, 83, 82, 3455 80, 79, 78, 77, 76, 75, 74, 73, 3456 72, 71, 70, 70, 69, 68, 67, 66, 3457 65, 64, 63, 63, 62, 61, 60, 59, 3458 59, 58, 57, 56, 56, 55, 54, 53 3459 }; 3460 const int precision = 7; 3461 3462 if (exp == 0 && frac != 0) { /* subnormal */ 3463 /* Normalize the subnormal. 
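Shifting the fraction up while decrementing exp lets exp wrap below zero as an unsigned value; the ~exp term in the output-exponent computation below relies on that two's-complement encoding.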
*/ 3464 while (extract64(frac, frac_size - 1, 1) == 0) { 3465 exp--; 3466 frac <<= 1; 3467 } 3468 3469 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3470 } 3471 3472 int idx = ((exp & 1) << (precision - 1)) | 3473 (frac >> (frac_size - precision + 1)); 3474 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3475 (frac_size - precision); 3476 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3477 3478 uint64_t val = 0; 3479 val = deposit64(val, 0, frac_size, out_frac); 3480 val = deposit64(val, frac_size, exp_size, out_exp); 3481 val = deposit64(val, frac_size + exp_size, 1, sign); 3482 return val; 3483 } 3484 3485 static float16 frsqrt7_h(float16 f, float_status *s) 3486 { 3487 int exp_size = 5, frac_size = 10; 3488 bool sign = float16_is_neg(f); 3489 3490 /* 3491 * frsqrt7(sNaN) = canonical NaN 3492 * frsqrt7(-inf) = canonical NaN 3493 * frsqrt7(-normal) = canonical NaN 3494 * frsqrt7(-subnormal) = canonical NaN 3495 */ 3496 if (float16_is_signaling_nan(f, s) || 3497 (float16_is_infinity(f) && sign) || 3498 (float16_is_normal(f) && sign) || 3499 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3500 s->float_exception_flags |= float_flag_invalid; 3501 return float16_default_nan(s); 3502 } 3503 3504 /* frsqrt7(qNaN) = canonical NaN */ 3505 if (float16_is_quiet_nan(f, s)) { 3506 return float16_default_nan(s); 3507 } 3508 3509 /* frsqrt7(+-0) = +-inf */ 3510 if (float16_is_zero(f)) { 3511 s->float_exception_flags |= float_flag_divbyzero; 3512 return float16_set_sign(float16_infinity, sign); 3513 } 3514 3515 /* frsqrt7(+inf) = +0 */ 3516 if (float16_is_infinity(f) && !sign) { 3517 return float16_set_sign(float16_zero, sign); 3518 } 3519 3520 /* +normal, +subnormal */ 3521 uint64_t val = frsqrt7(f, exp_size, frac_size); 3522 return make_float16(val); 3523 } 3524 3525 static float32 frsqrt7_s(float32 f, float_status *s) 3526 { 3527 int exp_size = 8, frac_size = 23; 3528 bool sign = float32_is_neg(f); 3529 3530 /* 3531 * frsqrt7(sNaN) = canonical NaN 3532 * frsqrt7(-inf) = canonical NaN 3533 * frsqrt7(-normal) = canonical NaN 3534 * frsqrt7(-subnormal) = canonical NaN 3535 */ 3536 if (float32_is_signaling_nan(f, s) || 3537 (float32_is_infinity(f) && sign) || 3538 (float32_is_normal(f) && sign) || 3539 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3540 s->float_exception_flags |= float_flag_invalid; 3541 return float32_default_nan(s); 3542 } 3543 3544 /* frsqrt7(qNaN) = canonical NaN */ 3545 if (float32_is_quiet_nan(f, s)) { 3546 return float32_default_nan(s); 3547 } 3548 3549 /* frsqrt7(+-0) = +-inf */ 3550 if (float32_is_zero(f)) { 3551 s->float_exception_flags |= float_flag_divbyzero; 3552 return float32_set_sign(float32_infinity, sign); 3553 } 3554 3555 /* frsqrt7(+inf) = +0 */ 3556 if (float32_is_infinity(f) && !sign) { 3557 return float32_set_sign(float32_zero, sign); 3558 } 3559 3560 /* +normal, +subnormal */ 3561 uint64_t val = frsqrt7(f, exp_size, frac_size); 3562 return make_float32(val); 3563 } 3564 3565 static float64 frsqrt7_d(float64 f, float_status *s) 3566 { 3567 int exp_size = 11, frac_size = 52; 3568 bool sign = float64_is_neg(f); 3569 3570 /* 3571 * frsqrt7(sNaN) = canonical NaN 3572 * frsqrt7(-inf) = canonical NaN 3573 * frsqrt7(-normal) = canonical NaN 3574 * frsqrt7(-subnormal) = canonical NaN 3575 */ 3576 if (float64_is_signaling_nan(f, s) || 3577 (float64_is_infinity(f) && sign) || 3578 (float64_is_normal(f) && sign) || 3579 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3580 
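/* Same handling as the 16- and 32-bit variants: raise invalid, return the canonical NaN. */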
s->float_exception_flags |= float_flag_invalid; 3581 return float64_default_nan(s); 3582 } 3583 3584 /* frsqrt7(qNaN) = canonical NaN */ 3585 if (float64_is_quiet_nan(f, s)) { 3586 return float64_default_nan(s); 3587 } 3588 3589 /* frsqrt7(+-0) = +-inf */ 3590 if (float64_is_zero(f)) { 3591 s->float_exception_flags |= float_flag_divbyzero; 3592 return float64_set_sign(float64_infinity, sign); 3593 } 3594 3595 /* frsqrt7(+inf) = +0 */ 3596 if (float64_is_infinity(f) && !sign) { 3597 return float64_set_sign(float64_zero, sign); 3598 } 3599 3600 /* +normal, +subnormal */ 3601 uint64_t val = frsqrt7(f, exp_size, frac_size); 3602 return make_float64(val); 3603 } 3604 3605 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3606 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3607 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3608 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2) 3609 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4) 3610 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8) 3611 3612 /* 3613 * Vector Floating-Point Reciprocal Estimate Instruction 3614 * 3615 * Adapted from riscv-v-spec recip.c: 3616 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3617 */ 3618 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3619 float_status *s) 3620 { 3621 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3622 uint64_t exp = extract64(f, frac_size, exp_size); 3623 uint64_t frac = extract64(f, 0, frac_size); 3624 3625 const uint8_t lookup_table[] = { 3626 127, 125, 123, 121, 119, 117, 116, 114, 3627 112, 110, 109, 107, 105, 104, 102, 100, 3628 99, 97, 96, 94, 93, 91, 90, 88, 3629 87, 85, 84, 83, 81, 80, 79, 77, 3630 76, 75, 74, 72, 71, 70, 69, 68, 3631 66, 65, 64, 63, 62, 61, 60, 59, 3632 58, 57, 56, 55, 54, 53, 52, 51, 3633 50, 49, 48, 47, 46, 45, 44, 43, 3634 42, 41, 40, 40, 39, 38, 37, 36, 3635 35, 35, 34, 33, 32, 31, 31, 30, 3636 29, 28, 28, 27, 26, 25, 25, 24, 3637 23, 23, 22, 21, 21, 20, 19, 19, 3638 18, 17, 17, 16, 15, 15, 14, 14, 3639 13, 12, 12, 11, 11, 10, 9, 9, 3640 8, 8, 7, 7, 6, 5, 5, 4, 3641 4, 3, 3, 2, 2, 1, 1, 0 3642 }; 3643 const int precision = 7; 3644 3645 if (exp == 0 && frac != 0) { /* subnormal */ 3646 /* Normalize the subnormal. */ 3647 while (extract64(frac, frac_size - 1, 1) == 0) { 3648 exp--; 3649 frac <<= 1; 3650 } 3651 3652 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3653 3654 if (exp != 0 && exp != UINT64_MAX) { 3655 /* 3656 * Overflow to inf or max value of same sign, 3657 * depending on sign and rounding mode. 3658 */ 3659 s->float_exception_flags |= (float_flag_inexact | 3660 float_flag_overflow); 3661 3662 if ((s->float_rounding_mode == float_round_to_zero) || 3663 ((s->float_rounding_mode == float_round_down) && !sign) || 3664 ((s->float_rounding_mode == float_round_up) && sign)) { 3665 /* Return greatest/negative finite value. */ 3666 return (sign << (exp_size + frac_size)) | 3667 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3668 } else { 3669 /* Return +-inf. */ 3670 return (sign << (exp_size + frac_size)) | 3671 MAKE_64BIT_MASK(frac_size, exp_size); 3672 } 3673 } 3674 } 3675 3676 int idx = frac >> (frac_size - precision); 3677 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3678 (frac_size - precision); 3679 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3680 3681 if (out_exp == 0 || out_exp == UINT64_MAX) { 3682 /* 3683 * The result is subnormal, but don't raise the underflow exception, 3684 * because there's no additional loss of precision. 
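* Instead, the implicit leading one is made explicit and the fraction is shifted right; if the exponent went one step below zero, it is shifted once more and the exponent field is clamped to zero.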
3685 */ 3686 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3687 if (out_exp == UINT64_MAX) { 3688 out_frac >>= 1; 3689 out_exp = 0; 3690 } 3691 } 3692 3693 uint64_t val = 0; 3694 val = deposit64(val, 0, frac_size, out_frac); 3695 val = deposit64(val, frac_size, exp_size, out_exp); 3696 val = deposit64(val, frac_size + exp_size, 1, sign); 3697 return val; 3698 } 3699 3700 static float16 frec7_h(float16 f, float_status *s) 3701 { 3702 int exp_size = 5, frac_size = 10; 3703 bool sign = float16_is_neg(f); 3704 3705 /* frec7(+-inf) = +-0 */ 3706 if (float16_is_infinity(f)) { 3707 return float16_set_sign(float16_zero, sign); 3708 } 3709 3710 /* frec7(+-0) = +-inf */ 3711 if (float16_is_zero(f)) { 3712 s->float_exception_flags |= float_flag_divbyzero; 3713 return float16_set_sign(float16_infinity, sign); 3714 } 3715 3716 /* frec7(sNaN) = canonical NaN */ 3717 if (float16_is_signaling_nan(f, s)) { 3718 s->float_exception_flags |= float_flag_invalid; 3719 return float16_default_nan(s); 3720 } 3721 3722 /* frec7(qNaN) = canonical NaN */ 3723 if (float16_is_quiet_nan(f, s)) { 3724 return float16_default_nan(s); 3725 } 3726 3727 /* +-normal, +-subnormal */ 3728 uint64_t val = frec7(f, exp_size, frac_size, s); 3729 return make_float16(val); 3730 } 3731 3732 static float32 frec7_s(float32 f, float_status *s) 3733 { 3734 int exp_size = 8, frac_size = 23; 3735 bool sign = float32_is_neg(f); 3736 3737 /* frec7(+-inf) = +-0 */ 3738 if (float32_is_infinity(f)) { 3739 return float32_set_sign(float32_zero, sign); 3740 } 3741 3742 /* frec7(+-0) = +-inf */ 3743 if (float32_is_zero(f)) { 3744 s->float_exception_flags |= float_flag_divbyzero; 3745 return float32_set_sign(float32_infinity, sign); 3746 } 3747 3748 /* frec7(sNaN) = canonical NaN */ 3749 if (float32_is_signaling_nan(f, s)) { 3750 s->float_exception_flags |= float_flag_invalid; 3751 return float32_default_nan(s); 3752 } 3753 3754 /* frec7(qNaN) = canonical NaN */ 3755 if (float32_is_quiet_nan(f, s)) { 3756 return float32_default_nan(s); 3757 } 3758 3759 /* +-normal, +-subnormal */ 3760 uint64_t val = frec7(f, exp_size, frac_size, s); 3761 return make_float32(val); 3762 } 3763 3764 static float64 frec7_d(float64 f, float_status *s) 3765 { 3766 int exp_size = 11, frac_size = 52; 3767 bool sign = float64_is_neg(f); 3768 3769 /* frec7(+-inf) = +-0 */ 3770 if (float64_is_infinity(f)) { 3771 return float64_set_sign(float64_zero, sign); 3772 } 3773 3774 /* frec7(+-0) = +-inf */ 3775 if (float64_is_zero(f)) { 3776 s->float_exception_flags |= float_flag_divbyzero; 3777 return float64_set_sign(float64_infinity, sign); 3778 } 3779 3780 /* frec7(sNaN) = canonical NaN */ 3781 if (float64_is_signaling_nan(f, s)) { 3782 s->float_exception_flags |= float_flag_invalid; 3783 return float64_default_nan(s); 3784 } 3785 3786 /* frec7(qNaN) = canonical NaN */ 3787 if (float64_is_quiet_nan(f, s)) { 3788 return float64_default_nan(s); 3789 } 3790 3791 /* +-normal, +-subnormal */ 3792 uint64_t val = frec7(f, exp_size, frac_size, s); 3793 return make_float64(val); 3794 } 3795 3796 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3797 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3798 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3799 GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2) 3800 GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4) 3801 GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8) 3802 3803 /* Vector Floating-Point MIN/MAX Instructions */ 3804 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3805 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 3806 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3807 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) 3808 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) 3809 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) 3810 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3811 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3812 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3813 GEN_VEXT_VF(vfmin_vf_h, 2, 2) 3814 GEN_VEXT_VF(vfmin_vf_w, 4, 4) 3815 GEN_VEXT_VF(vfmin_vf_d, 8, 8) 3816 3817 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3818 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3819 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3820 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) 3821 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) 3822 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) 3823 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3824 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3825 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3826 GEN_VEXT_VF(vfmax_vf_h, 2, 2) 3827 GEN_VEXT_VF(vfmax_vf_w, 4, 4) 3828 GEN_VEXT_VF(vfmax_vf_d, 8, 8) 3829 3830 /* Vector Floating-Point Sign-Injection Instructions */ 3831 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3832 { 3833 return deposit64(b, 0, 15, a); 3834 } 3835 3836 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3837 { 3838 return deposit64(b, 0, 31, a); 3839 } 3840 3841 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3842 { 3843 return deposit64(b, 0, 63, a); 3844 } 3845 3846 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3847 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3848 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3849 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) 3850 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) 3851 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) 3852 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3853 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3854 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3855 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) 3856 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) 3857 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) 3858 3859 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3860 { 3861 return deposit64(~b, 0, 15, a); 3862 } 3863 3864 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3865 { 3866 return deposit64(~b, 0, 31, a); 3867 } 3868 3869 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3870 { 3871 return deposit64(~b, 0, 63, a); 3872 } 3873 3874 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3875 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3876 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3877 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) 3878 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) 3879 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) 3880 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3881 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3882 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3883 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) 3884 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) 3885 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) 3886 3887 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3888 { 3889 return deposit64(b ^ a, 0, 15, a); 3890 } 3891 3892 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3893 { 3894 return deposit64(b ^ a, 0, 31, a); 3895 
} 3896 3897 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3898 { 3899 return deposit64(b ^ a, 0, 63, a); 3900 } 3901 3902 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3903 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3904 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3905 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) 3906 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) 3907 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) 3908 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3909 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3910 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3911 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) 3912 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) 3913 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) 3914 3915 /* Vector Floating-Point Compare Instructions */ 3916 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3917 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3918 CPURISCVState *env, uint32_t desc) \ 3919 { \ 3920 uint32_t vm = vext_vm(desc); \ 3921 uint32_t vl = env->vl; \ 3922 uint32_t i; \ 3923 \ 3924 for (i = env->vstart; i < vl; i++) { \ 3925 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3926 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3927 if (!vm && !vext_elem_mask(v0, i)) { \ 3928 continue; \ 3929 } \ 3930 vext_set_elem_mask(vd, i, \ 3931 DO_OP(s2, s1, &env->fp_status)); \ 3932 } \ 3933 env->vstart = 0; \ 3934 } 3935 3936 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3937 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3938 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3939 3940 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3941 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3942 CPURISCVState *env, uint32_t desc) \ 3943 { \ 3944 uint32_t vm = vext_vm(desc); \ 3945 uint32_t vl = env->vl; \ 3946 uint32_t i; \ 3947 \ 3948 for (i = env->vstart; i < vl; i++) { \ 3949 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3950 if (!vm && !vext_elem_mask(v0, i)) { \ 3951 continue; \ 3952 } \ 3953 vext_set_elem_mask(vd, i, \ 3954 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3955 } \ 3956 env->vstart = 0; \ 3957 } 3958 3959 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3960 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3961 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3962 3963 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3964 { 3965 FloatRelation compare = float16_compare_quiet(a, b, s); 3966 return compare != float_relation_equal; 3967 } 3968 3969 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 3970 { 3971 FloatRelation compare = float32_compare_quiet(a, b, s); 3972 return compare != float_relation_equal; 3973 } 3974 3975 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 3976 { 3977 FloatRelation compare = float64_compare_quiet(a, b, s); 3978 return compare != float_relation_equal; 3979 } 3980 3981 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 3982 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 3983 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 3984 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 3985 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 3986 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 3987 3988 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 3989 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 3990 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 3991 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 3992 
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 3993 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 3994 3995 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 3996 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 3997 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 3998 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 3999 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4000 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4001 4002 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4003 { 4004 FloatRelation compare = float16_compare(a, b, s); 4005 return compare == float_relation_greater; 4006 } 4007 4008 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4009 { 4010 FloatRelation compare = float32_compare(a, b, s); 4011 return compare == float_relation_greater; 4012 } 4013 4014 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4015 { 4016 FloatRelation compare = float64_compare(a, b, s); 4017 return compare == float_relation_greater; 4018 } 4019 4020 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4021 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4022 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4023 4024 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4025 { 4026 FloatRelation compare = float16_compare(a, b, s); 4027 return compare == float_relation_greater || 4028 compare == float_relation_equal; 4029 } 4030 4031 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4032 { 4033 FloatRelation compare = float32_compare(a, b, s); 4034 return compare == float_relation_greater || 4035 compare == float_relation_equal; 4036 } 4037 4038 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4039 { 4040 FloatRelation compare = float64_compare(a, b, s); 4041 return compare == float_relation_greater || 4042 compare == float_relation_equal; 4043 } 4044 4045 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4046 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4047 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4048 4049 /* Vector Floating-Point Classify Instruction */ 4050 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4051 static void do_##NAME(void *vd, void *vs2, int i) \ 4052 { \ 4053 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4054 *((TD *)vd + HD(i)) = OP(s2); \ 4055 } 4056 4057 #define GEN_VEXT_V(NAME, ESZ, DSZ) \ 4058 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4059 CPURISCVState *env, uint32_t desc) \ 4060 { \ 4061 uint32_t vm = vext_vm(desc); \ 4062 uint32_t vl = env->vl; \ 4063 uint32_t i; \ 4064 \ 4065 for (i = env->vstart; i < vl; i++) { \ 4066 if (!vm && !vext_elem_mask(v0, i)) { \ 4067 continue; \ 4068 } \ 4069 do_##NAME(vd, vs2, i); \ 4070 } \ 4071 env->vstart = 0; \ 4072 } 4073 4074 target_ulong fclass_h(uint64_t frs1) 4075 { 4076 float16 f = frs1; 4077 bool sign = float16_is_neg(f); 4078 4079 if (float16_is_infinity(f)) { 4080 return sign ? 1 << 0 : 1 << 7; 4081 } else if (float16_is_zero(f)) { 4082 return sign ? 1 << 3 : 1 << 4; 4083 } else if (float16_is_zero_or_denormal(f)) { 4084 return sign ? 1 << 2 : 1 << 5; 4085 } else if (float16_is_any_nan(f)) { 4086 float_status s = { }; /* for snan_bit_is_one */ 4087 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4088 } else { 4089 return sign ? 1 << 1 : 1 << 6; 4090 } 4091 } 4092 4093 target_ulong fclass_s(uint64_t frs1) 4094 { 4095 float32 f = frs1; 4096 bool sign = float32_is_neg(f); 4097 4098 if (float32_is_infinity(f)) { 4099 return sign ? 
1 << 0 : 1 << 7; 4100 } else if (float32_is_zero(f)) { 4101 return sign ? 1 << 3 : 1 << 4; 4102 } else if (float32_is_zero_or_denormal(f)) { 4103 return sign ? 1 << 2 : 1 << 5; 4104 } else if (float32_is_any_nan(f)) { 4105 float_status s = { }; /* for snan_bit_is_one */ 4106 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4107 } else { 4108 return sign ? 1 << 1 : 1 << 6; 4109 } 4110 } 4111 4112 target_ulong fclass_d(uint64_t frs1) 4113 { 4114 float64 f = frs1; 4115 bool sign = float64_is_neg(f); 4116 4117 if (float64_is_infinity(f)) { 4118 return sign ? 1 << 0 : 1 << 7; 4119 } else if (float64_is_zero(f)) { 4120 return sign ? 1 << 3 : 1 << 4; 4121 } else if (float64_is_zero_or_denormal(f)) { 4122 return sign ? 1 << 2 : 1 << 5; 4123 } else if (float64_is_any_nan(f)) { 4124 float_status s = { }; /* for snan_bit_is_one */ 4125 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4126 } else { 4127 return sign ? 1 << 1 : 1 << 6; 4128 } 4129 } 4130 4131 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4132 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4133 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4134 GEN_VEXT_V(vfclass_v_h, 2, 2) 4135 GEN_VEXT_V(vfclass_v_w, 4, 4) 4136 GEN_VEXT_V(vfclass_v_d, 8, 8) 4137 4138 /* Vector Floating-Point Merge Instruction */ 4139 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4140 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4141 CPURISCVState *env, uint32_t desc) \ 4142 { \ 4143 uint32_t vm = vext_vm(desc); \ 4144 uint32_t vl = env->vl; \ 4145 uint32_t i; \ 4146 \ 4147 for (i = env->vstart; i < vl; i++) { \ 4148 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4149 *((ETYPE *)vd + H(i)) \ 4150 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4151 } \ 4152 env->vstart = 0; \ 4153 } 4154 4155 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4156 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4157 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4158 4159 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4160 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4161 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4162 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4163 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4164 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) 4165 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) 4166 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) 4167 4168 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4169 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4170 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4171 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4172 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) 4173 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) 4174 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) 4175 4176 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4177 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4178 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4179 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4180 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) 4181 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) 4182 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) 4183 4184 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. 
*/ 4185 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4186 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4187 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4188 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) 4189 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) 4190 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) 4191 4192 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4193 /* (TD, T2, TX2) */ 4194 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4195 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4196 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4197 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4198 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4199 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4200 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) 4201 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) 4202 4203 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4204 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4205 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4206 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) 4207 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) 4208 4209 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4210 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4211 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4212 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4213 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2) 4214 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) 4215 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) 4216 4217 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4218 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4219 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4220 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4221 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2) 4222 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) 4223 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) 4224 4225 /* 4226 * vfwcvt.f.f.v vd, vs2, vm 4227 * Convert single-width float to double-width float. 4228 */ 4229 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4230 { 4231 return float16_to_float32(a, true, s); 4232 } 4233 4234 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4235 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4236 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4) 4237 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) 4238 4239 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4240 /* (TD, T2, TX2) */ 4241 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4242 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4243 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4244 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4245 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4246 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4247 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4248 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1) 4249 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2) 4250 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4) 4251 4252 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. 
*/ 4253 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4254 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4255 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4256 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1) 4257 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2) 4258 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4) 4259 4260 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4261 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4262 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4263 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2) 4264 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4) 4265 4266 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4267 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4268 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4269 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2) 4270 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4) 4271 4272 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ 4273 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4274 { 4275 return float32_to_float16(a, true, s); 4276 } 4277 4278 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4279 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4280 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2) 4281 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4) 4282 4283 /* 4284 *** Vector Reduction Operations 4285 */ 4286 /* Vector Single-Width Integer Reduction Instructions */ 4287 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4288 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4289 void *vs2, CPURISCVState *env, uint32_t desc) \ 4290 { \ 4291 uint32_t vm = vext_vm(desc); \ 4292 uint32_t vl = env->vl; \ 4293 uint32_t i; \ 4294 TD s1 = *((TD *)vs1 + HD(0)); \ 4295 \ 4296 for (i = env->vstart; i < vl; i++) { \ 4297 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4298 if (!vm && !vext_elem_mask(v0, i)) { \ 4299 continue; \ 4300 } \ 4301 s1 = OP(s1, (TD)s2); \ 4302 } \ 4303 *((TD *)vd + HD(0)) = s1; \ 4304 env->vstart = 0; \ 4305 } 4306 4307 /* vd[0] = sum(vs1[0], vs2[*]) */ 4308 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4309 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4310 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4311 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4312 4313 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4314 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4315 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4316 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4317 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4318 4319 /* vd[0] = max(vs1[0], vs2[*]) */ 4320 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4321 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4322 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4323 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4324 4325 /* vd[0] = minu(vs1[0], vs2[*]) */ 4326 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4327 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4328 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4329 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4330 4331 /* vd[0] = min(vs1[0], vs2[*]) */ 4332 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4333 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4334 
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4335 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4336 4337 /* vd[0] = and(vs1[0], vs2[*]) */ 4338 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4339 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4340 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4341 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4342 4343 /* vd[0] = or(vs1[0], vs2[*]) */ 4344 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4345 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4346 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4347 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4348 4349 /* vd[0] = xor(vs1[0], vs2[*]) */ 4350 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4351 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4352 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4353 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4354 4355 /* Vector Widening Integer Reduction Instructions */ 4356 /* signed sum reduction into double-width accumulator */ 4357 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4358 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4359 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4360 4361 /* Unsigned sum reduction into double-width accumulator */ 4362 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4363 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4364 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4365 4366 /* Vector Single-Width Floating-Point Reduction Instructions */ 4367 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4368 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4369 void *vs2, CPURISCVState *env, \ 4370 uint32_t desc) \ 4371 { \ 4372 uint32_t vm = vext_vm(desc); \ 4373 uint32_t vl = env->vl; \ 4374 uint32_t i; \ 4375 TD s1 = *((TD *)vs1 + HD(0)); \ 4376 \ 4377 for (i = env->vstart; i < vl; i++) { \ 4378 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4379 if (!vm && !vext_elem_mask(v0, i)) { \ 4380 continue; \ 4381 } \ 4382 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4383 } \ 4384 *((TD *)vd + HD(0)) = s1; \ 4385 env->vstart = 0; \ 4386 } 4387 4388 /* Unordered sum */ 4389 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4390 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4391 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4392 4393 /* Maximum value */ 4394 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4395 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4396 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4397 4398 /* Minimum value */ 4399 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4400 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4401 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4402 4403 /* Vector Widening Floating-Point Reduction Instructions */ 4404 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4405 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4406 void *vs2, CPURISCVState *env, uint32_t desc) 4407 { 4408 uint32_t vm = vext_vm(desc); 4409 uint32_t vl = env->vl; 4410 uint32_t i; 4411 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4412 4413 for (i = env->vstart; i < vl; i++) { 
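/* Skip inactive elements; widen the rest from fp16 to fp32 and accumulate. */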
4414 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4415 if (!vm && !vext_elem_mask(v0, i)) { 4416 continue; 4417 } 4418 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4419 &env->fp_status); 4420 } 4421 *((uint32_t *)vd + H4(0)) = s1; 4422 env->vstart = 0; 4423 } 4424 4425 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4426 void *vs2, CPURISCVState *env, uint32_t desc) 4427 { 4428 uint32_t vm = vext_vm(desc); 4429 uint32_t vl = env->vl; 4430 uint32_t i; 4431 uint64_t s1 = *((uint64_t *)vs1); 4432 4433 for (i = env->vstart; i < vl; i++) { 4434 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4435 if (!vm && !vext_elem_mask(v0, i)) { 4436 continue; 4437 } 4438 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4439 &env->fp_status); 4440 } 4441 *((uint64_t *)vd) = s1; 4442 env->vstart = 0; 4443 } 4444 4445 /* 4446 *** Vector Mask Operations 4447 */ 4448 /* Vector Mask-Register Logical Instructions */ 4449 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4450 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4451 void *vs2, CPURISCVState *env, \ 4452 uint32_t desc) \ 4453 { \ 4454 uint32_t vl = env->vl; \ 4455 uint32_t i; \ 4456 int a, b; \ 4457 \ 4458 for (i = env->vstart; i < vl; i++) { \ 4459 a = vext_elem_mask(vs1, i); \ 4460 b = vext_elem_mask(vs2, i); \ 4461 vext_set_elem_mask(vd, i, OP(b, a)); \ 4462 } \ 4463 env->vstart = 0; \ 4464 } 4465 4466 #define DO_NAND(N, M) (!(N & M)) 4467 #define DO_ANDNOT(N, M) (N & !M) 4468 #define DO_NOR(N, M) (!(N | M)) 4469 #define DO_ORNOT(N, M) (N | !M) 4470 #define DO_XNOR(N, M) (!(N ^ M)) 4471 4472 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4473 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4474 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4475 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4476 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4477 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4478 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4479 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4480 4481 /* Vector count population in mask vcpop */ 4482 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4483 uint32_t desc) 4484 { 4485 target_ulong cnt = 0; 4486 uint32_t vm = vext_vm(desc); 4487 uint32_t vl = env->vl; 4488 int i; 4489 4490 for (i = env->vstart; i < vl; i++) { 4491 if (vm || vext_elem_mask(v0, i)) { 4492 if (vext_elem_mask(vs2, i)) { 4493 cnt++; 4494 } 4495 } 4496 } 4497 env->vstart = 0; 4498 return cnt; 4499 } 4500 4501 /* vfirst find-first-set mask bit*/ 4502 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4503 uint32_t desc) 4504 { 4505 uint32_t vm = vext_vm(desc); 4506 uint32_t vl = env->vl; 4507 int i; 4508 4509 for (i = env->vstart; i < vl; i++) { 4510 if (vm || vext_elem_mask(v0, i)) { 4511 if (vext_elem_mask(vs2, i)) { 4512 return i; 4513 } 4514 } 4515 } 4516 env->vstart = 0; 4517 return -1LL; 4518 } 4519 4520 enum set_mask_type { 4521 ONLY_FIRST = 1, 4522 INCLUDE_FIRST, 4523 BEFORE_FIRST, 4524 }; 4525 4526 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4527 uint32_t desc, enum set_mask_type type) 4528 { 4529 uint32_t vm = vext_vm(desc); 4530 uint32_t vl = env->vl; 4531 int i; 4532 bool first_mask_bit = false; 4533 4534 for (i = env->vstart; i < vl; i++) { 4535 if (!vm && !vext_elem_mask(v0, i)) { 4536 continue; 4537 } 4538 /* write a zero to all following active elements */ 4539 if (first_mask_bit) { 4540 vext_set_elem_mask(vd, i, 0); 4541 continue; 4542 } 4543 if (vext_elem_mask(vs2, i)) { 4544 first_mask_bit = true; 4545 if (type == BEFORE_FIRST) { 4546 vext_set_elem_mask(vd, i, 0); 4547 } else { 4548 
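/* INCLUDE_FIRST (vmsif) and ONLY_FIRST (vmsof) both set the element that holds the first mask bit. */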
vext_set_elem_mask(vd, i, 1); 4549 } 4550 } else { 4551 if (type == ONLY_FIRST) { 4552 vext_set_elem_mask(vd, i, 0); 4553 } else { 4554 vext_set_elem_mask(vd, i, 1); 4555 } 4556 } 4557 } 4558 env->vstart = 0; 4559 } 4560 4561 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4562 uint32_t desc) 4563 { 4564 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4565 } 4566 4567 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4568 uint32_t desc) 4569 { 4570 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4571 } 4572 4573 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4574 uint32_t desc) 4575 { 4576 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4577 } 4578 4579 /* Vector Iota Instruction */ 4580 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4581 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4582 uint32_t desc) \ 4583 { \ 4584 uint32_t vm = vext_vm(desc); \ 4585 uint32_t vl = env->vl; \ 4586 uint32_t sum = 0; \ 4587 int i; \ 4588 \ 4589 for (i = env->vstart; i < vl; i++) { \ 4590 if (!vm && !vext_elem_mask(v0, i)) { \ 4591 continue; \ 4592 } \ 4593 *((ETYPE *)vd + H(i)) = sum; \ 4594 if (vext_elem_mask(vs2, i)) { \ 4595 sum++; \ 4596 } \ 4597 } \ 4598 env->vstart = 0; \ 4599 } 4600 4601 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4602 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4603 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4604 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4605 4606 /* Vector Element Index Instruction */ 4607 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4608 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4609 { \ 4610 uint32_t vm = vext_vm(desc); \ 4611 uint32_t vl = env->vl; \ 4612 int i; \ 4613 \ 4614 for (i = env->vstart; i < vl; i++) { \ 4615 if (!vm && !vext_elem_mask(v0, i)) { \ 4616 continue; \ 4617 } \ 4618 *((ETYPE *)vd + H(i)) = i; \ 4619 } \ 4620 env->vstart = 0; \ 4621 } 4622 4623 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4624 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4625 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4626 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4627 4628 /* 4629 *** Vector Permutation Instructions 4630 */ 4631 4632 /* Vector Slide Instructions */ 4633 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4634 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4635 CPURISCVState *env, uint32_t desc) \ 4636 { \ 4637 uint32_t vm = vext_vm(desc); \ 4638 uint32_t vl = env->vl; \ 4639 target_ulong offset = s1, i_min, i; \ 4640 \ 4641 i_min = MAX(env->vstart, offset); \ 4642 for (i = i_min; i < vl; i++) { \ 4643 if (!vm && !vext_elem_mask(v0, i)) { \ 4644 continue; \ 4645 } \ 4646 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4647 } \ 4648 } 4649 4650 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4651 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4652 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4653 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4654 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4655 4656 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4657 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4658 CPURISCVState *env, uint32_t desc) \ 4659 { \ 4660 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4661 uint32_t vm = vext_vm(desc); \ 4662 uint32_t vl = env->vl; \ 4663 target_ulong i_max, i; \ 4664 \ 4665 i_max = MAX(MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl), env->vstart); \ 4666 for (i = env->vstart; i < i_max; ++i) { \ 4667 if (vm || vext_elem_mask(v0, i)) { \ 4668 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4669 } \ 4670 } \ 4671 \ 4672 for (i = i_max; i < vl; ++i) { \ 4673 if (vm || vext_elem_mask(v0, i)) { \ 4674 *((ETYPE *)vd + H(i)) = 0; \ 4675 } \ 4676 } \ 4677 \ 4678 env->vstart = 0; \ 4679 } 4680 4681 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4682 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4683 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4684 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4685 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4686 4687 #define GEN_VEXT_VSLIE1UP(ESZ, H) \ 4688 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4689 CPURISCVState *env, uint32_t desc) \ 4690 { \ 4691 typedef uint##ESZ##_t ETYPE; \ 4692 uint32_t vm = vext_vm(desc); \ 4693 uint32_t vl = env->vl; \ 4694 uint32_t i; \ 4695 \ 4696 for (i = env->vstart; i < vl; i++) { \ 4697 if (!vm && !vext_elem_mask(v0, i)) { \ 4698 continue; \ 4699 } \ 4700 if (i == 0) { \ 4701 *((ETYPE *)vd + H(i)) = s1; \ 4702 } else { \ 4703 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4704 } \ 4705 } \ 4706 env->vstart = 0; \ 4707 } 4708 4709 GEN_VEXT_VSLIE1UP(8, H1) 4710 GEN_VEXT_VSLIE1UP(16, H2) 4711 GEN_VEXT_VSLIE1UP(32, H4) 4712 GEN_VEXT_VSLIE1UP(64, H8) 4713 4714 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \ 4715 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4716 CPURISCVState *env, uint32_t desc) \ 4717 { \ 4718 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4719 } 4720 4721 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4722 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4723 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4724 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4725 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4726 4727 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \ 4728 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4729 CPURISCVState *env, uint32_t desc) \ 4730 { \ 4731 typedef uint##ESZ##_t ETYPE; \ 4732 uint32_t vm = vext_vm(desc); \ 4733 uint32_t vl = env->vl; \ 4734 uint32_t i; \ 4735 \ 4736 for (i = env->vstart; i < vl; i++) { \ 4737 if (!vm && !vext_elem_mask(v0, i)) { \ 4738 continue; \ 4739 } \ 4740 if (i == vl - 1) { \ 4741 *((ETYPE *)vd + H(i)) = s1; \ 4742 } else { \ 4743 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4744 } \ 4745 } \ 4746 env->vstart = 0; \ 4747 } 4748 4749 GEN_VEXT_VSLIDE1DOWN(8, H1) 4750 GEN_VEXT_VSLIDE1DOWN(16, H2) 4751 GEN_VEXT_VSLIDE1DOWN(32, H4) 4752 GEN_VEXT_VSLIDE1DOWN(64, H8) 4753 4754 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \ 4755 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4756 CPURISCVState *env, uint32_t desc) \ 4757 { \ 4758 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4759 } 4760 4761 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4762 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4763 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4764 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4765 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4766 4767 /* Vector Floating-Point Slide Instructions */ 4768 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \ 4769 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4770 CPURISCVState *env, uint32_t desc) \ 4771 { \ 4772 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4773 } 4774 4775 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = 
vs2[i] */ 4776 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4777 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4778 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4779 4780 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \ 4781 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4782 CPURISCVState *env, uint32_t desc) \ 4783 { \ 4784 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4785 } 4786 4787 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4788 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4789 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4790 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4791 4792 /* Vector Register Gather Instruction */ 4793 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4794 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4795 CPURISCVState *env, uint32_t desc) \ 4796 { \ 4797 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4798 uint32_t vm = vext_vm(desc); \ 4799 uint32_t vl = env->vl; \ 4800 uint64_t index; \ 4801 uint32_t i; \ 4802 \ 4803 for (i = env->vstart; i < vl; i++) { \ 4804 if (!vm && !vext_elem_mask(v0, i)) { \ 4805 continue; \ 4806 } \ 4807 index = *((TS1 *)vs1 + HS1(i)); \ 4808 if (index >= vlmax) { \ 4809 *((TS2 *)vd + HS2(i)) = 0; \ 4810 } else { \ 4811 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4812 } \ 4813 } \ 4814 env->vstart = 0; \ 4815 } 4816 4817 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 4818 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4819 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4820 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4821 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4822 4823 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4824 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4825 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4826 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4827 4828 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4829 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4830 CPURISCVState *env, uint32_t desc) \ 4831 { \ 4832 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4833 uint32_t vm = vext_vm(desc); \ 4834 uint32_t vl = env->vl; \ 4835 uint64_t index = s1; \ 4836 uint32_t i; \ 4837 \ 4838 for (i = env->vstart; i < vl; i++) { \ 4839 if (!vm && !vext_elem_mask(v0, i)) { \ 4840 continue; \ 4841 } \ 4842 if (index >= vlmax) { \ 4843 *((ETYPE *)vd + H(i)) = 0; \ 4844 } else { \ 4845 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4846 } \ 4847 } \ 4848 env->vstart = 0; \ 4849 } 4850 4851 /* vd[i] = (x[rs1] >= VLMAX) ? 
0 : vs2[rs1] */ 4852 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4853 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4854 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4855 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4856 4857 /* Vector Compress Instruction */ 4858 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4859 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4860 CPURISCVState *env, uint32_t desc) \ 4861 { \ 4862 uint32_t vl = env->vl; \ 4863 uint32_t num = 0, i; \ 4864 \ 4865 for (i = env->vstart; i < vl; i++) { \ 4866 if (!vext_elem_mask(vs1, i)) { \ 4867 continue; \ 4868 } \ 4869 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4870 num++; \ 4871 } \ 4872 env->vstart = 0; \ 4873 } 4874 4875 /* Compress into vd elements of vs2 where vs1 is enabled */ 4876 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4877 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4878 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4879 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4880 4881 /* Vector Whole Register Move */ 4882 #define GEN_VEXT_VMV_WHOLE(NAME, LEN) \ 4883 void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \ 4884 uint32_t desc) \ 4885 { \ 4886 /* EEW = 8 */ \ 4887 uint32_t maxsz = simd_maxsz(desc); \ 4888 uint32_t i = env->vstart; \ 4889 \ 4890 memcpy((uint8_t *)vd + H1(i), \ 4891 (uint8_t *)vs2 + H1(i), \ 4892 maxsz - env->vstart); \ 4893 \ 4894 env->vstart = 0; \ 4895 } 4896 4897 GEN_VEXT_VMV_WHOLE(vmv1r_v, 1) 4898 GEN_VEXT_VMV_WHOLE(vmv2r_v, 2) 4899 GEN_VEXT_VMV_WHOLE(vmv4r_v, 4) 4900 GEN_VEXT_VMV_WHOLE(vmv8r_v, 8) 4901 4902 /* Vector Integer Extension */ 4903 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \ 4904 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4905 CPURISCVState *env, uint32_t desc) \ 4906 { \ 4907 uint32_t vl = env->vl; \ 4908 uint32_t vm = vext_vm(desc); \ 4909 uint32_t i; \ 4910 \ 4911 for (i = env->vstart; i < vl; i++) { \ 4912 if (!vm && !vext_elem_mask(v0, i)) { \ 4913 continue; \ 4914 } \ 4915 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ 4916 } \ 4917 env->vstart = 0; \ 4918 } 4919 4920 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) 4921 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2) 4922 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4) 4923 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1) 4924 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2) 4925 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1) 4926 4927 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1) 4928 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2) 4929 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) 4930 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) 4931 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) 4932 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) 4933
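/*
 * Illustrative sketch (not part of the helpers above; the sketch_* names are
 * ours): the per-element work of the vzext/vsext helpers is an ordinary C
 * integer conversion from the narrow source type to the wide destination
 * type; GEN_VEXT_INT_EXT only wraps that in masking, vstart handling and
 * host-endian element indexing.
 */
static inline uint64_t sketch_zext_vf8(uint8_t narrow)
{
    return narrow;               /* zero-extends, as in vzext_vf8_d */
}

static inline int64_t sketch_sext_vf8(int8_t narrow)
{
    return narrow;               /* sign-extends, as in vsext_vf8_d */
}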