1 /* 2 * RISC-V Vector Extension Helpers for QEMU. 3 * 4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2 or later, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "cpu.h" 21 #include "exec/memop.h" 22 #include "exec/exec-all.h" 23 #include "exec/helper-proto.h" 24 #include "fpu/softfloat.h" 25 #include "tcg/tcg-gvec-desc.h" 26 #include "internals.h" 27 #include <math.h> 28 29 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, 30 target_ulong s2) 31 { 32 int vlmax, vl; 33 RISCVCPU *cpu = env_archcpu(env); 34 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL); 35 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW); 36 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); 37 bool vill = FIELD_EX64(s2, VTYPE, VILL); 38 target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED); 39 40 if (lmul & 4) { 41 /* Fractional LMUL. */ 42 if (lmul == 4 || 43 cpu->cfg.elen >> (8 - lmul) < sew) { 44 vill = true; 45 } 46 } 47 48 if ((sew > cpu->cfg.elen) 49 || vill 50 || (ediv != 0) 51 || (reserved != 0)) { 52 /* only set vill bit. */ 53 env->vtype = FIELD_DP64(0, VTYPE, VILL, 1); 54 env->vl = 0; 55 env->vstart = 0; 56 return 0; 57 } 58 59 vlmax = vext_get_vlmax(cpu, s2); 60 if (s1 <= vlmax) { 61 vl = s1; 62 } else { 63 vl = vlmax; 64 } 65 env->vl = vl; 66 env->vtype = s2; 67 env->vstart = 0; 68 return vl; 69 } 70 71 /* 72 * Note that vector data is stored in host-endian 64-bit chunks, 73 * so addressing units smaller than that needs a host-endian fixup. 74 */ 75 #ifdef HOST_WORDS_BIGENDIAN 76 #define H1(x) ((x) ^ 7) 77 #define H1_2(x) ((x) ^ 6) 78 #define H1_4(x) ((x) ^ 4) 79 #define H2(x) ((x) ^ 3) 80 #define H4(x) ((x) ^ 1) 81 #define H8(x) ((x)) 82 #else 83 #define H1(x) (x) 84 #define H1_2(x) (x) 85 #define H1_4(x) (x) 86 #define H2(x) (x) 87 #define H4(x) (x) 88 #define H8(x) (x) 89 #endif 90 91 static inline uint32_t vext_nf(uint32_t desc) 92 { 93 return FIELD_EX32(simd_data(desc), VDATA, NF); 94 } 95 96 static inline uint32_t vext_vm(uint32_t desc) 97 { 98 return FIELD_EX32(simd_data(desc), VDATA, VM); 99 } 100 101 /* 102 * Encode LMUL to lmul as following: 103 * LMUL vlmul lmul 104 * 1 000 0 105 * 2 001 1 106 * 4 010 2 107 * 8 011 3 108 * - 100 - 109 * 1/8 101 -3 110 * 1/4 110 -2 111 * 1/2 111 -1 112 */ 113 static inline int32_t vext_lmul(uint32_t desc) 114 { 115 return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); 116 } 117 118 /* 119 * Get vector group length in bytes. Its range is [64, 2048]. 120 * 121 * As simd_desc support at most 256, the max vlen is 512 bits. 122 * So vlen in bytes is encoded as maxsz. 123 */ 124 static inline uint32_t vext_maxsz(uint32_t desc) 125 { 126 return simd_maxsz(desc) << vext_lmul(desc); 127 } 128 129 /* 130 * This function checks watchpoint before real load operation. 131 * 132 * In softmmu mode, the TLB API probe_access is enough for watchpoint check. 133 * In user mode, there is no watchpoint support now. 134 * 135 * It will trigger an exception if there is no mapping in TLB 136 * and page table walk can't fill the TLB entry. Then the guest 137 * software can return here after process the exception or never return. 138 */ 139 static void probe_pages(CPURISCVState *env, target_ulong addr, 140 target_ulong len, uintptr_t ra, 141 MMUAccessType access_type) 142 { 143 target_ulong pagelen = -(addr | TARGET_PAGE_MASK); 144 target_ulong curlen = MIN(pagelen, len); 145 146 probe_access(env, addr, curlen, access_type, 147 cpu_mmu_index(env, false), ra); 148 if (len > curlen) { 149 addr += curlen; 150 curlen = len - curlen; 151 probe_access(env, addr, curlen, access_type, 152 cpu_mmu_index(env, false), ra); 153 } 154 } 155 156 static inline void vext_set_elem_mask(void *v0, int index, 157 uint8_t value) 158 { 159 int idx = index / 64; 160 int pos = index % 64; 161 uint64_t old = ((uint64_t *)v0)[idx]; 162 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); 163 } 164 165 /* 166 * Earlier designs (pre-0.9) had a varying number of bits 167 * per mask value (MLEN). In the 0.9 design, MLEN=1. 168 * (Section 4.5) 169 */ 170 static inline int vext_elem_mask(void *v0, int index) 171 { 172 int idx = index / 64; 173 int pos = index % 64; 174 return (((uint64_t *)v0)[idx] >> pos) & 1; 175 } 176 177 /* elements operations for load and store */ 178 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, 179 uint32_t idx, void *vd, uintptr_t retaddr); 180 181 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 182 static void NAME(CPURISCVState *env, abi_ptr addr, \ 183 uint32_t idx, void *vd, uintptr_t retaddr)\ 184 { \ 185 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 186 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 187 } \ 188 189 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) 190 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw) 191 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) 192 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq) 193 194 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 195 static void NAME(CPURISCVState *env, abi_ptr addr, \ 196 uint32_t idx, void *vd, uintptr_t retaddr)\ 197 { \ 198 ETYPE data = *((ETYPE *)vd + H(idx)); \ 199 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 200 } 201 202 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) 203 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) 204 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) 205 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) 206 207 /* 208 *** stride: access vector element from strided memory 209 */ 210 static void 211 vext_ldst_stride(void *vd, void *v0, target_ulong base, 212 target_ulong stride, CPURISCVState *env, 213 uint32_t desc, uint32_t vm, 214 vext_ldst_elem_fn *ldst_elem, 215 uint32_t esz, uintptr_t ra, MMUAccessType access_type) 216 { 217 uint32_t i, k; 218 uint32_t nf = vext_nf(desc); 219 uint32_t vlmax = vext_maxsz(desc) / esz; 220 221 /* probe every access*/ 222 for (i = 0; i < env->vl; i++) { 223 if (!vm && !vext_elem_mask(v0, i)) { 224 continue; 225 } 226 probe_pages(env, base + stride * i, nf * esz, ra, access_type); 227 } 228 /* do real access */ 229 for (i = 0; i < env->vl; i++) { 230 k = 0; 231 if (!vm && !vext_elem_mask(v0, i)) { 232 continue; 233 } 234 while (k < nf) { 235 target_ulong addr = base + stride * i + k * esz; 236 ldst_elem(env, addr, i + k * vlmax, vd, ra); 237 k++; 238 } 239 } 240 } 241 242 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 243 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 244 target_ulong stride, CPURISCVState *env, \ 245 uint32_t desc) \ 246 { \ 247 uint32_t vm = vext_vm(desc); \ 248 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 249 sizeof(ETYPE), GETPC(), MMU_DATA_LOAD); \ 250 } 251 252 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 253 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 254 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 255 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 256 257 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 258 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 259 target_ulong stride, CPURISCVState *env, \ 260 uint32_t desc) \ 261 { \ 262 uint32_t vm = vext_vm(desc); \ 263 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 264 sizeof(ETYPE), GETPC(), MMU_DATA_STORE); \ 265 } 266 267 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 268 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 269 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 270 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 271 272 /* 273 *** unit-stride: access elements stored contiguously in memory 274 */ 275 276 /* unmasked unit-stride load and store operation*/ 277 static void 278 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 279 vext_ldst_elem_fn *ldst_elem, 280 uint32_t esz, uintptr_t ra, MMUAccessType access_type) 281 { 282 uint32_t i, k; 283 uint32_t nf = vext_nf(desc); 284 uint32_t vlmax = vext_maxsz(desc) / esz; 285 286 /* probe every access */ 287 probe_pages(env, base, env->vl * nf * esz, ra, access_type); 288 /* load bytes from guest memory */ 289 for (i = 0; i < env->vl; i++) { 290 k = 0; 291 while (k < nf) { 292 target_ulong addr = base + (i * nf + k) * esz; 293 ldst_elem(env, addr, i + k * vlmax, vd, ra); 294 k++; 295 } 296 } 297 } 298 299 /* 300 * masked unit-stride load and store operation will be a special case of stride, 301 * stride = NF * sizeof (MTYPE) 302 */ 303 304 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 305 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 306 CPURISCVState *env, uint32_t desc) \ 307 { \ 308 uint32_t stride = vext_nf(desc) * sizeof(ETYPE); \ 309 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 310 sizeof(ETYPE), GETPC(), MMU_DATA_LOAD); \ 311 } \ 312 \ 313 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 314 CPURISCVState *env, uint32_t desc) \ 315 { \ 316 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 317 sizeof(ETYPE), GETPC(), MMU_DATA_LOAD); \ 318 } 319 320 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 321 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 322 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 323 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 324 325 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 326 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 327 CPURISCVState *env, uint32_t desc) \ 328 { \ 329 uint32_t stride = vext_nf(desc) * sizeof(ETYPE); \ 330 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 331 sizeof(ETYPE), GETPC(), MMU_DATA_STORE); \ 332 } \ 333 \ 334 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 335 CPURISCVState *env, uint32_t desc) \ 336 { \ 337 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 338 sizeof(ETYPE), GETPC(), MMU_DATA_STORE); \ 339 } 340 341 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 342 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 343 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 344 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 345 346 /* 347 *** index: access vector element from indexed memory 348 */ 349 typedef target_ulong vext_get_index_addr(target_ulong base, 350 uint32_t idx, void *vs2); 351 352 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 353 static target_ulong NAME(target_ulong base, \ 354 uint32_t idx, void *vs2) \ 355 { \ 356 return (base + *((ETYPE *)vs2 + H(idx))); \ 357 } 358 359 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 360 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 361 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 362 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 363 364 static inline void 365 vext_ldst_index(void *vd, void *v0, target_ulong base, 366 void *vs2, CPURISCVState *env, uint32_t desc, 367 vext_get_index_addr get_index_addr, 368 vext_ldst_elem_fn *ldst_elem, 369 uint32_t esz, uintptr_t ra, MMUAccessType access_type) 370 { 371 uint32_t i, k; 372 uint32_t nf = vext_nf(desc); 373 uint32_t vm = vext_vm(desc); 374 uint32_t vlmax = vext_maxsz(desc) / esz; 375 376 /* probe every access*/ 377 for (i = 0; i < env->vl; i++) { 378 if (!vm && !vext_elem_mask(v0, i)) { 379 continue; 380 } 381 probe_pages(env, get_index_addr(base, i, vs2), nf * esz, ra, 382 access_type); 383 } 384 /* load bytes from guest memory */ 385 for (i = 0; i < env->vl; i++) { 386 k = 0; 387 if (!vm && !vext_elem_mask(v0, i)) { 388 continue; 389 } 390 while (k < nf) { 391 abi_ptr addr = get_index_addr(base, i, vs2) + k * esz; 392 ldst_elem(env, addr, i + k * vlmax, vd, ra); 393 k++; 394 } 395 } 396 } 397 398 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 399 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 400 void *vs2, CPURISCVState *env, uint32_t desc) \ 401 { \ 402 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 403 LOAD_FN, sizeof(ETYPE), GETPC(), MMU_DATA_LOAD); \ 404 } 405 406 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 407 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 408 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 409 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 410 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 411 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 412 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 413 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 414 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 415 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 416 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 417 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 418 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 419 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 420 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 421 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 422 423 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 424 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 425 void *vs2, CPURISCVState *env, uint32_t desc) \ 426 { \ 427 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 428 STORE_FN, sizeof(ETYPE), \ 429 GETPC(), MMU_DATA_STORE); \ 430 } 431 432 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b) 433 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h) 434 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w) 435 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d) 436 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b) 437 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h) 438 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w) 439 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d) 440 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b) 441 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h) 442 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w) 443 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d) 444 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b) 445 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h) 446 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w) 447 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d) 448 449 /* 450 *** unit-stride fault-only-fisrt load instructions 451 */ 452 static inline void 453 vext_ldff(void *vd, void *v0, target_ulong base, 454 CPURISCVState *env, uint32_t desc, 455 vext_ldst_elem_fn *ldst_elem, 456 uint32_t esz, uintptr_t ra) 457 { 458 void *host; 459 uint32_t i, k, vl = 0; 460 uint32_t nf = vext_nf(desc); 461 uint32_t vm = vext_vm(desc); 462 uint32_t vlmax = vext_maxsz(desc) / esz; 463 target_ulong addr, offset, remain; 464 465 /* probe every access*/ 466 for (i = 0; i < env->vl; i++) { 467 if (!vm && !vext_elem_mask(v0, i)) { 468 continue; 469 } 470 addr = base + nf * i * esz; 471 if (i == 0) { 472 probe_pages(env, addr, nf * esz, ra, MMU_DATA_LOAD); 473 } else { 474 /* if it triggers an exception, no need to check watchpoint */ 475 remain = nf * esz; 476 while (remain > 0) { 477 offset = -(addr | TARGET_PAGE_MASK); 478 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, 479 cpu_mmu_index(env, false)); 480 if (host) { 481 #ifdef CONFIG_USER_ONLY 482 if (page_check_range(addr, nf * esz, PAGE_READ) < 0) { 483 vl = i; 484 goto ProbeSuccess; 485 } 486 #else 487 probe_pages(env, addr, nf * esz, ra, MMU_DATA_LOAD); 488 #endif 489 } else { 490 vl = i; 491 goto ProbeSuccess; 492 } 493 if (remain <= offset) { 494 break; 495 } 496 remain -= offset; 497 addr += offset; 498 } 499 } 500 } 501 ProbeSuccess: 502 /* load bytes from guest memory */ 503 if (vl != 0) { 504 env->vl = vl; 505 } 506 for (i = 0; i < env->vl; i++) { 507 k = 0; 508 if (!vm && !vext_elem_mask(v0, i)) { 509 continue; 510 } 511 while (k < nf) { 512 target_ulong addr = base + (i * nf + k) * esz; 513 ldst_elem(env, addr, i + k * vlmax, vd, ra); 514 k++; 515 } 516 } 517 } 518 519 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ 520 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 521 CPURISCVState *env, uint32_t desc) \ 522 { \ 523 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \ 524 sizeof(ETYPE), GETPC()); \ 525 } 526 527 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b) 528 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h) 529 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w) 530 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) 531 532 #define DO_SWAP(N, M) (M) 533 #define DO_AND(N, M) (N & M) 534 #define DO_XOR(N, M) (N ^ M) 535 #define DO_OR(N, M) (N | M) 536 #define DO_ADD(N, M) (N + M) 537 538 /* Signed min/max */ 539 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 540 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 541 542 /* Unsigned min/max */ 543 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 544 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 545 546 /* 547 *** Vector Integer Arithmetic Instructions 548 */ 549 550 /* expand macro args before macro */ 551 #define RVVCALL(macro, ...) macro(__VA_ARGS__) 552 553 /* (TD, T1, T2, TX1, TX2) */ 554 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 555 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 556 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 557 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 558 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 559 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 560 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 561 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 562 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 563 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 564 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 565 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 566 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 567 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 568 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 569 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 570 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 571 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 572 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 573 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 574 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 575 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 576 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 577 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 578 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 579 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 580 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 581 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 582 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 583 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 584 585 /* operation of two vector elements */ 586 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 587 588 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 589 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 590 { \ 591 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 592 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 593 *((TD *)vd + HD(i)) = OP(s2, s1); \ 594 } 595 #define DO_SUB(N, M) (N - M) 596 #define DO_RSUB(N, M) (M - N) 597 598 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 599 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 600 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 601 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 602 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 603 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 604 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 605 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 606 607 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 608 CPURISCVState *env, uint32_t desc, 609 uint32_t esz, uint32_t dsz, 610 opivv2_fn *fn) 611 { 612 uint32_t vm = vext_vm(desc); 613 uint32_t vl = env->vl; 614 uint32_t i; 615 616 for (i = 0; i < vl; i++) { 617 if (!vm && !vext_elem_mask(v0, i)) { 618 continue; 619 } 620 fn(vd, vs1, vs2, i); 621 } 622 } 623 624 /* generate the helpers for OPIVV */ 625 #define GEN_VEXT_VV(NAME, ESZ, DSZ) \ 626 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 627 void *vs2, CPURISCVState *env, \ 628 uint32_t desc) \ 629 { \ 630 do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 631 do_##NAME); \ 632 } 633 634 GEN_VEXT_VV(vadd_vv_b, 1, 1) 635 GEN_VEXT_VV(vadd_vv_h, 2, 2) 636 GEN_VEXT_VV(vadd_vv_w, 4, 4) 637 GEN_VEXT_VV(vadd_vv_d, 8, 8) 638 GEN_VEXT_VV(vsub_vv_b, 1, 1) 639 GEN_VEXT_VV(vsub_vv_h, 2, 2) 640 GEN_VEXT_VV(vsub_vv_w, 4, 4) 641 GEN_VEXT_VV(vsub_vv_d, 8, 8) 642 643 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 644 645 /* 646 * (T1)s1 gives the real operator type. 647 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 648 */ 649 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 650 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 651 { \ 652 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 653 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 654 } 655 656 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 657 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 658 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 659 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 660 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 661 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 662 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 663 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 664 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 665 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 666 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 667 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 668 669 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 670 CPURISCVState *env, uint32_t desc, 671 uint32_t esz, uint32_t dsz, 672 opivx2_fn fn) 673 { 674 uint32_t vm = vext_vm(desc); 675 uint32_t vl = env->vl; 676 uint32_t i; 677 678 for (i = 0; i < vl; i++) { 679 if (!vm && !vext_elem_mask(v0, i)) { 680 continue; 681 } 682 fn(vd, s1, vs2, i); 683 } 684 } 685 686 /* generate the helpers for OPIVX */ 687 #define GEN_VEXT_VX(NAME, ESZ, DSZ) \ 688 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 689 void *vs2, CPURISCVState *env, \ 690 uint32_t desc) \ 691 { \ 692 do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 693 do_##NAME); \ 694 } 695 696 GEN_VEXT_VX(vadd_vx_b, 1, 1) 697 GEN_VEXT_VX(vadd_vx_h, 2, 2) 698 GEN_VEXT_VX(vadd_vx_w, 4, 4) 699 GEN_VEXT_VX(vadd_vx_d, 8, 8) 700 GEN_VEXT_VX(vsub_vx_b, 1, 1) 701 GEN_VEXT_VX(vsub_vx_h, 2, 2) 702 GEN_VEXT_VX(vsub_vx_w, 4, 4) 703 GEN_VEXT_VX(vsub_vx_d, 8, 8) 704 GEN_VEXT_VX(vrsub_vx_b, 1, 1) 705 GEN_VEXT_VX(vrsub_vx_h, 2, 2) 706 GEN_VEXT_VX(vrsub_vx_w, 4, 4) 707 GEN_VEXT_VX(vrsub_vx_d, 8, 8) 708 709 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 710 { 711 intptr_t oprsz = simd_oprsz(desc); 712 intptr_t i; 713 714 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 715 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 716 } 717 } 718 719 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 720 { 721 intptr_t oprsz = simd_oprsz(desc); 722 intptr_t i; 723 724 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 725 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 726 } 727 } 728 729 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 730 { 731 intptr_t oprsz = simd_oprsz(desc); 732 intptr_t i; 733 734 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 735 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 736 } 737 } 738 739 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 740 { 741 intptr_t oprsz = simd_oprsz(desc); 742 intptr_t i; 743 744 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 745 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 746 } 747 } 748 749 /* Vector Widening Integer Add/Subtract */ 750 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 751 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 752 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 753 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 754 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 755 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 756 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 757 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 758 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 759 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 760 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 761 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 762 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 763 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 764 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 765 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 766 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 767 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 768 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 769 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 770 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 771 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 772 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 773 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 774 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 775 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 776 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 777 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 778 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 779 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 780 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 781 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 782 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 783 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 784 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 785 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 786 GEN_VEXT_VV(vwaddu_vv_b, 1, 2) 787 GEN_VEXT_VV(vwaddu_vv_h, 2, 4) 788 GEN_VEXT_VV(vwaddu_vv_w, 4, 8) 789 GEN_VEXT_VV(vwsubu_vv_b, 1, 2) 790 GEN_VEXT_VV(vwsubu_vv_h, 2, 4) 791 GEN_VEXT_VV(vwsubu_vv_w, 4, 8) 792 GEN_VEXT_VV(vwadd_vv_b, 1, 2) 793 GEN_VEXT_VV(vwadd_vv_h, 2, 4) 794 GEN_VEXT_VV(vwadd_vv_w, 4, 8) 795 GEN_VEXT_VV(vwsub_vv_b, 1, 2) 796 GEN_VEXT_VV(vwsub_vv_h, 2, 4) 797 GEN_VEXT_VV(vwsub_vv_w, 4, 8) 798 GEN_VEXT_VV(vwaddu_wv_b, 1, 2) 799 GEN_VEXT_VV(vwaddu_wv_h, 2, 4) 800 GEN_VEXT_VV(vwaddu_wv_w, 4, 8) 801 GEN_VEXT_VV(vwsubu_wv_b, 1, 2) 802 GEN_VEXT_VV(vwsubu_wv_h, 2, 4) 803 GEN_VEXT_VV(vwsubu_wv_w, 4, 8) 804 GEN_VEXT_VV(vwadd_wv_b, 1, 2) 805 GEN_VEXT_VV(vwadd_wv_h, 2, 4) 806 GEN_VEXT_VV(vwadd_wv_w, 4, 8) 807 GEN_VEXT_VV(vwsub_wv_b, 1, 2) 808 GEN_VEXT_VV(vwsub_wv_h, 2, 4) 809 GEN_VEXT_VV(vwsub_wv_w, 4, 8) 810 811 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 812 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 813 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 814 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 815 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 816 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 817 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 818 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 819 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 820 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 821 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 822 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 823 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 824 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 825 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 826 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 827 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 828 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 829 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 830 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 831 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 832 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 833 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 834 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 835 GEN_VEXT_VX(vwaddu_vx_b, 1, 2) 836 GEN_VEXT_VX(vwaddu_vx_h, 2, 4) 837 GEN_VEXT_VX(vwaddu_vx_w, 4, 8) 838 GEN_VEXT_VX(vwsubu_vx_b, 1, 2) 839 GEN_VEXT_VX(vwsubu_vx_h, 2, 4) 840 GEN_VEXT_VX(vwsubu_vx_w, 4, 8) 841 GEN_VEXT_VX(vwadd_vx_b, 1, 2) 842 GEN_VEXT_VX(vwadd_vx_h, 2, 4) 843 GEN_VEXT_VX(vwadd_vx_w, 4, 8) 844 GEN_VEXT_VX(vwsub_vx_b, 1, 2) 845 GEN_VEXT_VX(vwsub_vx_h, 2, 4) 846 GEN_VEXT_VX(vwsub_vx_w, 4, 8) 847 GEN_VEXT_VX(vwaddu_wx_b, 1, 2) 848 GEN_VEXT_VX(vwaddu_wx_h, 2, 4) 849 GEN_VEXT_VX(vwaddu_wx_w, 4, 8) 850 GEN_VEXT_VX(vwsubu_wx_b, 1, 2) 851 GEN_VEXT_VX(vwsubu_wx_h, 2, 4) 852 GEN_VEXT_VX(vwsubu_wx_w, 4, 8) 853 GEN_VEXT_VX(vwadd_wx_b, 1, 2) 854 GEN_VEXT_VX(vwadd_wx_h, 2, 4) 855 GEN_VEXT_VX(vwadd_wx_w, 4, 8) 856 GEN_VEXT_VX(vwsub_wx_b, 1, 2) 857 GEN_VEXT_VX(vwsub_wx_h, 2, 4) 858 GEN_VEXT_VX(vwsub_wx_w, 4, 8) 859 860 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 861 #define DO_VADC(N, M, C) (N + M + C) 862 #define DO_VSBC(N, M, C) (N - M - C) 863 864 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 865 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 866 CPURISCVState *env, uint32_t desc) \ 867 { \ 868 uint32_t vl = env->vl; \ 869 uint32_t i; \ 870 \ 871 for (i = 0; i < vl; i++) { \ 872 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 873 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 874 uint8_t carry = vext_elem_mask(v0, i); \ 875 \ 876 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 877 } \ 878 } 879 880 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 881 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 882 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 883 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 884 885 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 886 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 887 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 888 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 889 890 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 891 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 892 CPURISCVState *env, uint32_t desc) \ 893 { \ 894 uint32_t vl = env->vl; \ 895 uint32_t i; \ 896 \ 897 for (i = 0; i < vl; i++) { \ 898 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 899 uint8_t carry = vext_elem_mask(v0, i); \ 900 \ 901 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 902 } \ 903 } 904 905 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 906 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 907 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 908 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 909 910 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 911 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 912 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 913 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 914 915 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 916 (__typeof(N))(N + M) < N) 917 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 918 919 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 920 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 921 CPURISCVState *env, uint32_t desc) \ 922 { \ 923 uint32_t vl = env->vl; \ 924 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 925 uint32_t i; \ 926 \ 927 for (i = 0; i < vl; i++) { \ 928 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 929 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 930 uint8_t carry = vext_elem_mask(v0, i); \ 931 \ 932 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 933 } \ 934 for (; i < vlmax; i++) { \ 935 vext_set_elem_mask(vd, i, 0); \ 936 } \ 937 } 938 939 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 940 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 941 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 942 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 943 944 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 945 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 946 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 947 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 948 949 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 950 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 951 void *vs2, CPURISCVState *env, uint32_t desc) \ 952 { \ 953 uint32_t vl = env->vl; \ 954 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 955 uint32_t i; \ 956 \ 957 for (i = 0; i < vl; i++) { \ 958 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 959 uint8_t carry = vext_elem_mask(v0, i); \ 960 \ 961 vext_set_elem_mask(vd, i, \ 962 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 963 } \ 964 for (; i < vlmax; i++) { \ 965 vext_set_elem_mask(vd, i, 0); \ 966 } \ 967 } 968 969 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 970 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 971 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 972 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 973 974 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 975 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 976 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 977 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 978 979 /* Vector Bitwise Logical Instructions */ 980 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 981 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 982 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 983 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 984 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 985 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 986 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 987 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 988 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 989 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 990 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 991 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 992 GEN_VEXT_VV(vand_vv_b, 1, 1) 993 GEN_VEXT_VV(vand_vv_h, 2, 2) 994 GEN_VEXT_VV(vand_vv_w, 4, 4) 995 GEN_VEXT_VV(vand_vv_d, 8, 8) 996 GEN_VEXT_VV(vor_vv_b, 1, 1) 997 GEN_VEXT_VV(vor_vv_h, 2, 2) 998 GEN_VEXT_VV(vor_vv_w, 4, 4) 999 GEN_VEXT_VV(vor_vv_d, 8, 8) 1000 GEN_VEXT_VV(vxor_vv_b, 1, 1) 1001 GEN_VEXT_VV(vxor_vv_h, 2, 2) 1002 GEN_VEXT_VV(vxor_vv_w, 4, 4) 1003 GEN_VEXT_VV(vxor_vv_d, 8, 8) 1004 1005 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1006 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1007 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1008 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1009 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1010 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1011 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1012 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1013 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1014 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1015 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1016 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1017 GEN_VEXT_VX(vand_vx_b, 1, 1) 1018 GEN_VEXT_VX(vand_vx_h, 2, 2) 1019 GEN_VEXT_VX(vand_vx_w, 4, 4) 1020 GEN_VEXT_VX(vand_vx_d, 8, 8) 1021 GEN_VEXT_VX(vor_vx_b, 1, 1) 1022 GEN_VEXT_VX(vor_vx_h, 2, 2) 1023 GEN_VEXT_VX(vor_vx_w, 4, 4) 1024 GEN_VEXT_VX(vor_vx_d, 8, 8) 1025 GEN_VEXT_VX(vxor_vx_b, 1, 1) 1026 GEN_VEXT_VX(vxor_vx_h, 2, 2) 1027 GEN_VEXT_VX(vxor_vx_w, 4, 4) 1028 GEN_VEXT_VX(vxor_vx_d, 8, 8) 1029 1030 /* Vector Single-Width Bit Shift Instructions */ 1031 #define DO_SLL(N, M) (N << (M)) 1032 #define DO_SRL(N, M) (N >> (M)) 1033 1034 /* generate the helpers for shift instructions with two vector operators */ 1035 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1036 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1037 void *vs2, CPURISCVState *env, uint32_t desc) \ 1038 { \ 1039 uint32_t vm = vext_vm(desc); \ 1040 uint32_t vl = env->vl; \ 1041 uint32_t i; \ 1042 \ 1043 for (i = 0; i < vl; i++) { \ 1044 if (!vm && !vext_elem_mask(v0, i)) { \ 1045 continue; \ 1046 } \ 1047 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1048 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1049 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1050 } \ 1051 } 1052 1053 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1054 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1055 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1056 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1057 1058 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1059 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1060 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1061 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1062 1063 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1064 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1065 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1066 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1067 1068 /* generate the helpers for shift instructions with one vector and one scalar */ 1069 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1070 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1071 void *vs2, CPURISCVState *env, uint32_t desc) \ 1072 { \ 1073 uint32_t vm = vext_vm(desc); \ 1074 uint32_t vl = env->vl; \ 1075 uint32_t i; \ 1076 \ 1077 for (i = 0; i < vl; i++) { \ 1078 if (!vm && !vext_elem_mask(v0, i)) { \ 1079 continue; \ 1080 } \ 1081 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1082 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1083 } \ 1084 } 1085 1086 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1087 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1088 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1089 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1090 1091 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1092 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1093 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1094 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1095 1096 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1097 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1098 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1099 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1100 1101 /* Vector Narrowing Integer Right Shift Instructions */ 1102 GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1103 GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1104 GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1105 GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1106 GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1107 GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1108 GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1109 GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1110 GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1111 GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1112 GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1113 GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1114 1115 /* Vector Integer Comparison Instructions */ 1116 #define DO_MSEQ(N, M) (N == M) 1117 #define DO_MSNE(N, M) (N != M) 1118 #define DO_MSLT(N, M) (N < M) 1119 #define DO_MSLE(N, M) (N <= M) 1120 #define DO_MSGT(N, M) (N > M) 1121 1122 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1123 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1124 CPURISCVState *env, uint32_t desc) \ 1125 { \ 1126 uint32_t vm = vext_vm(desc); \ 1127 uint32_t vl = env->vl; \ 1128 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 1129 uint32_t i; \ 1130 \ 1131 for (i = 0; i < vl; i++) { \ 1132 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1133 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1134 if (!vm && !vext_elem_mask(v0, i)) { \ 1135 continue; \ 1136 } \ 1137 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1138 } \ 1139 for (; i < vlmax; i++) { \ 1140 vext_set_elem_mask(vd, i, 0); \ 1141 } \ 1142 } 1143 1144 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1145 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1146 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1147 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1148 1149 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1150 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1151 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1152 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1153 1154 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1155 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1156 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1157 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1158 1159 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1160 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1161 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1162 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1163 1164 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1165 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1166 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1167 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1168 1169 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1170 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1171 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1172 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1173 1174 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1175 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1176 CPURISCVState *env, uint32_t desc) \ 1177 { \ 1178 uint32_t vm = vext_vm(desc); \ 1179 uint32_t vl = env->vl; \ 1180 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 1181 uint32_t i; \ 1182 \ 1183 for (i = 0; i < vl; i++) { \ 1184 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1185 if (!vm && !vext_elem_mask(v0, i)) { \ 1186 continue; \ 1187 } \ 1188 vext_set_elem_mask(vd, i, \ 1189 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1190 } \ 1191 for (; i < vlmax; i++) { \ 1192 vext_set_elem_mask(vd, i, 0); \ 1193 } \ 1194 } 1195 1196 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1197 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1198 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1199 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1200 1201 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1202 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1203 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1204 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1205 1206 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1207 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1208 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1209 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1210 1211 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1212 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1213 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1214 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1215 1216 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1217 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1218 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1219 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1220 1221 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1222 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1223 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1224 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1225 1226 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1227 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1228 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1229 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1230 1231 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1232 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1233 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1234 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1235 1236 /* Vector Integer Min/Max Instructions */ 1237 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1238 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1239 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1240 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1241 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1242 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1243 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1244 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1245 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1246 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1247 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1248 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1249 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1250 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1251 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1252 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1253 GEN_VEXT_VV(vminu_vv_b, 1, 1) 1254 GEN_VEXT_VV(vminu_vv_h, 2, 2) 1255 GEN_VEXT_VV(vminu_vv_w, 4, 4) 1256 GEN_VEXT_VV(vminu_vv_d, 8, 8) 1257 GEN_VEXT_VV(vmin_vv_b, 1, 1) 1258 GEN_VEXT_VV(vmin_vv_h, 2, 2) 1259 GEN_VEXT_VV(vmin_vv_w, 4, 4) 1260 GEN_VEXT_VV(vmin_vv_d, 8, 8) 1261 GEN_VEXT_VV(vmaxu_vv_b, 1, 1) 1262 GEN_VEXT_VV(vmaxu_vv_h, 2, 2) 1263 GEN_VEXT_VV(vmaxu_vv_w, 4, 4) 1264 GEN_VEXT_VV(vmaxu_vv_d, 8, 8) 1265 GEN_VEXT_VV(vmax_vv_b, 1, 1) 1266 GEN_VEXT_VV(vmax_vv_h, 2, 2) 1267 GEN_VEXT_VV(vmax_vv_w, 4, 4) 1268 GEN_VEXT_VV(vmax_vv_d, 8, 8) 1269 1270 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1271 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1272 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1273 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1274 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1275 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1276 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1277 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1278 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1279 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1280 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1281 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1282 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1283 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1284 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1285 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1286 GEN_VEXT_VX(vminu_vx_b, 1, 1) 1287 GEN_VEXT_VX(vminu_vx_h, 2, 2) 1288 GEN_VEXT_VX(vminu_vx_w, 4, 4) 1289 GEN_VEXT_VX(vminu_vx_d, 8, 8) 1290 GEN_VEXT_VX(vmin_vx_b, 1, 1) 1291 GEN_VEXT_VX(vmin_vx_h, 2, 2) 1292 GEN_VEXT_VX(vmin_vx_w, 4, 4) 1293 GEN_VEXT_VX(vmin_vx_d, 8, 8) 1294 GEN_VEXT_VX(vmaxu_vx_b, 1, 1) 1295 GEN_VEXT_VX(vmaxu_vx_h, 2, 2) 1296 GEN_VEXT_VX(vmaxu_vx_w, 4, 4) 1297 GEN_VEXT_VX(vmaxu_vx_d, 8, 8) 1298 GEN_VEXT_VX(vmax_vx_b, 1, 1) 1299 GEN_VEXT_VX(vmax_vx_h, 2, 2) 1300 GEN_VEXT_VX(vmax_vx_w, 4, 4) 1301 GEN_VEXT_VX(vmax_vx_d, 8, 8) 1302 1303 /* Vector Single-Width Integer Multiply Instructions */ 1304 #define DO_MUL(N, M) (N * M) 1305 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1306 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1307 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1308 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1309 GEN_VEXT_VV(vmul_vv_b, 1, 1) 1310 GEN_VEXT_VV(vmul_vv_h, 2, 2) 1311 GEN_VEXT_VV(vmul_vv_w, 4, 4) 1312 GEN_VEXT_VV(vmul_vv_d, 8, 8) 1313 1314 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1315 { 1316 return (int16_t)s2 * (int16_t)s1 >> 8; 1317 } 1318 1319 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1320 { 1321 return (int32_t)s2 * (int32_t)s1 >> 16; 1322 } 1323 1324 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1325 { 1326 return (int64_t)s2 * (int64_t)s1 >> 32; 1327 } 1328 1329 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1330 { 1331 uint64_t hi_64, lo_64; 1332 1333 muls64(&lo_64, &hi_64, s1, s2); 1334 return hi_64; 1335 } 1336 1337 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1338 { 1339 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1340 } 1341 1342 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1343 { 1344 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1345 } 1346 1347 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1348 { 1349 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1350 } 1351 1352 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1353 { 1354 uint64_t hi_64, lo_64; 1355 1356 mulu64(&lo_64, &hi_64, s2, s1); 1357 return hi_64; 1358 } 1359 1360 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1361 { 1362 return (int16_t)s2 * (uint16_t)s1 >> 8; 1363 } 1364 1365 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1366 { 1367 return (int32_t)s2 * (uint32_t)s1 >> 16; 1368 } 1369 1370 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1371 { 1372 return (int64_t)s2 * (uint64_t)s1 >> 32; 1373 } 1374 1375 /* 1376 * Let A = signed operand, 1377 * B = unsigned operand 1378 * P = mulu64(A, B), unsigned product 1379 * 1380 * LET X = 2 ** 64 - A, 2's complement of A 1381 * SP = signed product 1382 * THEN 1383 * IF A < 0 1384 * SP = -X * B 1385 * = -(2 ** 64 - A) * B 1386 * = A * B - 2 ** 64 * B 1387 * = P - 2 ** 64 * B 1388 * ELSE 1389 * SP = P 1390 * THEN 1391 * HI_P -= (A < 0 ? B : 0) 1392 */ 1393 1394 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1395 { 1396 uint64_t hi_64, lo_64; 1397 1398 mulu64(&lo_64, &hi_64, s2, s1); 1399 1400 hi_64 -= s2 < 0 ? s1 : 0; 1401 return hi_64; 1402 } 1403 1404 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1405 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1406 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1407 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1408 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1409 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1410 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1411 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1412 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1413 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1414 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1415 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1416 GEN_VEXT_VV(vmulh_vv_b, 1, 1) 1417 GEN_VEXT_VV(vmulh_vv_h, 2, 2) 1418 GEN_VEXT_VV(vmulh_vv_w, 4, 4) 1419 GEN_VEXT_VV(vmulh_vv_d, 8, 8) 1420 GEN_VEXT_VV(vmulhu_vv_b, 1, 1) 1421 GEN_VEXT_VV(vmulhu_vv_h, 2, 2) 1422 GEN_VEXT_VV(vmulhu_vv_w, 4, 4) 1423 GEN_VEXT_VV(vmulhu_vv_d, 8, 8) 1424 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) 1425 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) 1426 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) 1427 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) 1428 1429 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1430 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1431 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1432 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1433 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1434 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1435 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1436 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1437 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1438 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1439 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1440 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1441 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1442 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1443 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1444 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1445 GEN_VEXT_VX(vmul_vx_b, 1, 1) 1446 GEN_VEXT_VX(vmul_vx_h, 2, 2) 1447 GEN_VEXT_VX(vmul_vx_w, 4, 4) 1448 GEN_VEXT_VX(vmul_vx_d, 8, 8) 1449 GEN_VEXT_VX(vmulh_vx_b, 1, 1) 1450 GEN_VEXT_VX(vmulh_vx_h, 2, 2) 1451 GEN_VEXT_VX(vmulh_vx_w, 4, 4) 1452 GEN_VEXT_VX(vmulh_vx_d, 8, 8) 1453 GEN_VEXT_VX(vmulhu_vx_b, 1, 1) 1454 GEN_VEXT_VX(vmulhu_vx_h, 2, 2) 1455 GEN_VEXT_VX(vmulhu_vx_w, 4, 4) 1456 GEN_VEXT_VX(vmulhu_vx_d, 8, 8) 1457 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) 1458 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) 1459 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) 1460 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) 1461 1462 /* Vector Integer Divide Instructions */ 1463 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1464 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1465 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1466 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1467 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1468 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1469 1470 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1471 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1472 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1473 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1474 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1475 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1476 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1477 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1478 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1479 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1480 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1481 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1482 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1483 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1484 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1485 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1486 GEN_VEXT_VV(vdivu_vv_b, 1, 1) 1487 GEN_VEXT_VV(vdivu_vv_h, 2, 2) 1488 GEN_VEXT_VV(vdivu_vv_w, 4, 4) 1489 GEN_VEXT_VV(vdivu_vv_d, 8, 8) 1490 GEN_VEXT_VV(vdiv_vv_b, 1, 1) 1491 GEN_VEXT_VV(vdiv_vv_h, 2, 2) 1492 GEN_VEXT_VV(vdiv_vv_w, 4, 4) 1493 GEN_VEXT_VV(vdiv_vv_d, 8, 8) 1494 GEN_VEXT_VV(vremu_vv_b, 1, 1) 1495 GEN_VEXT_VV(vremu_vv_h, 2, 2) 1496 GEN_VEXT_VV(vremu_vv_w, 4, 4) 1497 GEN_VEXT_VV(vremu_vv_d, 8, 8) 1498 GEN_VEXT_VV(vrem_vv_b, 1, 1) 1499 GEN_VEXT_VV(vrem_vv_h, 2, 2) 1500 GEN_VEXT_VV(vrem_vv_w, 4, 4) 1501 GEN_VEXT_VV(vrem_vv_d, 8, 8) 1502 1503 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1504 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1505 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1506 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1507 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1508 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1509 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1510 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1511 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1512 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1513 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1514 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1515 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1516 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1517 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1518 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1519 GEN_VEXT_VX(vdivu_vx_b, 1, 1) 1520 GEN_VEXT_VX(vdivu_vx_h, 2, 2) 1521 GEN_VEXT_VX(vdivu_vx_w, 4, 4) 1522 GEN_VEXT_VX(vdivu_vx_d, 8, 8) 1523 GEN_VEXT_VX(vdiv_vx_b, 1, 1) 1524 GEN_VEXT_VX(vdiv_vx_h, 2, 2) 1525 GEN_VEXT_VX(vdiv_vx_w, 4, 4) 1526 GEN_VEXT_VX(vdiv_vx_d, 8, 8) 1527 GEN_VEXT_VX(vremu_vx_b, 1, 1) 1528 GEN_VEXT_VX(vremu_vx_h, 2, 2) 1529 GEN_VEXT_VX(vremu_vx_w, 4, 4) 1530 GEN_VEXT_VX(vremu_vx_d, 8, 8) 1531 GEN_VEXT_VX(vrem_vx_b, 1, 1) 1532 GEN_VEXT_VX(vrem_vx_h, 2, 2) 1533 GEN_VEXT_VX(vrem_vx_w, 4, 4) 1534 GEN_VEXT_VX(vrem_vx_d, 8, 8) 1535 1536 /* Vector Widening Integer Multiply Instructions */ 1537 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1538 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1539 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1540 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1541 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1542 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1543 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1544 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1545 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1546 GEN_VEXT_VV(vwmul_vv_b, 1, 2) 1547 GEN_VEXT_VV(vwmul_vv_h, 2, 4) 1548 GEN_VEXT_VV(vwmul_vv_w, 4, 8) 1549 GEN_VEXT_VV(vwmulu_vv_b, 1, 2) 1550 GEN_VEXT_VV(vwmulu_vv_h, 2, 4) 1551 GEN_VEXT_VV(vwmulu_vv_w, 4, 8) 1552 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) 1553 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) 1554 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) 1555 1556 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1557 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1558 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1559 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1560 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1561 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1562 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1563 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1564 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1565 GEN_VEXT_VX(vwmul_vx_b, 1, 2) 1566 GEN_VEXT_VX(vwmul_vx_h, 2, 4) 1567 GEN_VEXT_VX(vwmul_vx_w, 4, 8) 1568 GEN_VEXT_VX(vwmulu_vx_b, 1, 2) 1569 GEN_VEXT_VX(vwmulu_vx_h, 2, 4) 1570 GEN_VEXT_VX(vwmulu_vx_w, 4, 8) 1571 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) 1572 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) 1573 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) 1574 1575 /* Vector Single-Width Integer Multiply-Add Instructions */ 1576 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1577 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1578 { \ 1579 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1580 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1581 TD d = *((TD *)vd + HD(i)); \ 1582 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1583 } 1584 1585 #define DO_MACC(N, M, D) (M * N + D) 1586 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1587 #define DO_MADD(N, M, D) (M * D + N) 1588 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1589 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1590 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1591 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1592 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1593 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1594 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1595 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1596 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1597 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1598 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1599 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1600 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1601 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1602 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1603 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1604 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1605 GEN_VEXT_VV(vmacc_vv_b, 1, 1) 1606 GEN_VEXT_VV(vmacc_vv_h, 2, 2) 1607 GEN_VEXT_VV(vmacc_vv_w, 4, 4) 1608 GEN_VEXT_VV(vmacc_vv_d, 8, 8) 1609 GEN_VEXT_VV(vnmsac_vv_b, 1, 1) 1610 GEN_VEXT_VV(vnmsac_vv_h, 2, 2) 1611 GEN_VEXT_VV(vnmsac_vv_w, 4, 4) 1612 GEN_VEXT_VV(vnmsac_vv_d, 8, 8) 1613 GEN_VEXT_VV(vmadd_vv_b, 1, 1) 1614 GEN_VEXT_VV(vmadd_vv_h, 2, 2) 1615 GEN_VEXT_VV(vmadd_vv_w, 4, 4) 1616 GEN_VEXT_VV(vmadd_vv_d, 8, 8) 1617 GEN_VEXT_VV(vnmsub_vv_b, 1, 1) 1618 GEN_VEXT_VV(vnmsub_vv_h, 2, 2) 1619 GEN_VEXT_VV(vnmsub_vv_w, 4, 4) 1620 GEN_VEXT_VV(vnmsub_vv_d, 8, 8) 1621 1622 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1623 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1624 { \ 1625 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1626 TD d = *((TD *)vd + HD(i)); \ 1627 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1628 } 1629 1630 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1631 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1632 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1633 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1634 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1635 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1636 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1637 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1638 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1639 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1640 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1641 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1642 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1643 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1644 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1645 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1646 GEN_VEXT_VX(vmacc_vx_b, 1, 1) 1647 GEN_VEXT_VX(vmacc_vx_h, 2, 2) 1648 GEN_VEXT_VX(vmacc_vx_w, 4, 4) 1649 GEN_VEXT_VX(vmacc_vx_d, 8, 8) 1650 GEN_VEXT_VX(vnmsac_vx_b, 1, 1) 1651 GEN_VEXT_VX(vnmsac_vx_h, 2, 2) 1652 GEN_VEXT_VX(vnmsac_vx_w, 4, 4) 1653 GEN_VEXT_VX(vnmsac_vx_d, 8, 8) 1654 GEN_VEXT_VX(vmadd_vx_b, 1, 1) 1655 GEN_VEXT_VX(vmadd_vx_h, 2, 2) 1656 GEN_VEXT_VX(vmadd_vx_w, 4, 4) 1657 GEN_VEXT_VX(vmadd_vx_d, 8, 8) 1658 GEN_VEXT_VX(vnmsub_vx_b, 1, 1) 1659 GEN_VEXT_VX(vnmsub_vx_h, 2, 2) 1660 GEN_VEXT_VX(vnmsub_vx_w, 4, 4) 1661 GEN_VEXT_VX(vnmsub_vx_d, 8, 8) 1662 1663 /* Vector Widening Integer Multiply-Add Instructions */ 1664 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1665 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1666 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1667 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1668 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1669 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1670 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1671 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1672 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1673 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) 1674 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) 1675 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) 1676 GEN_VEXT_VV(vwmacc_vv_b, 1, 2) 1677 GEN_VEXT_VV(vwmacc_vv_h, 2, 4) 1678 GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 1679 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) 1680 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) 1681 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) 1682 1683 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1684 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1685 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1686 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1687 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1688 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1689 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1690 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1691 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1692 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1693 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1694 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1695 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) 1696 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) 1697 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) 1698 GEN_VEXT_VX(vwmacc_vx_b, 1, 2) 1699 GEN_VEXT_VX(vwmacc_vx_h, 2, 4) 1700 GEN_VEXT_VX(vwmacc_vx_w, 4, 8) 1701 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) 1702 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) 1703 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) 1704 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) 1705 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) 1706 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) 1707 1708 /* Vector Integer Merge and Move Instructions */ 1709 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1710 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1711 uint32_t desc) \ 1712 { \ 1713 uint32_t vl = env->vl; \ 1714 uint32_t i; \ 1715 \ 1716 for (i = 0; i < vl; i++) { \ 1717 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1718 *((ETYPE *)vd + H(i)) = s1; \ 1719 } \ 1720 } 1721 1722 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1723 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1724 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1725 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1726 1727 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1728 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1729 uint32_t desc) \ 1730 { \ 1731 uint32_t vl = env->vl; \ 1732 uint32_t i; \ 1733 \ 1734 for (i = 0; i < vl; i++) { \ 1735 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1736 } \ 1737 } 1738 1739 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1740 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1741 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1742 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1743 1744 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1745 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1746 CPURISCVState *env, uint32_t desc) \ 1747 { \ 1748 uint32_t vl = env->vl; \ 1749 uint32_t i; \ 1750 \ 1751 for (i = 0; i < vl; i++) { \ 1752 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 1753 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1754 } \ 1755 } 1756 1757 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1758 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1759 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1760 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1761 1762 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1763 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1764 void *vs2, CPURISCVState *env, uint32_t desc) \ 1765 { \ 1766 uint32_t vl = env->vl; \ 1767 uint32_t i; \ 1768 \ 1769 for (i = 0; i < vl; i++) { \ 1770 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1771 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 1772 (ETYPE)(target_long)s1); \ 1773 *((ETYPE *)vd + H(i)) = d; \ 1774 } \ 1775 } 1776 1777 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1778 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1779 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1780 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1781 1782 /* 1783 *** Vector Fixed-Point Arithmetic Instructions 1784 */ 1785 1786 /* Vector Single-Width Saturating Add and Subtract */ 1787 1788 /* 1789 * As fixed point instructions probably have round mode and saturation, 1790 * define common macros for fixed point here. 1791 */ 1792 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1793 CPURISCVState *env, int vxrm); 1794 1795 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1796 static inline void \ 1797 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1798 CPURISCVState *env, int vxrm) \ 1799 { \ 1800 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1801 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1802 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1803 } 1804 1805 static inline void 1806 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1807 CPURISCVState *env, 1808 uint32_t vl, uint32_t vm, int vxrm, 1809 opivv2_rm_fn *fn) 1810 { 1811 for (uint32_t i = 0; i < vl; i++) { 1812 if (!vm && !vext_elem_mask(v0, i)) { 1813 continue; 1814 } 1815 fn(vd, vs1, vs2, i, env, vxrm); 1816 } 1817 } 1818 1819 static inline void 1820 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1821 CPURISCVState *env, 1822 uint32_t desc, uint32_t esz, uint32_t dsz, 1823 opivv2_rm_fn *fn) 1824 { 1825 uint32_t vm = vext_vm(desc); 1826 uint32_t vl = env->vl; 1827 1828 switch (env->vxrm) { 1829 case 0: /* rnu */ 1830 vext_vv_rm_1(vd, v0, vs1, vs2, 1831 env, vl, vm, 0, fn); 1832 break; 1833 case 1: /* rne */ 1834 vext_vv_rm_1(vd, v0, vs1, vs2, 1835 env, vl, vm, 1, fn); 1836 break; 1837 case 2: /* rdn */ 1838 vext_vv_rm_1(vd, v0, vs1, vs2, 1839 env, vl, vm, 2, fn); 1840 break; 1841 default: /* rod */ 1842 vext_vv_rm_1(vd, v0, vs1, vs2, 1843 env, vl, vm, 3, fn); 1844 break; 1845 } 1846 } 1847 1848 /* generate helpers for fixed point instructions with OPIVV format */ 1849 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ 1850 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1851 CPURISCVState *env, uint32_t desc) \ 1852 { \ 1853 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 1854 do_##NAME); \ 1855 } 1856 1857 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 1858 { 1859 uint8_t res = a + b; 1860 if (res < a) { 1861 res = UINT8_MAX; 1862 env->vxsat = 0x1; 1863 } 1864 return res; 1865 } 1866 1867 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1868 uint16_t b) 1869 { 1870 uint16_t res = a + b; 1871 if (res < a) { 1872 res = UINT16_MAX; 1873 env->vxsat = 0x1; 1874 } 1875 return res; 1876 } 1877 1878 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1879 uint32_t b) 1880 { 1881 uint32_t res = a + b; 1882 if (res < a) { 1883 res = UINT32_MAX; 1884 env->vxsat = 0x1; 1885 } 1886 return res; 1887 } 1888 1889 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1890 uint64_t b) 1891 { 1892 uint64_t res = a + b; 1893 if (res < a) { 1894 res = UINT64_MAX; 1895 env->vxsat = 0x1; 1896 } 1897 return res; 1898 } 1899 1900 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 1901 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 1902 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 1903 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 1904 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) 1905 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) 1906 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) 1907 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) 1908 1909 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 1910 CPURISCVState *env, int vxrm); 1911 1912 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1913 static inline void \ 1914 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 1915 CPURISCVState *env, int vxrm) \ 1916 { \ 1917 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1918 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 1919 } 1920 1921 static inline void 1922 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 1923 CPURISCVState *env, 1924 uint32_t vl, uint32_t vm, int vxrm, 1925 opivx2_rm_fn *fn) 1926 { 1927 for (uint32_t i = 0; i < vl; i++) { 1928 if (!vm && !vext_elem_mask(v0, i)) { 1929 continue; 1930 } 1931 fn(vd, s1, vs2, i, env, vxrm); 1932 } 1933 } 1934 1935 static inline void 1936 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 1937 CPURISCVState *env, 1938 uint32_t desc, uint32_t esz, uint32_t dsz, 1939 opivx2_rm_fn *fn) 1940 { 1941 uint32_t vm = vext_vm(desc); 1942 uint32_t vl = env->vl; 1943 1944 switch (env->vxrm) { 1945 case 0: /* rnu */ 1946 vext_vx_rm_1(vd, v0, s1, vs2, 1947 env, vl, vm, 0, fn); 1948 break; 1949 case 1: /* rne */ 1950 vext_vx_rm_1(vd, v0, s1, vs2, 1951 env, vl, vm, 1, fn); 1952 break; 1953 case 2: /* rdn */ 1954 vext_vx_rm_1(vd, v0, s1, vs2, 1955 env, vl, vm, 2, fn); 1956 break; 1957 default: /* rod */ 1958 vext_vx_rm_1(vd, v0, s1, vs2, 1959 env, vl, vm, 3, fn); 1960 break; 1961 } 1962 } 1963 1964 /* generate helpers for fixed point instructions with OPIVX format */ 1965 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ 1966 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1967 void *vs2, CPURISCVState *env, uint32_t desc) \ 1968 { \ 1969 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 1970 do_##NAME); \ 1971 } 1972 1973 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 1974 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 1975 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 1976 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 1977 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 1978 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) 1979 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) 1980 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) 1981 1982 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 1983 { 1984 int8_t res = a + b; 1985 if ((res ^ a) & (res ^ b) & INT8_MIN) { 1986 res = a > 0 ? INT8_MAX : INT8_MIN; 1987 env->vxsat = 0x1; 1988 } 1989 return res; 1990 } 1991 1992 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 1993 { 1994 int16_t res = a + b; 1995 if ((res ^ a) & (res ^ b) & INT16_MIN) { 1996 res = a > 0 ? INT16_MAX : INT16_MIN; 1997 env->vxsat = 0x1; 1998 } 1999 return res; 2000 } 2001 2002 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2003 { 2004 int32_t res = a + b; 2005 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2006 res = a > 0 ? INT32_MAX : INT32_MIN; 2007 env->vxsat = 0x1; 2008 } 2009 return res; 2010 } 2011 2012 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2013 { 2014 int64_t res = a + b; 2015 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2016 res = a > 0 ? INT64_MAX : INT64_MIN; 2017 env->vxsat = 0x1; 2018 } 2019 return res; 2020 } 2021 2022 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2023 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2024 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2025 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2026 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) 2027 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) 2028 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) 2029 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) 2030 2031 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2032 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2033 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2034 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2035 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) 2036 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) 2037 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) 2038 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) 2039 2040 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2041 { 2042 uint8_t res = a - b; 2043 if (res > a) { 2044 res = 0; 2045 env->vxsat = 0x1; 2046 } 2047 return res; 2048 } 2049 2050 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2051 uint16_t b) 2052 { 2053 uint16_t res = a - b; 2054 if (res > a) { 2055 res = 0; 2056 env->vxsat = 0x1; 2057 } 2058 return res; 2059 } 2060 2061 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2062 uint32_t b) 2063 { 2064 uint32_t res = a - b; 2065 if (res > a) { 2066 res = 0; 2067 env->vxsat = 0x1; 2068 } 2069 return res; 2070 } 2071 2072 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2073 uint64_t b) 2074 { 2075 uint64_t res = a - b; 2076 if (res > a) { 2077 res = 0; 2078 env->vxsat = 0x1; 2079 } 2080 return res; 2081 } 2082 2083 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2084 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2085 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2086 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2087 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) 2088 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) 2089 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) 2090 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) 2091 2092 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2093 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2094 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2095 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2096 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) 2097 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) 2098 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) 2099 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) 2100 2101 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2102 { 2103 int8_t res = a - b; 2104 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2105 res = a >= 0 ? INT8_MAX : INT8_MIN; 2106 env->vxsat = 0x1; 2107 } 2108 return res; 2109 } 2110 2111 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2112 { 2113 int16_t res = a - b; 2114 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2115 res = a >= 0 ? INT16_MAX : INT16_MIN; 2116 env->vxsat = 0x1; 2117 } 2118 return res; 2119 } 2120 2121 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2122 { 2123 int32_t res = a - b; 2124 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2125 res = a >= 0 ? INT32_MAX : INT32_MIN; 2126 env->vxsat = 0x1; 2127 } 2128 return res; 2129 } 2130 2131 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2132 { 2133 int64_t res = a - b; 2134 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2135 res = a >= 0 ? INT64_MAX : INT64_MIN; 2136 env->vxsat = 0x1; 2137 } 2138 return res; 2139 } 2140 2141 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2142 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2143 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2144 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2145 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) 2146 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) 2147 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) 2148 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) 2149 2150 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2151 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2152 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2153 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2154 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) 2155 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) 2156 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) 2157 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) 2158 2159 /* Vector Single-Width Averaging Add and Subtract */ 2160 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2161 { 2162 uint8_t d = extract64(v, shift, 1); 2163 uint8_t d1; 2164 uint64_t D1, D2; 2165 2166 if (shift == 0 || shift > 64) { 2167 return 0; 2168 } 2169 2170 d1 = extract64(v, shift - 1, 1); 2171 D1 = extract64(v, 0, shift); 2172 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2173 return d1; 2174 } else if (vxrm == 1) { /* round-to-nearest-even */ 2175 if (shift > 1) { 2176 D2 = extract64(v, 0, shift - 1); 2177 return d1 & ((D2 != 0) | d); 2178 } else { 2179 return d1 & d; 2180 } 2181 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2182 return !d & (D1 != 0); 2183 } 2184 return 0; /* round-down (truncate) */ 2185 } 2186 2187 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2188 { 2189 int64_t res = (int64_t)a + b; 2190 uint8_t round = get_round(vxrm, res, 1); 2191 2192 return (res >> 1) + round; 2193 } 2194 2195 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2196 { 2197 int64_t res = a + b; 2198 uint8_t round = get_round(vxrm, res, 1); 2199 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2200 2201 /* With signed overflow, bit 64 is inverse of bit 63. */ 2202 return ((res >> 1) ^ over) + round; 2203 } 2204 2205 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2206 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2207 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2208 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2209 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) 2210 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 2211 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) 2212 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) 2213 2214 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2215 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2216 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2217 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2218 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) 2219 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) 2220 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) 2221 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) 2222 2223 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2224 { 2225 int64_t res = (int64_t)a - b; 2226 uint8_t round = get_round(vxrm, res, 1); 2227 2228 return (res >> 1) + round; 2229 } 2230 2231 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2232 { 2233 int64_t res = (int64_t)a - b; 2234 uint8_t round = get_round(vxrm, res, 1); 2235 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2236 2237 /* With signed overflow, bit 64 is inverse of bit 63. */ 2238 return ((res >> 1) ^ over) + round; 2239 } 2240 2241 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2242 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2243 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2244 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2245 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) 2246 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) 2247 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) 2248 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) 2249 2250 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2251 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2252 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2253 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2254 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) 2255 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) 2256 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) 2257 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) 2258 2259 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2260 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2261 { 2262 uint8_t round; 2263 int16_t res; 2264 2265 res = (int16_t)a * (int16_t)b; 2266 round = get_round(vxrm, res, 7); 2267 res = (res >> 7) + round; 2268 2269 if (res > INT8_MAX) { 2270 env->vxsat = 0x1; 2271 return INT8_MAX; 2272 } else if (res < INT8_MIN) { 2273 env->vxsat = 0x1; 2274 return INT8_MIN; 2275 } else { 2276 return res; 2277 } 2278 } 2279 2280 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2281 { 2282 uint8_t round; 2283 int32_t res; 2284 2285 res = (int32_t)a * (int32_t)b; 2286 round = get_round(vxrm, res, 15); 2287 res = (res >> 15) + round; 2288 2289 if (res > INT16_MAX) { 2290 env->vxsat = 0x1; 2291 return INT16_MAX; 2292 } else if (res < INT16_MIN) { 2293 env->vxsat = 0x1; 2294 return INT16_MIN; 2295 } else { 2296 return res; 2297 } 2298 } 2299 2300 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2301 { 2302 uint8_t round; 2303 int64_t res; 2304 2305 res = (int64_t)a * (int64_t)b; 2306 round = get_round(vxrm, res, 31); 2307 res = (res >> 31) + round; 2308 2309 if (res > INT32_MAX) { 2310 env->vxsat = 0x1; 2311 return INT32_MAX; 2312 } else if (res < INT32_MIN) { 2313 env->vxsat = 0x1; 2314 return INT32_MIN; 2315 } else { 2316 return res; 2317 } 2318 } 2319 2320 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2321 { 2322 uint8_t round; 2323 uint64_t hi_64, lo_64; 2324 int64_t res; 2325 2326 if (a == INT64_MIN && b == INT64_MIN) { 2327 env->vxsat = 1; 2328 return INT64_MAX; 2329 } 2330 2331 muls64(&lo_64, &hi_64, a, b); 2332 round = get_round(vxrm, lo_64, 63); 2333 /* 2334 * Cannot overflow, as there are always 2335 * 2 sign bits after multiply. 2336 */ 2337 res = (hi_64 << 1) | (lo_64 >> 63); 2338 if (round) { 2339 if (res == INT64_MAX) { 2340 env->vxsat = 1; 2341 } else { 2342 res += 1; 2343 } 2344 } 2345 return res; 2346 } 2347 2348 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2349 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2350 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2351 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2352 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) 2353 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) 2354 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) 2355 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) 2356 2357 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2358 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2359 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2360 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2361 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) 2362 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) 2363 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) 2364 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) 2365 2366 /* Vector Widening Saturating Scaled Multiply-Add */ 2367 static inline uint16_t 2368 vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, 2369 uint16_t c) 2370 { 2371 uint8_t round; 2372 uint16_t res = (uint16_t)a * b; 2373 2374 round = get_round(vxrm, res, 4); 2375 res = (res >> 4) + round; 2376 return saddu16(env, vxrm, c, res); 2377 } 2378 2379 static inline uint32_t 2380 vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, 2381 uint32_t c) 2382 { 2383 uint8_t round; 2384 uint32_t res = (uint32_t)a * b; 2385 2386 round = get_round(vxrm, res, 8); 2387 res = (res >> 8) + round; 2388 return saddu32(env, vxrm, c, res); 2389 } 2390 2391 static inline uint64_t 2392 vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, 2393 uint64_t c) 2394 { 2395 uint8_t round; 2396 uint64_t res = (uint64_t)a * b; 2397 2398 round = get_round(vxrm, res, 16); 2399 res = (res >> 16) + round; 2400 return saddu64(env, vxrm, c, res); 2401 } 2402 2403 #define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2404 static inline void \ 2405 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2406 CPURISCVState *env, int vxrm) \ 2407 { \ 2408 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2409 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2410 TD d = *((TD *)vd + HD(i)); \ 2411 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ 2412 } 2413 2414 RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) 2415 RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) 2416 RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) 2417 GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2) 2418 GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4) 2419 GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8) 2420 2421 #define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2422 static inline void \ 2423 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2424 CPURISCVState *env, int vxrm) \ 2425 { \ 2426 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2427 TD d = *((TD *)vd + HD(i)); \ 2428 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ 2429 } 2430 2431 RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) 2432 RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) 2433 RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) 2434 GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2) 2435 GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4) 2436 GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8) 2437 2438 static inline int16_t 2439 vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) 2440 { 2441 uint8_t round; 2442 int16_t res = (int16_t)a * b; 2443 2444 round = get_round(vxrm, res, 4); 2445 res = (res >> 4) + round; 2446 return sadd16(env, vxrm, c, res); 2447 } 2448 2449 static inline int32_t 2450 vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) 2451 { 2452 uint8_t round; 2453 int32_t res = (int32_t)a * b; 2454 2455 round = get_round(vxrm, res, 8); 2456 res = (res >> 8) + round; 2457 return sadd32(env, vxrm, c, res); 2458 2459 } 2460 2461 static inline int64_t 2462 vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) 2463 { 2464 uint8_t round; 2465 int64_t res = (int64_t)a * b; 2466 2467 round = get_round(vxrm, res, 16); 2468 res = (res >> 16) + round; 2469 return sadd64(env, vxrm, c, res); 2470 } 2471 2472 RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) 2473 RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) 2474 RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) 2475 GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2) 2476 GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4) 2477 GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8) 2478 RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) 2479 RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) 2480 RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) 2481 GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2) 2482 GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4) 2483 GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8) 2484 2485 static inline int16_t 2486 vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) 2487 { 2488 uint8_t round; 2489 int16_t res = a * (int16_t)b; 2490 2491 round = get_round(vxrm, res, 4); 2492 res = (res >> 4) + round; 2493 return ssub16(env, vxrm, c, res); 2494 } 2495 2496 static inline int32_t 2497 vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) 2498 { 2499 uint8_t round; 2500 int32_t res = a * (int32_t)b; 2501 2502 round = get_round(vxrm, res, 8); 2503 res = (res >> 8) + round; 2504 return ssub32(env, vxrm, c, res); 2505 } 2506 2507 static inline int64_t 2508 vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) 2509 { 2510 uint8_t round; 2511 int64_t res = a * (int64_t)b; 2512 2513 round = get_round(vxrm, res, 16); 2514 res = (res >> 16) + round; 2515 return ssub64(env, vxrm, c, res); 2516 } 2517 2518 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) 2519 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) 2520 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) 2521 GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2) 2522 GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4) 2523 GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8) 2524 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) 2525 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) 2526 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) 2527 GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2) 2528 GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4) 2529 GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8) 2530 2531 static inline int16_t 2532 vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) 2533 { 2534 uint8_t round; 2535 int16_t res = (int16_t)a * b; 2536 2537 round = get_round(vxrm, res, 4); 2538 res = (res >> 4) + round; 2539 return ssub16(env, vxrm, c, res); 2540 } 2541 2542 static inline int32_t 2543 vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) 2544 { 2545 uint8_t round; 2546 int32_t res = (int32_t)a * b; 2547 2548 round = get_round(vxrm, res, 8); 2549 res = (res >> 8) + round; 2550 return ssub32(env, vxrm, c, res); 2551 } 2552 2553 static inline int64_t 2554 vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) 2555 { 2556 uint8_t round; 2557 int64_t res = (int64_t)a * b; 2558 2559 round = get_round(vxrm, res, 16); 2560 res = (res >> 16) + round; 2561 return ssub64(env, vxrm, c, res); 2562 } 2563 2564 RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8) 2565 RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) 2566 RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) 2567 GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2) 2568 GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4) 2569 GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8) 2570 2571 /* Vector Single-Width Scaling Shift Instructions */ 2572 static inline uint8_t 2573 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2574 { 2575 uint8_t round, shift = b & 0x7; 2576 uint8_t res; 2577 2578 round = get_round(vxrm, a, shift); 2579 res = (a >> shift) + round; 2580 return res; 2581 } 2582 static inline uint16_t 2583 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2584 { 2585 uint8_t round, shift = b & 0xf; 2586 uint16_t res; 2587 2588 round = get_round(vxrm, a, shift); 2589 res = (a >> shift) + round; 2590 return res; 2591 } 2592 static inline uint32_t 2593 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2594 { 2595 uint8_t round, shift = b & 0x1f; 2596 uint32_t res; 2597 2598 round = get_round(vxrm, a, shift); 2599 res = (a >> shift) + round; 2600 return res; 2601 } 2602 static inline uint64_t 2603 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2604 { 2605 uint8_t round, shift = b & 0x3f; 2606 uint64_t res; 2607 2608 round = get_round(vxrm, a, shift); 2609 res = (a >> shift) + round; 2610 return res; 2611 } 2612 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2613 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2614 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2615 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2616 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) 2617 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) 2618 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) 2619 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) 2620 2621 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2622 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2623 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2624 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2625 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) 2626 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) 2627 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) 2628 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) 2629 2630 static inline int8_t 2631 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2632 { 2633 uint8_t round, shift = b & 0x7; 2634 int8_t res; 2635 2636 round = get_round(vxrm, a, shift); 2637 res = (a >> shift) + round; 2638 return res; 2639 } 2640 static inline int16_t 2641 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2642 { 2643 uint8_t round, shift = b & 0xf; 2644 int16_t res; 2645 2646 round = get_round(vxrm, a, shift); 2647 res = (a >> shift) + round; 2648 return res; 2649 } 2650 static inline int32_t 2651 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2652 { 2653 uint8_t round, shift = b & 0x1f; 2654 int32_t res; 2655 2656 round = get_round(vxrm, a, shift); 2657 res = (a >> shift) + round; 2658 return res; 2659 } 2660 static inline int64_t 2661 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2662 { 2663 uint8_t round, shift = b & 0x3f; 2664 int64_t res; 2665 2666 round = get_round(vxrm, a, shift); 2667 res = (a >> shift) + round; 2668 return res; 2669 } 2670 2671 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2672 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2673 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2674 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2675 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) 2676 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) 2677 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) 2678 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) 2679 2680 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2681 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2682 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2683 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2684 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) 2685 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) 2686 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) 2687 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) 2688 2689 /* Vector Narrowing Fixed-Point Clip Instructions */ 2690 static inline int8_t 2691 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2692 { 2693 uint8_t round, shift = b & 0xf; 2694 int16_t res; 2695 2696 round = get_round(vxrm, a, shift); 2697 res = (a >> shift) + round; 2698 if (res > INT8_MAX) { 2699 env->vxsat = 0x1; 2700 return INT8_MAX; 2701 } else if (res < INT8_MIN) { 2702 env->vxsat = 0x1; 2703 return INT8_MIN; 2704 } else { 2705 return res; 2706 } 2707 } 2708 2709 static inline int16_t 2710 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2711 { 2712 uint8_t round, shift = b & 0x1f; 2713 int32_t res; 2714 2715 round = get_round(vxrm, a, shift); 2716 res = (a >> shift) + round; 2717 if (res > INT16_MAX) { 2718 env->vxsat = 0x1; 2719 return INT16_MAX; 2720 } else if (res < INT16_MIN) { 2721 env->vxsat = 0x1; 2722 return INT16_MIN; 2723 } else { 2724 return res; 2725 } 2726 } 2727 2728 static inline int32_t 2729 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2730 { 2731 uint8_t round, shift = b & 0x3f; 2732 int64_t res; 2733 2734 round = get_round(vxrm, a, shift); 2735 res = (a >> shift) + round; 2736 if (res > INT32_MAX) { 2737 env->vxsat = 0x1; 2738 return INT32_MAX; 2739 } else if (res < INT32_MIN) { 2740 env->vxsat = 0x1; 2741 return INT32_MIN; 2742 } else { 2743 return res; 2744 } 2745 } 2746 2747 RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2748 RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2749 RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2750 GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1) 2751 GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2) 2752 GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4) 2753 2754 RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8) 2755 RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16) 2756 RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32) 2757 GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1) 2758 GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2) 2759 GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4) 2760 2761 static inline uint8_t 2762 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2763 { 2764 uint8_t round, shift = b & 0xf; 2765 uint16_t res; 2766 2767 round = get_round(vxrm, a, shift); 2768 res = (a >> shift) + round; 2769 if (res > UINT8_MAX) { 2770 env->vxsat = 0x1; 2771 return UINT8_MAX; 2772 } else { 2773 return res; 2774 } 2775 } 2776 2777 static inline uint16_t 2778 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2779 { 2780 uint8_t round, shift = b & 0x1f; 2781 uint32_t res; 2782 2783 round = get_round(vxrm, a, shift); 2784 res = (a >> shift) + round; 2785 if (res > UINT16_MAX) { 2786 env->vxsat = 0x1; 2787 return UINT16_MAX; 2788 } else { 2789 return res; 2790 } 2791 } 2792 2793 static inline uint32_t 2794 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2795 { 2796 uint8_t round, shift = b & 0x3f; 2797 int64_t res; 2798 2799 round = get_round(vxrm, a, shift); 2800 res = (a >> shift) + round; 2801 if (res > UINT32_MAX) { 2802 env->vxsat = 0x1; 2803 return UINT32_MAX; 2804 } else { 2805 return res; 2806 } 2807 } 2808 2809 RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2810 RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2811 RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2812 GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1) 2813 GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2) 2814 GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4) 2815 2816 RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8) 2817 RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16) 2818 RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32) 2819 GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1) 2820 GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2) 2821 GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4) 2822 2823 /* 2824 *** Vector Float Point Arithmetic Instructions 2825 */ 2826 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2827 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2828 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2829 CPURISCVState *env) \ 2830 { \ 2831 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2832 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2833 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2834 } 2835 2836 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \ 2837 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2838 void *vs2, CPURISCVState *env, \ 2839 uint32_t desc) \ 2840 { \ 2841 uint32_t vm = vext_vm(desc); \ 2842 uint32_t vl = env->vl; \ 2843 uint32_t i; \ 2844 \ 2845 for (i = 0; i < vl; i++) { \ 2846 if (!vm && !vext_elem_mask(v0, i)) { \ 2847 continue; \ 2848 } \ 2849 do_##NAME(vd, vs1, vs2, i, env); \ 2850 } \ 2851 } 2852 2853 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2854 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2855 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2856 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) 2857 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4) 2858 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8) 2859 2860 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2861 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2862 CPURISCVState *env) \ 2863 { \ 2864 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2865 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 2866 } 2867 2868 #define GEN_VEXT_VF(NAME, ESZ, DSZ) \ 2869 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2870 void *vs2, CPURISCVState *env, \ 2871 uint32_t desc) \ 2872 { \ 2873 uint32_t vm = vext_vm(desc); \ 2874 uint32_t vl = env->vl; \ 2875 uint32_t i; \ 2876 \ 2877 for (i = 0; i < vl; i++) { \ 2878 if (!vm && !vext_elem_mask(v0, i)) { \ 2879 continue; \ 2880 } \ 2881 do_##NAME(vd, s1, vs2, i, env); \ 2882 } \ 2883 } 2884 2885 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2886 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2887 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2888 GEN_VEXT_VF(vfadd_vf_h, 2, 2) 2889 GEN_VEXT_VF(vfadd_vf_w, 4, 4) 2890 GEN_VEXT_VF(vfadd_vf_d, 8, 8) 2891 2892 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2893 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2894 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2895 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) 2896 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) 2897 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) 2898 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2899 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2900 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2901 GEN_VEXT_VF(vfsub_vf_h, 2, 2) 2902 GEN_VEXT_VF(vfsub_vf_w, 4, 4) 2903 GEN_VEXT_VF(vfsub_vf_d, 8, 8) 2904 2905 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2906 { 2907 return float16_sub(b, a, s); 2908 } 2909 2910 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2911 { 2912 return float32_sub(b, a, s); 2913 } 2914 2915 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2916 { 2917 return float64_sub(b, a, s); 2918 } 2919 2920 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2921 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2922 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2923 GEN_VEXT_VF(vfrsub_vf_h, 2, 2) 2924 GEN_VEXT_VF(vfrsub_vf_w, 4, 4) 2925 GEN_VEXT_VF(vfrsub_vf_d, 8, 8) 2926 2927 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2928 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2929 { 2930 return float32_add(float16_to_float32(a, true, s), 2931 float16_to_float32(b, true, s), s); 2932 } 2933 2934 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2935 { 2936 return float64_add(float32_to_float64(a, s), 2937 float32_to_float64(b, s), s); 2938 2939 } 2940 2941 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2942 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2943 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) 2944 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) 2945 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2946 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2947 GEN_VEXT_VF(vfwadd_vf_h, 2, 4) 2948 GEN_VEXT_VF(vfwadd_vf_w, 4, 8) 2949 2950 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2951 { 2952 return float32_sub(float16_to_float32(a, true, s), 2953 float16_to_float32(b, true, s), s); 2954 } 2955 2956 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2957 { 2958 return float64_sub(float32_to_float64(a, s), 2959 float32_to_float64(b, s), s); 2960 2961 } 2962 2963 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 2964 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 2965 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) 2966 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) 2967 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 2968 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 2969 GEN_VEXT_VF(vfwsub_vf_h, 2, 4) 2970 GEN_VEXT_VF(vfwsub_vf_w, 4, 8) 2971 2972 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 2973 { 2974 return float32_add(a, float16_to_float32(b, true, s), s); 2975 } 2976 2977 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 2978 { 2979 return float64_add(a, float32_to_float64(b, s), s); 2980 } 2981 2982 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 2983 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 2984 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) 2985 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 2986 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 2987 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 2988 GEN_VEXT_VF(vfwadd_wf_h, 2, 4) 2989 GEN_VEXT_VF(vfwadd_wf_w, 4, 8) 2990 2991 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 2992 { 2993 return float32_sub(a, float16_to_float32(b, true, s), s); 2994 } 2995 2996 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 2997 { 2998 return float64_sub(a, float32_to_float64(b, s), s); 2999 } 3000 3001 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3002 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3003 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) 3004 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) 3005 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3006 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3007 GEN_VEXT_VF(vfwsub_wf_h, 2, 4) 3008 GEN_VEXT_VF(vfwsub_wf_w, 4, 8) 3009 3010 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3011 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3012 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3013 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3014 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) 3015 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) 3016 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) 3017 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3018 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3019 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3020 GEN_VEXT_VF(vfmul_vf_h, 2, 2) 3021 GEN_VEXT_VF(vfmul_vf_w, 4, 4) 3022 GEN_VEXT_VF(vfmul_vf_d, 8, 8) 3023 3024 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3025 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3026 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3027 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 3028 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) 3029 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) 3030 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3031 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3032 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3033 GEN_VEXT_VF(vfdiv_vf_h, 2, 2) 3034 GEN_VEXT_VF(vfdiv_vf_w, 4, 4) 3035 GEN_VEXT_VF(vfdiv_vf_d, 8, 8) 3036 3037 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3038 { 3039 return float16_div(b, a, s); 3040 } 3041 3042 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3043 { 3044 return float32_div(b, a, s); 3045 } 3046 3047 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3048 { 3049 return float64_div(b, a, s); 3050 } 3051 3052 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3053 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3054 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3055 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) 3056 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) 3057 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) 3058 3059 /* Vector Widening Floating-Point Multiply */ 3060 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3061 { 3062 return float32_mul(float16_to_float32(a, true, s), 3063 float16_to_float32(b, true, s), s); 3064 } 3065 3066 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3067 { 3068 return float64_mul(float32_to_float64(a, s), 3069 float32_to_float64(b, s), s); 3070 3071 } 3072 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3073 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3074 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) 3075 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) 3076 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3077 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3078 GEN_VEXT_VF(vfwmul_vf_h, 2, 4) 3079 GEN_VEXT_VF(vfwmul_vf_w, 4, 8) 3080 3081 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3082 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3083 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3084 CPURISCVState *env) \ 3085 { \ 3086 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3087 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3088 TD d = *((TD *)vd + HD(i)); \ 3089 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3090 } 3091 3092 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3093 { 3094 return float16_muladd(a, b, d, 0, s); 3095 } 3096 3097 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3098 { 3099 return float32_muladd(a, b, d, 0, s); 3100 } 3101 3102 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3103 { 3104 return float64_muladd(a, b, d, 0, s); 3105 } 3106 3107 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3108 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3109 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3110 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) 3111 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) 3112 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) 3113 3114 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3115 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3116 CPURISCVState *env) \ 3117 { \ 3118 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3119 TD d = *((TD *)vd + HD(i)); \ 3120 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3121 } 3122 3123 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3124 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3125 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3126 GEN_VEXT_VF(vfmacc_vf_h, 2, 2) 3127 GEN_VEXT_VF(vfmacc_vf_w, 4, 4) 3128 GEN_VEXT_VF(vfmacc_vf_d, 8, 8) 3129 3130 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3131 { 3132 return float16_muladd(a, b, d, 3133 float_muladd_negate_c | float_muladd_negate_product, s); 3134 } 3135 3136 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3137 { 3138 return float32_muladd(a, b, d, 3139 float_muladd_negate_c | float_muladd_negate_product, s); 3140 } 3141 3142 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3143 { 3144 return float64_muladd(a, b, d, 3145 float_muladd_negate_c | float_muladd_negate_product, s); 3146 } 3147 3148 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3149 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3150 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3151 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) 3152 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) 3153 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) 3154 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3155 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3156 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3157 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) 3158 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) 3159 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) 3160 3161 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3162 { 3163 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3164 } 3165 3166 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3167 { 3168 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3169 } 3170 3171 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3172 { 3173 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3174 } 3175 3176 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3177 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3178 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3179 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) 3180 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) 3181 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) 3182 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3183 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3184 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3185 GEN_VEXT_VF(vfmsac_vf_h, 2, 2) 3186 GEN_VEXT_VF(vfmsac_vf_w, 4, 4) 3187 GEN_VEXT_VF(vfmsac_vf_d, 8, 8) 3188 3189 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3190 { 3191 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3192 } 3193 3194 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3195 { 3196 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3197 } 3198 3199 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3200 { 3201 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3202 } 3203 3204 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3205 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3206 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3207 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) 3208 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) 3209 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) 3210 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3211 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3212 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3213 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) 3214 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) 3215 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) 3216 3217 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3218 { 3219 return float16_muladd(d, b, a, 0, s); 3220 } 3221 3222 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3223 { 3224 return float32_muladd(d, b, a, 0, s); 3225 } 3226 3227 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3228 { 3229 return float64_muladd(d, b, a, 0, s); 3230 } 3231 3232 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3233 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3234 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3235 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) 3236 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) 3237 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) 3238 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3239 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3240 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3241 GEN_VEXT_VF(vfmadd_vf_h, 2, 2) 3242 GEN_VEXT_VF(vfmadd_vf_w, 4, 4) 3243 GEN_VEXT_VF(vfmadd_vf_d, 8, 8) 3244 3245 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3246 { 3247 return float16_muladd(d, b, a, 3248 float_muladd_negate_c | float_muladd_negate_product, s); 3249 } 3250 3251 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3252 { 3253 return float32_muladd(d, b, a, 3254 float_muladd_negate_c | float_muladd_negate_product, s); 3255 } 3256 3257 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3258 { 3259 return float64_muladd(d, b, a, 3260 float_muladd_negate_c | float_muladd_negate_product, s); 3261 } 3262 3263 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3264 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3265 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3266 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) 3267 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) 3268 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) 3269 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3270 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3271 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3272 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) 3273 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) 3274 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) 3275 3276 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3277 { 3278 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3279 } 3280 3281 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3282 { 3283 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3284 } 3285 3286 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3287 { 3288 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3289 } 3290 3291 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3292 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3293 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3294 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) 3295 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 3296 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) 3297 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3298 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3299 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3300 GEN_VEXT_VF(vfmsub_vf_h, 2, 2) 3301 GEN_VEXT_VF(vfmsub_vf_w, 4, 4) 3302 GEN_VEXT_VF(vfmsub_vf_d, 8, 8) 3303 3304 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3305 { 3306 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3307 } 3308 3309 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3310 { 3311 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3312 } 3313 3314 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3315 { 3316 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3317 } 3318 3319 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3320 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3321 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3322 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) 3323 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) 3324 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) 3325 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3326 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3327 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3328 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) 3329 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) 3330 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) 3331 3332 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3333 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3334 { 3335 return float32_muladd(float16_to_float32(a, true, s), 3336 float16_to_float32(b, true, s), d, 0, s); 3337 } 3338 3339 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3340 { 3341 return float64_muladd(float32_to_float64(a, s), 3342 float32_to_float64(b, s), d, 0, s); 3343 } 3344 3345 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3346 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3347 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) 3348 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) 3349 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3350 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3351 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) 3352 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) 3353 3354 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3355 { 3356 return float32_muladd(float16_to_float32(a, true, s), 3357 float16_to_float32(b, true, s), d, 3358 float_muladd_negate_c | float_muladd_negate_product, s); 3359 } 3360 3361 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3362 { 3363 return float64_muladd(float32_to_float64(a, s), 3364 float32_to_float64(b, s), d, 3365 float_muladd_negate_c | float_muladd_negate_product, s); 3366 } 3367 3368 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3369 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3370 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) 3371 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) 3372 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3373 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3374 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) 3375 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) 3376 3377 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3378 { 3379 return float32_muladd(float16_to_float32(a, true, s), 3380 float16_to_float32(b, true, s), d, 3381 float_muladd_negate_c, s); 3382 } 3383 3384 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3385 { 3386 return float64_muladd(float32_to_float64(a, s), 3387 float32_to_float64(b, s), d, 3388 float_muladd_negate_c, s); 3389 } 3390 3391 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3392 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3393 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) 3394 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) 3395 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3396 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3397 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) 3398 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) 3399 3400 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3401 { 3402 return float32_muladd(float16_to_float32(a, true, s), 3403 float16_to_float32(b, true, s), d, 3404 float_muladd_negate_product, s); 3405 } 3406 3407 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3408 { 3409 return float64_muladd(float32_to_float64(a, s), 3410 float32_to_float64(b, s), d, 3411 float_muladd_negate_product, s); 3412 } 3413 3414 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3415 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3416 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) 3417 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 3418 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3419 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3420 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) 3421 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) 3422 3423 /* Vector Floating-Point Square-Root Instruction */ 3424 /* (TD, T2, TX2) */ 3425 #define OP_UU_H uint16_t, uint16_t, uint16_t 3426 #define OP_UU_W uint32_t, uint32_t, uint32_t 3427 #define OP_UU_D uint64_t, uint64_t, uint64_t 3428 3429 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3430 static void do_##NAME(void *vd, void *vs2, int i, \ 3431 CPURISCVState *env) \ 3432 { \ 3433 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3434 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3435 } 3436 3437 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ 3438 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3439 CPURISCVState *env, uint32_t desc) \ 3440 { \ 3441 uint32_t vm = vext_vm(desc); \ 3442 uint32_t vl = env->vl; \ 3443 uint32_t i; \ 3444 \ 3445 if (vl == 0) { \ 3446 return; \ 3447 } \ 3448 for (i = 0; i < vl; i++) { \ 3449 if (!vm && !vext_elem_mask(v0, i)) { \ 3450 continue; \ 3451 } \ 3452 do_##NAME(vd, vs2, i, env); \ 3453 } \ 3454 } 3455 3456 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3457 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3458 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3459 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) 3460 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) 3461 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) 3462 3463 /* Vector Floating-Point MIN/MAX Instructions */ 3464 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum) 3465 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum) 3466 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum) 3467 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) 3468 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) 3469 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) 3470 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum) 3471 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum) 3472 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum) 3473 GEN_VEXT_VF(vfmin_vf_h, 2, 2) 3474 GEN_VEXT_VF(vfmin_vf_w, 4, 4) 3475 GEN_VEXT_VF(vfmin_vf_d, 8, 8) 3476 3477 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum) 3478 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum) 3479 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum) 3480 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) 3481 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) 3482 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) 3483 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum) 3484 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum) 3485 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) 3486 GEN_VEXT_VF(vfmax_vf_h, 2, 2) 3487 GEN_VEXT_VF(vfmax_vf_w, 4, 4) 3488 GEN_VEXT_VF(vfmax_vf_d, 8, 8) 3489 3490 /* Vector Floating-Point Sign-Injection Instructions */ 3491 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3492 { 3493 return deposit64(b, 0, 15, a); 3494 } 3495 3496 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3497 { 3498 return deposit64(b, 0, 31, a); 3499 } 3500 3501 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3502 { 3503 return deposit64(b, 0, 63, a); 3504 } 3505 3506 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3507 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3508 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3509 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) 3510 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) 3511 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) 3512 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3513 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3514 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3515 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) 3516 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) 3517 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) 3518 3519 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3520 { 3521 return deposit64(~b, 0, 15, a); 3522 } 3523 3524 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3525 { 3526 return deposit64(~b, 0, 31, a); 3527 } 3528 3529 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3530 { 3531 return deposit64(~b, 0, 63, a); 3532 } 3533 3534 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3535 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3536 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3537 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) 3538 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) 3539 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) 3540 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3541 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3542 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3543 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) 3544 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) 3545 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) 3546 3547 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3548 { 3549 return deposit64(b ^ a, 0, 15, a); 3550 } 3551 3552 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3553 { 3554 return deposit64(b ^ a, 0, 31, a); 3555 } 3556 3557 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3558 { 3559 return deposit64(b ^ a, 0, 63, a); 3560 } 3561 3562 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3563 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3564 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3565 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) 3566 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) 3567 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) 3568 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3569 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3570 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3571 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) 3572 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) 3573 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) 3574 3575 /* Vector Floating-Point Compare Instructions */ 3576 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3577 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3578 CPURISCVState *env, uint32_t desc) \ 3579 { \ 3580 uint32_t vm = vext_vm(desc); \ 3581 uint32_t vl = env->vl; \ 3582 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 3583 uint32_t i; \ 3584 \ 3585 for (i = 0; i < vl; i++) { \ 3586 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3587 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3588 if (!vm && !vext_elem_mask(v0, i)) { \ 3589 continue; \ 3590 } \ 3591 vext_set_elem_mask(vd, i, \ 3592 DO_OP(s2, s1, &env->fp_status)); \ 3593 } \ 3594 for (; i < vlmax; i++) { \ 3595 vext_set_elem_mask(vd, i, 0); \ 3596 } \ 3597 } 3598 3599 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3600 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3601 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3602 3603 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3604 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3605 CPURISCVState *env, uint32_t desc) \ 3606 { \ 3607 uint32_t vm = vext_vm(desc); \ 3608 uint32_t vl = env->vl; \ 3609 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 3610 uint32_t i; \ 3611 \ 3612 for (i = 0; i < vl; i++) { \ 3613 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3614 if (!vm && !vext_elem_mask(v0, i)) { \ 3615 continue; \ 3616 } \ 3617 vext_set_elem_mask(vd, i, \ 3618 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3619 } \ 3620 for (; i < vlmax; i++) { \ 3621 vext_set_elem_mask(vd, i, 0); \ 3622 } \ 3623 } 3624 3625 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3626 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3627 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3628 3629 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3630 { 3631 FloatRelation compare = float16_compare_quiet(a, b, s); 3632 return compare != float_relation_equal; 3633 } 3634 3635 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 3636 { 3637 FloatRelation compare = float32_compare_quiet(a, b, s); 3638 return compare != float_relation_equal; 3639 } 3640 3641 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 3642 { 3643 FloatRelation compare = float64_compare_quiet(a, b, s); 3644 return compare != float_relation_equal; 3645 } 3646 3647 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 3648 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 3649 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 3650 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 3651 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 3652 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 3653 3654 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 3655 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 3656 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 3657 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 3658 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 3659 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 3660 3661 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 3662 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 3663 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 3664 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 3665 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 3666 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 3667 3668 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 3669 { 3670 FloatRelation compare = float16_compare(a, b, s); 3671 return compare == float_relation_greater; 3672 } 3673 3674 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 3675 { 3676 FloatRelation compare = float32_compare(a, b, s); 3677 return compare == float_relation_greater; 3678 } 3679 3680 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 3681 { 3682 FloatRelation compare = float64_compare(a, b, s); 3683 return compare == float_relation_greater; 3684 } 3685 3686 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 3687 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 3688 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 3689 3690 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 3691 { 3692 FloatRelation compare = float16_compare(a, b, s); 3693 return compare == float_relation_greater || 3694 compare == float_relation_equal; 3695 } 3696 3697 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 3698 { 3699 FloatRelation compare = float32_compare(a, b, s); 3700 return compare == float_relation_greater || 3701 compare == float_relation_equal; 3702 } 3703 3704 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 3705 { 3706 FloatRelation compare = float64_compare(a, b, s); 3707 return compare == float_relation_greater || 3708 compare == float_relation_equal; 3709 } 3710 3711 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 3712 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 3713 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 3714 3715 GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet) 3716 GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet) 3717 GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet) 3718 GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet) 3719 GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet) 3720 GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet) 3721 3722 /* Vector Floating-Point Classify Instruction */ 3723 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3724 static void do_##NAME(void *vd, void *vs2, int i) \ 3725 { \ 3726 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3727 *((TD *)vd + HD(i)) = OP(s2); \ 3728 } 3729 3730 #define GEN_VEXT_V(NAME, ESZ, DSZ) \ 3731 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3732 CPURISCVState *env, uint32_t desc) \ 3733 { \ 3734 uint32_t vm = vext_vm(desc); \ 3735 uint32_t vl = env->vl; \ 3736 uint32_t i; \ 3737 \ 3738 for (i = 0; i < vl; i++) { \ 3739 if (!vm && !vext_elem_mask(v0, i)) { \ 3740 continue; \ 3741 } \ 3742 do_##NAME(vd, vs2, i); \ 3743 } \ 3744 } 3745 3746 target_ulong fclass_h(uint64_t frs1) 3747 { 3748 float16 f = frs1; 3749 bool sign = float16_is_neg(f); 3750 3751 if (float16_is_infinity(f)) { 3752 return sign ? 1 << 0 : 1 << 7; 3753 } else if (float16_is_zero(f)) { 3754 return sign ? 1 << 3 : 1 << 4; 3755 } else if (float16_is_zero_or_denormal(f)) { 3756 return sign ? 1 << 2 : 1 << 5; 3757 } else if (float16_is_any_nan(f)) { 3758 float_status s = { }; /* for snan_bit_is_one */ 3759 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3760 } else { 3761 return sign ? 1 << 1 : 1 << 6; 3762 } 3763 } 3764 3765 target_ulong fclass_s(uint64_t frs1) 3766 { 3767 float32 f = frs1; 3768 bool sign = float32_is_neg(f); 3769 3770 if (float32_is_infinity(f)) { 3771 return sign ? 1 << 0 : 1 << 7; 3772 } else if (float32_is_zero(f)) { 3773 return sign ? 1 << 3 : 1 << 4; 3774 } else if (float32_is_zero_or_denormal(f)) { 3775 return sign ? 1 << 2 : 1 << 5; 3776 } else if (float32_is_any_nan(f)) { 3777 float_status s = { }; /* for snan_bit_is_one */ 3778 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3779 } else { 3780 return sign ? 1 << 1 : 1 << 6; 3781 } 3782 } 3783 3784 target_ulong fclass_d(uint64_t frs1) 3785 { 3786 float64 f = frs1; 3787 bool sign = float64_is_neg(f); 3788 3789 if (float64_is_infinity(f)) { 3790 return sign ? 1 << 0 : 1 << 7; 3791 } else if (float64_is_zero(f)) { 3792 return sign ? 1 << 3 : 1 << 4; 3793 } else if (float64_is_zero_or_denormal(f)) { 3794 return sign ? 1 << 2 : 1 << 5; 3795 } else if (float64_is_any_nan(f)) { 3796 float_status s = { }; /* for snan_bit_is_one */ 3797 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3798 } else { 3799 return sign ? 1 << 1 : 1 << 6; 3800 } 3801 } 3802 3803 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 3804 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 3805 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 3806 GEN_VEXT_V(vfclass_v_h, 2, 2) 3807 GEN_VEXT_V(vfclass_v_w, 4, 4) 3808 GEN_VEXT_V(vfclass_v_d, 8, 8) 3809 3810 /* Vector Floating-Point Merge Instruction */ 3811 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 3812 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3813 CPURISCVState *env, uint32_t desc) \ 3814 { \ 3815 uint32_t vm = vext_vm(desc); \ 3816 uint32_t vl = env->vl; \ 3817 uint32_t i; \ 3818 \ 3819 for (i = 0; i < vl; i++) { \ 3820 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3821 *((ETYPE *)vd + H(i)) \ 3822 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 3823 } \ 3824 } 3825 3826 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 3827 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 3828 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 3829 3830 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 3831 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 3832 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 3833 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 3834 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 3835 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) 3836 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) 3837 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) 3838 3839 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 3840 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 3841 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 3842 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 3843 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) 3844 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) 3845 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) 3846 3847 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 3848 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 3849 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 3850 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 3851 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) 3852 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) 3853 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) 3854 3855 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 3856 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 3857 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 3858 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 3859 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) 3860 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) 3861 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) 3862 3863 /* Widening Floating-Point/Integer Type-Convert Instructions */ 3864 /* (TD, T2, TX2) */ 3865 #define WOP_UU_H uint32_t, uint16_t, uint16_t 3866 #define WOP_UU_W uint64_t, uint32_t, uint32_t 3867 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 3868 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 3869 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 3870 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) 3871 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) 3872 3873 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 3874 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 3875 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 3876 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) 3877 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) 3878 3879 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 3880 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 3881 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 3882 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) 3883 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) 3884 3885 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 3886 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 3887 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 3888 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) 3889 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) 3890 3891 /* 3892 * vfwcvt.f.f.v vd, vs2, vm # 3893 * Convert single-width float to double-width float. 3894 */ 3895 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 3896 { 3897 return float16_to_float32(a, true, s); 3898 } 3899 3900 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 3901 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 3902 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4) 3903 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) 3904 3905 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 3906 /* (TD, T2, TX2) */ 3907 #define NOP_UU_H uint16_t, uint32_t, uint32_t 3908 #define NOP_UU_W uint32_t, uint64_t, uint64_t 3909 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 3910 RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16) 3911 RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32) 3912 GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2) 3913 GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4) 3914 3915 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 3916 RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16) 3917 RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32) 3918 GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2) 3919 GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4) 3920 3921 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 3922 RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16) 3923 RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32) 3924 GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2) 3925 GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4) 3926 3927 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 3928 RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16) 3929 RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32) 3930 GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2) 3931 GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4) 3932 3933 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ 3934 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 3935 { 3936 return float32_to_float16(a, true, s); 3937 } 3938 3939 RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) 3940 RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) 3941 GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2) 3942 GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4) 3943 3944 /* 3945 *** Vector Reduction Operations 3946 */ 3947 /* Vector Single-Width Integer Reduction Instructions */ 3948 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 3949 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3950 void *vs2, CPURISCVState *env, uint32_t desc) \ 3951 { \ 3952 uint32_t vm = vext_vm(desc); \ 3953 uint32_t vl = env->vl; \ 3954 uint32_t i; \ 3955 TD s1 = *((TD *)vs1 + HD(0)); \ 3956 \ 3957 for (i = 0; i < vl; i++) { \ 3958 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 3959 if (!vm && !vext_elem_mask(v0, i)) { \ 3960 continue; \ 3961 } \ 3962 s1 = OP(s1, (TD)s2); \ 3963 } \ 3964 *((TD *)vd + HD(0)) = s1; \ 3965 } 3966 3967 /* vd[0] = sum(vs1[0], vs2[*]) */ 3968 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 3969 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 3970 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 3971 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 3972 3973 /* vd[0] = maxu(vs1[0], vs2[*]) */ 3974 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 3975 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 3976 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 3977 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 3978 3979 /* vd[0] = max(vs1[0], vs2[*]) */ 3980 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 3981 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 3982 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 3983 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 3984 3985 /* vd[0] = minu(vs1[0], vs2[*]) */ 3986 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 3987 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 3988 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 3989 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 3990 3991 /* vd[0] = min(vs1[0], vs2[*]) */ 3992 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 3993 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 3994 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 3995 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 3996 3997 /* vd[0] = and(vs1[0], vs2[*]) */ 3998 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 3999 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4000 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4001 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4002 4003 /* vd[0] = or(vs1[0], vs2[*]) */ 4004 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4005 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4006 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4007 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4008 4009 /* vd[0] = xor(vs1[0], vs2[*]) */ 4010 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4011 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4012 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4013 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4014 4015 /* Vector Widening Integer Reduction Instructions */ 4016 /* signed sum reduction into double-width accumulator */ 4017 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4018 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4019 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4020 4021 /* Unsigned sum reduction into double-width accumulator */ 4022 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4023 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4024 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4025 4026 /* Vector Single-Width Floating-Point Reduction Instructions */ 4027 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4028 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4029 void *vs2, CPURISCVState *env, \ 4030 uint32_t desc) \ 4031 { \ 4032 uint32_t vm = vext_vm(desc); \ 4033 uint32_t vl = env->vl; \ 4034 uint32_t i; \ 4035 TD s1 = *((TD *)vs1 + HD(0)); \ 4036 \ 4037 for (i = 0; i < vl; i++) { \ 4038 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4039 if (!vm && !vext_elem_mask(v0, i)) { \ 4040 continue; \ 4041 } \ 4042 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4043 } \ 4044 *((TD *)vd + HD(0)) = s1; \ 4045 } 4046 4047 /* Unordered sum */ 4048 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4049 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4050 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4051 4052 /* Maximum value */ 4053 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum) 4054 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum) 4055 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum) 4056 4057 /* Minimum value */ 4058 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum) 4059 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum) 4060 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum) 4061 4062 /* Vector Widening Floating-Point Reduction Instructions */ 4063 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4064 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4065 void *vs2, CPURISCVState *env, uint32_t desc) 4066 { 4067 uint32_t vm = vext_vm(desc); 4068 uint32_t vl = env->vl; 4069 uint32_t i; 4070 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4071 4072 for (i = 0; i < vl; i++) { 4073 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4074 if (!vm && !vext_elem_mask(v0, i)) { 4075 continue; 4076 } 4077 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4078 &env->fp_status); 4079 } 4080 *((uint32_t *)vd + H4(0)) = s1; 4081 } 4082 4083 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4084 void *vs2, CPURISCVState *env, uint32_t desc) 4085 { 4086 uint32_t vm = vext_vm(desc); 4087 uint32_t vl = env->vl; 4088 uint32_t i; 4089 uint64_t s1 = *((uint64_t *)vs1); 4090 4091 for (i = 0; i < vl; i++) { 4092 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4093 if (!vm && !vext_elem_mask(v0, i)) { 4094 continue; 4095 } 4096 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4097 &env->fp_status); 4098 } 4099 *((uint64_t *)vd) = s1; 4100 } 4101 4102 /* 4103 *** Vector Mask Operations 4104 */ 4105 /* Vector Mask-Register Logical Instructions */ 4106 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4107 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4108 void *vs2, CPURISCVState *env, \ 4109 uint32_t desc) \ 4110 { \ 4111 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4112 uint32_t vl = env->vl; \ 4113 uint32_t i; \ 4114 int a, b; \ 4115 \ 4116 for (i = 0; i < vl; i++) { \ 4117 a = vext_elem_mask(vs1, i); \ 4118 b = vext_elem_mask(vs2, i); \ 4119 vext_set_elem_mask(vd, i, OP(b, a)); \ 4120 } \ 4121 for (; i < vlmax; i++) { \ 4122 vext_set_elem_mask(vd, i, 0); \ 4123 } \ 4124 } 4125 4126 #define DO_NAND(N, M) (!(N & M)) 4127 #define DO_ANDNOT(N, M) (N & !M) 4128 #define DO_NOR(N, M) (!(N | M)) 4129 #define DO_ORNOT(N, M) (N | !M) 4130 #define DO_XNOR(N, M) (!(N ^ M)) 4131 4132 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4133 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4134 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) 4135 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4136 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4137 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4138 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) 4139 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4140 4141 /* Vector mask population count vmpopc */ 4142 target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env, 4143 uint32_t desc) 4144 { 4145 target_ulong cnt = 0; 4146 uint32_t vm = vext_vm(desc); 4147 uint32_t vl = env->vl; 4148 int i; 4149 4150 for (i = 0; i < vl; i++) { 4151 if (vm || vext_elem_mask(v0, i)) { 4152 if (vext_elem_mask(vs2, i)) { 4153 cnt++; 4154 } 4155 } 4156 } 4157 return cnt; 4158 } 4159 4160 /* vmfirst find-first-set mask bit*/ 4161 target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4162 uint32_t desc) 4163 { 4164 uint32_t vm = vext_vm(desc); 4165 uint32_t vl = env->vl; 4166 int i; 4167 4168 for (i = 0; i < vl; i++) { 4169 if (vm || vext_elem_mask(v0, i)) { 4170 if (vext_elem_mask(vs2, i)) { 4171 return i; 4172 } 4173 } 4174 } 4175 return -1LL; 4176 } 4177 4178 enum set_mask_type { 4179 ONLY_FIRST = 1, 4180 INCLUDE_FIRST, 4181 BEFORE_FIRST, 4182 }; 4183 4184 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4185 uint32_t desc, enum set_mask_type type) 4186 { 4187 uint32_t vlmax = env_archcpu(env)->cfg.vlen; 4188 uint32_t vm = vext_vm(desc); 4189 uint32_t vl = env->vl; 4190 int i; 4191 bool first_mask_bit = false; 4192 4193 for (i = 0; i < vl; i++) { 4194 if (!vm && !vext_elem_mask(v0, i)) { 4195 continue; 4196 } 4197 /* write a zero to all following active elements */ 4198 if (first_mask_bit) { 4199 vext_set_elem_mask(vd, i, 0); 4200 continue; 4201 } 4202 if (vext_elem_mask(vs2, i)) { 4203 first_mask_bit = true; 4204 if (type == BEFORE_FIRST) { 4205 vext_set_elem_mask(vd, i, 0); 4206 } else { 4207 vext_set_elem_mask(vd, i, 1); 4208 } 4209 } else { 4210 if (type == ONLY_FIRST) { 4211 vext_set_elem_mask(vd, i, 0); 4212 } else { 4213 vext_set_elem_mask(vd, i, 1); 4214 } 4215 } 4216 } 4217 for (; i < vlmax; i++) { 4218 vext_set_elem_mask(vd, i, 0); 4219 } 4220 } 4221 4222 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4223 uint32_t desc) 4224 { 4225 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4226 } 4227 4228 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4229 uint32_t desc) 4230 { 4231 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4232 } 4233 4234 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4235 uint32_t desc) 4236 { 4237 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4238 } 4239 4240 /* Vector Iota Instruction */ 4241 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4242 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4243 uint32_t desc) \ 4244 { \ 4245 uint32_t vm = vext_vm(desc); \ 4246 uint32_t vl = env->vl; \ 4247 uint32_t sum = 0; \ 4248 int i; \ 4249 \ 4250 for (i = 0; i < vl; i++) { \ 4251 if (!vm && !vext_elem_mask(v0, i)) { \ 4252 continue; \ 4253 } \ 4254 *((ETYPE *)vd + H(i)) = sum; \ 4255 if (vext_elem_mask(vs2, i)) { \ 4256 sum++; \ 4257 } \ 4258 } \ 4259 } 4260 4261 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4262 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4263 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4264 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4265 4266 /* Vector Element Index Instruction */ 4267 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4268 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4269 { \ 4270 uint32_t vm = vext_vm(desc); \ 4271 uint32_t vl = env->vl; \ 4272 int i; \ 4273 \ 4274 for (i = 0; i < vl; i++) { \ 4275 if (!vm && !vext_elem_mask(v0, i)) { \ 4276 continue; \ 4277 } \ 4278 *((ETYPE *)vd + H(i)) = i; \ 4279 } \ 4280 } 4281 4282 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4283 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4284 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4285 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4286 4287 /* 4288 *** Vector Permutation Instructions 4289 */ 4290 4291 /* Vector Slide Instructions */ 4292 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4293 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4294 CPURISCVState *env, uint32_t desc) \ 4295 { \ 4296 uint32_t vm = vext_vm(desc); \ 4297 uint32_t vl = env->vl; \ 4298 target_ulong offset = s1, i; \ 4299 \ 4300 for (i = offset; i < vl; i++) { \ 4301 if (!vm && !vext_elem_mask(v0, i)) { \ 4302 continue; \ 4303 } \ 4304 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4305 } \ 4306 } 4307 4308 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4309 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4310 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4311 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4312 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4313 4314 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4315 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4316 CPURISCVState *env, uint32_t desc) \ 4317 { \ 4318 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4319 uint32_t vm = vext_vm(desc); \ 4320 uint32_t vl = env->vl; \ 4321 target_ulong offset = s1, i; \ 4322 \ 4323 for (i = 0; i < vl; ++i) { \ 4324 target_ulong j = i + offset; \ 4325 if (!vm && !vext_elem_mask(v0, i)) { \ 4326 continue; \ 4327 } \ 4328 *((ETYPE *)vd + H(i)) = j >= vlmax ? 0 : *((ETYPE *)vs2 + H(j)); \ 4329 } \ 4330 } 4331 4332 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4333 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4334 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4335 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4336 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4337 4338 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H) \ 4339 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4340 CPURISCVState *env, uint32_t desc) \ 4341 { \ 4342 uint32_t vm = vext_vm(desc); \ 4343 uint32_t vl = env->vl; \ 4344 uint32_t i; \ 4345 \ 4346 for (i = 0; i < vl; i++) { \ 4347 if (!vm && !vext_elem_mask(v0, i)) { \ 4348 continue; \ 4349 } \ 4350 if (i == 0) { \ 4351 *((ETYPE *)vd + H(i)) = s1; \ 4352 } else { \ 4353 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4354 } \ 4355 } \ 4356 } 4357 4358 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4359 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1) 4360 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2) 4361 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4) 4362 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8) 4363 4364 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H) \ 4365 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4366 CPURISCVState *env, uint32_t desc) \ 4367 { \ 4368 uint32_t vm = vext_vm(desc); \ 4369 uint32_t vl = env->vl; \ 4370 uint32_t i; \ 4371 \ 4372 for (i = 0; i < vl; i++) { \ 4373 if (!vm && !vext_elem_mask(v0, i)) { \ 4374 continue; \ 4375 } \ 4376 if (i == vl - 1) { \ 4377 *((ETYPE *)vd + H(i)) = s1; \ 4378 } else { \ 4379 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4380 } \ 4381 } \ 4382 } 4383 4384 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4385 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1) 4386 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2) 4387 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4) 4388 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8) 4389 4390 /* Vector Register Gather Instruction */ 4391 #define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H) \ 4392 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4393 CPURISCVState *env, uint32_t desc) \ 4394 { \ 4395 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4396 uint32_t vm = vext_vm(desc); \ 4397 uint32_t vl = env->vl; \ 4398 uint64_t index; \ 4399 uint32_t i; \ 4400 \ 4401 for (i = 0; i < vl; i++) { \ 4402 if (!vm && !vext_elem_mask(v0, i)) { \ 4403 continue; \ 4404 } \ 4405 index = *((ETYPE *)vs1 + H(i)); \ 4406 if (index >= vlmax) { \ 4407 *((ETYPE *)vd + H(i)) = 0; \ 4408 } else { \ 4409 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4410 } \ 4411 } \ 4412 } 4413 4414 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 4415 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, H1) 4416 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2) 4417 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4) 4418 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8) 4419 4420 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4421 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4422 CPURISCVState *env, uint32_t desc) \ 4423 { \ 4424 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4425 uint32_t vm = vext_vm(desc); \ 4426 uint32_t vl = env->vl; \ 4427 uint64_t index = s1; \ 4428 uint32_t i; \ 4429 \ 4430 for (i = 0; i < vl; i++) { \ 4431 if (!vm && !vext_elem_mask(v0, i)) { \ 4432 continue; \ 4433 } \ 4434 if (index >= vlmax) { \ 4435 *((ETYPE *)vd + H(i)) = 0; \ 4436 } else { \ 4437 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4438 } \ 4439 } \ 4440 } 4441 4442 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ 4443 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4444 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4445 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4446 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4447 4448 /* Vector Compress Instruction */ 4449 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4450 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4451 CPURISCVState *env, uint32_t desc) \ 4452 { \ 4453 uint32_t vl = env->vl; \ 4454 uint32_t num = 0, i; \ 4455 \ 4456 for (i = 0; i < vl; i++) { \ 4457 if (!vext_elem_mask(vs1, i)) { \ 4458 continue; \ 4459 } \ 4460 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4461 num++; \ 4462 } \ 4463 } 4464 4465 /* Compress into vd elements of vs2 where vs1 is enabled */ 4466 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4467 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4468 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4469 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4470