/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    bool vill = FIELD_EX64(s2, VTYPE, VILL);
    target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
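
/*
 * Worked example for vext_max_elems(): with VLEN = 128 bits, vlenb is
 * 16 bytes.  For SEW = 32 (esz = 2) and LMUL = 2 (lmul = 1) the scale is
 * 1 - 2 = -1, so VLMAX = 16 >> 1 = 8, which matches LMUL * VLEN / SEW.
 */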

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, addr, curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, addr, curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            target_ulong addr = base + stride * i + (k << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}
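
/*
 * Example: a strided segment access with nf = 2, esz = 1 (16-bit elements)
 * and stride = 8 touches, for element i, base + 8 * i (field 0) and
 * base + 8 * i + 2 (field 1).  Field k of element i is placed at element
 * index i + k * max_elems of vd, so each field lands in its own
 * register group.
 */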

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem,
             uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

/*
 * A masked unit-stride load or store is handled as a special case of the
 * strided operation, with stride = NF * sizeof(ETYPE).
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);          \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
                 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);         \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
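
/*
 * Example: a masked unit-stride segment load with NF = 3 and 16-bit
 * elements goes through vext_ldst_stride() with an implicit stride of
 * NF * sizeof(ETYPE) = 6 bytes, while the unmasked form uses the simpler
 * vext_ldst_us() loop above.
 */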

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)                  \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                  \
                  void *vs2, CPURISCVState *env, uint32_t desc)            \
{                                                                          \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)

#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC(), MMU_DATA_STORE);                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
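
/*
 * The helpers above follow the naming pattern v[ls]xei<index width>_<data
 * width>_v: e.g. vlxei16_32_v loads 32-bit data elements through 16-bit
 * indices (idx_h), and vsxei64_8_v stores 8-bit elements through 64-bit
 * indices (idx_d).
 */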

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = base + i * (nf << esz);
        if (i == 0) {
            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr += offset;
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ?
(M) : (N)) 538 539 /* Unsigned min/max */ 540 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 541 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 542 543 /* 544 *** load and store whole register instructions 545 */ 546 static void 547 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 548 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra, 549 MMUAccessType access_type) 550 { 551 uint32_t i, k, off, pos; 552 uint32_t nf = vext_nf(desc); 553 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 554 uint32_t max_elems = vlenb >> esz; 555 556 k = env->vstart / max_elems; 557 off = env->vstart % max_elems; 558 559 if (off) { 560 /* load/store rest of elements of current segment pointed by vstart */ 561 for (pos = off; pos < max_elems; pos++, env->vstart++) { 562 target_ulong addr = base + ((pos + k * max_elems) << esz); 563 ldst_elem(env, addr, pos + k * max_elems, vd, ra); 564 } 565 k++; 566 } 567 568 /* load/store elements for rest of segments */ 569 for (; k < nf; k++) { 570 for (i = 0; i < max_elems; i++, env->vstart++) { 571 target_ulong addr = base + ((i + k * max_elems) << esz); 572 ldst_elem(env, addr, i + k * max_elems, vd, ra); 573 } 574 } 575 576 env->vstart = 0; 577 } 578 579 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 580 void HELPER(NAME)(void *vd, target_ulong base, \ 581 CPURISCVState *env, uint32_t desc) \ 582 { \ 583 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 584 ctzl(sizeof(ETYPE)), GETPC(), \ 585 MMU_DATA_LOAD); \ 586 } 587 588 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 589 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 590 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 591 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 592 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 593 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 594 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 595 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 596 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 597 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 598 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 599 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 600 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 601 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 602 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 603 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 604 605 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 606 void HELPER(NAME)(void *vd, target_ulong base, \ 607 CPURISCVState *env, uint32_t desc) \ 608 { \ 609 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 610 ctzl(sizeof(ETYPE)), GETPC(), \ 611 MMU_DATA_STORE); \ 612 } 613 614 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 615 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 616 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 617 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 618 619 /* 620 *** Vector Integer Arithmetic Instructions 621 */ 622 623 /* expand macro args before macro */ 624 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 625 626 /* (TD, T1, T2, TX1, TX2) */ 627 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 628 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 629 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 630 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 631 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 632 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 633 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 634 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 635 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 636 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 637 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 638 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 639 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 640 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 641 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 642 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 643 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 644 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 645 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 646 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 647 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 648 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 649 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 650 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 651 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 652 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 653 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 654 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 655 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 656 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 657 658 /* operation of two vector elements */ 659 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 660 661 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 662 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 663 { \ 664 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 665 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 666 *((TD *)vd + HD(i)) = OP(s2, s1); \ 667 } 668 #define DO_SUB(N, M) (N - M) 669 #define DO_RSUB(N, M) (M - N) 670 671 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 672 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 673 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 674 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 675 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 676 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 677 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 678 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 679 680 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 681 CPURISCVState *env, uint32_t desc, 682 uint32_t esz, uint32_t dsz, 683 opivv2_fn *fn) 684 { 685 uint32_t vm = vext_vm(desc); 686 uint32_t vl = env->vl; 687 uint32_t i; 688 689 for (i = env->vstart; i < vl; i++) { 690 if (!vm && !vext_elem_mask(v0, i)) { 691 continue; 692 } 693 fn(vd, vs1, vs2, i); 694 } 695 env->vstart = 0; 696 } 697 698 /* generate the helpers for OPIVV */ 699 #define GEN_VEXT_VV(NAME, ESZ, DSZ) \ 700 void HELPER(NAME)(void *vd, void *v0, void *vs1, 
\ 701 void *vs2, CPURISCVState *env, \ 702 uint32_t desc) \ 703 { \ 704 do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 705 do_##NAME); \ 706 } 707 708 GEN_VEXT_VV(vadd_vv_b, 1, 1) 709 GEN_VEXT_VV(vadd_vv_h, 2, 2) 710 GEN_VEXT_VV(vadd_vv_w, 4, 4) 711 GEN_VEXT_VV(vadd_vv_d, 8, 8) 712 GEN_VEXT_VV(vsub_vv_b, 1, 1) 713 GEN_VEXT_VV(vsub_vv_h, 2, 2) 714 GEN_VEXT_VV(vsub_vv_w, 4, 4) 715 GEN_VEXT_VV(vsub_vv_d, 8, 8) 716 717 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 718 719 /* 720 * (T1)s1 gives the real operator type. 721 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 722 */ 723 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 724 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 725 { \ 726 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 727 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 728 } 729 730 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 731 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 732 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 733 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 734 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 735 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 736 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 737 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 738 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 739 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 740 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 741 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 742 743 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 744 CPURISCVState *env, uint32_t desc, 745 uint32_t esz, uint32_t dsz, 746 opivx2_fn fn) 747 { 748 uint32_t vm = vext_vm(desc); 749 uint32_t vl = env->vl; 750 uint32_t i; 751 752 for (i = env->vstart; i < vl; i++) { 753 if (!vm && !vext_elem_mask(v0, i)) { 754 continue; 755 } 756 fn(vd, s1, vs2, i); 757 } 758 env->vstart = 0; 759 } 760 761 /* generate the helpers for OPIVX */ 762 #define GEN_VEXT_VX(NAME, ESZ, DSZ) \ 763 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 764 void *vs2, CPURISCVState *env, \ 765 uint32_t desc) \ 766 { \ 767 do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 768 do_##NAME); \ 769 } 770 771 GEN_VEXT_VX(vadd_vx_b, 1, 1) 772 GEN_VEXT_VX(vadd_vx_h, 2, 2) 773 GEN_VEXT_VX(vadd_vx_w, 4, 4) 774 GEN_VEXT_VX(vadd_vx_d, 8, 8) 775 GEN_VEXT_VX(vsub_vx_b, 1, 1) 776 GEN_VEXT_VX(vsub_vx_h, 2, 2) 777 GEN_VEXT_VX(vsub_vx_w, 4, 4) 778 GEN_VEXT_VX(vsub_vx_d, 8, 8) 779 GEN_VEXT_VX(vrsub_vx_b, 1, 1) 780 GEN_VEXT_VX(vrsub_vx_h, 2, 2) 781 GEN_VEXT_VX(vrsub_vx_w, 4, 4) 782 GEN_VEXT_VX(vrsub_vx_d, 8, 8) 783 784 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 785 { 786 intptr_t oprsz = simd_oprsz(desc); 787 intptr_t i; 788 789 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 790 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 791 } 792 } 793 794 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 795 { 796 intptr_t oprsz = simd_oprsz(desc); 797 intptr_t i; 798 799 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 800 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 801 } 802 } 803 804 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 805 { 806 intptr_t oprsz = simd_oprsz(desc); 807 intptr_t i; 808 809 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 810 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 811 } 812 } 813 814 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 815 
{ 816 intptr_t oprsz = simd_oprsz(desc); 817 intptr_t i; 818 819 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 820 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 821 } 822 } 823 824 /* Vector Widening Integer Add/Subtract */ 825 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 826 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 827 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 828 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 829 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 830 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 831 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 832 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 833 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 834 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 835 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 836 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 837 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 838 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 839 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 840 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 841 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 842 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 843 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 844 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 845 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 846 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 847 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 848 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 849 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 850 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 851 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 852 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 853 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 854 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 855 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 856 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 857 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 858 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 859 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 860 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 861 GEN_VEXT_VV(vwaddu_vv_b, 1, 2) 862 GEN_VEXT_VV(vwaddu_vv_h, 2, 4) 863 GEN_VEXT_VV(vwaddu_vv_w, 4, 8) 864 GEN_VEXT_VV(vwsubu_vv_b, 1, 2) 865 GEN_VEXT_VV(vwsubu_vv_h, 2, 4) 866 GEN_VEXT_VV(vwsubu_vv_w, 4, 8) 867 GEN_VEXT_VV(vwadd_vv_b, 1, 2) 868 GEN_VEXT_VV(vwadd_vv_h, 2, 4) 869 GEN_VEXT_VV(vwadd_vv_w, 4, 8) 870 GEN_VEXT_VV(vwsub_vv_b, 1, 2) 871 GEN_VEXT_VV(vwsub_vv_h, 2, 4) 872 GEN_VEXT_VV(vwsub_vv_w, 4, 8) 873 GEN_VEXT_VV(vwaddu_wv_b, 1, 2) 874 GEN_VEXT_VV(vwaddu_wv_h, 2, 4) 875 GEN_VEXT_VV(vwaddu_wv_w, 4, 8) 876 GEN_VEXT_VV(vwsubu_wv_b, 1, 2) 877 GEN_VEXT_VV(vwsubu_wv_h, 2, 4) 878 GEN_VEXT_VV(vwsubu_wv_w, 4, 8) 879 GEN_VEXT_VV(vwadd_wv_b, 1, 2) 880 GEN_VEXT_VV(vwadd_wv_h, 2, 4) 881 GEN_VEXT_VV(vwadd_wv_w, 4, 8) 882 GEN_VEXT_VV(vwsub_wv_b, 1, 2) 883 GEN_VEXT_VV(vwsub_wv_h, 2, 4) 884 GEN_VEXT_VV(vwsub_wv_w, 4, 8) 885 886 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 887 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 888 
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 889 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 890 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 891 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 892 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 893 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 894 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 895 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 896 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 897 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 898 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 899 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 900 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 901 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 902 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 903 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 904 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 905 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 906 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 907 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 908 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 909 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 910 GEN_VEXT_VX(vwaddu_vx_b, 1, 2) 911 GEN_VEXT_VX(vwaddu_vx_h, 2, 4) 912 GEN_VEXT_VX(vwaddu_vx_w, 4, 8) 913 GEN_VEXT_VX(vwsubu_vx_b, 1, 2) 914 GEN_VEXT_VX(vwsubu_vx_h, 2, 4) 915 GEN_VEXT_VX(vwsubu_vx_w, 4, 8) 916 GEN_VEXT_VX(vwadd_vx_b, 1, 2) 917 GEN_VEXT_VX(vwadd_vx_h, 2, 4) 918 GEN_VEXT_VX(vwadd_vx_w, 4, 8) 919 GEN_VEXT_VX(vwsub_vx_b, 1, 2) 920 GEN_VEXT_VX(vwsub_vx_h, 2, 4) 921 GEN_VEXT_VX(vwsub_vx_w, 4, 8) 922 GEN_VEXT_VX(vwaddu_wx_b, 1, 2) 923 GEN_VEXT_VX(vwaddu_wx_h, 2, 4) 924 GEN_VEXT_VX(vwaddu_wx_w, 4, 8) 925 GEN_VEXT_VX(vwsubu_wx_b, 1, 2) 926 GEN_VEXT_VX(vwsubu_wx_h, 2, 4) 927 GEN_VEXT_VX(vwsubu_wx_w, 4, 8) 928 GEN_VEXT_VX(vwadd_wx_b, 1, 2) 929 GEN_VEXT_VX(vwadd_wx_h, 2, 4) 930 GEN_VEXT_VX(vwadd_wx_w, 4, 8) 931 GEN_VEXT_VX(vwsub_wx_b, 1, 2) 932 GEN_VEXT_VX(vwsub_wx_h, 2, 4) 933 GEN_VEXT_VX(vwsub_wx_w, 4, 8) 934 935 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 936 #define DO_VADC(N, M, C) (N + M + C) 937 #define DO_VSBC(N, M, C) (N - M - C) 938 939 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 940 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 941 CPURISCVState *env, uint32_t desc) \ 942 { \ 943 uint32_t vl = env->vl; \ 944 uint32_t i; \ 945 \ 946 for (i = env->vstart; i < vl; i++) { \ 947 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 948 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 949 ETYPE carry = vext_elem_mask(v0, i); \ 950 \ 951 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 952 } \ 953 env->vstart = 0; \ 954 } 955 956 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 957 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 958 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 959 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 960 961 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 962 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 963 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 964 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 965 966 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 967 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 968 CPURISCVState *env, uint32_t desc) \ 969 { \ 970 uint32_t vl = env->vl; \ 971 uint32_t i; \ 972 \ 973 for (i = env->vstart; i < vl; i++) { \ 974 ETYPE s2 = 
*((ETYPE *)vs2 + H(i)); \ 975 ETYPE carry = vext_elem_mask(v0, i); \ 976 \ 977 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 978 } \ 979 env->vstart = 0; \ 980 } 981 982 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 983 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 984 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 985 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 986 987 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 988 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 989 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 990 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 991 992 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 993 (__typeof(N))(N + M) < N) 994 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 995 996 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 997 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 998 CPURISCVState *env, uint32_t desc) \ 999 { \ 1000 uint32_t vl = env->vl; \ 1001 uint32_t vm = vext_vm(desc); \ 1002 uint32_t i; \ 1003 \ 1004 for (i = env->vstart; i < vl; i++) { \ 1005 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1006 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1007 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1008 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1009 } \ 1010 env->vstart = 0; \ 1011 } 1012 1013 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1014 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1015 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1016 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1017 1018 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1019 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1020 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1021 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1022 1023 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1024 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1025 void *vs2, CPURISCVState *env, uint32_t desc) \ 1026 { \ 1027 uint32_t vl = env->vl; \ 1028 uint32_t vm = vext_vm(desc); \ 1029 uint32_t i; \ 1030 \ 1031 for (i = env->vstart; i < vl; i++) { \ 1032 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1033 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1034 vext_set_elem_mask(vd, i, \ 1035 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1036 } \ 1037 env->vstart = 0; \ 1038 } 1039 1040 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1041 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1042 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1043 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1044 1045 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1046 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1047 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1048 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1049 1050 /* Vector Bitwise Logical Instructions */ 1051 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1052 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1053 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1054 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1055 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1056 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1057 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1058 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1059 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1060 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1061 RVVCALL(OPIVV2, vxor_vv_w, 
OP_SSS_W, H4, H4, H4, DO_XOR) 1062 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1063 GEN_VEXT_VV(vand_vv_b, 1, 1) 1064 GEN_VEXT_VV(vand_vv_h, 2, 2) 1065 GEN_VEXT_VV(vand_vv_w, 4, 4) 1066 GEN_VEXT_VV(vand_vv_d, 8, 8) 1067 GEN_VEXT_VV(vor_vv_b, 1, 1) 1068 GEN_VEXT_VV(vor_vv_h, 2, 2) 1069 GEN_VEXT_VV(vor_vv_w, 4, 4) 1070 GEN_VEXT_VV(vor_vv_d, 8, 8) 1071 GEN_VEXT_VV(vxor_vv_b, 1, 1) 1072 GEN_VEXT_VV(vxor_vv_h, 2, 2) 1073 GEN_VEXT_VV(vxor_vv_w, 4, 4) 1074 GEN_VEXT_VV(vxor_vv_d, 8, 8) 1075 1076 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1077 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1078 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1079 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1080 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1081 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1082 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1083 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1084 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1085 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1086 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1087 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1088 GEN_VEXT_VX(vand_vx_b, 1, 1) 1089 GEN_VEXT_VX(vand_vx_h, 2, 2) 1090 GEN_VEXT_VX(vand_vx_w, 4, 4) 1091 GEN_VEXT_VX(vand_vx_d, 8, 8) 1092 GEN_VEXT_VX(vor_vx_b, 1, 1) 1093 GEN_VEXT_VX(vor_vx_h, 2, 2) 1094 GEN_VEXT_VX(vor_vx_w, 4, 4) 1095 GEN_VEXT_VX(vor_vx_d, 8, 8) 1096 GEN_VEXT_VX(vxor_vx_b, 1, 1) 1097 GEN_VEXT_VX(vxor_vx_h, 2, 2) 1098 GEN_VEXT_VX(vxor_vx_w, 4, 4) 1099 GEN_VEXT_VX(vxor_vx_d, 8, 8) 1100 1101 /* Vector Single-Width Bit Shift Instructions */ 1102 #define DO_SLL(N, M) (N << (M)) 1103 #define DO_SRL(N, M) (N >> (M)) 1104 1105 /* generate the helpers for shift instructions with two vector operators */ 1106 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1107 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1108 void *vs2, CPURISCVState *env, uint32_t desc) \ 1109 { \ 1110 uint32_t vm = vext_vm(desc); \ 1111 uint32_t vl = env->vl; \ 1112 uint32_t i; \ 1113 \ 1114 for (i = env->vstart; i < vl; i++) { \ 1115 if (!vm && !vext_elem_mask(v0, i)) { \ 1116 continue; \ 1117 } \ 1118 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1119 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1120 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1121 } \ 1122 env->vstart = 0; \ 1123 } 1124 1125 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1126 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1127 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1128 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1129 1130 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1131 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1132 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1133 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1134 1135 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1136 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1137 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1138 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1139 1140 /* generate the helpers for shift instructions with one vector and one scalar */ 1141 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1142 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1143 void *vs2, CPURISCVState *env, uint32_t 
desc) \ 1144 { \ 1145 uint32_t vm = vext_vm(desc); \ 1146 uint32_t vl = env->vl; \ 1147 uint32_t i; \ 1148 \ 1149 for (i = env->vstart; i < vl; i++) { \ 1150 if (!vm && !vext_elem_mask(v0, i)) { \ 1151 continue; \ 1152 } \ 1153 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1154 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1155 } \ 1156 env->vstart = 0; \ 1157 } 1158 1159 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1160 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1161 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1162 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1163 1164 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1165 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1166 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1167 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1168 1169 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1170 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1171 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1172 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1173 1174 /* Vector Narrowing Integer Right Shift Instructions */ 1175 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1176 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1177 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1178 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1179 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1180 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1181 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1182 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1183 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1184 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1185 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1186 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1187 1188 /* Vector Integer Comparison Instructions */ 1189 #define DO_MSEQ(N, M) (N == M) 1190 #define DO_MSNE(N, M) (N != M) 1191 #define DO_MSLT(N, M) (N < M) 1192 #define DO_MSLE(N, M) (N <= M) 1193 #define DO_MSGT(N, M) (N > M) 1194 1195 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1196 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1197 CPURISCVState *env, uint32_t desc) \ 1198 { \ 1199 uint32_t vm = vext_vm(desc); \ 1200 uint32_t vl = env->vl; \ 1201 uint32_t i; \ 1202 \ 1203 for (i = env->vstart; i < vl; i++) { \ 1204 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1205 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1206 if (!vm && !vext_elem_mask(v0, i)) { \ 1207 continue; \ 1208 } \ 1209 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1210 } \ 1211 env->vstart = 0; \ 1212 } 1213 1214 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1215 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1216 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1217 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1218 1219 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1220 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1221 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1222 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1223 1224 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1225 
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1226 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1227 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1228 1229 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1230 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1231 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1232 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1233 1234 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1235 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1236 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1237 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1238 1239 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1240 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1241 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1242 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1243 1244 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1245 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1246 CPURISCVState *env, uint32_t desc) \ 1247 { \ 1248 uint32_t vm = vext_vm(desc); \ 1249 uint32_t vl = env->vl; \ 1250 uint32_t i; \ 1251 \ 1252 for (i = env->vstart; i < vl; i++) { \ 1253 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1254 if (!vm && !vext_elem_mask(v0, i)) { \ 1255 continue; \ 1256 } \ 1257 vext_set_elem_mask(vd, i, \ 1258 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1259 } \ 1260 env->vstart = 0; \ 1261 } 1262 1263 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1264 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1265 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1266 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1267 1268 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1269 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1270 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1271 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1272 1273 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1274 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1275 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1276 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1277 1278 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1279 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1280 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1281 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1282 1283 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1284 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1285 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1286 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1287 1288 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1289 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1290 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1291 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1292 1293 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1294 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1295 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1296 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1297 1298 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1299 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1300 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1301 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1302 1303 /* Vector Integer Min/Max Instructions */ 1304 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1305 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1306 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1307 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1308 RVVCALL(OPIVV2, vmin_vv_b, 
OP_SSS_B, H1, H1, H1, DO_MIN) 1309 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1310 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1311 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1312 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1313 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1314 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1315 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1316 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1317 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1318 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1319 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1320 GEN_VEXT_VV(vminu_vv_b, 1, 1) 1321 GEN_VEXT_VV(vminu_vv_h, 2, 2) 1322 GEN_VEXT_VV(vminu_vv_w, 4, 4) 1323 GEN_VEXT_VV(vminu_vv_d, 8, 8) 1324 GEN_VEXT_VV(vmin_vv_b, 1, 1) 1325 GEN_VEXT_VV(vmin_vv_h, 2, 2) 1326 GEN_VEXT_VV(vmin_vv_w, 4, 4) 1327 GEN_VEXT_VV(vmin_vv_d, 8, 8) 1328 GEN_VEXT_VV(vmaxu_vv_b, 1, 1) 1329 GEN_VEXT_VV(vmaxu_vv_h, 2, 2) 1330 GEN_VEXT_VV(vmaxu_vv_w, 4, 4) 1331 GEN_VEXT_VV(vmaxu_vv_d, 8, 8) 1332 GEN_VEXT_VV(vmax_vv_b, 1, 1) 1333 GEN_VEXT_VV(vmax_vv_h, 2, 2) 1334 GEN_VEXT_VV(vmax_vv_w, 4, 4) 1335 GEN_VEXT_VV(vmax_vv_d, 8, 8) 1336 1337 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1338 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1339 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1340 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1341 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1342 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1343 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1344 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1345 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1346 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1347 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1348 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1349 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1350 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1351 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1352 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1353 GEN_VEXT_VX(vminu_vx_b, 1, 1) 1354 GEN_VEXT_VX(vminu_vx_h, 2, 2) 1355 GEN_VEXT_VX(vminu_vx_w, 4, 4) 1356 GEN_VEXT_VX(vminu_vx_d, 8, 8) 1357 GEN_VEXT_VX(vmin_vx_b, 1, 1) 1358 GEN_VEXT_VX(vmin_vx_h, 2, 2) 1359 GEN_VEXT_VX(vmin_vx_w, 4, 4) 1360 GEN_VEXT_VX(vmin_vx_d, 8, 8) 1361 GEN_VEXT_VX(vmaxu_vx_b, 1, 1) 1362 GEN_VEXT_VX(vmaxu_vx_h, 2, 2) 1363 GEN_VEXT_VX(vmaxu_vx_w, 4, 4) 1364 GEN_VEXT_VX(vmaxu_vx_d, 8, 8) 1365 GEN_VEXT_VX(vmax_vx_b, 1, 1) 1366 GEN_VEXT_VX(vmax_vx_h, 2, 2) 1367 GEN_VEXT_VX(vmax_vx_w, 4, 4) 1368 GEN_VEXT_VX(vmax_vx_d, 8, 8) 1369 1370 /* Vector Single-Width Integer Multiply Instructions */ 1371 #define DO_MUL(N, M) (N * M) 1372 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1373 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1374 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1375 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1376 GEN_VEXT_VV(vmul_vv_b, 1, 1) 1377 GEN_VEXT_VV(vmul_vv_h, 2, 2) 1378 GEN_VEXT_VV(vmul_vv_w, 4, 4) 1379 GEN_VEXT_VV(vmul_vv_d, 8, 8) 1380 1381 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1382 { 1383 return (int16_t)s2 * (int16_t)s1 >> 8; 1384 } 1385 1386 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1387 { 1388 return (int32_t)s2 * (int32_t)s1 >> 16; 1389 } 1390 1391 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1392 { 1393 return 
(int64_t)s2 * (int64_t)s1 >> 32; 1394 } 1395 1396 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1397 { 1398 uint64_t hi_64, lo_64; 1399 1400 muls64(&lo_64, &hi_64, s1, s2); 1401 return hi_64; 1402 } 1403 1404 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1405 { 1406 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1407 } 1408 1409 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1410 { 1411 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1412 } 1413 1414 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1415 { 1416 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1417 } 1418 1419 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1420 { 1421 uint64_t hi_64, lo_64; 1422 1423 mulu64(&lo_64, &hi_64, s2, s1); 1424 return hi_64; 1425 } 1426 1427 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1428 { 1429 return (int16_t)s2 * (uint16_t)s1 >> 8; 1430 } 1431 1432 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1433 { 1434 return (int32_t)s2 * (uint32_t)s1 >> 16; 1435 } 1436 1437 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1438 { 1439 return (int64_t)s2 * (uint64_t)s1 >> 32; 1440 } 1441 1442 /* 1443 * Let A = signed operand, 1444 * B = unsigned operand 1445 * P = mulu64(A, B), unsigned product 1446 * 1447 * LET X = 2 ** 64 - A, 2's complement of A 1448 * SP = signed product 1449 * THEN 1450 * IF A < 0 1451 * SP = -X * B 1452 * = -(2 ** 64 - A) * B 1453 * = A * B - 2 ** 64 * B 1454 * = P - 2 ** 64 * B 1455 * ELSE 1456 * SP = P 1457 * THEN 1458 * HI_P -= (A < 0 ? B : 0) 1459 */ 1460 1461 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1462 { 1463 uint64_t hi_64, lo_64; 1464 1465 mulu64(&lo_64, &hi_64, s2, s1); 1466 1467 hi_64 -= s2 < 0 ? s1 : 0; 1468 return hi_64; 1469 } 1470 1471 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1472 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1473 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1474 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1475 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1476 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1477 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1478 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1479 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1480 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1481 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1482 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1483 GEN_VEXT_VV(vmulh_vv_b, 1, 1) 1484 GEN_VEXT_VV(vmulh_vv_h, 2, 2) 1485 GEN_VEXT_VV(vmulh_vv_w, 4, 4) 1486 GEN_VEXT_VV(vmulh_vv_d, 8, 8) 1487 GEN_VEXT_VV(vmulhu_vv_b, 1, 1) 1488 GEN_VEXT_VV(vmulhu_vv_h, 2, 2) 1489 GEN_VEXT_VV(vmulhu_vv_w, 4, 4) 1490 GEN_VEXT_VV(vmulhu_vv_d, 8, 8) 1491 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) 1492 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) 1493 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) 1494 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) 1495 1496 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1497 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1498 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1499 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1500 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1501 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1502 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1503 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1504 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1505 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, 
do_mulhu_h) 1506 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1507 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1508 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1509 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1510 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1511 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1512 GEN_VEXT_VX(vmul_vx_b, 1, 1) 1513 GEN_VEXT_VX(vmul_vx_h, 2, 2) 1514 GEN_VEXT_VX(vmul_vx_w, 4, 4) 1515 GEN_VEXT_VX(vmul_vx_d, 8, 8) 1516 GEN_VEXT_VX(vmulh_vx_b, 1, 1) 1517 GEN_VEXT_VX(vmulh_vx_h, 2, 2) 1518 GEN_VEXT_VX(vmulh_vx_w, 4, 4) 1519 GEN_VEXT_VX(vmulh_vx_d, 8, 8) 1520 GEN_VEXT_VX(vmulhu_vx_b, 1, 1) 1521 GEN_VEXT_VX(vmulhu_vx_h, 2, 2) 1522 GEN_VEXT_VX(vmulhu_vx_w, 4, 4) 1523 GEN_VEXT_VX(vmulhu_vx_d, 8, 8) 1524 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) 1525 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) 1526 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) 1527 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) 1528 1529 /* Vector Integer Divide Instructions */ 1530 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1531 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1532 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1533 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1534 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1535 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1536 1537 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1538 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1539 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1540 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1541 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1542 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1543 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1544 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1545 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1546 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1547 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1548 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1549 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1550 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1551 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1552 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1553 GEN_VEXT_VV(vdivu_vv_b, 1, 1) 1554 GEN_VEXT_VV(vdivu_vv_h, 2, 2) 1555 GEN_VEXT_VV(vdivu_vv_w, 4, 4) 1556 GEN_VEXT_VV(vdivu_vv_d, 8, 8) 1557 GEN_VEXT_VV(vdiv_vv_b, 1, 1) 1558 GEN_VEXT_VV(vdiv_vv_h, 2, 2) 1559 GEN_VEXT_VV(vdiv_vv_w, 4, 4) 1560 GEN_VEXT_VV(vdiv_vv_d, 8, 8) 1561 GEN_VEXT_VV(vremu_vv_b, 1, 1) 1562 GEN_VEXT_VV(vremu_vv_h, 2, 2) 1563 GEN_VEXT_VV(vremu_vv_w, 4, 4) 1564 GEN_VEXT_VV(vremu_vv_d, 8, 8) 1565 GEN_VEXT_VV(vrem_vv_b, 1, 1) 1566 GEN_VEXT_VV(vrem_vv_h, 2, 2) 1567 GEN_VEXT_VV(vrem_vv_w, 4, 4) 1568 GEN_VEXT_VV(vrem_vv_d, 8, 8) 1569 1570 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1571 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1572 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1573 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1574 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1575 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1576 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1577 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1578 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1579 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, 
H2, DO_REMU) 1580 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1581 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1582 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1583 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1584 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1585 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1586 GEN_VEXT_VX(vdivu_vx_b, 1, 1) 1587 GEN_VEXT_VX(vdivu_vx_h, 2, 2) 1588 GEN_VEXT_VX(vdivu_vx_w, 4, 4) 1589 GEN_VEXT_VX(vdivu_vx_d, 8, 8) 1590 GEN_VEXT_VX(vdiv_vx_b, 1, 1) 1591 GEN_VEXT_VX(vdiv_vx_h, 2, 2) 1592 GEN_VEXT_VX(vdiv_vx_w, 4, 4) 1593 GEN_VEXT_VX(vdiv_vx_d, 8, 8) 1594 GEN_VEXT_VX(vremu_vx_b, 1, 1) 1595 GEN_VEXT_VX(vremu_vx_h, 2, 2) 1596 GEN_VEXT_VX(vremu_vx_w, 4, 4) 1597 GEN_VEXT_VX(vremu_vx_d, 8, 8) 1598 GEN_VEXT_VX(vrem_vx_b, 1, 1) 1599 GEN_VEXT_VX(vrem_vx_h, 2, 2) 1600 GEN_VEXT_VX(vrem_vx_w, 4, 4) 1601 GEN_VEXT_VX(vrem_vx_d, 8, 8) 1602 1603 /* Vector Widening Integer Multiply Instructions */ 1604 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1605 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1606 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1607 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1608 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1609 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1610 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1611 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1612 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1613 GEN_VEXT_VV(vwmul_vv_b, 1, 2) 1614 GEN_VEXT_VV(vwmul_vv_h, 2, 4) 1615 GEN_VEXT_VV(vwmul_vv_w, 4, 8) 1616 GEN_VEXT_VV(vwmulu_vv_b, 1, 2) 1617 GEN_VEXT_VV(vwmulu_vv_h, 2, 4) 1618 GEN_VEXT_VV(vwmulu_vv_w, 4, 8) 1619 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) 1620 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) 1621 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) 1622 1623 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1624 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1625 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1626 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1627 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1628 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1629 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1630 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1631 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1632 GEN_VEXT_VX(vwmul_vx_b, 1, 2) 1633 GEN_VEXT_VX(vwmul_vx_h, 2, 4) 1634 GEN_VEXT_VX(vwmul_vx_w, 4, 8) 1635 GEN_VEXT_VX(vwmulu_vx_b, 1, 2) 1636 GEN_VEXT_VX(vwmulu_vx_h, 2, 4) 1637 GEN_VEXT_VX(vwmulu_vx_w, 4, 8) 1638 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) 1639 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) 1640 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) 1641 1642 /* Vector Single-Width Integer Multiply-Add Instructions */ 1643 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1644 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1645 { \ 1646 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1647 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1648 TD d = *((TD *)vd + HD(i)); \ 1649 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1650 } 1651 1652 #define DO_MACC(N, M, D) (M * N + D) 1653 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1654 #define DO_MADD(N, M, D) (M * D + N) 1655 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1656 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1657 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1658 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1659 RVVCALL(OPIVV3, vmacc_vv_d, 
OP_SSS_D, H8, H8, H8, DO_MACC) 1660 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1661 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1662 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1663 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1664 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1665 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1666 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1667 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1668 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1669 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1670 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1671 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1672 GEN_VEXT_VV(vmacc_vv_b, 1, 1) 1673 GEN_VEXT_VV(vmacc_vv_h, 2, 2) 1674 GEN_VEXT_VV(vmacc_vv_w, 4, 4) 1675 GEN_VEXT_VV(vmacc_vv_d, 8, 8) 1676 GEN_VEXT_VV(vnmsac_vv_b, 1, 1) 1677 GEN_VEXT_VV(vnmsac_vv_h, 2, 2) 1678 GEN_VEXT_VV(vnmsac_vv_w, 4, 4) 1679 GEN_VEXT_VV(vnmsac_vv_d, 8, 8) 1680 GEN_VEXT_VV(vmadd_vv_b, 1, 1) 1681 GEN_VEXT_VV(vmadd_vv_h, 2, 2) 1682 GEN_VEXT_VV(vmadd_vv_w, 4, 4) 1683 GEN_VEXT_VV(vmadd_vv_d, 8, 8) 1684 GEN_VEXT_VV(vnmsub_vv_b, 1, 1) 1685 GEN_VEXT_VV(vnmsub_vv_h, 2, 2) 1686 GEN_VEXT_VV(vnmsub_vv_w, 4, 4) 1687 GEN_VEXT_VV(vnmsub_vv_d, 8, 8) 1688 1689 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1690 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1691 { \ 1692 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1693 TD d = *((TD *)vd + HD(i)); \ 1694 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1695 } 1696 1697 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1698 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1699 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1700 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1701 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1702 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1703 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1704 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1705 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1706 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1707 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1708 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1709 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1710 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1711 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1712 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1713 GEN_VEXT_VX(vmacc_vx_b, 1, 1) 1714 GEN_VEXT_VX(vmacc_vx_h, 2, 2) 1715 GEN_VEXT_VX(vmacc_vx_w, 4, 4) 1716 GEN_VEXT_VX(vmacc_vx_d, 8, 8) 1717 GEN_VEXT_VX(vnmsac_vx_b, 1, 1) 1718 GEN_VEXT_VX(vnmsac_vx_h, 2, 2) 1719 GEN_VEXT_VX(vnmsac_vx_w, 4, 4) 1720 GEN_VEXT_VX(vnmsac_vx_d, 8, 8) 1721 GEN_VEXT_VX(vmadd_vx_b, 1, 1) 1722 GEN_VEXT_VX(vmadd_vx_h, 2, 2) 1723 GEN_VEXT_VX(vmadd_vx_w, 4, 4) 1724 GEN_VEXT_VX(vmadd_vx_d, 8, 8) 1725 GEN_VEXT_VX(vnmsub_vx_b, 1, 1) 1726 GEN_VEXT_VX(vnmsub_vx_h, 2, 2) 1727 GEN_VEXT_VX(vnmsub_vx_w, 4, 4) 1728 GEN_VEXT_VX(vnmsub_vx_d, 8, 8) 1729 1730 /* Vector Widening Integer Multiply-Add Instructions */ 1731 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1732 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1733 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1734 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1735 
RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1736 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1737 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1738 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1739 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1740 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) 1741 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) 1742 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) 1743 GEN_VEXT_VV(vwmacc_vv_b, 1, 2) 1744 GEN_VEXT_VV(vwmacc_vv_h, 2, 4) 1745 GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 1746 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) 1747 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) 1748 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) 1749 1750 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1751 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1752 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1753 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1754 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1755 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1756 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1757 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1758 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1759 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1760 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1761 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1762 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) 1763 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) 1764 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) 1765 GEN_VEXT_VX(vwmacc_vx_b, 1, 2) 1766 GEN_VEXT_VX(vwmacc_vx_h, 2, 4) 1767 GEN_VEXT_VX(vwmacc_vx_w, 4, 8) 1768 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) 1769 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) 1770 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) 1771 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) 1772 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) 1773 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) 1774 1775 /* Vector Integer Merge and Move Instructions */ 1776 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1777 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1778 uint32_t desc) \ 1779 { \ 1780 uint32_t vl = env->vl; \ 1781 uint32_t i; \ 1782 \ 1783 for (i = env->vstart; i < vl; i++) { \ 1784 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1785 *((ETYPE *)vd + H(i)) = s1; \ 1786 } \ 1787 env->vstart = 0; \ 1788 } 1789 1790 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1791 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1792 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1793 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1794 1795 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1796 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1797 uint32_t desc) \ 1798 { \ 1799 uint32_t vl = env->vl; \ 1800 uint32_t i; \ 1801 \ 1802 for (i = env->vstart; i < vl; i++) { \ 1803 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1804 } \ 1805 env->vstart = 0; \ 1806 } 1807 1808 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1809 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1810 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1811 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1812 1813 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1814 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1815 CPURISCVState *env, uint32_t desc) \ 1816 { \ 1817 uint32_t vl = env->vl; \ 1818 uint32_t i; \ 1819 \ 1820 for (i = env->vstart; i < vl; i++) { \ 1821 ETYPE *vt = (!vext_elem_mask(v0, i) ? 
                                      vs2 : vs1);          \
        *((ETYPE *)vd + H(i)) = *(vt + H(i));               \
    }                                                       \
    env->vstart = 0;                                        \
}

GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)

#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                  \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                           \
    uint32_t vl = env->vl;                                  \
    uint32_t i;                                             \
                                                            \
    for (i = env->vstart; i < vl; i++) {                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                  \
        ETYPE d = (!vext_elem_mask(v0, i) ? s2 :            \
                   (ETYPE)(target_long)s1);                 \
        *((ETYPE *)vd + H(i)) = d;                          \
    }                                                       \
    env->vstart = 0;                                        \
}

GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)

/*
 *** Vector Fixed-Point Arithmetic Instructions
 */

/* Vector Single-Width Saturating Add and Subtract */

/*
 * Fixed-point instructions have a rounding mode and may saturate,
 * so define the common macros for the fixed-point helpers here.
 */
typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
                          CPURISCVState *env, int vxrm);

#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
static inline void                                              \
do_##NAME(void *vd, void *vs1, void *vs2, int i,                \
          CPURISCVState *env, int vxrm)                         \
{                                                               \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                \
}

static inline void
vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
             uint32_t vl, uint32_t vm, int vxrm,
             opivv2_rm_fn *fn)
{
    for (uint32_t i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, vs1, vs2, i, env, vxrm);
    }
    env->vstart = 0;
}

static inline void
vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
             uint32_t desc, uint32_t esz, uint32_t dsz,
             opivv2_rm_fn *fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;

    switch (env->vxrm) {
    case 0: /* rnu */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 0, fn);
        break;
    case 1: /* rne */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 1, fn);
        break;
    case 2: /* rdn */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 2, fn);
        break;
    default: /* rod */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 3, fn);
        break;
    }
}

/* generate helpers for fixed point instructions with OPIVV format */
#define GEN_VEXT_VV_RM(NAME, ESZ, DSZ)                      \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc)        \
{                                                           \
    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
                 do_##NAME);                                \
}

static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
{
    uint8_t res = a + b;
    if (res < a) {
        res = UINT8_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
                               uint16_t b)
{
    uint16_t res = a + b;
    if (res < a) {
        res = UINT16_MAX;
        env->vxsat = 0x1;
    }
    return res;
1948 } 1949 1950 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1951 uint32_t b) 1952 { 1953 uint32_t res = a + b; 1954 if (res < a) { 1955 res = UINT32_MAX; 1956 env->vxsat = 0x1; 1957 } 1958 return res; 1959 } 1960 1961 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1962 uint64_t b) 1963 { 1964 uint64_t res = a + b; 1965 if (res < a) { 1966 res = UINT64_MAX; 1967 env->vxsat = 0x1; 1968 } 1969 return res; 1970 } 1971 1972 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 1973 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 1974 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 1975 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 1976 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) 1977 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) 1978 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) 1979 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) 1980 1981 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 1982 CPURISCVState *env, int vxrm); 1983 1984 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1985 static inline void \ 1986 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 1987 CPURISCVState *env, int vxrm) \ 1988 { \ 1989 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1990 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 1991 } 1992 1993 static inline void 1994 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 1995 CPURISCVState *env, 1996 uint32_t vl, uint32_t vm, int vxrm, 1997 opivx2_rm_fn *fn) 1998 { 1999 for (uint32_t i = env->vstart; i < vl; i++) { 2000 if (!vm && !vext_elem_mask(v0, i)) { 2001 continue; 2002 } 2003 fn(vd, s1, vs2, i, env, vxrm); 2004 } 2005 env->vstart = 0; 2006 } 2007 2008 static inline void 2009 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2010 CPURISCVState *env, 2011 uint32_t desc, uint32_t esz, uint32_t dsz, 2012 opivx2_rm_fn *fn) 2013 { 2014 uint32_t vm = vext_vm(desc); 2015 uint32_t vl = env->vl; 2016 2017 switch (env->vxrm) { 2018 case 0: /* rnu */ 2019 vext_vx_rm_1(vd, v0, s1, vs2, 2020 env, vl, vm, 0, fn); 2021 break; 2022 case 1: /* rne */ 2023 vext_vx_rm_1(vd, v0, s1, vs2, 2024 env, vl, vm, 1, fn); 2025 break; 2026 case 2: /* rdn */ 2027 vext_vx_rm_1(vd, v0, s1, vs2, 2028 env, vl, vm, 2, fn); 2029 break; 2030 default: /* rod */ 2031 vext_vx_rm_1(vd, v0, s1, vs2, 2032 env, vl, vm, 3, fn); 2033 break; 2034 } 2035 } 2036 2037 /* generate helpers for fixed point instructions with OPIVX format */ 2038 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ 2039 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2040 void *vs2, CPURISCVState *env, uint32_t desc) \ 2041 { \ 2042 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 2043 do_##NAME); \ 2044 } 2045 2046 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2047 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2048 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2049 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2050 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 2051 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) 2052 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) 2053 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) 2054 2055 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2056 { 2057 int8_t res = a + b; 2058 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2059 res = a > 0 ? 
INT8_MAX : INT8_MIN; 2060 env->vxsat = 0x1; 2061 } 2062 return res; 2063 } 2064 2065 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2066 { 2067 int16_t res = a + b; 2068 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2069 res = a > 0 ? INT16_MAX : INT16_MIN; 2070 env->vxsat = 0x1; 2071 } 2072 return res; 2073 } 2074 2075 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2076 { 2077 int32_t res = a + b; 2078 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2079 res = a > 0 ? INT32_MAX : INT32_MIN; 2080 env->vxsat = 0x1; 2081 } 2082 return res; 2083 } 2084 2085 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2086 { 2087 int64_t res = a + b; 2088 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2089 res = a > 0 ? INT64_MAX : INT64_MIN; 2090 env->vxsat = 0x1; 2091 } 2092 return res; 2093 } 2094 2095 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2096 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2097 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2098 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2099 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) 2100 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) 2101 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) 2102 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) 2103 2104 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2105 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2106 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2107 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2108 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) 2109 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) 2110 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) 2111 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) 2112 2113 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2114 { 2115 uint8_t res = a - b; 2116 if (res > a) { 2117 res = 0; 2118 env->vxsat = 0x1; 2119 } 2120 return res; 2121 } 2122 2123 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2124 uint16_t b) 2125 { 2126 uint16_t res = a - b; 2127 if (res > a) { 2128 res = 0; 2129 env->vxsat = 0x1; 2130 } 2131 return res; 2132 } 2133 2134 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2135 uint32_t b) 2136 { 2137 uint32_t res = a - b; 2138 if (res > a) { 2139 res = 0; 2140 env->vxsat = 0x1; 2141 } 2142 return res; 2143 } 2144 2145 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2146 uint64_t b) 2147 { 2148 uint64_t res = a - b; 2149 if (res > a) { 2150 res = 0; 2151 env->vxsat = 0x1; 2152 } 2153 return res; 2154 } 2155 2156 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2157 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2158 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2159 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2160 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) 2161 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) 2162 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) 2163 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) 2164 2165 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2166 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2167 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2168 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2169 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) 2170 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) 2171 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) 2172 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) 2173 2174 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2175 { 2176 int8_t res = a - b; 2177 if 
    ((res ^ a) & (a ^ b) & INT8_MIN) {
        res = a >= 0 ? INT8_MAX : INT8_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
{
    int16_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT16_MIN) {
        res = a >= 0 ? INT16_MAX : INT16_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    int32_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT32_MIN) {
        res = a >= 0 ? INT32_MAX : INT32_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    int64_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT64_MIN) {
        res = a >= 0 ? INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)

/* Vector Single-Width Averaging Add and Subtract */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d = extract64(v, shift, 1);
    uint8_t d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    d1 = extract64(v, shift - 1, 1);
    D1 = extract64(v, 0, shift);
    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            D2 = extract64(v, 0, shift - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}

static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    int64_t res = (int64_t)a + b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    int64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63.
*/ 2275 return ((res >> 1) ^ over) + round; 2276 } 2277 2278 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2279 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2280 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2281 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2282 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) 2283 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 2284 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) 2285 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) 2286 2287 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2288 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2289 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2290 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2291 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) 2292 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) 2293 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) 2294 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) 2295 2296 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2297 uint32_t a, uint32_t b) 2298 { 2299 uint64_t res = (uint64_t)a + b; 2300 uint8_t round = get_round(vxrm, res, 1); 2301 2302 return (res >> 1) + round; 2303 } 2304 2305 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2306 uint64_t a, uint64_t b) 2307 { 2308 uint64_t res = a + b; 2309 uint8_t round = get_round(vxrm, res, 1); 2310 uint64_t over = (uint64_t)(res < a) << 63; 2311 2312 return ((res >> 1) | over) + round; 2313 } 2314 2315 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2316 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2317 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2318 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2319 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1) 2320 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2) 2321 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4) 2322 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8) 2323 2324 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2325 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2326 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2327 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2328 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1) 2329 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2) 2330 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4) 2331 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8) 2332 2333 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2334 { 2335 int64_t res = (int64_t)a - b; 2336 uint8_t round = get_round(vxrm, res, 1); 2337 2338 return (res >> 1) + round; 2339 } 2340 2341 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2342 { 2343 int64_t res = (int64_t)a - b; 2344 uint8_t round = get_round(vxrm, res, 1); 2345 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2346 2347 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2348 return ((res >> 1) ^ over) + round; 2349 } 2350 2351 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2352 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2353 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2354 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2355 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) 2356 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) 2357 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) 2358 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) 2359 2360 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2361 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2362 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2363 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2364 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) 2365 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) 2366 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) 2367 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) 2368 2369 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2370 uint32_t a, uint32_t b) 2371 { 2372 int64_t res = (int64_t)a - b; 2373 uint8_t round = get_round(vxrm, res, 1); 2374 2375 return (res >> 1) + round; 2376 } 2377 2378 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2379 uint64_t a, uint64_t b) 2380 { 2381 uint64_t res = (uint64_t)a - b; 2382 uint8_t round = get_round(vxrm, res, 1); 2383 uint64_t over = (uint64_t)(res > a) << 63; 2384 2385 return ((res >> 1) | over) + round; 2386 } 2387 2388 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2389 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2390 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2391 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2392 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1) 2393 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2) 2394 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4) 2395 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8) 2396 2397 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2398 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2399 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2400 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2401 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1) 2402 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2) 2403 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4) 2404 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8) 2405 2406 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2407 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2408 { 2409 uint8_t round; 2410 int16_t res; 2411 2412 res = (int16_t)a * (int16_t)b; 2413 round = get_round(vxrm, res, 7); 2414 res = (res >> 7) + round; 2415 2416 if (res > INT8_MAX) { 2417 env->vxsat = 0x1; 2418 return INT8_MAX; 2419 } else if (res < INT8_MIN) { 2420 env->vxsat = 0x1; 2421 return INT8_MIN; 2422 } else { 2423 return res; 2424 } 2425 } 2426 2427 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2428 { 2429 uint8_t round; 2430 int32_t res; 2431 2432 res = (int32_t)a * (int32_t)b; 2433 round = get_round(vxrm, res, 15); 2434 res = (res >> 15) + round; 2435 2436 if (res > INT16_MAX) { 2437 env->vxsat = 0x1; 2438 return INT16_MAX; 2439 } else if (res < INT16_MIN) { 2440 env->vxsat = 0x1; 2441 return INT16_MIN; 2442 } else { 2443 return res; 2444 } 2445 } 2446 2447 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2448 { 2449 uint8_t round; 2450 int64_t res; 2451 2452 res = (int64_t)a * (int64_t)b; 2453 round = get_round(vxrm, res, 31); 2454 res = (res >> 31) + round; 2455 2456 if (res > INT32_MAX) { 2457 env->vxsat = 0x1; 2458 return INT32_MAX; 2459 } else 
if (res < INT32_MIN) { 2460 env->vxsat = 0x1; 2461 return INT32_MIN; 2462 } else { 2463 return res; 2464 } 2465 } 2466 2467 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2468 { 2469 uint8_t round; 2470 uint64_t hi_64, lo_64; 2471 int64_t res; 2472 2473 if (a == INT64_MIN && b == INT64_MIN) { 2474 env->vxsat = 1; 2475 return INT64_MAX; 2476 } 2477 2478 muls64(&lo_64, &hi_64, a, b); 2479 round = get_round(vxrm, lo_64, 63); 2480 /* 2481 * Cannot overflow, as there are always 2482 * 2 sign bits after multiply. 2483 */ 2484 res = (hi_64 << 1) | (lo_64 >> 63); 2485 if (round) { 2486 if (res == INT64_MAX) { 2487 env->vxsat = 1; 2488 } else { 2489 res += 1; 2490 } 2491 } 2492 return res; 2493 } 2494 2495 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2496 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2497 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2498 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2499 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) 2500 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) 2501 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) 2502 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) 2503 2504 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2505 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2506 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2507 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2508 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) 2509 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) 2510 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) 2511 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) 2512 2513 /* Vector Single-Width Scaling Shift Instructions */ 2514 static inline uint8_t 2515 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2516 { 2517 uint8_t round, shift = b & 0x7; 2518 uint8_t res; 2519 2520 round = get_round(vxrm, a, shift); 2521 res = (a >> shift) + round; 2522 return res; 2523 } 2524 static inline uint16_t 2525 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2526 { 2527 uint8_t round, shift = b & 0xf; 2528 uint16_t res; 2529 2530 round = get_round(vxrm, a, shift); 2531 res = (a >> shift) + round; 2532 return res; 2533 } 2534 static inline uint32_t 2535 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2536 { 2537 uint8_t round, shift = b & 0x1f; 2538 uint32_t res; 2539 2540 round = get_round(vxrm, a, shift); 2541 res = (a >> shift) + round; 2542 return res; 2543 } 2544 static inline uint64_t 2545 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2546 { 2547 uint8_t round, shift = b & 0x3f; 2548 uint64_t res; 2549 2550 round = get_round(vxrm, a, shift); 2551 res = (a >> shift) + round; 2552 return res; 2553 } 2554 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2555 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2556 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2557 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2558 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) 2559 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) 2560 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) 2561 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) 2562 2563 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2564 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2565 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2566 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2567 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) 2568 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) 2569 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) 2570 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) 2571 2572 static inline int8_t 2573 vssra8(CPURISCVState *env, int 
vxrm, int8_t a, int8_t b) 2574 { 2575 uint8_t round, shift = b & 0x7; 2576 int8_t res; 2577 2578 round = get_round(vxrm, a, shift); 2579 res = (a >> shift) + round; 2580 return res; 2581 } 2582 static inline int16_t 2583 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2584 { 2585 uint8_t round, shift = b & 0xf; 2586 int16_t res; 2587 2588 round = get_round(vxrm, a, shift); 2589 res = (a >> shift) + round; 2590 return res; 2591 } 2592 static inline int32_t 2593 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2594 { 2595 uint8_t round, shift = b & 0x1f; 2596 int32_t res; 2597 2598 round = get_round(vxrm, a, shift); 2599 res = (a >> shift) + round; 2600 return res; 2601 } 2602 static inline int64_t 2603 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2604 { 2605 uint8_t round, shift = b & 0x3f; 2606 int64_t res; 2607 2608 round = get_round(vxrm, a, shift); 2609 res = (a >> shift) + round; 2610 return res; 2611 } 2612 2613 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2614 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2615 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2616 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2617 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) 2618 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) 2619 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) 2620 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) 2621 2622 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2623 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2624 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2625 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2626 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) 2627 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) 2628 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) 2629 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) 2630 2631 /* Vector Narrowing Fixed-Point Clip Instructions */ 2632 static inline int8_t 2633 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2634 { 2635 uint8_t round, shift = b & 0xf; 2636 int16_t res; 2637 2638 round = get_round(vxrm, a, shift); 2639 res = (a >> shift) + round; 2640 if (res > INT8_MAX) { 2641 env->vxsat = 0x1; 2642 return INT8_MAX; 2643 } else if (res < INT8_MIN) { 2644 env->vxsat = 0x1; 2645 return INT8_MIN; 2646 } else { 2647 return res; 2648 } 2649 } 2650 2651 static inline int16_t 2652 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2653 { 2654 uint8_t round, shift = b & 0x1f; 2655 int32_t res; 2656 2657 round = get_round(vxrm, a, shift); 2658 res = (a >> shift) + round; 2659 if (res > INT16_MAX) { 2660 env->vxsat = 0x1; 2661 return INT16_MAX; 2662 } else if (res < INT16_MIN) { 2663 env->vxsat = 0x1; 2664 return INT16_MIN; 2665 } else { 2666 return res; 2667 } 2668 } 2669 2670 static inline int32_t 2671 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2672 { 2673 uint8_t round, shift = b & 0x3f; 2674 int64_t res; 2675 2676 round = get_round(vxrm, a, shift); 2677 res = (a >> shift) + round; 2678 if (res > INT32_MAX) { 2679 env->vxsat = 0x1; 2680 return INT32_MAX; 2681 } else if (res < INT32_MIN) { 2682 env->vxsat = 0x1; 2683 return INT32_MIN; 2684 } else { 2685 return res; 2686 } 2687 } 2688 2689 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2690 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2691 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2692 GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1) 2693 GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2) 2694 GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4) 2695 2696 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, 
        vnclip8)
RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)

static inline uint8_t
vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
{
    uint8_t round, shift = b & 0xf;
    uint16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT8_MAX) {
        env->vxsat = 0x1;
        return UINT8_MAX;
    } else {
        return res;
    }
}

static inline uint16_t
vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
{
    uint8_t round, shift = b & 0x1f;
    uint32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT16_MAX) {
        env->vxsat = 0x1;
        return UINT16_MAX;
    } else {
        return res;
    }
}

static inline uint32_t
vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x3f;
    uint64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT32_MAX) {
        env->vxsat = 0x1;
        return UINT32_MAX;
    } else {
        return res;
    }
}

RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)

RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)

/*
 *** Vector Floating-Point Arithmetic Instructions
 */
/* Vector Single-Width Floating-Point Add/Subtract Instructions */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
}

#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t i;                                           \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, vs1, vs2, i, env);                  \
    }                                                     \
    env->vstart = 0;                                      \
}

RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)

#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}

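/*
 * A sketch of how these macros compose, assuming RVVCALL() forwards its
 * arguments unchanged to the OP* macro and OP_UUU_H supplies uint16_t for
 * every operand type (as with the integer helpers above).  Under those
 * assumptions, a pair such as
 *
 *     RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
 *
 * expands roughly to
 *
 *     static void do_vfadd_vf_h(void *vd, uint64_t s1, void *vs2, int i,
 *                               CPURISCVState *env)
 *     {
 *         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *         *((uint16_t *)vd + H2(i)) =
 *             float16_add(s2, (uint16_t)s1, &env->fp_status);
 *     }
 *
 * and the GEN_VEXT_VF() macro defined just below then wraps do_vfadd_vf_h()
 * in the masked element loop that forms the out-of-line helper.
 */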
2811 #define GEN_VEXT_VF(NAME, ESZ, DSZ) \ 2812 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2813 void *vs2, CPURISCVState *env, \ 2814 uint32_t desc) \ 2815 { \ 2816 uint32_t vm = vext_vm(desc); \ 2817 uint32_t vl = env->vl; \ 2818 uint32_t i; \ 2819 \ 2820 for (i = env->vstart; i < vl; i++) { \ 2821 if (!vm && !vext_elem_mask(v0, i)) { \ 2822 continue; \ 2823 } \ 2824 do_##NAME(vd, s1, vs2, i, env); \ 2825 } \ 2826 env->vstart = 0; \ 2827 } 2828 2829 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2830 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2831 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2832 GEN_VEXT_VF(vfadd_vf_h, 2, 2) 2833 GEN_VEXT_VF(vfadd_vf_w, 4, 4) 2834 GEN_VEXT_VF(vfadd_vf_d, 8, 8) 2835 2836 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2837 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2838 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2839 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) 2840 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) 2841 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) 2842 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2843 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2844 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2845 GEN_VEXT_VF(vfsub_vf_h, 2, 2) 2846 GEN_VEXT_VF(vfsub_vf_w, 4, 4) 2847 GEN_VEXT_VF(vfsub_vf_d, 8, 8) 2848 2849 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2850 { 2851 return float16_sub(b, a, s); 2852 } 2853 2854 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2855 { 2856 return float32_sub(b, a, s); 2857 } 2858 2859 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2860 { 2861 return float64_sub(b, a, s); 2862 } 2863 2864 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2865 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2866 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2867 GEN_VEXT_VF(vfrsub_vf_h, 2, 2) 2868 GEN_VEXT_VF(vfrsub_vf_w, 4, 4) 2869 GEN_VEXT_VF(vfrsub_vf_d, 8, 8) 2870 2871 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2872 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2873 { 2874 return float32_add(float16_to_float32(a, true, s), 2875 float16_to_float32(b, true, s), s); 2876 } 2877 2878 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2879 { 2880 return float64_add(float32_to_float64(a, s), 2881 float32_to_float64(b, s), s); 2882 2883 } 2884 2885 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2886 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2887 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) 2888 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) 2889 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2890 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2891 GEN_VEXT_VF(vfwadd_vf_h, 2, 4) 2892 GEN_VEXT_VF(vfwadd_vf_w, 4, 8) 2893 2894 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2895 { 2896 return float32_sub(float16_to_float32(a, true, s), 2897 float16_to_float32(b, true, s), s); 2898 } 2899 2900 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2901 { 2902 return float64_sub(float32_to_float64(a, s), 2903 float32_to_float64(b, s), s); 2904 2905 } 2906 2907 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 2908 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 2909 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) 2910 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) 2911 RVVCALL(OPFVF2, vfwsub_vf_h, 
WOP_UUU_H, H4, H2, vfwsub16) 2912 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 2913 GEN_VEXT_VF(vfwsub_vf_h, 2, 4) 2914 GEN_VEXT_VF(vfwsub_vf_w, 4, 8) 2915 2916 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 2917 { 2918 return float32_add(a, float16_to_float32(b, true, s), s); 2919 } 2920 2921 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 2922 { 2923 return float64_add(a, float32_to_float64(b, s), s); 2924 } 2925 2926 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 2927 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 2928 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) 2929 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 2930 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 2931 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 2932 GEN_VEXT_VF(vfwadd_wf_h, 2, 4) 2933 GEN_VEXT_VF(vfwadd_wf_w, 4, 8) 2934 2935 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 2936 { 2937 return float32_sub(a, float16_to_float32(b, true, s), s); 2938 } 2939 2940 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 2941 { 2942 return float64_sub(a, float32_to_float64(b, s), s); 2943 } 2944 2945 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 2946 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 2947 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) 2948 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) 2949 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 2950 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 2951 GEN_VEXT_VF(vfwsub_wf_h, 2, 4) 2952 GEN_VEXT_VF(vfwsub_wf_w, 4, 8) 2953 2954 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 2955 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 2956 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 2957 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 2958 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) 2959 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) 2960 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) 2961 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 2962 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 2963 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 2964 GEN_VEXT_VF(vfmul_vf_h, 2, 2) 2965 GEN_VEXT_VF(vfmul_vf_w, 4, 4) 2966 GEN_VEXT_VF(vfmul_vf_d, 8, 8) 2967 2968 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 2969 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 2970 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 2971 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 2972 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) 2973 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) 2974 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 2975 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 2976 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 2977 GEN_VEXT_VF(vfdiv_vf_h, 2, 2) 2978 GEN_VEXT_VF(vfdiv_vf_w, 4, 4) 2979 GEN_VEXT_VF(vfdiv_vf_d, 8, 8) 2980 2981 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 2982 { 2983 return float16_div(b, a, s); 2984 } 2985 2986 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 2987 { 2988 return float32_div(b, a, s); 2989 } 2990 2991 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 2992 { 2993 return float64_div(b, a, s); 2994 } 2995 2996 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 2997 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 2998 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 2999 
GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) 3000 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) 3001 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) 3002 3003 /* Vector Widening Floating-Point Multiply */ 3004 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3005 { 3006 return float32_mul(float16_to_float32(a, true, s), 3007 float16_to_float32(b, true, s), s); 3008 } 3009 3010 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3011 { 3012 return float64_mul(float32_to_float64(a, s), 3013 float32_to_float64(b, s), s); 3014 3015 } 3016 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3017 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3018 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) 3019 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) 3020 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3021 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3022 GEN_VEXT_VF(vfwmul_vf_h, 2, 4) 3023 GEN_VEXT_VF(vfwmul_vf_w, 4, 8) 3024 3025 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3026 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3027 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3028 CPURISCVState *env) \ 3029 { \ 3030 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3031 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3032 TD d = *((TD *)vd + HD(i)); \ 3033 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3034 } 3035 3036 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3037 { 3038 return float16_muladd(a, b, d, 0, s); 3039 } 3040 3041 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3042 { 3043 return float32_muladd(a, b, d, 0, s); 3044 } 3045 3046 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3047 { 3048 return float64_muladd(a, b, d, 0, s); 3049 } 3050 3051 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3052 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3053 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3054 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) 3055 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) 3056 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) 3057 3058 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3059 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3060 CPURISCVState *env) \ 3061 { \ 3062 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3063 TD d = *((TD *)vd + HD(i)); \ 3064 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3065 } 3066 3067 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3068 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3069 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3070 GEN_VEXT_VF(vfmacc_vf_h, 2, 2) 3071 GEN_VEXT_VF(vfmacc_vf_w, 4, 4) 3072 GEN_VEXT_VF(vfmacc_vf_d, 8, 8) 3073 3074 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3075 { 3076 return float16_muladd(a, b, d, 3077 float_muladd_negate_c | float_muladd_negate_product, s); 3078 } 3079 3080 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3081 { 3082 return float32_muladd(a, b, d, 3083 float_muladd_negate_c | float_muladd_negate_product, s); 3084 } 3085 3086 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3087 { 3088 return float64_muladd(a, b, d, 3089 float_muladd_negate_c | float_muladd_negate_product, s); 3090 } 3091 3092 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3093 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3094 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3095 
GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) 3096 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) 3097 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) 3098 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3099 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3100 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3101 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) 3102 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) 3103 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) 3104 3105 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3106 { 3107 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3108 } 3109 3110 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3111 { 3112 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3113 } 3114 3115 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3116 { 3117 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3118 } 3119 3120 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3121 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3122 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3123 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) 3124 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) 3125 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) 3126 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3127 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3128 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3129 GEN_VEXT_VF(vfmsac_vf_h, 2, 2) 3130 GEN_VEXT_VF(vfmsac_vf_w, 4, 4) 3131 GEN_VEXT_VF(vfmsac_vf_d, 8, 8) 3132 3133 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3134 { 3135 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3136 } 3137 3138 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3139 { 3140 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3141 } 3142 3143 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3144 { 3145 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3146 } 3147 3148 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3149 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3150 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3151 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) 3152 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) 3153 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) 3154 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3155 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3156 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3157 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) 3158 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) 3159 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) 3160 3161 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3162 { 3163 return float16_muladd(d, b, a, 0, s); 3164 } 3165 3166 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3167 { 3168 return float32_muladd(d, b, a, 0, s); 3169 } 3170 3171 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3172 { 3173 return float64_muladd(d, b, a, 0, s); 3174 } 3175 3176 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3177 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3178 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3179 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) 3180 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) 3181 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) 3182 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3183 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3184 
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3185 GEN_VEXT_VF(vfmadd_vf_h, 2, 2) 3186 GEN_VEXT_VF(vfmadd_vf_w, 4, 4) 3187 GEN_VEXT_VF(vfmadd_vf_d, 8, 8) 3188 3189 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3190 { 3191 return float16_muladd(d, b, a, 3192 float_muladd_negate_c | float_muladd_negate_product, s); 3193 } 3194 3195 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3196 { 3197 return float32_muladd(d, b, a, 3198 float_muladd_negate_c | float_muladd_negate_product, s); 3199 } 3200 3201 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3202 { 3203 return float64_muladd(d, b, a, 3204 float_muladd_negate_c | float_muladd_negate_product, s); 3205 } 3206 3207 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3208 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3209 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3210 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) 3211 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) 3212 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) 3213 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3214 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3215 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3216 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) 3217 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) 3218 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) 3219 3220 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3221 { 3222 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3223 } 3224 3225 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3226 { 3227 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3228 } 3229 3230 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3231 { 3232 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3233 } 3234 3235 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3236 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3237 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3238 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) 3239 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 3240 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) 3241 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3242 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3243 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3244 GEN_VEXT_VF(vfmsub_vf_h, 2, 2) 3245 GEN_VEXT_VF(vfmsub_vf_w, 4, 4) 3246 GEN_VEXT_VF(vfmsub_vf_d, 8, 8) 3247 3248 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3249 { 3250 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3251 } 3252 3253 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3254 { 3255 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3256 } 3257 3258 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3259 { 3260 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3261 } 3262 3263 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3264 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3265 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3266 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) 3267 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) 3268 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) 3269 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3270 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3271 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3272 
GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) 3273 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) 3274 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) 3275 3276 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3277 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3278 { 3279 return float32_muladd(float16_to_float32(a, true, s), 3280 float16_to_float32(b, true, s), d, 0, s); 3281 } 3282 3283 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3284 { 3285 return float64_muladd(float32_to_float64(a, s), 3286 float32_to_float64(b, s), d, 0, s); 3287 } 3288 3289 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3290 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3291 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) 3292 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) 3293 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3294 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3295 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) 3296 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) 3297 3298 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3299 { 3300 return float32_muladd(float16_to_float32(a, true, s), 3301 float16_to_float32(b, true, s), d, 3302 float_muladd_negate_c | float_muladd_negate_product, s); 3303 } 3304 3305 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3306 { 3307 return float64_muladd(float32_to_float64(a, s), 3308 float32_to_float64(b, s), d, 3309 float_muladd_negate_c | float_muladd_negate_product, s); 3310 } 3311 3312 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3313 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3314 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) 3315 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) 3316 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3317 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3318 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) 3319 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) 3320 3321 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3322 { 3323 return float32_muladd(float16_to_float32(a, true, s), 3324 float16_to_float32(b, true, s), d, 3325 float_muladd_negate_c, s); 3326 } 3327 3328 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3329 { 3330 return float64_muladd(float32_to_float64(a, s), 3331 float32_to_float64(b, s), d, 3332 float_muladd_negate_c, s); 3333 } 3334 3335 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3336 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3337 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) 3338 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) 3339 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3340 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3341 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) 3342 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) 3343 3344 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3345 { 3346 return float32_muladd(float16_to_float32(a, true, s), 3347 float16_to_float32(b, true, s), d, 3348 float_muladd_negate_product, s); 3349 } 3350 3351 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3352 { 3353 return float64_muladd(float32_to_float64(a, s), 3354 float32_to_float64(b, s), d, 3355 float_muladd_negate_product, s); 3356 } 3357 3358 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3359 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3360 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) 3361 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 3362 
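/*
 * Widening FMA: both multiplicands are first widened exactly (f16 -> f32
 * with ieee=true, f32 -> f64) and a single fused muladd is then done at the
 * destination width, so only one rounding occurs.  The (2, 4) and (4, 8)
 * arguments to the generators name the source and destination element sizes
 * in bytes.
 */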
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3363 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3364 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) 3365 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) 3366 3367 /* Vector Floating-Point Square-Root Instruction */ 3368 /* (TD, T2, TX2) */ 3369 #define OP_UU_H uint16_t, uint16_t, uint16_t 3370 #define OP_UU_W uint32_t, uint32_t, uint32_t 3371 #define OP_UU_D uint64_t, uint64_t, uint64_t 3372 3373 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3374 static void do_##NAME(void *vd, void *vs2, int i, \ 3375 CPURISCVState *env) \ 3376 { \ 3377 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3378 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3379 } 3380 3381 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ 3382 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3383 CPURISCVState *env, uint32_t desc) \ 3384 { \ 3385 uint32_t vm = vext_vm(desc); \ 3386 uint32_t vl = env->vl; \ 3387 uint32_t i; \ 3388 \ 3389 if (vl == 0) { \ 3390 return; \ 3391 } \ 3392 for (i = env->vstart; i < vl; i++) { \ 3393 if (!vm && !vext_elem_mask(v0, i)) { \ 3394 continue; \ 3395 } \ 3396 do_##NAME(vd, vs2, i, env); \ 3397 } \ 3398 env->vstart = 0; \ 3399 } 3400 3401 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3402 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3403 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3404 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) 3405 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) 3406 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) 3407 3408 /* 3409 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3410 * 3411 * Adapted from riscv-v-spec recip.c: 3412 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3413 */ 3414 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3415 { 3416 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3417 uint64_t exp = extract64(f, frac_size, exp_size); 3418 uint64_t frac = extract64(f, 0, frac_size); 3419 3420 const uint8_t lookup_table[] = { 3421 52, 51, 50, 48, 47, 46, 44, 43, 3422 42, 41, 40, 39, 38, 36, 35, 34, 3423 33, 32, 31, 30, 30, 29, 28, 27, 3424 26, 25, 24, 23, 23, 22, 21, 20, 3425 19, 19, 18, 17, 16, 16, 15, 14, 3426 14, 13, 12, 12, 11, 10, 10, 9, 3427 9, 8, 7, 7, 6, 6, 5, 4, 3428 4, 3, 3, 2, 2, 1, 1, 0, 3429 127, 125, 123, 121, 119, 118, 116, 114, 3430 113, 111, 109, 108, 106, 105, 103, 102, 3431 100, 99, 97, 96, 95, 93, 92, 91, 3432 90, 88, 87, 86, 85, 84, 83, 82, 3433 80, 79, 78, 77, 76, 75, 74, 73, 3434 72, 71, 70, 70, 69, 68, 67, 66, 3435 65, 64, 63, 63, 62, 61, 60, 59, 3436 59, 58, 57, 56, 56, 55, 54, 53 3437 }; 3438 const int precision = 7; 3439 3440 if (exp == 0 && frac != 0) { /* subnormal */ 3441 /* Normalize the subnormal. 
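Shift the fraction left until its most-significant bit is set, decrementing
exp once per shift; the extra shift below then drops that leading bit, which
becomes the implicit 1 of a normalized significand.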
*/ 3442 while (extract64(frac, frac_size - 1, 1) == 0) { 3443 exp--; 3444 frac <<= 1; 3445 } 3446 3447 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3448 } 3449 3450 int idx = ((exp & 1) << (precision - 1)) | 3451 (frac >> (frac_size - precision + 1)); 3452 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3453 (frac_size - precision); 3454 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3455 3456 uint64_t val = 0; 3457 val = deposit64(val, 0, frac_size, out_frac); 3458 val = deposit64(val, frac_size, exp_size, out_exp); 3459 val = deposit64(val, frac_size + exp_size, 1, sign); 3460 return val; 3461 } 3462 3463 static float16 frsqrt7_h(float16 f, float_status *s) 3464 { 3465 int exp_size = 5, frac_size = 10; 3466 bool sign = float16_is_neg(f); 3467 3468 /* 3469 * frsqrt7(sNaN) = canonical NaN 3470 * frsqrt7(-inf) = canonical NaN 3471 * frsqrt7(-normal) = canonical NaN 3472 * frsqrt7(-subnormal) = canonical NaN 3473 */ 3474 if (float16_is_signaling_nan(f, s) || 3475 (float16_is_infinity(f) && sign) || 3476 (float16_is_normal(f) && sign) || 3477 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3478 s->float_exception_flags |= float_flag_invalid; 3479 return float16_default_nan(s); 3480 } 3481 3482 /* frsqrt7(qNaN) = canonical NaN */ 3483 if (float16_is_quiet_nan(f, s)) { 3484 return float16_default_nan(s); 3485 } 3486 3487 /* frsqrt7(+-0) = +-inf */ 3488 if (float16_is_zero(f)) { 3489 s->float_exception_flags |= float_flag_divbyzero; 3490 return float16_set_sign(float16_infinity, sign); 3491 } 3492 3493 /* frsqrt7(+inf) = +0 */ 3494 if (float16_is_infinity(f) && !sign) { 3495 return float16_set_sign(float16_zero, sign); 3496 } 3497 3498 /* +normal, +subnormal */ 3499 uint64_t val = frsqrt7(f, exp_size, frac_size); 3500 return make_float16(val); 3501 } 3502 3503 static float32 frsqrt7_s(float32 f, float_status *s) 3504 { 3505 int exp_size = 8, frac_size = 23; 3506 bool sign = float32_is_neg(f); 3507 3508 /* 3509 * frsqrt7(sNaN) = canonical NaN 3510 * frsqrt7(-inf) = canonical NaN 3511 * frsqrt7(-normal) = canonical NaN 3512 * frsqrt7(-subnormal) = canonical NaN 3513 */ 3514 if (float32_is_signaling_nan(f, s) || 3515 (float32_is_infinity(f) && sign) || 3516 (float32_is_normal(f) && sign) || 3517 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3518 s->float_exception_flags |= float_flag_invalid; 3519 return float32_default_nan(s); 3520 } 3521 3522 /* frsqrt7(qNaN) = canonical NaN */ 3523 if (float32_is_quiet_nan(f, s)) { 3524 return float32_default_nan(s); 3525 } 3526 3527 /* frsqrt7(+-0) = +-inf */ 3528 if (float32_is_zero(f)) { 3529 s->float_exception_flags |= float_flag_divbyzero; 3530 return float32_set_sign(float32_infinity, sign); 3531 } 3532 3533 /* frsqrt7(+inf) = +0 */ 3534 if (float32_is_infinity(f) && !sign) { 3535 return float32_set_sign(float32_zero, sign); 3536 } 3537 3538 /* +normal, +subnormal */ 3539 uint64_t val = frsqrt7(f, exp_size, frac_size); 3540 return make_float32(val); 3541 } 3542 3543 static float64 frsqrt7_d(float64 f, float_status *s) 3544 { 3545 int exp_size = 11, frac_size = 52; 3546 bool sign = float64_is_neg(f); 3547 3548 /* 3549 * frsqrt7(sNaN) = canonical NaN 3550 * frsqrt7(-inf) = canonical NaN 3551 * frsqrt7(-normal) = canonical NaN 3552 * frsqrt7(-subnormal) = canonical NaN 3553 */ 3554 if (float64_is_signaling_nan(f, s) || 3555 (float64_is_infinity(f) && sign) || 3556 (float64_is_normal(f) && sign) || 3557 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3558 
s->float_exception_flags |= float_flag_invalid; 3559 return float64_default_nan(s); 3560 } 3561 3562 /* frsqrt7(qNaN) = canonical NaN */ 3563 if (float64_is_quiet_nan(f, s)) { 3564 return float64_default_nan(s); 3565 } 3566 3567 /* frsqrt7(+-0) = +-inf */ 3568 if (float64_is_zero(f)) { 3569 s->float_exception_flags |= float_flag_divbyzero; 3570 return float64_set_sign(float64_infinity, sign); 3571 } 3572 3573 /* frsqrt7(+inf) = +0 */ 3574 if (float64_is_infinity(f) && !sign) { 3575 return float64_set_sign(float64_zero, sign); 3576 } 3577 3578 /* +normal, +subnormal */ 3579 uint64_t val = frsqrt7(f, exp_size, frac_size); 3580 return make_float64(val); 3581 } 3582 3583 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3584 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3585 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3586 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2) 3587 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4) 3588 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8) 3589 3590 /* Vector Floating-Point MIN/MAX Instructions */ 3591 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3592 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) 3593 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3594 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) 3595 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) 3596 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) 3597 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3598 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3599 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3600 GEN_VEXT_VF(vfmin_vf_h, 2, 2) 3601 GEN_VEXT_VF(vfmin_vf_w, 4, 4) 3602 GEN_VEXT_VF(vfmin_vf_d, 8, 8) 3603 3604 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3605 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3606 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3607 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) 3608 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) 3609 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) 3610 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3611 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3612 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3613 GEN_VEXT_VF(vfmax_vf_h, 2, 2) 3614 GEN_VEXT_VF(vfmax_vf_w, 4, 4) 3615 GEN_VEXT_VF(vfmax_vf_d, 8, 8) 3616 3617 /* Vector Floating-Point Sign-Injection Instructions */ 3618 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3619 { 3620 return deposit64(b, 0, 15, a); 3621 } 3622 3623 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3624 { 3625 return deposit64(b, 0, 31, a); 3626 } 3627 3628 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3629 { 3630 return deposit64(b, 0, 63, a); 3631 } 3632 3633 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3634 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3635 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3636 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) 3637 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) 3638 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) 3639 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3640 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3641 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3642 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) 3643 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) 3644 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) 3645 3646 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3647 { 3648 return 
deposit64(~b, 0, 15, a); 3649 } 3650 3651 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3652 { 3653 return deposit64(~b, 0, 31, a); 3654 } 3655 3656 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3657 { 3658 return deposit64(~b, 0, 63, a); 3659 } 3660 3661 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3662 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3663 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3664 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) 3665 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) 3666 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) 3667 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3668 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3669 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3670 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) 3671 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) 3672 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) 3673 3674 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3675 { 3676 return deposit64(b ^ a, 0, 15, a); 3677 } 3678 3679 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3680 { 3681 return deposit64(b ^ a, 0, 31, a); 3682 } 3683 3684 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3685 { 3686 return deposit64(b ^ a, 0, 63, a); 3687 } 3688 3689 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3690 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3691 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3692 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) 3693 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) 3694 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) 3695 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3696 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3697 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3698 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) 3699 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) 3700 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) 3701 3702 /* Vector Floating-Point Compare Instructions */ 3703 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3704 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3705 CPURISCVState *env, uint32_t desc) \ 3706 { \ 3707 uint32_t vm = vext_vm(desc); \ 3708 uint32_t vl = env->vl; \ 3709 uint32_t i; \ 3710 \ 3711 for (i = env->vstart; i < vl; i++) { \ 3712 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3713 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3714 if (!vm && !vext_elem_mask(v0, i)) { \ 3715 continue; \ 3716 } \ 3717 vext_set_elem_mask(vd, i, \ 3718 DO_OP(s2, s1, &env->fp_status)); \ 3719 } \ 3720 env->vstart = 0; \ 3721 } 3722 3723 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3724 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3725 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3726 3727 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3728 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3729 CPURISCVState *env, uint32_t desc) \ 3730 { \ 3731 uint32_t vm = vext_vm(desc); \ 3732 uint32_t vl = env->vl; \ 3733 uint32_t i; \ 3734 \ 3735 for (i = env->vstart; i < vl; i++) { \ 3736 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3737 if (!vm && !vext_elem_mask(v0, i)) { \ 3738 continue; \ 3739 } \ 3740 vext_set_elem_mask(vd, i, \ 3741 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3742 } \ 3743 env->vstart = 0; \ 3744 } 3745 3746 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3747 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3748 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3749 3750 static bool 
vmfne16(uint16_t a, uint16_t b, float_status *s) 3751 { 3752 FloatRelation compare = float16_compare_quiet(a, b, s); 3753 return compare != float_relation_equal; 3754 } 3755 3756 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 3757 { 3758 FloatRelation compare = float32_compare_quiet(a, b, s); 3759 return compare != float_relation_equal; 3760 } 3761 3762 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 3763 { 3764 FloatRelation compare = float64_compare_quiet(a, b, s); 3765 return compare != float_relation_equal; 3766 } 3767 3768 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 3769 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 3770 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 3771 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 3772 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 3773 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 3774 3775 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 3776 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 3777 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 3778 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 3779 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 3780 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 3781 3782 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 3783 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 3784 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 3785 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 3786 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 3787 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 3788 3789 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 3790 { 3791 FloatRelation compare = float16_compare(a, b, s); 3792 return compare == float_relation_greater; 3793 } 3794 3795 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 3796 { 3797 FloatRelation compare = float32_compare(a, b, s); 3798 return compare == float_relation_greater; 3799 } 3800 3801 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 3802 { 3803 FloatRelation compare = float64_compare(a, b, s); 3804 return compare == float_relation_greater; 3805 } 3806 3807 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 3808 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 3809 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 3810 3811 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 3812 { 3813 FloatRelation compare = float16_compare(a, b, s); 3814 return compare == float_relation_greater || 3815 compare == float_relation_equal; 3816 } 3817 3818 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 3819 { 3820 FloatRelation compare = float32_compare(a, b, s); 3821 return compare == float_relation_greater || 3822 compare == float_relation_equal; 3823 } 3824 3825 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 3826 { 3827 FloatRelation compare = float64_compare(a, b, s); 3828 return compare == float_relation_greater || 3829 compare == float_relation_equal; 3830 } 3831 3832 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 3833 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 3834 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 3835 3836 /* Vector Floating-Point Classify Instruction */ 3837 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3838 static void do_##NAME(void *vd, void *vs2, int i) \ 3839 { \ 3840 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3841 *((TD *)vd + HD(i)) = OP(s2); \ 3842 } 3843 3844 #define GEN_VEXT_V(NAME, ESZ, DSZ) \ 3845 void 
HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3846 CPURISCVState *env, uint32_t desc) \ 3847 { \ 3848 uint32_t vm = vext_vm(desc); \ 3849 uint32_t vl = env->vl; \ 3850 uint32_t i; \ 3851 \ 3852 for (i = env->vstart; i < vl; i++) { \ 3853 if (!vm && !vext_elem_mask(v0, i)) { \ 3854 continue; \ 3855 } \ 3856 do_##NAME(vd, vs2, i); \ 3857 } \ 3858 env->vstart = 0; \ 3859 } 3860 3861 target_ulong fclass_h(uint64_t frs1) 3862 { 3863 float16 f = frs1; 3864 bool sign = float16_is_neg(f); 3865 3866 if (float16_is_infinity(f)) { 3867 return sign ? 1 << 0 : 1 << 7; 3868 } else if (float16_is_zero(f)) { 3869 return sign ? 1 << 3 : 1 << 4; 3870 } else if (float16_is_zero_or_denormal(f)) { 3871 return sign ? 1 << 2 : 1 << 5; 3872 } else if (float16_is_any_nan(f)) { 3873 float_status s = { }; /* for snan_bit_is_one */ 3874 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3875 } else { 3876 return sign ? 1 << 1 : 1 << 6; 3877 } 3878 } 3879 3880 target_ulong fclass_s(uint64_t frs1) 3881 { 3882 float32 f = frs1; 3883 bool sign = float32_is_neg(f); 3884 3885 if (float32_is_infinity(f)) { 3886 return sign ? 1 << 0 : 1 << 7; 3887 } else if (float32_is_zero(f)) { 3888 return sign ? 1 << 3 : 1 << 4; 3889 } else if (float32_is_zero_or_denormal(f)) { 3890 return sign ? 1 << 2 : 1 << 5; 3891 } else if (float32_is_any_nan(f)) { 3892 float_status s = { }; /* for snan_bit_is_one */ 3893 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3894 } else { 3895 return sign ? 1 << 1 : 1 << 6; 3896 } 3897 } 3898 3899 target_ulong fclass_d(uint64_t frs1) 3900 { 3901 float64 f = frs1; 3902 bool sign = float64_is_neg(f); 3903 3904 if (float64_is_infinity(f)) { 3905 return sign ? 1 << 0 : 1 << 7; 3906 } else if (float64_is_zero(f)) { 3907 return sign ? 1 << 3 : 1 << 4; 3908 } else if (float64_is_zero_or_denormal(f)) { 3909 return sign ? 1 << 2 : 1 << 5; 3910 } else if (float64_is_any_nan(f)) { 3911 float_status s = { }; /* for snan_bit_is_one */ 3912 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3913 } else { 3914 return sign ? 1 << 1 : 1 << 6; 3915 } 3916 } 3917 3918 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 3919 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 3920 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 3921 GEN_VEXT_V(vfclass_v_h, 2, 2) 3922 GEN_VEXT_V(vfclass_v_w, 4, 4) 3923 GEN_VEXT_V(vfclass_v_d, 8, 8) 3924 3925 /* Vector Floating-Point Merge Instruction */ 3926 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 3927 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3928 CPURISCVState *env, uint32_t desc) \ 3929 { \ 3930 uint32_t vm = vext_vm(desc); \ 3931 uint32_t vl = env->vl; \ 3932 uint32_t i; \ 3933 \ 3934 for (i = env->vstart; i < vl; i++) { \ 3935 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3936 *((ETYPE *)vd + H(i)) \ 3937 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 3938 } \ 3939 env->vstart = 0; \ 3940 } 3941 3942 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 3943 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 3944 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 3945 3946 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 3947 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
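Rounding follows the dynamic rounding mode held in env->fp_status; softfloat
saturates negative and out-of-range inputs to the destination type's limits
and raises the invalid flag.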
*/ 3948 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 3949 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 3950 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 3951 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) 3952 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) 3953 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) 3954 3955 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 3956 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 3957 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 3958 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 3959 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) 3960 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) 3961 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) 3962 3963 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 3964 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 3965 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 3966 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 3967 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) 3968 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) 3969 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) 3970 3971 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 3972 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 3973 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 3974 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 3975 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) 3976 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) 3977 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) 3978 3979 /* Widening Floating-Point/Integer Type-Convert Instructions */ 3980 /* (TD, T2, TX2) */ 3981 #define WOP_UU_B uint16_t, uint8_t, uint8_t 3982 #define WOP_UU_H uint32_t, uint16_t, uint16_t 3983 #define WOP_UU_W uint64_t, uint32_t, uint32_t 3984 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 3985 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 3986 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 3987 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) 3988 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) 3989 3990 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 3991 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 3992 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 3993 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) 3994 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) 3995 3996 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 3997 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 3998 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 3999 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4000 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2) 4001 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) 4002 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) 4003 4004 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4005 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4006 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4007 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4008 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2) 4009 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) 4010 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) 4011 4012 /* 4013 * vfwcvt.f.f.v vd, vs2, vm 4014 * Convert single-width float to double-width float. 
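* vfwcvtffv16 passes ieee=true so the f16 source is treated as IEEE half
* precision; both widening conversions are value-preserving, so only a
* signaling-NaN input can raise an exception here.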
4015 */ 4016 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4017 { 4018 return float16_to_float32(a, true, s); 4019 } 4020 4021 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4022 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4023 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4) 4024 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) 4025 4026 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4027 /* (TD, T2, TX2) */ 4028 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4029 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4030 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4031 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4032 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4033 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4034 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4035 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1) 4036 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2) 4037 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4) 4038 4039 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4040 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4041 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4042 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4043 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1) 4044 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2) 4045 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4) 4046 4047 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4048 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4049 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4050 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2) 4051 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4) 4052 4053 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4054 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4055 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4056 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2) 4057 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4) 4058 4059 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
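That is, a double-width source narrows to a single-width result; unlike the
widening case this conversion rounds and may raise overflow, underflow and
inexact.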
*/ 4060 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4061 { 4062 return float32_to_float16(a, true, s); 4063 } 4064 4065 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4066 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4067 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2) 4068 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4) 4069 4070 /* 4071 *** Vector Reduction Operations 4072 */ 4073 /* Vector Single-Width Integer Reduction Instructions */ 4074 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4075 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4076 void *vs2, CPURISCVState *env, uint32_t desc) \ 4077 { \ 4078 uint32_t vm = vext_vm(desc); \ 4079 uint32_t vl = env->vl; \ 4080 uint32_t i; \ 4081 TD s1 = *((TD *)vs1 + HD(0)); \ 4082 \ 4083 for (i = env->vstart; i < vl; i++) { \ 4084 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4085 if (!vm && !vext_elem_mask(v0, i)) { \ 4086 continue; \ 4087 } \ 4088 s1 = OP(s1, (TD)s2); \ 4089 } \ 4090 *((TD *)vd + HD(0)) = s1; \ 4091 env->vstart = 0; \ 4092 } 4093 4094 /* vd[0] = sum(vs1[0], vs2[*]) */ 4095 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4096 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4097 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4098 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4099 4100 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4101 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4102 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4103 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4104 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4105 4106 /* vd[0] = max(vs1[0], vs2[*]) */ 4107 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4108 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4109 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4110 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4111 4112 /* vd[0] = minu(vs1[0], vs2[*]) */ 4113 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4114 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4115 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4116 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4117 4118 /* vd[0] = min(vs1[0], vs2[*]) */ 4119 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4120 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4121 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4122 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4123 4124 /* vd[0] = and(vs1[0], vs2[*]) */ 4125 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4126 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4127 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4128 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4129 4130 /* vd[0] = or(vs1[0], vs2[*]) */ 4131 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4132 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4133 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4134 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4135 4136 /* vd[0] = xor(vs1[0], vs2[*]) */ 4137 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4138 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4139 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4140 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4141 4142 /* Vector Widening Integer Reduction 
Instructions */ 4143 /* signed sum reduction into double-width accumulator */ 4144 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4145 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4146 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4147 4148 /* Unsigned sum reduction into double-width accumulator */ 4149 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4150 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4151 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4152 4153 /* Vector Single-Width Floating-Point Reduction Instructions */ 4154 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4155 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4156 void *vs2, CPURISCVState *env, \ 4157 uint32_t desc) \ 4158 { \ 4159 uint32_t vm = vext_vm(desc); \ 4160 uint32_t vl = env->vl; \ 4161 uint32_t i; \ 4162 TD s1 = *((TD *)vs1 + HD(0)); \ 4163 \ 4164 for (i = env->vstart; i < vl; i++) { \ 4165 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4166 if (!vm && !vext_elem_mask(v0, i)) { \ 4167 continue; \ 4168 } \ 4169 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4170 } \ 4171 *((TD *)vd + HD(0)) = s1; \ 4172 env->vstart = 0; \ 4173 } 4174 4175 /* Unordered sum */ 4176 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4177 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4178 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4179 4180 /* Maximum value */ 4181 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4182 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4183 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4184 4185 /* Minimum value */ 4186 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4187 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4188 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4189 4190 /* Vector Widening Floating-Point Reduction Instructions */ 4191 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4192 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4193 void *vs2, CPURISCVState *env, uint32_t desc) 4194 { 4195 uint32_t vm = vext_vm(desc); 4196 uint32_t vl = env->vl; 4197 uint32_t i; 4198 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4199 4200 for (i = env->vstart; i < vl; i++) { 4201 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4202 if (!vm && !vext_elem_mask(v0, i)) { 4203 continue; 4204 } 4205 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4206 &env->fp_status); 4207 } 4208 *((uint32_t *)vd + H4(0)) = s1; 4209 env->vstart = 0; 4210 } 4211 4212 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4213 void *vs2, CPURISCVState *env, uint32_t desc) 4214 { 4215 uint32_t vm = vext_vm(desc); 4216 uint32_t vl = env->vl; 4217 uint32_t i; 4218 uint64_t s1 = *((uint64_t *)vs1); 4219 4220 for (i = env->vstart; i < vl; i++) { 4221 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4222 if (!vm && !vext_elem_mask(v0, i)) { 4223 continue; 4224 } 4225 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4226 &env->fp_status); 4227 } 4228 *((uint64_t *)vd) = s1; 4229 env->vstart = 0; 4230 } 4231 4232 /* 4233 *** Vector Mask Operations 4234 */ 4235 /* Vector Mask-Register Logical Instructions */ 4236 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4237 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4238 void *vs2, CPURISCVState *env, \ 
4239 uint32_t desc) \ 4240 { \ 4241 uint32_t vl = env->vl; \ 4242 uint32_t i; \ 4243 int a, b; \ 4244 \ 4245 for (i = env->vstart; i < vl; i++) { \ 4246 a = vext_elem_mask(vs1, i); \ 4247 b = vext_elem_mask(vs2, i); \ 4248 vext_set_elem_mask(vd, i, OP(b, a)); \ 4249 } \ 4250 env->vstart = 0; \ 4251 } 4252 4253 #define DO_NAND(N, M) (!(N & M)) 4254 #define DO_ANDNOT(N, M) (N & !M) 4255 #define DO_NOR(N, M) (!(N | M)) 4256 #define DO_ORNOT(N, M) (N | !M) 4257 #define DO_XNOR(N, M) (!(N ^ M)) 4258 4259 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4260 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4261 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) 4262 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4263 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4264 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4265 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) 4266 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4267 4268 /* Vector count population in mask vcpop */ 4269 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4270 uint32_t desc) 4271 { 4272 target_ulong cnt = 0; 4273 uint32_t vm = vext_vm(desc); 4274 uint32_t vl = env->vl; 4275 int i; 4276 4277 for (i = env->vstart; i < vl; i++) { 4278 if (vm || vext_elem_mask(v0, i)) { 4279 if (vext_elem_mask(vs2, i)) { 4280 cnt++; 4281 } 4282 } 4283 } 4284 env->vstart = 0; 4285 return cnt; 4286 } 4287 4288 /* vfirst find-first-set mask bit*/ 4289 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4290 uint32_t desc) 4291 { 4292 uint32_t vm = vext_vm(desc); 4293 uint32_t vl = env->vl; 4294 int i; 4295 4296 for (i = env->vstart; i < vl; i++) { 4297 if (vm || vext_elem_mask(v0, i)) { 4298 if (vext_elem_mask(vs2, i)) { 4299 return i; 4300 } 4301 } 4302 } 4303 env->vstart = 0; 4304 return -1LL; 4305 } 4306 4307 enum set_mask_type { 4308 ONLY_FIRST = 1, 4309 INCLUDE_FIRST, 4310 BEFORE_FIRST, 4311 }; 4312 4313 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4314 uint32_t desc, enum set_mask_type type) 4315 { 4316 uint32_t vm = vext_vm(desc); 4317 uint32_t vl = env->vl; 4318 int i; 4319 bool first_mask_bit = false; 4320 4321 for (i = env->vstart; i < vl; i++) { 4322 if (!vm && !vext_elem_mask(v0, i)) { 4323 continue; 4324 } 4325 /* write a zero to all following active elements */ 4326 if (first_mask_bit) { 4327 vext_set_elem_mask(vd, i, 0); 4328 continue; 4329 } 4330 if (vext_elem_mask(vs2, i)) { 4331 first_mask_bit = true; 4332 if (type == BEFORE_FIRST) { 4333 vext_set_elem_mask(vd, i, 0); 4334 } else { 4335 vext_set_elem_mask(vd, i, 1); 4336 } 4337 } else { 4338 if (type == ONLY_FIRST) { 4339 vext_set_elem_mask(vd, i, 0); 4340 } else { 4341 vext_set_elem_mask(vd, i, 1); 4342 } 4343 } 4344 } 4345 env->vstart = 0; 4346 } 4347 4348 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4349 uint32_t desc) 4350 { 4351 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4352 } 4353 4354 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4355 uint32_t desc) 4356 { 4357 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4358 } 4359 4360 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4361 uint32_t desc) 4362 { 4363 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4364 } 4365 4366 /* Vector Iota Instruction */ 4367 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4368 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4369 uint32_t desc) \ 4370 { \ 4371 uint32_t vm = vext_vm(desc); \ 4372 uint32_t vl = env->vl; \ 4373 uint32_t sum = 0; \ 4374 int i; \ 4375 \ 4376 for (i = env->vstart; i < vl; i++) { \ 4377 if (!vm && 
!vext_elem_mask(v0, i)) { \ 4378 continue; \ 4379 } \ 4380 *((ETYPE *)vd + H(i)) = sum; \ 4381 if (vext_elem_mask(vs2, i)) { \ 4382 sum++; \ 4383 } \ 4384 } \ 4385 env->vstart = 0; \ 4386 } 4387 4388 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4389 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4390 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4391 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4392 4393 /* Vector Element Index Instruction */ 4394 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4395 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4396 { \ 4397 uint32_t vm = vext_vm(desc); \ 4398 uint32_t vl = env->vl; \ 4399 int i; \ 4400 \ 4401 for (i = env->vstart; i < vl; i++) { \ 4402 if (!vm && !vext_elem_mask(v0, i)) { \ 4403 continue; \ 4404 } \ 4405 *((ETYPE *)vd + H(i)) = i; \ 4406 } \ 4407 env->vstart = 0; \ 4408 } 4409 4410 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4411 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4412 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4413 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4414 4415 /* 4416 *** Vector Permutation Instructions 4417 */ 4418 4419 /* Vector Slide Instructions */ 4420 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4421 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4422 CPURISCVState *env, uint32_t desc) \ 4423 { \ 4424 uint32_t vm = vext_vm(desc); \ 4425 uint32_t vl = env->vl; \ 4426 target_ulong offset = s1, i_min, i; \ 4427 \ 4428 i_min = MAX(env->vstart, offset); \ 4429 for (i = i_min; i < vl; i++) { \ 4430 if (!vm && !vext_elem_mask(v0, i)) { \ 4431 continue; \ 4432 } \ 4433 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4434 } \ 4435 } 4436 4437 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4438 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4439 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4440 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4441 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4442 4443 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4444 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4445 CPURISCVState *env, uint32_t desc) \ 4446 { \ 4447 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4448 uint32_t vm = vext_vm(desc); \ 4449 uint32_t vl = env->vl; \ 4450 target_ulong i_max, i; \ 4451 \ 4452 i_max = MAX(MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl), env->vstart); \ 4453 for (i = env->vstart; i < i_max; ++i) { \ 4454 if (vm || vext_elem_mask(v0, i)) { \ 4455 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4456 } \ 4457 } \ 4458 \ 4459 for (i = i_max; i < vl; ++i) { \ 4460 if (vm || vext_elem_mask(v0, i)) { \ 4461 *((ETYPE *)vd + H(i)) = 0; \ 4462 } \ 4463 } \ 4464 \ 4465 env->vstart = 0; \ 4466 } 4467 4468 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4469 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4470 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4471 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4472 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4473 4474 #define GEN_VEXT_VSLIE1UP(ESZ, H) \ 4475 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4476 CPURISCVState *env, uint32_t desc) \ 4477 { \ 4478 typedef uint##ESZ##_t ETYPE; \ 4479 uint32_t vm = vext_vm(desc); \ 4480 uint32_t vl = env->vl; \ 4481 uint32_t i; \ 4482 \ 4483 for (i = env->vstart; i < vl; i++) { \ 4484 if (!vm && !vext_elem_mask(v0, i)) { \ 4485 continue; \ 4486 } \ 4487 if (i == 0) { \ 4488 *((ETYPE *)vd + H(i)) = s1; \ 4489 } else { \ 4490 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4491 } \ 4492 } \ 4493 env->vstart = 0; \ 4494 } 4495 4496 GEN_VEXT_VSLIE1UP(8, H1) 4497 GEN_VEXT_VSLIE1UP(16, H2) 4498 GEN_VEXT_VSLIE1UP(32, H4) 4499 GEN_VEXT_VSLIE1UP(64, H8) 4500 4501 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \ 4502 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4503 CPURISCVState *env, uint32_t desc) \ 4504 { \ 4505 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4506 } 4507 4508 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4509 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4510 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4511 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4512 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4513 4514 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \ 4515 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4516 CPURISCVState *env, uint32_t desc) \ 4517 { \ 4518 typedef uint##ESZ##_t ETYPE; \ 4519 uint32_t vm = vext_vm(desc); \ 4520 uint32_t vl = env->vl; \ 4521 uint32_t i; \ 4522 \ 4523 for (i = env->vstart; i < vl; i++) { \ 4524 if (!vm && !vext_elem_mask(v0, i)) { \ 4525 continue; \ 4526 } \ 4527 if (i == vl - 1) { \ 4528 *((ETYPE *)vd + H(i)) = s1; \ 4529 } else { \ 4530 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4531 } \ 4532 } \ 4533 env->vstart = 0; \ 4534 } 4535 4536 GEN_VEXT_VSLIDE1DOWN(8, H1) 4537 GEN_VEXT_VSLIDE1DOWN(16, H2) 4538 GEN_VEXT_VSLIDE1DOWN(32, H4) 4539 GEN_VEXT_VSLIDE1DOWN(64, H8) 4540 4541 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \ 4542 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4543 CPURISCVState *env, uint32_t desc) \ 4544 { \ 4545 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4546 } 4547 4548 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4549 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4550 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4551 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4552 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4553 4554 /* Vector Floating-Point Slide Instructions */ 4555 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \ 4556 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4557 CPURISCVState *env, uint32_t desc) \ 4558 { \ 4559 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4560 } 4561 4562 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = 
vs2[i] */ 4563 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4564 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4565 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4566 4567 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \ 4568 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4569 CPURISCVState *env, uint32_t desc) \ 4570 { \ 4571 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4572 } 4573 4574 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4575 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4576 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4577 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4578 4579 /* Vector Register Gather Instruction */ 4580 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4581 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4582 CPURISCVState *env, uint32_t desc) \ 4583 { \ 4584 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4585 uint32_t vm = vext_vm(desc); \ 4586 uint32_t vl = env->vl; \ 4587 uint64_t index; \ 4588 uint32_t i; \ 4589 \ 4590 for (i = env->vstart; i < vl; i++) { \ 4591 if (!vm && !vext_elem_mask(v0, i)) { \ 4592 continue; \ 4593 } \ 4594 index = *((TS1 *)vs1 + HS1(i)); \ 4595 if (index >= vlmax) { \ 4596 *((TS2 *)vd + HS2(i)) = 0; \ 4597 } else { \ 4598 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4599 } \ 4600 } \ 4601 env->vstart = 0; \ 4602 } 4603 4604 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 4605 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4606 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4607 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4608 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4609 4610 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4611 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4612 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4613 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4614 4615 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4616 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4617 CPURISCVState *env, uint32_t desc) \ 4618 { \ 4619 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4620 uint32_t vm = vext_vm(desc); \ 4621 uint32_t vl = env->vl; \ 4622 uint64_t index = s1; \ 4623 uint32_t i; \ 4624 \ 4625 for (i = env->vstart; i < vl; i++) { \ 4626 if (!vm && !vext_elem_mask(v0, i)) { \ 4627 continue; \ 4628 } \ 4629 if (index >= vlmax) { \ 4630 *((ETYPE *)vd + H(i)) = 0; \ 4631 } else { \ 4632 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4633 } \ 4634 } \ 4635 env->vstart = 0; \ 4636 } 4637 4638 /* vd[i] = (x[rs1] >= VLMAX) ? 
0 : vs2[rs1] */ 4639 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4640 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4641 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4642 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4643 4644 /* Vector Compress Instruction */ 4645 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4646 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4647 CPURISCVState *env, uint32_t desc) \ 4648 { \ 4649 uint32_t vl = env->vl; \ 4650 uint32_t num = 0, i; \ 4651 \ 4652 for (i = env->vstart; i < vl; i++) { \ 4653 if (!vext_elem_mask(vs1, i)) { \ 4654 continue; \ 4655 } \ 4656 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4657 num++; \ 4658 } \ 4659 env->vstart = 0; \ 4660 } 4661 4662 /* Compress into vd elements of vs2 where vs1 is enabled */ 4663 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4664 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4665 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4666 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4667 4668 /* Vector Whole Register Move */ 4669 #define GEN_VEXT_VMV_WHOLE(NAME, LEN) \ 4670 void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \ 4671 uint32_t desc) \ 4672 { \ 4673 /* EEW = 8 */ \ 4674 uint32_t maxsz = simd_maxsz(desc); \ 4675 uint32_t i = env->vstart; \ 4676 \ 4677 memcpy((uint8_t *)vd + H1(i), \ 4678 (uint8_t *)vs2 + H1(i), \ 4679 maxsz - env->vstart); \ 4680 \ 4681 env->vstart = 0; \ 4682 } 4683 4684 GEN_VEXT_VMV_WHOLE(vmv1r_v, 1) 4685 GEN_VEXT_VMV_WHOLE(vmv2r_v, 2) 4686 GEN_VEXT_VMV_WHOLE(vmv4r_v, 4) 4687 GEN_VEXT_VMV_WHOLE(vmv8r_v, 8) 4688 4689 /* Vector Integer Extension */ 4690 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \ 4691 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4692 CPURISCVState *env, uint32_t desc) \ 4693 { \ 4694 uint32_t vl = env->vl; \ 4695 uint32_t vm = vext_vm(desc); \ 4696 uint32_t i; \ 4697 \ 4698 for (i = env->vstart; i < vl; i++) { \ 4699 if (!vm && !vext_elem_mask(v0, i)) { \ 4700 continue; \ 4701 } \ 4702 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ 4703 } \ 4704 env->vstart = 0; \ 4705 } 4706 4707 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) 4708 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2) 4709 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4) 4710 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1) 4711 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2) 4712 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1) 4713 4714 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1) 4715 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2) 4716 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) 4717 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) 4718 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) 4719 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) 4720
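/*
 * The extension itself is done by the plain C assignment in the macro above:
 * the narrower DTYPE source element is implicitly converted to the wider
 * ETYPE destination, which zero-extends for the unsigned (vzext) variants
 * and sign-extends for the signed (vsext) ones.  For example, vsext_vf2_h
 * reads an int8_t and stores it into an int16_t slot, so 0x80 (-128)
 * becomes 0xff80.
 */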