/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    bool vill = FIELD_EX64(s2, VTYPE, VILL);
    target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits.
     * So vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, addr, curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, addr, curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector elements from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            target_ulong addr = base + stride * i + (k << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,               \
                  target_ulong
stride, CPURISCVState *env, \ 248 uint32_t desc) \ 249 { \ 250 uint32_t vm = vext_vm(desc); \ 251 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 252 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 253 } 254 255 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 256 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 257 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 258 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 259 260 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 261 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 262 target_ulong stride, CPURISCVState *env, \ 263 uint32_t desc) \ 264 { \ 265 uint32_t vm = vext_vm(desc); \ 266 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 267 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \ 268 } 269 270 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 271 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 272 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 273 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 274 275 /* 276 *** unit-stride: access elements stored contiguously in memory 277 */ 278 279 /* unmasked unit-stride load and store operation*/ 280 static void 281 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 282 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl, 283 uintptr_t ra, MMUAccessType access_type) 284 { 285 uint32_t i, k; 286 uint32_t nf = vext_nf(desc); 287 uint32_t max_elems = vext_max_elems(desc, esz); 288 289 /* load bytes from guest memory */ 290 for (i = env->vstart; i < evl; i++, env->vstart++) { 291 k = 0; 292 while (k < nf) { 293 target_ulong addr = base + ((i * nf + k) << esz); 294 ldst_elem(env, addr, i + k * max_elems, vd, ra); 295 k++; 296 } 297 } 298 env->vstart = 0; 299 } 300 301 /* 302 * masked unit-stride load and store operation will be a special case of stride, 303 * stride = NF * sizeof (MTYPE) 304 */ 305 306 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 307 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 308 CPURISCVState *env, uint32_t desc) \ 309 { \ 310 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 311 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 312 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 313 } \ 314 \ 315 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 316 CPURISCVState *env, uint32_t desc) \ 317 { \ 318 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 319 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \ 320 } 321 322 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 323 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 324 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 325 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 326 327 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 328 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 329 CPURISCVState *env, uint32_t desc) \ 330 { \ 331 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 332 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 333 ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE); \ 334 } \ 335 \ 336 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 337 CPURISCVState *env, uint32_t desc) \ 338 { \ 339 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 340 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE); \ 341 } 342 343 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 344 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 345 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 346 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 347 348 /* 349 *** unit stride mask load and store, EEW = 1 350 */ 351 void HELPER(vlm_v)(void *vd, void *v0, target_ulong 
base, 352 CPURISCVState *env, uint32_t desc) 353 { 354 /* evl = ceil(vl/8) */ 355 uint8_t evl = (env->vl + 7) >> 3; 356 vext_ldst_us(vd, base, env, desc, lde_b, 357 0, evl, GETPC(), MMU_DATA_LOAD); 358 } 359 360 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 361 CPURISCVState *env, uint32_t desc) 362 { 363 /* evl = ceil(vl/8) */ 364 uint8_t evl = (env->vl + 7) >> 3; 365 vext_ldst_us(vd, base, env, desc, ste_b, 366 0, evl, GETPC(), MMU_DATA_STORE); 367 } 368 369 /* 370 *** index: access vector element from indexed memory 371 */ 372 typedef target_ulong vext_get_index_addr(target_ulong base, 373 uint32_t idx, void *vs2); 374 375 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 376 static target_ulong NAME(target_ulong base, \ 377 uint32_t idx, void *vs2) \ 378 { \ 379 return (base + *((ETYPE *)vs2 + H(idx))); \ 380 } 381 382 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 383 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 384 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 385 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 386 387 static inline void 388 vext_ldst_index(void *vd, void *v0, target_ulong base, 389 void *vs2, CPURISCVState *env, uint32_t desc, 390 vext_get_index_addr get_index_addr, 391 vext_ldst_elem_fn *ldst_elem, 392 uint32_t esz, uintptr_t ra, MMUAccessType access_type) 393 { 394 uint32_t i, k; 395 uint32_t nf = vext_nf(desc); 396 uint32_t vm = vext_vm(desc); 397 uint32_t max_elems = vext_max_elems(desc, esz); 398 399 /* load bytes from guest memory */ 400 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 401 if (!vm && !vext_elem_mask(v0, i)) { 402 continue; 403 } 404 405 k = 0; 406 while (k < nf) { 407 abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz); 408 ldst_elem(env, addr, i + k * max_elems, vd, ra); 409 k++; 410 } 411 } 412 env->vstart = 0; 413 } 414 415 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 416 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 417 void *vs2, CPURISCVState *env, uint32_t desc) \ 418 { \ 419 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 420 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 421 } 422 423 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 424 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 425 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 426 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 427 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 428 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 429 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 430 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 431 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 432 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 433 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 434 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 435 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 436 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 437 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 438 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 439 440 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 441 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 442 void *vs2, CPURISCVState *env, uint32_t desc) \ 443 { \ 444 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 445 STORE_FN, ctzl(sizeof(ETYPE)), \ 446 GETPC(), MMU_DATA_STORE); \ 447 } 448 449 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b) 450 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h) 451 GEN_VEXT_ST_INDEX(vsxei8_32_v, 
int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = base + i * (nf << esz);
        if (i == 0) {
            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr += offset;
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ?
(M) : (N)) 559 560 /* Unsigned min/max */ 561 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 562 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 563 564 /* 565 *** load and store whole register instructions 566 */ 567 static void 568 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 569 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra, 570 MMUAccessType access_type) 571 { 572 uint32_t i, k, off, pos; 573 uint32_t nf = vext_nf(desc); 574 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 575 uint32_t max_elems = vlenb >> esz; 576 577 k = env->vstart / max_elems; 578 off = env->vstart % max_elems; 579 580 if (off) { 581 /* load/store rest of elements of current segment pointed by vstart */ 582 for (pos = off; pos < max_elems; pos++, env->vstart++) { 583 target_ulong addr = base + ((pos + k * max_elems) << esz); 584 ldst_elem(env, addr, pos + k * max_elems, vd, ra); 585 } 586 k++; 587 } 588 589 /* load/store elements for rest of segments */ 590 for (; k < nf; k++) { 591 for (i = 0; i < max_elems; i++, env->vstart++) { 592 target_ulong addr = base + ((i + k * max_elems) << esz); 593 ldst_elem(env, addr, i + k * max_elems, vd, ra); 594 } 595 } 596 597 env->vstart = 0; 598 } 599 600 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 601 void HELPER(NAME)(void *vd, target_ulong base, \ 602 CPURISCVState *env, uint32_t desc) \ 603 { \ 604 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 605 ctzl(sizeof(ETYPE)), GETPC(), \ 606 MMU_DATA_LOAD); \ 607 } 608 609 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 610 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 611 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 612 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 613 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 614 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 615 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 616 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 617 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 618 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 619 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 620 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 621 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 622 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 623 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 624 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 625 626 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 627 void HELPER(NAME)(void *vd, target_ulong base, \ 628 CPURISCVState *env, uint32_t desc) \ 629 { \ 630 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 631 ctzl(sizeof(ETYPE)), GETPC(), \ 632 MMU_DATA_STORE); \ 633 } 634 635 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 636 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 637 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 638 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 639 640 /* 641 *** Vector Integer Arithmetic Instructions 642 */ 643 644 /* expand macro args before macro */ 645 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 646 647 /* (TD, T1, T2, TX1, TX2) */ 648 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 649 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 650 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 651 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 652 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 653 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 654 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 655 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 656 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 657 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 658 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 659 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 660 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 661 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 662 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 663 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 664 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 665 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 666 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 667 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 668 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 669 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 670 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 671 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 672 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 673 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 674 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 675 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 676 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 677 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 678 679 /* operation of two vector elements */ 680 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 681 682 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 683 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 684 { \ 685 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 686 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 687 *((TD *)vd + HD(i)) = OP(s2, s1); \ 688 } 689 #define DO_SUB(N, M) (N - M) 690 #define DO_RSUB(N, M) (M - N) 691 692 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 693 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 694 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 695 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 696 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 697 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 698 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 699 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 700 701 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 702 CPURISCVState *env, uint32_t desc, 703 uint32_t esz, uint32_t dsz, 704 opivv2_fn *fn) 705 { 706 uint32_t vm = vext_vm(desc); 707 uint32_t vl = env->vl; 708 uint32_t i; 709 710 for (i = env->vstart; i < vl; i++) { 711 if (!vm && !vext_elem_mask(v0, i)) { 712 continue; 713 } 714 fn(vd, vs1, vs2, i); 715 } 716 env->vstart = 0; 717 } 718 719 /* generate the helpers for OPIVV */ 720 #define GEN_VEXT_VV(NAME, ESZ, DSZ) \ 721 void HELPER(NAME)(void *vd, void *v0, void *vs1, 
\ 722 void *vs2, CPURISCVState *env, \ 723 uint32_t desc) \ 724 { \ 725 do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 726 do_##NAME); \ 727 } 728 729 GEN_VEXT_VV(vadd_vv_b, 1, 1) 730 GEN_VEXT_VV(vadd_vv_h, 2, 2) 731 GEN_VEXT_VV(vadd_vv_w, 4, 4) 732 GEN_VEXT_VV(vadd_vv_d, 8, 8) 733 GEN_VEXT_VV(vsub_vv_b, 1, 1) 734 GEN_VEXT_VV(vsub_vv_h, 2, 2) 735 GEN_VEXT_VV(vsub_vv_w, 4, 4) 736 GEN_VEXT_VV(vsub_vv_d, 8, 8) 737 738 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 739 740 /* 741 * (T1)s1 gives the real operator type. 742 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 743 */ 744 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 745 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 746 { \ 747 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 748 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 749 } 750 751 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 752 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 753 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 754 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 755 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 756 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 757 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 758 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 759 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 760 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 761 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 762 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 763 764 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 765 CPURISCVState *env, uint32_t desc, 766 uint32_t esz, uint32_t dsz, 767 opivx2_fn fn) 768 { 769 uint32_t vm = vext_vm(desc); 770 uint32_t vl = env->vl; 771 uint32_t i; 772 773 for (i = env->vstart; i < vl; i++) { 774 if (!vm && !vext_elem_mask(v0, i)) { 775 continue; 776 } 777 fn(vd, s1, vs2, i); 778 } 779 env->vstart = 0; 780 } 781 782 /* generate the helpers for OPIVX */ 783 #define GEN_VEXT_VX(NAME, ESZ, DSZ) \ 784 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 785 void *vs2, CPURISCVState *env, \ 786 uint32_t desc) \ 787 { \ 788 do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 789 do_##NAME); \ 790 } 791 792 GEN_VEXT_VX(vadd_vx_b, 1, 1) 793 GEN_VEXT_VX(vadd_vx_h, 2, 2) 794 GEN_VEXT_VX(vadd_vx_w, 4, 4) 795 GEN_VEXT_VX(vadd_vx_d, 8, 8) 796 GEN_VEXT_VX(vsub_vx_b, 1, 1) 797 GEN_VEXT_VX(vsub_vx_h, 2, 2) 798 GEN_VEXT_VX(vsub_vx_w, 4, 4) 799 GEN_VEXT_VX(vsub_vx_d, 8, 8) 800 GEN_VEXT_VX(vrsub_vx_b, 1, 1) 801 GEN_VEXT_VX(vrsub_vx_h, 2, 2) 802 GEN_VEXT_VX(vrsub_vx_w, 4, 4) 803 GEN_VEXT_VX(vrsub_vx_d, 8, 8) 804 805 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 806 { 807 intptr_t oprsz = simd_oprsz(desc); 808 intptr_t i; 809 810 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 811 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 812 } 813 } 814 815 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 816 { 817 intptr_t oprsz = simd_oprsz(desc); 818 intptr_t i; 819 820 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 821 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 822 } 823 } 824 825 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 826 { 827 intptr_t oprsz = simd_oprsz(desc); 828 intptr_t i; 829 830 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 831 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 832 } 833 } 834 835 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 836 
{ 837 intptr_t oprsz = simd_oprsz(desc); 838 intptr_t i; 839 840 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 841 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 842 } 843 } 844 845 /* Vector Widening Integer Add/Subtract */ 846 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 847 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 848 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 849 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 850 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 851 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 852 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 853 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 854 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 855 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 856 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 857 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 858 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 859 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 860 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 861 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 862 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 863 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 864 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 865 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 866 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 867 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 868 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 869 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 870 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 871 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 872 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 873 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 874 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 875 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 876 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 877 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 878 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 879 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 880 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 881 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 882 GEN_VEXT_VV(vwaddu_vv_b, 1, 2) 883 GEN_VEXT_VV(vwaddu_vv_h, 2, 4) 884 GEN_VEXT_VV(vwaddu_vv_w, 4, 8) 885 GEN_VEXT_VV(vwsubu_vv_b, 1, 2) 886 GEN_VEXT_VV(vwsubu_vv_h, 2, 4) 887 GEN_VEXT_VV(vwsubu_vv_w, 4, 8) 888 GEN_VEXT_VV(vwadd_vv_b, 1, 2) 889 GEN_VEXT_VV(vwadd_vv_h, 2, 4) 890 GEN_VEXT_VV(vwadd_vv_w, 4, 8) 891 GEN_VEXT_VV(vwsub_vv_b, 1, 2) 892 GEN_VEXT_VV(vwsub_vv_h, 2, 4) 893 GEN_VEXT_VV(vwsub_vv_w, 4, 8) 894 GEN_VEXT_VV(vwaddu_wv_b, 1, 2) 895 GEN_VEXT_VV(vwaddu_wv_h, 2, 4) 896 GEN_VEXT_VV(vwaddu_wv_w, 4, 8) 897 GEN_VEXT_VV(vwsubu_wv_b, 1, 2) 898 GEN_VEXT_VV(vwsubu_wv_h, 2, 4) 899 GEN_VEXT_VV(vwsubu_wv_w, 4, 8) 900 GEN_VEXT_VV(vwadd_wv_b, 1, 2) 901 GEN_VEXT_VV(vwadd_wv_h, 2, 4) 902 GEN_VEXT_VV(vwadd_wv_w, 4, 8) 903 GEN_VEXT_VV(vwsub_wv_b, 1, 2) 904 GEN_VEXT_VV(vwsub_wv_h, 2, 4) 905 GEN_VEXT_VV(vwsub_wv_w, 4, 8) 906 907 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 908 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 909 
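/*
 * For reference, the widening type tuples compose with OPIVV2 in a purely
 * mechanical way.  As an illustration (a sketch of the macro expansion, not
 * additional code), the invocation
 *     RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
 * above expands to roughly:
 *
 *     static void do_vwaddu_vv_b(void *vd, void *vs1, void *vs2, int i)
 *     {
 *         uint16_t s1 = *((uint8_t *)vs1 + H1(i));
 *         uint16_t s2 = *((uint8_t *)vs2 + H1(i));
 *         *((uint16_t *)vd + H2(i)) = DO_ADD(s2, s1);
 *     }
 *
 * i.e. the 8-bit sources are widened to 16 bits before the add, so e.g.
 * 200 + 100 yields 300 in the double-width destination rather than wrapping
 * to 44 as a single-width add would.
 */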
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 910 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 911 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 912 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 913 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 914 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 915 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 916 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 917 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 918 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 919 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 920 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 921 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 922 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 923 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 924 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 925 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 926 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 927 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 928 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 929 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 930 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 931 GEN_VEXT_VX(vwaddu_vx_b, 1, 2) 932 GEN_VEXT_VX(vwaddu_vx_h, 2, 4) 933 GEN_VEXT_VX(vwaddu_vx_w, 4, 8) 934 GEN_VEXT_VX(vwsubu_vx_b, 1, 2) 935 GEN_VEXT_VX(vwsubu_vx_h, 2, 4) 936 GEN_VEXT_VX(vwsubu_vx_w, 4, 8) 937 GEN_VEXT_VX(vwadd_vx_b, 1, 2) 938 GEN_VEXT_VX(vwadd_vx_h, 2, 4) 939 GEN_VEXT_VX(vwadd_vx_w, 4, 8) 940 GEN_VEXT_VX(vwsub_vx_b, 1, 2) 941 GEN_VEXT_VX(vwsub_vx_h, 2, 4) 942 GEN_VEXT_VX(vwsub_vx_w, 4, 8) 943 GEN_VEXT_VX(vwaddu_wx_b, 1, 2) 944 GEN_VEXT_VX(vwaddu_wx_h, 2, 4) 945 GEN_VEXT_VX(vwaddu_wx_w, 4, 8) 946 GEN_VEXT_VX(vwsubu_wx_b, 1, 2) 947 GEN_VEXT_VX(vwsubu_wx_h, 2, 4) 948 GEN_VEXT_VX(vwsubu_wx_w, 4, 8) 949 GEN_VEXT_VX(vwadd_wx_b, 1, 2) 950 GEN_VEXT_VX(vwadd_wx_h, 2, 4) 951 GEN_VEXT_VX(vwadd_wx_w, 4, 8) 952 GEN_VEXT_VX(vwsub_wx_b, 1, 2) 953 GEN_VEXT_VX(vwsub_wx_h, 2, 4) 954 GEN_VEXT_VX(vwsub_wx_w, 4, 8) 955 956 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 957 #define DO_VADC(N, M, C) (N + M + C) 958 #define DO_VSBC(N, M, C) (N - M - C) 959 960 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 961 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 962 CPURISCVState *env, uint32_t desc) \ 963 { \ 964 uint32_t vl = env->vl; \ 965 uint32_t i; \ 966 \ 967 for (i = env->vstart; i < vl; i++) { \ 968 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 969 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 970 ETYPE carry = vext_elem_mask(v0, i); \ 971 \ 972 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 973 } \ 974 env->vstart = 0; \ 975 } 976 977 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 978 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 979 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 980 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 981 982 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 983 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 984 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 985 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 986 987 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 988 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 989 CPURISCVState *env, uint32_t desc) \ 990 { \ 991 uint32_t vl = env->vl; \ 992 uint32_t i; \ 993 \ 994 for (i = env->vstart; i < vl; i++) { \ 995 ETYPE s2 = 
*((ETYPE *)vs2 + H(i)); \ 996 ETYPE carry = vext_elem_mask(v0, i); \ 997 \ 998 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 999 } \ 1000 env->vstart = 0; \ 1001 } 1002 1003 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1004 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1005 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1006 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1007 1008 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1009 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1010 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1011 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1012 1013 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1014 (__typeof(N))(N + M) < N) 1015 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 1016 1017 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1018 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1019 CPURISCVState *env, uint32_t desc) \ 1020 { \ 1021 uint32_t vl = env->vl; \ 1022 uint32_t vm = vext_vm(desc); \ 1023 uint32_t i; \ 1024 \ 1025 for (i = env->vstart; i < vl; i++) { \ 1026 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1027 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1028 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1029 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1030 } \ 1031 env->vstart = 0; \ 1032 } 1033 1034 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1035 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1036 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1037 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1038 1039 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1040 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1041 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1042 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1043 1044 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1045 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1046 void *vs2, CPURISCVState *env, uint32_t desc) \ 1047 { \ 1048 uint32_t vl = env->vl; \ 1049 uint32_t vm = vext_vm(desc); \ 1050 uint32_t i; \ 1051 \ 1052 for (i = env->vstart; i < vl; i++) { \ 1053 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1054 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1055 vext_set_elem_mask(vd, i, \ 1056 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1057 } \ 1058 env->vstart = 0; \ 1059 } 1060 1061 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1062 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1063 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1064 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1065 1066 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1067 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1068 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1069 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1070 1071 /* Vector Bitwise Logical Instructions */ 1072 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1073 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1074 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1075 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1076 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1077 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1078 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1079 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1080 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1081 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1082 
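/*
 * A worked example for the DO_MADC/DO_MSBC carry/borrow predicates defined
 * above (illustration only, with hypothetical uint8_t values):
 *
 *   N = 200, M = 100, C = 0:
 *       (uint8_t)(N + M) = 44, and 44 < 200, so DO_MADC reports carry = 1
 *       (200 + 100 = 300 does not fit in 8 bits).
 *   N = 255, M = 0, C = 1:
 *       (uint8_t)(N + M + 1) = 0, and 0 <= 255, so carry = 1.
 *   N = 5, M = 5, C = 1:
 *       DO_MSBC evaluates N <= M, i.e. borrow = 1, since 5 - 5 - 1 wraps.
 *
 * In each case the trick is that an unsigned sum (or difference) overflowed
 * if and only if the truncated result compares against an operand in the
 * "wrong" direction.
 */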
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1083 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1084 GEN_VEXT_VV(vand_vv_b, 1, 1) 1085 GEN_VEXT_VV(vand_vv_h, 2, 2) 1086 GEN_VEXT_VV(vand_vv_w, 4, 4) 1087 GEN_VEXT_VV(vand_vv_d, 8, 8) 1088 GEN_VEXT_VV(vor_vv_b, 1, 1) 1089 GEN_VEXT_VV(vor_vv_h, 2, 2) 1090 GEN_VEXT_VV(vor_vv_w, 4, 4) 1091 GEN_VEXT_VV(vor_vv_d, 8, 8) 1092 GEN_VEXT_VV(vxor_vv_b, 1, 1) 1093 GEN_VEXT_VV(vxor_vv_h, 2, 2) 1094 GEN_VEXT_VV(vxor_vv_w, 4, 4) 1095 GEN_VEXT_VV(vxor_vv_d, 8, 8) 1096 1097 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1098 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1099 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1100 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1101 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1102 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1103 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1104 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1105 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1106 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1107 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1108 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1109 GEN_VEXT_VX(vand_vx_b, 1, 1) 1110 GEN_VEXT_VX(vand_vx_h, 2, 2) 1111 GEN_VEXT_VX(vand_vx_w, 4, 4) 1112 GEN_VEXT_VX(vand_vx_d, 8, 8) 1113 GEN_VEXT_VX(vor_vx_b, 1, 1) 1114 GEN_VEXT_VX(vor_vx_h, 2, 2) 1115 GEN_VEXT_VX(vor_vx_w, 4, 4) 1116 GEN_VEXT_VX(vor_vx_d, 8, 8) 1117 GEN_VEXT_VX(vxor_vx_b, 1, 1) 1118 GEN_VEXT_VX(vxor_vx_h, 2, 2) 1119 GEN_VEXT_VX(vxor_vx_w, 4, 4) 1120 GEN_VEXT_VX(vxor_vx_d, 8, 8) 1121 1122 /* Vector Single-Width Bit Shift Instructions */ 1123 #define DO_SLL(N, M) (N << (M)) 1124 #define DO_SRL(N, M) (N >> (M)) 1125 1126 /* generate the helpers for shift instructions with two vector operators */ 1127 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1128 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1129 void *vs2, CPURISCVState *env, uint32_t desc) \ 1130 { \ 1131 uint32_t vm = vext_vm(desc); \ 1132 uint32_t vl = env->vl; \ 1133 uint32_t i; \ 1134 \ 1135 for (i = env->vstart; i < vl; i++) { \ 1136 if (!vm && !vext_elem_mask(v0, i)) { \ 1137 continue; \ 1138 } \ 1139 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1140 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1141 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1142 } \ 1143 env->vstart = 0; \ 1144 } 1145 1146 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1147 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1148 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1149 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1150 1151 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1152 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1153 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1154 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1155 1156 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1157 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1158 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1159 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1160 1161 /* generate the helpers for shift instructions with one vector and one scalar */ 1162 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1163 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1164 void *vs2, 
CPURISCVState *env, uint32_t desc) \ 1165 { \ 1166 uint32_t vm = vext_vm(desc); \ 1167 uint32_t vl = env->vl; \ 1168 uint32_t i; \ 1169 \ 1170 for (i = env->vstart; i < vl; i++) { \ 1171 if (!vm && !vext_elem_mask(v0, i)) { \ 1172 continue; \ 1173 } \ 1174 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1175 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1176 } \ 1177 env->vstart = 0; \ 1178 } 1179 1180 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1181 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1182 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1183 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1184 1185 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1186 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1187 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1188 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1189 1190 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1191 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1192 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1193 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1194 1195 /* Vector Narrowing Integer Right Shift Instructions */ 1196 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1197 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1198 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1199 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1200 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1201 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1202 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1203 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1204 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1205 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1206 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1207 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1208 1209 /* Vector Integer Comparison Instructions */ 1210 #define DO_MSEQ(N, M) (N == M) 1211 #define DO_MSNE(N, M) (N != M) 1212 #define DO_MSLT(N, M) (N < M) 1213 #define DO_MSLE(N, M) (N <= M) 1214 #define DO_MSGT(N, M) (N > M) 1215 1216 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1217 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1218 CPURISCVState *env, uint32_t desc) \ 1219 { \ 1220 uint32_t vm = vext_vm(desc); \ 1221 uint32_t vl = env->vl; \ 1222 uint32_t i; \ 1223 \ 1224 for (i = env->vstart; i < vl; i++) { \ 1225 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1226 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1227 if (!vm && !vext_elem_mask(v0, i)) { \ 1228 continue; \ 1229 } \ 1230 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1231 } \ 1232 env->vstart = 0; \ 1233 } 1234 1235 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1236 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1237 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1238 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1239 1240 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1241 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1242 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1243 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1244 1245 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, 
DO_MSLT) 1246 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1247 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1248 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1249 1250 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1251 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1252 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1253 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1254 1255 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1256 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1257 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1258 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1259 1260 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1261 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1262 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1263 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1264 1265 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1266 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1267 CPURISCVState *env, uint32_t desc) \ 1268 { \ 1269 uint32_t vm = vext_vm(desc); \ 1270 uint32_t vl = env->vl; \ 1271 uint32_t i; \ 1272 \ 1273 for (i = env->vstart; i < vl; i++) { \ 1274 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1275 if (!vm && !vext_elem_mask(v0, i)) { \ 1276 continue; \ 1277 } \ 1278 vext_set_elem_mask(vd, i, \ 1279 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1280 } \ 1281 env->vstart = 0; \ 1282 } 1283 1284 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1285 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1286 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1287 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1288 1289 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1290 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1291 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1292 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1293 1294 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1295 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1296 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1297 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1298 1299 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1300 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1301 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1302 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1303 1304 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1305 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1306 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1307 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1308 1309 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1310 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1311 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1312 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1313 1314 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1315 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1316 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1317 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1318 1319 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1320 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1321 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1322 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1323 1324 /* Vector Integer Min/Max Instructions */ 1325 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1326 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1327 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1328 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1329 
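/*
 * The same DO_MSLT/DO_MIN/DO_MAX macros implement both the signed and the
 * unsigned variants; only the ETYPE / OP_UUU_x vs. OP_SSS_x type tuples
 * differ.  A small worked example (illustration only): for the byte values
 * 0xFF and 0x01,
 *
 *   vminu (uint8_t): 255 >= 1, so DO_MIN returns 1   (0x01)
 *   vmin  (int8_t):  -1 < 1,   so DO_MIN returns -1  (0xFF)
 *
 * and likewise vmsltu sees 255 < 1 as false while vmslt sees -1 < 1 as true.
 */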
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1330 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1331 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1332 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1333 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1334 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1335 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1336 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1337 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1338 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1339 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1340 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1341 GEN_VEXT_VV(vminu_vv_b, 1, 1) 1342 GEN_VEXT_VV(vminu_vv_h, 2, 2) 1343 GEN_VEXT_VV(vminu_vv_w, 4, 4) 1344 GEN_VEXT_VV(vminu_vv_d, 8, 8) 1345 GEN_VEXT_VV(vmin_vv_b, 1, 1) 1346 GEN_VEXT_VV(vmin_vv_h, 2, 2) 1347 GEN_VEXT_VV(vmin_vv_w, 4, 4) 1348 GEN_VEXT_VV(vmin_vv_d, 8, 8) 1349 GEN_VEXT_VV(vmaxu_vv_b, 1, 1) 1350 GEN_VEXT_VV(vmaxu_vv_h, 2, 2) 1351 GEN_VEXT_VV(vmaxu_vv_w, 4, 4) 1352 GEN_VEXT_VV(vmaxu_vv_d, 8, 8) 1353 GEN_VEXT_VV(vmax_vv_b, 1, 1) 1354 GEN_VEXT_VV(vmax_vv_h, 2, 2) 1355 GEN_VEXT_VV(vmax_vv_w, 4, 4) 1356 GEN_VEXT_VV(vmax_vv_d, 8, 8) 1357 1358 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1359 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1360 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1361 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1362 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1363 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1364 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1365 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1366 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1367 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1368 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1369 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1370 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1371 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1372 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1373 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1374 GEN_VEXT_VX(vminu_vx_b, 1, 1) 1375 GEN_VEXT_VX(vminu_vx_h, 2, 2) 1376 GEN_VEXT_VX(vminu_vx_w, 4, 4) 1377 GEN_VEXT_VX(vminu_vx_d, 8, 8) 1378 GEN_VEXT_VX(vmin_vx_b, 1, 1) 1379 GEN_VEXT_VX(vmin_vx_h, 2, 2) 1380 GEN_VEXT_VX(vmin_vx_w, 4, 4) 1381 GEN_VEXT_VX(vmin_vx_d, 8, 8) 1382 GEN_VEXT_VX(vmaxu_vx_b, 1, 1) 1383 GEN_VEXT_VX(vmaxu_vx_h, 2, 2) 1384 GEN_VEXT_VX(vmaxu_vx_w, 4, 4) 1385 GEN_VEXT_VX(vmaxu_vx_d, 8, 8) 1386 GEN_VEXT_VX(vmax_vx_b, 1, 1) 1387 GEN_VEXT_VX(vmax_vx_h, 2, 2) 1388 GEN_VEXT_VX(vmax_vx_w, 4, 4) 1389 GEN_VEXT_VX(vmax_vx_d, 8, 8) 1390 1391 /* Vector Single-Width Integer Multiply Instructions */ 1392 #define DO_MUL(N, M) (N * M) 1393 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1394 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1395 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1396 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1397 GEN_VEXT_VV(vmul_vv_b, 1, 1) 1398 GEN_VEXT_VV(vmul_vv_h, 2, 2) 1399 GEN_VEXT_VV(vmul_vv_w, 4, 4) 1400 GEN_VEXT_VV(vmul_vv_d, 8, 8) 1401 1402 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1403 { 1404 return (int16_t)s2 * (int16_t)s1 >> 8; 1405 } 1406 1407 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1408 { 1409 return (int32_t)s2 * (int32_t)s1 >> 16; 1410 } 1411 1412 static int32_t do_mulh_w(int32_t s2, 
int32_t s1) 1413 { 1414 return (int64_t)s2 * (int64_t)s1 >> 32; 1415 } 1416 1417 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1418 { 1419 uint64_t hi_64, lo_64; 1420 1421 muls64(&lo_64, &hi_64, s1, s2); 1422 return hi_64; 1423 } 1424 1425 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1426 { 1427 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1428 } 1429 1430 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1431 { 1432 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1433 } 1434 1435 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1436 { 1437 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1438 } 1439 1440 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1441 { 1442 uint64_t hi_64, lo_64; 1443 1444 mulu64(&lo_64, &hi_64, s2, s1); 1445 return hi_64; 1446 } 1447 1448 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1449 { 1450 return (int16_t)s2 * (uint16_t)s1 >> 8; 1451 } 1452 1453 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1454 { 1455 return (int32_t)s2 * (uint32_t)s1 >> 16; 1456 } 1457 1458 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1459 { 1460 return (int64_t)s2 * (uint64_t)s1 >> 32; 1461 } 1462 1463 /* 1464 * Let A = signed operand, 1465 * B = unsigned operand 1466 * P = mulu64(A, B), unsigned product 1467 * 1468 * LET X = 2 ** 64 - A, 2's complement of A 1469 * SP = signed product 1470 * THEN 1471 * IF A < 0 1472 * SP = -X * B 1473 * = -(2 ** 64 - A) * B 1474 * = A * B - 2 ** 64 * B 1475 * = P - 2 ** 64 * B 1476 * ELSE 1477 * SP = P 1478 * THEN 1479 * HI_P -= (A < 0 ? B : 0) 1480 */ 1481 1482 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1483 { 1484 uint64_t hi_64, lo_64; 1485 1486 mulu64(&lo_64, &hi_64, s2, s1); 1487 1488 hi_64 -= s2 < 0 ? s1 : 0; 1489 return hi_64; 1490 } 1491 1492 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1493 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1494 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1495 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1496 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1497 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1498 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1499 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1500 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1501 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1502 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1503 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1504 GEN_VEXT_VV(vmulh_vv_b, 1, 1) 1505 GEN_VEXT_VV(vmulh_vv_h, 2, 2) 1506 GEN_VEXT_VV(vmulh_vv_w, 4, 4) 1507 GEN_VEXT_VV(vmulh_vv_d, 8, 8) 1508 GEN_VEXT_VV(vmulhu_vv_b, 1, 1) 1509 GEN_VEXT_VV(vmulhu_vv_h, 2, 2) 1510 GEN_VEXT_VV(vmulhu_vv_w, 4, 4) 1511 GEN_VEXT_VV(vmulhu_vv_d, 8, 8) 1512 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) 1513 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) 1514 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) 1515 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) 1516 1517 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1518 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1519 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1520 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1521 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1522 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1523 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1524 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1525 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1526 RVVCALL(OPIVX2, 
vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1527 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1528 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1529 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1530 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1531 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1532 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1533 GEN_VEXT_VX(vmul_vx_b, 1, 1) 1534 GEN_VEXT_VX(vmul_vx_h, 2, 2) 1535 GEN_VEXT_VX(vmul_vx_w, 4, 4) 1536 GEN_VEXT_VX(vmul_vx_d, 8, 8) 1537 GEN_VEXT_VX(vmulh_vx_b, 1, 1) 1538 GEN_VEXT_VX(vmulh_vx_h, 2, 2) 1539 GEN_VEXT_VX(vmulh_vx_w, 4, 4) 1540 GEN_VEXT_VX(vmulh_vx_d, 8, 8) 1541 GEN_VEXT_VX(vmulhu_vx_b, 1, 1) 1542 GEN_VEXT_VX(vmulhu_vx_h, 2, 2) 1543 GEN_VEXT_VX(vmulhu_vx_w, 4, 4) 1544 GEN_VEXT_VX(vmulhu_vx_d, 8, 8) 1545 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) 1546 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) 1547 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) 1548 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) 1549 1550 /* Vector Integer Divide Instructions */ 1551 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1552 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1553 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1554 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1555 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1556 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1557 1558 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1559 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1560 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1561 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1562 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1563 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1564 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1565 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1566 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1567 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1568 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1569 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1570 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1571 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1572 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1573 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1574 GEN_VEXT_VV(vdivu_vv_b, 1, 1) 1575 GEN_VEXT_VV(vdivu_vv_h, 2, 2) 1576 GEN_VEXT_VV(vdivu_vv_w, 4, 4) 1577 GEN_VEXT_VV(vdivu_vv_d, 8, 8) 1578 GEN_VEXT_VV(vdiv_vv_b, 1, 1) 1579 GEN_VEXT_VV(vdiv_vv_h, 2, 2) 1580 GEN_VEXT_VV(vdiv_vv_w, 4, 4) 1581 GEN_VEXT_VV(vdiv_vv_d, 8, 8) 1582 GEN_VEXT_VV(vremu_vv_b, 1, 1) 1583 GEN_VEXT_VV(vremu_vv_h, 2, 2) 1584 GEN_VEXT_VV(vremu_vv_w, 4, 4) 1585 GEN_VEXT_VV(vremu_vv_d, 8, 8) 1586 GEN_VEXT_VV(vrem_vv_b, 1, 1) 1587 GEN_VEXT_VV(vrem_vv_h, 2, 2) 1588 GEN_VEXT_VV(vrem_vv_w, 4, 4) 1589 GEN_VEXT_VV(vrem_vv_d, 8, 8) 1590 1591 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1592 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1593 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1594 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1595 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1596 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1597 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1598 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1599 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1600 
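/*
 * The DO_DIVU/DO_REMU/DO_DIV/DO_REM macros above encode the RISC-V division
 * special cases: division by zero returns all ones for the quotient and the
 * unmodified dividend for the remainder, and signed overflow returns the
 * dividend for the quotient and 0 for the remainder.  A worked example with
 * int8_t elements (illustration only):
 *
 *     -128 / -1  -> -128         -128 % -1  -> 0
 *        7 /  0  -> -1 (0xFF)       7 %  0  -> 7
 *
 * The (N == -N) test identifies the most negative value (and, harmlessly,
 * 0, for which the ordinary N / M path gives the same result).
 */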
RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1601 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1602 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1603 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1604 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1605 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1606 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1607 GEN_VEXT_VX(vdivu_vx_b, 1, 1) 1608 GEN_VEXT_VX(vdivu_vx_h, 2, 2) 1609 GEN_VEXT_VX(vdivu_vx_w, 4, 4) 1610 GEN_VEXT_VX(vdivu_vx_d, 8, 8) 1611 GEN_VEXT_VX(vdiv_vx_b, 1, 1) 1612 GEN_VEXT_VX(vdiv_vx_h, 2, 2) 1613 GEN_VEXT_VX(vdiv_vx_w, 4, 4) 1614 GEN_VEXT_VX(vdiv_vx_d, 8, 8) 1615 GEN_VEXT_VX(vremu_vx_b, 1, 1) 1616 GEN_VEXT_VX(vremu_vx_h, 2, 2) 1617 GEN_VEXT_VX(vremu_vx_w, 4, 4) 1618 GEN_VEXT_VX(vremu_vx_d, 8, 8) 1619 GEN_VEXT_VX(vrem_vx_b, 1, 1) 1620 GEN_VEXT_VX(vrem_vx_h, 2, 2) 1621 GEN_VEXT_VX(vrem_vx_w, 4, 4) 1622 GEN_VEXT_VX(vrem_vx_d, 8, 8) 1623 1624 /* Vector Widening Integer Multiply Instructions */ 1625 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1626 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1627 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1628 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1629 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1630 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1631 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1632 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1633 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1634 GEN_VEXT_VV(vwmul_vv_b, 1, 2) 1635 GEN_VEXT_VV(vwmul_vv_h, 2, 4) 1636 GEN_VEXT_VV(vwmul_vv_w, 4, 8) 1637 GEN_VEXT_VV(vwmulu_vv_b, 1, 2) 1638 GEN_VEXT_VV(vwmulu_vv_h, 2, 4) 1639 GEN_VEXT_VV(vwmulu_vv_w, 4, 8) 1640 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) 1641 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) 1642 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) 1643 1644 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1645 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1646 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1647 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1648 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1649 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1650 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1651 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1652 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1653 GEN_VEXT_VX(vwmul_vx_b, 1, 2) 1654 GEN_VEXT_VX(vwmul_vx_h, 2, 4) 1655 GEN_VEXT_VX(vwmul_vx_w, 4, 8) 1656 GEN_VEXT_VX(vwmulu_vx_b, 1, 2) 1657 GEN_VEXT_VX(vwmulu_vx_h, 2, 4) 1658 GEN_VEXT_VX(vwmulu_vx_w, 4, 8) 1659 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) 1660 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) 1661 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) 1662 1663 /* Vector Single-Width Integer Multiply-Add Instructions */ 1664 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1665 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1666 { \ 1667 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1668 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1669 TD d = *((TD *)vd + HD(i)); \ 1670 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1671 } 1672 1673 #define DO_MACC(N, M, D) (M * N + D) 1674 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1675 #define DO_MADD(N, M, D) (M * D + N) 1676 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1677 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1678 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1679 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, 
DO_MACC) 1680 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1681 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1682 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1683 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1684 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1685 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1686 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1687 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1688 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1689 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1690 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1691 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1692 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1693 GEN_VEXT_VV(vmacc_vv_b, 1, 1) 1694 GEN_VEXT_VV(vmacc_vv_h, 2, 2) 1695 GEN_VEXT_VV(vmacc_vv_w, 4, 4) 1696 GEN_VEXT_VV(vmacc_vv_d, 8, 8) 1697 GEN_VEXT_VV(vnmsac_vv_b, 1, 1) 1698 GEN_VEXT_VV(vnmsac_vv_h, 2, 2) 1699 GEN_VEXT_VV(vnmsac_vv_w, 4, 4) 1700 GEN_VEXT_VV(vnmsac_vv_d, 8, 8) 1701 GEN_VEXT_VV(vmadd_vv_b, 1, 1) 1702 GEN_VEXT_VV(vmadd_vv_h, 2, 2) 1703 GEN_VEXT_VV(vmadd_vv_w, 4, 4) 1704 GEN_VEXT_VV(vmadd_vv_d, 8, 8) 1705 GEN_VEXT_VV(vnmsub_vv_b, 1, 1) 1706 GEN_VEXT_VV(vnmsub_vv_h, 2, 2) 1707 GEN_VEXT_VV(vnmsub_vv_w, 4, 4) 1708 GEN_VEXT_VV(vnmsub_vv_d, 8, 8) 1709 1710 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1711 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1712 { \ 1713 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1714 TD d = *((TD *)vd + HD(i)); \ 1715 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1716 } 1717 1718 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1719 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1720 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1721 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1722 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1723 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1724 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1725 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1726 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1727 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1728 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1729 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1730 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1731 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1732 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1733 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1734 GEN_VEXT_VX(vmacc_vx_b, 1, 1) 1735 GEN_VEXT_VX(vmacc_vx_h, 2, 2) 1736 GEN_VEXT_VX(vmacc_vx_w, 4, 4) 1737 GEN_VEXT_VX(vmacc_vx_d, 8, 8) 1738 GEN_VEXT_VX(vnmsac_vx_b, 1, 1) 1739 GEN_VEXT_VX(vnmsac_vx_h, 2, 2) 1740 GEN_VEXT_VX(vnmsac_vx_w, 4, 4) 1741 GEN_VEXT_VX(vnmsac_vx_d, 8, 8) 1742 GEN_VEXT_VX(vmadd_vx_b, 1, 1) 1743 GEN_VEXT_VX(vmadd_vx_h, 2, 2) 1744 GEN_VEXT_VX(vmadd_vx_w, 4, 4) 1745 GEN_VEXT_VX(vmadd_vx_d, 8, 8) 1746 GEN_VEXT_VX(vnmsub_vx_b, 1, 1) 1747 GEN_VEXT_VX(vnmsub_vx_h, 2, 2) 1748 GEN_VEXT_VX(vnmsub_vx_w, 4, 4) 1749 GEN_VEXT_VX(vnmsub_vx_d, 8, 8) 1750 1751 /* Vector Widening Integer Multiply-Add Instructions */ 1752 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1753 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1754 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1755 RVVCALL(OPIVV3, 
vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1756 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1757 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1758 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1759 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1760 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1761 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) 1762 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) 1763 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) 1764 GEN_VEXT_VV(vwmacc_vv_b, 1, 2) 1765 GEN_VEXT_VV(vwmacc_vv_h, 2, 4) 1766 GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 1767 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) 1768 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) 1769 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) 1770 1771 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1772 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1773 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1774 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1775 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1776 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1777 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1778 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1779 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1780 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1781 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1782 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1783 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) 1784 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) 1785 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) 1786 GEN_VEXT_VX(vwmacc_vx_b, 1, 2) 1787 GEN_VEXT_VX(vwmacc_vx_h, 2, 4) 1788 GEN_VEXT_VX(vwmacc_vx_w, 4, 8) 1789 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) 1790 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) 1791 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) 1792 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) 1793 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) 1794 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) 1795 1796 /* Vector Integer Merge and Move Instructions */ 1797 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1798 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1799 uint32_t desc) \ 1800 { \ 1801 uint32_t vl = env->vl; \ 1802 uint32_t i; \ 1803 \ 1804 for (i = env->vstart; i < vl; i++) { \ 1805 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1806 *((ETYPE *)vd + H(i)) = s1; \ 1807 } \ 1808 env->vstart = 0; \ 1809 } 1810 1811 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1812 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1813 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1814 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1815 1816 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1817 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1818 uint32_t desc) \ 1819 { \ 1820 uint32_t vl = env->vl; \ 1821 uint32_t i; \ 1822 \ 1823 for (i = env->vstart; i < vl; i++) { \ 1824 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1825 } \ 1826 env->vstart = 0; \ 1827 } 1828 1829 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1830 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1831 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1832 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1833 1834 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1835 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1836 CPURISCVState *env, uint32_t desc) \ 1837 { \ 1838 uint32_t vl = env->vl; \ 1839 uint32_t i; \ 1840 \ 1841 for (i = env->vstart; i < vl; i++) { \ 1842 ETYPE *vt = (!vext_elem_mask(v0, i) ? 
vs2 : vs1); \ 1843 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1844 } \ 1845 env->vstart = 0; \ 1846 } 1847 1848 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1849 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1850 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1851 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1852 1853 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1854 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1855 void *vs2, CPURISCVState *env, uint32_t desc) \ 1856 { \ 1857 uint32_t vl = env->vl; \ 1858 uint32_t i; \ 1859 \ 1860 for (i = env->vstart; i < vl; i++) { \ 1861 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1862 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 1863 (ETYPE)(target_long)s1); \ 1864 *((ETYPE *)vd + H(i)) = d; \ 1865 } \ 1866 env->vstart = 0; \ 1867 } 1868 1869 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1870 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1871 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1872 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1873 1874 /* 1875 *** Vector Fixed-Point Arithmetic Instructions 1876 */ 1877 1878 /* Vector Single-Width Saturating Add and Subtract */ 1879 1880 /* 1881 * As fixed point instructions probably have round mode and saturation, 1882 * define common macros for fixed point here. 1883 */ 1884 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1885 CPURISCVState *env, int vxrm); 1886 1887 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1888 static inline void \ 1889 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1890 CPURISCVState *env, int vxrm) \ 1891 { \ 1892 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1893 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1894 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1895 } 1896 1897 static inline void 1898 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1899 CPURISCVState *env, 1900 uint32_t vl, uint32_t vm, int vxrm, 1901 opivv2_rm_fn *fn) 1902 { 1903 for (uint32_t i = env->vstart; i < vl; i++) { 1904 if (!vm && !vext_elem_mask(v0, i)) { 1905 continue; 1906 } 1907 fn(vd, vs1, vs2, i, env, vxrm); 1908 } 1909 env->vstart = 0; 1910 } 1911 1912 static inline void 1913 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1914 CPURISCVState *env, 1915 uint32_t desc, uint32_t esz, uint32_t dsz, 1916 opivv2_rm_fn *fn) 1917 { 1918 uint32_t vm = vext_vm(desc); 1919 uint32_t vl = env->vl; 1920 1921 switch (env->vxrm) { 1922 case 0: /* rnu */ 1923 vext_vv_rm_1(vd, v0, vs1, vs2, 1924 env, vl, vm, 0, fn); 1925 break; 1926 case 1: /* rne */ 1927 vext_vv_rm_1(vd, v0, vs1, vs2, 1928 env, vl, vm, 1, fn); 1929 break; 1930 case 2: /* rdn */ 1931 vext_vv_rm_1(vd, v0, vs1, vs2, 1932 env, vl, vm, 2, fn); 1933 break; 1934 default: /* rod */ 1935 vext_vv_rm_1(vd, v0, vs1, vs2, 1936 env, vl, vm, 3, fn); 1937 break; 1938 } 1939 } 1940 1941 /* generate helpers for fixed point instructions with OPIVV format */ 1942 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ 1943 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1944 CPURISCVState *env, uint32_t desc) \ 1945 { \ 1946 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 1947 do_##NAME); \ 1948 } 1949 1950 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 1951 { 1952 uint8_t res = a + b; 1953 if (res < a) { 1954 res = UINT8_MAX; 1955 env->vxsat = 0x1; 1956 } 1957 return res; 1958 } 1959 1960 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1961 uint16_t b) 1962 { 1963 uint16_t res = a + b; 1964 if (res < a) { 1965 res = UINT16_MAX; 1966 env->vxsat = 0x1; 1967 } 1968 return res; 
1969 } 1970 1971 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1972 uint32_t b) 1973 { 1974 uint32_t res = a + b; 1975 if (res < a) { 1976 res = UINT32_MAX; 1977 env->vxsat = 0x1; 1978 } 1979 return res; 1980 } 1981 1982 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1983 uint64_t b) 1984 { 1985 uint64_t res = a + b; 1986 if (res < a) { 1987 res = UINT64_MAX; 1988 env->vxsat = 0x1; 1989 } 1990 return res; 1991 } 1992 1993 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 1994 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 1995 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 1996 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 1997 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) 1998 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) 1999 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) 2000 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) 2001 2002 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2003 CPURISCVState *env, int vxrm); 2004 2005 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2006 static inline void \ 2007 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2008 CPURISCVState *env, int vxrm) \ 2009 { \ 2010 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2011 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2012 } 2013 2014 static inline void 2015 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2016 CPURISCVState *env, 2017 uint32_t vl, uint32_t vm, int vxrm, 2018 opivx2_rm_fn *fn) 2019 { 2020 for (uint32_t i = env->vstart; i < vl; i++) { 2021 if (!vm && !vext_elem_mask(v0, i)) { 2022 continue; 2023 } 2024 fn(vd, s1, vs2, i, env, vxrm); 2025 } 2026 env->vstart = 0; 2027 } 2028 2029 static inline void 2030 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2031 CPURISCVState *env, 2032 uint32_t desc, uint32_t esz, uint32_t dsz, 2033 opivx2_rm_fn *fn) 2034 { 2035 uint32_t vm = vext_vm(desc); 2036 uint32_t vl = env->vl; 2037 2038 switch (env->vxrm) { 2039 case 0: /* rnu */ 2040 vext_vx_rm_1(vd, v0, s1, vs2, 2041 env, vl, vm, 0, fn); 2042 break; 2043 case 1: /* rne */ 2044 vext_vx_rm_1(vd, v0, s1, vs2, 2045 env, vl, vm, 1, fn); 2046 break; 2047 case 2: /* rdn */ 2048 vext_vx_rm_1(vd, v0, s1, vs2, 2049 env, vl, vm, 2, fn); 2050 break; 2051 default: /* rod */ 2052 vext_vx_rm_1(vd, v0, s1, vs2, 2053 env, vl, vm, 3, fn); 2054 break; 2055 } 2056 } 2057 2058 /* generate helpers for fixed point instructions with OPIVX format */ 2059 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ 2060 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2061 void *vs2, CPURISCVState *env, uint32_t desc) \ 2062 { \ 2063 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 2064 do_##NAME); \ 2065 } 2066 2067 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2068 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2069 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2070 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2071 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 2072 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) 2073 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) 2074 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) 2075 2076 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2077 { 2078 int8_t res = a + b; 2079 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2080 res = a > 0 ? 
INT8_MAX : INT8_MIN; 2081 env->vxsat = 0x1; 2082 } 2083 return res; 2084 } 2085 2086 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2087 { 2088 int16_t res = a + b; 2089 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2090 res = a > 0 ? INT16_MAX : INT16_MIN; 2091 env->vxsat = 0x1; 2092 } 2093 return res; 2094 } 2095 2096 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2097 { 2098 int32_t res = a + b; 2099 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2100 res = a > 0 ? INT32_MAX : INT32_MIN; 2101 env->vxsat = 0x1; 2102 } 2103 return res; 2104 } 2105 2106 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2107 { 2108 int64_t res = a + b; 2109 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2110 res = a > 0 ? INT64_MAX : INT64_MIN; 2111 env->vxsat = 0x1; 2112 } 2113 return res; 2114 } 2115 2116 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2117 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2118 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2119 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2120 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) 2121 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) 2122 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) 2123 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) 2124 2125 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2126 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2127 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2128 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2129 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) 2130 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) 2131 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) 2132 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) 2133 2134 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2135 { 2136 uint8_t res = a - b; 2137 if (res > a) { 2138 res = 0; 2139 env->vxsat = 0x1; 2140 } 2141 return res; 2142 } 2143 2144 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2145 uint16_t b) 2146 { 2147 uint16_t res = a - b; 2148 if (res > a) { 2149 res = 0; 2150 env->vxsat = 0x1; 2151 } 2152 return res; 2153 } 2154 2155 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2156 uint32_t b) 2157 { 2158 uint32_t res = a - b; 2159 if (res > a) { 2160 res = 0; 2161 env->vxsat = 0x1; 2162 } 2163 return res; 2164 } 2165 2166 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2167 uint64_t b) 2168 { 2169 uint64_t res = a - b; 2170 if (res > a) { 2171 res = 0; 2172 env->vxsat = 0x1; 2173 } 2174 return res; 2175 } 2176 2177 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2178 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2179 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2180 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2181 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) 2182 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) 2183 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) 2184 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) 2185 2186 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2187 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2188 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2189 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2190 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) 2191 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) 2192 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) 2193 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) 2194 2195 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2196 { 2197 int8_t res = a - b; 2198 if 
((res ^ a) & (a ^ b) & INT8_MIN) { 2199 res = a >= 0 ? INT8_MAX : INT8_MIN; 2200 env->vxsat = 0x1; 2201 } 2202 return res; 2203 } 2204 2205 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2206 { 2207 int16_t res = a - b; 2208 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2209 res = a >= 0 ? INT16_MAX : INT16_MIN; 2210 env->vxsat = 0x1; 2211 } 2212 return res; 2213 } 2214 2215 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2216 { 2217 int32_t res = a - b; 2218 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2219 res = a >= 0 ? INT32_MAX : INT32_MIN; 2220 env->vxsat = 0x1; 2221 } 2222 return res; 2223 } 2224 2225 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2226 { 2227 int64_t res = a - b; 2228 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2229 res = a >= 0 ? INT64_MAX : INT64_MIN; 2230 env->vxsat = 0x1; 2231 } 2232 return res; 2233 } 2234 2235 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2236 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2237 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2238 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2239 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) 2240 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) 2241 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) 2242 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) 2243 2244 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2245 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2246 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2247 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2248 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) 2249 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) 2250 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) 2251 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) 2252 2253 /* Vector Single-Width Averaging Add and Subtract */ 2254 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2255 { 2256 uint8_t d = extract64(v, shift, 1); 2257 uint8_t d1; 2258 uint64_t D1, D2; 2259 2260 if (shift == 0 || shift > 64) { 2261 return 0; 2262 } 2263 2264 d1 = extract64(v, shift - 1, 1); 2265 D1 = extract64(v, 0, shift); 2266 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2267 return d1; 2268 } else if (vxrm == 1) { /* round-to-nearest-even */ 2269 if (shift > 1) { 2270 D2 = extract64(v, 0, shift - 1); 2271 return d1 & ((D2 != 0) | d); 2272 } else { 2273 return d1 & d; 2274 } 2275 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2276 return !d & (D1 != 0); 2277 } 2278 return 0; /* round-down (truncate) */ 2279 } 2280 2281 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2282 { 2283 int64_t res = (int64_t)a + b; 2284 uint8_t round = get_round(vxrm, res, 1); 2285 2286 return (res >> 1) + round; 2287 } 2288 2289 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2290 { 2291 int64_t res = a + b; 2292 uint8_t round = get_round(vxrm, res, 1); 2293 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2294 2295 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2296 return ((res >> 1) ^ over) + round; 2297 } 2298 2299 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2300 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2301 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2302 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2303 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) 2304 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 2305 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) 2306 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) 2307 2308 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2309 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2310 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2311 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2312 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) 2313 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) 2314 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) 2315 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) 2316 2317 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2318 uint32_t a, uint32_t b) 2319 { 2320 uint64_t res = (uint64_t)a + b; 2321 uint8_t round = get_round(vxrm, res, 1); 2322 2323 return (res >> 1) + round; 2324 } 2325 2326 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2327 uint64_t a, uint64_t b) 2328 { 2329 uint64_t res = a + b; 2330 uint8_t round = get_round(vxrm, res, 1); 2331 uint64_t over = (uint64_t)(res < a) << 63; 2332 2333 return ((res >> 1) | over) + round; 2334 } 2335 2336 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2337 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2338 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2339 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2340 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1) 2341 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2) 2342 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4) 2343 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8) 2344 2345 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2346 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2347 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2348 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2349 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1) 2350 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2) 2351 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4) 2352 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8) 2353 2354 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2355 { 2356 int64_t res = (int64_t)a - b; 2357 uint8_t round = get_round(vxrm, res, 1); 2358 2359 return (res >> 1) + round; 2360 } 2361 2362 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2363 { 2364 int64_t res = (int64_t)a - b; 2365 uint8_t round = get_round(vxrm, res, 1); 2366 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2367 2368 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2369 return ((res >> 1) ^ over) + round; 2370 } 2371 2372 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2373 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2374 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2375 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2376 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) 2377 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) 2378 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) 2379 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) 2380 2381 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2382 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2383 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2384 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2385 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) 2386 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) 2387 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) 2388 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) 2389 2390 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2391 uint32_t a, uint32_t b) 2392 { 2393 int64_t res = (int64_t)a - b; 2394 uint8_t round = get_round(vxrm, res, 1); 2395 2396 return (res >> 1) + round; 2397 } 2398 2399 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2400 uint64_t a, uint64_t b) 2401 { 2402 uint64_t res = (uint64_t)a - b; 2403 uint8_t round = get_round(vxrm, res, 1); 2404 uint64_t over = (uint64_t)(res > a) << 63; 2405 2406 return ((res >> 1) | over) + round; 2407 } 2408 2409 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2410 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2411 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2412 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2413 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1) 2414 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2) 2415 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4) 2416 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8) 2417 2418 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2419 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2420 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2421 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2422 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1) 2423 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2) 2424 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4) 2425 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8) 2426 2427 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2428 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2429 { 2430 uint8_t round; 2431 int16_t res; 2432 2433 res = (int16_t)a * (int16_t)b; 2434 round = get_round(vxrm, res, 7); 2435 res = (res >> 7) + round; 2436 2437 if (res > INT8_MAX) { 2438 env->vxsat = 0x1; 2439 return INT8_MAX; 2440 } else if (res < INT8_MIN) { 2441 env->vxsat = 0x1; 2442 return INT8_MIN; 2443 } else { 2444 return res; 2445 } 2446 } 2447 2448 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2449 { 2450 uint8_t round; 2451 int32_t res; 2452 2453 res = (int32_t)a * (int32_t)b; 2454 round = get_round(vxrm, res, 15); 2455 res = (res >> 15) + round; 2456 2457 if (res > INT16_MAX) { 2458 env->vxsat = 0x1; 2459 return INT16_MAX; 2460 } else if (res < INT16_MIN) { 2461 env->vxsat = 0x1; 2462 return INT16_MIN; 2463 } else { 2464 return res; 2465 } 2466 } 2467 2468 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2469 { 2470 uint8_t round; 2471 int64_t res; 2472 2473 res = (int64_t)a * (int64_t)b; 2474 round = get_round(vxrm, res, 31); 2475 res = (res >> 31) + round; 2476 2477 if (res > INT32_MAX) { 2478 env->vxsat = 0x1; 2479 return INT32_MAX; 2480 } else 
if (res < INT32_MIN) { 2481 env->vxsat = 0x1; 2482 return INT32_MIN; 2483 } else { 2484 return res; 2485 } 2486 } 2487 2488 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2489 { 2490 uint8_t round; 2491 uint64_t hi_64, lo_64; 2492 int64_t res; 2493 2494 if (a == INT64_MIN && b == INT64_MIN) { 2495 env->vxsat = 1; 2496 return INT64_MAX; 2497 } 2498 2499 muls64(&lo_64, &hi_64, a, b); 2500 round = get_round(vxrm, lo_64, 63); 2501 /* 2502 * Cannot overflow, as there are always 2503 * 2 sign bits after multiply. 2504 */ 2505 res = (hi_64 << 1) | (lo_64 >> 63); 2506 if (round) { 2507 if (res == INT64_MAX) { 2508 env->vxsat = 1; 2509 } else { 2510 res += 1; 2511 } 2512 } 2513 return res; 2514 } 2515 2516 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2517 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2518 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2519 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2520 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) 2521 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) 2522 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) 2523 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) 2524 2525 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2526 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2527 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2528 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2529 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) 2530 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) 2531 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) 2532 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) 2533 2534 /* Vector Single-Width Scaling Shift Instructions */ 2535 static inline uint8_t 2536 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2537 { 2538 uint8_t round, shift = b & 0x7; 2539 uint8_t res; 2540 2541 round = get_round(vxrm, a, shift); 2542 res = (a >> shift) + round; 2543 return res; 2544 } 2545 static inline uint16_t 2546 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2547 { 2548 uint8_t round, shift = b & 0xf; 2549 uint16_t res; 2550 2551 round = get_round(vxrm, a, shift); 2552 res = (a >> shift) + round; 2553 return res; 2554 } 2555 static inline uint32_t 2556 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2557 { 2558 uint8_t round, shift = b & 0x1f; 2559 uint32_t res; 2560 2561 round = get_round(vxrm, a, shift); 2562 res = (a >> shift) + round; 2563 return res; 2564 } 2565 static inline uint64_t 2566 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2567 { 2568 uint8_t round, shift = b & 0x3f; 2569 uint64_t res; 2570 2571 round = get_round(vxrm, a, shift); 2572 res = (a >> shift) + round; 2573 return res; 2574 } 2575 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2576 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2577 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2578 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2579 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) 2580 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) 2581 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) 2582 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) 2583 2584 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2585 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2586 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2587 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2588 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) 2589 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) 2590 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) 2591 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) 2592 2593 static inline int8_t 2594 vssra8(CPURISCVState *env, int 
vxrm, int8_t a, int8_t b) 2595 { 2596 uint8_t round, shift = b & 0x7; 2597 int8_t res; 2598 2599 round = get_round(vxrm, a, shift); 2600 res = (a >> shift) + round; 2601 return res; 2602 } 2603 static inline int16_t 2604 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2605 { 2606 uint8_t round, shift = b & 0xf; 2607 int16_t res; 2608 2609 round = get_round(vxrm, a, shift); 2610 res = (a >> shift) + round; 2611 return res; 2612 } 2613 static inline int32_t 2614 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2615 { 2616 uint8_t round, shift = b & 0x1f; 2617 int32_t res; 2618 2619 round = get_round(vxrm, a, shift); 2620 res = (a >> shift) + round; 2621 return res; 2622 } 2623 static inline int64_t 2624 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2625 { 2626 uint8_t round, shift = b & 0x3f; 2627 int64_t res; 2628 2629 round = get_round(vxrm, a, shift); 2630 res = (a >> shift) + round; 2631 return res; 2632 } 2633 2634 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2635 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2636 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2637 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2638 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) 2639 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) 2640 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) 2641 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) 2642 2643 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2644 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2645 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2646 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2647 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) 2648 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) 2649 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) 2650 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) 2651 2652 /* Vector Narrowing Fixed-Point Clip Instructions */ 2653 static inline int8_t 2654 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2655 { 2656 uint8_t round, shift = b & 0xf; 2657 int16_t res; 2658 2659 round = get_round(vxrm, a, shift); 2660 res = (a >> shift) + round; 2661 if (res > INT8_MAX) { 2662 env->vxsat = 0x1; 2663 return INT8_MAX; 2664 } else if (res < INT8_MIN) { 2665 env->vxsat = 0x1; 2666 return INT8_MIN; 2667 } else { 2668 return res; 2669 } 2670 } 2671 2672 static inline int16_t 2673 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2674 { 2675 uint8_t round, shift = b & 0x1f; 2676 int32_t res; 2677 2678 round = get_round(vxrm, a, shift); 2679 res = (a >> shift) + round; 2680 if (res > INT16_MAX) { 2681 env->vxsat = 0x1; 2682 return INT16_MAX; 2683 } else if (res < INT16_MIN) { 2684 env->vxsat = 0x1; 2685 return INT16_MIN; 2686 } else { 2687 return res; 2688 } 2689 } 2690 2691 static inline int32_t 2692 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2693 { 2694 uint8_t round, shift = b & 0x3f; 2695 int64_t res; 2696 2697 round = get_round(vxrm, a, shift); 2698 res = (a >> shift) + round; 2699 if (res > INT32_MAX) { 2700 env->vxsat = 0x1; 2701 return INT32_MAX; 2702 } else if (res < INT32_MIN) { 2703 env->vxsat = 0x1; 2704 return INT32_MIN; 2705 } else { 2706 return res; 2707 } 2708 } 2709 2710 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2711 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2712 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2713 GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1) 2714 GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2) 2715 GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4) 2716 2717 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, 
vnclip8)
RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4)

static inline uint8_t
vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
{
    uint8_t round, shift = b & 0xf;
    uint16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT8_MAX) {
        env->vxsat = 0x1;
        return UINT8_MAX;
    } else {
        return res;
    }
}

static inline uint16_t
vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
{
    uint8_t round, shift = b & 0x1f;
    uint32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT16_MAX) {
        env->vxsat = 0x1;
        return UINT16_MAX;
    } else {
        return res;
    }
}

static inline uint32_t
vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x3f;
    uint64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT32_MAX) {
        env->vxsat = 0x1;
        return UINT32_MAX;
    } else {
        return res;
    }
}

RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1)
GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2)
GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4)

RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1)
GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2)
GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4)

/*
 *** Vector Floating-Point Arithmetic Instructions
 */
/* Vector Single-Width Floating-Point Add/Subtract Instructions */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
}

#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ)                   \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t i;                                           \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, vs1, vs2, i, env);                  \
    }                                                     \
    env->vstart = 0;                                      \
}

RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8)

#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}

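/*
 * Note (illustrative, not generated code): each float helper is built in
 * two steps.  RVVCALL(OPFVF2, NAME, ...) defines a per-element function
 * do_NAME(), and GEN_VEXT_VF(NAME, ESZ, DSZ) below wraps it in the
 * HELPER(NAME) loop that applies masking and clears vstart.  For vfadd.vf
 * with SEW=16, the pair
 *
 *     RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
 *     GEN_VEXT_VF(vfadd_vf_h, 2, 2)
 *
 * roughly expands to a do_vfadd_vf_h() that performs
 *     *((uint16_t *)vd + H2(i)) = float16_add(s2, (uint16_t)s1, &env->fp_status);
 * and a helper_vfadd_vf_h() that calls it for each i in [vstart, vl) whose
 * element is active (vm set, or bit i of v0 set).
 */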
2832 #define GEN_VEXT_VF(NAME, ESZ, DSZ) \ 2833 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2834 void *vs2, CPURISCVState *env, \ 2835 uint32_t desc) \ 2836 { \ 2837 uint32_t vm = vext_vm(desc); \ 2838 uint32_t vl = env->vl; \ 2839 uint32_t i; \ 2840 \ 2841 for (i = env->vstart; i < vl; i++) { \ 2842 if (!vm && !vext_elem_mask(v0, i)) { \ 2843 continue; \ 2844 } \ 2845 do_##NAME(vd, s1, vs2, i, env); \ 2846 } \ 2847 env->vstart = 0; \ 2848 } 2849 2850 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2851 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2852 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2853 GEN_VEXT_VF(vfadd_vf_h, 2, 2) 2854 GEN_VEXT_VF(vfadd_vf_w, 4, 4) 2855 GEN_VEXT_VF(vfadd_vf_d, 8, 8) 2856 2857 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2858 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2859 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2860 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) 2861 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) 2862 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) 2863 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2864 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2865 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2866 GEN_VEXT_VF(vfsub_vf_h, 2, 2) 2867 GEN_VEXT_VF(vfsub_vf_w, 4, 4) 2868 GEN_VEXT_VF(vfsub_vf_d, 8, 8) 2869 2870 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2871 { 2872 return float16_sub(b, a, s); 2873 } 2874 2875 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2876 { 2877 return float32_sub(b, a, s); 2878 } 2879 2880 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2881 { 2882 return float64_sub(b, a, s); 2883 } 2884 2885 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2886 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2887 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2888 GEN_VEXT_VF(vfrsub_vf_h, 2, 2) 2889 GEN_VEXT_VF(vfrsub_vf_w, 4, 4) 2890 GEN_VEXT_VF(vfrsub_vf_d, 8, 8) 2891 2892 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2893 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2894 { 2895 return float32_add(float16_to_float32(a, true, s), 2896 float16_to_float32(b, true, s), s); 2897 } 2898 2899 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2900 { 2901 return float64_add(float32_to_float64(a, s), 2902 float32_to_float64(b, s), s); 2903 2904 } 2905 2906 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2907 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2908 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) 2909 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) 2910 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2911 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2912 GEN_VEXT_VF(vfwadd_vf_h, 2, 4) 2913 GEN_VEXT_VF(vfwadd_vf_w, 4, 8) 2914 2915 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2916 { 2917 return float32_sub(float16_to_float32(a, true, s), 2918 float16_to_float32(b, true, s), s); 2919 } 2920 2921 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2922 { 2923 return float64_sub(float32_to_float64(a, s), 2924 float32_to_float64(b, s), s); 2925 2926 } 2927 2928 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 2929 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 2930 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) 2931 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) 2932 RVVCALL(OPFVF2, vfwsub_vf_h, 
WOP_UUU_H, H4, H2, vfwsub16) 2933 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 2934 GEN_VEXT_VF(vfwsub_vf_h, 2, 4) 2935 GEN_VEXT_VF(vfwsub_vf_w, 4, 8) 2936 2937 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 2938 { 2939 return float32_add(a, float16_to_float32(b, true, s), s); 2940 } 2941 2942 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 2943 { 2944 return float64_add(a, float32_to_float64(b, s), s); 2945 } 2946 2947 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 2948 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 2949 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) 2950 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 2951 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 2952 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 2953 GEN_VEXT_VF(vfwadd_wf_h, 2, 4) 2954 GEN_VEXT_VF(vfwadd_wf_w, 4, 8) 2955 2956 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 2957 { 2958 return float32_sub(a, float16_to_float32(b, true, s), s); 2959 } 2960 2961 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 2962 { 2963 return float64_sub(a, float32_to_float64(b, s), s); 2964 } 2965 2966 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 2967 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 2968 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) 2969 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) 2970 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 2971 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 2972 GEN_VEXT_VF(vfwsub_wf_h, 2, 4) 2973 GEN_VEXT_VF(vfwsub_wf_w, 4, 8) 2974 2975 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 2976 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 2977 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 2978 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 2979 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) 2980 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) 2981 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) 2982 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 2983 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 2984 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 2985 GEN_VEXT_VF(vfmul_vf_h, 2, 2) 2986 GEN_VEXT_VF(vfmul_vf_w, 4, 4) 2987 GEN_VEXT_VF(vfmul_vf_d, 8, 8) 2988 2989 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 2990 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 2991 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 2992 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 2993 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) 2994 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) 2995 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 2996 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 2997 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 2998 GEN_VEXT_VF(vfdiv_vf_h, 2, 2) 2999 GEN_VEXT_VF(vfdiv_vf_w, 4, 4) 3000 GEN_VEXT_VF(vfdiv_vf_d, 8, 8) 3001 3002 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3003 { 3004 return float16_div(b, a, s); 3005 } 3006 3007 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3008 { 3009 return float32_div(b, a, s); 3010 } 3011 3012 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3013 { 3014 return float64_div(b, a, s); 3015 } 3016 3017 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3018 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3019 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3020 
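/*
 * Note: OPFVF2 always evaluates OP(s2, s1, ...), with s2 taken from vs2[i]
 * and s1 from the scalar f[rs1].  The *_rsub/*_rdiv wrappers above therefore
 * swap their arguments before calling the softfloat routine (for example
 * float16_rdiv(a, b, s) is float16_div(b, a, s)), so that vfrsub.vf computes
 * f[rs1] - vs2[i] and vfrdiv.vf computes f[rs1] / vs2[i].
 */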
GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) 3021 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) 3022 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) 3023 3024 /* Vector Widening Floating-Point Multiply */ 3025 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3026 { 3027 return float32_mul(float16_to_float32(a, true, s), 3028 float16_to_float32(b, true, s), s); 3029 } 3030 3031 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3032 { 3033 return float64_mul(float32_to_float64(a, s), 3034 float32_to_float64(b, s), s); 3035 3036 } 3037 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3038 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3039 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) 3040 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) 3041 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3042 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3043 GEN_VEXT_VF(vfwmul_vf_h, 2, 4) 3044 GEN_VEXT_VF(vfwmul_vf_w, 4, 8) 3045 3046 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3047 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3048 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3049 CPURISCVState *env) \ 3050 { \ 3051 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3052 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3053 TD d = *((TD *)vd + HD(i)); \ 3054 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3055 } 3056 3057 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3058 { 3059 return float16_muladd(a, b, d, 0, s); 3060 } 3061 3062 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3063 { 3064 return float32_muladd(a, b, d, 0, s); 3065 } 3066 3067 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3068 { 3069 return float64_muladd(a, b, d, 0, s); 3070 } 3071 3072 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3073 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3074 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3075 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) 3076 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) 3077 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) 3078 3079 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3080 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3081 CPURISCVState *env) \ 3082 { \ 3083 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3084 TD d = *((TD *)vd + HD(i)); \ 3085 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3086 } 3087 3088 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3089 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3090 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3091 GEN_VEXT_VF(vfmacc_vf_h, 2, 2) 3092 GEN_VEXT_VF(vfmacc_vf_w, 4, 4) 3093 GEN_VEXT_VF(vfmacc_vf_d, 8, 8) 3094 3095 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3096 { 3097 return float16_muladd(a, b, d, 3098 float_muladd_negate_c | float_muladd_negate_product, s); 3099 } 3100 3101 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3102 { 3103 return float32_muladd(a, b, d, 3104 float_muladd_negate_c | float_muladd_negate_product, s); 3105 } 3106 3107 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3108 { 3109 return float64_muladd(a, b, d, 3110 float_muladd_negate_c | float_muladd_negate_product, s); 3111 } 3112 3113 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3114 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3115 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3116 
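/*
 * Note: the softfloat muladd flag combinations map directly onto the
 * instruction semantics.  With a = vs2[i], b = vs1[i] (or f[rs1]) and
 * d = vd[i]:
 *     vfmacc:  muladd(a, b, d, 0)               -> vd =  (a * b) + d
 *     vfnmacc: negate_product | negate_c        -> vd = -(a * b) - d
 * The vfmsac/vfnmsac variants below use negate_c and negate_product on
 * their own to subtract the addend or negate only the product.
 */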
GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) 3117 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) 3118 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) 3119 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3120 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3121 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3122 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) 3123 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) 3124 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) 3125 3126 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3127 { 3128 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3129 } 3130 3131 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3132 { 3133 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3134 } 3135 3136 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3137 { 3138 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3139 } 3140 3141 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3142 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3143 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3144 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) 3145 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) 3146 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) 3147 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3148 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3149 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3150 GEN_VEXT_VF(vfmsac_vf_h, 2, 2) 3151 GEN_VEXT_VF(vfmsac_vf_w, 4, 4) 3152 GEN_VEXT_VF(vfmsac_vf_d, 8, 8) 3153 3154 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3155 { 3156 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3157 } 3158 3159 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3160 { 3161 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3162 } 3163 3164 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3165 { 3166 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3167 } 3168 3169 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3170 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3171 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3172 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) 3173 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) 3174 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) 3175 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3176 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3177 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3178 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) 3179 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) 3180 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) 3181 3182 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3183 { 3184 return float16_muladd(d, b, a, 0, s); 3185 } 3186 3187 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3188 { 3189 return float32_muladd(d, b, a, 0, s); 3190 } 3191 3192 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3193 { 3194 return float64_muladd(d, b, a, 0, s); 3195 } 3196 3197 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3198 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3199 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3200 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) 3201 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) 3202 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) 3203 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3204 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3205 
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3206 GEN_VEXT_VF(vfmadd_vf_h, 2, 2) 3207 GEN_VEXT_VF(vfmadd_vf_w, 4, 4) 3208 GEN_VEXT_VF(vfmadd_vf_d, 8, 8) 3209 3210 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3211 { 3212 return float16_muladd(d, b, a, 3213 float_muladd_negate_c | float_muladd_negate_product, s); 3214 } 3215 3216 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3217 { 3218 return float32_muladd(d, b, a, 3219 float_muladd_negate_c | float_muladd_negate_product, s); 3220 } 3221 3222 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3223 { 3224 return float64_muladd(d, b, a, 3225 float_muladd_negate_c | float_muladd_negate_product, s); 3226 } 3227 3228 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3229 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3230 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3231 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) 3232 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) 3233 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) 3234 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3235 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3236 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3237 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) 3238 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) 3239 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) 3240 3241 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3242 { 3243 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3244 } 3245 3246 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3247 { 3248 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3249 } 3250 3251 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3252 { 3253 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3254 } 3255 3256 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3257 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3258 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3259 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) 3260 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 3261 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) 3262 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3263 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3264 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3265 GEN_VEXT_VF(vfmsub_vf_h, 2, 2) 3266 GEN_VEXT_VF(vfmsub_vf_w, 4, 4) 3267 GEN_VEXT_VF(vfmsub_vf_d, 8, 8) 3268 3269 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3270 { 3271 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3272 } 3273 3274 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3275 { 3276 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3277 } 3278 3279 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3280 { 3281 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3282 } 3283 3284 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3285 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3286 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3287 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) 3288 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) 3289 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) 3290 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3291 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3292 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3293 
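/*
 * Note: in the vf[n]madd/vf[n]msub family the destination register is the
 * multiplicand: fmadd16(a, b, d) calls muladd(d, b, a, ...), so with
 * a = vs2[i], b = vs1[i] (or f[rs1]) and d = vd[i]:
 *     vfmadd:  vd =  (vs1 * vd) + vs2      vfmsub:  vd =  (vs1 * vd) - vs2
 *     vfnmadd: vd = -(vs1 * vd) - vs2      vfnmsub: vd = -(vs1 * vd) + vs2
 * Only the roles of vd and vs2 are swapped relative to the *acc/*sac forms
 * above.
 */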
GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) 3294 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) 3295 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) 3296 3297 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3298 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3299 { 3300 return float32_muladd(float16_to_float32(a, true, s), 3301 float16_to_float32(b, true, s), d, 0, s); 3302 } 3303 3304 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3305 { 3306 return float64_muladd(float32_to_float64(a, s), 3307 float32_to_float64(b, s), d, 0, s); 3308 } 3309 3310 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3311 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3312 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) 3313 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) 3314 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3315 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3316 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) 3317 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) 3318 3319 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3320 { 3321 return float32_muladd(float16_to_float32(a, true, s), 3322 float16_to_float32(b, true, s), d, 3323 float_muladd_negate_c | float_muladd_negate_product, s); 3324 } 3325 3326 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3327 { 3328 return float64_muladd(float32_to_float64(a, s), 3329 float32_to_float64(b, s), d, 3330 float_muladd_negate_c | float_muladd_negate_product, s); 3331 } 3332 3333 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3334 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3335 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) 3336 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) 3337 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3338 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3339 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) 3340 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) 3341 3342 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3343 { 3344 return float32_muladd(float16_to_float32(a, true, s), 3345 float16_to_float32(b, true, s), d, 3346 float_muladd_negate_c, s); 3347 } 3348 3349 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3350 { 3351 return float64_muladd(float32_to_float64(a, s), 3352 float32_to_float64(b, s), d, 3353 float_muladd_negate_c, s); 3354 } 3355 3356 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3357 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3358 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) 3359 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) 3360 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3361 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3362 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) 3363 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) 3364 3365 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3366 { 3367 return float32_muladd(float16_to_float32(a, true, s), 3368 float16_to_float32(b, true, s), d, 3369 float_muladd_negate_product, s); 3370 } 3371 3372 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3373 { 3374 return float64_muladd(float32_to_float64(a, s), 3375 float32_to_float64(b, s), d, 3376 float_muladd_negate_product, s); 3377 } 3378 3379 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3380 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3381 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) 3382 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 3383 
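
/*
 * Illustrative note on the widening FMA helpers in this section: both
 * SEW-wide sources are promoted to 2*SEW before the fused multiply-add
 * (float16_to_float32 with ieee=true selects the IEEE half-precision
 * layout), and the accumulator d is already 2*SEW wide, so the whole
 * operation rounds once, in the destination precision.  A hand-worked
 * example for vfwmacc with SEW=16:
 *
 *   vs1[i] = 0x3e00 (1.5 f16), vs2[i] = 0x4000 (2.0 f16),
 *   vd[i]  = 0x3e800000 (0.25 f32)
 *   => vd[i] = 1.5 * 2.0 + 0.25 = 3.25 f32 = 0x40500000
 */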
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3384 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3385 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) 3386 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) 3387 3388 /* Vector Floating-Point Square-Root Instruction */ 3389 /* (TD, T2, TX2) */ 3390 #define OP_UU_H uint16_t, uint16_t, uint16_t 3391 #define OP_UU_W uint32_t, uint32_t, uint32_t 3392 #define OP_UU_D uint64_t, uint64_t, uint64_t 3393 3394 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3395 static void do_##NAME(void *vd, void *vs2, int i, \ 3396 CPURISCVState *env) \ 3397 { \ 3398 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3399 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3400 } 3401 3402 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ 3403 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3404 CPURISCVState *env, uint32_t desc) \ 3405 { \ 3406 uint32_t vm = vext_vm(desc); \ 3407 uint32_t vl = env->vl; \ 3408 uint32_t i; \ 3409 \ 3410 if (vl == 0) { \ 3411 return; \ 3412 } \ 3413 for (i = env->vstart; i < vl; i++) { \ 3414 if (!vm && !vext_elem_mask(v0, i)) { \ 3415 continue; \ 3416 } \ 3417 do_##NAME(vd, vs2, i, env); \ 3418 } \ 3419 env->vstart = 0; \ 3420 } 3421 3422 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3423 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3424 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3425 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) 3426 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) 3427 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) 3428 3429 /* 3430 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3431 * 3432 * Adapted from riscv-v-spec recip.c: 3433 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3434 */ 3435 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3436 { 3437 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3438 uint64_t exp = extract64(f, frac_size, exp_size); 3439 uint64_t frac = extract64(f, 0, frac_size); 3440 3441 const uint8_t lookup_table[] = { 3442 52, 51, 50, 48, 47, 46, 44, 43, 3443 42, 41, 40, 39, 38, 36, 35, 34, 3444 33, 32, 31, 30, 30, 29, 28, 27, 3445 26, 25, 24, 23, 23, 22, 21, 20, 3446 19, 19, 18, 17, 16, 16, 15, 14, 3447 14, 13, 12, 12, 11, 10, 10, 9, 3448 9, 8, 7, 7, 6, 6, 5, 4, 3449 4, 3, 3, 2, 2, 1, 1, 0, 3450 127, 125, 123, 121, 119, 118, 116, 114, 3451 113, 111, 109, 108, 106, 105, 103, 102, 3452 100, 99, 97, 96, 95, 93, 92, 91, 3453 90, 88, 87, 86, 85, 84, 83, 82, 3454 80, 79, 78, 77, 76, 75, 74, 73, 3455 72, 71, 70, 70, 69, 68, 67, 66, 3456 65, 64, 63, 63, 62, 61, 60, 59, 3457 59, 58, 57, 56, 56, 55, 54, 53 3458 }; 3459 const int precision = 7; 3460 3461 if (exp == 0 && frac != 0) { /* subnormal */ 3462 /* Normalize the subnormal. 
*/ 3463 while (extract64(frac, frac_size - 1, 1) == 0) { 3464 exp--; 3465 frac <<= 1; 3466 } 3467 3468 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3469 } 3470 3471 int idx = ((exp & 1) << (precision - 1)) | 3472 (frac >> (frac_size - precision + 1)); 3473 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3474 (frac_size - precision); 3475 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3476 3477 uint64_t val = 0; 3478 val = deposit64(val, 0, frac_size, out_frac); 3479 val = deposit64(val, frac_size, exp_size, out_exp); 3480 val = deposit64(val, frac_size + exp_size, 1, sign); 3481 return val; 3482 } 3483 3484 static float16 frsqrt7_h(float16 f, float_status *s) 3485 { 3486 int exp_size = 5, frac_size = 10; 3487 bool sign = float16_is_neg(f); 3488 3489 /* 3490 * frsqrt7(sNaN) = canonical NaN 3491 * frsqrt7(-inf) = canonical NaN 3492 * frsqrt7(-normal) = canonical NaN 3493 * frsqrt7(-subnormal) = canonical NaN 3494 */ 3495 if (float16_is_signaling_nan(f, s) || 3496 (float16_is_infinity(f) && sign) || 3497 (float16_is_normal(f) && sign) || 3498 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3499 s->float_exception_flags |= float_flag_invalid; 3500 return float16_default_nan(s); 3501 } 3502 3503 /* frsqrt7(qNaN) = canonical NaN */ 3504 if (float16_is_quiet_nan(f, s)) { 3505 return float16_default_nan(s); 3506 } 3507 3508 /* frsqrt7(+-0) = +-inf */ 3509 if (float16_is_zero(f)) { 3510 s->float_exception_flags |= float_flag_divbyzero; 3511 return float16_set_sign(float16_infinity, sign); 3512 } 3513 3514 /* frsqrt7(+inf) = +0 */ 3515 if (float16_is_infinity(f) && !sign) { 3516 return float16_set_sign(float16_zero, sign); 3517 } 3518 3519 /* +normal, +subnormal */ 3520 uint64_t val = frsqrt7(f, exp_size, frac_size); 3521 return make_float16(val); 3522 } 3523 3524 static float32 frsqrt7_s(float32 f, float_status *s) 3525 { 3526 int exp_size = 8, frac_size = 23; 3527 bool sign = float32_is_neg(f); 3528 3529 /* 3530 * frsqrt7(sNaN) = canonical NaN 3531 * frsqrt7(-inf) = canonical NaN 3532 * frsqrt7(-normal) = canonical NaN 3533 * frsqrt7(-subnormal) = canonical NaN 3534 */ 3535 if (float32_is_signaling_nan(f, s) || 3536 (float32_is_infinity(f) && sign) || 3537 (float32_is_normal(f) && sign) || 3538 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3539 s->float_exception_flags |= float_flag_invalid; 3540 return float32_default_nan(s); 3541 } 3542 3543 /* frsqrt7(qNaN) = canonical NaN */ 3544 if (float32_is_quiet_nan(f, s)) { 3545 return float32_default_nan(s); 3546 } 3547 3548 /* frsqrt7(+-0) = +-inf */ 3549 if (float32_is_zero(f)) { 3550 s->float_exception_flags |= float_flag_divbyzero; 3551 return float32_set_sign(float32_infinity, sign); 3552 } 3553 3554 /* frsqrt7(+inf) = +0 */ 3555 if (float32_is_infinity(f) && !sign) { 3556 return float32_set_sign(float32_zero, sign); 3557 } 3558 3559 /* +normal, +subnormal */ 3560 uint64_t val = frsqrt7(f, exp_size, frac_size); 3561 return make_float32(val); 3562 } 3563 3564 static float64 frsqrt7_d(float64 f, float_status *s) 3565 { 3566 int exp_size = 11, frac_size = 52; 3567 bool sign = float64_is_neg(f); 3568 3569 /* 3570 * frsqrt7(sNaN) = canonical NaN 3571 * frsqrt7(-inf) = canonical NaN 3572 * frsqrt7(-normal) = canonical NaN 3573 * frsqrt7(-subnormal) = canonical NaN 3574 */ 3575 if (float64_is_signaling_nan(f, s) || 3576 (float64_is_infinity(f) && sign) || 3577 (float64_is_normal(f) && sign) || 3578 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3579 
s->float_exception_flags |= float_flag_invalid; 3580 return float64_default_nan(s); 3581 } 3582 3583 /* frsqrt7(qNaN) = canonical NaN */ 3584 if (float64_is_quiet_nan(f, s)) { 3585 return float64_default_nan(s); 3586 } 3587 3588 /* frsqrt7(+-0) = +-inf */ 3589 if (float64_is_zero(f)) { 3590 s->float_exception_flags |= float_flag_divbyzero; 3591 return float64_set_sign(float64_infinity, sign); 3592 } 3593 3594 /* frsqrt7(+inf) = +0 */ 3595 if (float64_is_infinity(f) && !sign) { 3596 return float64_set_sign(float64_zero, sign); 3597 } 3598 3599 /* +normal, +subnormal */ 3600 uint64_t val = frsqrt7(f, exp_size, frac_size); 3601 return make_float64(val); 3602 } 3603 3604 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3605 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3606 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3607 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2) 3608 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4) 3609 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8) 3610 3611 /* 3612 * Vector Floating-Point Reciprocal Estimate Instruction 3613 * 3614 * Adapted from riscv-v-spec recip.c: 3615 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3616 */ 3617 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3618 float_status *s) 3619 { 3620 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3621 uint64_t exp = extract64(f, frac_size, exp_size); 3622 uint64_t frac = extract64(f, 0, frac_size); 3623 3624 const uint8_t lookup_table[] = { 3625 127, 125, 123, 121, 119, 117, 116, 114, 3626 112, 110, 109, 107, 105, 104, 102, 100, 3627 99, 97, 96, 94, 93, 91, 90, 88, 3628 87, 85, 84, 83, 81, 80, 79, 77, 3629 76, 75, 74, 72, 71, 70, 69, 68, 3630 66, 65, 64, 63, 62, 61, 60, 59, 3631 58, 57, 56, 55, 54, 53, 52, 51, 3632 50, 49, 48, 47, 46, 45, 44, 43, 3633 42, 41, 40, 40, 39, 38, 37, 36, 3634 35, 35, 34, 33, 32, 31, 31, 30, 3635 29, 28, 28, 27, 26, 25, 25, 24, 3636 23, 23, 22, 21, 21, 20, 19, 19, 3637 18, 17, 17, 16, 15, 15, 14, 14, 3638 13, 12, 12, 11, 11, 10, 9, 9, 3639 8, 8, 7, 7, 6, 5, 5, 4, 3640 4, 3, 3, 2, 2, 1, 1, 0 3641 }; 3642 const int precision = 7; 3643 3644 if (exp == 0 && frac != 0) { /* subnormal */ 3645 /* Normalize the subnormal. */ 3646 while (extract64(frac, frac_size - 1, 1) == 0) { 3647 exp--; 3648 frac <<= 1; 3649 } 3650 3651 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3652 3653 if (exp != 0 && exp != UINT64_MAX) { 3654 /* 3655 * Overflow to inf or max value of same sign, 3656 * depending on sign and rounding mode. 3657 */ 3658 s->float_exception_flags |= (float_flag_inexact | 3659 float_flag_overflow); 3660 3661 if ((s->float_rounding_mode == float_round_to_zero) || 3662 ((s->float_rounding_mode == float_round_down) && !sign) || 3663 ((s->float_rounding_mode == float_round_up) && sign)) { 3664 /* Return greatest/negative finite value. */ 3665 return (sign << (exp_size + frac_size)) | 3666 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3667 } else { 3668 /* Return +-inf. */ 3669 return (sign << (exp_size + frac_size)) | 3670 MAKE_64BIT_MASK(frac_size, exp_size); 3671 } 3672 } 3673 } 3674 3675 int idx = frac >> (frac_size - precision); 3676 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3677 (frac_size - precision); 3678 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3679 3680 if (out_exp == 0 || out_exp == UINT64_MAX) { 3681 /* 3682 * The result is subnormal, but don't raise the underflow exception, 3683 * because there's no additional loss of precision. 
3684 */ 3685 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3686 if (out_exp == UINT64_MAX) { 3687 out_frac >>= 1; 3688 out_exp = 0; 3689 } 3690 } 3691 3692 uint64_t val = 0; 3693 val = deposit64(val, 0, frac_size, out_frac); 3694 val = deposit64(val, frac_size, exp_size, out_exp); 3695 val = deposit64(val, frac_size + exp_size, 1, sign); 3696 return val; 3697 } 3698 3699 static float16 frec7_h(float16 f, float_status *s) 3700 { 3701 int exp_size = 5, frac_size = 10; 3702 bool sign = float16_is_neg(f); 3703 3704 /* frec7(+-inf) = +-0 */ 3705 if (float16_is_infinity(f)) { 3706 return float16_set_sign(float16_zero, sign); 3707 } 3708 3709 /* frec7(+-0) = +-inf */ 3710 if (float16_is_zero(f)) { 3711 s->float_exception_flags |= float_flag_divbyzero; 3712 return float16_set_sign(float16_infinity, sign); 3713 } 3714 3715 /* frec7(sNaN) = canonical NaN */ 3716 if (float16_is_signaling_nan(f, s)) { 3717 s->float_exception_flags |= float_flag_invalid; 3718 return float16_default_nan(s); 3719 } 3720 3721 /* frec7(qNaN) = canonical NaN */ 3722 if (float16_is_quiet_nan(f, s)) { 3723 return float16_default_nan(s); 3724 } 3725 3726 /* +-normal, +-subnormal */ 3727 uint64_t val = frec7(f, exp_size, frac_size, s); 3728 return make_float16(val); 3729 } 3730 3731 static float32 frec7_s(float32 f, float_status *s) 3732 { 3733 int exp_size = 8, frac_size = 23; 3734 bool sign = float32_is_neg(f); 3735 3736 /* frec7(+-inf) = +-0 */ 3737 if (float32_is_infinity(f)) { 3738 return float32_set_sign(float32_zero, sign); 3739 } 3740 3741 /* frec7(+-0) = +-inf */ 3742 if (float32_is_zero(f)) { 3743 s->float_exception_flags |= float_flag_divbyzero; 3744 return float32_set_sign(float32_infinity, sign); 3745 } 3746 3747 /* frec7(sNaN) = canonical NaN */ 3748 if (float32_is_signaling_nan(f, s)) { 3749 s->float_exception_flags |= float_flag_invalid; 3750 return float32_default_nan(s); 3751 } 3752 3753 /* frec7(qNaN) = canonical NaN */ 3754 if (float32_is_quiet_nan(f, s)) { 3755 return float32_default_nan(s); 3756 } 3757 3758 /* +-normal, +-subnormal */ 3759 uint64_t val = frec7(f, exp_size, frac_size, s); 3760 return make_float32(val); 3761 } 3762 3763 static float64 frec7_d(float64 f, float_status *s) 3764 { 3765 int exp_size = 11, frac_size = 52; 3766 bool sign = float64_is_neg(f); 3767 3768 /* frec7(+-inf) = +-0 */ 3769 if (float64_is_infinity(f)) { 3770 return float64_set_sign(float64_zero, sign); 3771 } 3772 3773 /* frec7(+-0) = +-inf */ 3774 if (float64_is_zero(f)) { 3775 s->float_exception_flags |= float_flag_divbyzero; 3776 return float64_set_sign(float64_infinity, sign); 3777 } 3778 3779 /* frec7(sNaN) = canonical NaN */ 3780 if (float64_is_signaling_nan(f, s)) { 3781 s->float_exception_flags |= float_flag_invalid; 3782 return float64_default_nan(s); 3783 } 3784 3785 /* frec7(qNaN) = canonical NaN */ 3786 if (float64_is_quiet_nan(f, s)) { 3787 return float64_default_nan(s); 3788 } 3789 3790 /* +-normal, +-subnormal */ 3791 uint64_t val = frec7(f, exp_size, frac_size, s); 3792 return make_float64(val); 3793 } 3794 3795 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3796 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3797 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3798 GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2) 3799 GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4) 3800 GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8) 3801 3802 /* Vector Floating-Point MIN/MAX Instructions */ 3803 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3804 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 3805 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3806 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) 3807 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) 3808 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) 3809 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3810 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3811 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3812 GEN_VEXT_VF(vfmin_vf_h, 2, 2) 3813 GEN_VEXT_VF(vfmin_vf_w, 4, 4) 3814 GEN_VEXT_VF(vfmin_vf_d, 8, 8) 3815 3816 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3817 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3818 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3819 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) 3820 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) 3821 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) 3822 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3823 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3824 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3825 GEN_VEXT_VF(vfmax_vf_h, 2, 2) 3826 GEN_VEXT_VF(vfmax_vf_w, 4, 4) 3827 GEN_VEXT_VF(vfmax_vf_d, 8, 8) 3828 3829 /* Vector Floating-Point Sign-Injection Instructions */ 3830 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3831 { 3832 return deposit64(b, 0, 15, a); 3833 } 3834 3835 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3836 { 3837 return deposit64(b, 0, 31, a); 3838 } 3839 3840 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3841 { 3842 return deposit64(b, 0, 63, a); 3843 } 3844 3845 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3846 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3847 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3848 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) 3849 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) 3850 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) 3851 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3852 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3853 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3854 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) 3855 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) 3856 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) 3857 3858 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3859 { 3860 return deposit64(~b, 0, 15, a); 3861 } 3862 3863 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3864 { 3865 return deposit64(~b, 0, 31, a); 3866 } 3867 3868 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3869 { 3870 return deposit64(~b, 0, 63, a); 3871 } 3872 3873 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3874 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3875 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3876 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) 3877 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) 3878 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) 3879 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3880 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3881 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3882 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) 3883 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) 3884 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) 3885 3886 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3887 { 3888 return deposit64(b ^ a, 0, 15, a); 3889 } 3890 3891 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3892 { 3893 return deposit64(b ^ a, 0, 31, a); 3894 
} 3895 3896 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3897 { 3898 return deposit64(b ^ a, 0, 63, a); 3899 } 3900 3901 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3902 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3903 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3904 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) 3905 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) 3906 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) 3907 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3908 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3909 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3910 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) 3911 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) 3912 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) 3913 3914 /* Vector Floating-Point Compare Instructions */ 3915 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3916 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3917 CPURISCVState *env, uint32_t desc) \ 3918 { \ 3919 uint32_t vm = vext_vm(desc); \ 3920 uint32_t vl = env->vl; \ 3921 uint32_t i; \ 3922 \ 3923 for (i = env->vstart; i < vl; i++) { \ 3924 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3925 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3926 if (!vm && !vext_elem_mask(v0, i)) { \ 3927 continue; \ 3928 } \ 3929 vext_set_elem_mask(vd, i, \ 3930 DO_OP(s2, s1, &env->fp_status)); \ 3931 } \ 3932 env->vstart = 0; \ 3933 } 3934 3935 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3936 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3937 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3938 3939 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3940 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3941 CPURISCVState *env, uint32_t desc) \ 3942 { \ 3943 uint32_t vm = vext_vm(desc); \ 3944 uint32_t vl = env->vl; \ 3945 uint32_t i; \ 3946 \ 3947 for (i = env->vstart; i < vl; i++) { \ 3948 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3949 if (!vm && !vext_elem_mask(v0, i)) { \ 3950 continue; \ 3951 } \ 3952 vext_set_elem_mask(vd, i, \ 3953 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3954 } \ 3955 env->vstart = 0; \ 3956 } 3957 3958 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3959 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3960 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3961 3962 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3963 { 3964 FloatRelation compare = float16_compare_quiet(a, b, s); 3965 return compare != float_relation_equal; 3966 } 3967 3968 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 3969 { 3970 FloatRelation compare = float32_compare_quiet(a, b, s); 3971 return compare != float_relation_equal; 3972 } 3973 3974 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 3975 { 3976 FloatRelation compare = float64_compare_quiet(a, b, s); 3977 return compare != float_relation_equal; 3978 } 3979 3980 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 3981 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 3982 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 3983 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 3984 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 3985 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 3986 3987 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 3988 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 3989 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 3990 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 3991 
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 3992 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 3993 3994 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 3995 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 3996 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 3997 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 3998 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 3999 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4000 4001 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4002 { 4003 FloatRelation compare = float16_compare(a, b, s); 4004 return compare == float_relation_greater; 4005 } 4006 4007 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4008 { 4009 FloatRelation compare = float32_compare(a, b, s); 4010 return compare == float_relation_greater; 4011 } 4012 4013 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4014 { 4015 FloatRelation compare = float64_compare(a, b, s); 4016 return compare == float_relation_greater; 4017 } 4018 4019 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4020 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4021 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4022 4023 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4024 { 4025 FloatRelation compare = float16_compare(a, b, s); 4026 return compare == float_relation_greater || 4027 compare == float_relation_equal; 4028 } 4029 4030 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4031 { 4032 FloatRelation compare = float32_compare(a, b, s); 4033 return compare == float_relation_greater || 4034 compare == float_relation_equal; 4035 } 4036 4037 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4038 { 4039 FloatRelation compare = float64_compare(a, b, s); 4040 return compare == float_relation_greater || 4041 compare == float_relation_equal; 4042 } 4043 4044 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4045 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4046 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4047 4048 /* Vector Floating-Point Classify Instruction */ 4049 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4050 static void do_##NAME(void *vd, void *vs2, int i) \ 4051 { \ 4052 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4053 *((TD *)vd + HD(i)) = OP(s2); \ 4054 } 4055 4056 #define GEN_VEXT_V(NAME, ESZ, DSZ) \ 4057 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4058 CPURISCVState *env, uint32_t desc) \ 4059 { \ 4060 uint32_t vm = vext_vm(desc); \ 4061 uint32_t vl = env->vl; \ 4062 uint32_t i; \ 4063 \ 4064 for (i = env->vstart; i < vl; i++) { \ 4065 if (!vm && !vext_elem_mask(v0, i)) { \ 4066 continue; \ 4067 } \ 4068 do_##NAME(vd, vs2, i); \ 4069 } \ 4070 env->vstart = 0; \ 4071 } 4072 4073 target_ulong fclass_h(uint64_t frs1) 4074 { 4075 float16 f = frs1; 4076 bool sign = float16_is_neg(f); 4077 4078 if (float16_is_infinity(f)) { 4079 return sign ? 1 << 0 : 1 << 7; 4080 } else if (float16_is_zero(f)) { 4081 return sign ? 1 << 3 : 1 << 4; 4082 } else if (float16_is_zero_or_denormal(f)) { 4083 return sign ? 1 << 2 : 1 << 5; 4084 } else if (float16_is_any_nan(f)) { 4085 float_status s = { }; /* for snan_bit_is_one */ 4086 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4087 } else { 4088 return sign ? 1 << 1 : 1 << 6; 4089 } 4090 } 4091 4092 target_ulong fclass_s(uint64_t frs1) 4093 { 4094 float32 f = frs1; 4095 bool sign = float32_is_neg(f); 4096 4097 if (float32_is_infinity(f)) { 4098 return sign ? 
1 << 0 : 1 << 7; 4099 } else if (float32_is_zero(f)) { 4100 return sign ? 1 << 3 : 1 << 4; 4101 } else if (float32_is_zero_or_denormal(f)) { 4102 return sign ? 1 << 2 : 1 << 5; 4103 } else if (float32_is_any_nan(f)) { 4104 float_status s = { }; /* for snan_bit_is_one */ 4105 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4106 } else { 4107 return sign ? 1 << 1 : 1 << 6; 4108 } 4109 } 4110 4111 target_ulong fclass_d(uint64_t frs1) 4112 { 4113 float64 f = frs1; 4114 bool sign = float64_is_neg(f); 4115 4116 if (float64_is_infinity(f)) { 4117 return sign ? 1 << 0 : 1 << 7; 4118 } else if (float64_is_zero(f)) { 4119 return sign ? 1 << 3 : 1 << 4; 4120 } else if (float64_is_zero_or_denormal(f)) { 4121 return sign ? 1 << 2 : 1 << 5; 4122 } else if (float64_is_any_nan(f)) { 4123 float_status s = { }; /* for snan_bit_is_one */ 4124 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4125 } else { 4126 return sign ? 1 << 1 : 1 << 6; 4127 } 4128 } 4129 4130 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4131 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4132 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4133 GEN_VEXT_V(vfclass_v_h, 2, 2) 4134 GEN_VEXT_V(vfclass_v_w, 4, 4) 4135 GEN_VEXT_V(vfclass_v_d, 8, 8) 4136 4137 /* Vector Floating-Point Merge Instruction */ 4138 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4139 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4140 CPURISCVState *env, uint32_t desc) \ 4141 { \ 4142 uint32_t vm = vext_vm(desc); \ 4143 uint32_t vl = env->vl; \ 4144 uint32_t i; \ 4145 \ 4146 for (i = env->vstart; i < vl; i++) { \ 4147 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4148 *((ETYPE *)vd + H(i)) \ 4149 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4150 } \ 4151 env->vstart = 0; \ 4152 } 4153 4154 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4155 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4156 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4157 4158 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4159 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4160 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4161 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4162 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4163 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) 4164 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) 4165 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) 4166 4167 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4168 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4169 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4170 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4171 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) 4172 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) 4173 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) 4174 4175 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4176 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4177 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4178 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4179 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) 4180 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) 4181 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) 4182 4183 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. 
*/ 4184 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4185 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4186 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4187 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) 4188 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) 4189 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) 4190 4191 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4192 /* (TD, T2, TX2) */ 4193 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4194 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4195 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4196 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4197 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4198 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4199 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) 4200 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) 4201 4202 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4203 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4204 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4205 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) 4206 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) 4207 4208 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4209 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4210 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4211 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4212 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2) 4213 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) 4214 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) 4215 4216 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4217 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4218 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4219 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4220 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2) 4221 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) 4222 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) 4223 4224 /* 4225 * vfwcvt.f.f.v vd, vs2, vm 4226 * Convert single-width float to double-width float. 4227 */ 4228 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4229 { 4230 return float16_to_float32(a, true, s); 4231 } 4232 4233 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4234 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4235 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4) 4236 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) 4237 4238 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4239 /* (TD, T2, TX2) */ 4240 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4241 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4242 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4243 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4244 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4245 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4246 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4247 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1) 4248 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2) 4249 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4) 4250 4251 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. 
*/ 4252 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4253 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4254 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4255 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1) 4256 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2) 4257 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4) 4258 4259 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4260 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4261 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4262 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2) 4263 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4) 4264 4265 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4266 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4267 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4268 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2) 4269 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4) 4270 4271 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ 4272 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4273 { 4274 return float32_to_float16(a, true, s); 4275 } 4276 4277 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4278 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4279 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2) 4280 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4) 4281 4282 /* 4283 *** Vector Reduction Operations 4284 */ 4285 /* Vector Single-Width Integer Reduction Instructions */ 4286 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4287 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4288 void *vs2, CPURISCVState *env, uint32_t desc) \ 4289 { \ 4290 uint32_t vm = vext_vm(desc); \ 4291 uint32_t vl = env->vl; \ 4292 uint32_t i; \ 4293 TD s1 = *((TD *)vs1 + HD(0)); \ 4294 \ 4295 for (i = env->vstart; i < vl; i++) { \ 4296 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4297 if (!vm && !vext_elem_mask(v0, i)) { \ 4298 continue; \ 4299 } \ 4300 s1 = OP(s1, (TD)s2); \ 4301 } \ 4302 *((TD *)vd + HD(0)) = s1; \ 4303 env->vstart = 0; \ 4304 } 4305 4306 /* vd[0] = sum(vs1[0], vs2[*]) */ 4307 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4308 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4309 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4310 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4311 4312 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4313 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4314 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4315 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4316 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4317 4318 /* vd[0] = max(vs1[0], vs2[*]) */ 4319 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4320 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4321 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4322 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4323 4324 /* vd[0] = minu(vs1[0], vs2[*]) */ 4325 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4326 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4327 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4328 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4329 4330 /* vd[0] = min(vs1[0], vs2[*]) */ 4331 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4332 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4333 
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4334 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4335 4336 /* vd[0] = and(vs1[0], vs2[*]) */ 4337 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4338 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4339 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4340 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4341 4342 /* vd[0] = or(vs1[0], vs2[*]) */ 4343 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4344 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4345 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4346 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4347 4348 /* vd[0] = xor(vs1[0], vs2[*]) */ 4349 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4350 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4351 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4352 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4353 4354 /* Vector Widening Integer Reduction Instructions */ 4355 /* signed sum reduction into double-width accumulator */ 4356 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4357 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4358 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4359 4360 /* Unsigned sum reduction into double-width accumulator */ 4361 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4362 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4363 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4364 4365 /* Vector Single-Width Floating-Point Reduction Instructions */ 4366 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4367 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4368 void *vs2, CPURISCVState *env, \ 4369 uint32_t desc) \ 4370 { \ 4371 uint32_t vm = vext_vm(desc); \ 4372 uint32_t vl = env->vl; \ 4373 uint32_t i; \ 4374 TD s1 = *((TD *)vs1 + HD(0)); \ 4375 \ 4376 for (i = env->vstart; i < vl; i++) { \ 4377 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4378 if (!vm && !vext_elem_mask(v0, i)) { \ 4379 continue; \ 4380 } \ 4381 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4382 } \ 4383 *((TD *)vd + HD(0)) = s1; \ 4384 env->vstart = 0; \ 4385 } 4386 4387 /* Unordered sum */ 4388 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4389 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4390 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4391 4392 /* Maximum value */ 4393 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4394 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4395 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4396 4397 /* Minimum value */ 4398 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4399 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4400 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4401 4402 /* Vector Widening Floating-Point Reduction Instructions */ 4403 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4404 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4405 void *vs2, CPURISCVState *env, uint32_t desc) 4406 { 4407 uint32_t vm = vext_vm(desc); 4408 uint32_t vl = env->vl; 4409 uint32_t i; 4410 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4411 4412 for (i = env->vstart; i < vl; i++) { 
4413 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4414 if (!vm && !vext_elem_mask(v0, i)) { 4415 continue; 4416 } 4417 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4418 &env->fp_status); 4419 } 4420 *((uint32_t *)vd + H4(0)) = s1; 4421 env->vstart = 0; 4422 } 4423 4424 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4425 void *vs2, CPURISCVState *env, uint32_t desc) 4426 { 4427 uint32_t vm = vext_vm(desc); 4428 uint32_t vl = env->vl; 4429 uint32_t i; 4430 uint64_t s1 = *((uint64_t *)vs1); 4431 4432 for (i = env->vstart; i < vl; i++) { 4433 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4434 if (!vm && !vext_elem_mask(v0, i)) { 4435 continue; 4436 } 4437 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4438 &env->fp_status); 4439 } 4440 *((uint64_t *)vd) = s1; 4441 env->vstart = 0; 4442 } 4443 4444 /* 4445 *** Vector Mask Operations 4446 */ 4447 /* Vector Mask-Register Logical Instructions */ 4448 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4449 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4450 void *vs2, CPURISCVState *env, \ 4451 uint32_t desc) \ 4452 { \ 4453 uint32_t vl = env->vl; \ 4454 uint32_t i; \ 4455 int a, b; \ 4456 \ 4457 for (i = env->vstart; i < vl; i++) { \ 4458 a = vext_elem_mask(vs1, i); \ 4459 b = vext_elem_mask(vs2, i); \ 4460 vext_set_elem_mask(vd, i, OP(b, a)); \ 4461 } \ 4462 env->vstart = 0; \ 4463 } 4464 4465 #define DO_NAND(N, M) (!(N & M)) 4466 #define DO_ANDNOT(N, M) (N & !M) 4467 #define DO_NOR(N, M) (!(N | M)) 4468 #define DO_ORNOT(N, M) (N | !M) 4469 #define DO_XNOR(N, M) (!(N ^ M)) 4470 4471 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4472 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4473 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) 4474 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4475 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4476 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4477 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) 4478 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4479 4480 /* Vector count population in mask vcpop */ 4481 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4482 uint32_t desc) 4483 { 4484 target_ulong cnt = 0; 4485 uint32_t vm = vext_vm(desc); 4486 uint32_t vl = env->vl; 4487 int i; 4488 4489 for (i = env->vstart; i < vl; i++) { 4490 if (vm || vext_elem_mask(v0, i)) { 4491 if (vext_elem_mask(vs2, i)) { 4492 cnt++; 4493 } 4494 } 4495 } 4496 env->vstart = 0; 4497 return cnt; 4498 } 4499 4500 /* vfirst find-first-set mask bit*/ 4501 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4502 uint32_t desc) 4503 { 4504 uint32_t vm = vext_vm(desc); 4505 uint32_t vl = env->vl; 4506 int i; 4507 4508 for (i = env->vstart; i < vl; i++) { 4509 if (vm || vext_elem_mask(v0, i)) { 4510 if (vext_elem_mask(vs2, i)) { 4511 return i; 4512 } 4513 } 4514 } 4515 env->vstart = 0; 4516 return -1LL; 4517 } 4518 4519 enum set_mask_type { 4520 ONLY_FIRST = 1, 4521 INCLUDE_FIRST, 4522 BEFORE_FIRST, 4523 }; 4524 4525 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4526 uint32_t desc, enum set_mask_type type) 4527 { 4528 uint32_t vm = vext_vm(desc); 4529 uint32_t vl = env->vl; 4530 int i; 4531 bool first_mask_bit = false; 4532 4533 for (i = env->vstart; i < vl; i++) { 4534 if (!vm && !vext_elem_mask(v0, i)) { 4535 continue; 4536 } 4537 /* write a zero to all following active elements */ 4538 if (first_mask_bit) { 4539 vext_set_elem_mask(vd, i, 0); 4540 continue; 4541 } 4542 if (vext_elem_mask(vs2, i)) { 4543 first_mask_bit = true; 4544 if (type == BEFORE_FIRST) { 4545 vext_set_elem_mask(vd, i, 0); 4546 } else { 4547 
vext_set_elem_mask(vd, i, 1); 4548 } 4549 } else { 4550 if (type == ONLY_FIRST) { 4551 vext_set_elem_mask(vd, i, 0); 4552 } else { 4553 vext_set_elem_mask(vd, i, 1); 4554 } 4555 } 4556 } 4557 env->vstart = 0; 4558 } 4559 4560 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4561 uint32_t desc) 4562 { 4563 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4564 } 4565 4566 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4567 uint32_t desc) 4568 { 4569 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4570 } 4571 4572 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4573 uint32_t desc) 4574 { 4575 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4576 } 4577 4578 /* Vector Iota Instruction */ 4579 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4580 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4581 uint32_t desc) \ 4582 { \ 4583 uint32_t vm = vext_vm(desc); \ 4584 uint32_t vl = env->vl; \ 4585 uint32_t sum = 0; \ 4586 int i; \ 4587 \ 4588 for (i = env->vstart; i < vl; i++) { \ 4589 if (!vm && !vext_elem_mask(v0, i)) { \ 4590 continue; \ 4591 } \ 4592 *((ETYPE *)vd + H(i)) = sum; \ 4593 if (vext_elem_mask(vs2, i)) { \ 4594 sum++; \ 4595 } \ 4596 } \ 4597 env->vstart = 0; \ 4598 } 4599 4600 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4601 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4602 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4603 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4604 4605 /* Vector Element Index Instruction */ 4606 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4607 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4608 { \ 4609 uint32_t vm = vext_vm(desc); \ 4610 uint32_t vl = env->vl; \ 4611 int i; \ 4612 \ 4613 for (i = env->vstart; i < vl; i++) { \ 4614 if (!vm && !vext_elem_mask(v0, i)) { \ 4615 continue; \ 4616 } \ 4617 *((ETYPE *)vd + H(i)) = i; \ 4618 } \ 4619 env->vstart = 0; \ 4620 } 4621 4622 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4623 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4624 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4625 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4626 4627 /* 4628 *** Vector Permutation Instructions 4629 */ 4630 4631 /* Vector Slide Instructions */ 4632 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4633 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4634 CPURISCVState *env, uint32_t desc) \ 4635 { \ 4636 uint32_t vm = vext_vm(desc); \ 4637 uint32_t vl = env->vl; \ 4638 target_ulong offset = s1, i_min, i; \ 4639 \ 4640 i_min = MAX(env->vstart, offset); \ 4641 for (i = i_min; i < vl; i++) { \ 4642 if (!vm && !vext_elem_mask(v0, i)) { \ 4643 continue; \ 4644 } \ 4645 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4646 } \ 4647 } 4648 4649 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4650 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4651 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4652 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4653 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4654 4655 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4656 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4657 CPURISCVState *env, uint32_t desc) \ 4658 { \ 4659 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4660 uint32_t vm = vext_vm(desc); \ 4661 uint32_t vl = env->vl; \ 4662 target_ulong i_max, i; \ 4663 \ 4664 i_max = MAX(MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl), env->vstart); \ 4665 for (i = env->vstart; i < i_max; ++i) { \ 4666 if (vm || vext_elem_mask(v0, i)) { \ 4667 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4668 } \ 4669 } \ 4670 \ 4671 for (i = i_max; i < vl; ++i) { \ 4672 if (vm || vext_elem_mask(v0, i)) { \ 4673 *((ETYPE *)vd + H(i)) = 0; \ 4674 } \ 4675 } \ 4676 \ 4677 env->vstart = 0; \ 4678 } 4679 4680 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4681 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4682 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4683 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4684 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4685 4686 #define GEN_VEXT_VSLIE1UP(ESZ, H) \ 4687 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4688 CPURISCVState *env, uint32_t desc) \ 4689 { \ 4690 typedef uint##ESZ##_t ETYPE; \ 4691 uint32_t vm = vext_vm(desc); \ 4692 uint32_t vl = env->vl; \ 4693 uint32_t i; \ 4694 \ 4695 for (i = env->vstart; i < vl; i++) { \ 4696 if (!vm && !vext_elem_mask(v0, i)) { \ 4697 continue; \ 4698 } \ 4699 if (i == 0) { \ 4700 *((ETYPE *)vd + H(i)) = s1; \ 4701 } else { \ 4702 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4703 } \ 4704 } \ 4705 env->vstart = 0; \ 4706 } 4707 4708 GEN_VEXT_VSLIE1UP(8, H1) 4709 GEN_VEXT_VSLIE1UP(16, H2) 4710 GEN_VEXT_VSLIE1UP(32, H4) 4711 GEN_VEXT_VSLIE1UP(64, H8) 4712 4713 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \ 4714 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4715 CPURISCVState *env, uint32_t desc) \ 4716 { \ 4717 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4718 } 4719 4720 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4721 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4722 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4723 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4724 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4725 4726 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \ 4727 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4728 CPURISCVState *env, uint32_t desc) \ 4729 { \ 4730 typedef uint##ESZ##_t ETYPE; \ 4731 uint32_t vm = vext_vm(desc); \ 4732 uint32_t vl = env->vl; \ 4733 uint32_t i; \ 4734 \ 4735 for (i = env->vstart; i < vl; i++) { \ 4736 if (!vm && !vext_elem_mask(v0, i)) { \ 4737 continue; \ 4738 } \ 4739 if (i == vl - 1) { \ 4740 *((ETYPE *)vd + H(i)) = s1; \ 4741 } else { \ 4742 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4743 } \ 4744 } \ 4745 env->vstart = 0; \ 4746 } 4747 4748 GEN_VEXT_VSLIDE1DOWN(8, H1) 4749 GEN_VEXT_VSLIDE1DOWN(16, H2) 4750 GEN_VEXT_VSLIDE1DOWN(32, H4) 4751 GEN_VEXT_VSLIDE1DOWN(64, H8) 4752 4753 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \ 4754 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4755 CPURISCVState *env, uint32_t desc) \ 4756 { \ 4757 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4758 } 4759 4760 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4761 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4762 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4763 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4764 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4765 4766 /* Vector Floating-Point Slide Instructions */ 4767 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \ 4768 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4769 CPURISCVState *env, uint32_t desc) \ 4770 { \ 4771 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4772 } 4773 4774 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = 
vs2[i] */ 4775 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4776 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4777 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4778 4779 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \ 4780 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4781 CPURISCVState *env, uint32_t desc) \ 4782 { \ 4783 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4784 } 4785 4786 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4787 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4788 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4789 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4790 4791 /* Vector Register Gather Instruction */ 4792 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4793 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4794 CPURISCVState *env, uint32_t desc) \ 4795 { \ 4796 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4797 uint32_t vm = vext_vm(desc); \ 4798 uint32_t vl = env->vl; \ 4799 uint64_t index; \ 4800 uint32_t i; \ 4801 \ 4802 for (i = env->vstart; i < vl; i++) { \ 4803 if (!vm && !vext_elem_mask(v0, i)) { \ 4804 continue; \ 4805 } \ 4806 index = *((TS1 *)vs1 + HS1(i)); \ 4807 if (index >= vlmax) { \ 4808 *((TS2 *)vd + HS2(i)) = 0; \ 4809 } else { \ 4810 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4811 } \ 4812 } \ 4813 env->vstart = 0; \ 4814 } 4815 4816 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 4817 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4818 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4819 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4820 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4821 4822 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4823 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4824 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4825 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4826 4827 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4828 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4829 CPURISCVState *env, uint32_t desc) \ 4830 { \ 4831 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4832 uint32_t vm = vext_vm(desc); \ 4833 uint32_t vl = env->vl; \ 4834 uint64_t index = s1; \ 4835 uint32_t i; \ 4836 \ 4837 for (i = env->vstart; i < vl; i++) { \ 4838 if (!vm && !vext_elem_mask(v0, i)) { \ 4839 continue; \ 4840 } \ 4841 if (index >= vlmax) { \ 4842 *((ETYPE *)vd + H(i)) = 0; \ 4843 } else { \ 4844 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4845 } \ 4846 } \ 4847 env->vstart = 0; \ 4848 } 4849 4850 /* vd[i] = (x[rs1] >= VLMAX) ? 
0 : vs2[rs1] */ 4851 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4852 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4853 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4854 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4855 4856 /* Vector Compress Instruction */ 4857 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4858 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4859 CPURISCVState *env, uint32_t desc) \ 4860 { \ 4861 uint32_t vl = env->vl; \ 4862 uint32_t num = 0, i; \ 4863 \ 4864 for (i = env->vstart; i < vl; i++) { \ 4865 if (!vext_elem_mask(vs1, i)) { \ 4866 continue; \ 4867 } \ 4868 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4869 num++; \ 4870 } \ 4871 env->vstart = 0; \ 4872 } 4873 4874 /* Compress into vd elements of vs2 where vs1 is enabled */ 4875 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4876 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4877 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4878 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4879 4880 /* Vector Whole Register Move */ 4881 #define GEN_VEXT_VMV_WHOLE(NAME, LEN) \ 4882 void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \ 4883 uint32_t desc) \ 4884 { \ 4885 /* EEW = 8 */ \ 4886 uint32_t maxsz = simd_maxsz(desc); \ 4887 uint32_t i = env->vstart; \ 4888 \ 4889 memcpy((uint8_t *)vd + H1(i), \ 4890 (uint8_t *)vs2 + H1(i), \ 4891 maxsz - env->vstart); \ 4892 \ 4893 env->vstart = 0; \ 4894 } 4895 4896 GEN_VEXT_VMV_WHOLE(vmv1r_v, 1) 4897 GEN_VEXT_VMV_WHOLE(vmv2r_v, 2) 4898 GEN_VEXT_VMV_WHOLE(vmv4r_v, 4) 4899 GEN_VEXT_VMV_WHOLE(vmv8r_v, 8) 4900 4901 /* Vector Integer Extension */ 4902 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \ 4903 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4904 CPURISCVState *env, uint32_t desc) \ 4905 { \ 4906 uint32_t vl = env->vl; \ 4907 uint32_t vm = vext_vm(desc); \ 4908 uint32_t i; \ 4909 \ 4910 for (i = env->vstart; i < vl; i++) { \ 4911 if (!vm && !vext_elem_mask(v0, i)) { \ 4912 continue; \ 4913 } \ 4914 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ 4915 } \ 4916 env->vstart = 0; \ 4917 } 4918 4919 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) 4920 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2) 4921 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4) 4922 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1) 4923 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2) 4924 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1) 4925 4926 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1) 4927 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2) 4928 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) 4929 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) 4930 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) 4931 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) 4932
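
/*
 * Illustrative sketch for the integer-extension helpers above; kept inside
 * a comment so it does not affect the build.  The per-element computation
 * is plain C integer conversion, e.g. for vzext.vf4/vsext.vf4 with a
 * 32-bit destination (the array values are a hand-worked example):
 *
 *     uint8_t  vs2[4] = { 0x7f, 0x80, 0x01, 0xff };
 *     uint32_t zext[4], sext[4];
 *     for (int i = 0; i < 4; i++) {
 *         zext[i] = (uint32_t)vs2[i];           // 0x7f, 0x80, 0x01, 0xff
 *         sext[i] = (uint32_t)(int8_t)vs2[i];   // 0x7f, 0xffffff80, 0x01, 0xffffffff
 *     }
 *
 * The real helpers additionally honour the mask register (inactive
 * elements are skipped and so left undisturbed in vd), resume from
 * env->vstart, and index elements through the H* macros so the layout is
 * correct on big-endian hosts as well.
 */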