/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    bool vill = FIELD_EX64(s2, VTYPE, VILL);
    target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *  LMUL    vlmul    lmul
 *   1       000       0
 *   2       001       1
 *   4       010       2
 *   8       011       3
 *   -       100       -
 *  1/8      101      -3
 *  1/4      110      -2
 *  1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - esz;
    return scale < 0 ?
               vlenb >> -scale : vlenb << scale;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, addr, curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, addr, curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }

        k = 0;
        while (k < nf) {
            target_ulong addr = base + stride * i + (k << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t evl,
             uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

/*
 * A masked unit-stride load or store operation is a special case of the
 * strided operation, with stride = NF * sizeof(ETYPE).
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD);      \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_LOAD); \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_STORE);     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC(), MMU_DATA_STORE);\
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong
vext_get_index_addr(target_ulong base, 352 uint32_t idx, void *vs2); 353 354 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 355 static target_ulong NAME(target_ulong base, \ 356 uint32_t idx, void *vs2) \ 357 { \ 358 return (base + *((ETYPE *)vs2 + H(idx))); \ 359 } 360 361 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 362 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 363 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 364 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 365 366 static inline void 367 vext_ldst_index(void *vd, void *v0, target_ulong base, 368 void *vs2, CPURISCVState *env, uint32_t desc, 369 vext_get_index_addr get_index_addr, 370 vext_ldst_elem_fn *ldst_elem, 371 uint32_t esz, uintptr_t ra, MMUAccessType access_type) 372 { 373 uint32_t i, k; 374 uint32_t nf = vext_nf(desc); 375 uint32_t vm = vext_vm(desc); 376 uint32_t max_elems = vext_max_elems(desc, esz); 377 378 /* load bytes from guest memory */ 379 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 380 if (!vm && !vext_elem_mask(v0, i)) { 381 continue; 382 } 383 384 k = 0; 385 while (k < nf) { 386 abi_ptr addr = get_index_addr(base, i, vs2) + (k << esz); 387 ldst_elem(env, addr, i + k * max_elems, vd, ra); 388 k++; 389 } 390 } 391 env->vstart = 0; 392 } 393 394 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 395 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 396 void *vs2, CPURISCVState *env, uint32_t desc) \ 397 { \ 398 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 399 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC(), MMU_DATA_LOAD); \ 400 } 401 402 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 403 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 404 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 405 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 406 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 407 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 408 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 409 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 410 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 411 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 412 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 413 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 414 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 415 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 416 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 417 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 418 419 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 420 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 421 void *vs2, CPURISCVState *env, uint32_t desc) \ 422 { \ 423 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 424 STORE_FN, ctzl(sizeof(ETYPE)), \ 425 GETPC(), MMU_DATA_STORE); \ 426 } 427 428 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b) 429 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h) 430 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w) 431 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d) 432 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b) 433 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h) 434 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w) 435 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d) 436 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b) 437 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h) 438 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w) 439 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d) 440 
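/*
 * As a worked example of the address computation in vext_ldst_index()
 * above: for vlxei16_32_v (32-bit data, 16-bit indices) with, say, NF = 2,
 * field k of element i is accessed at
 *
 *     addr = get_index_addr(base, i, vs2) + (k << esz)
 *          = base + (uint16_t)vs2[i] + (k << 2)
 *
 * i.e. the index element supplies an unscaled byte offset, and only the
 * in-segment field offset is scaled by the data element size (esz = 2).
 */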
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, esz);
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = base + i * (nf << esz);
        if (i == 0) {
            probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, nf << esz, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, nf << esz, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr += offset;
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << esz);
            ldst_elem(env, addr, i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ?
(M) : (N)) 538 539 /* Unsigned min/max */ 540 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 541 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 542 543 /* 544 *** load and store whole register instructions 545 */ 546 static void 547 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 548 vext_ldst_elem_fn *ldst_elem, uint32_t esz, uintptr_t ra, 549 MMUAccessType access_type) 550 { 551 uint32_t i, k, off, pos; 552 uint32_t nf = vext_nf(desc); 553 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3; 554 uint32_t max_elems = vlenb >> esz; 555 556 k = env->vstart / max_elems; 557 off = env->vstart % max_elems; 558 559 if (off) { 560 /* load/store rest of elements of current segment pointed by vstart */ 561 for (pos = off; pos < max_elems; pos++, env->vstart++) { 562 target_ulong addr = base + ((pos + k * max_elems) << esz); 563 ldst_elem(env, addr, pos + k * max_elems, vd, ra); 564 } 565 k++; 566 } 567 568 /* load/store elements for rest of segments */ 569 for (; k < nf; k++) { 570 for (i = 0; i < max_elems; i++, env->vstart++) { 571 target_ulong addr = base + ((i + k * max_elems) << esz); 572 ldst_elem(env, addr, i + k * max_elems, vd, ra); 573 } 574 } 575 576 env->vstart = 0; 577 } 578 579 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 580 void HELPER(NAME)(void *vd, target_ulong base, \ 581 CPURISCVState *env, uint32_t desc) \ 582 { \ 583 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 584 ctzl(sizeof(ETYPE)), GETPC(), \ 585 MMU_DATA_LOAD); \ 586 } 587 588 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 589 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 590 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 591 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 592 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 593 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 594 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 595 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 596 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 597 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 598 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 599 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 600 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 601 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 602 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 603 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 604 605 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 606 void HELPER(NAME)(void *vd, target_ulong base, \ 607 CPURISCVState *env, uint32_t desc) \ 608 { \ 609 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 610 ctzl(sizeof(ETYPE)), GETPC(), \ 611 MMU_DATA_STORE); \ 612 } 613 614 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 615 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 616 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 617 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 618 619 /* 620 *** Vector Integer Arithmetic Instructions 621 */ 622 623 /* expand macro args before macro */ 624 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 625 626 /* (TD, T1, T2, TX1, TX2) */ 627 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 628 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 629 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 630 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 631 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 632 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 633 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 634 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 635 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 636 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 637 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 638 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 639 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 640 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 641 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 642 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 643 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 644 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 645 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 646 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 647 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 648 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 649 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 650 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 651 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 652 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 653 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 654 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 655 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 656 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 657 658 /* operation of two vector elements */ 659 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 660 661 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 662 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 663 { \ 664 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 665 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 666 *((TD *)vd + HD(i)) = OP(s2, s1); \ 667 } 668 #define DO_SUB(N, M) (N - M) 669 #define DO_RSUB(N, M) (M - N) 670 671 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 672 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 673 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 674 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 675 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 676 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 677 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 678 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 679 680 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 681 CPURISCVState *env, uint32_t desc, 682 uint32_t esz, uint32_t dsz, 683 opivv2_fn *fn) 684 { 685 uint32_t vm = vext_vm(desc); 686 uint32_t vl = env->vl; 687 uint32_t i; 688 689 for (i = env->vstart; i < vl; i++) { 690 if (!vm && !vext_elem_mask(v0, i)) { 691 continue; 692 } 693 fn(vd, vs1, vs2, i); 694 } 695 env->vstart = 0; 696 } 697 698 /* generate the helpers for OPIVV */ 699 #define GEN_VEXT_VV(NAME, ESZ, DSZ) \ 700 void HELPER(NAME)(void *vd, void *v0, void *vs1, 
\ 701 void *vs2, CPURISCVState *env, \ 702 uint32_t desc) \ 703 { \ 704 do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 705 do_##NAME); \ 706 } 707 708 GEN_VEXT_VV(vadd_vv_b, 1, 1) 709 GEN_VEXT_VV(vadd_vv_h, 2, 2) 710 GEN_VEXT_VV(vadd_vv_w, 4, 4) 711 GEN_VEXT_VV(vadd_vv_d, 8, 8) 712 GEN_VEXT_VV(vsub_vv_b, 1, 1) 713 GEN_VEXT_VV(vsub_vv_h, 2, 2) 714 GEN_VEXT_VV(vsub_vv_w, 4, 4) 715 GEN_VEXT_VV(vsub_vv_d, 8, 8) 716 717 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 718 719 /* 720 * (T1)s1 gives the real operator type. 721 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 722 */ 723 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 724 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 725 { \ 726 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 727 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 728 } 729 730 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 731 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 732 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 733 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 734 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 735 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 736 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 737 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 738 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 739 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 740 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 741 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 742 743 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 744 CPURISCVState *env, uint32_t desc, 745 uint32_t esz, uint32_t dsz, 746 opivx2_fn fn) 747 { 748 uint32_t vm = vext_vm(desc); 749 uint32_t vl = env->vl; 750 uint32_t i; 751 752 for (i = env->vstart; i < vl; i++) { 753 if (!vm && !vext_elem_mask(v0, i)) { 754 continue; 755 } 756 fn(vd, s1, vs2, i); 757 } 758 env->vstart = 0; 759 } 760 761 /* generate the helpers for OPIVX */ 762 #define GEN_VEXT_VX(NAME, ESZ, DSZ) \ 763 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 764 void *vs2, CPURISCVState *env, \ 765 uint32_t desc) \ 766 { \ 767 do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 768 do_##NAME); \ 769 } 770 771 GEN_VEXT_VX(vadd_vx_b, 1, 1) 772 GEN_VEXT_VX(vadd_vx_h, 2, 2) 773 GEN_VEXT_VX(vadd_vx_w, 4, 4) 774 GEN_VEXT_VX(vadd_vx_d, 8, 8) 775 GEN_VEXT_VX(vsub_vx_b, 1, 1) 776 GEN_VEXT_VX(vsub_vx_h, 2, 2) 777 GEN_VEXT_VX(vsub_vx_w, 4, 4) 778 GEN_VEXT_VX(vsub_vx_d, 8, 8) 779 GEN_VEXT_VX(vrsub_vx_b, 1, 1) 780 GEN_VEXT_VX(vrsub_vx_h, 2, 2) 781 GEN_VEXT_VX(vrsub_vx_w, 4, 4) 782 GEN_VEXT_VX(vrsub_vx_d, 8, 8) 783 784 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 785 { 786 intptr_t oprsz = simd_oprsz(desc); 787 intptr_t i; 788 789 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 790 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 791 } 792 } 793 794 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 795 { 796 intptr_t oprsz = simd_oprsz(desc); 797 intptr_t i; 798 799 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 800 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 801 } 802 } 803 804 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 805 { 806 intptr_t oprsz = simd_oprsz(desc); 807 intptr_t i; 808 809 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 810 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 811 } 812 } 813 814 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 815 
{ 816 intptr_t oprsz = simd_oprsz(desc); 817 intptr_t i; 818 819 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 820 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 821 } 822 } 823 824 /* Vector Widening Integer Add/Subtract */ 825 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 826 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 827 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 828 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 829 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 830 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 831 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 832 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 833 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 834 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 835 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 836 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 837 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 838 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 839 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 840 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 841 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 842 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 843 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 844 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 845 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 846 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 847 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 848 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 849 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 850 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 851 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 852 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 853 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 854 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 855 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 856 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 857 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 858 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 859 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 860 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 861 GEN_VEXT_VV(vwaddu_vv_b, 1, 2) 862 GEN_VEXT_VV(vwaddu_vv_h, 2, 4) 863 GEN_VEXT_VV(vwaddu_vv_w, 4, 8) 864 GEN_VEXT_VV(vwsubu_vv_b, 1, 2) 865 GEN_VEXT_VV(vwsubu_vv_h, 2, 4) 866 GEN_VEXT_VV(vwsubu_vv_w, 4, 8) 867 GEN_VEXT_VV(vwadd_vv_b, 1, 2) 868 GEN_VEXT_VV(vwadd_vv_h, 2, 4) 869 GEN_VEXT_VV(vwadd_vv_w, 4, 8) 870 GEN_VEXT_VV(vwsub_vv_b, 1, 2) 871 GEN_VEXT_VV(vwsub_vv_h, 2, 4) 872 GEN_VEXT_VV(vwsub_vv_w, 4, 8) 873 GEN_VEXT_VV(vwaddu_wv_b, 1, 2) 874 GEN_VEXT_VV(vwaddu_wv_h, 2, 4) 875 GEN_VEXT_VV(vwaddu_wv_w, 4, 8) 876 GEN_VEXT_VV(vwsubu_wv_b, 1, 2) 877 GEN_VEXT_VV(vwsubu_wv_h, 2, 4) 878 GEN_VEXT_VV(vwsubu_wv_w, 4, 8) 879 GEN_VEXT_VV(vwadd_wv_b, 1, 2) 880 GEN_VEXT_VV(vwadd_wv_h, 2, 4) 881 GEN_VEXT_VV(vwadd_wv_w, 4, 8) 882 GEN_VEXT_VV(vwsub_wv_b, 1, 2) 883 GEN_VEXT_VV(vwsub_wv_h, 2, 4) 884 GEN_VEXT_VV(vwsub_wv_w, 4, 8) 885 886 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 887 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 888 
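/*
 * As an illustration of how the type tuples drive the widening helpers,
 * RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) above expands
 * to roughly:
 *
 *     static void do_vwaddu_vx_b(void *vd, target_long s1, void *vs2, int i)
 *     {
 *         uint16_t s2 = *((uint8_t *)vs2 + H1(i));
 *         *((uint16_t *)vd + H2(i)) = DO_ADD(s2, (uint16_t)(uint8_t)s1);
 *     }
 *
 * i.e. WOP_UUU_B widens both 8-bit operands to uint16_t before the add and
 * stores a 16-bit result.
 */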
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 889 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 890 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 891 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 892 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 893 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 894 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 895 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 896 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 897 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 898 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 899 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 900 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 901 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 902 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 903 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 904 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 905 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 906 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 907 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 908 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 909 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 910 GEN_VEXT_VX(vwaddu_vx_b, 1, 2) 911 GEN_VEXT_VX(vwaddu_vx_h, 2, 4) 912 GEN_VEXT_VX(vwaddu_vx_w, 4, 8) 913 GEN_VEXT_VX(vwsubu_vx_b, 1, 2) 914 GEN_VEXT_VX(vwsubu_vx_h, 2, 4) 915 GEN_VEXT_VX(vwsubu_vx_w, 4, 8) 916 GEN_VEXT_VX(vwadd_vx_b, 1, 2) 917 GEN_VEXT_VX(vwadd_vx_h, 2, 4) 918 GEN_VEXT_VX(vwadd_vx_w, 4, 8) 919 GEN_VEXT_VX(vwsub_vx_b, 1, 2) 920 GEN_VEXT_VX(vwsub_vx_h, 2, 4) 921 GEN_VEXT_VX(vwsub_vx_w, 4, 8) 922 GEN_VEXT_VX(vwaddu_wx_b, 1, 2) 923 GEN_VEXT_VX(vwaddu_wx_h, 2, 4) 924 GEN_VEXT_VX(vwaddu_wx_w, 4, 8) 925 GEN_VEXT_VX(vwsubu_wx_b, 1, 2) 926 GEN_VEXT_VX(vwsubu_wx_h, 2, 4) 927 GEN_VEXT_VX(vwsubu_wx_w, 4, 8) 928 GEN_VEXT_VX(vwadd_wx_b, 1, 2) 929 GEN_VEXT_VX(vwadd_wx_h, 2, 4) 930 GEN_VEXT_VX(vwadd_wx_w, 4, 8) 931 GEN_VEXT_VX(vwsub_wx_b, 1, 2) 932 GEN_VEXT_VX(vwsub_wx_h, 2, 4) 933 GEN_VEXT_VX(vwsub_wx_w, 4, 8) 934 935 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 936 #define DO_VADC(N, M, C) (N + M + C) 937 #define DO_VSBC(N, M, C) (N - M - C) 938 939 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 940 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 941 CPURISCVState *env, uint32_t desc) \ 942 { \ 943 uint32_t vl = env->vl; \ 944 uint32_t i; \ 945 \ 946 for (i = env->vstart; i < vl; i++) { \ 947 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 948 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 949 ETYPE carry = vext_elem_mask(v0, i); \ 950 \ 951 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 952 } \ 953 env->vstart = 0; \ 954 } 955 956 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 957 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 958 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 959 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 960 961 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 962 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 963 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 964 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 965 966 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 967 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 968 CPURISCVState *env, uint32_t desc) \ 969 { \ 970 uint32_t vl = env->vl; \ 971 uint32_t i; \ 972 \ 973 for (i = env->vstart; i < vl; i++) { \ 974 ETYPE s2 = 
*((ETYPE *)vs2 + H(i)); \ 975 ETYPE carry = vext_elem_mask(v0, i); \ 976 \ 977 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 978 } \ 979 env->vstart = 0; \ 980 } 981 982 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 983 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 984 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 985 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 986 987 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 988 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 989 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 990 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 991 992 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 993 (__typeof(N))(N + M) < N) 994 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 995 996 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 997 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 998 CPURISCVState *env, uint32_t desc) \ 999 { \ 1000 uint32_t vl = env->vl; \ 1001 uint32_t vm = vext_vm(desc); \ 1002 uint32_t i; \ 1003 \ 1004 for (i = env->vstart; i < vl; i++) { \ 1005 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1006 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1007 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1008 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1009 } \ 1010 env->vstart = 0; \ 1011 } 1012 1013 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1014 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1015 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1016 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1017 1018 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1019 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1020 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1021 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1022 1023 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1024 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1025 void *vs2, CPURISCVState *env, uint32_t desc) \ 1026 { \ 1027 uint32_t vl = env->vl; \ 1028 uint32_t vm = vext_vm(desc); \ 1029 uint32_t i; \ 1030 \ 1031 for (i = env->vstart; i < vl; i++) { \ 1032 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1033 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 1034 vext_set_elem_mask(vd, i, \ 1035 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1036 } \ 1037 env->vstart = 0; \ 1038 } 1039 1040 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1041 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1042 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1043 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1044 1045 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1046 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1047 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1048 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1049 1050 /* Vector Bitwise Logical Instructions */ 1051 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1052 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1053 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1054 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1055 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1056 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1057 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1058 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1059 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1060 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1061 RVVCALL(OPIVV2, vxor_vv_w, 
OP_SSS_W, H4, H4, H4, DO_XOR) 1062 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1063 GEN_VEXT_VV(vand_vv_b, 1, 1) 1064 GEN_VEXT_VV(vand_vv_h, 2, 2) 1065 GEN_VEXT_VV(vand_vv_w, 4, 4) 1066 GEN_VEXT_VV(vand_vv_d, 8, 8) 1067 GEN_VEXT_VV(vor_vv_b, 1, 1) 1068 GEN_VEXT_VV(vor_vv_h, 2, 2) 1069 GEN_VEXT_VV(vor_vv_w, 4, 4) 1070 GEN_VEXT_VV(vor_vv_d, 8, 8) 1071 GEN_VEXT_VV(vxor_vv_b, 1, 1) 1072 GEN_VEXT_VV(vxor_vv_h, 2, 2) 1073 GEN_VEXT_VV(vxor_vv_w, 4, 4) 1074 GEN_VEXT_VV(vxor_vv_d, 8, 8) 1075 1076 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1077 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1078 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1079 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1080 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1081 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1082 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1083 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1084 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1085 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1086 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1087 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1088 GEN_VEXT_VX(vand_vx_b, 1, 1) 1089 GEN_VEXT_VX(vand_vx_h, 2, 2) 1090 GEN_VEXT_VX(vand_vx_w, 4, 4) 1091 GEN_VEXT_VX(vand_vx_d, 8, 8) 1092 GEN_VEXT_VX(vor_vx_b, 1, 1) 1093 GEN_VEXT_VX(vor_vx_h, 2, 2) 1094 GEN_VEXT_VX(vor_vx_w, 4, 4) 1095 GEN_VEXT_VX(vor_vx_d, 8, 8) 1096 GEN_VEXT_VX(vxor_vx_b, 1, 1) 1097 GEN_VEXT_VX(vxor_vx_h, 2, 2) 1098 GEN_VEXT_VX(vxor_vx_w, 4, 4) 1099 GEN_VEXT_VX(vxor_vx_d, 8, 8) 1100 1101 /* Vector Single-Width Bit Shift Instructions */ 1102 #define DO_SLL(N, M) (N << (M)) 1103 #define DO_SRL(N, M) (N >> (M)) 1104 1105 /* generate the helpers for shift instructions with two vector operators */ 1106 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1107 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1108 void *vs2, CPURISCVState *env, uint32_t desc) \ 1109 { \ 1110 uint32_t vm = vext_vm(desc); \ 1111 uint32_t vl = env->vl; \ 1112 uint32_t i; \ 1113 \ 1114 for (i = env->vstart; i < vl; i++) { \ 1115 if (!vm && !vext_elem_mask(v0, i)) { \ 1116 continue; \ 1117 } \ 1118 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1119 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1120 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1121 } \ 1122 env->vstart = 0; \ 1123 } 1124 1125 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1126 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1127 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1128 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1129 1130 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1131 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1132 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1133 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1134 1135 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1136 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1137 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1138 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1139 1140 /* generate the helpers for shift instructions with one vector and one scalar */ 1141 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1142 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1143 void *vs2, CPURISCVState *env, uint32_t 
desc) \ 1144 { \ 1145 uint32_t vm = vext_vm(desc); \ 1146 uint32_t vl = env->vl; \ 1147 uint32_t i; \ 1148 \ 1149 for (i = env->vstart; i < vl; i++) { \ 1150 if (!vm && !vext_elem_mask(v0, i)) { \ 1151 continue; \ 1152 } \ 1153 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1154 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1155 } \ 1156 env->vstart = 0; \ 1157 } 1158 1159 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1160 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1161 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1162 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1163 1164 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1165 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1166 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1167 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1168 1169 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1170 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1171 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1172 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1173 1174 /* Vector Narrowing Integer Right Shift Instructions */ 1175 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1176 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1177 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1178 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1179 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1180 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1181 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1182 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1183 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1184 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1185 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1186 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1187 1188 /* Vector Integer Comparison Instructions */ 1189 #define DO_MSEQ(N, M) (N == M) 1190 #define DO_MSNE(N, M) (N != M) 1191 #define DO_MSLT(N, M) (N < M) 1192 #define DO_MSLE(N, M) (N <= M) 1193 #define DO_MSGT(N, M) (N > M) 1194 1195 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1196 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1197 CPURISCVState *env, uint32_t desc) \ 1198 { \ 1199 uint32_t vm = vext_vm(desc); \ 1200 uint32_t vl = env->vl; \ 1201 uint32_t i; \ 1202 \ 1203 for (i = env->vstart; i < vl; i++) { \ 1204 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1205 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1206 if (!vm && !vext_elem_mask(v0, i)) { \ 1207 continue; \ 1208 } \ 1209 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1210 } \ 1211 env->vstart = 0; \ 1212 } 1213 1214 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1215 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1216 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1217 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1218 1219 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1220 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1221 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1222 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1223 1224 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1225 
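/*
 * For example, with SEW=8, vl = 4 and all elements active, vmsltu_vv_b with
 * vs2 = {1, 5, 7, 9} and vs1 = {2, 5, 9, 9} evaluates DO_MSLT(s2, s1) per
 * element and deposits the results {1, 0, 1, 0} into bits 0..3 of vd via
 * vext_set_elem_mask(); bits at index vl and above are not written by this
 * loop.
 */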
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1226 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1227 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1228 1229 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1230 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1231 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1232 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1233 1234 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1235 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1236 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1237 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1238 1239 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1240 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1241 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1242 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1243 1244 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1245 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1246 CPURISCVState *env, uint32_t desc) \ 1247 { \ 1248 uint32_t vm = vext_vm(desc); \ 1249 uint32_t vl = env->vl; \ 1250 uint32_t i; \ 1251 \ 1252 for (i = env->vstart; i < vl; i++) { \ 1253 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1254 if (!vm && !vext_elem_mask(v0, i)) { \ 1255 continue; \ 1256 } \ 1257 vext_set_elem_mask(vd, i, \ 1258 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1259 } \ 1260 env->vstart = 0; \ 1261 } 1262 1263 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1264 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1265 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1266 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1267 1268 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1269 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1270 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1271 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1272 1273 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1274 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1275 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1276 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1277 1278 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1279 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1280 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1281 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1282 1283 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1284 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1285 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1286 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1287 1288 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1289 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1290 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1291 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1292 1293 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1294 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1295 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1296 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1297 1298 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1299 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1300 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1301 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1302 1303 /* Vector Integer Min/Max Instructions */ 1304 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1305 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1306 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1307 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1308 RVVCALL(OPIVV2, vmin_vv_b, 
OP_SSS_B, H1, H1, H1, DO_MIN) 1309 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1310 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1311 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1312 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1313 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1314 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1315 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1316 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1317 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1318 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1319 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1320 GEN_VEXT_VV(vminu_vv_b, 1, 1) 1321 GEN_VEXT_VV(vminu_vv_h, 2, 2) 1322 GEN_VEXT_VV(vminu_vv_w, 4, 4) 1323 GEN_VEXT_VV(vminu_vv_d, 8, 8) 1324 GEN_VEXT_VV(vmin_vv_b, 1, 1) 1325 GEN_VEXT_VV(vmin_vv_h, 2, 2) 1326 GEN_VEXT_VV(vmin_vv_w, 4, 4) 1327 GEN_VEXT_VV(vmin_vv_d, 8, 8) 1328 GEN_VEXT_VV(vmaxu_vv_b, 1, 1) 1329 GEN_VEXT_VV(vmaxu_vv_h, 2, 2) 1330 GEN_VEXT_VV(vmaxu_vv_w, 4, 4) 1331 GEN_VEXT_VV(vmaxu_vv_d, 8, 8) 1332 GEN_VEXT_VV(vmax_vv_b, 1, 1) 1333 GEN_VEXT_VV(vmax_vv_h, 2, 2) 1334 GEN_VEXT_VV(vmax_vv_w, 4, 4) 1335 GEN_VEXT_VV(vmax_vv_d, 8, 8) 1336 1337 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1338 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1339 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1340 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1341 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1342 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1343 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1344 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1345 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1346 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1347 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1348 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1349 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1350 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1351 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1352 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1353 GEN_VEXT_VX(vminu_vx_b, 1, 1) 1354 GEN_VEXT_VX(vminu_vx_h, 2, 2) 1355 GEN_VEXT_VX(vminu_vx_w, 4, 4) 1356 GEN_VEXT_VX(vminu_vx_d, 8, 8) 1357 GEN_VEXT_VX(vmin_vx_b, 1, 1) 1358 GEN_VEXT_VX(vmin_vx_h, 2, 2) 1359 GEN_VEXT_VX(vmin_vx_w, 4, 4) 1360 GEN_VEXT_VX(vmin_vx_d, 8, 8) 1361 GEN_VEXT_VX(vmaxu_vx_b, 1, 1) 1362 GEN_VEXT_VX(vmaxu_vx_h, 2, 2) 1363 GEN_VEXT_VX(vmaxu_vx_w, 4, 4) 1364 GEN_VEXT_VX(vmaxu_vx_d, 8, 8) 1365 GEN_VEXT_VX(vmax_vx_b, 1, 1) 1366 GEN_VEXT_VX(vmax_vx_h, 2, 2) 1367 GEN_VEXT_VX(vmax_vx_w, 4, 4) 1368 GEN_VEXT_VX(vmax_vx_d, 8, 8) 1369 1370 /* Vector Single-Width Integer Multiply Instructions */ 1371 #define DO_MUL(N, M) (N * M) 1372 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1373 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1374 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1375 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1376 GEN_VEXT_VV(vmul_vv_b, 1, 1) 1377 GEN_VEXT_VV(vmul_vv_h, 2, 2) 1378 GEN_VEXT_VV(vmul_vv_w, 4, 4) 1379 GEN_VEXT_VV(vmul_vv_d, 8, 8) 1380 1381 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1382 { 1383 return (int16_t)s2 * (int16_t)s1 >> 8; 1384 } 1385 1386 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1387 { 1388 return (int32_t)s2 * (int32_t)s1 >> 16; 1389 } 1390 1391 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1392 { 1393 return 
(int64_t)s2 * (int64_t)s1 >> 32; 1394 } 1395 1396 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1397 { 1398 uint64_t hi_64, lo_64; 1399 1400 muls64(&lo_64, &hi_64, s1, s2); 1401 return hi_64; 1402 } 1403 1404 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1405 { 1406 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1407 } 1408 1409 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1410 { 1411 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1412 } 1413 1414 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1415 { 1416 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1417 } 1418 1419 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1420 { 1421 uint64_t hi_64, lo_64; 1422 1423 mulu64(&lo_64, &hi_64, s2, s1); 1424 return hi_64; 1425 } 1426 1427 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1428 { 1429 return (int16_t)s2 * (uint16_t)s1 >> 8; 1430 } 1431 1432 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1433 { 1434 return (int32_t)s2 * (uint32_t)s1 >> 16; 1435 } 1436 1437 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1438 { 1439 return (int64_t)s2 * (uint64_t)s1 >> 32; 1440 } 1441 1442 /* 1443 * Let A = signed operand, 1444 * B = unsigned operand 1445 * P = mulu64(A, B), unsigned product 1446 * 1447 * LET X = 2 ** 64 - A, 2's complement of A 1448 * SP = signed product 1449 * THEN 1450 * IF A < 0 1451 * SP = -X * B 1452 * = -(2 ** 64 - A) * B 1453 * = A * B - 2 ** 64 * B 1454 * = P - 2 ** 64 * B 1455 * ELSE 1456 * SP = P 1457 * THEN 1458 * HI_P -= (A < 0 ? B : 0) 1459 */ 1460 1461 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1462 { 1463 uint64_t hi_64, lo_64; 1464 1465 mulu64(&lo_64, &hi_64, s2, s1); 1466 1467 hi_64 -= s2 < 0 ? s1 : 0; 1468 return hi_64; 1469 } 1470 1471 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1472 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1473 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1474 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1475 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1476 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1477 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1478 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1479 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1480 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1481 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1482 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1483 GEN_VEXT_VV(vmulh_vv_b, 1, 1) 1484 GEN_VEXT_VV(vmulh_vv_h, 2, 2) 1485 GEN_VEXT_VV(vmulh_vv_w, 4, 4) 1486 GEN_VEXT_VV(vmulh_vv_d, 8, 8) 1487 GEN_VEXT_VV(vmulhu_vv_b, 1, 1) 1488 GEN_VEXT_VV(vmulhu_vv_h, 2, 2) 1489 GEN_VEXT_VV(vmulhu_vv_w, 4, 4) 1490 GEN_VEXT_VV(vmulhu_vv_d, 8, 8) 1491 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) 1492 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) 1493 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) 1494 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) 1495 1496 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1497 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1498 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1499 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1500 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1501 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1502 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1503 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1504 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1505 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, 
do_mulhu_h) 1506 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1507 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1508 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1509 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1510 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1511 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1512 GEN_VEXT_VX(vmul_vx_b, 1, 1) 1513 GEN_VEXT_VX(vmul_vx_h, 2, 2) 1514 GEN_VEXT_VX(vmul_vx_w, 4, 4) 1515 GEN_VEXT_VX(vmul_vx_d, 8, 8) 1516 GEN_VEXT_VX(vmulh_vx_b, 1, 1) 1517 GEN_VEXT_VX(vmulh_vx_h, 2, 2) 1518 GEN_VEXT_VX(vmulh_vx_w, 4, 4) 1519 GEN_VEXT_VX(vmulh_vx_d, 8, 8) 1520 GEN_VEXT_VX(vmulhu_vx_b, 1, 1) 1521 GEN_VEXT_VX(vmulhu_vx_h, 2, 2) 1522 GEN_VEXT_VX(vmulhu_vx_w, 4, 4) 1523 GEN_VEXT_VX(vmulhu_vx_d, 8, 8) 1524 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) 1525 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) 1526 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) 1527 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) 1528 1529 /* Vector Integer Divide Instructions */ 1530 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1531 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1532 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1533 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1534 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1535 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1536 1537 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1538 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1539 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1540 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1541 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1542 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1543 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1544 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1545 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1546 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1547 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1548 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1549 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1550 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1551 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1552 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1553 GEN_VEXT_VV(vdivu_vv_b, 1, 1) 1554 GEN_VEXT_VV(vdivu_vv_h, 2, 2) 1555 GEN_VEXT_VV(vdivu_vv_w, 4, 4) 1556 GEN_VEXT_VV(vdivu_vv_d, 8, 8) 1557 GEN_VEXT_VV(vdiv_vv_b, 1, 1) 1558 GEN_VEXT_VV(vdiv_vv_h, 2, 2) 1559 GEN_VEXT_VV(vdiv_vv_w, 4, 4) 1560 GEN_VEXT_VV(vdiv_vv_d, 8, 8) 1561 GEN_VEXT_VV(vremu_vv_b, 1, 1) 1562 GEN_VEXT_VV(vremu_vv_h, 2, 2) 1563 GEN_VEXT_VV(vremu_vv_w, 4, 4) 1564 GEN_VEXT_VV(vremu_vv_d, 8, 8) 1565 GEN_VEXT_VV(vrem_vv_b, 1, 1) 1566 GEN_VEXT_VV(vrem_vv_h, 2, 2) 1567 GEN_VEXT_VV(vrem_vv_w, 4, 4) 1568 GEN_VEXT_VV(vrem_vv_d, 8, 8) 1569 1570 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1571 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1572 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1573 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1574 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1575 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1576 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1577 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1578 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1579 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, 
H2, DO_REMU) 1580 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1581 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1582 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1583 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1584 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1585 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1586 GEN_VEXT_VX(vdivu_vx_b, 1, 1) 1587 GEN_VEXT_VX(vdivu_vx_h, 2, 2) 1588 GEN_VEXT_VX(vdivu_vx_w, 4, 4) 1589 GEN_VEXT_VX(vdivu_vx_d, 8, 8) 1590 GEN_VEXT_VX(vdiv_vx_b, 1, 1) 1591 GEN_VEXT_VX(vdiv_vx_h, 2, 2) 1592 GEN_VEXT_VX(vdiv_vx_w, 4, 4) 1593 GEN_VEXT_VX(vdiv_vx_d, 8, 8) 1594 GEN_VEXT_VX(vremu_vx_b, 1, 1) 1595 GEN_VEXT_VX(vremu_vx_h, 2, 2) 1596 GEN_VEXT_VX(vremu_vx_w, 4, 4) 1597 GEN_VEXT_VX(vremu_vx_d, 8, 8) 1598 GEN_VEXT_VX(vrem_vx_b, 1, 1) 1599 GEN_VEXT_VX(vrem_vx_h, 2, 2) 1600 GEN_VEXT_VX(vrem_vx_w, 4, 4) 1601 GEN_VEXT_VX(vrem_vx_d, 8, 8) 1602 1603 /* Vector Widening Integer Multiply Instructions */ 1604 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1605 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1606 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1607 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1608 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1609 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1610 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1611 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1612 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1613 GEN_VEXT_VV(vwmul_vv_b, 1, 2) 1614 GEN_VEXT_VV(vwmul_vv_h, 2, 4) 1615 GEN_VEXT_VV(vwmul_vv_w, 4, 8) 1616 GEN_VEXT_VV(vwmulu_vv_b, 1, 2) 1617 GEN_VEXT_VV(vwmulu_vv_h, 2, 4) 1618 GEN_VEXT_VV(vwmulu_vv_w, 4, 8) 1619 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) 1620 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) 1621 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) 1622 1623 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1624 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1625 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1626 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1627 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1628 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1629 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1630 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1631 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1632 GEN_VEXT_VX(vwmul_vx_b, 1, 2) 1633 GEN_VEXT_VX(vwmul_vx_h, 2, 4) 1634 GEN_VEXT_VX(vwmul_vx_w, 4, 8) 1635 GEN_VEXT_VX(vwmulu_vx_b, 1, 2) 1636 GEN_VEXT_VX(vwmulu_vx_h, 2, 4) 1637 GEN_VEXT_VX(vwmulu_vx_w, 4, 8) 1638 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) 1639 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) 1640 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) 1641 1642 /* Vector Single-Width Integer Multiply-Add Instructions */ 1643 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1644 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1645 { \ 1646 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1647 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1648 TD d = *((TD *)vd + HD(i)); \ 1649 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1650 } 1651 1652 #define DO_MACC(N, M, D) (M * N + D) 1653 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1654 #define DO_MADD(N, M, D) (M * D + N) 1655 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1656 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1657 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1658 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1659 RVVCALL(OPIVV3, vmacc_vv_d, 
OP_SSS_D, H8, H8, H8, DO_MACC) 1660 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1661 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1662 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1663 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1664 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1665 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1666 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1667 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1668 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1669 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1670 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1671 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1672 GEN_VEXT_VV(vmacc_vv_b, 1, 1) 1673 GEN_VEXT_VV(vmacc_vv_h, 2, 2) 1674 GEN_VEXT_VV(vmacc_vv_w, 4, 4) 1675 GEN_VEXT_VV(vmacc_vv_d, 8, 8) 1676 GEN_VEXT_VV(vnmsac_vv_b, 1, 1) 1677 GEN_VEXT_VV(vnmsac_vv_h, 2, 2) 1678 GEN_VEXT_VV(vnmsac_vv_w, 4, 4) 1679 GEN_VEXT_VV(vnmsac_vv_d, 8, 8) 1680 GEN_VEXT_VV(vmadd_vv_b, 1, 1) 1681 GEN_VEXT_VV(vmadd_vv_h, 2, 2) 1682 GEN_VEXT_VV(vmadd_vv_w, 4, 4) 1683 GEN_VEXT_VV(vmadd_vv_d, 8, 8) 1684 GEN_VEXT_VV(vnmsub_vv_b, 1, 1) 1685 GEN_VEXT_VV(vnmsub_vv_h, 2, 2) 1686 GEN_VEXT_VV(vnmsub_vv_w, 4, 4) 1687 GEN_VEXT_VV(vnmsub_vv_d, 8, 8) 1688 1689 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1690 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1691 { \ 1692 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1693 TD d = *((TD *)vd + HD(i)); \ 1694 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1695 } 1696 1697 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1698 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1699 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1700 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1701 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1702 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1703 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1704 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1705 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1706 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1707 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1708 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1709 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1710 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1711 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1712 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1713 GEN_VEXT_VX(vmacc_vx_b, 1, 1) 1714 GEN_VEXT_VX(vmacc_vx_h, 2, 2) 1715 GEN_VEXT_VX(vmacc_vx_w, 4, 4) 1716 GEN_VEXT_VX(vmacc_vx_d, 8, 8) 1717 GEN_VEXT_VX(vnmsac_vx_b, 1, 1) 1718 GEN_VEXT_VX(vnmsac_vx_h, 2, 2) 1719 GEN_VEXT_VX(vnmsac_vx_w, 4, 4) 1720 GEN_VEXT_VX(vnmsac_vx_d, 8, 8) 1721 GEN_VEXT_VX(vmadd_vx_b, 1, 1) 1722 GEN_VEXT_VX(vmadd_vx_h, 2, 2) 1723 GEN_VEXT_VX(vmadd_vx_w, 4, 4) 1724 GEN_VEXT_VX(vmadd_vx_d, 8, 8) 1725 GEN_VEXT_VX(vnmsub_vx_b, 1, 1) 1726 GEN_VEXT_VX(vnmsub_vx_h, 2, 2) 1727 GEN_VEXT_VX(vnmsub_vx_w, 4, 4) 1728 GEN_VEXT_VX(vnmsub_vx_d, 8, 8) 1729 1730 /* Vector Widening Integer Multiply-Add Instructions */ 1731 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1732 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1733 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1734 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1735 
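/*
 * Reference-only sketch (editorial, not used by the helpers here): assuming
 * WOP_SSS_B supplies (TD, T1, T2, TX1, TX2) = int16_t, int8_t, int8_t,
 * int16_t, int16_t, the invocation
 *     RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
 * above expands to a per-element function roughly equivalent to:
 */
static inline void do_vwmacc_vv_b_sketch(void *vd, void *vs1, void *vs2, int i)
{
    /* Sources are sign-extended from SEW to 2*SEW before the multiply. */
    int16_t s1 = *((int8_t *)vs1 + H1(i));
    int16_t s2 = *((int8_t *)vs2 + H1(i));
    /* The accumulator vd[i] is already a 2*SEW element. */
    int16_t d = *((int16_t *)vd + H2(i));

    /* DO_MACC(N, M, D) is (M * N + D), i.e. vd[i] += vs1[i] * vs2[i]. */
    *((int16_t *)vd + H2(i)) = s2 * s1 + d;
}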
RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1736 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1737 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1738 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1739 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1740 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) 1741 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) 1742 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) 1743 GEN_VEXT_VV(vwmacc_vv_b, 1, 2) 1744 GEN_VEXT_VV(vwmacc_vv_h, 2, 4) 1745 GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 1746 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) 1747 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) 1748 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) 1749 1750 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1751 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1752 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1753 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1754 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1755 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1756 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1757 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1758 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1759 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1760 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1761 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1762 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) 1763 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) 1764 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) 1765 GEN_VEXT_VX(vwmacc_vx_b, 1, 2) 1766 GEN_VEXT_VX(vwmacc_vx_h, 2, 4) 1767 GEN_VEXT_VX(vwmacc_vx_w, 4, 8) 1768 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) 1769 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) 1770 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) 1771 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) 1772 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) 1773 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) 1774 1775 /* Vector Integer Merge and Move Instructions */ 1776 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1777 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1778 uint32_t desc) \ 1779 { \ 1780 uint32_t vl = env->vl; \ 1781 uint32_t i; \ 1782 \ 1783 for (i = env->vstart; i < vl; i++) { \ 1784 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1785 *((ETYPE *)vd + H(i)) = s1; \ 1786 } \ 1787 env->vstart = 0; \ 1788 } 1789 1790 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1791 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1792 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1793 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1794 1795 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1796 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1797 uint32_t desc) \ 1798 { \ 1799 uint32_t vl = env->vl; \ 1800 uint32_t i; \ 1801 \ 1802 for (i = env->vstart; i < vl; i++) { \ 1803 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1804 } \ 1805 env->vstart = 0; \ 1806 } 1807 1808 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1809 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1810 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1811 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1812 1813 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1814 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1815 CPURISCVState *env, uint32_t desc) \ 1816 { \ 1817 uint32_t vl = env->vl; \ 1818 uint32_t i; \ 1819 \ 1820 for (i = env->vstart; i < vl; i++) { \ 1821 ETYPE *vt = (!vext_elem_mask(v0, i) ? 
vs2 : vs1); \ 1822 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1823 } \ 1824 env->vstart = 0; \ 1825 } 1826 1827 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1828 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1829 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1830 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1831 1832 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1833 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1834 void *vs2, CPURISCVState *env, uint32_t desc) \ 1835 { \ 1836 uint32_t vl = env->vl; \ 1837 uint32_t i; \ 1838 \ 1839 for (i = env->vstart; i < vl; i++) { \ 1840 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1841 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 1842 (ETYPE)(target_long)s1); \ 1843 *((ETYPE *)vd + H(i)) = d; \ 1844 } \ 1845 env->vstart = 0; \ 1846 } 1847 1848 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1849 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1850 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1851 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1852 1853 /* 1854 *** Vector Fixed-Point Arithmetic Instructions 1855 */ 1856 1857 /* Vector Single-Width Saturating Add and Subtract */ 1858 1859 /* 1860 * As fixed point instructions probably have round mode and saturation, 1861 * define common macros for fixed point here. 1862 */ 1863 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1864 CPURISCVState *env, int vxrm); 1865 1866 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1867 static inline void \ 1868 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1869 CPURISCVState *env, int vxrm) \ 1870 { \ 1871 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1872 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1873 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1874 } 1875 1876 static inline void 1877 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1878 CPURISCVState *env, 1879 uint32_t vl, uint32_t vm, int vxrm, 1880 opivv2_rm_fn *fn) 1881 { 1882 for (uint32_t i = env->vstart; i < vl; i++) { 1883 if (!vm && !vext_elem_mask(v0, i)) { 1884 continue; 1885 } 1886 fn(vd, vs1, vs2, i, env, vxrm); 1887 } 1888 env->vstart = 0; 1889 } 1890 1891 static inline void 1892 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1893 CPURISCVState *env, 1894 uint32_t desc, uint32_t esz, uint32_t dsz, 1895 opivv2_rm_fn *fn) 1896 { 1897 uint32_t vm = vext_vm(desc); 1898 uint32_t vl = env->vl; 1899 1900 switch (env->vxrm) { 1901 case 0: /* rnu */ 1902 vext_vv_rm_1(vd, v0, vs1, vs2, 1903 env, vl, vm, 0, fn); 1904 break; 1905 case 1: /* rne */ 1906 vext_vv_rm_1(vd, v0, vs1, vs2, 1907 env, vl, vm, 1, fn); 1908 break; 1909 case 2: /* rdn */ 1910 vext_vv_rm_1(vd, v0, vs1, vs2, 1911 env, vl, vm, 2, fn); 1912 break; 1913 default: /* rod */ 1914 vext_vv_rm_1(vd, v0, vs1, vs2, 1915 env, vl, vm, 3, fn); 1916 break; 1917 } 1918 } 1919 1920 /* generate helpers for fixed point instructions with OPIVV format */ 1921 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ 1922 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1923 CPURISCVState *env, uint32_t desc) \ 1924 { \ 1925 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 1926 do_##NAME); \ 1927 } 1928 1929 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 1930 { 1931 uint8_t res = a + b; 1932 if (res < a) { 1933 res = UINT8_MAX; 1934 env->vxsat = 0x1; 1935 } 1936 return res; 1937 } 1938 1939 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1940 uint16_t b) 1941 { 1942 uint16_t res = a + b; 1943 if (res < a) { 1944 res = UINT16_MAX; 1945 env->vxsat = 0x1; 1946 } 1947 return res; 
1948 } 1949 1950 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1951 uint32_t b) 1952 { 1953 uint32_t res = a + b; 1954 if (res < a) { 1955 res = UINT32_MAX; 1956 env->vxsat = 0x1; 1957 } 1958 return res; 1959 } 1960 1961 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1962 uint64_t b) 1963 { 1964 uint64_t res = a + b; 1965 if (res < a) { 1966 res = UINT64_MAX; 1967 env->vxsat = 0x1; 1968 } 1969 return res; 1970 } 1971 1972 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 1973 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 1974 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 1975 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 1976 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) 1977 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) 1978 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) 1979 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) 1980 1981 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 1982 CPURISCVState *env, int vxrm); 1983 1984 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1985 static inline void \ 1986 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 1987 CPURISCVState *env, int vxrm) \ 1988 { \ 1989 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1990 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 1991 } 1992 1993 static inline void 1994 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 1995 CPURISCVState *env, 1996 uint32_t vl, uint32_t vm, int vxrm, 1997 opivx2_rm_fn *fn) 1998 { 1999 for (uint32_t i = env->vstart; i < vl; i++) { 2000 if (!vm && !vext_elem_mask(v0, i)) { 2001 continue; 2002 } 2003 fn(vd, s1, vs2, i, env, vxrm); 2004 } 2005 env->vstart = 0; 2006 } 2007 2008 static inline void 2009 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2010 CPURISCVState *env, 2011 uint32_t desc, uint32_t esz, uint32_t dsz, 2012 opivx2_rm_fn *fn) 2013 { 2014 uint32_t vm = vext_vm(desc); 2015 uint32_t vl = env->vl; 2016 2017 switch (env->vxrm) { 2018 case 0: /* rnu */ 2019 vext_vx_rm_1(vd, v0, s1, vs2, 2020 env, vl, vm, 0, fn); 2021 break; 2022 case 1: /* rne */ 2023 vext_vx_rm_1(vd, v0, s1, vs2, 2024 env, vl, vm, 1, fn); 2025 break; 2026 case 2: /* rdn */ 2027 vext_vx_rm_1(vd, v0, s1, vs2, 2028 env, vl, vm, 2, fn); 2029 break; 2030 default: /* rod */ 2031 vext_vx_rm_1(vd, v0, s1, vs2, 2032 env, vl, vm, 3, fn); 2033 break; 2034 } 2035 } 2036 2037 /* generate helpers for fixed point instructions with OPIVX format */ 2038 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ 2039 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2040 void *vs2, CPURISCVState *env, uint32_t desc) \ 2041 { \ 2042 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 2043 do_##NAME); \ 2044 } 2045 2046 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2047 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2048 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2049 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2050 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 2051 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) 2052 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) 2053 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) 2054 2055 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2056 { 2057 int8_t res = a + b; 2058 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2059 res = a > 0 ? 
INT8_MAX : INT8_MIN; 2060 env->vxsat = 0x1; 2061 } 2062 return res; 2063 } 2064 2065 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2066 { 2067 int16_t res = a + b; 2068 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2069 res = a > 0 ? INT16_MAX : INT16_MIN; 2070 env->vxsat = 0x1; 2071 } 2072 return res; 2073 } 2074 2075 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2076 { 2077 int32_t res = a + b; 2078 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2079 res = a > 0 ? INT32_MAX : INT32_MIN; 2080 env->vxsat = 0x1; 2081 } 2082 return res; 2083 } 2084 2085 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2086 { 2087 int64_t res = a + b; 2088 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2089 res = a > 0 ? INT64_MAX : INT64_MIN; 2090 env->vxsat = 0x1; 2091 } 2092 return res; 2093 } 2094 2095 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2096 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2097 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2098 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2099 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) 2100 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) 2101 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) 2102 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) 2103 2104 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2105 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2106 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2107 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2108 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) 2109 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) 2110 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) 2111 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) 2112 2113 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2114 { 2115 uint8_t res = a - b; 2116 if (res > a) { 2117 res = 0; 2118 env->vxsat = 0x1; 2119 } 2120 return res; 2121 } 2122 2123 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2124 uint16_t b) 2125 { 2126 uint16_t res = a - b; 2127 if (res > a) { 2128 res = 0; 2129 env->vxsat = 0x1; 2130 } 2131 return res; 2132 } 2133 2134 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2135 uint32_t b) 2136 { 2137 uint32_t res = a - b; 2138 if (res > a) { 2139 res = 0; 2140 env->vxsat = 0x1; 2141 } 2142 return res; 2143 } 2144 2145 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2146 uint64_t b) 2147 { 2148 uint64_t res = a - b; 2149 if (res > a) { 2150 res = 0; 2151 env->vxsat = 0x1; 2152 } 2153 return res; 2154 } 2155 2156 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2157 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2158 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2159 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2160 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) 2161 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) 2162 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) 2163 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) 2164 2165 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2166 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2167 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2168 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2169 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) 2170 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) 2171 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) 2172 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) 2173 2174 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2175 { 2176 int8_t res = a - b; 2177 if 
((res ^ a) & (a ^ b) & INT8_MIN) { 2178 res = a >= 0 ? INT8_MAX : INT8_MIN; 2179 env->vxsat = 0x1; 2180 } 2181 return res; 2182 } 2183 2184 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2185 { 2186 int16_t res = a - b; 2187 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2188 res = a >= 0 ? INT16_MAX : INT16_MIN; 2189 env->vxsat = 0x1; 2190 } 2191 return res; 2192 } 2193 2194 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2195 { 2196 int32_t res = a - b; 2197 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2198 res = a >= 0 ? INT32_MAX : INT32_MIN; 2199 env->vxsat = 0x1; 2200 } 2201 return res; 2202 } 2203 2204 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2205 { 2206 int64_t res = a - b; 2207 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2208 res = a >= 0 ? INT64_MAX : INT64_MIN; 2209 env->vxsat = 0x1; 2210 } 2211 return res; 2212 } 2213 2214 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2215 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2216 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2217 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2218 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) 2219 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) 2220 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) 2221 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) 2222 2223 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2224 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2225 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2226 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2227 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) 2228 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) 2229 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) 2230 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) 2231 2232 /* Vector Single-Width Averaging Add and Subtract */ 2233 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2234 { 2235 uint8_t d = extract64(v, shift, 1); 2236 uint8_t d1; 2237 uint64_t D1, D2; 2238 2239 if (shift == 0 || shift > 64) { 2240 return 0; 2241 } 2242 2243 d1 = extract64(v, shift - 1, 1); 2244 D1 = extract64(v, 0, shift); 2245 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2246 return d1; 2247 } else if (vxrm == 1) { /* round-to-nearest-even */ 2248 if (shift > 1) { 2249 D2 = extract64(v, 0, shift - 1); 2250 return d1 & ((D2 != 0) | d); 2251 } else { 2252 return d1 & d; 2253 } 2254 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2255 return !d & (D1 != 0); 2256 } 2257 return 0; /* round-down (truncate) */ 2258 } 2259 2260 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2261 { 2262 int64_t res = (int64_t)a + b; 2263 uint8_t round = get_round(vxrm, res, 1); 2264 2265 return (res >> 1) + round; 2266 } 2267 2268 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2269 { 2270 int64_t res = a + b; 2271 uint8_t round = get_round(vxrm, res, 1); 2272 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2273 2274 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2275 return ((res >> 1) ^ over) + round; 2276 } 2277 2278 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2279 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2280 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2281 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2282 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) 2283 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 2284 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) 2285 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) 2286 2287 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2288 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2289 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2290 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2291 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) 2292 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) 2293 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) 2294 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) 2295 2296 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2297 uint32_t a, uint32_t b) 2298 { 2299 uint64_t res = (uint64_t)a + b; 2300 uint8_t round = get_round(vxrm, res, 1); 2301 2302 return (res >> 1) + round; 2303 } 2304 2305 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2306 uint64_t a, uint64_t b) 2307 { 2308 uint64_t res = a + b; 2309 uint8_t round = get_round(vxrm, res, 1); 2310 uint64_t over = (uint64_t)(res < a) << 63; 2311 2312 return ((res >> 1) | over) + round; 2313 } 2314 2315 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2316 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2317 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2318 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2319 GEN_VEXT_VV_RM(vaaddu_vv_b, 1, 1) 2320 GEN_VEXT_VV_RM(vaaddu_vv_h, 2, 2) 2321 GEN_VEXT_VV_RM(vaaddu_vv_w, 4, 4) 2322 GEN_VEXT_VV_RM(vaaddu_vv_d, 8, 8) 2323 2324 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2325 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2326 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2327 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2328 GEN_VEXT_VX_RM(vaaddu_vx_b, 1, 1) 2329 GEN_VEXT_VX_RM(vaaddu_vx_h, 2, 2) 2330 GEN_VEXT_VX_RM(vaaddu_vx_w, 4, 4) 2331 GEN_VEXT_VX_RM(vaaddu_vx_d, 8, 8) 2332 2333 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2334 { 2335 int64_t res = (int64_t)a - b; 2336 uint8_t round = get_round(vxrm, res, 1); 2337 2338 return (res >> 1) + round; 2339 } 2340 2341 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2342 { 2343 int64_t res = (int64_t)a - b; 2344 uint8_t round = get_round(vxrm, res, 1); 2345 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2346 2347 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2348 return ((res >> 1) ^ over) + round; 2349 } 2350 2351 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2352 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2353 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2354 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2355 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) 2356 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) 2357 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) 2358 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) 2359 2360 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2361 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2362 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2363 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2364 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) 2365 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) 2366 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) 2367 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) 2368 2369 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2370 uint32_t a, uint32_t b) 2371 { 2372 int64_t res = (int64_t)a - b; 2373 uint8_t round = get_round(vxrm, res, 1); 2374 2375 return (res >> 1) + round; 2376 } 2377 2378 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2379 uint64_t a, uint64_t b) 2380 { 2381 uint64_t res = (uint64_t)a - b; 2382 uint8_t round = get_round(vxrm, res, 1); 2383 uint64_t over = (uint64_t)(res > a) << 63; 2384 2385 return ((res >> 1) | over) + round; 2386 } 2387 2388 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2389 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2390 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2391 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2392 GEN_VEXT_VV_RM(vasubu_vv_b, 1, 1) 2393 GEN_VEXT_VV_RM(vasubu_vv_h, 2, 2) 2394 GEN_VEXT_VV_RM(vasubu_vv_w, 4, 4) 2395 GEN_VEXT_VV_RM(vasubu_vv_d, 8, 8) 2396 2397 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2398 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2399 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2400 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2401 GEN_VEXT_VX_RM(vasubu_vx_b, 1, 1) 2402 GEN_VEXT_VX_RM(vasubu_vx_h, 2, 2) 2403 GEN_VEXT_VX_RM(vasubu_vx_w, 4, 4) 2404 GEN_VEXT_VX_RM(vasubu_vx_d, 8, 8) 2405 2406 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2407 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2408 { 2409 uint8_t round; 2410 int16_t res; 2411 2412 res = (int16_t)a * (int16_t)b; 2413 round = get_round(vxrm, res, 7); 2414 res = (res >> 7) + round; 2415 2416 if (res > INT8_MAX) { 2417 env->vxsat = 0x1; 2418 return INT8_MAX; 2419 } else if (res < INT8_MIN) { 2420 env->vxsat = 0x1; 2421 return INT8_MIN; 2422 } else { 2423 return res; 2424 } 2425 } 2426 2427 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2428 { 2429 uint8_t round; 2430 int32_t res; 2431 2432 res = (int32_t)a * (int32_t)b; 2433 round = get_round(vxrm, res, 15); 2434 res = (res >> 15) + round; 2435 2436 if (res > INT16_MAX) { 2437 env->vxsat = 0x1; 2438 return INT16_MAX; 2439 } else if (res < INT16_MIN) { 2440 env->vxsat = 0x1; 2441 return INT16_MIN; 2442 } else { 2443 return res; 2444 } 2445 } 2446 2447 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2448 { 2449 uint8_t round; 2450 int64_t res; 2451 2452 res = (int64_t)a * (int64_t)b; 2453 round = get_round(vxrm, res, 31); 2454 res = (res >> 31) + round; 2455 2456 if (res > INT32_MAX) { 2457 env->vxsat = 0x1; 2458 return INT32_MAX; 2459 } else 
if (res < INT32_MIN) { 2460 env->vxsat = 0x1; 2461 return INT32_MIN; 2462 } else { 2463 return res; 2464 } 2465 } 2466 2467 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2468 { 2469 uint8_t round; 2470 uint64_t hi_64, lo_64; 2471 int64_t res; 2472 2473 if (a == INT64_MIN && b == INT64_MIN) { 2474 env->vxsat = 1; 2475 return INT64_MAX; 2476 } 2477 2478 muls64(&lo_64, &hi_64, a, b); 2479 round = get_round(vxrm, lo_64, 63); 2480 /* 2481 * Cannot overflow, as there are always 2482 * 2 sign bits after multiply. 2483 */ 2484 res = (hi_64 << 1) | (lo_64 >> 63); 2485 if (round) { 2486 if (res == INT64_MAX) { 2487 env->vxsat = 1; 2488 } else { 2489 res += 1; 2490 } 2491 } 2492 return res; 2493 } 2494 2495 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2496 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2497 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2498 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2499 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) 2500 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) 2501 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) 2502 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) 2503 2504 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2505 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2506 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2507 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2508 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) 2509 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) 2510 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) 2511 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) 2512 2513 /* Vector Single-Width Scaling Shift Instructions */ 2514 static inline uint8_t 2515 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2516 { 2517 uint8_t round, shift = b & 0x7; 2518 uint8_t res; 2519 2520 round = get_round(vxrm, a, shift); 2521 res = (a >> shift) + round; 2522 return res; 2523 } 2524 static inline uint16_t 2525 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2526 { 2527 uint8_t round, shift = b & 0xf; 2528 uint16_t res; 2529 2530 round = get_round(vxrm, a, shift); 2531 res = (a >> shift) + round; 2532 return res; 2533 } 2534 static inline uint32_t 2535 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2536 { 2537 uint8_t round, shift = b & 0x1f; 2538 uint32_t res; 2539 2540 round = get_round(vxrm, a, shift); 2541 res = (a >> shift) + round; 2542 return res; 2543 } 2544 static inline uint64_t 2545 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2546 { 2547 uint8_t round, shift = b & 0x3f; 2548 uint64_t res; 2549 2550 round = get_round(vxrm, a, shift); 2551 res = (a >> shift) + round; 2552 return res; 2553 } 2554 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2555 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2556 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2557 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2558 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) 2559 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) 2560 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) 2561 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) 2562 2563 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2564 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2565 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2566 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2567 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) 2568 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) 2569 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) 2570 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) 2571 2572 static inline int8_t 2573 vssra8(CPURISCVState *env, int 
vxrm, int8_t a, int8_t b) 2574 { 2575 uint8_t round, shift = b & 0x7; 2576 int8_t res; 2577 2578 round = get_round(vxrm, a, shift); 2579 res = (a >> shift) + round; 2580 return res; 2581 } 2582 static inline int16_t 2583 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2584 { 2585 uint8_t round, shift = b & 0xf; 2586 int16_t res; 2587 2588 round = get_round(vxrm, a, shift); 2589 res = (a >> shift) + round; 2590 return res; 2591 } 2592 static inline int32_t 2593 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2594 { 2595 uint8_t round, shift = b & 0x1f; 2596 int32_t res; 2597 2598 round = get_round(vxrm, a, shift); 2599 res = (a >> shift) + round; 2600 return res; 2601 } 2602 static inline int64_t 2603 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2604 { 2605 uint8_t round, shift = b & 0x3f; 2606 int64_t res; 2607 2608 round = get_round(vxrm, a, shift); 2609 res = (a >> shift) + round; 2610 return res; 2611 } 2612 2613 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2614 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2615 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2616 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2617 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) 2618 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) 2619 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) 2620 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) 2621 2622 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2623 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2624 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2625 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2626 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) 2627 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) 2628 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) 2629 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) 2630 2631 /* Vector Narrowing Fixed-Point Clip Instructions */ 2632 static inline int8_t 2633 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2634 { 2635 uint8_t round, shift = b & 0xf; 2636 int16_t res; 2637 2638 round = get_round(vxrm, a, shift); 2639 res = (a >> shift) + round; 2640 if (res > INT8_MAX) { 2641 env->vxsat = 0x1; 2642 return INT8_MAX; 2643 } else if (res < INT8_MIN) { 2644 env->vxsat = 0x1; 2645 return INT8_MIN; 2646 } else { 2647 return res; 2648 } 2649 } 2650 2651 static inline int16_t 2652 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2653 { 2654 uint8_t round, shift = b & 0x1f; 2655 int32_t res; 2656 2657 round = get_round(vxrm, a, shift); 2658 res = (a >> shift) + round; 2659 if (res > INT16_MAX) { 2660 env->vxsat = 0x1; 2661 return INT16_MAX; 2662 } else if (res < INT16_MIN) { 2663 env->vxsat = 0x1; 2664 return INT16_MIN; 2665 } else { 2666 return res; 2667 } 2668 } 2669 2670 static inline int32_t 2671 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2672 { 2673 uint8_t round, shift = b & 0x3f; 2674 int64_t res; 2675 2676 round = get_round(vxrm, a, shift); 2677 res = (a >> shift) + round; 2678 if (res > INT32_MAX) { 2679 env->vxsat = 0x1; 2680 return INT32_MAX; 2681 } else if (res < INT32_MIN) { 2682 env->vxsat = 0x1; 2683 return INT32_MIN; 2684 } else { 2685 return res; 2686 } 2687 } 2688 2689 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2690 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2691 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2692 GEN_VEXT_VV_RM(vnclip_wv_b, 1, 1) 2693 GEN_VEXT_VV_RM(vnclip_wv_h, 2, 2) 2694 GEN_VEXT_VV_RM(vnclip_wv_w, 4, 4) 2695 2696 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, 
vnclip8) 2697 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2698 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2699 GEN_VEXT_VX_RM(vnclip_wx_b, 1, 1) 2700 GEN_VEXT_VX_RM(vnclip_wx_h, 2, 2) 2701 GEN_VEXT_VX_RM(vnclip_wx_w, 4, 4) 2702 2703 static inline uint8_t 2704 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2705 { 2706 uint8_t round, shift = b & 0xf; 2707 uint16_t res; 2708 2709 round = get_round(vxrm, a, shift); 2710 res = (a >> shift) + round; 2711 if (res > UINT8_MAX) { 2712 env->vxsat = 0x1; 2713 return UINT8_MAX; 2714 } else { 2715 return res; 2716 } 2717 } 2718 2719 static inline uint16_t 2720 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2721 { 2722 uint8_t round, shift = b & 0x1f; 2723 uint32_t res; 2724 2725 round = get_round(vxrm, a, shift); 2726 res = (a >> shift) + round; 2727 if (res > UINT16_MAX) { 2728 env->vxsat = 0x1; 2729 return UINT16_MAX; 2730 } else { 2731 return res; 2732 } 2733 } 2734 2735 static inline uint32_t 2736 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2737 { 2738 uint8_t round, shift = b & 0x3f; 2739 uint64_t res; 2740 2741 round = get_round(vxrm, a, shift); 2742 res = (a >> shift) + round; 2743 if (res > UINT32_MAX) { 2744 env->vxsat = 0x1; 2745 return UINT32_MAX; 2746 } else { 2747 return res; 2748 } 2749 } 2750 2751 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2752 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2753 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2754 GEN_VEXT_VV_RM(vnclipu_wv_b, 1, 1) 2755 GEN_VEXT_VV_RM(vnclipu_wv_h, 2, 2) 2756 GEN_VEXT_VV_RM(vnclipu_wv_w, 4, 4) 2757 2758 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2759 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2760 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 2761 GEN_VEXT_VX_RM(vnclipu_wx_b, 1, 1) 2762 GEN_VEXT_VX_RM(vnclipu_wx_h, 2, 2) 2763 GEN_VEXT_VX_RM(vnclipu_wx_w, 4, 4) 2764 2765 /* 2766 *** Vector Float Point Arithmetic Instructions 2767 */ 2768 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2769 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2770 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2771 CPURISCVState *env) \ 2772 { \ 2773 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2774 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2775 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2776 } 2777 2778 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \ 2779 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2780 void *vs2, CPURISCVState *env, \ 2781 uint32_t desc) \ 2782 { \ 2783 uint32_t vm = vext_vm(desc); \ 2784 uint32_t vl = env->vl; \ 2785 uint32_t i; \ 2786 \ 2787 for (i = env->vstart; i < vl; i++) { \ 2788 if (!vm && !vext_elem_mask(v0, i)) { \ 2789 continue; \ 2790 } \ 2791 do_##NAME(vd, vs1, vs2, i, env); \ 2792 } \ 2793 env->vstart = 0; \ 2794 } 2795 2796 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2797 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2798 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2799 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) 2800 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4) 2801 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8) 2802 2803 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2804 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2805 CPURISCVState *env) \ 2806 { \ 2807 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2808 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 2809 } 2810 
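/*
 * Reference-only sketch (editorial): assuming OP_UUU_W supplies uint32_t for
 * all of (TD, T1, T2, TX1, TX2), the pair
 *     RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
 *     GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4)
 * above combines into a helper roughly equivalent to the following.  The
 * element type is a raw uint32_t bit pattern, as softfloat expects.
 */
static inline void vfadd_vv_w_sketch(void *vd, void *v0, void *vs1, void *vs2,
                                     CPURISCVState *env, uint32_t desc)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = env->vstart; i < vl; i++) {
        uint32_t s1, s2;

        if (!vm && !vext_elem_mask(v0, i)) {
            /* Masked-off element: vd[i] is left unchanged. */
            continue;
        }
        s1 = *((uint32_t *)vs1 + H4(i));
        s2 = *((uint32_t *)vs2 + H4(i));
        *((uint32_t *)vd + H4(i)) = float32_add(s2, s1, &env->fp_status);
    }
    env->vstart = 0;
}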
2811 #define GEN_VEXT_VF(NAME, ESZ, DSZ) \ 2812 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2813 void *vs2, CPURISCVState *env, \ 2814 uint32_t desc) \ 2815 { \ 2816 uint32_t vm = vext_vm(desc); \ 2817 uint32_t vl = env->vl; \ 2818 uint32_t i; \ 2819 \ 2820 for (i = env->vstart; i < vl; i++) { \ 2821 if (!vm && !vext_elem_mask(v0, i)) { \ 2822 continue; \ 2823 } \ 2824 do_##NAME(vd, s1, vs2, i, env); \ 2825 } \ 2826 env->vstart = 0; \ 2827 } 2828 2829 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2830 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2831 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2832 GEN_VEXT_VF(vfadd_vf_h, 2, 2) 2833 GEN_VEXT_VF(vfadd_vf_w, 4, 4) 2834 GEN_VEXT_VF(vfadd_vf_d, 8, 8) 2835 2836 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2837 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2838 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2839 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) 2840 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) 2841 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) 2842 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2843 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2844 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2845 GEN_VEXT_VF(vfsub_vf_h, 2, 2) 2846 GEN_VEXT_VF(vfsub_vf_w, 4, 4) 2847 GEN_VEXT_VF(vfsub_vf_d, 8, 8) 2848 2849 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2850 { 2851 return float16_sub(b, a, s); 2852 } 2853 2854 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2855 { 2856 return float32_sub(b, a, s); 2857 } 2858 2859 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2860 { 2861 return float64_sub(b, a, s); 2862 } 2863 2864 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2865 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2866 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2867 GEN_VEXT_VF(vfrsub_vf_h, 2, 2) 2868 GEN_VEXT_VF(vfrsub_vf_w, 4, 4) 2869 GEN_VEXT_VF(vfrsub_vf_d, 8, 8) 2870 2871 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2872 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2873 { 2874 return float32_add(float16_to_float32(a, true, s), 2875 float16_to_float32(b, true, s), s); 2876 } 2877 2878 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2879 { 2880 return float64_add(float32_to_float64(a, s), 2881 float32_to_float64(b, s), s); 2882 2883 } 2884 2885 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2886 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2887 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) 2888 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) 2889 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2890 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2891 GEN_VEXT_VF(vfwadd_vf_h, 2, 4) 2892 GEN_VEXT_VF(vfwadd_vf_w, 4, 8) 2893 2894 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2895 { 2896 return float32_sub(float16_to_float32(a, true, s), 2897 float16_to_float32(b, true, s), s); 2898 } 2899 2900 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2901 { 2902 return float64_sub(float32_to_float64(a, s), 2903 float32_to_float64(b, s), s); 2904 2905 } 2906 2907 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 2908 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 2909 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) 2910 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) 2911 RVVCALL(OPFVF2, vfwsub_vf_h, 
WOP_UUU_H, H4, H2, vfwsub16) 2912 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 2913 GEN_VEXT_VF(vfwsub_vf_h, 2, 4) 2914 GEN_VEXT_VF(vfwsub_vf_w, 4, 8) 2915 2916 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 2917 { 2918 return float32_add(a, float16_to_float32(b, true, s), s); 2919 } 2920 2921 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 2922 { 2923 return float64_add(a, float32_to_float64(b, s), s); 2924 } 2925 2926 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 2927 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 2928 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) 2929 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 2930 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 2931 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 2932 GEN_VEXT_VF(vfwadd_wf_h, 2, 4) 2933 GEN_VEXT_VF(vfwadd_wf_w, 4, 8) 2934 2935 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 2936 { 2937 return float32_sub(a, float16_to_float32(b, true, s), s); 2938 } 2939 2940 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 2941 { 2942 return float64_sub(a, float32_to_float64(b, s), s); 2943 } 2944 2945 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 2946 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 2947 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) 2948 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) 2949 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 2950 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 2951 GEN_VEXT_VF(vfwsub_wf_h, 2, 4) 2952 GEN_VEXT_VF(vfwsub_wf_w, 4, 8) 2953 2954 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 2955 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 2956 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 2957 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 2958 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) 2959 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) 2960 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) 2961 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 2962 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 2963 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 2964 GEN_VEXT_VF(vfmul_vf_h, 2, 2) 2965 GEN_VEXT_VF(vfmul_vf_w, 4, 4) 2966 GEN_VEXT_VF(vfmul_vf_d, 8, 8) 2967 2968 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 2969 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 2970 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 2971 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 2972 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) 2973 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) 2974 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 2975 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 2976 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 2977 GEN_VEXT_VF(vfdiv_vf_h, 2, 2) 2978 GEN_VEXT_VF(vfdiv_vf_w, 4, 4) 2979 GEN_VEXT_VF(vfdiv_vf_d, 8, 8) 2980 2981 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 2982 { 2983 return float16_div(b, a, s); 2984 } 2985 2986 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 2987 { 2988 return float32_div(b, a, s); 2989 } 2990 2991 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 2992 { 2993 return float64_div(b, a, s); 2994 } 2995 2996 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 2997 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 2998 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 2999 
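/*
 * Reference-only note (editorial): OPFVF2 calls OP(s2, s1, ...) with
 * s2 = vs2[i] and s1 = the scalar rs1, so the *_rsub/*_rdiv wrappers above,
 * which simply swap their operands, yield vd[i] = rs1 - vs2[i] and
 * vd[i] = rs1 / vs2[i].  A single-precision sketch of the resulting
 * per-element computation for vfrdiv.vf:
 */
static inline uint32_t vfrdiv_vf_w_elem_sketch(uint32_t vs2_i, uint32_t rs1,
                                               float_status *s)
{
    /* Same as float32_rdiv(vs2_i, rs1, s) defined above. */
    return float32_div(rs1, vs2_i, s);
}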
GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) 3000 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) 3001 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) 3002 3003 /* Vector Widening Floating-Point Multiply */ 3004 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3005 { 3006 return float32_mul(float16_to_float32(a, true, s), 3007 float16_to_float32(b, true, s), s); 3008 } 3009 3010 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3011 { 3012 return float64_mul(float32_to_float64(a, s), 3013 float32_to_float64(b, s), s); 3014 3015 } 3016 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3017 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3018 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) 3019 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) 3020 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3021 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3022 GEN_VEXT_VF(vfwmul_vf_h, 2, 4) 3023 GEN_VEXT_VF(vfwmul_vf_w, 4, 8) 3024 3025 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3026 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3027 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3028 CPURISCVState *env) \ 3029 { \ 3030 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3031 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3032 TD d = *((TD *)vd + HD(i)); \ 3033 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3034 } 3035 3036 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3037 { 3038 return float16_muladd(a, b, d, 0, s); 3039 } 3040 3041 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3042 { 3043 return float32_muladd(a, b, d, 0, s); 3044 } 3045 3046 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3047 { 3048 return float64_muladd(a, b, d, 0, s); 3049 } 3050 3051 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3052 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3053 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3054 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) 3055 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) 3056 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) 3057 3058 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3059 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3060 CPURISCVState *env) \ 3061 { \ 3062 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3063 TD d = *((TD *)vd + HD(i)); \ 3064 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3065 } 3066 3067 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3068 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3069 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3070 GEN_VEXT_VF(vfmacc_vf_h, 2, 2) 3071 GEN_VEXT_VF(vfmacc_vf_w, 4, 4) 3072 GEN_VEXT_VF(vfmacc_vf_d, 8, 8) 3073 3074 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3075 { 3076 return float16_muladd(a, b, d, 3077 float_muladd_negate_c | float_muladd_negate_product, s); 3078 } 3079 3080 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3081 { 3082 return float32_muladd(a, b, d, 3083 float_muladd_negate_c | float_muladd_negate_product, s); 3084 } 3085 3086 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3087 { 3088 return float64_muladd(a, b, d, 3089 float_muladd_negate_c | float_muladd_negate_product, s); 3090 } 3091 3092 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3093 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3094 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3095 
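/*
 * Editorial note on the muladd flags used by the fused helpers in this
 * section: float_muladd_negate_product negates the a*b product and
 * float_muladd_negate_c negates the addend d (the old vd[i]), so
 *     fmacc*   (no flags)                      computes  (a * b) + d
 *     fnmacc*  (negate_product | negate_c)     computes -(a * b) - d
 *     fmsac*   (negate_c, below)               computes  (a * b) - d
 *     fnmsac*  (negate_product, below)         computes -(a * b) + d
 * matching the RVV vfmacc/vfnmacc/vfmsac/vfnmsac definitions.
 */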
GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) 3096 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) 3097 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) 3098 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3099 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3100 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3101 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) 3102 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) 3103 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) 3104 3105 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3106 { 3107 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3108 } 3109 3110 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3111 { 3112 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3113 } 3114 3115 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3116 { 3117 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3118 } 3119 3120 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3121 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3122 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3123 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) 3124 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) 3125 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) 3126 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3127 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3128 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3129 GEN_VEXT_VF(vfmsac_vf_h, 2, 2) 3130 GEN_VEXT_VF(vfmsac_vf_w, 4, 4) 3131 GEN_VEXT_VF(vfmsac_vf_d, 8, 8) 3132 3133 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3134 { 3135 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3136 } 3137 3138 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3139 { 3140 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3141 } 3142 3143 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3144 { 3145 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3146 } 3147 3148 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3149 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3150 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3151 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) 3152 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) 3153 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) 3154 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3155 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3156 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3157 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) 3158 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) 3159 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) 3160 3161 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3162 { 3163 return float16_muladd(d, b, a, 0, s); 3164 } 3165 3166 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3167 { 3168 return float32_muladd(d, b, a, 0, s); 3169 } 3170 3171 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3172 { 3173 return float64_muladd(d, b, a, 0, s); 3174 } 3175 3176 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3177 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3178 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3179 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) 3180 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) 3181 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) 3182 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3183 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3184 
RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3185 GEN_VEXT_VF(vfmadd_vf_h, 2, 2) 3186 GEN_VEXT_VF(vfmadd_vf_w, 4, 4) 3187 GEN_VEXT_VF(vfmadd_vf_d, 8, 8) 3188 3189 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3190 { 3191 return float16_muladd(d, b, a, 3192 float_muladd_negate_c | float_muladd_negate_product, s); 3193 } 3194 3195 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3196 { 3197 return float32_muladd(d, b, a, 3198 float_muladd_negate_c | float_muladd_negate_product, s); 3199 } 3200 3201 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3202 { 3203 return float64_muladd(d, b, a, 3204 float_muladd_negate_c | float_muladd_negate_product, s); 3205 } 3206 3207 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3208 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3209 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3210 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) 3211 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) 3212 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) 3213 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3214 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3215 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3216 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) 3217 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) 3218 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) 3219 3220 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3221 { 3222 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3223 } 3224 3225 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3226 { 3227 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3228 } 3229 3230 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3231 { 3232 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3233 } 3234 3235 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3236 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3237 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3238 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) 3239 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 3240 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) 3241 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3242 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3243 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3244 GEN_VEXT_VF(vfmsub_vf_h, 2, 2) 3245 GEN_VEXT_VF(vfmsub_vf_w, 4, 4) 3246 GEN_VEXT_VF(vfmsub_vf_d, 8, 8) 3247 3248 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3249 { 3250 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3251 } 3252 3253 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3254 { 3255 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3256 } 3257 3258 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3259 { 3260 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3261 } 3262 3263 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3264 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3265 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3266 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) 3267 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) 3268 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) 3269 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3270 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3271 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3272 
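/*
 * Editorial note: unlike the *macc/*msac forms, the *madd/*msub helpers
 * above pass the old destination d as the multiplicand
 * (float*_muladd(d, b, a, ...)), so with a = vs2[i], b = vs1[i] and
 * d = vd[i] they compute
 *     vfmadd   (vs1[i] * vd[i]) + vs2[i]
 *     vfnmadd -(vs1[i] * vd[i]) - vs2[i]
 *     vfmsub   (vs1[i] * vd[i]) - vs2[i]
 *     vfnmsub -(vs1[i] * vd[i]) + vs2[i]
 * A single-precision sketch of the vfmadd element operation:
 */
static inline uint32_t vfmadd_w_elem_sketch(uint32_t vs1_i, uint32_t vs2_i,
                                            uint32_t vd_i, float_status *s)
{
    /* Same as fmadd32(vs2_i, vs1_i, vd_i, s) above. */
    return float32_muladd(vd_i, vs1_i, vs2_i, 0, s);
}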
GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) 3273 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) 3274 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) 3275 3276 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3277 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3278 { 3279 return float32_muladd(float16_to_float32(a, true, s), 3280 float16_to_float32(b, true, s), d, 0, s); 3281 } 3282 3283 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3284 { 3285 return float64_muladd(float32_to_float64(a, s), 3286 float32_to_float64(b, s), d, 0, s); 3287 } 3288 3289 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3290 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3291 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) 3292 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) 3293 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3294 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3295 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) 3296 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) 3297 3298 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3299 { 3300 return float32_muladd(float16_to_float32(a, true, s), 3301 float16_to_float32(b, true, s), d, 3302 float_muladd_negate_c | float_muladd_negate_product, s); 3303 } 3304 3305 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3306 { 3307 return float64_muladd(float32_to_float64(a, s), 3308 float32_to_float64(b, s), d, 3309 float_muladd_negate_c | float_muladd_negate_product, s); 3310 } 3311 3312 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3313 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3314 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) 3315 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) 3316 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3317 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3318 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) 3319 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) 3320 3321 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3322 { 3323 return float32_muladd(float16_to_float32(a, true, s), 3324 float16_to_float32(b, true, s), d, 3325 float_muladd_negate_c, s); 3326 } 3327 3328 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3329 { 3330 return float64_muladd(float32_to_float64(a, s), 3331 float32_to_float64(b, s), d, 3332 float_muladd_negate_c, s); 3333 } 3334 3335 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3336 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3337 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) 3338 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) 3339 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3340 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3341 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) 3342 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) 3343 3344 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3345 { 3346 return float32_muladd(float16_to_float32(a, true, s), 3347 float16_to_float32(b, true, s), d, 3348 float_muladd_negate_product, s); 3349 } 3350 3351 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3352 { 3353 return float64_muladd(float32_to_float64(a, s), 3354 float32_to_float64(b, s), d, 3355 float_muladd_negate_product, s); 3356 } 3357 3358 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3359 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3360 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) 3361 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 3362 
RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3363 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3364 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) 3365 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) 3366 3367 /* Vector Floating-Point Square-Root Instruction */ 3368 /* (TD, T2, TX2) */ 3369 #define OP_UU_H uint16_t, uint16_t, uint16_t 3370 #define OP_UU_W uint32_t, uint32_t, uint32_t 3371 #define OP_UU_D uint64_t, uint64_t, uint64_t 3372 3373 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3374 static void do_##NAME(void *vd, void *vs2, int i, \ 3375 CPURISCVState *env) \ 3376 { \ 3377 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3378 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3379 } 3380 3381 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ 3382 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3383 CPURISCVState *env, uint32_t desc) \ 3384 { \ 3385 uint32_t vm = vext_vm(desc); \ 3386 uint32_t vl = env->vl; \ 3387 uint32_t i; \ 3388 \ 3389 if (vl == 0) { \ 3390 return; \ 3391 } \ 3392 for (i = env->vstart; i < vl; i++) { \ 3393 if (!vm && !vext_elem_mask(v0, i)) { \ 3394 continue; \ 3395 } \ 3396 do_##NAME(vd, vs2, i, env); \ 3397 } \ 3398 env->vstart = 0; \ 3399 } 3400 3401 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3402 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3403 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3404 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) 3405 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) 3406 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) 3407 3408 /* 3409 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3410 * 3411 * Adapted from riscv-v-spec recip.c: 3412 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3413 */ 3414 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3415 { 3416 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3417 uint64_t exp = extract64(f, frac_size, exp_size); 3418 uint64_t frac = extract64(f, 0, frac_size); 3419 3420 const uint8_t lookup_table[] = { 3421 52, 51, 50, 48, 47, 46, 44, 43, 3422 42, 41, 40, 39, 38, 36, 35, 34, 3423 33, 32, 31, 30, 30, 29, 28, 27, 3424 26, 25, 24, 23, 23, 22, 21, 20, 3425 19, 19, 18, 17, 16, 16, 15, 14, 3426 14, 13, 12, 12, 11, 10, 10, 9, 3427 9, 8, 7, 7, 6, 6, 5, 4, 3428 4, 3, 3, 2, 2, 1, 1, 0, 3429 127, 125, 123, 121, 119, 118, 116, 114, 3430 113, 111, 109, 108, 106, 105, 103, 102, 3431 100, 99, 97, 96, 95, 93, 92, 91, 3432 90, 88, 87, 86, 85, 84, 83, 82, 3433 80, 79, 78, 77, 76, 75, 74, 73, 3434 72, 71, 70, 70, 69, 68, 67, 66, 3435 65, 64, 63, 63, 62, 61, 60, 59, 3436 59, 58, 57, 56, 56, 55, 54, 53 3437 }; 3438 const int precision = 7; 3439 3440 if (exp == 0 && frac != 0) { /* subnormal */ 3441 /* Normalize the subnormal. 
*/ 3442 while (extract64(frac, frac_size - 1, 1) == 0) { 3443 exp--; 3444 frac <<= 1; 3445 } 3446 3447 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3448 } 3449 3450 int idx = ((exp & 1) << (precision - 1)) | 3451 (frac >> (frac_size - precision + 1)); 3452 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3453 (frac_size - precision); 3454 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3455 3456 uint64_t val = 0; 3457 val = deposit64(val, 0, frac_size, out_frac); 3458 val = deposit64(val, frac_size, exp_size, out_exp); 3459 val = deposit64(val, frac_size + exp_size, 1, sign); 3460 return val; 3461 } 3462 3463 static float16 frsqrt7_h(float16 f, float_status *s) 3464 { 3465 int exp_size = 5, frac_size = 10; 3466 bool sign = float16_is_neg(f); 3467 3468 /* 3469 * frsqrt7(sNaN) = canonical NaN 3470 * frsqrt7(-inf) = canonical NaN 3471 * frsqrt7(-normal) = canonical NaN 3472 * frsqrt7(-subnormal) = canonical NaN 3473 */ 3474 if (float16_is_signaling_nan(f, s) || 3475 (float16_is_infinity(f) && sign) || 3476 (float16_is_normal(f) && sign) || 3477 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3478 s->float_exception_flags |= float_flag_invalid; 3479 return float16_default_nan(s); 3480 } 3481 3482 /* frsqrt7(qNaN) = canonical NaN */ 3483 if (float16_is_quiet_nan(f, s)) { 3484 return float16_default_nan(s); 3485 } 3486 3487 /* frsqrt7(+-0) = +-inf */ 3488 if (float16_is_zero(f)) { 3489 s->float_exception_flags |= float_flag_divbyzero; 3490 return float16_set_sign(float16_infinity, sign); 3491 } 3492 3493 /* frsqrt7(+inf) = +0 */ 3494 if (float16_is_infinity(f) && !sign) { 3495 return float16_set_sign(float16_zero, sign); 3496 } 3497 3498 /* +normal, +subnormal */ 3499 uint64_t val = frsqrt7(f, exp_size, frac_size); 3500 return make_float16(val); 3501 } 3502 3503 static float32 frsqrt7_s(float32 f, float_status *s) 3504 { 3505 int exp_size = 8, frac_size = 23; 3506 bool sign = float32_is_neg(f); 3507 3508 /* 3509 * frsqrt7(sNaN) = canonical NaN 3510 * frsqrt7(-inf) = canonical NaN 3511 * frsqrt7(-normal) = canonical NaN 3512 * frsqrt7(-subnormal) = canonical NaN 3513 */ 3514 if (float32_is_signaling_nan(f, s) || 3515 (float32_is_infinity(f) && sign) || 3516 (float32_is_normal(f) && sign) || 3517 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3518 s->float_exception_flags |= float_flag_invalid; 3519 return float32_default_nan(s); 3520 } 3521 3522 /* frsqrt7(qNaN) = canonical NaN */ 3523 if (float32_is_quiet_nan(f, s)) { 3524 return float32_default_nan(s); 3525 } 3526 3527 /* frsqrt7(+-0) = +-inf */ 3528 if (float32_is_zero(f)) { 3529 s->float_exception_flags |= float_flag_divbyzero; 3530 return float32_set_sign(float32_infinity, sign); 3531 } 3532 3533 /* frsqrt7(+inf) = +0 */ 3534 if (float32_is_infinity(f) && !sign) { 3535 return float32_set_sign(float32_zero, sign); 3536 } 3537 3538 /* +normal, +subnormal */ 3539 uint64_t val = frsqrt7(f, exp_size, frac_size); 3540 return make_float32(val); 3541 } 3542 3543 static float64 frsqrt7_d(float64 f, float_status *s) 3544 { 3545 int exp_size = 11, frac_size = 52; 3546 bool sign = float64_is_neg(f); 3547 3548 /* 3549 * frsqrt7(sNaN) = canonical NaN 3550 * frsqrt7(-inf) = canonical NaN 3551 * frsqrt7(-normal) = canonical NaN 3552 * frsqrt7(-subnormal) = canonical NaN 3553 */ 3554 if (float64_is_signaling_nan(f, s) || 3555 (float64_is_infinity(f) && sign) || 3556 (float64_is_normal(f) && sign) || 3557 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3558 
s->float_exception_flags |= float_flag_invalid; 3559 return float64_default_nan(s); 3560 } 3561 3562 /* frsqrt7(qNaN) = canonical NaN */ 3563 if (float64_is_quiet_nan(f, s)) { 3564 return float64_default_nan(s); 3565 } 3566 3567 /* frsqrt7(+-0) = +-inf */ 3568 if (float64_is_zero(f)) { 3569 s->float_exception_flags |= float_flag_divbyzero; 3570 return float64_set_sign(float64_infinity, sign); 3571 } 3572 3573 /* frsqrt7(+inf) = +0 */ 3574 if (float64_is_infinity(f) && !sign) { 3575 return float64_set_sign(float64_zero, sign); 3576 } 3577 3578 /* +normal, +subnormal */ 3579 uint64_t val = frsqrt7(f, exp_size, frac_size); 3580 return make_float64(val); 3581 } 3582 3583 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3584 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3585 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3586 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2, 2) 3587 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4, 4) 3588 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8, 8) 3589 3590 /* 3591 * Vector Floating-Point Reciprocal Estimate Instruction 3592 * 3593 * Adapted from riscv-v-spec recip.c: 3594 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3595 */ 3596 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3597 float_status *s) 3598 { 3599 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3600 uint64_t exp = extract64(f, frac_size, exp_size); 3601 uint64_t frac = extract64(f, 0, frac_size); 3602 3603 const uint8_t lookup_table[] = { 3604 127, 125, 123, 121, 119, 117, 116, 114, 3605 112, 110, 109, 107, 105, 104, 102, 100, 3606 99, 97, 96, 94, 93, 91, 90, 88, 3607 87, 85, 84, 83, 81, 80, 79, 77, 3608 76, 75, 74, 72, 71, 70, 69, 68, 3609 66, 65, 64, 63, 62, 61, 60, 59, 3610 58, 57, 56, 55, 54, 53, 52, 51, 3611 50, 49, 48, 47, 46, 45, 44, 43, 3612 42, 41, 40, 40, 39, 38, 37, 36, 3613 35, 35, 34, 33, 32, 31, 31, 30, 3614 29, 28, 28, 27, 26, 25, 25, 24, 3615 23, 23, 22, 21, 21, 20, 19, 19, 3616 18, 17, 17, 16, 15, 15, 14, 14, 3617 13, 12, 12, 11, 11, 10, 9, 9, 3618 8, 8, 7, 7, 6, 5, 5, 4, 3619 4, 3, 3, 2, 2, 1, 1, 0 3620 }; 3621 const int precision = 7; 3622 3623 if (exp == 0 && frac != 0) { /* subnormal */ 3624 /* Normalize the subnormal. */ 3625 while (extract64(frac, frac_size - 1, 1) == 0) { 3626 exp--; 3627 frac <<= 1; 3628 } 3629 3630 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3631 3632 if (exp != 0 && exp != UINT64_MAX) { 3633 /* 3634 * Overflow to inf or max value of same sign, 3635 * depending on sign and rounding mode. 3636 */ 3637 s->float_exception_flags |= (float_flag_inexact | 3638 float_flag_overflow); 3639 3640 if ((s->float_rounding_mode == float_round_to_zero) || 3641 ((s->float_rounding_mode == float_round_down) && !sign) || 3642 ((s->float_rounding_mode == float_round_up) && sign)) { 3643 /* Return greatest/negative finite value. */ 3644 return (sign << (exp_size + frac_size)) | 3645 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3646 } else { 3647 /* Return +-inf. */ 3648 return (sign << (exp_size + frac_size)) | 3649 MAKE_64BIT_MASK(frac_size, exp_size); 3650 } 3651 } 3652 } 3653 3654 int idx = frac >> (frac_size - precision); 3655 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3656 (frac_size - precision); 3657 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3658 3659 if (out_exp == 0 || out_exp == UINT64_MAX) { 3660 /* 3661 * The result is subnormal, but don't raise the underflow exception, 3662 * because there's no additional loss of precision. 
3663 */ 3664 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3665 if (out_exp == UINT64_MAX) { 3666 out_frac >>= 1; 3667 out_exp = 0; 3668 } 3669 } 3670 3671 uint64_t val = 0; 3672 val = deposit64(val, 0, frac_size, out_frac); 3673 val = deposit64(val, frac_size, exp_size, out_exp); 3674 val = deposit64(val, frac_size + exp_size, 1, sign); 3675 return val; 3676 } 3677 3678 static float16 frec7_h(float16 f, float_status *s) 3679 { 3680 int exp_size = 5, frac_size = 10; 3681 bool sign = float16_is_neg(f); 3682 3683 /* frec7(+-inf) = +-0 */ 3684 if (float16_is_infinity(f)) { 3685 return float16_set_sign(float16_zero, sign); 3686 } 3687 3688 /* frec7(+-0) = +-inf */ 3689 if (float16_is_zero(f)) { 3690 s->float_exception_flags |= float_flag_divbyzero; 3691 return float16_set_sign(float16_infinity, sign); 3692 } 3693 3694 /* frec7(sNaN) = canonical NaN */ 3695 if (float16_is_signaling_nan(f, s)) { 3696 s->float_exception_flags |= float_flag_invalid; 3697 return float16_default_nan(s); 3698 } 3699 3700 /* frec7(qNaN) = canonical NaN */ 3701 if (float16_is_quiet_nan(f, s)) { 3702 return float16_default_nan(s); 3703 } 3704 3705 /* +-normal, +-subnormal */ 3706 uint64_t val = frec7(f, exp_size, frac_size, s); 3707 return make_float16(val); 3708 } 3709 3710 static float32 frec7_s(float32 f, float_status *s) 3711 { 3712 int exp_size = 8, frac_size = 23; 3713 bool sign = float32_is_neg(f); 3714 3715 /* frec7(+-inf) = +-0 */ 3716 if (float32_is_infinity(f)) { 3717 return float32_set_sign(float32_zero, sign); 3718 } 3719 3720 /* frec7(+-0) = +-inf */ 3721 if (float32_is_zero(f)) { 3722 s->float_exception_flags |= float_flag_divbyzero; 3723 return float32_set_sign(float32_infinity, sign); 3724 } 3725 3726 /* frec7(sNaN) = canonical NaN */ 3727 if (float32_is_signaling_nan(f, s)) { 3728 s->float_exception_flags |= float_flag_invalid; 3729 return float32_default_nan(s); 3730 } 3731 3732 /* frec7(qNaN) = canonical NaN */ 3733 if (float32_is_quiet_nan(f, s)) { 3734 return float32_default_nan(s); 3735 } 3736 3737 /* +-normal, +-subnormal */ 3738 uint64_t val = frec7(f, exp_size, frac_size, s); 3739 return make_float32(val); 3740 } 3741 3742 static float64 frec7_d(float64 f, float_status *s) 3743 { 3744 int exp_size = 11, frac_size = 52; 3745 bool sign = float64_is_neg(f); 3746 3747 /* frec7(+-inf) = +-0 */ 3748 if (float64_is_infinity(f)) { 3749 return float64_set_sign(float64_zero, sign); 3750 } 3751 3752 /* frec7(+-0) = +-inf */ 3753 if (float64_is_zero(f)) { 3754 s->float_exception_flags |= float_flag_divbyzero; 3755 return float64_set_sign(float64_infinity, sign); 3756 } 3757 3758 /* frec7(sNaN) = canonical NaN */ 3759 if (float64_is_signaling_nan(f, s)) { 3760 s->float_exception_flags |= float_flag_invalid; 3761 return float64_default_nan(s); 3762 } 3763 3764 /* frec7(qNaN) = canonical NaN */ 3765 if (float64_is_quiet_nan(f, s)) { 3766 return float64_default_nan(s); 3767 } 3768 3769 /* +-normal, +-subnormal */ 3770 uint64_t val = frec7(f, exp_size, frac_size, s); 3771 return make_float64(val); 3772 } 3773 3774 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3775 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3776 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3777 GEN_VEXT_V_ENV(vfrec7_v_h, 2, 2) 3778 GEN_VEXT_V_ENV(vfrec7_v_w, 4, 4) 3779 GEN_VEXT_V_ENV(vfrec7_v_d, 8, 8) 3780 3781 /* Vector Floating-Point MIN/MAX Instructions */ 3782 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3783 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 3784 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3785 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) 3786 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) 3787 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) 3788 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3789 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3790 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3791 GEN_VEXT_VF(vfmin_vf_h, 2, 2) 3792 GEN_VEXT_VF(vfmin_vf_w, 4, 4) 3793 GEN_VEXT_VF(vfmin_vf_d, 8, 8) 3794 3795 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3796 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3797 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3798 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) 3799 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) 3800 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) 3801 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3802 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3803 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3804 GEN_VEXT_VF(vfmax_vf_h, 2, 2) 3805 GEN_VEXT_VF(vfmax_vf_w, 4, 4) 3806 GEN_VEXT_VF(vfmax_vf_d, 8, 8) 3807 3808 /* Vector Floating-Point Sign-Injection Instructions */ 3809 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3810 { 3811 return deposit64(b, 0, 15, a); 3812 } 3813 3814 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3815 { 3816 return deposit64(b, 0, 31, a); 3817 } 3818 3819 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3820 { 3821 return deposit64(b, 0, 63, a); 3822 } 3823 3824 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3825 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3826 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3827 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) 3828 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) 3829 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) 3830 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3831 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3832 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3833 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) 3834 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) 3835 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) 3836 3837 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3838 { 3839 return deposit64(~b, 0, 15, a); 3840 } 3841 3842 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3843 { 3844 return deposit64(~b, 0, 31, a); 3845 } 3846 3847 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3848 { 3849 return deposit64(~b, 0, 63, a); 3850 } 3851 3852 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3853 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3854 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3855 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) 3856 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) 3857 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) 3858 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3859 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3860 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3861 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) 3862 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) 3863 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) 3864 3865 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3866 { 3867 return deposit64(b ^ a, 0, 15, a); 3868 } 3869 3870 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3871 { 3872 return deposit64(b ^ a, 0, 31, a); 3873 
} 3874 3875 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3876 { 3877 return deposit64(b ^ a, 0, 63, a); 3878 } 3879 3880 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3881 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3882 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3883 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) 3884 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) 3885 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) 3886 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3887 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3888 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3889 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) 3890 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) 3891 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) 3892 3893 /* Vector Floating-Point Compare Instructions */ 3894 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3895 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3896 CPURISCVState *env, uint32_t desc) \ 3897 { \ 3898 uint32_t vm = vext_vm(desc); \ 3899 uint32_t vl = env->vl; \ 3900 uint32_t i; \ 3901 \ 3902 for (i = env->vstart; i < vl; i++) { \ 3903 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3904 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3905 if (!vm && !vext_elem_mask(v0, i)) { \ 3906 continue; \ 3907 } \ 3908 vext_set_elem_mask(vd, i, \ 3909 DO_OP(s2, s1, &env->fp_status)); \ 3910 } \ 3911 env->vstart = 0; \ 3912 } 3913 3914 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3915 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3916 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3917 3918 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3919 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3920 CPURISCVState *env, uint32_t desc) \ 3921 { \ 3922 uint32_t vm = vext_vm(desc); \ 3923 uint32_t vl = env->vl; \ 3924 uint32_t i; \ 3925 \ 3926 for (i = env->vstart; i < vl; i++) { \ 3927 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3928 if (!vm && !vext_elem_mask(v0, i)) { \ 3929 continue; \ 3930 } \ 3931 vext_set_elem_mask(vd, i, \ 3932 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3933 } \ 3934 env->vstart = 0; \ 3935 } 3936 3937 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3938 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3939 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3940 3941 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3942 { 3943 FloatRelation compare = float16_compare_quiet(a, b, s); 3944 return compare != float_relation_equal; 3945 } 3946 3947 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 3948 { 3949 FloatRelation compare = float32_compare_quiet(a, b, s); 3950 return compare != float_relation_equal; 3951 } 3952 3953 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 3954 { 3955 FloatRelation compare = float64_compare_quiet(a, b, s); 3956 return compare != float_relation_equal; 3957 } 3958 3959 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 3960 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 3961 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 3962 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 3963 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 3964 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 3965 3966 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 3967 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 3968 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 3969 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 3970 
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 3971 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 3972 3973 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 3974 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 3975 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 3976 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 3977 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 3978 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 3979 3980 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 3981 { 3982 FloatRelation compare = float16_compare(a, b, s); 3983 return compare == float_relation_greater; 3984 } 3985 3986 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 3987 { 3988 FloatRelation compare = float32_compare(a, b, s); 3989 return compare == float_relation_greater; 3990 } 3991 3992 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 3993 { 3994 FloatRelation compare = float64_compare(a, b, s); 3995 return compare == float_relation_greater; 3996 } 3997 3998 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 3999 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4000 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4001 4002 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4003 { 4004 FloatRelation compare = float16_compare(a, b, s); 4005 return compare == float_relation_greater || 4006 compare == float_relation_equal; 4007 } 4008 4009 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4010 { 4011 FloatRelation compare = float32_compare(a, b, s); 4012 return compare == float_relation_greater || 4013 compare == float_relation_equal; 4014 } 4015 4016 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4017 { 4018 FloatRelation compare = float64_compare(a, b, s); 4019 return compare == float_relation_greater || 4020 compare == float_relation_equal; 4021 } 4022 4023 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4024 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4025 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4026 4027 /* Vector Floating-Point Classify Instruction */ 4028 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4029 static void do_##NAME(void *vd, void *vs2, int i) \ 4030 { \ 4031 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4032 *((TD *)vd + HD(i)) = OP(s2); \ 4033 } 4034 4035 #define GEN_VEXT_V(NAME, ESZ, DSZ) \ 4036 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4037 CPURISCVState *env, uint32_t desc) \ 4038 { \ 4039 uint32_t vm = vext_vm(desc); \ 4040 uint32_t vl = env->vl; \ 4041 uint32_t i; \ 4042 \ 4043 for (i = env->vstart; i < vl; i++) { \ 4044 if (!vm && !vext_elem_mask(v0, i)) { \ 4045 continue; \ 4046 } \ 4047 do_##NAME(vd, vs2, i); \ 4048 } \ 4049 env->vstart = 0; \ 4050 } 4051 4052 target_ulong fclass_h(uint64_t frs1) 4053 { 4054 float16 f = frs1; 4055 bool sign = float16_is_neg(f); 4056 4057 if (float16_is_infinity(f)) { 4058 return sign ? 1 << 0 : 1 << 7; 4059 } else if (float16_is_zero(f)) { 4060 return sign ? 1 << 3 : 1 << 4; 4061 } else if (float16_is_zero_or_denormal(f)) { 4062 return sign ? 1 << 2 : 1 << 5; 4063 } else if (float16_is_any_nan(f)) { 4064 float_status s = { }; /* for snan_bit_is_one */ 4065 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4066 } else { 4067 return sign ? 1 << 1 : 1 << 6; 4068 } 4069 } 4070 4071 target_ulong fclass_s(uint64_t frs1) 4072 { 4073 float32 f = frs1; 4074 bool sign = float32_is_neg(f); 4075 4076 if (float32_is_infinity(f)) { 4077 return sign ? 
1 << 0 : 1 << 7; 4078 } else if (float32_is_zero(f)) { 4079 return sign ? 1 << 3 : 1 << 4; 4080 } else if (float32_is_zero_or_denormal(f)) { 4081 return sign ? 1 << 2 : 1 << 5; 4082 } else if (float32_is_any_nan(f)) { 4083 float_status s = { }; /* for snan_bit_is_one */ 4084 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4085 } else { 4086 return sign ? 1 << 1 : 1 << 6; 4087 } 4088 } 4089 4090 target_ulong fclass_d(uint64_t frs1) 4091 { 4092 float64 f = frs1; 4093 bool sign = float64_is_neg(f); 4094 4095 if (float64_is_infinity(f)) { 4096 return sign ? 1 << 0 : 1 << 7; 4097 } else if (float64_is_zero(f)) { 4098 return sign ? 1 << 3 : 1 << 4; 4099 } else if (float64_is_zero_or_denormal(f)) { 4100 return sign ? 1 << 2 : 1 << 5; 4101 } else if (float64_is_any_nan(f)) { 4102 float_status s = { }; /* for snan_bit_is_one */ 4103 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4104 } else { 4105 return sign ? 1 << 1 : 1 << 6; 4106 } 4107 } 4108 4109 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4110 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4111 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4112 GEN_VEXT_V(vfclass_v_h, 2, 2) 4113 GEN_VEXT_V(vfclass_v_w, 4, 4) 4114 GEN_VEXT_V(vfclass_v_d, 8, 8) 4115 4116 /* Vector Floating-Point Merge Instruction */ 4117 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4118 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4119 CPURISCVState *env, uint32_t desc) \ 4120 { \ 4121 uint32_t vm = vext_vm(desc); \ 4122 uint32_t vl = env->vl; \ 4123 uint32_t i; \ 4124 \ 4125 for (i = env->vstart; i < vl; i++) { \ 4126 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4127 *((ETYPE *)vd + H(i)) \ 4128 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4129 } \ 4130 env->vstart = 0; \ 4131 } 4132 4133 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4134 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4135 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4136 4137 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4138 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4139 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4140 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4141 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4142 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) 4143 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) 4144 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) 4145 4146 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4147 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4148 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4149 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4150 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) 4151 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) 4152 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) 4153 4154 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4155 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4156 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4157 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4158 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) 4159 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) 4160 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) 4161 4162 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. 
 */
RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2)
GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4)
GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8)

/* Widening Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
#define WOP_UU_B uint16_t, uint8_t, uint8_t
#define WOP_UU_H uint32_t, uint16_t, uint16_t
#define WOP_UU_W uint64_t, uint32_t, uint32_t
/* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. */
RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8)

/* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8)

/* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 1, 2)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8)

/* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 1, 2)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8)

/*
 * vfwcvt.f.f.v vd, vs2, vm
 * Convert single-width float to double-width float.
 */
static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
{
    return float16_to_float32(a, true, s);
}

RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)

/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
#define NOP_UU_B uint8_t, uint16_t, uint32_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1, 1)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4, 4)

/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer.
*/ 4231 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4232 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4233 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4234 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1, 1) 4235 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2, 2) 4236 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4, 4) 4237 4238 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4239 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4240 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4241 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2, 2) 4242 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4, 4) 4243 4244 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4245 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4246 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4247 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2, 2) 4248 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4, 4) 4249 4250 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ 4251 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4252 { 4253 return float32_to_float16(a, true, s); 4254 } 4255 4256 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4257 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4258 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2, 2) 4259 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4, 4) 4260 4261 /* 4262 *** Vector Reduction Operations 4263 */ 4264 /* Vector Single-Width Integer Reduction Instructions */ 4265 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4266 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4267 void *vs2, CPURISCVState *env, uint32_t desc) \ 4268 { \ 4269 uint32_t vm = vext_vm(desc); \ 4270 uint32_t vl = env->vl; \ 4271 uint32_t i; \ 4272 TD s1 = *((TD *)vs1 + HD(0)); \ 4273 \ 4274 for (i = env->vstart; i < vl; i++) { \ 4275 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4276 if (!vm && !vext_elem_mask(v0, i)) { \ 4277 continue; \ 4278 } \ 4279 s1 = OP(s1, (TD)s2); \ 4280 } \ 4281 *((TD *)vd + HD(0)) = s1; \ 4282 env->vstart = 0; \ 4283 } 4284 4285 /* vd[0] = sum(vs1[0], vs2[*]) */ 4286 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4287 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4288 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4289 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4290 4291 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4292 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4293 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4294 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4295 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4296 4297 /* vd[0] = max(vs1[0], vs2[*]) */ 4298 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4299 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4300 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4301 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4302 4303 /* vd[0] = minu(vs1[0], vs2[*]) */ 4304 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4305 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4306 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4307 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4308 4309 /* vd[0] = min(vs1[0], vs2[*]) */ 4310 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4311 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4312 
GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4313 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4314 4315 /* vd[0] = and(vs1[0], vs2[*]) */ 4316 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4317 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4318 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4319 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4320 4321 /* vd[0] = or(vs1[0], vs2[*]) */ 4322 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4323 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4324 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4325 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4326 4327 /* vd[0] = xor(vs1[0], vs2[*]) */ 4328 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4329 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4330 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4331 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4332 4333 /* Vector Widening Integer Reduction Instructions */ 4334 /* signed sum reduction into double-width accumulator */ 4335 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4336 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4337 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4338 4339 /* Unsigned sum reduction into double-width accumulator */ 4340 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4341 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4342 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4343 4344 /* Vector Single-Width Floating-Point Reduction Instructions */ 4345 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4346 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4347 void *vs2, CPURISCVState *env, \ 4348 uint32_t desc) \ 4349 { \ 4350 uint32_t vm = vext_vm(desc); \ 4351 uint32_t vl = env->vl; \ 4352 uint32_t i; \ 4353 TD s1 = *((TD *)vs1 + HD(0)); \ 4354 \ 4355 for (i = env->vstart; i < vl; i++) { \ 4356 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4357 if (!vm && !vext_elem_mask(v0, i)) { \ 4358 continue; \ 4359 } \ 4360 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4361 } \ 4362 *((TD *)vd + HD(0)) = s1; \ 4363 env->vstart = 0; \ 4364 } 4365 4366 /* Unordered sum */ 4367 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4368 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4369 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4370 4371 /* Maximum value */ 4372 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number) 4373 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number) 4374 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number) 4375 4376 /* Minimum value */ 4377 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number) 4378 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number) 4379 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number) 4380 4381 /* Vector Widening Floating-Point Reduction Instructions */ 4382 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4383 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4384 void *vs2, CPURISCVState *env, uint32_t desc) 4385 { 4386 uint32_t vm = vext_vm(desc); 4387 uint32_t vl = env->vl; 4388 uint32_t i; 4389 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4390 4391 for (i = env->vstart; i < vl; i++) { 
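        /*
         * Each active f16 element of vs2 is first widened to f32 and then
         * accumulated into the running f32 scalar sum below; elements that
         * are masked off (vm == 0 and mask bit clear) contribute nothing.
         */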
4392 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4393 if (!vm && !vext_elem_mask(v0, i)) { 4394 continue; 4395 } 4396 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4397 &env->fp_status); 4398 } 4399 *((uint32_t *)vd + H4(0)) = s1; 4400 env->vstart = 0; 4401 } 4402 4403 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4404 void *vs2, CPURISCVState *env, uint32_t desc) 4405 { 4406 uint32_t vm = vext_vm(desc); 4407 uint32_t vl = env->vl; 4408 uint32_t i; 4409 uint64_t s1 = *((uint64_t *)vs1); 4410 4411 for (i = env->vstart; i < vl; i++) { 4412 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4413 if (!vm && !vext_elem_mask(v0, i)) { 4414 continue; 4415 } 4416 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4417 &env->fp_status); 4418 } 4419 *((uint64_t *)vd) = s1; 4420 env->vstart = 0; 4421 } 4422 4423 /* 4424 *** Vector Mask Operations 4425 */ 4426 /* Vector Mask-Register Logical Instructions */ 4427 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4428 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4429 void *vs2, CPURISCVState *env, \ 4430 uint32_t desc) \ 4431 { \ 4432 uint32_t vl = env->vl; \ 4433 uint32_t i; \ 4434 int a, b; \ 4435 \ 4436 for (i = env->vstart; i < vl; i++) { \ 4437 a = vext_elem_mask(vs1, i); \ 4438 b = vext_elem_mask(vs2, i); \ 4439 vext_set_elem_mask(vd, i, OP(b, a)); \ 4440 } \ 4441 env->vstart = 0; \ 4442 } 4443 4444 #define DO_NAND(N, M) (!(N & M)) 4445 #define DO_ANDNOT(N, M) (N & !M) 4446 #define DO_NOR(N, M) (!(N | M)) 4447 #define DO_ORNOT(N, M) (N | !M) 4448 #define DO_XNOR(N, M) (!(N ^ M)) 4449 4450 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4451 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4452 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) 4453 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4454 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4455 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4456 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) 4457 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4458 4459 /* Vector count population in mask vcpop */ 4460 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4461 uint32_t desc) 4462 { 4463 target_ulong cnt = 0; 4464 uint32_t vm = vext_vm(desc); 4465 uint32_t vl = env->vl; 4466 int i; 4467 4468 for (i = env->vstart; i < vl; i++) { 4469 if (vm || vext_elem_mask(v0, i)) { 4470 if (vext_elem_mask(vs2, i)) { 4471 cnt++; 4472 } 4473 } 4474 } 4475 env->vstart = 0; 4476 return cnt; 4477 } 4478 4479 /* vfirst find-first-set mask bit*/ 4480 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4481 uint32_t desc) 4482 { 4483 uint32_t vm = vext_vm(desc); 4484 uint32_t vl = env->vl; 4485 int i; 4486 4487 for (i = env->vstart; i < vl; i++) { 4488 if (vm || vext_elem_mask(v0, i)) { 4489 if (vext_elem_mask(vs2, i)) { 4490 return i; 4491 } 4492 } 4493 } 4494 env->vstart = 0; 4495 return -1LL; 4496 } 4497 4498 enum set_mask_type { 4499 ONLY_FIRST = 1, 4500 INCLUDE_FIRST, 4501 BEFORE_FIRST, 4502 }; 4503 4504 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4505 uint32_t desc, enum set_mask_type type) 4506 { 4507 uint32_t vm = vext_vm(desc); 4508 uint32_t vl = env->vl; 4509 int i; 4510 bool first_mask_bit = false; 4511 4512 for (i = env->vstart; i < vl; i++) { 4513 if (!vm && !vext_elem_mask(v0, i)) { 4514 continue; 4515 } 4516 /* write a zero to all following active elements */ 4517 if (first_mask_bit) { 4518 vext_set_elem_mask(vd, i, 0); 4519 continue; 4520 } 4521 if (vext_elem_mask(vs2, i)) { 4522 first_mask_bit = true; 4523 if (type == BEFORE_FIRST) { 4524 vext_set_elem_mask(vd, i, 0); 4525 } else { 4526 
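                /*
                 * For vmsif (INCLUDE_FIRST) and vmsof (ONLY_FIRST) the
                 * element that holds the first set mask bit is itself
                 * written as 1; only vmsbf (BEFORE_FIRST) clears it.
                 */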
vext_set_elem_mask(vd, i, 1); 4527 } 4528 } else { 4529 if (type == ONLY_FIRST) { 4530 vext_set_elem_mask(vd, i, 0); 4531 } else { 4532 vext_set_elem_mask(vd, i, 1); 4533 } 4534 } 4535 } 4536 env->vstart = 0; 4537 } 4538 4539 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4540 uint32_t desc) 4541 { 4542 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4543 } 4544 4545 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4546 uint32_t desc) 4547 { 4548 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4549 } 4550 4551 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4552 uint32_t desc) 4553 { 4554 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4555 } 4556 4557 /* Vector Iota Instruction */ 4558 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4559 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4560 uint32_t desc) \ 4561 { \ 4562 uint32_t vm = vext_vm(desc); \ 4563 uint32_t vl = env->vl; \ 4564 uint32_t sum = 0; \ 4565 int i; \ 4566 \ 4567 for (i = env->vstart; i < vl; i++) { \ 4568 if (!vm && !vext_elem_mask(v0, i)) { \ 4569 continue; \ 4570 } \ 4571 *((ETYPE *)vd + H(i)) = sum; \ 4572 if (vext_elem_mask(vs2, i)) { \ 4573 sum++; \ 4574 } \ 4575 } \ 4576 env->vstart = 0; \ 4577 } 4578 4579 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4580 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4581 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4582 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4583 4584 /* Vector Element Index Instruction */ 4585 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4586 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4587 { \ 4588 uint32_t vm = vext_vm(desc); \ 4589 uint32_t vl = env->vl; \ 4590 int i; \ 4591 \ 4592 for (i = env->vstart; i < vl; i++) { \ 4593 if (!vm && !vext_elem_mask(v0, i)) { \ 4594 continue; \ 4595 } \ 4596 *((ETYPE *)vd + H(i)) = i; \ 4597 } \ 4598 env->vstart = 0; \ 4599 } 4600 4601 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4602 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4603 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4604 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4605 4606 /* 4607 *** Vector Permutation Instructions 4608 */ 4609 4610 /* Vector Slide Instructions */ 4611 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4612 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4613 CPURISCVState *env, uint32_t desc) \ 4614 { \ 4615 uint32_t vm = vext_vm(desc); \ 4616 uint32_t vl = env->vl; \ 4617 target_ulong offset = s1, i_min, i; \ 4618 \ 4619 i_min = MAX(env->vstart, offset); \ 4620 for (i = i_min; i < vl; i++) { \ 4621 if (!vm && !vext_elem_mask(v0, i)) { \ 4622 continue; \ 4623 } \ 4624 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4625 } \ 4626 } 4627 4628 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4629 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4630 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4631 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4632 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4633 4634 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4635 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4636 CPURISCVState *env, uint32_t desc) \ 4637 { \ 4638 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4639 uint32_t vm = vext_vm(desc); \ 4640 uint32_t vl = env->vl; \ 4641 target_ulong i_max, i; \ 4642 \ 4643 i_max = MAX(MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl), env->vstart); \ 4644 for (i = env->vstart; i < i_max; ++i) { \ 4645 if (vm || vext_elem_mask(v0, i)) { \ 4646 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4647 } \ 4648 } \ 4649 \ 4650 for (i = i_max; i < vl; ++i) { \ 4651 if (vm || vext_elem_mask(v0, i)) { \ 4652 *((ETYPE *)vd + H(i)) = 0; \ 4653 } \ 4654 } \ 4655 \ 4656 env->vstart = 0; \ 4657 } 4658 4659 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4660 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4661 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4662 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4663 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4664 4665 #define GEN_VEXT_VSLIE1UP(ESZ, H) \ 4666 static void vslide1up_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4667 CPURISCVState *env, uint32_t desc) \ 4668 { \ 4669 typedef uint##ESZ##_t ETYPE; \ 4670 uint32_t vm = vext_vm(desc); \ 4671 uint32_t vl = env->vl; \ 4672 uint32_t i; \ 4673 \ 4674 for (i = env->vstart; i < vl; i++) { \ 4675 if (!vm && !vext_elem_mask(v0, i)) { \ 4676 continue; \ 4677 } \ 4678 if (i == 0) { \ 4679 *((ETYPE *)vd + H(i)) = s1; \ 4680 } else { \ 4681 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4682 } \ 4683 } \ 4684 env->vstart = 0; \ 4685 } 4686 4687 GEN_VEXT_VSLIE1UP(8, H1) 4688 GEN_VEXT_VSLIE1UP(16, H2) 4689 GEN_VEXT_VSLIE1UP(32, H4) 4690 GEN_VEXT_VSLIE1UP(64, H8) 4691 4692 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ESZ) \ 4693 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4694 CPURISCVState *env, uint32_t desc) \ 4695 { \ 4696 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4697 } 4698 4699 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4700 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4701 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4702 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4703 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4704 4705 #define GEN_VEXT_VSLIDE1DOWN(ESZ, H) \ 4706 static void vslide1down_##ESZ(void *vd, void *v0, target_ulong s1, void *vs2, \ 4707 CPURISCVState *env, uint32_t desc) \ 4708 { \ 4709 typedef uint##ESZ##_t ETYPE; \ 4710 uint32_t vm = vext_vm(desc); \ 4711 uint32_t vl = env->vl; \ 4712 uint32_t i; \ 4713 \ 4714 for (i = env->vstart; i < vl; i++) { \ 4715 if (!vm && !vext_elem_mask(v0, i)) { \ 4716 continue; \ 4717 } \ 4718 if (i == vl - 1) { \ 4719 *((ETYPE *)vd + H(i)) = s1; \ 4720 } else { \ 4721 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4722 } \ 4723 } \ 4724 env->vstart = 0; \ 4725 } 4726 4727 GEN_VEXT_VSLIDE1DOWN(8, H1) 4728 GEN_VEXT_VSLIDE1DOWN(16, H2) 4729 GEN_VEXT_VSLIDE1DOWN(32, H4) 4730 GEN_VEXT_VSLIDE1DOWN(64, H8) 4731 4732 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ESZ) \ 4733 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4734 CPURISCVState *env, uint32_t desc) \ 4735 { \ 4736 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4737 } 4738 4739 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4740 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4741 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4742 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4743 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4744 4745 /* Vector Floating-Point Slide Instructions */ 4746 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, ESZ) \ 4747 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4748 CPURISCVState *env, uint32_t desc) \ 4749 { \ 4750 vslide1up_##ESZ(vd, v0, s1, vs2, env, desc); \ 4751 } 4752 4753 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = 
vs2[i] */ 4754 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4755 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4756 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4757 4758 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, ESZ) \ 4759 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4760 CPURISCVState *env, uint32_t desc) \ 4761 { \ 4762 vslide1down_##ESZ(vd, v0, s1, vs2, env, desc); \ 4763 } 4764 4765 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4766 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4767 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4768 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4769 4770 /* Vector Register Gather Instruction */ 4771 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4772 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4773 CPURISCVState *env, uint32_t desc) \ 4774 { \ 4775 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4776 uint32_t vm = vext_vm(desc); \ 4777 uint32_t vl = env->vl; \ 4778 uint64_t index; \ 4779 uint32_t i; \ 4780 \ 4781 for (i = env->vstart; i < vl; i++) { \ 4782 if (!vm && !vext_elem_mask(v0, i)) { \ 4783 continue; \ 4784 } \ 4785 index = *((TS1 *)vs1 + HS1(i)); \ 4786 if (index >= vlmax) { \ 4787 *((TS2 *)vd + HS2(i)) = 0; \ 4788 } else { \ 4789 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4790 } \ 4791 } \ 4792 env->vstart = 0; \ 4793 } 4794 4795 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 4796 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4797 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4798 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4799 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4800 4801 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4802 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4803 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4804 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4805 4806 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4807 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4808 CPURISCVState *env, uint32_t desc) \ 4809 { \ 4810 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4811 uint32_t vm = vext_vm(desc); \ 4812 uint32_t vl = env->vl; \ 4813 uint64_t index = s1; \ 4814 uint32_t i; \ 4815 \ 4816 for (i = env->vstart; i < vl; i++) { \ 4817 if (!vm && !vext_elem_mask(v0, i)) { \ 4818 continue; \ 4819 } \ 4820 if (index >= vlmax) { \ 4821 *((ETYPE *)vd + H(i)) = 0; \ 4822 } else { \ 4823 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4824 } \ 4825 } \ 4826 env->vstart = 0; \ 4827 } 4828 4829 /* vd[i] = (x[rs1] >= VLMAX) ? 
0 : vs2[rs1] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Whole Register Move */
#define GEN_VEXT_VMV_WHOLE(NAME, LEN)                      \
void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \
                  uint32_t desc)                           \
{                                                          \
    /* EEW = 8 */                                          \
    uint32_t maxsz = simd_maxsz(desc);                     \
    uint32_t i = env->vstart;                              \
                                                           \
    memcpy((uint8_t *)vd + H1(i),                          \
           (uint8_t *)vs2 + H1(i),                         \
           maxsz - env->vstart);                           \
                                                           \
    env->vstart = 0;                                       \
}

GEN_VEXT_VMV_WHOLE(vmv1r_v, 1)
GEN_VEXT_VMV_WHOLE(vmv2r_v, 2)
GEN_VEXT_VMV_WHOLE(vmv4r_v, 4)
GEN_VEXT_VMV_WHOLE(vmv8r_v, 8)

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)            \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                 \
                  CPURISCVState *env, uint32_t desc)             \
{                                                                \
    uint32_t vl = env->vl;                                       \
    uint32_t vm = vext_vm(desc);                                 \
    uint32_t i;                                                  \
                                                                 \
    for (i = env->vstart; i < vl; i++) {                         \
        if (!vm && !vext_elem_mask(v0, i)) {                     \
            continue;                                            \
        }                                                        \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));       \
    }                                                            \
    env->vstart = 0;                                             \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t,  int8_t,  H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t,  int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t,  int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t,  int8_t,  H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t,  int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t,  int8_t,  H8, H1)
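
/*
 * Illustrative sketch (not part of the build): the integer-extension
 * helpers generated above copy each active source element into a wider
 * destination element and rely on C's implicit integer conversions for
 * the zero- or sign-extension itself.  The toy function below uses plain
 * arrays instead of CPURISCVState, the VDATA descriptor and the H*()
 * host-endian fixups, and its name is hypothetical; it only shows the
 * core per-element behaviour of e.g. vzext.vf2 / vsext.vf2 with SEW=16
 * and all elements active.
 */
#if 0
static void toy_ext_vf2_h(uint16_t *vd, const uint8_t *vs2_u,
                          const int8_t *vs2_s, uint32_t vl, bool is_signed)
{
    for (uint32_t i = 0; i < vl; i++) {
        if (is_signed) {
            /* vsext.vf2: sign-extend each int8_t source element */
            vd[i] = (uint16_t)(int16_t)vs2_s[i];
        } else {
            /* vzext.vf2: zero-extend each uint8_t source element */
            vd[i] = vs2_u[i];
        }
    }
}
#endif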