/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/page-protection.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include "vector_internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
    uint16_t sew = 8 << vsew;
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);
    uint16_t vlen = cpu->cfg.vlenb << 3;
    int8_t lmul;

    if (vlmul & 4) {
        /*
         * Fractional LMUL, check:
         *
         * VLEN * LMUL >= SEW
         * VLEN >> (8 - lmul) >= sew
         * (vlenb << 3) >> (8 - lmul) >= sew
         */
        if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    /* lmul encoded as in DisasContext::lmul */
    lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
    vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
    if (s1 <= vlmax) {
        vl = s1;
    } else if (s1 < 2 * vlmax && cpu->cfg.rvv_vl_half_avl) {
        vl = (s1 + 1) >> 1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}
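/*
 * Illustrative note (not part of the original file): a worked example of the
 * vl computation above, assuming VLEN = 128 bits (vlenb = 16), SEW = 32
 * (vsew = 2) and LMUL = 2:
 *
 *   VLMAX = LMUL * VLEN / SEW = 2 * 128 / 32 = 8
 *
 * so an AVL (s1) of 5 gives vl = 5, while an AVL of 10 is clamped to vl = 8,
 * unless cpu->cfg.rvv_vl_half_avl is set, in which case
 * VLMAX < AVL < 2 * VLMAX yields vl = ceil(10 / 2) = 5.
 */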
/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & ~env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In system mode, the TLB API probe_access is enough for the watchpoint
 * check. In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);
    int mmu_index = riscv_env_mmu_index(env, false);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 mmu_index, ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     mmu_index, ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/* elements operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
}                                                          \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
                                   uint32_t desc, uint32_t nf,
                                   uint32_t esz, uint32_t max_elems)
{
    uint32_t vta = vext_vta(desc);
    int k;

    if (vta == 0) {
        return;
    }

    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
}

/*
 * stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env);

    for (i = env->vstart; i < env->vl; env->vstart = ++i) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}
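/*
 * Illustrative note (not from the original source): for a strided segment
 * load such as vlsseg3e32.v with nf = 3, esz = 4 and stride = 64, element i
 * of field k is read from
 *
 *   base + stride * i + (k << log2_esz)   e.g. i = 2, k = 1 -> base + 132
 *
 * and written to destination index i + k * max_elems, i.e. field k lands in
 * the k-th register group of the destination.
 */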
#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN)                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 * unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
             uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;

    VSTART_CHECK_EARLY_EXIT(env);

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; env->vstart = ++i) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
}
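/*
 * Illustrative note (not from the original source): for a unit-stride segment
 * access the fields of one segment are adjacent in memory, so element i of
 * field k lives at base + ((i * nf + k) << log2_esz). With nf = 2 and
 * esz = 2, segment 3 occupies bytes base + 12..15 (field 0 at +12, field 1
 * at +14), while in the register file the fields are again separated into
 * distinct register groups via the i + k * max_elems destination index.
 */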
/*
 * masked unit-stride load and store operations are a special case of the
 * strided ones, with stride = NF * sizeof(ETYPE)
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN)                            \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN)                           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE));             \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     ctzl(sizeof(ETYPE)), GETPC());                     \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC());                \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)

/*
 * unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC());
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC());
}

/*
 * index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)   \
static target_ulong NAME(target_ulong base,       \
                         uint32_t idx, void *vs2) \
{                                                 \
    return (base + *((ETYPE *)vs2 + H(idx)));     \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    VSTART_CHECK_EARLY_EXIT(env);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; env->vstart = ++i) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN)        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC());      \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
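/*
 * Illustrative note (not from the original source): for an indexed (gather)
 * access the index EEW and the data EEW may differ; vs2 supplies a byte
 * offset per element, so with base = 0x1000 and a 16-bit index value of
 * 0x20 at position i, element i of field k is accessed at
 * 0x1020 + (k << log2_esz), regardless of the data element size.
 */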
#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, ctzl(sizeof(ETYPE)),               \
                    GETPC());                                    \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 * unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, offset, remain;
    int mmu_index = riscv_env_mmu_index(env, false);

    VSTART_CHECK_EARLY_EXIT(env);

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            /* Allow fault on first element. */
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            remain = nf << log2_esz;
            while (remain > 0) {
                void *host;
                int flags;

                offset = -(addr | TARGET_PAGE_MASK);

                /* Probe nonfault on subsequent elements. */
                flags = probe_access_flags(env, addr, offset, MMU_DATA_LOAD,
                                           mmu_index, true, &host, 0);

                /*
                 * Stop if invalid (unmapped) or mmio (transaction may fail).
                 * Do not stop if watchpoint, as the spec says that
                 * first-fault should continue to access the same
                 * elements regardless of any watchpoint.
                 */
                if (flags & ~TLB_WATCHPOINT) {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/*
 * load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
    uint32_t max_elems = vlenb >> log2_esz;

    if (env->vstart >= ((vlenb * nf) >> log2_esz)) {
        env->vstart = 0;
        return;
    }

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /*
         * load/store the rest of the elements of the current segment
         * pointed to by vstart
         */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd,
                      ra);
        }
        k++;
    }

    /* load/store elements for the rest of the segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}
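/*
 * Illustrative note (not from the original source): whole-register accesses
 * ignore vtype and vl and always move nf * VLEN bits. With vlenb = 16 and
 * vl2re32.v (nf = 2, log2_esz = 2), max_elems = 4 and a full transfer moves
 * 2 * 16 = 32 bytes; resuming with vstart = 5 lands in segment k = 1 at
 * offset 1, so only the remaining 3 elements of the second register are
 * (re)loaded.
 */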
#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN)       \
void HELPER(NAME)(void *vd, target_ulong base,        \
                  CPURISCVState *env, uint32_t desc)  \
{                                                     \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN,     \
                    ctzl(sizeof(ETYPE)), GETPC());    \
}

GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN)      \
void HELPER(NAME)(void *vd, target_ulong base,        \
                  CPURISCVState *env, uint32_t desc)  \
{                                                     \
    vext_ldst_whole(vd, base, env, desc, STORE_FN,    \
                    ctzl(sizeof(ETYPE)), GETPC());    \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)

/*
 * Vector Integer Arithmetic Instructions
 */

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)
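/*
 * Illustrative note (not from the original source): the OP_/WOP_/NOP_ macros
 * above expand to the (TD, T1, T2, TX1, TX2) tuple consumed by the
 * RVVCALL/OPIVV2/OPIVX2 templates: the destination type, the two source
 * element types, and the types the sources are converted to before the
 * operation. For instance a widening signed add on byte elements,
 *
 *   RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
 *
 * computes each result as (int16_t)s2 + (int16_t)s1 and stores it into an
 * int16_t destination element.
 */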
RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

GEN_VEXT_VV(vadd_vv_b, 1)
GEN_VEXT_VV(vadd_vv_h, 2)
GEN_VEXT_VV(vadd_vv_w, 4)
GEN_VEXT_VV(vadd_vv_d, 8)
GEN_VEXT_VV(vsub_vv_b, 1)
GEN_VEXT_VV(vsub_vv_h, 2)
GEN_VEXT_VV(vsub_vv_w, 4)
GEN_VEXT_VV(vsub_vv_d, 8)

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

GEN_VEXT_VX(vadd_vx_b, 1)
GEN_VEXT_VX(vadd_vx_h, 2)
GEN_VEXT_VX(vadd_vx_w, 4)
GEN_VEXT_VX(vadd_vx_d, 8)
GEN_VEXT_VX(vsub_vx_b, 1)
GEN_VEXT_VX(vsub_vx_h, 2)
GEN_VEXT_VX(vsub_vx_w, 4)
GEN_VEXT_VX(vsub_vx_d, 8)
GEN_VEXT_VX(vrsub_vx_b, 1)
GEN_VEXT_VX(vrsub_vx_h, 2)
GEN_VEXT_VX(vrsub_vx_w, 4)
GEN_VEXT_VX(vrsub_vx_d, 8)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}
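/*
 * Illustrative note (not from the original source): unlike the RVV helpers
 * above, the vec_rsubs* functions follow the TCG gvec calling convention:
 * simd_oprsz(desc) gives the operation size in bytes and the scalar b is
 * broadcast, so every element becomes d[i] = b - a[i] (e.g. vec_rsubs8 with
 * b = 10 and a[i] = 3 yields 7, wrapping modulo 2^8 on underflow).
 */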
/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 2)
GEN_VEXT_VV(vwaddu_vv_h, 4)
GEN_VEXT_VV(vwaddu_vv_w, 8)
GEN_VEXT_VV(vwsubu_vv_b, 2)
GEN_VEXT_VV(vwsubu_vv_h, 4)
GEN_VEXT_VV(vwsubu_vv_w, 8)
GEN_VEXT_VV(vwadd_vv_b, 2)
GEN_VEXT_VV(vwadd_vv_h, 4)
GEN_VEXT_VV(vwadd_vv_w, 8)
GEN_VEXT_VV(vwsub_vv_b, 2)
GEN_VEXT_VV(vwsub_vv_h, 4)
GEN_VEXT_VV(vwsub_vv_w, 8)
GEN_VEXT_VV(vwaddu_wv_b, 2)
GEN_VEXT_VV(vwaddu_wv_h, 4)
GEN_VEXT_VV(vwaddu_wv_w, 8)
GEN_VEXT_VV(vwsubu_wv_b, 2)
GEN_VEXT_VV(vwsubu_wv_h, 4)
GEN_VEXT_VV(vwsubu_wv_w, 8)
GEN_VEXT_VV(vwadd_wv_b, 2)
GEN_VEXT_VV(vwadd_wv_h, 4)
GEN_VEXT_VV(vwadd_wv_w, 8)
GEN_VEXT_VV(vwsub_wv_b, 2)
GEN_VEXT_VV(vwsub_wv_h, 4)
GEN_VEXT_VV(vwsub_wv_w, 8)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 2)
GEN_VEXT_VX(vwaddu_vx_h, 4)
GEN_VEXT_VX(vwaddu_vx_w, 8)
GEN_VEXT_VX(vwsubu_vx_b, 2)
GEN_VEXT_VX(vwsubu_vx_h, 4)
GEN_VEXT_VX(vwsubu_vx_w, 8)
GEN_VEXT_VX(vwadd_vx_b, 2)
GEN_VEXT_VX(vwadd_vx_h, 4)
GEN_VEXT_VX(vwadd_vx_w, 8)
GEN_VEXT_VX(vwsub_vx_b, 2)
GEN_VEXT_VX(vwsub_vx_h, 4)
GEN_VEXT_VX(vwsub_vx_w, 8)
GEN_VEXT_VX(vwaddu_wx_b, 2)
GEN_VEXT_VX(vwaddu_wx_h, 4)
GEN_VEXT_VX(vwaddu_wx_w, 8)
GEN_VEXT_VX(vwsubu_wx_b, 2)
GEN_VEXT_VX(vwsubu_wx_h, 4)
GEN_VEXT_VX(vwsubu_wx_w, 8)
GEN_VEXT_VX(vwadd_wx_b, 2)
GEN_VEXT_VX(vwadd_wx_h, 4)
GEN_VEXT_VX(vwadd_wx_w, 8)
GEN_VEXT_VX(vwsub_wx_b, 2)
GEN_VEXT_VX(vwsub_wx_h, 4)
GEN_VEXT_VX(vwsub_wx_w, 8)
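/*
 * Illustrative note (not from the original source): widening ops produce
 * elements of 2 * SEW. The plain vv/vx forms widen both sources (e.g.
 * vwaddu.vv with SEW = 8: 200 + 100 = 300 fits in the 16-bit destination),
 * whereas the wv/wx forms already take vs2 at the wider 2 * SEW width,
 * which is why the WOP_W* tuples widen only the other operand.
 */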
/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t esz = sizeof(ETYPE);                             \
    uint32_t total_elems =                                    \
        vext_get_total_elems(env, desc, esz);                 \
    uint32_t vta = vext_vta(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env);                             \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = vext_elem_mask(v0, i);                  \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
    env->vstart = 0;                                          \
    /* set tail elements to 1s */                             \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t esz = sizeof(ETYPE);                                        \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t i;                                                          \
                                                                         \
    VSTART_CHECK_EARLY_EXIT(env);                                        \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        ETYPE carry = vext_elem_mask(v0, i);                             \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
    env->vstart = 0;                                                     \
    /* set tail elements to 1s */                                        \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
                              (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
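/*
 * Illustrative note (not from the original source): DO_MADC/DO_MSBC compute
 * the carry/borrow out of the corresponding add/subtract entirely in the
 * element width. For 8-bit elements, N = 200, M = 100, C = 0:
 * (uint8_t)(200 + 100) = 44 < 200, so vmadc reports a carry of 1; with
 * N = 5, M = 5, C = 1, vmsbc sees 5 <= 5 and reports a borrow of 1
 * (5 - 5 - 1 wraps below zero).
 */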
#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env);                             \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        ETYPE carry = !vm && vext_elem_mask(v0, i);           \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,          \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    uint32_t vl = env->vl;                                      \
    uint32_t vm = vext_vm(desc);                                \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;      \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                \
    uint32_t i;                                                 \
                                                                \
    VSTART_CHECK_EARLY_EXIT(env);                               \
                                                                \
    for (i = env->vstart; i < vl; i++) {                        \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                      \
        ETYPE carry = !vm && vext_elem_mask(v0, i);             \
        vext_set_elem_mask(vd, i,                               \
                           DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
    }                                                           \
    env->vstart = 0;                                            \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                         \
    if (vta_all_1s) {                                           \
        for (; i < total_elems; i++) {                          \
            vext_set_elem_mask(vd, i, 1);                       \
        }                                                       \
    }                                                           \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M)  (N << (M))
#define DO_SRL(N, M)  (N >> (M))

/* generate the helpers for shift instructions with two vector operators */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)       \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                    \
                  void *vs2, CPURISCVState *env, uint32_t desc)     \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t esz = sizeof(TS1);                                     \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);    \
    uint32_t vta = vext_vta(desc);                                  \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    VSTART_CHECK_EARLY_EXIT(env);                                   \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);     \
            continue;                                               \
        }                                                           \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                            \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                            \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                  \
    }                                                               \
    env->vstart = 0;                                                \
    /* set tail elements to 1s */                                   \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);        \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
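/*
 * Illustrative note (not from the original source): the MASK argument keeps
 * only log2(SEW) bits of the shift amount, as the spec requires, and the
 * arithmetic right shifts reuse DO_SRL with a signed TS2 so that the C >>
 * on a signed operand provides the sign extension. E.g. vsra.vv on 32-bit
 * elements with s1 = 35 shifts by 35 & 0x1f = 3, and (int32_t)-8 >> 3 = -1.
 */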
/*
 * generate the helpers for shift instructions with one vector and one scalar
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,      \
                  void *vs2, CPURISCVState *env,            \
                  uint32_t desc)                            \
{                                                           \
    uint32_t vm = vext_vm(desc);                            \
    uint32_t vl = env->vl;                                  \
    uint32_t esz = sizeof(TD);                              \
    uint32_t total_elems =                                  \
        vext_get_total_elems(env, desc, esz);               \
    uint32_t vta = vext_vta(desc);                          \
    uint32_t vma = vext_vma(desc);                          \
    uint32_t i;                                             \
                                                            \
    VSTART_CHECK_EARLY_EXIT(env);                           \
                                                            \
    for (i = env->vstart; i < vl; i++) {                    \
        if (!vm && !vext_elem_mask(v0, i)) {                \
            /* set masked-off elements to 1s */             \
            vext_set_elems_1s(vd, vma, i * esz,             \
                              (i + 1) * esz);               \
            continue;                                       \
        }                                                   \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                    \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);            \
    }                                                       \
    env->vstart = 0;                                        \
    /* set tail elements to 1s */                           \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
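/*
 * Illustrative note (not from the original source): the narrowing shifts
 * reuse the same shift templates with a source element twice as wide as the
 * destination, so the shift amount is masked to log2(2 * SEW) bits (0xf for
 * byte results) and only the low SEW bits of the shifted value are kept:
 * vnsrl.wv on a 16-bit source 0x1234 with shift 4 stores 0x23 into the
 * 8-bit destination element.
 */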
/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;    \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);              \
    uint32_t vma = vext_vma(desc);                            \
    uint32_t i;                                               \
                                                              \
    VSTART_CHECK_EARLY_EXIT(env);                             \
                                                              \
    for (i = env->vstart; i < vl; i++) {                      \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            /* set masked-off elements to 1s */               \
            if (vma) {                                        \
                vext_set_elem_mask(vd, i, 1);                 \
            }                                                 \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    env->vstart = 0;                                          \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                       \
    if (vta_all_1s) {                                         \
        for (; i < total_elems; i++) {                        \
            vext_set_elem_mask(vd, i, 1);                     \
        }                                                     \
    }                                                         \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;          \
    uint32_t vta_all_1s = vext_vta_all_1s(desc);                    \
    uint32_t vma = vext_vma(desc);                                  \
    uint32_t i;                                                     \
                                                                    \
    VSTART_CHECK_EARLY_EXIT(env);                                   \
                                                                    \
    for (i = env->vstart; i < vl; i++) {                            \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            /* set masked-off elements to 1s */                     \
            if (vma) {                                              \
                vext_set_elem_mask(vd, i, 1);                       \
            }                                                       \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                           DO_OP(s2, (ETYPE)(target_long)s1));      \
    }                                                               \
    env->vstart = 0;                                                \
    /*
     * mask destination register is always tail-agnostic
     * set tail elements to 1s
     */                                                             \
    if (vta_all_1s) {                                               \
        for (; i < total_elems; i++) {                              \
            vext_set_elem_mask(vd, i, 1);                           \
        }                                                           \
    }                                                               \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1)
GEN_VEXT_VV(vminu_vv_h, 2)
GEN_VEXT_VV(vminu_vv_w, 4)
GEN_VEXT_VV(vminu_vv_d, 8)
GEN_VEXT_VV(vmin_vv_b, 1)
GEN_VEXT_VV(vmin_vv_h, 2)
GEN_VEXT_VV(vmin_vv_w, 4)
GEN_VEXT_VV(vmin_vv_d, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8)
GEN_VEXT_VV(vmax_vv_b, 1)
GEN_VEXT_VV(vmax_vv_h, 2)
GEN_VEXT_VV(vmax_vv_w, 4)
GEN_VEXT_VV(vmax_vv_d, 8)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1)
GEN_VEXT_VX(vminu_vx_h, 2)
GEN_VEXT_VX(vminu_vx_w, 4)
GEN_VEXT_VX(vminu_vx_d, 8)
GEN_VEXT_VX(vmin_vx_b, 1)
GEN_VEXT_VX(vmin_vx_h, 2)
GEN_VEXT_VX(vmin_vx_w, 4)
GEN_VEXT_VX(vmin_vx_d, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8)
GEN_VEXT_VX(vmax_vx_b, 1)
GEN_VEXT_VX(vmax_vx_h, 2)
GEN_VEXT_VX(vmax_vx_w, 4)
GEN_VEXT_VX(vmax_vx_d, 8)

/* Vector Single-Width Integer Multiply Instructions */
#define DO_MUL(N, M) (N * M)
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b, 1)
GEN_VEXT_VV(vmul_vv_h, 2)
GEN_VEXT_VV(vmul_vv_w, 4)
GEN_VEXT_VV(vmul_vv_d, 8)

static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    return (int16_t)s2 * (int16_t)s1 >> 8;
}

static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    return (int32_t)s2 * (int32_t)s1 >> 16;
}

static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    return (int64_t)s2 * (int64_t)s1 >> 32;
}

static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t hi_64, lo_64;

    muls64(&lo_64, &hi_64, s1, s2);
    return hi_64;
}

static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    return (uint16_t)s2 * (uint16_t)s1 >> 8;
}

static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    return (uint32_t)s2 * (uint32_t)s1 >> 16;
}

static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    return (uint64_t)s2 * (uint64_t)s1 >> 32;
}

static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);
    return hi_64;
}

static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    return (int16_t)s2 * (uint16_t)s1 >> 8;
}

static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    return (int32_t)s2 * (uint32_t)s1 >> 16;
}

static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    return (int64_t)s2 * (uint64_t)s1 >> 32;
}

/*
 * Let A = signed operand,
 *     B = unsigned operand
 *     P = mulu64(A, B), unsigned product
 *
 * LET X = 2 ** 64 - A, 2's complement of A
 *     SP = signed product
 * THEN
 *     IF A < 0
 *         SP = -X * B
 *            = -(2 ** 64 - A) * B
 *            = A * B - 2 ** 64 * B
 *            = P - 2 ** 64 * B
 *     ELSE
 *         SP = P
 * THEN
 *     HI_P -= (A < 0 ? B : 0)
 */

static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);

    hi_64 -= s2 < 0 ? s1 : 0;
    return hi_64;
}
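/*
 * Illustrative note (not from the original source): the same fix-up can be
 * checked at a narrow width. For 8-bit values, s2 = -1 (bit pattern 0xff)
 * and s1 = 255: the unsigned product is 0xff * 0xff = 0xfe01, high byte
 * 0xfe; subtracting s1 (0xff) gives 0xff = -1, which matches the true
 * signed-times-unsigned result -255 = 0xff01.
 */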
RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
GEN_VEXT_VV(vmulh_vv_b, 1)
GEN_VEXT_VV(vmulh_vv_h, 2)
GEN_VEXT_VV(vmulh_vv_w, 4)
GEN_VEXT_VV(vmulh_vv_d, 8)
GEN_VEXT_VV(vmulhu_vv_b, 1)
GEN_VEXT_VV(vmulhu_vv_h, 2)
GEN_VEXT_VV(vmulhu_vv_w, 4)
GEN_VEXT_VV(vmulhu_vv_d, 8)
GEN_VEXT_VV(vmulhsu_vv_b, 1)
GEN_VEXT_VV(vmulhsu_vv_h, 2)
GEN_VEXT_VV(vmulhsu_vv_w, 4)
GEN_VEXT_VV(vmulhsu_vv_d, 8)

RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
GEN_VEXT_VX(vmul_vx_b, 1)
GEN_VEXT_VX(vmul_vx_h, 2)
GEN_VEXT_VX(vmul_vx_w, 4)
GEN_VEXT_VX(vmul_vx_d, 8)
GEN_VEXT_VX(vmulh_vx_b, 1)
GEN_VEXT_VX(vmulh_vx_h, 2)
GEN_VEXT_VX(vmulh_vx_w, 4)
GEN_VEXT_VX(vmulh_vx_d, 8)
GEN_VEXT_VX(vmulhu_vx_b, 1)
GEN_VEXT_VX(vmulhu_vx_h, 2)
GEN_VEXT_VX(vmulhu_vx_w, 4)
GEN_VEXT_VX(vmulhu_vx_d, 8)
GEN_VEXT_VX(vmulhsu_vx_b, 1)
GEN_VEXT_VX(vmulhsu_vx_h, 2)
GEN_VEXT_VX(vmulhsu_vx_w, 4)
GEN_VEXT_VX(vmulhsu_vx_d, 8)

/* Vector Integer Divide Instructions */
#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
#define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) : \
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
#define DO_REM(N, M)  (unlikely(M == 0) ? N : \
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
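/*
 * Illustrative note (not from the original source): these macros implement
 * the ISA's division corner cases without trapping: division by zero
 * returns all ones (and the remainder returns the dividend), while signed
 * overflow, e.g. INT32_MIN / -1, returns the dividend (remainder 0). The
 * (N == -N) test is true only for 0 and for the most negative value of the
 * type, so together with M == -1 it catches the overflowing case (for
 * N == 0 the short-circuit result equals the real quotient anyway).
 */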
0 : N % M) 1587 1588 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1589 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1590 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1591 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1592 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1593 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1594 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1595 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1596 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1597 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1598 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1599 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1600 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1601 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1602 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1603 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1604 GEN_VEXT_VV(vdivu_vv_b, 1) 1605 GEN_VEXT_VV(vdivu_vv_h, 2) 1606 GEN_VEXT_VV(vdivu_vv_w, 4) 1607 GEN_VEXT_VV(vdivu_vv_d, 8) 1608 GEN_VEXT_VV(vdiv_vv_b, 1) 1609 GEN_VEXT_VV(vdiv_vv_h, 2) 1610 GEN_VEXT_VV(vdiv_vv_w, 4) 1611 GEN_VEXT_VV(vdiv_vv_d, 8) 1612 GEN_VEXT_VV(vremu_vv_b, 1) 1613 GEN_VEXT_VV(vremu_vv_h, 2) 1614 GEN_VEXT_VV(vremu_vv_w, 4) 1615 GEN_VEXT_VV(vremu_vv_d, 8) 1616 GEN_VEXT_VV(vrem_vv_b, 1) 1617 GEN_VEXT_VV(vrem_vv_h, 2) 1618 GEN_VEXT_VV(vrem_vv_w, 4) 1619 GEN_VEXT_VV(vrem_vv_d, 8) 1620 1621 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1622 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1623 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1624 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1625 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1626 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1627 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1628 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1629 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1630 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1631 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1632 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1633 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1634 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1635 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1636 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1637 GEN_VEXT_VX(vdivu_vx_b, 1) 1638 GEN_VEXT_VX(vdivu_vx_h, 2) 1639 GEN_VEXT_VX(vdivu_vx_w, 4) 1640 GEN_VEXT_VX(vdivu_vx_d, 8) 1641 GEN_VEXT_VX(vdiv_vx_b, 1) 1642 GEN_VEXT_VX(vdiv_vx_h, 2) 1643 GEN_VEXT_VX(vdiv_vx_w, 4) 1644 GEN_VEXT_VX(vdiv_vx_d, 8) 1645 GEN_VEXT_VX(vremu_vx_b, 1) 1646 GEN_VEXT_VX(vremu_vx_h, 2) 1647 GEN_VEXT_VX(vremu_vx_w, 4) 1648 GEN_VEXT_VX(vremu_vx_d, 8) 1649 GEN_VEXT_VX(vrem_vx_b, 1) 1650 GEN_VEXT_VX(vrem_vx_h, 2) 1651 GEN_VEXT_VX(vrem_vx_w, 4) 1652 GEN_VEXT_VX(vrem_vx_d, 8) 1653 1654 /* Vector Widening Integer Multiply Instructions */ 1655 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1656 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1657 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1658 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1659 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1660 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1661 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1662 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, 
DO_MUL) 1663 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1664 GEN_VEXT_VV(vwmul_vv_b, 2) 1665 GEN_VEXT_VV(vwmul_vv_h, 4) 1666 GEN_VEXT_VV(vwmul_vv_w, 8) 1667 GEN_VEXT_VV(vwmulu_vv_b, 2) 1668 GEN_VEXT_VV(vwmulu_vv_h, 4) 1669 GEN_VEXT_VV(vwmulu_vv_w, 8) 1670 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1671 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1672 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1673 1674 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1675 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1676 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1677 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1678 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1679 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1680 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1681 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1682 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1683 GEN_VEXT_VX(vwmul_vx_b, 2) 1684 GEN_VEXT_VX(vwmul_vx_h, 4) 1685 GEN_VEXT_VX(vwmul_vx_w, 8) 1686 GEN_VEXT_VX(vwmulu_vx_b, 2) 1687 GEN_VEXT_VX(vwmulu_vx_h, 4) 1688 GEN_VEXT_VX(vwmulu_vx_w, 8) 1689 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1690 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1691 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1692 1693 /* Vector Single-Width Integer Multiply-Add Instructions */ 1694 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1695 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1696 { \ 1697 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1698 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1699 TD d = *((TD *)vd + HD(i)); \ 1700 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1701 } 1702 1703 #define DO_MACC(N, M, D) (M * N + D) 1704 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1705 #define DO_MADD(N, M, D) (M * D + N) 1706 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1707 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1708 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1709 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1710 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1711 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1712 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1713 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1714 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1715 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1716 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1717 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1718 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1719 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1720 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1721 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1722 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1723 GEN_VEXT_VV(vmacc_vv_b, 1) 1724 GEN_VEXT_VV(vmacc_vv_h, 2) 1725 GEN_VEXT_VV(vmacc_vv_w, 4) 1726 GEN_VEXT_VV(vmacc_vv_d, 8) 1727 GEN_VEXT_VV(vnmsac_vv_b, 1) 1728 GEN_VEXT_VV(vnmsac_vv_h, 2) 1729 GEN_VEXT_VV(vnmsac_vv_w, 4) 1730 GEN_VEXT_VV(vnmsac_vv_d, 8) 1731 GEN_VEXT_VV(vmadd_vv_b, 1) 1732 GEN_VEXT_VV(vmadd_vv_h, 2) 1733 GEN_VEXT_VV(vmadd_vv_w, 4) 1734 GEN_VEXT_VV(vmadd_vv_d, 8) 1735 GEN_VEXT_VV(vnmsub_vv_b, 1) 1736 GEN_VEXT_VV(vnmsub_vv_h, 2) 1737 GEN_VEXT_VV(vnmsub_vv_w, 4) 1738 GEN_VEXT_VV(vnmsub_vv_d, 8) 1739 1740 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1741 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1742 { \ 1743 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1744 TD d = *((TD *)vd 
+ HD(i)); \ 1745 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1746 } 1747 1748 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1749 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1750 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1751 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1752 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1753 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1754 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1755 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1756 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1757 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1758 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1759 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1760 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1761 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1762 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1763 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1764 GEN_VEXT_VX(vmacc_vx_b, 1) 1765 GEN_VEXT_VX(vmacc_vx_h, 2) 1766 GEN_VEXT_VX(vmacc_vx_w, 4) 1767 GEN_VEXT_VX(vmacc_vx_d, 8) 1768 GEN_VEXT_VX(vnmsac_vx_b, 1) 1769 GEN_VEXT_VX(vnmsac_vx_h, 2) 1770 GEN_VEXT_VX(vnmsac_vx_w, 4) 1771 GEN_VEXT_VX(vnmsac_vx_d, 8) 1772 GEN_VEXT_VX(vmadd_vx_b, 1) 1773 GEN_VEXT_VX(vmadd_vx_h, 2) 1774 GEN_VEXT_VX(vmadd_vx_w, 4) 1775 GEN_VEXT_VX(vmadd_vx_d, 8) 1776 GEN_VEXT_VX(vnmsub_vx_b, 1) 1777 GEN_VEXT_VX(vnmsub_vx_h, 2) 1778 GEN_VEXT_VX(vnmsub_vx_w, 4) 1779 GEN_VEXT_VX(vnmsub_vx_d, 8) 1780 1781 /* Vector Widening Integer Multiply-Add Instructions */ 1782 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1783 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1784 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1785 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1786 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1787 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1788 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1789 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1790 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1791 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1792 GEN_VEXT_VV(vwmaccu_vv_h, 4) 1793 GEN_VEXT_VV(vwmaccu_vv_w, 8) 1794 GEN_VEXT_VV(vwmacc_vv_b, 2) 1795 GEN_VEXT_VV(vwmacc_vv_h, 4) 1796 GEN_VEXT_VV(vwmacc_vv_w, 8) 1797 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 1798 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 1799 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 1800 1801 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1802 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1803 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1804 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1805 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1806 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1807 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1808 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1809 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1810 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1811 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1812 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1813 GEN_VEXT_VX(vwmaccu_vx_b, 2) 1814 GEN_VEXT_VX(vwmaccu_vx_h, 4) 1815 GEN_VEXT_VX(vwmaccu_vx_w, 8) 1816 GEN_VEXT_VX(vwmacc_vx_b, 2) 1817 GEN_VEXT_VX(vwmacc_vx_h, 4) 1818 GEN_VEXT_VX(vwmacc_vx_w, 8) 1819 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 1820 
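/*
 * Editor's note (illustrative sketch, kept in a comment so it has no effect
 * on the build): each widening multiply-add element operation promotes both
 * source operands to 2*SEW before the multiply, so the product is formed at
 * full precision and only then accumulated.  The SEW=8 signed case behaves
 * like the hypothetical scalar helper below:
 *
 *   static int16_t wmacc8(int16_t acc, int8_t s1, int8_t s2)
 *   {
 *       return (int16_t)((int16_t)s1 * (int16_t)s2) + acc;
 *   }
 *
 * e.g. s1 = -128, s2 = 127 contributes -16256, a value that would have
 * wrapped had the multiply been done at SEW=8.
 */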
GEN_VEXT_VX(vwmaccsu_vx_h, 4) 1821 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 1822 GEN_VEXT_VX(vwmaccus_vx_b, 2) 1823 GEN_VEXT_VX(vwmaccus_vx_h, 4) 1824 GEN_VEXT_VX(vwmaccus_vx_w, 8) 1825 1826 /* Vector Integer Merge and Move Instructions */ 1827 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1828 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1829 uint32_t desc) \ 1830 { \ 1831 uint32_t vl = env->vl; \ 1832 uint32_t esz = sizeof(ETYPE); \ 1833 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1834 uint32_t vta = vext_vta(desc); \ 1835 uint32_t i; \ 1836 \ 1837 VSTART_CHECK_EARLY_EXIT(env); \ 1838 \ 1839 for (i = env->vstart; i < vl; i++) { \ 1840 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1841 *((ETYPE *)vd + H(i)) = s1; \ 1842 } \ 1843 env->vstart = 0; \ 1844 /* set tail elements to 1s */ \ 1845 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1846 } 1847 1848 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1849 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1850 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1851 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1852 1853 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1854 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1855 uint32_t desc) \ 1856 { \ 1857 uint32_t vl = env->vl; \ 1858 uint32_t esz = sizeof(ETYPE); \ 1859 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1860 uint32_t vta = vext_vta(desc); \ 1861 uint32_t i; \ 1862 \ 1863 VSTART_CHECK_EARLY_EXIT(env); \ 1864 \ 1865 for (i = env->vstart; i < vl; i++) { \ 1866 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1867 } \ 1868 env->vstart = 0; \ 1869 /* set tail elements to 1s */ \ 1870 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1871 } 1872 1873 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1874 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1875 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1876 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1877 1878 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1879 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1880 CPURISCVState *env, uint32_t desc) \ 1881 { \ 1882 uint32_t vl = env->vl; \ 1883 uint32_t esz = sizeof(ETYPE); \ 1884 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1885 uint32_t vta = vext_vta(desc); \ 1886 uint32_t i; \ 1887 \ 1888 VSTART_CHECK_EARLY_EXIT(env); \ 1889 \ 1890 for (i = env->vstart; i < vl; i++) { \ 1891 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 1892 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1893 } \ 1894 env->vstart = 0; \ 1895 /* set tail elements to 1s */ \ 1896 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1897 } 1898 1899 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1900 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1901 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1902 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1903 1904 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1905 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1906 void *vs2, CPURISCVState *env, uint32_t desc) \ 1907 { \ 1908 uint32_t vl = env->vl; \ 1909 uint32_t esz = sizeof(ETYPE); \ 1910 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1911 uint32_t vta = vext_vta(desc); \ 1912 uint32_t i; \ 1913 \ 1914 VSTART_CHECK_EARLY_EXIT(env); \ 1915 \ 1916 for (i = env->vstart; i < vl; i++) { \ 1917 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1918 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 1919 (ETYPE)(target_long)s1); \ 1920 *((ETYPE *)vd + H(i)) = d; \ 1921 } \ 1922 env->vstart = 0; \ 1923 /* set tail elements to 1s */ \ 1924 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1925 } 1926 1927 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1928 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1929 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1930 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1931 1932 /* 1933 * Vector Fixed-Point Arithmetic Instructions 1934 */ 1935 1936 /* Vector Single-Width Saturating Add and Subtract */ 1937 1938 /* 1939 * As fixed point instructions probably have round mode and saturation, 1940 * define common macros for fixed point here. 1941 */ 1942 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1943 CPURISCVState *env, int vxrm); 1944 1945 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1946 static inline void \ 1947 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1948 CPURISCVState *env, int vxrm) \ 1949 { \ 1950 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1951 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1952 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1953 } 1954 1955 static inline void 1956 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1957 CPURISCVState *env, 1958 uint32_t vl, uint32_t vm, int vxrm, 1959 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 1960 { 1961 VSTART_CHECK_EARLY_EXIT(env); 1962 1963 for (uint32_t i = env->vstart; i < vl; i++) { 1964 if (!vm && !vext_elem_mask(v0, i)) { 1965 /* set masked-off elements to 1s */ 1966 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 1967 continue; 1968 } 1969 fn(vd, vs1, vs2, i, env, vxrm); 1970 } 1971 env->vstart = 0; 1972 } 1973 1974 static inline void 1975 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1976 CPURISCVState *env, 1977 uint32_t desc, 1978 opivv2_rm_fn *fn, uint32_t esz) 1979 { 1980 uint32_t vm = vext_vm(desc); 1981 uint32_t vl = env->vl; 1982 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 1983 uint32_t vta = vext_vta(desc); 1984 uint32_t vma = vext_vma(desc); 1985 1986 switch (env->vxrm) { 1987 case 0: /* rnu */ 1988 vext_vv_rm_1(vd, v0, vs1, vs2, 1989 env, vl, vm, 0, fn, vma, esz); 1990 break; 1991 case 1: /* rne */ 1992 vext_vv_rm_1(vd, v0, vs1, vs2, 1993 env, vl, vm, 1, fn, vma, esz); 1994 break; 1995 case 2: /* rdn */ 1996 vext_vv_rm_1(vd, v0, vs1, vs2, 1997 env, vl, vm, 2, fn, vma, esz); 1998 break; 1999 default: /* rod */ 2000 vext_vv_rm_1(vd, v0, vs1, vs2, 2001 env, vl, vm, 3, fn, vma, esz); 2002 break; 2003 } 2004 /* set tail elements to 1s */ 2005 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2006 } 2007 2008 /* generate helpers for fixed point instructions with OPIVV format */ 2009 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 2010 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2011 CPURISCVState *env, uint32_t desc) \ 2012 { \ 2013 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 2014 do_##NAME, ESZ); \ 2015 } 2016 2017 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, 2018 uint8_t b) 2019 { 2020 uint8_t res = a + b; 2021 if (res < a) { 2022 res = UINT8_MAX; 2023 env->vxsat = 0x1; 2024 } 2025 return res; 2026 } 2027 2028 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2029 uint16_t b) 2030 { 2031 uint16_t res = a + b; 2032 if (res < a) { 2033 res = UINT16_MAX; 2034 env->vxsat = 0x1; 2035 } 2036 return res; 2037 } 2038 2039 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2040 uint32_t b) 2041 { 2042 uint32_t res = a + b; 2043 
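/*
 * Editor's note (assumption, added for clarity): for an unsigned add the
 * truncated sum is smaller than either operand exactly when a carry out of
 * the top bit occurred, so the "res < a" test below is a wrap-around check.
 * E.g. at 8 bits, 200 + 100 truncates to 44 and 44 < 200, which is what
 * makes saddu8() above clamp to UINT8_MAX and set vxsat.
 */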
if (res < a) { 2044 res = UINT32_MAX; 2045 env->vxsat = 0x1; 2046 } 2047 return res; 2048 } 2049 2050 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2051 uint64_t b) 2052 { 2053 uint64_t res = a + b; 2054 if (res < a) { 2055 res = UINT64_MAX; 2056 env->vxsat = 0x1; 2057 } 2058 return res; 2059 } 2060 2061 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2062 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2063 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2064 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2065 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2066 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2067 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2068 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2069 2070 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2071 CPURISCVState *env, int vxrm); 2072 2073 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2074 static inline void \ 2075 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2076 CPURISCVState *env, int vxrm) \ 2077 { \ 2078 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2079 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2080 } 2081 2082 static inline void 2083 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2084 CPURISCVState *env, 2085 uint32_t vl, uint32_t vm, int vxrm, 2086 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2087 { 2088 VSTART_CHECK_EARLY_EXIT(env); 2089 2090 for (uint32_t i = env->vstart; i < vl; i++) { 2091 if (!vm && !vext_elem_mask(v0, i)) { 2092 /* set masked-off elements to 1s */ 2093 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2094 continue; 2095 } 2096 fn(vd, s1, vs2, i, env, vxrm); 2097 } 2098 env->vstart = 0; 2099 } 2100 2101 static inline void 2102 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2103 CPURISCVState *env, 2104 uint32_t desc, 2105 opivx2_rm_fn *fn, uint32_t esz) 2106 { 2107 uint32_t vm = vext_vm(desc); 2108 uint32_t vl = env->vl; 2109 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2110 uint32_t vta = vext_vta(desc); 2111 uint32_t vma = vext_vma(desc); 2112 2113 switch (env->vxrm) { 2114 case 0: /* rnu */ 2115 vext_vx_rm_1(vd, v0, s1, vs2, 2116 env, vl, vm, 0, fn, vma, esz); 2117 break; 2118 case 1: /* rne */ 2119 vext_vx_rm_1(vd, v0, s1, vs2, 2120 env, vl, vm, 1, fn, vma, esz); 2121 break; 2122 case 2: /* rdn */ 2123 vext_vx_rm_1(vd, v0, s1, vs2, 2124 env, vl, vm, 2, fn, vma, esz); 2125 break; 2126 default: /* rod */ 2127 vext_vx_rm_1(vd, v0, s1, vs2, 2128 env, vl, vm, 3, fn, vma, esz); 2129 break; 2130 } 2131 /* set tail elements to 1s */ 2132 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2133 } 2134 2135 /* generate helpers for fixed point instructions with OPIVX format */ 2136 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2137 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2138 void *vs2, CPURISCVState *env, \ 2139 uint32_t desc) \ 2140 { \ 2141 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2142 do_##NAME, ESZ); \ 2143 } 2144 2145 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2146 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2147 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2148 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2149 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2150 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2151 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2152 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2153 2154 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2155 { 2156 int8_t res = a + b; 2157 if ((res ^ a) & (res ^ b) & 
INT8_MIN) { 2158 res = a > 0 ? INT8_MAX : INT8_MIN; 2159 env->vxsat = 0x1; 2160 } 2161 return res; 2162 } 2163 2164 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2165 int16_t b) 2166 { 2167 int16_t res = a + b; 2168 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2169 res = a > 0 ? INT16_MAX : INT16_MIN; 2170 env->vxsat = 0x1; 2171 } 2172 return res; 2173 } 2174 2175 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2176 int32_t b) 2177 { 2178 int32_t res = a + b; 2179 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2180 res = a > 0 ? INT32_MAX : INT32_MIN; 2181 env->vxsat = 0x1; 2182 } 2183 return res; 2184 } 2185 2186 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2187 int64_t b) 2188 { 2189 int64_t res = a + b; 2190 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2191 res = a > 0 ? INT64_MAX : INT64_MIN; 2192 env->vxsat = 0x1; 2193 } 2194 return res; 2195 } 2196 2197 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2198 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2199 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2200 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2201 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2202 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2203 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2204 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2205 2206 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2207 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2208 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2209 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2210 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2211 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2212 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2213 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2214 2215 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2216 uint8_t b) 2217 { 2218 uint8_t res = a - b; 2219 if (res > a) { 2220 res = 0; 2221 env->vxsat = 0x1; 2222 } 2223 return res; 2224 } 2225 2226 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2227 uint16_t b) 2228 { 2229 uint16_t res = a - b; 2230 if (res > a) { 2231 res = 0; 2232 env->vxsat = 0x1; 2233 } 2234 return res; 2235 } 2236 2237 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2238 uint32_t b) 2239 { 2240 uint32_t res = a - b; 2241 if (res > a) { 2242 res = 0; 2243 env->vxsat = 0x1; 2244 } 2245 return res; 2246 } 2247 2248 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2249 uint64_t b) 2250 { 2251 uint64_t res = a - b; 2252 if (res > a) { 2253 res = 0; 2254 env->vxsat = 0x1; 2255 } 2256 return res; 2257 } 2258 2259 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2260 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2261 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2262 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2263 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2264 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2265 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2266 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2267 2268 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2269 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2270 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2271 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2272 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2273 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2274 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2275 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2276 2277 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2278 { 2279 int8_t res = a - b; 2280 if 
((res ^ a) & (a ^ b) & INT8_MIN) {
        res = a >= 0 ? INT8_MAX : INT8_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
                             int16_t b)
{
    int16_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT16_MIN) {
        res = a >= 0 ? INT16_MAX : INT16_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int32_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT32_MIN) {
        res = a >= 0 ? INT32_MAX : INT32_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT64_MIN) {
        res = a >= 0 ? INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
GEN_VEXT_VV_RM(vssub_vv_b, 1)
GEN_VEXT_VV_RM(vssub_vv_h, 2)
GEN_VEXT_VV_RM(vssub_vv_w, 4)
GEN_VEXT_VV_RM(vssub_vv_d, 8)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
GEN_VEXT_VX_RM(vssub_vx_b, 1)
GEN_VEXT_VX_RM(vssub_vx_h, 2)
GEN_VEXT_VX_RM(vssub_vx_w, 4)
GEN_VEXT_VX_RM(vssub_vx_d, 8)

/* Vector Single-Width Averaging Add and Subtract */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d = extract64(v, shift, 1);
    uint8_t d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    d1 = extract64(v, shift - 1, 1);
    D1 = extract64(v, 0, shift);
    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            D2 = extract64(v, 0, shift - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}

static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int64_t res = (int64_t)a + b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63.
*/ 2383 return ((res >> 1) ^ over) + round; 2384 } 2385 2386 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2387 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2388 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2389 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2390 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2391 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2392 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2393 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2394 2395 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2396 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2397 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2398 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2399 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2400 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2401 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2402 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2403 2404 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2405 uint32_t a, uint32_t b) 2406 { 2407 uint64_t res = (uint64_t)a + b; 2408 uint8_t round = get_round(vxrm, res, 1); 2409 2410 return (res >> 1) + round; 2411 } 2412 2413 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2414 uint64_t a, uint64_t b) 2415 { 2416 uint64_t res = a + b; 2417 uint8_t round = get_round(vxrm, res, 1); 2418 uint64_t over = (uint64_t)(res < a) << 63; 2419 2420 return ((res >> 1) | over) + round; 2421 } 2422 2423 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2424 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2425 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2426 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2427 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2428 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2429 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2430 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2431 2432 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2433 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2434 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2435 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2436 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2437 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2438 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2439 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2440 2441 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, 2442 int32_t b) 2443 { 2444 int64_t res = (int64_t)a - b; 2445 uint8_t round = get_round(vxrm, res, 1); 2446 2447 return (res >> 1) + round; 2448 } 2449 2450 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, 2451 int64_t b) 2452 { 2453 int64_t res = (int64_t)a - b; 2454 uint8_t round = get_round(vxrm, res, 1); 2455 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2456 2457 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2458 return ((res >> 1) ^ over) + round; 2459 } 2460 2461 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2462 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2463 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2464 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2465 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2466 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2467 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2468 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2469 2470 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2471 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2472 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2473 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2474 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2475 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2476 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2477 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2478 2479 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2480 uint32_t a, uint32_t b) 2481 { 2482 int64_t res = (int64_t)a - b; 2483 uint8_t round = get_round(vxrm, res, 1); 2484 2485 return (res >> 1) + round; 2486 } 2487 2488 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2489 uint64_t a, uint64_t b) 2490 { 2491 uint64_t res = (uint64_t)a - b; 2492 uint8_t round = get_round(vxrm, res, 1); 2493 uint64_t over = (uint64_t)(res > a) << 63; 2494 2495 return ((res >> 1) | over) + round; 2496 } 2497 2498 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2499 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2500 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2501 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2502 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2503 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2504 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2505 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2506 2507 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2508 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2509 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2510 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2511 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2512 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2513 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2514 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2515 2516 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2517 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2518 { 2519 uint8_t round; 2520 int16_t res; 2521 2522 res = (int16_t)a * (int16_t)b; 2523 round = get_round(vxrm, res, 7); 2524 res = (res >> 7) + round; 2525 2526 if (res > INT8_MAX) { 2527 env->vxsat = 0x1; 2528 return INT8_MAX; 2529 } else if (res < INT8_MIN) { 2530 env->vxsat = 0x1; 2531 return INT8_MIN; 2532 } else { 2533 return res; 2534 } 2535 } 2536 2537 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2538 { 2539 uint8_t round; 2540 int32_t res; 2541 2542 res = (int32_t)a * (int32_t)b; 2543 round = get_round(vxrm, res, 15); 2544 res = (res >> 15) + round; 2545 2546 if (res > INT16_MAX) { 2547 env->vxsat = 0x1; 2548 return INT16_MAX; 2549 } else if (res < INT16_MIN) { 2550 env->vxsat = 0x1; 2551 return INT16_MIN; 2552 } else { 2553 return res; 2554 } 2555 } 2556 2557 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2558 { 2559 uint8_t round; 2560 int64_t res; 2561 2562 res = (int64_t)a * (int64_t)b; 2563 round = get_round(vxrm, res, 31); 2564 res = (res >> 31) + round; 2565 2566 if (res > INT32_MAX) { 2567 env->vxsat = 0x1; 2568 return INT32_MAX; 2569 } else if (res < INT32_MIN) { 2570 env->vxsat = 0x1; 
2571 return INT32_MIN; 2572 } else { 2573 return res; 2574 } 2575 } 2576 2577 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2578 { 2579 uint8_t round; 2580 uint64_t hi_64, lo_64; 2581 int64_t res; 2582 2583 if (a == INT64_MIN && b == INT64_MIN) { 2584 env->vxsat = 1; 2585 return INT64_MAX; 2586 } 2587 2588 muls64(&lo_64, &hi_64, a, b); 2589 round = get_round(vxrm, lo_64, 63); 2590 /* 2591 * Cannot overflow, as there are always 2592 * 2 sign bits after multiply. 2593 */ 2594 res = (hi_64 << 1) | (lo_64 >> 63); 2595 if (round) { 2596 if (res == INT64_MAX) { 2597 env->vxsat = 1; 2598 } else { 2599 res += 1; 2600 } 2601 } 2602 return res; 2603 } 2604 2605 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2606 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2607 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2608 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2609 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2610 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2611 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2612 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2613 2614 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2615 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2616 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2617 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2618 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2619 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2620 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2621 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2622 2623 /* Vector Single-Width Scaling Shift Instructions */ 2624 static inline uint8_t 2625 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2626 { 2627 uint8_t round, shift = b & 0x7; 2628 uint8_t res; 2629 2630 round = get_round(vxrm, a, shift); 2631 res = (a >> shift) + round; 2632 return res; 2633 } 2634 static inline uint16_t 2635 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2636 { 2637 uint8_t round, shift = b & 0xf; 2638 2639 round = get_round(vxrm, a, shift); 2640 return (a >> shift) + round; 2641 } 2642 static inline uint32_t 2643 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2644 { 2645 uint8_t round, shift = b & 0x1f; 2646 2647 round = get_round(vxrm, a, shift); 2648 return (a >> shift) + round; 2649 } 2650 static inline uint64_t 2651 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2652 { 2653 uint8_t round, shift = b & 0x3f; 2654 2655 round = get_round(vxrm, a, shift); 2656 return (a >> shift) + round; 2657 } 2658 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2659 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2660 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2661 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2662 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2663 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2664 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2665 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2666 2667 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2668 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2669 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2670 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2671 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2672 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2673 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2674 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2675 2676 static inline int8_t 2677 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2678 { 2679 uint8_t round, shift = b & 0x7; 2680 2681 round = get_round(vxrm, a, shift); 2682 return (a >> shift) + round; 2683 } 2684 static inline int16_t 2685 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2686 { 2687 uint8_t round, shift = b & 0xf; 2688 2689 round = get_round(vxrm, a, shift); 2690 return (a >> shift) + round; 2691 } 2692 static inline int32_t 2693 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2694 { 2695 uint8_t round, shift = b & 0x1f; 2696 2697 round = get_round(vxrm, a, shift); 2698 return (a >> shift) + round; 2699 } 2700 static inline int64_t 2701 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2702 { 2703 uint8_t round, shift = b & 0x3f; 2704 2705 round = get_round(vxrm, a, shift); 2706 return (a >> shift) + round; 2707 } 2708 2709 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2710 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2711 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2712 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2713 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2714 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2715 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2716 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2717 2718 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2719 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2720 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2721 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2722 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2723 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2724 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2725 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2726 2727 /* Vector Narrowing Fixed-Point Clip Instructions */ 2728 static inline int8_t 2729 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2730 { 2731 uint8_t round, shift = b & 0xf; 2732 int16_t res; 2733 2734 round = get_round(vxrm, a, shift); 2735 res = (a >> shift) + round; 2736 if (res > INT8_MAX) { 2737 env->vxsat = 0x1; 2738 return INT8_MAX; 2739 } else if (res < INT8_MIN) { 2740 env->vxsat = 0x1; 2741 return INT8_MIN; 2742 } else { 2743 return res; 2744 } 2745 } 2746 2747 static inline int16_t 2748 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2749 { 2750 uint8_t round, shift = b & 0x1f; 2751 int32_t res; 2752 2753 round = get_round(vxrm, a, shift); 2754 res = (a >> shift) + round; 2755 if (res > INT16_MAX) { 2756 env->vxsat = 0x1; 2757 return INT16_MAX; 2758 } else if (res < INT16_MIN) { 2759 env->vxsat = 0x1; 2760 return INT16_MIN; 2761 } else { 2762 return res; 2763 } 2764 } 2765 2766 static inline int32_t 2767 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2768 { 2769 uint8_t round, shift = b & 0x3f; 2770 int64_t res; 2771 2772 round = get_round(vxrm, a, shift); 2773 res = (a >> shift) + round; 2774 if (res > INT32_MAX) { 2775 env->vxsat = 0x1; 2776 return INT32_MAX; 2777 } else if (res < INT32_MIN) { 2778 env->vxsat = 0x1; 2779 return INT32_MIN; 2780 } else { 2781 return res; 2782 } 2783 } 2784 2785 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2786 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2787 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2788 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 2789 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 2790 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 2791 2792 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2793 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2794 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2795 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 2796 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 2797 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 2798 2799 static inline uint8_t 2800 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2801 { 2802 
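    /*
     * Editor's note (illustrative sketch, kept in a comment so it has no
     * effect on the build): the unsigned narrowing clips (this helper and
     * the 16/32-bit variants below) mirror the signed vnclip helpers above:
     * shift the 2*SEW wide source right by the low bits of the shift
     * operand, add the rounding increment from get_round(), then saturate
     * into SEW bits.  Assuming truncation (vxrm = 2, so the increment is 0),
     * the 8-bit case reduces to the hypothetical scalar helper below:
     *
     *   static uint8_t nclipu8_rdn(uint16_t a, uint8_t shamt)
     *   {
     *       uint16_t r = a >> (shamt & 0xf);
     *       return r > UINT8_MAX ? UINT8_MAX : (uint8_t)r;
     *   }
     */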
uint8_t round, shift = b & 0xf;
    uint16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT8_MAX) {
        env->vxsat = 0x1;
        return UINT8_MAX;
    } else {
        return res;
    }
}

static inline uint16_t
vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
{
    uint8_t round, shift = b & 0x1f;
    uint32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT16_MAX) {
        env->vxsat = 0x1;
        return UINT16_MAX;
    } else {
        return res;
    }
}

static inline uint32_t
vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x3f;
    uint64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT32_MAX) {
        env->vxsat = 0x1;
        return UINT32_MAX;
    } else {
        return res;
    }
}

RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
GEN_VEXT_VV_RM(vnclipu_wv_w, 4)

RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
GEN_VEXT_VX_RM(vnclipu_wx_w, 4)

/*
 * Vector Floating-Point Arithmetic Instructions
 */
/* Vector Single-Width Floating-Point Add/Subtract Instructions */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
                      CPURISCVState *env) \
{ \
    TX1 s1 = *((T1 *)vs1 + HS1(i)); \
    TX2 s2 = *((T2 *)vs2 + HS2(i)); \
    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
}

#define GEN_VEXT_VV_ENV(NAME, ESZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                  void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t total_elems = \
        vext_get_total_elems(env, desc, ESZ); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
 \
    VSTART_CHECK_EARLY_EXIT(env); \
 \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * ESZ, \
                              (i + 1) * ESZ); \
            continue; \
        } \
        do_##NAME(vd, vs1, vs2, i, env); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * ESZ, \
                      total_elems * ESZ); \
}

RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8)

#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env) \
{ \
    TX2 s2 = *((T2 *)vs2 + HS2(i)); \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
2917 } 2918 2919 #define GEN_VEXT_VF(NAME, ESZ) \ 2920 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2921 void *vs2, CPURISCVState *env, \ 2922 uint32_t desc) \ 2923 { \ 2924 uint32_t vm = vext_vm(desc); \ 2925 uint32_t vl = env->vl; \ 2926 uint32_t total_elems = \ 2927 vext_get_total_elems(env, desc, ESZ); \ 2928 uint32_t vta = vext_vta(desc); \ 2929 uint32_t vma = vext_vma(desc); \ 2930 uint32_t i; \ 2931 \ 2932 VSTART_CHECK_EARLY_EXIT(env); \ 2933 \ 2934 for (i = env->vstart; i < vl; i++) { \ 2935 if (!vm && !vext_elem_mask(v0, i)) { \ 2936 /* set masked-off elements to 1s */ \ 2937 vext_set_elems_1s(vd, vma, i * ESZ, \ 2938 (i + 1) * ESZ); \ 2939 continue; \ 2940 } \ 2941 do_##NAME(vd, s1, vs2, i, env); \ 2942 } \ 2943 env->vstart = 0; \ 2944 /* set tail elements to 1s */ \ 2945 vext_set_elems_1s(vd, vta, vl * ESZ, \ 2946 total_elems * ESZ); \ 2947 } 2948 2949 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2950 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2951 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2952 GEN_VEXT_VF(vfadd_vf_h, 2) 2953 GEN_VEXT_VF(vfadd_vf_w, 4) 2954 GEN_VEXT_VF(vfadd_vf_d, 8) 2955 2956 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2957 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2958 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2959 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 2960 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 2961 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 2962 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2963 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2964 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2965 GEN_VEXT_VF(vfsub_vf_h, 2) 2966 GEN_VEXT_VF(vfsub_vf_w, 4) 2967 GEN_VEXT_VF(vfsub_vf_d, 8) 2968 2969 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2970 { 2971 return float16_sub(b, a, s); 2972 } 2973 2974 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2975 { 2976 return float32_sub(b, a, s); 2977 } 2978 2979 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2980 { 2981 return float64_sub(b, a, s); 2982 } 2983 2984 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2985 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2986 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2987 GEN_VEXT_VF(vfrsub_vf_h, 2) 2988 GEN_VEXT_VF(vfrsub_vf_w, 4) 2989 GEN_VEXT_VF(vfrsub_vf_d, 8) 2990 2991 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2992 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2993 { 2994 return float32_add(float16_to_float32(a, true, s), 2995 float16_to_float32(b, true, s), s); 2996 } 2997 2998 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2999 { 3000 return float64_add(float32_to_float64(a, s), 3001 float32_to_float64(b, s), s); 3002 3003 } 3004 3005 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3006 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3007 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 3008 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 3009 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3010 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3011 GEN_VEXT_VF(vfwadd_vf_h, 4) 3012 GEN_VEXT_VF(vfwadd_vf_w, 8) 3013 3014 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3015 { 3016 return float32_sub(float16_to_float32(a, true, s), 3017 float16_to_float32(b, true, s), s); 3018 } 3019 3020 static uint64_t vfwsub32(uint32_t a, uint32_t b, 
float_status *s) 3021 { 3022 return float64_sub(float32_to_float64(a, s), 3023 float32_to_float64(b, s), s); 3024 3025 } 3026 3027 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3028 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3029 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 3030 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 3031 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3032 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3033 GEN_VEXT_VF(vfwsub_vf_h, 4) 3034 GEN_VEXT_VF(vfwsub_vf_w, 8) 3035 3036 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3037 { 3038 return float32_add(a, float16_to_float32(b, true, s), s); 3039 } 3040 3041 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3042 { 3043 return float64_add(a, float32_to_float64(b, s), s); 3044 } 3045 3046 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3047 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3048 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 3049 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3050 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3051 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3052 GEN_VEXT_VF(vfwadd_wf_h, 4) 3053 GEN_VEXT_VF(vfwadd_wf_w, 8) 3054 3055 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3056 { 3057 return float32_sub(a, float16_to_float32(b, true, s), s); 3058 } 3059 3060 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3061 { 3062 return float64_sub(a, float32_to_float64(b, s), s); 3063 } 3064 3065 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3066 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3067 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3068 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3069 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3070 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3071 GEN_VEXT_VF(vfwsub_wf_h, 4) 3072 GEN_VEXT_VF(vfwsub_wf_w, 8) 3073 3074 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3075 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3076 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3077 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3078 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3079 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3080 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3081 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3082 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3083 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3084 GEN_VEXT_VF(vfmul_vf_h, 2) 3085 GEN_VEXT_VF(vfmul_vf_w, 4) 3086 GEN_VEXT_VF(vfmul_vf_d, 8) 3087 3088 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3089 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3090 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3091 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3092 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3093 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3094 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3095 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3096 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3097 GEN_VEXT_VF(vfdiv_vf_h, 2) 3098 GEN_VEXT_VF(vfdiv_vf_w, 4) 3099 GEN_VEXT_VF(vfdiv_vf_d, 8) 3100 3101 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3102 { 3103 return float16_div(b, a, s); 3104 } 3105 3106 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3107 { 3108 return float32_div(b, a, s); 3109 } 3110 3111 static uint64_t float64_rdiv(uint64_t a, 
uint64_t b, float_status *s) 3112 { 3113 return float64_div(b, a, s); 3114 } 3115 3116 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3117 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3118 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3119 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3120 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3121 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3122 3123 /* Vector Widening Floating-Point Multiply */ 3124 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3125 { 3126 return float32_mul(float16_to_float32(a, true, s), 3127 float16_to_float32(b, true, s), s); 3128 } 3129 3130 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3131 { 3132 return float64_mul(float32_to_float64(a, s), 3133 float32_to_float64(b, s), s); 3134 3135 } 3136 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3137 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3138 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3139 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3140 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3141 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3142 GEN_VEXT_VF(vfwmul_vf_h, 4) 3143 GEN_VEXT_VF(vfwmul_vf_w, 8) 3144 3145 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3146 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3147 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3148 CPURISCVState *env) \ 3149 { \ 3150 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3151 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3152 TD d = *((TD *)vd + HD(i)); \ 3153 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3154 } 3155 3156 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3157 { 3158 return float16_muladd(a, b, d, 0, s); 3159 } 3160 3161 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3162 { 3163 return float32_muladd(a, b, d, 0, s); 3164 } 3165 3166 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3167 { 3168 return float64_muladd(a, b, d, 0, s); 3169 } 3170 3171 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3172 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3173 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3174 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3175 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3176 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3177 3178 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3179 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3180 CPURISCVState *env) \ 3181 { \ 3182 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3183 TD d = *((TD *)vd + HD(i)); \ 3184 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3185 } 3186 3187 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3188 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3189 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3190 GEN_VEXT_VF(vfmacc_vf_h, 2) 3191 GEN_VEXT_VF(vfmacc_vf_w, 4) 3192 GEN_VEXT_VF(vfmacc_vf_d, 8) 3193 3194 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3195 { 3196 return float16_muladd(a, b, d, float_muladd_negate_c | 3197 float_muladd_negate_product, s); 3198 } 3199 3200 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3201 { 3202 return float32_muladd(a, b, d, float_muladd_negate_c | 3203 float_muladd_negate_product, s); 3204 } 3205 3206 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3207 { 3208 return float64_muladd(a, b, d, float_muladd_negate_c | 3209 
float_muladd_negate_product, s); 3210 } 3211 3212 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3213 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3214 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3215 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3216 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3217 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3218 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3219 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3220 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3221 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3222 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3223 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3224 3225 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3226 { 3227 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3228 } 3229 3230 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3231 { 3232 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3233 } 3234 3235 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3236 { 3237 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3238 } 3239 3240 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3241 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3242 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3243 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3244 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3245 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3246 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3247 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3248 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3249 GEN_VEXT_VF(vfmsac_vf_h, 2) 3250 GEN_VEXT_VF(vfmsac_vf_w, 4) 3251 GEN_VEXT_VF(vfmsac_vf_d, 8) 3252 3253 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3254 { 3255 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3256 } 3257 3258 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3259 { 3260 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3261 } 3262 3263 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3264 { 3265 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3266 } 3267 3268 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3269 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3270 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3271 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3272 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3273 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3274 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3275 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3276 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3277 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3278 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3279 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3280 3281 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3282 { 3283 return float16_muladd(d, b, a, 0, s); 3284 } 3285 3286 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3287 { 3288 return float32_muladd(d, b, a, 0, s); 3289 } 3290 3291 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3292 { 3293 return float64_muladd(d, b, a, 0, s); 3294 } 3295 3296 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3297 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3298 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3299 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) 3300 
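/*
 * Editor's note (assumption, added for clarity): in these fused multiply-add
 * helpers the arguments arrive as a = vs2 element, b = vs1 element (or the
 * scalar operand), d = the current vd element.  So fmacc16() above computes
 * vs1 * vs2 + vd (vfmacc), while fmadd16() computes vs1 * vd + vs2 (vfmadd);
 * the two families differ only in whether vd is the addend or a multiplicand.
 */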
GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3301 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3302 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3303 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3304 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3305 GEN_VEXT_VF(vfmadd_vf_h, 2) 3306 GEN_VEXT_VF(vfmadd_vf_w, 4) 3307 GEN_VEXT_VF(vfmadd_vf_d, 8) 3308 3309 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3310 { 3311 return float16_muladd(d, b, a, float_muladd_negate_c | 3312 float_muladd_negate_product, s); 3313 } 3314 3315 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3316 { 3317 return float32_muladd(d, b, a, float_muladd_negate_c | 3318 float_muladd_negate_product, s); 3319 } 3320 3321 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3322 { 3323 return float64_muladd(d, b, a, float_muladd_negate_c | 3324 float_muladd_negate_product, s); 3325 } 3326 3327 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3328 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3329 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3330 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3331 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3332 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3333 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3334 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3335 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3336 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3337 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3338 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3339 3340 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3341 { 3342 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3343 } 3344 3345 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3346 { 3347 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3348 } 3349 3350 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3351 { 3352 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3353 } 3354 3355 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3356 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3357 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3358 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3359 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3360 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3361 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3362 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3363 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3364 GEN_VEXT_VF(vfmsub_vf_h, 2) 3365 GEN_VEXT_VF(vfmsub_vf_w, 4) 3366 GEN_VEXT_VF(vfmsub_vf_d, 8) 3367 3368 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3369 { 3370 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3371 } 3372 3373 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3374 { 3375 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3376 } 3377 3378 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3379 { 3380 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3381 } 3382 3383 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3384 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3385 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3386 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3387 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3388 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3389 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3390 
RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3391 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3392 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3393 GEN_VEXT_VF(vfnmsub_vf_w, 4) 3394 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3395 3396 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3397 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3398 { 3399 return float32_muladd(float16_to_float32(a, true, s), 3400 float16_to_float32(b, true, s), d, 0, s); 3401 } 3402 3403 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3404 { 3405 return float64_muladd(float32_to_float64(a, s), 3406 float32_to_float64(b, s), d, 0, s); 3407 } 3408 3409 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3410 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3411 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3412 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3413 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3414 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3415 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3416 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3417 3418 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3419 { 3420 return float32_muladd(bfloat16_to_float32(a, s), 3421 bfloat16_to_float32(b, s), d, 0, s); 3422 } 3423 3424 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16) 3425 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4) 3426 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16) 3427 GEN_VEXT_VF(vfwmaccbf16_vf, 4) 3428 3429 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3430 { 3431 return float32_muladd(float16_to_float32(a, true, s), 3432 float16_to_float32(b, true, s), d, 3433 float_muladd_negate_c | float_muladd_negate_product, 3434 s); 3435 } 3436 3437 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3438 { 3439 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), 3440 d, float_muladd_negate_c | 3441 float_muladd_negate_product, s); 3442 } 3443 3444 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3445 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3446 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3447 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3448 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3449 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3450 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3451 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3452 3453 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3454 { 3455 return float32_muladd(float16_to_float32(a, true, s), 3456 float16_to_float32(b, true, s), d, 3457 float_muladd_negate_c, s); 3458 } 3459 3460 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3461 { 3462 return float64_muladd(float32_to_float64(a, s), 3463 float32_to_float64(b, s), d, 3464 float_muladd_negate_c, s); 3465 } 3466 3467 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3468 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3469 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3470 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3471 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3472 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3473 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3474 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3475 3476 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3477 { 3478 return float32_muladd(float16_to_float32(a, true, s), 3479 float16_to_float32(b, true, s), d, 3480 
float_muladd_negate_product, s); 3481 } 3482 3483 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3484 { 3485 return float64_muladd(float32_to_float64(a, s), 3486 float32_to_float64(b, s), d, 3487 float_muladd_negate_product, s); 3488 } 3489 3490 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3491 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3492 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3493 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3494 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3495 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3496 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3497 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3498 3499 /* Vector Floating-Point Square-Root Instruction */ 3500 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3501 static void do_##NAME(void *vd, void *vs2, int i, \ 3502 CPURISCVState *env) \ 3503 { \ 3504 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3505 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3506 } 3507 3508 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3509 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3510 CPURISCVState *env, uint32_t desc) \ 3511 { \ 3512 uint32_t vm = vext_vm(desc); \ 3513 uint32_t vl = env->vl; \ 3514 uint32_t total_elems = \ 3515 vext_get_total_elems(env, desc, ESZ); \ 3516 uint32_t vta = vext_vta(desc); \ 3517 uint32_t vma = vext_vma(desc); \ 3518 uint32_t i; \ 3519 \ 3520 VSTART_CHECK_EARLY_EXIT(env); \ 3521 \ 3522 if (vl == 0) { \ 3523 return; \ 3524 } \ 3525 for (i = env->vstart; i < vl; i++) { \ 3526 if (!vm && !vext_elem_mask(v0, i)) { \ 3527 /* set masked-off elements to 1s */ \ 3528 vext_set_elems_1s(vd, vma, i * ESZ, \ 3529 (i + 1) * ESZ); \ 3530 continue; \ 3531 } \ 3532 do_##NAME(vd, vs2, i, env); \ 3533 } \ 3534 env->vstart = 0; \ 3535 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3536 total_elems * ESZ); \ 3537 } 3538 3539 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3540 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3541 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3542 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3543 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3544 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3545 3546 /* 3547 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3548 * 3549 * Adapted from riscv-v-spec recip.c: 3550 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3551 */ 3552 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3553 { 3554 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3555 uint64_t exp = extract64(f, frac_size, exp_size); 3556 uint64_t frac = extract64(f, 0, frac_size); 3557 3558 const uint8_t lookup_table[] = { 3559 52, 51, 50, 48, 47, 46, 44, 43, 3560 42, 41, 40, 39, 38, 36, 35, 34, 3561 33, 32, 31, 30, 30, 29, 28, 27, 3562 26, 25, 24, 23, 23, 22, 21, 20, 3563 19, 19, 18, 17, 16, 16, 15, 14, 3564 14, 13, 12, 12, 11, 10, 10, 9, 3565 9, 8, 7, 7, 6, 6, 5, 4, 3566 4, 3, 3, 2, 2, 1, 1, 0, 3567 127, 125, 123, 121, 119, 118, 116, 114, 3568 113, 111, 109, 108, 106, 105, 103, 102, 3569 100, 99, 97, 96, 95, 93, 92, 91, 3570 90, 88, 87, 86, 85, 84, 83, 82, 3571 80, 79, 78, 77, 76, 75, 74, 73, 3572 72, 71, 70, 70, 69, 68, 67, 66, 3573 65, 64, 63, 63, 62, 61, 60, 59, 3574 59, 58, 57, 56, 56, 55, 54, 53 3575 }; 3576 const int precision = 7; 3577 3578 if (exp == 0 && frac != 0) { /* subnormal */ 3579 /* Normalize the subnormal. 
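 * Shift frac left until its MSB is set, decrementing exp once per shift;
 * the extra shift below then drops the now-implicit leading one.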
*/ 3580 while (extract64(frac, frac_size - 1, 1) == 0) { 3581 exp--; 3582 frac <<= 1; 3583 } 3584 3585 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3586 } 3587 3588 int idx = ((exp & 1) << (precision - 1)) | 3589 (frac >> (frac_size - precision + 1)); 3590 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3591 (frac_size - precision); 3592 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3593 3594 uint64_t val = 0; 3595 val = deposit64(val, 0, frac_size, out_frac); 3596 val = deposit64(val, frac_size, exp_size, out_exp); 3597 val = deposit64(val, frac_size + exp_size, 1, sign); 3598 return val; 3599 } 3600 3601 static float16 frsqrt7_h(float16 f, float_status *s) 3602 { 3603 int exp_size = 5, frac_size = 10; 3604 bool sign = float16_is_neg(f); 3605 3606 /* 3607 * frsqrt7(sNaN) = canonical NaN 3608 * frsqrt7(-inf) = canonical NaN 3609 * frsqrt7(-normal) = canonical NaN 3610 * frsqrt7(-subnormal) = canonical NaN 3611 */ 3612 if (float16_is_signaling_nan(f, s) || 3613 (float16_is_infinity(f) && sign) || 3614 (float16_is_normal(f) && sign) || 3615 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3616 s->float_exception_flags |= float_flag_invalid; 3617 return float16_default_nan(s); 3618 } 3619 3620 /* frsqrt7(qNaN) = canonical NaN */ 3621 if (float16_is_quiet_nan(f, s)) { 3622 return float16_default_nan(s); 3623 } 3624 3625 /* frsqrt7(+-0) = +-inf */ 3626 if (float16_is_zero(f)) { 3627 s->float_exception_flags |= float_flag_divbyzero; 3628 return float16_set_sign(float16_infinity, sign); 3629 } 3630 3631 /* frsqrt7(+inf) = +0 */ 3632 if (float16_is_infinity(f) && !sign) { 3633 return float16_set_sign(float16_zero, sign); 3634 } 3635 3636 /* +normal, +subnormal */ 3637 uint64_t val = frsqrt7(f, exp_size, frac_size); 3638 return make_float16(val); 3639 } 3640 3641 static float32 frsqrt7_s(float32 f, float_status *s) 3642 { 3643 int exp_size = 8, frac_size = 23; 3644 bool sign = float32_is_neg(f); 3645 3646 /* 3647 * frsqrt7(sNaN) = canonical NaN 3648 * frsqrt7(-inf) = canonical NaN 3649 * frsqrt7(-normal) = canonical NaN 3650 * frsqrt7(-subnormal) = canonical NaN 3651 */ 3652 if (float32_is_signaling_nan(f, s) || 3653 (float32_is_infinity(f) && sign) || 3654 (float32_is_normal(f) && sign) || 3655 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3656 s->float_exception_flags |= float_flag_invalid; 3657 return float32_default_nan(s); 3658 } 3659 3660 /* frsqrt7(qNaN) = canonical NaN */ 3661 if (float32_is_quiet_nan(f, s)) { 3662 return float32_default_nan(s); 3663 } 3664 3665 /* frsqrt7(+-0) = +-inf */ 3666 if (float32_is_zero(f)) { 3667 s->float_exception_flags |= float_flag_divbyzero; 3668 return float32_set_sign(float32_infinity, sign); 3669 } 3670 3671 /* frsqrt7(+inf) = +0 */ 3672 if (float32_is_infinity(f) && !sign) { 3673 return float32_set_sign(float32_zero, sign); 3674 } 3675 3676 /* +normal, +subnormal */ 3677 uint64_t val = frsqrt7(f, exp_size, frac_size); 3678 return make_float32(val); 3679 } 3680 3681 static float64 frsqrt7_d(float64 f, float_status *s) 3682 { 3683 int exp_size = 11, frac_size = 52; 3684 bool sign = float64_is_neg(f); 3685 3686 /* 3687 * frsqrt7(sNaN) = canonical NaN 3688 * frsqrt7(-inf) = canonical NaN 3689 * frsqrt7(-normal) = canonical NaN 3690 * frsqrt7(-subnormal) = canonical NaN 3691 */ 3692 if (float64_is_signaling_nan(f, s) || 3693 (float64_is_infinity(f) && sign) || 3694 (float64_is_normal(f) && sign) || 3695 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3696 
s->float_exception_flags |= float_flag_invalid; 3697 return float64_default_nan(s); 3698 } 3699 3700 /* frsqrt7(qNaN) = canonical NaN */ 3701 if (float64_is_quiet_nan(f, s)) { 3702 return float64_default_nan(s); 3703 } 3704 3705 /* frsqrt7(+-0) = +-inf */ 3706 if (float64_is_zero(f)) { 3707 s->float_exception_flags |= float_flag_divbyzero; 3708 return float64_set_sign(float64_infinity, sign); 3709 } 3710 3711 /* frsqrt7(+inf) = +0 */ 3712 if (float64_is_infinity(f) && !sign) { 3713 return float64_set_sign(float64_zero, sign); 3714 } 3715 3716 /* +normal, +subnormal */ 3717 uint64_t val = frsqrt7(f, exp_size, frac_size); 3718 return make_float64(val); 3719 } 3720 3721 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3722 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3723 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3724 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3725 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3726 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3727 3728 /* 3729 * Vector Floating-Point Reciprocal Estimate Instruction 3730 * 3731 * Adapted from riscv-v-spec recip.c: 3732 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3733 */ 3734 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3735 float_status *s) 3736 { 3737 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3738 uint64_t exp = extract64(f, frac_size, exp_size); 3739 uint64_t frac = extract64(f, 0, frac_size); 3740 3741 const uint8_t lookup_table[] = { 3742 127, 125, 123, 121, 119, 117, 116, 114, 3743 112, 110, 109, 107, 105, 104, 102, 100, 3744 99, 97, 96, 94, 93, 91, 90, 88, 3745 87, 85, 84, 83, 81, 80, 79, 77, 3746 76, 75, 74, 72, 71, 70, 69, 68, 3747 66, 65, 64, 63, 62, 61, 60, 59, 3748 58, 57, 56, 55, 54, 53, 52, 51, 3749 50, 49, 48, 47, 46, 45, 44, 43, 3750 42, 41, 40, 40, 39, 38, 37, 36, 3751 35, 35, 34, 33, 32, 31, 31, 30, 3752 29, 28, 28, 27, 26, 25, 25, 24, 3753 23, 23, 22, 21, 21, 20, 19, 19, 3754 18, 17, 17, 16, 15, 15, 14, 14, 3755 13, 12, 12, 11, 11, 10, 9, 9, 3756 8, 8, 7, 7, 6, 5, 5, 4, 3757 4, 3, 3, 2, 2, 1, 1, 0 3758 }; 3759 const int precision = 7; 3760 3761 if (exp == 0 && frac != 0) { /* subnormal */ 3762 /* Normalize the subnormal. */ 3763 while (extract64(frac, frac_size - 1, 1) == 0) { 3764 exp--; 3765 frac <<= 1; 3766 } 3767 3768 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3769 3770 if (exp != 0 && exp != UINT64_MAX) { 3771 /* 3772 * Overflow to inf or max value of same sign, 3773 * depending on sign and rounding mode. 3774 */ 3775 s->float_exception_flags |= (float_flag_inexact | 3776 float_flag_overflow); 3777 3778 if ((s->float_rounding_mode == float_round_to_zero) || 3779 ((s->float_rounding_mode == float_round_down) && !sign) || 3780 ((s->float_rounding_mode == float_round_up) && sign)) { 3781 /* Return greatest/negative finite value. */ 3782 return (sign << (exp_size + frac_size)) | 3783 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3784 } else { 3785 /* Return +-inf. */ 3786 return (sign << (exp_size + frac_size)) | 3787 MAKE_64BIT_MASK(frac_size, exp_size); 3788 } 3789 } 3790 } 3791 3792 int idx = frac >> (frac_size - precision); 3793 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3794 (frac_size - precision); 3795 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3796 3797 if (out_exp == 0 || out_exp == UINT64_MAX) { 3798 /* 3799 * The result is subnormal, but don't raise the underflow exception, 3800 * because there's no additional loss of precision. 
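 * For out_exp == 0 the 7-bit estimate is shifted right once with the
 * leading one made explicit; for out_exp == UINT64_MAX it is shifted
 * right once more and the exponent forced back to zero.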
3801 */ 3802 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3803 if (out_exp == UINT64_MAX) { 3804 out_frac >>= 1; 3805 out_exp = 0; 3806 } 3807 } 3808 3809 uint64_t val = 0; 3810 val = deposit64(val, 0, frac_size, out_frac); 3811 val = deposit64(val, frac_size, exp_size, out_exp); 3812 val = deposit64(val, frac_size + exp_size, 1, sign); 3813 return val; 3814 } 3815 3816 static float16 frec7_h(float16 f, float_status *s) 3817 { 3818 int exp_size = 5, frac_size = 10; 3819 bool sign = float16_is_neg(f); 3820 3821 /* frec7(+-inf) = +-0 */ 3822 if (float16_is_infinity(f)) { 3823 return float16_set_sign(float16_zero, sign); 3824 } 3825 3826 /* frec7(+-0) = +-inf */ 3827 if (float16_is_zero(f)) { 3828 s->float_exception_flags |= float_flag_divbyzero; 3829 return float16_set_sign(float16_infinity, sign); 3830 } 3831 3832 /* frec7(sNaN) = canonical NaN */ 3833 if (float16_is_signaling_nan(f, s)) { 3834 s->float_exception_flags |= float_flag_invalid; 3835 return float16_default_nan(s); 3836 } 3837 3838 /* frec7(qNaN) = canonical NaN */ 3839 if (float16_is_quiet_nan(f, s)) { 3840 return float16_default_nan(s); 3841 } 3842 3843 /* +-normal, +-subnormal */ 3844 uint64_t val = frec7(f, exp_size, frac_size, s); 3845 return make_float16(val); 3846 } 3847 3848 static float32 frec7_s(float32 f, float_status *s) 3849 { 3850 int exp_size = 8, frac_size = 23; 3851 bool sign = float32_is_neg(f); 3852 3853 /* frec7(+-inf) = +-0 */ 3854 if (float32_is_infinity(f)) { 3855 return float32_set_sign(float32_zero, sign); 3856 } 3857 3858 /* frec7(+-0) = +-inf */ 3859 if (float32_is_zero(f)) { 3860 s->float_exception_flags |= float_flag_divbyzero; 3861 return float32_set_sign(float32_infinity, sign); 3862 } 3863 3864 /* frec7(sNaN) = canonical NaN */ 3865 if (float32_is_signaling_nan(f, s)) { 3866 s->float_exception_flags |= float_flag_invalid; 3867 return float32_default_nan(s); 3868 } 3869 3870 /* frec7(qNaN) = canonical NaN */ 3871 if (float32_is_quiet_nan(f, s)) { 3872 return float32_default_nan(s); 3873 } 3874 3875 /* +-normal, +-subnormal */ 3876 uint64_t val = frec7(f, exp_size, frac_size, s); 3877 return make_float32(val); 3878 } 3879 3880 static float64 frec7_d(float64 f, float_status *s) 3881 { 3882 int exp_size = 11, frac_size = 52; 3883 bool sign = float64_is_neg(f); 3884 3885 /* frec7(+-inf) = +-0 */ 3886 if (float64_is_infinity(f)) { 3887 return float64_set_sign(float64_zero, sign); 3888 } 3889 3890 /* frec7(+-0) = +-inf */ 3891 if (float64_is_zero(f)) { 3892 s->float_exception_flags |= float_flag_divbyzero; 3893 return float64_set_sign(float64_infinity, sign); 3894 } 3895 3896 /* frec7(sNaN) = canonical NaN */ 3897 if (float64_is_signaling_nan(f, s)) { 3898 s->float_exception_flags |= float_flag_invalid; 3899 return float64_default_nan(s); 3900 } 3901 3902 /* frec7(qNaN) = canonical NaN */ 3903 if (float64_is_quiet_nan(f, s)) { 3904 return float64_default_nan(s); 3905 } 3906 3907 /* +-normal, +-subnormal */ 3908 uint64_t val = frec7(f, exp_size, frac_size, s); 3909 return make_float64(val); 3910 } 3911 3912 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3913 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3914 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3915 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 3916 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 3917 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 3918 3919 /* Vector Floating-Point MIN/MAX Instructions */ 3920 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3921 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 3922 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3923 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 3924 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 3925 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 3926 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3927 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3928 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3929 GEN_VEXT_VF(vfmin_vf_h, 2) 3930 GEN_VEXT_VF(vfmin_vf_w, 4) 3931 GEN_VEXT_VF(vfmin_vf_d, 8) 3932 3933 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3934 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3935 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3936 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 3937 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 3938 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 3939 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3940 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3941 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3942 GEN_VEXT_VF(vfmax_vf_h, 2) 3943 GEN_VEXT_VF(vfmax_vf_w, 4) 3944 GEN_VEXT_VF(vfmax_vf_d, 8) 3945 3946 /* Vector Floating-Point Sign-Injection Instructions */ 3947 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3948 { 3949 return deposit64(b, 0, 15, a); 3950 } 3951 3952 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3953 { 3954 return deposit64(b, 0, 31, a); 3955 } 3956 3957 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3958 { 3959 return deposit64(b, 0, 63, a); 3960 } 3961 3962 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3963 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3964 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3965 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 3966 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 3967 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 3968 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3969 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3970 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3971 GEN_VEXT_VF(vfsgnj_vf_h, 2) 3972 GEN_VEXT_VF(vfsgnj_vf_w, 4) 3973 GEN_VEXT_VF(vfsgnj_vf_d, 8) 3974 3975 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3976 { 3977 return deposit64(~b, 0, 15, a); 3978 } 3979 3980 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3981 { 3982 return deposit64(~b, 0, 31, a); 3983 } 3984 3985 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3986 { 3987 return deposit64(~b, 0, 63, a); 3988 } 3989 3990 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3991 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3992 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3993 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 3994 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 3995 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 3996 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3997 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3998 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3999 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 4000 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 4001 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 4002 4003 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 4004 { 4005 return deposit64(b ^ a, 0, 15, a); 4006 } 4007 4008 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 4009 { 4010 return deposit64(b ^ a, 0, 31, a); 4011 } 4012 4013 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
float_status *s) 4014 { 4015 return deposit64(b ^ a, 0, 63, a); 4016 } 4017 4018 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 4019 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 4020 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 4021 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 4022 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 4023 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 4024 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 4025 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 4026 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 4027 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 4028 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 4029 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 4030 4031 /* Vector Floating-Point Compare Instructions */ 4032 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 4033 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4034 CPURISCVState *env, uint32_t desc) \ 4035 { \ 4036 uint32_t vm = vext_vm(desc); \ 4037 uint32_t vl = env->vl; \ 4038 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 4039 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4040 uint32_t vma = vext_vma(desc); \ 4041 uint32_t i; \ 4042 \ 4043 VSTART_CHECK_EARLY_EXIT(env); \ 4044 \ 4045 for (i = env->vstart; i < vl; i++) { \ 4046 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 4047 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4048 if (!vm && !vext_elem_mask(v0, i)) { \ 4049 /* set masked-off elements to 1s */ \ 4050 if (vma) { \ 4051 vext_set_elem_mask(vd, i, 1); \ 4052 } \ 4053 continue; \ 4054 } \ 4055 vext_set_elem_mask(vd, i, \ 4056 DO_OP(s2, s1, &env->fp_status)); \ 4057 } \ 4058 env->vstart = 0; \ 4059 /* 4060 * mask destination register are always tail-agnostic 4061 * set tail elements to 1s 4062 */ \ 4063 if (vta_all_1s) { \ 4064 for (; i < total_elems; i++) { \ 4065 vext_set_elem_mask(vd, i, 1); \ 4066 } \ 4067 } \ 4068 } 4069 4070 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4071 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4072 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4073 4074 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4075 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4076 CPURISCVState *env, uint32_t desc) \ 4077 { \ 4078 uint32_t vm = vext_vm(desc); \ 4079 uint32_t vl = env->vl; \ 4080 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 4081 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4082 uint32_t vma = vext_vma(desc); \ 4083 uint32_t i; \ 4084 \ 4085 VSTART_CHECK_EARLY_EXIT(env); \ 4086 \ 4087 for (i = env->vstart; i < vl; i++) { \ 4088 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4089 if (!vm && !vext_elem_mask(v0, i)) { \ 4090 /* set masked-off elements to 1s */ \ 4091 if (vma) { \ 4092 vext_set_elem_mask(vd, i, 1); \ 4093 } \ 4094 continue; \ 4095 } \ 4096 vext_set_elem_mask(vd, i, \ 4097 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4098 } \ 4099 env->vstart = 0; \ 4100 /* 4101 * mask destination register are always tail-agnostic 4102 * set tail elements to 1s 4103 */ \ 4104 if (vta_all_1s) { \ 4105 for (; i < total_elems; i++) { \ 4106 vext_set_elem_mask(vd, i, 1); \ 4107 } \ 4108 } \ 4109 } 4110 4111 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4112 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4113 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4114 4115 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4116 { 4117 FloatRelation compare = float16_compare_quiet(a, b, s); 4118 return compare != float_relation_equal; 4119 } 4120 4121 static bool 
vmfne32(uint32_t a, uint32_t b, float_status *s) 4122 { 4123 FloatRelation compare = float32_compare_quiet(a, b, s); 4124 return compare != float_relation_equal; 4125 } 4126 4127 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4128 { 4129 FloatRelation compare = float64_compare_quiet(a, b, s); 4130 return compare != float_relation_equal; 4131 } 4132 4133 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4134 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4135 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4136 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4137 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4138 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4139 4140 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4141 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4142 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4143 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4144 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4145 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4146 4147 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4148 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4149 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4150 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4151 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4152 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4153 4154 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4155 { 4156 FloatRelation compare = float16_compare(a, b, s); 4157 return compare == float_relation_greater; 4158 } 4159 4160 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4161 { 4162 FloatRelation compare = float32_compare(a, b, s); 4163 return compare == float_relation_greater; 4164 } 4165 4166 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4167 { 4168 FloatRelation compare = float64_compare(a, b, s); 4169 return compare == float_relation_greater; 4170 } 4171 4172 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4173 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4174 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4175 4176 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4177 { 4178 FloatRelation compare = float16_compare(a, b, s); 4179 return compare == float_relation_greater || 4180 compare == float_relation_equal; 4181 } 4182 4183 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4184 { 4185 FloatRelation compare = float32_compare(a, b, s); 4186 return compare == float_relation_greater || 4187 compare == float_relation_equal; 4188 } 4189 4190 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4191 { 4192 FloatRelation compare = float64_compare(a, b, s); 4193 return compare == float_relation_greater || 4194 compare == float_relation_equal; 4195 } 4196 4197 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4198 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4199 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4200 4201 /* Vector Floating-Point Classify Instruction */ 4202 target_ulong fclass_h(uint64_t frs1) 4203 { 4204 float16 f = frs1; 4205 bool sign = float16_is_neg(f); 4206 4207 if (float16_is_infinity(f)) { 4208 return sign ? 1 << 0 : 1 << 7; 4209 } else if (float16_is_zero(f)) { 4210 return sign ? 1 << 3 : 1 << 4; 4211 } else if (float16_is_zero_or_denormal(f)) { 4212 return sign ? 
1 << 2 : 1 << 5; 4213 } else if (float16_is_any_nan(f)) { 4214 float_status s = { }; /* for snan_bit_is_one */ 4215 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4216 } else { 4217 return sign ? 1 << 1 : 1 << 6; 4218 } 4219 } 4220 4221 target_ulong fclass_s(uint64_t frs1) 4222 { 4223 float32 f = frs1; 4224 bool sign = float32_is_neg(f); 4225 4226 if (float32_is_infinity(f)) { 4227 return sign ? 1 << 0 : 1 << 7; 4228 } else if (float32_is_zero(f)) { 4229 return sign ? 1 << 3 : 1 << 4; 4230 } else if (float32_is_zero_or_denormal(f)) { 4231 return sign ? 1 << 2 : 1 << 5; 4232 } else if (float32_is_any_nan(f)) { 4233 float_status s = { }; /* for snan_bit_is_one */ 4234 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4235 } else { 4236 return sign ? 1 << 1 : 1 << 6; 4237 } 4238 } 4239 4240 target_ulong fclass_d(uint64_t frs1) 4241 { 4242 float64 f = frs1; 4243 bool sign = float64_is_neg(f); 4244 4245 if (float64_is_infinity(f)) { 4246 return sign ? 1 << 0 : 1 << 7; 4247 } else if (float64_is_zero(f)) { 4248 return sign ? 1 << 3 : 1 << 4; 4249 } else if (float64_is_zero_or_denormal(f)) { 4250 return sign ? 1 << 2 : 1 << 5; 4251 } else if (float64_is_any_nan(f)) { 4252 float_status s = { }; /* for snan_bit_is_one */ 4253 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4254 } else { 4255 return sign ? 1 << 1 : 1 << 6; 4256 } 4257 } 4258 4259 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4260 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4261 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4262 GEN_VEXT_V(vfclass_v_h, 2) 4263 GEN_VEXT_V(vfclass_v_w, 4) 4264 GEN_VEXT_V(vfclass_v_d, 8) 4265 4266 /* Vector Floating-Point Merge Instruction */ 4267 4268 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4269 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4270 CPURISCVState *env, uint32_t desc) \ 4271 { \ 4272 uint32_t vm = vext_vm(desc); \ 4273 uint32_t vl = env->vl; \ 4274 uint32_t esz = sizeof(ETYPE); \ 4275 uint32_t total_elems = \ 4276 vext_get_total_elems(env, desc, esz); \ 4277 uint32_t vta = vext_vta(desc); \ 4278 uint32_t i; \ 4279 \ 4280 VSTART_CHECK_EARLY_EXIT(env); \ 4281 \ 4282 for (i = env->vstart; i < vl; i++) { \ 4283 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4284 *((ETYPE *)vd + H(i)) = \ 4285 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4286 } \ 4287 env->vstart = 0; \ 4288 /* set tail elements to 1s */ \ 4289 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4290 } 4291 4292 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4293 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4294 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4295 4296 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4297 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4298 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4299 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4300 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4301 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4302 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4303 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4304 4305 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. 
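 * (the result is rounded according to the rounding mode currently held in
 *  env->fp_status)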
*/ 4306 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4307 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4308 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4309 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4310 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4311 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4312 4313 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4314 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4315 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4316 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4317 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4318 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4319 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4320 4321 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4322 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4323 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4324 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4325 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4326 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4327 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4328 4329 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4330 /* (TD, T2, TX2) */ 4331 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4332 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4333 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4334 /* 4335 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4336 */ 4337 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4338 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4339 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4340 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4341 4342 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4343 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4344 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4345 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4346 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4347 4348 /* 4349 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. 4350 */ 4351 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4352 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4353 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4354 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4355 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4356 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4357 4358 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4359 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4360 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4361 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4362 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4363 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4364 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4365 4366 /* 4367 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. 
4368 */ 4369 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4370 { 4371 return float16_to_float32(a, true, s); 4372 } 4373 4374 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4375 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4376 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4377 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4378 4379 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32) 4380 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4) 4381 4382 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4383 /* (TD, T2, TX2) */ 4384 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4385 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4386 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4387 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4388 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4389 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4390 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4391 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4392 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4393 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4394 4395 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4396 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4397 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4398 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4399 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4400 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4401 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4402 4403 /* 4404 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. 4405 */ 4406 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4407 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4408 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4409 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4410 4411 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4412 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4413 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4414 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4415 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4416 4417 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
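 * (the value is rounded to the narrower format, so the overflow, underflow
 *  and inexact flags can be raised)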
*/ 4418 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4419 { 4420 return float32_to_float16(a, true, s); 4421 } 4422 4423 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4424 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4425 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4426 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4427 4428 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16) 4429 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2) 4430 4431 /* 4432 * Vector Reduction Operations 4433 */ 4434 /* Vector Single-Width Integer Reduction Instructions */ 4435 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4436 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4437 void *vs2, CPURISCVState *env, \ 4438 uint32_t desc) \ 4439 { \ 4440 uint32_t vm = vext_vm(desc); \ 4441 uint32_t vl = env->vl; \ 4442 uint32_t esz = sizeof(TD); \ 4443 uint32_t vlenb = simd_maxsz(desc); \ 4444 uint32_t vta = vext_vta(desc); \ 4445 uint32_t i; \ 4446 TD s1 = *((TD *)vs1 + HD(0)); \ 4447 \ 4448 for (i = env->vstart; i < vl; i++) { \ 4449 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4450 if (!vm && !vext_elem_mask(v0, i)) { \ 4451 continue; \ 4452 } \ 4453 s1 = OP(s1, (TD)s2); \ 4454 } \ 4455 *((TD *)vd + HD(0)) = s1; \ 4456 env->vstart = 0; \ 4457 /* set tail elements to 1s */ \ 4458 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4459 } 4460 4461 /* vd[0] = sum(vs1[0], vs2[*]) */ 4462 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4463 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4464 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4465 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4466 4467 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4468 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4469 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4470 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4471 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4472 4473 /* vd[0] = max(vs1[0], vs2[*]) */ 4474 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4475 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4476 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4477 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4478 4479 /* vd[0] = minu(vs1[0], vs2[*]) */ 4480 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4481 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4482 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4483 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4484 4485 /* vd[0] = min(vs1[0], vs2[*]) */ 4486 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4487 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4488 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4489 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4490 4491 /* vd[0] = and(vs1[0], vs2[*]) */ 4492 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4493 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4494 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4495 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4496 4497 /* vd[0] = or(vs1[0], vs2[*]) */ 4498 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4499 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4500 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4501 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4502 4503 /* vd[0] = xor(vs1[0], vs2[*]) 
*/ 4504 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4505 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4506 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4507 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4508 4509 /* Vector Widening Integer Reduction Instructions */ 4510 /* signed sum reduction into double-width accumulator */ 4511 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4512 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4513 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4514 4515 /* Unsigned sum reduction into double-width accumulator */ 4516 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4517 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4518 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4519 4520 /* Vector Single-Width Floating-Point Reduction Instructions */ 4521 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4522 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4523 void *vs2, CPURISCVState *env, \ 4524 uint32_t desc) \ 4525 { \ 4526 uint32_t vm = vext_vm(desc); \ 4527 uint32_t vl = env->vl; \ 4528 uint32_t esz = sizeof(TD); \ 4529 uint32_t vlenb = simd_maxsz(desc); \ 4530 uint32_t vta = vext_vta(desc); \ 4531 uint32_t i; \ 4532 TD s1 = *((TD *)vs1 + HD(0)); \ 4533 \ 4534 for (i = env->vstart; i < vl; i++) { \ 4535 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4536 if (!vm && !vext_elem_mask(v0, i)) { \ 4537 continue; \ 4538 } \ 4539 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4540 } \ 4541 *((TD *)vd + HD(0)) = s1; \ 4542 env->vstart = 0; \ 4543 /* set tail elements to 1s */ \ 4544 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4545 } 4546 4547 /* Unordered sum */ 4548 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4549 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4550 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4551 4552 /* Ordered sum */ 4553 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4554 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4555 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4556 4557 /* Maximum value */ 4558 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, 4559 float16_maximum_number) 4560 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, 4561 float32_maximum_number) 4562 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, 4563 float64_maximum_number) 4564 4565 /* Minimum value */ 4566 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, 4567 float16_minimum_number) 4568 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, 4569 float32_minimum_number) 4570 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, 4571 float64_minimum_number) 4572 4573 /* Vector Widening Floating-Point Add Instructions */ 4574 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4575 { 4576 return float32_add(a, float16_to_float32(b, true, s), s); 4577 } 4578 4579 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4580 { 4581 return float64_add(a, float32_to_float64(b, s), s); 4582 } 4583 4584 /* Vector Widening Floating-Point Reduction Instructions */ 4585 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4586 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4587 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4588 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, 
fwadd16) 4589 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4590 4591 /* 4592 * Vector Mask Operations 4593 */ 4594 /* Vector Mask-Register Logical Instructions */ 4595 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4596 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4597 void *vs2, CPURISCVState *env, \ 4598 uint32_t desc) \ 4599 { \ 4600 uint32_t vl = env->vl; \ 4601 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\ 4602 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4603 uint32_t i; \ 4604 int a, b; \ 4605 \ 4606 VSTART_CHECK_EARLY_EXIT(env); \ 4607 \ 4608 for (i = env->vstart; i < vl; i++) { \ 4609 a = vext_elem_mask(vs1, i); \ 4610 b = vext_elem_mask(vs2, i); \ 4611 vext_set_elem_mask(vd, i, OP(b, a)); \ 4612 } \ 4613 env->vstart = 0; \ 4614 /* 4615 * mask destination register are always tail-agnostic 4616 * set tail elements to 1s 4617 */ \ 4618 if (vta_all_1s) { \ 4619 for (; i < total_elems; i++) { \ 4620 vext_set_elem_mask(vd, i, 1); \ 4621 } \ 4622 } \ 4623 } 4624 4625 #define DO_NAND(N, M) (!(N & M)) 4626 #define DO_ANDNOT(N, M) (N & !M) 4627 #define DO_NOR(N, M) (!(N | M)) 4628 #define DO_ORNOT(N, M) (N | !M) 4629 #define DO_XNOR(N, M) (!(N ^ M)) 4630 4631 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4632 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4633 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4634 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4635 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4636 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4637 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4638 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4639 4640 /* Vector count population in mask vcpop */ 4641 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4642 uint32_t desc) 4643 { 4644 target_ulong cnt = 0; 4645 uint32_t vm = vext_vm(desc); 4646 uint32_t vl = env->vl; 4647 int i; 4648 4649 for (i = env->vstart; i < vl; i++) { 4650 if (vm || vext_elem_mask(v0, i)) { 4651 if (vext_elem_mask(vs2, i)) { 4652 cnt++; 4653 } 4654 } 4655 } 4656 env->vstart = 0; 4657 return cnt; 4658 } 4659 4660 /* vfirst find-first-set mask bit */ 4661 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4662 uint32_t desc) 4663 { 4664 uint32_t vm = vext_vm(desc); 4665 uint32_t vl = env->vl; 4666 int i; 4667 4668 for (i = env->vstart; i < vl; i++) { 4669 if (vm || vext_elem_mask(v0, i)) { 4670 if (vext_elem_mask(vs2, i)) { 4671 return i; 4672 } 4673 } 4674 } 4675 env->vstart = 0; 4676 return -1LL; 4677 } 4678 4679 enum set_mask_type { 4680 ONLY_FIRST = 1, 4681 INCLUDE_FIRST, 4682 BEFORE_FIRST, 4683 }; 4684 4685 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4686 uint32_t desc, enum set_mask_type type) 4687 { 4688 uint32_t vm = vext_vm(desc); 4689 uint32_t vl = env->vl; 4690 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; 4691 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4692 uint32_t vma = vext_vma(desc); 4693 int i; 4694 bool first_mask_bit = false; 4695 4696 for (i = env->vstart; i < vl; i++) { 4697 if (!vm && !vext_elem_mask(v0, i)) { 4698 /* set masked-off elements to 1s */ 4699 if (vma) { 4700 vext_set_elem_mask(vd, i, 1); 4701 } 4702 continue; 4703 } 4704 /* write a zero to all following active elements */ 4705 if (first_mask_bit) { 4706 vext_set_elem_mask(vd, i, 0); 4707 continue; 4708 } 4709 if (vext_elem_mask(vs2, i)) { 4710 first_mask_bit = true; 4711 if (type == BEFORE_FIRST) { 4712 vext_set_elem_mask(vd, i, 0); 4713 } else { 4714 vext_set_elem_mask(vd, i, 1); 4715 } 4716 } else { 4717 if (type == ONLY_FIRST) { 4718 vext_set_elem_mask(vd, i, 0); 4719 } else { 4720 
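                /* vmsbf/vmsif: still before the first set bit in vs2, so write 1 */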
vext_set_elem_mask(vd, i, 1); 4721 } 4722 } 4723 } 4724 env->vstart = 0; 4725 /* 4726 * mask destination register are always tail-agnostic 4727 * set tail elements to 1s 4728 */ 4729 if (vta_all_1s) { 4730 for (; i < total_elems; i++) { 4731 vext_set_elem_mask(vd, i, 1); 4732 } 4733 } 4734 } 4735 4736 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4737 uint32_t desc) 4738 { 4739 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4740 } 4741 4742 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4743 uint32_t desc) 4744 { 4745 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4746 } 4747 4748 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4749 uint32_t desc) 4750 { 4751 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4752 } 4753 4754 /* Vector Iota Instruction */ 4755 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4756 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4757 uint32_t desc) \ 4758 { \ 4759 uint32_t vm = vext_vm(desc); \ 4760 uint32_t vl = env->vl; \ 4761 uint32_t esz = sizeof(ETYPE); \ 4762 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4763 uint32_t vta = vext_vta(desc); \ 4764 uint32_t vma = vext_vma(desc); \ 4765 uint32_t sum = 0; \ 4766 int i; \ 4767 \ 4768 for (i = env->vstart; i < vl; i++) { \ 4769 if (!vm && !vext_elem_mask(v0, i)) { \ 4770 /* set masked-off elements to 1s */ \ 4771 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4772 continue; \ 4773 } \ 4774 *((ETYPE *)vd + H(i)) = sum; \ 4775 if (vext_elem_mask(vs2, i)) { \ 4776 sum++; \ 4777 } \ 4778 } \ 4779 env->vstart = 0; \ 4780 /* set tail elements to 1s */ \ 4781 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4782 } 4783 4784 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4785 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4786 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4787 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4788 4789 /* Vector Element Index Instruction */ 4790 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4791 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4792 { \ 4793 uint32_t vm = vext_vm(desc); \ 4794 uint32_t vl = env->vl; \ 4795 uint32_t esz = sizeof(ETYPE); \ 4796 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4797 uint32_t vta = vext_vta(desc); \ 4798 uint32_t vma = vext_vma(desc); \ 4799 int i; \ 4800 \ 4801 VSTART_CHECK_EARLY_EXIT(env); \ 4802 \ 4803 for (i = env->vstart; i < vl; i++) { \ 4804 if (!vm && !vext_elem_mask(v0, i)) { \ 4805 /* set masked-off elements to 1s */ \ 4806 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4807 continue; \ 4808 } \ 4809 *((ETYPE *)vd + H(i)) = i; \ 4810 } \ 4811 env->vstart = 0; \ 4812 /* set tail elements to 1s */ \ 4813 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4814 } 4815 4816 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4817 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4818 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4819 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4820 4821 /* 4822 * Vector Permutation Instructions 4823 */ 4824 4825 /* Vector Slide Instructions */ 4826 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4827 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4828 CPURISCVState *env, uint32_t desc) \ 4829 { \ 4830 uint32_t vm = vext_vm(desc); \ 4831 uint32_t vl = env->vl; \ 4832 uint32_t esz = sizeof(ETYPE); \ 4833 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4834 uint32_t vta = vext_vta(desc); \ 4835 uint32_t vma = vext_vma(desc); \ 4836 target_ulong offset = s1, i_min, i; 
\ 4837 \ 4838 VSTART_CHECK_EARLY_EXIT(env); \ 4839 \ 4840 i_min = MAX(env->vstart, offset); \ 4841 for (i = i_min; i < vl; i++) { \ 4842 if (!vm && !vext_elem_mask(v0, i)) { \ 4843 /* set masked-off elements to 1s */ \ 4844 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4845 continue; \ 4846 } \ 4847 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4848 } \ 4849 env->vstart = 0; \ 4850 /* set tail elements to 1s */ \ 4851 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4852 } 4853 4854 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4855 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4856 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4857 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4858 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4859 4860 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4861 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4862 CPURISCVState *env, uint32_t desc) \ 4863 { \ 4864 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4865 uint32_t vm = vext_vm(desc); \ 4866 uint32_t vl = env->vl; \ 4867 uint32_t esz = sizeof(ETYPE); \ 4868 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4869 uint32_t vta = vext_vta(desc); \ 4870 uint32_t vma = vext_vma(desc); \ 4871 target_ulong i_max, i_min, i; \ 4872 \ 4873 VSTART_CHECK_EARLY_EXIT(env); \ 4874 \ 4875 i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl); \ 4876 i_max = MAX(i_min, env->vstart); \ 4877 for (i = env->vstart; i < i_max; ++i) { \ 4878 if (!vm && !vext_elem_mask(v0, i)) { \ 4879 /* set masked-off elements to 1s */ \ 4880 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4881 continue; \ 4882 } \ 4883 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4884 } \ 4885 \ 4886 for (i = i_max; i < vl; ++i) { \ 4887 if (vm || vext_elem_mask(v0, i)) { \ 4888 *((ETYPE *)vd + H(i)) = 0; \ 4889 } \ 4890 } \ 4891 \ 4892 env->vstart = 0; \ 4893 /* set tail elements to 1s */ \ 4894 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4895 } 4896 4897 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4898 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4899 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4900 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4901 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4902 4903 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ 4904 static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ 4905 void *vs2, CPURISCVState *env, \ 4906 uint32_t desc) \ 4907 { \ 4908 typedef uint##BITWIDTH##_t ETYPE; \ 4909 uint32_t vm = vext_vm(desc); \ 4910 uint32_t vl = env->vl; \ 4911 uint32_t esz = sizeof(ETYPE); \ 4912 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4913 uint32_t vta = vext_vta(desc); \ 4914 uint32_t vma = vext_vma(desc); \ 4915 uint32_t i; \ 4916 \ 4917 VSTART_CHECK_EARLY_EXIT(env); \ 4918 \ 4919 for (i = env->vstart; i < vl; i++) { \ 4920 if (!vm && !vext_elem_mask(v0, i)) { \ 4921 /* set masked-off elements to 1s */ \ 4922 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4923 continue; \ 4924 } \ 4925 if (i == 0) { \ 4926 *((ETYPE *)vd + H(i)) = s1; \ 4927 } else { \ 4928 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4929 } \ 4930 } \ 4931 env->vstart = 0; \ 4932 /* set tail elements to 1s */ \ 4933 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4934 } 4935 4936 GEN_VEXT_VSLIE1UP(8, H1) 4937 GEN_VEXT_VSLIE1UP(16, H2) 4938 GEN_VEXT_VSLIE1UP(32, H4) 4939 GEN_VEXT_VSLIE1UP(64, H8) 4940 4941 #define 
#define GEN_VEXT_VSLIE1UP(BITWIDTH, H)                                    \
static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1,         \
                                 void *vs2, CPURISCVState *env,           \
                                 uint32_t desc)                           \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env);                                         \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == 0) {                                                     \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIE1UP(8, H1)
GEN_VEXT_VSLIE1UP(16, H2)
GEN_VEXT_VSLIE1UP(32, H4)
GEN_VEXT_VSLIE1UP(64, H8)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH)                             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)

#define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H)                                 \
static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1,       \
                                   void *vs2, CPURISCVState *env,         \
                                   uint32_t desc)                         \
{                                                                         \
    typedef uint##BITWIDTH##_t ETYPE;                                     \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env);                                         \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_VSLIDE1DOWN(8, H1)
GEN_VEXT_VSLIDE1DOWN(16, H2)
GEN_VEXT_VSLIDE1DOWN(32, H4)
GEN_VEXT_VSLIDE1DOWN(64, H8)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)

/* Vector Floating-Point Slide Instructions */
#define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc);                     \
}

/* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)

#define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH)                          \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,             \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc);                   \
}

/* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
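/*
 * Worked example (illustrative values): with vl = 4, x[rs1] = 99 and
 * vs2 = {10, 20, 30, 40}, an unmasked vslide1up.vx gives
 * vd = {99, 10, 20, 30} and vslide1down.vx gives vd = {20, 30, 40, 99}.
 * The vfslide1up.vf/vfslide1down.vf forms behave the same way but take
 * the scalar operand from f[rs1].
 */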
/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env);                                         \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
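/*
 * Worked example (illustrative values): with VLMAX = vl = 4,
 * vs2 = {10, 20, 30, 40} and index vector vs1 = {3, 0, 7, 1}, an
 * unmasked vrgather.vv writes vd = {40, 10, 0, 20}; index 7 is out of
 * range (>= VLMAX), so that element reads as zero.  vrgatherei16.vv
 * behaves the same way but always reads 16-bit indices from vs1,
 * regardless of SEW.
 */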
#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env);                                         \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, num * esz, total_elems * esz);             \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    if (startb >= maxsz) {
        env->vstart = 0;
        return;
    }

    if (HOST_BIG_ENDIAN && i % 8 != 0) {
        uint32_t j = ROUND_UP(i, 8);
        memcpy((uint8_t *)vd + H1(j - 1),
               (uint8_t *)vs2 + H1(j - 1),
               j - i);
        i = j;
    }

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - i);

    env->vstart = 0;
}
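/*
 * Worked example (illustrative values): with vl = 4, mask
 * vs1 = 0 1 1 0 (element 0 first) and vs2 = {10, 20, 30, 40},
 * vcompress.vm packs the enabled elements into the low end of vd,
 * giving vd[0] = 20 and vd[1] = 30; elements from index 2 upward are
 * tail and follow vta.  vmvr_v, by contrast, is a plain byte copy of
 * the whole register group, honoring only a non-zero vstart.
 */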
/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    VSTART_CHECK_EARLY_EXIT(env);                                         \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
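/*
 * Worked example (illustrative values): for a source element 0xF0
 * (8-bit), vzext.vf2 produces the 16-bit result 0x00F0, while
 * vsext.vf2 sign-extends it to 0xFFF0.  The vf4/vf8 variants do the
 * same from quarter- and eighth-width source elements; the extension
 * itself is simply the implicit C integer conversion in the
 * assignment above.
 */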