/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include "vector_internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
    uint16_t sew = 8 << vsew;
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);
    int8_t lmul;

    if (vlmul & 4) {
        /*
         * Fractional LMUL, check:
         *
         * VLEN * LMUL >= SEW
         * VLEN >> (8 - lmul) >= sew
         * (vlenb << 3) >> (8 - lmul) >= sew
         * vlenb >> (8 - 3 - lmul) >= sew
         */
        if (vlmul == 4 ||
            cpu->cfg.vlenb >> (8 - 3 - vlmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    /* lmul encoded as in DisasContext::lmul */
    lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
    vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Get the maximum number of elements that can be operated upon.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}
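/*
 * Worked example for vext_max_elems() above (illustrative values assumed):
 * with VLEN = 128 bits, vlenb = simd_maxsz(desc) = 16; for LMUL = 2
 * (vext_lmul(desc) = 1) and SEW = 32 (log2_esz = 2), scale = 1 - 2 = -1,
 * so VLMAX = 16 >> 1 = 8 = VLEN * LMUL / SEW.
 */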
static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & ~env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In system mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);
    int mmu_index = riscv_env_mmu_index(env, false);

    probe_access(env, adjust_addr(env, addr), curlen, access_type,
                 mmu_index, ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, adjust_addr(env, addr), curlen, access_type,
                     mmu_index, ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
static void NAME(CPURISCVState *env, abi_ptr addr, \
                 uint32_t idx, void *vd, uintptr_t retaddr) \
{ \
    ETYPE *cur = ((ETYPE *)vd + H(idx)); \
    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
} \

GEN_VEXT_LD_ELEM(lde_b, int8_t,  H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
static void NAME(CPURISCVState *env, abi_ptr addr, \
                 uint32_t idx, void *vd, uintptr_t retaddr) \
{ \
    ETYPE data = *((ETYPE *)vd + H(idx)); \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
}

GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
                                   uint32_t desc, uint32_t nf,
                                   uint32_t esz, uint32_t max_elems)
{
    uint32_t vta = vext_vta(desc);
    int k;

    if (vta == 0) {
        return;
    }

    for (k = 0; k < nf; ++k) {
        vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
                          (k * max_elems + max_elems) * esz);
    }
}

/*
 * stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            target_ulong addr = base + stride * i + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}
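/*
 * Worked example for the strided loop above (illustrative values assumed):
 * a two-field segment load (nf = 2) with esz = 4 reads field k of element i
 * from base + i * stride + k * 4 and writes it to destination element
 * i + k * max_elems, i.e. field 0 of every element lands in the first
 * destination register group and field 1 in the following one.
 */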
#define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  target_ulong stride, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
                     ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LD_STRIDE(vlse8_v,  int8_t,  lde_b)
GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)

#define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  target_ulong stride, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
                     ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_ST_STRIDE(vsse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)

/*
 * unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
             uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;

    /* load bytes from guest memory */
    for (i = env->vstart; i < evl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
}

/*
 * A masked unit-stride load or store operation is handled as a special case
 * of the strided operation, with stride = NF * sizeof(ETYPE)
 */

#define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
                         CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
                     ctzl(sizeof(ETYPE)), GETPC()); \
} \
 \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_us(vd, base, env, desc, LOAD_FN, \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
}

GEN_VEXT_LD_US(vle8_v,  int8_t,  lde_b)
GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)

#define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
                         CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
                     ctzl(sizeof(ETYPE)), GETPC()); \
} \
 \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_us(vd, base, env, desc, STORE_FN, \
                 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
}

GEN_VEXT_ST_US(vse8_v,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
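/*
 * Note on the two entry points generated above: the unmasked helpers call
 * vext_ldst_us() directly, while the *_mask variants fall back to
 * vext_ldst_stride() with vm = false and stride = NF * sizeof(ETYPE), so
 * every element is checked against the v0 mask register.
 */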
/*
 * unit stride mask load and store, EEW = 1
 */
void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, lde_b,
                 0, evl, GETPC());
}

void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
                   CPURISCVState *env, uint32_t desc)
{
    /* evl = ceil(vl/8) */
    uint8_t evl = (env->vl + 7) >> 3;
    vext_ldst_us(vd, base, env, desc, ste_b,
                 0, evl, GETPC());
}

/*
 * index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
static target_ulong NAME(target_ulong base, \
                         uint32_t idx, void *vs2) \
{ \
    return (base + *((ETYPE *)vs2 + H(idx))); \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);

    /* load bytes from guest memory */
    for (i = env->vstart; i < env->vl; i++, env->vstart++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
                    LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LD_INDEX(vlxei8_8_v,   int8_t,  idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxei8_16_v,  int16_t, idx_b, lde_h)
GEN_VEXT_LD_INDEX(vlxei8_32_v,  int32_t, idx_b, lde_w)
GEN_VEXT_LD_INDEX(vlxei8_64_v,  int64_t, idx_b, lde_d)
GEN_VEXT_LD_INDEX(vlxei16_8_v,  int8_t,  idx_h, lde_b)
GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
GEN_VEXT_LD_INDEX(vlxei32_8_v,  int8_t,  idx_w, lde_b)
GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
GEN_VEXT_LD_INDEX(vlxei64_8_v,  int8_t,  idx_d, lde_b)
GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
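/*
 * Reading the instantiations above: the first suffix is the index EEW and
 * the second is the data EEW, e.g. vlxei16_32_v gathers 32-bit elements
 * (lde_w) from base + vs2[i], where vs2 holds 16-bit offsets (idx_h).
 * Field k of a segment adds a further k * sizeof(ETYPE) to that address.
 */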
#define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
                    STORE_FN, ctzl(sizeof(ETYPE)), \
                    GETPC()); \
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 * unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, offset, remain;
    int mmu_index = riscv_env_mmu_index(env, false);

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_index);
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (!page_check_range(addr, offset, PAGE_READ)) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
              ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
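/*
 * Fault-only-first semantics as implemented above: element 0 is probed with
 * probe_pages() and faults normally, while for any later element whose page
 * cannot be translated the probe loop sets vl = i and jumps to ProbeSuccess,
 * so the instruction completes with a truncated vl instead of trapping.
 */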

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/*
 * load and store whole register instructions
 */
static void
vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
{
    uint32_t i, k, off, pos;
    uint32_t nf = vext_nf(desc);
    uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
    uint32_t max_elems = vlenb >> log2_esz;

    k = env->vstart / max_elems;
    off = env->vstart % max_elems;

    if (off) {
        /* load/store rest of elements of the current segment pointed to by vstart */
        for (pos = off; pos < max_elems; pos++, env->vstart++) {
            target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd,
                      ra);
        }
        k++;
    }

    /* load/store elements for the rest of the segments */
    for (; k < nf; k++) {
        for (i = 0; i < max_elems; i++, env->vstart++) {
            target_ulong addr = base + ((i + k * max_elems) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
        }
    }

    env->vstart = 0;
}

#define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
void HELPER(NAME)(void *vd, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
                    ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_LD_WHOLE(vl1re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl2re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl4re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
GEN_VEXT_LD_WHOLE(vl8re8_v,  int8_t,  lde_b)
GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)

#define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
void HELPER(NAME)(void *vd, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_whole(vd, base, env, desc, STORE_FN, \
                    ctzl(sizeof(ETYPE)), GETPC()); \
}

GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)

/*
 * Vector Integer Arithmetic Instructions
 */

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

GEN_VEXT_VV(vadd_vv_b, 1)
GEN_VEXT_VV(vadd_vv_h, 2)
GEN_VEXT_VV(vadd_vv_w, 4)
GEN_VEXT_VV(vadd_vv_d, 8)
GEN_VEXT_VV(vsub_vv_b, 1)
GEN_VEXT_VV(vsub_vv_h, 2)
GEN_VEXT_VV(vsub_vv_w, 4)
GEN_VEXT_VV(vsub_vv_d, 8)


RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

GEN_VEXT_VX(vadd_vx_b, 1)
GEN_VEXT_VX(vadd_vx_h, 2)
GEN_VEXT_VX(vadd_vx_w, 4)
GEN_VEXT_VX(vadd_vx_d, 8)
GEN_VEXT_VX(vsub_vx_b, 1)
GEN_VEXT_VX(vsub_vx_h, 2)
GEN_VEXT_VX(vsub_vx_w, 4)
GEN_VEXT_VX(vsub_vx_d, 8)
GEN_VEXT_VX(vrsub_vx_b, 1)
GEN_VEXT_VX(vrsub_vx_h, 2)
GEN_VEXT_VX(vrsub_vx_w, 4)
GEN_VEXT_VX(vrsub_vx_d, 8)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}
void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 2)
GEN_VEXT_VV(vwaddu_vv_h, 4)
GEN_VEXT_VV(vwaddu_vv_w, 8)
GEN_VEXT_VV(vwsubu_vv_b, 2)
GEN_VEXT_VV(vwsubu_vv_h, 4)
GEN_VEXT_VV(vwsubu_vv_w, 8)
GEN_VEXT_VV(vwadd_vv_b, 2)
GEN_VEXT_VV(vwadd_vv_h, 4)
GEN_VEXT_VV(vwadd_vv_w, 8)
GEN_VEXT_VV(vwsub_vv_b, 2)
GEN_VEXT_VV(vwsub_vv_h, 4)
GEN_VEXT_VV(vwsub_vv_w, 8)
GEN_VEXT_VV(vwaddu_wv_b, 2)
GEN_VEXT_VV(vwaddu_wv_h, 4)
GEN_VEXT_VV(vwaddu_wv_w, 8)
GEN_VEXT_VV(vwsubu_wv_b, 2)
GEN_VEXT_VV(vwsubu_wv_h, 4)
GEN_VEXT_VV(vwsubu_wv_w, 8)
GEN_VEXT_VV(vwadd_wv_b, 2)
GEN_VEXT_VV(vwadd_wv_h, 4)
GEN_VEXT_VV(vwadd_wv_w, 8)
GEN_VEXT_VV(vwsub_wv_b, 2)
GEN_VEXT_VV(vwsub_wv_h, 4)
GEN_VEXT_VV(vwsub_wv_w, 8)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 2)
GEN_VEXT_VX(vwaddu_vx_h, 4)
GEN_VEXT_VX(vwaddu_vx_w, 8)
GEN_VEXT_VX(vwsubu_vx_b, 2)
GEN_VEXT_VX(vwsubu_vx_h, 4)
GEN_VEXT_VX(vwsubu_vx_w, 8)
GEN_VEXT_VX(vwadd_vx_b, 2)
GEN_VEXT_VX(vwadd_vx_h, 4)
GEN_VEXT_VX(vwadd_vx_w, 8)
GEN_VEXT_VX(vwsub_vx_b, 2)
GEN_VEXT_VX(vwsub_vx_h, 4)
GEN_VEXT_VX(vwsub_vx_w, 8)
GEN_VEXT_VX(vwaddu_wx_b, 2)
GEN_VEXT_VX(vwaddu_wx_h, 4)
GEN_VEXT_VX(vwaddu_wx_w, 8)
GEN_VEXT_VX(vwsubu_wx_b, 2)
GEN_VEXT_VX(vwsubu_wx_h, 4)
GEN_VEXT_VX(vwsubu_wx_w, 8)
GEN_VEXT_VX(vwadd_wx_b, 2)
GEN_VEXT_VX(vwadd_wx_h, 4)
GEN_VEXT_VX(vwadd_wx_w, 8)
GEN_VEXT_VX(vwsub_wx_b, 2)
GEN_VEXT_VX(vwsub_wx_h, 4)
GEN_VEXT_VX(vwsub_wx_w, 8)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = \
        vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = vext_elem_mask(v0, i); \
 \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
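/*
 * Carry/borrow illustration for the helpers above: vadc.vvm computes
 * vd[i] = vs2[i] + vs1[i] + v0.mask[i] and vsbc.vvm computes
 * vd[i] = vs2[i] - vs1[i] - v0.mask[i]; the v0 mask register supplies the
 * carry/borrow input for every element.
 */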

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = vext_elem_mask(v0, i); \
 \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
                          (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)
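/*
 * How DO_MADC detects the carry-out, with 8-bit values as an example:
 * for N = 200, M = 100 and C = 0, (uint8_t)(N + M) = 44 < N, so the result
 * bit is 1. The C ? ... <= N : ... < N distinction accounts for the extra
 * +1 possibly making N + M + 1 wrap to exactly N.
 */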

#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = !vm && vext_elem_mask(v0, i); \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
    } \
    env->vstart = 0; \
    /*
     * mask destination registers are always tail-agnostic
     * set tail elements to 1s
     */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        ETYPE carry = !vm && vext_elem_mask(v0, i); \
        vext_set_elem_mask(vd, i, \
                           DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
    } \
    env->vstart = 0; \
    /*
     * mask destination registers are always tail-agnostic
     * set tail elements to 1s
     */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1)
GEN_VEXT_VV(vand_vv_h, 2)
GEN_VEXT_VV(vand_vv_w, 4)
GEN_VEXT_VV(vand_vv_d, 8)
GEN_VEXT_VV(vor_vv_b, 1)
GEN_VEXT_VV(vor_vv_h, 2)
GEN_VEXT_VV(vor_vv_w, 4)
GEN_VEXT_VV(vor_vv_d, 8)
GEN_VEXT_VV(vxor_vv_b, 1)
GEN_VEXT_VV(vxor_vv_h, 2)
GEN_VEXT_VV(vxor_vv_w, 4)
GEN_VEXT_VV(vxor_vv_d, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1)
GEN_VEXT_VX(vand_vx_h, 2)
GEN_VEXT_VX(vand_vx_w, 4)
GEN_VEXT_VX(vand_vx_d, 8)
GEN_VEXT_VX(vor_vx_b, 1)
GEN_VEXT_VX(vor_vx_h, 2)
GEN_VEXT_VX(vor_vx_w, 4)
GEN_VEXT_VX(vor_vx_d, 8)
GEN_VEXT_VX(vxor_vx_b, 1)
GEN_VEXT_VX(vxor_vx_h, 2)
GEN_VEXT_VX(vxor_vx_w, 4)
GEN_VEXT_VX(vxor_vx_d, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M) (N << (M))
#define DO_SRL(N, M) (N >> (M))
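/*
 * The MASK argument used by the shift helpers below truncates the shift
 * amount to SEW - 1 bits (0x7, 0xf, 0x1f or 0x3f). The arithmetic-shift
 * variants reuse DO_SRL with a signed source type, which the compilers
 * supported by QEMU implement as an arithmetic right shift.
 */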
/* generate the helpers for shift instructions with two vector operands */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(TS1); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
        TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/*
 * generate the helpers for shift instructions with one vector and one scalar
 */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(TD); \
    uint32_t total_elems = \
        vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, \
                              (i + 1) * esz); \
            continue; \
        } \
        TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            if (vma) { \
                vext_set_elem_mask(vd, i, 1); \
            } \
            continue; \
        } \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
    } \
    env->vstart = 0; \
    /*
     * mask destination registers are always tail-agnostic
     * set tail elements to 1s
     */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
    uint32_t vta_all_1s = vext_vta_all_1s(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
 \
    for (i = env->vstart; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            if (vma) { \
                vext_set_elem_mask(vd, i, 1); \
            } \
            continue; \
        } \
        vext_set_elem_mask(vd, i, \
                           DO_OP(s2, (ETYPE)(target_long)s1)); \
    } \
    env->vstart = 0; \
    /*
     * mask destination registers are always tail-agnostic
     * set tail elements to 1s
     */ \
    if (vta_all_1s) { \
        for (; i < total_elems; i++) { \
            vext_set_elem_mask(vd, i, 1); \
        } \
    } \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1)
GEN_VEXT_VV(vminu_vv_h, 2)
GEN_VEXT_VV(vminu_vv_w, 4)
GEN_VEXT_VV(vminu_vv_d, 8)
GEN_VEXT_VV(vmin_vv_b, 1)
GEN_VEXT_VV(vmin_vv_h, 2)
GEN_VEXT_VV(vmin_vv_w, 4)
GEN_VEXT_VV(vmin_vv_d, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8)
GEN_VEXT_VV(vmax_vv_b, 1)
GEN_VEXT_VV(vmax_vv_h, 2)
GEN_VEXT_VV(vmax_vv_w, 4)
GEN_VEXT_VV(vmax_vv_d, 8)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1)
GEN_VEXT_VX(vminu_vx_h, 2)
GEN_VEXT_VX(vminu_vx_w, 4)
GEN_VEXT_VX(vminu_vx_d, 8)
GEN_VEXT_VX(vmin_vx_b, 1)
GEN_VEXT_VX(vmin_vx_h, 2)
GEN_VEXT_VX(vmin_vx_w, 4)
GEN_VEXT_VX(vmin_vx_d, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8)
GEN_VEXT_VX(vmax_vx_b, 1)
GEN_VEXT_VX(vmax_vx_h, 2)
GEN_VEXT_VX(vmax_vx_w, 4)
GEN_VEXT_VX(vmax_vx_d, 8)

/* Vector Single-Width Integer Multiply Instructions */
#define DO_MUL(N, M) (N * M)
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b, 1)
GEN_VEXT_VV(vmul_vv_h, 2)
GEN_VEXT_VV(vmul_vv_w, 4)
GEN_VEXT_VV(vmul_vv_d, 8)

static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    return (int16_t)s2 * (int16_t)s1 >> 8;
}

static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    return (int32_t)s2 * (int32_t)s1 >> 16;
}

static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    return (int64_t)s2 * (int64_t)s1 >> 32;
}

static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t hi_64, lo_64;

    muls64(&lo_64, &hi_64, s1, s2);
    return hi_64;
}

static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    return (uint16_t)s2 * (uint16_t)s1 >> 8;
}

static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    return (uint32_t)s2 * (uint32_t)s1 >> 16;
}

static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    return (uint64_t)s2 * (uint64_t)s1 >> 32;
}

static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);
    return hi_64;
}
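/*
 * Example for the high-half helpers above: do_mulh_b(-2, 3) widens both
 * operands to int16_t, multiplies to -6 (0xfffa) and shifts right by 8,
 * returning -1 (0xff), i.e. the upper SEW bits of the 2*SEW product.
 */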
static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    return (int16_t)s2 * (uint16_t)s1 >> 8;
}

static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    return (int32_t)s2 * (uint32_t)s1 >> 16;
}

static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    return (int64_t)s2 * (uint64_t)s1 >> 32;
}

/*
 * Let  A = signed operand,
 *      B = unsigned operand
 *      P = mulu64(A, B), unsigned product
 *
 * LET  X = 2 ** 64 - A, 2's complement of A
 *      SP = signed product
 * THEN
 *      IF A < 0
 *          SP = -X * B
 *             = -(2 ** 64 - A) * B
 *             = A * B - 2 ** 64 * B
 *             = P - 2 ** 64 * B
 *      ELSE
 *          SP = P
 * THEN
 *      HI_P -= (A < 0 ? B : 0)
 */

static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);

    hi_64 -= s2 < 0 ? s1 : 0;
    return hi_64;
}

RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
GEN_VEXT_VV(vmulh_vv_b, 1)
GEN_VEXT_VV(vmulh_vv_h, 2)
GEN_VEXT_VV(vmulh_vv_w, 4)
GEN_VEXT_VV(vmulh_vv_d, 8)
GEN_VEXT_VV(vmulhu_vv_b, 1)
GEN_VEXT_VV(vmulhu_vv_h, 2)
GEN_VEXT_VV(vmulhu_vv_w, 4)
GEN_VEXT_VV(vmulhu_vv_d, 8)
GEN_VEXT_VV(vmulhsu_vv_b, 1)
GEN_VEXT_VV(vmulhsu_vv_h, 2)
GEN_VEXT_VV(vmulhsu_vv_w, 4)
GEN_VEXT_VV(vmulhsu_vv_d, 8)

RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
GEN_VEXT_VX(vmul_vx_b, 1)
GEN_VEXT_VX(vmul_vx_h, 2)
GEN_VEXT_VX(vmul_vx_w, 4)
GEN_VEXT_VX(vmul_vx_d, 8)
GEN_VEXT_VX(vmulh_vx_b, 1)
GEN_VEXT_VX(vmulh_vx_h, 2)
GEN_VEXT_VX(vmulh_vx_w, 4)
GEN_VEXT_VX(vmulh_vx_d, 8)
GEN_VEXT_VX(vmulhu_vx_b, 1)
GEN_VEXT_VX(vmulhu_vx_h, 2)
GEN_VEXT_VX(vmulhu_vx_w, 4)
GEN_VEXT_VX(vmulhu_vx_d, 8)
GEN_VEXT_VX(vmulhsu_vx_b, 1)
GEN_VEXT_VX(vmulhsu_vx_h, 2)
GEN_VEXT_VX(vmulhsu_vx_w, 4)
GEN_VEXT_VX(vmulhsu_vx_d, 8)
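/*
 * In DO_DIV and DO_REM below, the (N == -N) test is a branch-free way to
 * recognise the most negative value of the type (only 0 and the minimum
 * value equal their own negation, and for 0 the special case returns the
 * same result as the plain division), so INT_MIN / -1 yields INT_MIN and
 * INT_MIN % -1 yields 0 instead of overflowing.
 */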

/* Vector Integer Divide Instructions */
#define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
#define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
#define DO_DIV(N, M)  (unlikely(M == 0) ? (__typeof(N))(-1) : \
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
#define DO_REM(N, M)  (unlikely(M == 0) ? N : \
        unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)

RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
GEN_VEXT_VV(vdivu_vv_b, 1)
GEN_VEXT_VV(vdivu_vv_h, 2)
GEN_VEXT_VV(vdivu_vv_w, 4)
GEN_VEXT_VV(vdivu_vv_d, 8)
GEN_VEXT_VV(vdiv_vv_b, 1)
GEN_VEXT_VV(vdiv_vv_h, 2)
GEN_VEXT_VV(vdiv_vv_w, 4)
GEN_VEXT_VV(vdiv_vv_d, 8)
GEN_VEXT_VV(vremu_vv_b, 1)
GEN_VEXT_VV(vremu_vv_h, 2)
GEN_VEXT_VV(vremu_vv_w, 4)
GEN_VEXT_VV(vremu_vv_d, 8)
GEN_VEXT_VV(vrem_vv_b, 1)
GEN_VEXT_VV(vrem_vv_h, 2)
GEN_VEXT_VV(vrem_vv_w, 4)
GEN_VEXT_VV(vrem_vv_d, 8)

RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
GEN_VEXT_VX(vdivu_vx_b, 1)
GEN_VEXT_VX(vdivu_vx_h, 2)
GEN_VEXT_VX(vdivu_vx_w, 4)
GEN_VEXT_VX(vdivu_vx_d, 8)
GEN_VEXT_VX(vdiv_vx_b, 1)
GEN_VEXT_VX(vdiv_vx_h, 2)
GEN_VEXT_VX(vdiv_vx_w, 4)
GEN_VEXT_VX(vdiv_vx_d, 8)
GEN_VEXT_VX(vremu_vx_b, 1)
GEN_VEXT_VX(vremu_vx_h, 2)
GEN_VEXT_VX(vremu_vx_w, 4)
GEN_VEXT_VX(vremu_vx_d, 8)
GEN_VEXT_VX(vrem_vx_b, 1)
GEN_VEXT_VX(vrem_vx_h, 2)
GEN_VEXT_VX(vrem_vx_w, 4)
GEN_VEXT_VX(vrem_vx_d, 8)

/* Vector Widening Integer Multiply Instructions */
vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1622 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1623 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1624 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1625 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1626 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1627 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1628 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1629 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1630 GEN_VEXT_VV(vwmul_vv_b, 2) 1631 GEN_VEXT_VV(vwmul_vv_h, 4) 1632 GEN_VEXT_VV(vwmul_vv_w, 8) 1633 GEN_VEXT_VV(vwmulu_vv_b, 2) 1634 GEN_VEXT_VV(vwmulu_vv_h, 4) 1635 GEN_VEXT_VV(vwmulu_vv_w, 8) 1636 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1637 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1638 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1639 1640 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1641 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1642 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1643 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1644 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1645 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1646 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1647 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1648 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1649 GEN_VEXT_VX(vwmul_vx_b, 2) 1650 GEN_VEXT_VX(vwmul_vx_h, 4) 1651 GEN_VEXT_VX(vwmul_vx_w, 8) 1652 GEN_VEXT_VX(vwmulu_vx_b, 2) 1653 GEN_VEXT_VX(vwmulu_vx_h, 4) 1654 GEN_VEXT_VX(vwmulu_vx_w, 8) 1655 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1656 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1657 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1658 1659 /* Vector Single-Width Integer Multiply-Add Instructions */ 1660 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1661 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1662 { \ 1663 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1664 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1665 TD d = *((TD *)vd + HD(i)); \ 1666 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1667 } 1668 1669 #define DO_MACC(N, M, D) (M * N + D) 1670 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1671 #define DO_MADD(N, M, D) (M * D + N) 1672 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1673 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1674 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1675 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1676 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1677 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1678 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1679 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1680 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1681 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1682 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1683 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1684 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1685 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1686 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1687 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1688 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1689 GEN_VEXT_VV(vmacc_vv_b, 1) 1690 GEN_VEXT_VV(vmacc_vv_h, 2) 1691 GEN_VEXT_VV(vmacc_vv_w, 4) 1692 GEN_VEXT_VV(vmacc_vv_d, 8) 1693 GEN_VEXT_VV(vnmsac_vv_b, 1) 1694 GEN_VEXT_VV(vnmsac_vv_h, 2) 1695 GEN_VEXT_VV(vnmsac_vv_w, 4) 1696 
GEN_VEXT_VV(vnmsac_vv_d, 8) 1697 GEN_VEXT_VV(vmadd_vv_b, 1) 1698 GEN_VEXT_VV(vmadd_vv_h, 2) 1699 GEN_VEXT_VV(vmadd_vv_w, 4) 1700 GEN_VEXT_VV(vmadd_vv_d, 8) 1701 GEN_VEXT_VV(vnmsub_vv_b, 1) 1702 GEN_VEXT_VV(vnmsub_vv_h, 2) 1703 GEN_VEXT_VV(vnmsub_vv_w, 4) 1704 GEN_VEXT_VV(vnmsub_vv_d, 8) 1705 1706 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1707 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1708 { \ 1709 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1710 TD d = *((TD *)vd + HD(i)); \ 1711 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1712 } 1713 1714 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1715 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1716 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1717 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1718 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1719 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1720 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1721 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1722 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1723 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1724 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1725 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1726 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1727 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1728 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1729 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1730 GEN_VEXT_VX(vmacc_vx_b, 1) 1731 GEN_VEXT_VX(vmacc_vx_h, 2) 1732 GEN_VEXT_VX(vmacc_vx_w, 4) 1733 GEN_VEXT_VX(vmacc_vx_d, 8) 1734 GEN_VEXT_VX(vnmsac_vx_b, 1) 1735 GEN_VEXT_VX(vnmsac_vx_h, 2) 1736 GEN_VEXT_VX(vnmsac_vx_w, 4) 1737 GEN_VEXT_VX(vnmsac_vx_d, 8) 1738 GEN_VEXT_VX(vmadd_vx_b, 1) 1739 GEN_VEXT_VX(vmadd_vx_h, 2) 1740 GEN_VEXT_VX(vmadd_vx_w, 4) 1741 GEN_VEXT_VX(vmadd_vx_d, 8) 1742 GEN_VEXT_VX(vnmsub_vx_b, 1) 1743 GEN_VEXT_VX(vnmsub_vx_h, 2) 1744 GEN_VEXT_VX(vnmsub_vx_w, 4) 1745 GEN_VEXT_VX(vnmsub_vx_d, 8) 1746 1747 /* Vector Widening Integer Multiply-Add Instructions */ 1748 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1749 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1750 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1751 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1752 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1753 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1754 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1755 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1756 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1757 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1758 GEN_VEXT_VV(vwmaccu_vv_h, 4) 1759 GEN_VEXT_VV(vwmaccu_vv_w, 8) 1760 GEN_VEXT_VV(vwmacc_vv_b, 2) 1761 GEN_VEXT_VV(vwmacc_vv_h, 4) 1762 GEN_VEXT_VV(vwmacc_vv_w, 8) 1763 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 1764 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 1765 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 1766 1767 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1768 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1769 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1770 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1771 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1772 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1773 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1774 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, 
DO_MACC) 1775 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1776 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1777 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1778 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1779 GEN_VEXT_VX(vwmaccu_vx_b, 2) 1780 GEN_VEXT_VX(vwmaccu_vx_h, 4) 1781 GEN_VEXT_VX(vwmaccu_vx_w, 8) 1782 GEN_VEXT_VX(vwmacc_vx_b, 2) 1783 GEN_VEXT_VX(vwmacc_vx_h, 4) 1784 GEN_VEXT_VX(vwmacc_vx_w, 8) 1785 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 1786 GEN_VEXT_VX(vwmaccsu_vx_h, 4) 1787 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 1788 GEN_VEXT_VX(vwmaccus_vx_b, 2) 1789 GEN_VEXT_VX(vwmaccus_vx_h, 4) 1790 GEN_VEXT_VX(vwmaccus_vx_w, 8) 1791 1792 /* Vector Integer Merge and Move Instructions */ 1793 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1794 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1795 uint32_t desc) \ 1796 { \ 1797 uint32_t vl = env->vl; \ 1798 uint32_t esz = sizeof(ETYPE); \ 1799 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1800 uint32_t vta = vext_vta(desc); \ 1801 uint32_t i; \ 1802 \ 1803 for (i = env->vstart; i < vl; i++) { \ 1804 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1805 *((ETYPE *)vd + H(i)) = s1; \ 1806 } \ 1807 env->vstart = 0; \ 1808 /* set tail elements to 1s */ \ 1809 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1810 } 1811 1812 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1813 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1814 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1815 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1816 1817 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1818 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1819 uint32_t desc) \ 1820 { \ 1821 uint32_t vl = env->vl; \ 1822 uint32_t esz = sizeof(ETYPE); \ 1823 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1824 uint32_t vta = vext_vta(desc); \ 1825 uint32_t i; \ 1826 \ 1827 for (i = env->vstart; i < vl; i++) { \ 1828 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1829 } \ 1830 env->vstart = 0; \ 1831 /* set tail elements to 1s */ \ 1832 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1833 } 1834 1835 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1836 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1837 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1838 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1839 1840 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1841 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1842 CPURISCVState *env, uint32_t desc) \ 1843 { \ 1844 uint32_t vl = env->vl; \ 1845 uint32_t esz = sizeof(ETYPE); \ 1846 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1847 uint32_t vta = vext_vta(desc); \ 1848 uint32_t i; \ 1849 \ 1850 for (i = env->vstart; i < vl; i++) { \ 1851 ETYPE *vt = (!vext_elem_mask(v0, i) ? 
vs2 : vs1); \ 1852 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1853 } \ 1854 env->vstart = 0; \ 1855 /* set tail elements to 1s */ \ 1856 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1857 } 1858 1859 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1860 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1861 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1862 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1863 1864 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1865 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1866 void *vs2, CPURISCVState *env, uint32_t desc) \ 1867 { \ 1868 uint32_t vl = env->vl; \ 1869 uint32_t esz = sizeof(ETYPE); \ 1870 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1871 uint32_t vta = vext_vta(desc); \ 1872 uint32_t i; \ 1873 \ 1874 for (i = env->vstart; i < vl; i++) { \ 1875 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1876 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 1877 (ETYPE)(target_long)s1); \ 1878 *((ETYPE *)vd + H(i)) = d; \ 1879 } \ 1880 env->vstart = 0; \ 1881 /* set tail elements to 1s */ \ 1882 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1883 } 1884 1885 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1886 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1887 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1888 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1889 1890 /* 1891 * Vector Fixed-Point Arithmetic Instructions 1892 */ 1893 1894 /* Vector Single-Width Saturating Add and Subtract */ 1895 1896 /* 1897 * As fixed point instructions probably have round mode and saturation, 1898 * define common macros for fixed point here. 1899 */ 1900 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1901 CPURISCVState *env, int vxrm); 1902 1903 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1904 static inline void \ 1905 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1906 CPURISCVState *env, int vxrm) \ 1907 { \ 1908 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1909 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1910 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1911 } 1912 1913 static inline void 1914 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1915 CPURISCVState *env, 1916 uint32_t vl, uint32_t vm, int vxrm, 1917 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 1918 { 1919 for (uint32_t i = env->vstart; i < vl; i++) { 1920 if (!vm && !vext_elem_mask(v0, i)) { 1921 /* set masked-off elements to 1s */ 1922 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 1923 continue; 1924 } 1925 fn(vd, vs1, vs2, i, env, vxrm); 1926 } 1927 env->vstart = 0; 1928 } 1929 1930 static inline void 1931 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1932 CPURISCVState *env, 1933 uint32_t desc, 1934 opivv2_rm_fn *fn, uint32_t esz) 1935 { 1936 uint32_t vm = vext_vm(desc); 1937 uint32_t vl = env->vl; 1938 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 1939 uint32_t vta = vext_vta(desc); 1940 uint32_t vma = vext_vma(desc); 1941 1942 switch (env->vxrm) { 1943 case 0: /* rnu */ 1944 vext_vv_rm_1(vd, v0, vs1, vs2, 1945 env, vl, vm, 0, fn, vma, esz); 1946 break; 1947 case 1: /* rne */ 1948 vext_vv_rm_1(vd, v0, vs1, vs2, 1949 env, vl, vm, 1, fn, vma, esz); 1950 break; 1951 case 2: /* rdn */ 1952 vext_vv_rm_1(vd, v0, vs1, vs2, 1953 env, vl, vm, 2, fn, vma, esz); 1954 break; 1955 default: /* rod */ 1956 vext_vv_rm_1(vd, v0, vs1, vs2, 1957 env, vl, vm, 3, fn, vma, esz); 1958 break; 1959 } 1960 /* set tail elements to 1s */ 1961 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 1962 } 1963 1964 /* generate helpers for fixed 
point instructions with OPIVV format */ 1965 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 1966 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1967 CPURISCVState *env, uint32_t desc) \ 1968 { \ 1969 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 1970 do_##NAME, ESZ); \ 1971 } 1972 1973 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, 1974 uint8_t b) 1975 { 1976 uint8_t res = a + b; 1977 if (res < a) { 1978 res = UINT8_MAX; 1979 env->vxsat = 0x1; 1980 } 1981 return res; 1982 } 1983 1984 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1985 uint16_t b) 1986 { 1987 uint16_t res = a + b; 1988 if (res < a) { 1989 res = UINT16_MAX; 1990 env->vxsat = 0x1; 1991 } 1992 return res; 1993 } 1994 1995 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1996 uint32_t b) 1997 { 1998 uint32_t res = a + b; 1999 if (res < a) { 2000 res = UINT32_MAX; 2001 env->vxsat = 0x1; 2002 } 2003 return res; 2004 } 2005 2006 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2007 uint64_t b) 2008 { 2009 uint64_t res = a + b; 2010 if (res < a) { 2011 res = UINT64_MAX; 2012 env->vxsat = 0x1; 2013 } 2014 return res; 2015 } 2016 2017 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2018 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2019 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2020 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2021 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2022 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2023 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2024 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2025 2026 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2027 CPURISCVState *env, int vxrm); 2028 2029 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2030 static inline void \ 2031 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2032 CPURISCVState *env, int vxrm) \ 2033 { \ 2034 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2035 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2036 } 2037 2038 static inline void 2039 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2040 CPURISCVState *env, 2041 uint32_t vl, uint32_t vm, int vxrm, 2042 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2043 { 2044 for (uint32_t i = env->vstart; i < vl; i++) { 2045 if (!vm && !vext_elem_mask(v0, i)) { 2046 /* set masked-off elements to 1s */ 2047 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2048 continue; 2049 } 2050 fn(vd, s1, vs2, i, env, vxrm); 2051 } 2052 env->vstart = 0; 2053 } 2054 2055 static inline void 2056 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2057 CPURISCVState *env, 2058 uint32_t desc, 2059 opivx2_rm_fn *fn, uint32_t esz) 2060 { 2061 uint32_t vm = vext_vm(desc); 2062 uint32_t vl = env->vl; 2063 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2064 uint32_t vta = vext_vta(desc); 2065 uint32_t vma = vext_vma(desc); 2066 2067 switch (env->vxrm) { 2068 case 0: /* rnu */ 2069 vext_vx_rm_1(vd, v0, s1, vs2, 2070 env, vl, vm, 0, fn, vma, esz); 2071 break; 2072 case 1: /* rne */ 2073 vext_vx_rm_1(vd, v0, s1, vs2, 2074 env, vl, vm, 1, fn, vma, esz); 2075 break; 2076 case 2: /* rdn */ 2077 vext_vx_rm_1(vd, v0, s1, vs2, 2078 env, vl, vm, 2, fn, vma, esz); 2079 break; 2080 default: /* rod */ 2081 vext_vx_rm_1(vd, v0, s1, vs2, 2082 env, vl, vm, 3, fn, vma, esz); 2083 break; 2084 } 2085 /* set tail elements to 1s */ 2086 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2087 } 2088 2089 /* generate helpers for fixed point 
instructions with OPIVX format */ 2090 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2091 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2092 void *vs2, CPURISCVState *env, \ 2093 uint32_t desc) \ 2094 { \ 2095 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2096 do_##NAME, ESZ); \ 2097 } 2098 2099 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2100 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2101 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2102 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2103 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2104 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2105 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2106 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2107 2108 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2109 { 2110 int8_t res = a + b; 2111 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2112 res = a > 0 ? INT8_MAX : INT8_MIN; 2113 env->vxsat = 0x1; 2114 } 2115 return res; 2116 } 2117 2118 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2119 int16_t b) 2120 { 2121 int16_t res = a + b; 2122 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2123 res = a > 0 ? INT16_MAX : INT16_MIN; 2124 env->vxsat = 0x1; 2125 } 2126 return res; 2127 } 2128 2129 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2130 int32_t b) 2131 { 2132 int32_t res = a + b; 2133 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2134 res = a > 0 ? INT32_MAX : INT32_MIN; 2135 env->vxsat = 0x1; 2136 } 2137 return res; 2138 } 2139 2140 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2141 int64_t b) 2142 { 2143 int64_t res = a + b; 2144 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2145 res = a > 0 ? INT64_MAX : INT64_MIN; 2146 env->vxsat = 0x1; 2147 } 2148 return res; 2149 } 2150 2151 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2152 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2153 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2154 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2155 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2156 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2157 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2158 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2159 2160 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2161 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2162 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2163 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2164 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2165 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2166 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2167 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2168 2169 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2170 uint8_t b) 2171 { 2172 uint8_t res = a - b; 2173 if (res > a) { 2174 res = 0; 2175 env->vxsat = 0x1; 2176 } 2177 return res; 2178 } 2179 2180 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2181 uint16_t b) 2182 { 2183 uint16_t res = a - b; 2184 if (res > a) { 2185 res = 0; 2186 env->vxsat = 0x1; 2187 } 2188 return res; 2189 } 2190 2191 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2192 uint32_t b) 2193 { 2194 uint32_t res = a - b; 2195 if (res > a) { 2196 res = 0; 2197 env->vxsat = 0x1; 2198 } 2199 return res; 2200 } 2201 2202 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2203 uint64_t b) 2204 { 2205 uint64_t res = a - b; 2206 if (res > a) { 2207 res = 0; 2208 env->vxsat = 0x1; 2209 } 2210 return res; 2211 } 2212 2213 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2214 RVVCALL(OPIVV2_RM, vssubu_vv_h, 
OP_UUU_H, H2, H2, H2, ssubu16)
RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
GEN_VEXT_VV_RM(vssubu_vv_b, 1)
GEN_VEXT_VV_RM(vssubu_vv_h, 2)
GEN_VEXT_VV_RM(vssubu_vv_w, 4)
GEN_VEXT_VV_RM(vssubu_vv_d, 8)

RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
GEN_VEXT_VX_RM(vssubu_vx_b, 1)
GEN_VEXT_VX_RM(vssubu_vx_h, 2)
GEN_VEXT_VX_RM(vssubu_vx_w, 4)
GEN_VEXT_VX_RM(vssubu_vx_d, 8)

static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    int8_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT8_MIN) {
        res = a >= 0 ? INT8_MAX : INT8_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
                             int16_t b)
{
    int16_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT16_MIN) {
        res = a >= 0 ? INT16_MAX : INT16_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int32_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT32_MIN) {
        res = a >= 0 ? INT32_MAX : INT32_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT64_MIN) {
        res = a >= 0 ? INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
GEN_VEXT_VV_RM(vssub_vv_b, 1)
GEN_VEXT_VV_RM(vssub_vv_h, 2)
GEN_VEXT_VV_RM(vssub_vv_w, 4)
GEN_VEXT_VV_RM(vssub_vv_d, 8)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
GEN_VEXT_VX_RM(vssub_vx_b, 1)
GEN_VEXT_VX_RM(vssub_vx_h, 2)
GEN_VEXT_VX_RM(vssub_vx_w, 4)
GEN_VEXT_VX_RM(vssub_vx_d, 8)

/* Vector Single-Width Averaging Add and Subtract */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d = extract64(v, shift, 1);
    uint8_t d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    d1 = extract64(v, shift - 1, 1);
    D1 = extract64(v, 0, shift);
    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            D2 = extract64(v, 0, shift - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}
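/*
 * Worked example, illustrative only: rounding 22 >> 2 under each vxrm mode.
 * With v = 0b10110 and shift = 2, the truncated quotient is 0b101 = 5 and
 * the discarded bits are 0b10 (exactly one half), so get_round() returns:
 *   rnu (0): d1 = 1                -> 5 + 1 = 6
 *   rne (1): d1 & ((D2 != 0) | d)  -> 1 & (0 | 1) = 1 -> 6 (tie goes to even)
 *   rdn (2): always 0              -> 5
 *   rod (3): !d & (D1 != 0)        -> 0 -> 5 (5 is already odd)
 */
#if 0   /* example sketch; assumes assert() is visible via osdep.h */
static void get_round_demo(void)
{
    uint64_t v = 0x16;      /* 0b10110 == 22 */
    uint8_t shift = 2;

    assert((v >> shift) + get_round(0, v, shift) == 6);  /* rnu */
    assert((v >> shift) + get_round(1, v, shift) == 6);  /* rne */
    assert((v >> shift) + get_round(2, v, shift) == 5);  /* rdn */
    assert((v >> shift) + get_round(3, v, shift) == 5);  /* rod */
}
#endif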
static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int64_t res = (int64_t)a + b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63. */
    return ((res >> 1) ^ over) + round;
}

RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
GEN_VEXT_VV_RM(vaadd_vv_b, 1)
GEN_VEXT_VV_RM(vaadd_vv_h, 2)
GEN_VEXT_VV_RM(vaadd_vv_w, 4)
GEN_VEXT_VV_RM(vaadd_vv_d, 8)

RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
GEN_VEXT_VX_RM(vaadd_vx_b, 1)
GEN_VEXT_VX_RM(vaadd_vx_h, 2)
GEN_VEXT_VX_RM(vaadd_vx_w, 4)
GEN_VEXT_VX_RM(vaadd_vx_d, 8)

static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
                               uint32_t a, uint32_t b)
{
    uint64_t res = (uint64_t)a + b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
                               uint64_t a, uint64_t b)
{
    uint64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    uint64_t over = (uint64_t)(res < a) << 63;

    return ((res >> 1) | over) + round;
}

RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
GEN_VEXT_VV_RM(vaaddu_vv_d, 8)

RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
GEN_VEXT_VX_RM(vaaddu_vx_d, 8)

static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63.
*/ 2412 return ((res >> 1) ^ over) + round; 2413 } 2414 2415 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2416 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2417 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2418 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2419 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2420 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2421 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2422 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2423 2424 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2425 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2426 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2427 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2428 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2429 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2430 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2431 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2432 2433 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2434 uint32_t a, uint32_t b) 2435 { 2436 int64_t res = (int64_t)a - b; 2437 uint8_t round = get_round(vxrm, res, 1); 2438 2439 return (res >> 1) + round; 2440 } 2441 2442 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2443 uint64_t a, uint64_t b) 2444 { 2445 uint64_t res = (uint64_t)a - b; 2446 uint8_t round = get_round(vxrm, res, 1); 2447 uint64_t over = (uint64_t)(res > a) << 63; 2448 2449 return ((res >> 1) | over) + round; 2450 } 2451 2452 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2453 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2454 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2455 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2456 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2457 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2458 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2459 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2460 2461 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2462 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2463 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2464 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2465 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2466 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2467 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2468 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2469 2470 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2471 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2472 { 2473 uint8_t round; 2474 int16_t res; 2475 2476 res = (int16_t)a * (int16_t)b; 2477 round = get_round(vxrm, res, 7); 2478 res = (res >> 7) + round; 2479 2480 if (res > INT8_MAX) { 2481 env->vxsat = 0x1; 2482 return INT8_MAX; 2483 } else if (res < INT8_MIN) { 2484 env->vxsat = 0x1; 2485 return INT8_MIN; 2486 } else { 2487 return res; 2488 } 2489 } 2490 2491 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2492 { 2493 uint8_t round; 2494 int32_t res; 2495 2496 res = (int32_t)a * (int32_t)b; 2497 round = get_round(vxrm, res, 15); 2498 res = (res >> 15) + round; 2499 2500 if (res > INT16_MAX) { 2501 env->vxsat = 0x1; 2502 return INT16_MAX; 2503 } else if (res < INT16_MIN) { 2504 env->vxsat = 0x1; 2505 return INT16_MIN; 2506 } else { 2507 return res; 2508 } 2509 } 2510 2511 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2512 { 2513 uint8_t round; 2514 int64_t res; 2515 2516 res = (int64_t)a * (int64_t)b; 2517 round = get_round(vxrm, res, 31); 2518 res = (res >> 31) + round; 2519 2520 if (res > INT32_MAX) { 2521 env->vxsat = 0x1; 2522 return INT32_MAX; 2523 } else if (res < INT32_MIN) { 2524 env->vxsat = 0x1; 
2525 return INT32_MIN; 2526 } else { 2527 return res; 2528 } 2529 } 2530 2531 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2532 { 2533 uint8_t round; 2534 uint64_t hi_64, lo_64; 2535 int64_t res; 2536 2537 if (a == INT64_MIN && b == INT64_MIN) { 2538 env->vxsat = 1; 2539 return INT64_MAX; 2540 } 2541 2542 muls64(&lo_64, &hi_64, a, b); 2543 round = get_round(vxrm, lo_64, 63); 2544 /* 2545 * Cannot overflow, as there are always 2546 * 2 sign bits after multiply. 2547 */ 2548 res = (hi_64 << 1) | (lo_64 >> 63); 2549 if (round) { 2550 if (res == INT64_MAX) { 2551 env->vxsat = 1; 2552 } else { 2553 res += 1; 2554 } 2555 } 2556 return res; 2557 } 2558 2559 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2560 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2561 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2562 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2563 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2564 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2565 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2566 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2567 2568 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2569 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2570 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2571 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2572 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2573 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2574 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2575 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2576 2577 /* Vector Single-Width Scaling Shift Instructions */ 2578 static inline uint8_t 2579 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2580 { 2581 uint8_t round, shift = b & 0x7; 2582 uint8_t res; 2583 2584 round = get_round(vxrm, a, shift); 2585 res = (a >> shift) + round; 2586 return res; 2587 } 2588 static inline uint16_t 2589 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2590 { 2591 uint8_t round, shift = b & 0xf; 2592 2593 round = get_round(vxrm, a, shift); 2594 return (a >> shift) + round; 2595 } 2596 static inline uint32_t 2597 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2598 { 2599 uint8_t round, shift = b & 0x1f; 2600 2601 round = get_round(vxrm, a, shift); 2602 return (a >> shift) + round; 2603 } 2604 static inline uint64_t 2605 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2606 { 2607 uint8_t round, shift = b & 0x3f; 2608 2609 round = get_round(vxrm, a, shift); 2610 return (a >> shift) + round; 2611 } 2612 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2613 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2614 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2615 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2616 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2617 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2618 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2619 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2620 2621 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2622 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2623 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2624 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2625 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2626 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2627 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2628 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2629 2630 static inline int8_t 2631 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2632 { 2633 uint8_t round, shift = b & 0x7; 2634 2635 round = get_round(vxrm, a, shift); 2636 return (a >> shift) + round; 2637 } 2638 static inline int16_t 2639 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2640 { 2641 uint8_t round, shift = b & 0xf; 2642 2643 round = get_round(vxrm, a, shift); 2644 return (a >> shift) + round; 2645 } 2646 static inline int32_t 2647 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2648 { 2649 uint8_t round, shift = b & 0x1f; 2650 2651 round = get_round(vxrm, a, shift); 2652 return (a >> shift) + round; 2653 } 2654 static inline int64_t 2655 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2656 { 2657 uint8_t round, shift = b & 0x3f; 2658 2659 round = get_round(vxrm, a, shift); 2660 return (a >> shift) + round; 2661 } 2662 2663 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2664 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2665 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2666 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2667 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2668 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2669 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2670 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2671 2672 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2673 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2674 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2675 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2676 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2677 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2678 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2679 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2680 2681 /* Vector Narrowing Fixed-Point Clip Instructions */ 2682 static inline int8_t 2683 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2684 { 2685 uint8_t round, shift = b & 0xf; 2686 int16_t res; 2687 2688 round = get_round(vxrm, a, shift); 2689 res = (a >> shift) + round; 2690 if (res > INT8_MAX) { 2691 env->vxsat = 0x1; 2692 return INT8_MAX; 2693 } else if (res < INT8_MIN) { 2694 env->vxsat = 0x1; 2695 return INT8_MIN; 2696 } else { 2697 return res; 2698 } 2699 } 2700 2701 static inline int16_t 2702 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2703 { 2704 uint8_t round, shift = b & 0x1f; 2705 int32_t res; 2706 2707 round = get_round(vxrm, a, shift); 2708 res = (a >> shift) + round; 2709 if (res > INT16_MAX) { 2710 env->vxsat = 0x1; 2711 return INT16_MAX; 2712 } else if (res < INT16_MIN) { 2713 env->vxsat = 0x1; 2714 return INT16_MIN; 2715 } else { 2716 return res; 2717 } 2718 } 2719 2720 static inline int32_t 2721 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2722 { 2723 uint8_t round, shift = b & 0x3f; 2724 int64_t res; 2725 2726 round = get_round(vxrm, a, shift); 2727 res = (a >> shift) + round; 2728 if (res > INT32_MAX) { 2729 env->vxsat = 0x1; 2730 return INT32_MAX; 2731 } else if (res < INT32_MIN) { 2732 env->vxsat = 0x1; 2733 return INT32_MIN; 2734 } else { 2735 return res; 2736 } 2737 } 2738 2739 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2740 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2741 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2742 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 2743 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 2744 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 2745 2746 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2747 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2748 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2749 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 2750 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 2751 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 2752 2753 static inline uint8_t 2754 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2755 { 2756 
uint8_t round, shift = b & 0xf; 2757 uint16_t res; 2758 2759 round = get_round(vxrm, a, shift); 2760 res = (a >> shift) + round; 2761 if (res > UINT8_MAX) { 2762 env->vxsat = 0x1; 2763 return UINT8_MAX; 2764 } else { 2765 return res; 2766 } 2767 } 2768 2769 static inline uint16_t 2770 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2771 { 2772 uint8_t round, shift = b & 0x1f; 2773 uint32_t res; 2774 2775 round = get_round(vxrm, a, shift); 2776 res = (a >> shift) + round; 2777 if (res > UINT16_MAX) { 2778 env->vxsat = 0x1; 2779 return UINT16_MAX; 2780 } else { 2781 return res; 2782 } 2783 } 2784 2785 static inline uint32_t 2786 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2787 { 2788 uint8_t round, shift = b & 0x3f; 2789 uint64_t res; 2790 2791 round = get_round(vxrm, a, shift); 2792 res = (a >> shift) + round; 2793 if (res > UINT32_MAX) { 2794 env->vxsat = 0x1; 2795 return UINT32_MAX; 2796 } else { 2797 return res; 2798 } 2799 } 2800 2801 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2802 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2803 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2804 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 2805 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 2806 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 2807 2808 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2809 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2810 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 2811 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 2812 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 2813 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 2814 2815 /* 2816 * Vector Float Point Arithmetic Instructions 2817 */ 2818 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2819 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2820 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2821 CPURISCVState *env) \ 2822 { \ 2823 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2824 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2825 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2826 } 2827 2828 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 2829 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2830 void *vs2, CPURISCVState *env, \ 2831 uint32_t desc) \ 2832 { \ 2833 uint32_t vm = vext_vm(desc); \ 2834 uint32_t vl = env->vl; \ 2835 uint32_t total_elems = \ 2836 vext_get_total_elems(env, desc, ESZ); \ 2837 uint32_t vta = vext_vta(desc); \ 2838 uint32_t vma = vext_vma(desc); \ 2839 uint32_t i; \ 2840 \ 2841 for (i = env->vstart; i < vl; i++) { \ 2842 if (!vm && !vext_elem_mask(v0, i)) { \ 2843 /* set masked-off elements to 1s */ \ 2844 vext_set_elems_1s(vd, vma, i * ESZ, \ 2845 (i + 1) * ESZ); \ 2846 continue; \ 2847 } \ 2848 do_##NAME(vd, vs1, vs2, i, env); \ 2849 } \ 2850 env->vstart = 0; \ 2851 /* set tail elements to 1s */ \ 2852 vext_set_elems_1s(vd, vta, vl * ESZ, \ 2853 total_elems * ESZ); \ 2854 } 2855 2856 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2857 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2858 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2859 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 2860 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 2861 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 2862 2863 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2864 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2865 CPURISCVState *env) \ 2866 { \ 2867 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2868 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 2869 } 2870 2871 #define GEN_VEXT_VF(NAME, 
ESZ) \ 2872 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2873 void *vs2, CPURISCVState *env, \ 2874 uint32_t desc) \ 2875 { \ 2876 uint32_t vm = vext_vm(desc); \ 2877 uint32_t vl = env->vl; \ 2878 uint32_t total_elems = \ 2879 vext_get_total_elems(env, desc, ESZ); \ 2880 uint32_t vta = vext_vta(desc); \ 2881 uint32_t vma = vext_vma(desc); \ 2882 uint32_t i; \ 2883 \ 2884 for (i = env->vstart; i < vl; i++) { \ 2885 if (!vm && !vext_elem_mask(v0, i)) { \ 2886 /* set masked-off elements to 1s */ \ 2887 vext_set_elems_1s(vd, vma, i * ESZ, \ 2888 (i + 1) * ESZ); \ 2889 continue; \ 2890 } \ 2891 do_##NAME(vd, s1, vs2, i, env); \ 2892 } \ 2893 env->vstart = 0; \ 2894 /* set tail elements to 1s */ \ 2895 vext_set_elems_1s(vd, vta, vl * ESZ, \ 2896 total_elems * ESZ); \ 2897 } 2898 2899 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2900 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2901 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2902 GEN_VEXT_VF(vfadd_vf_h, 2) 2903 GEN_VEXT_VF(vfadd_vf_w, 4) 2904 GEN_VEXT_VF(vfadd_vf_d, 8) 2905 2906 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2907 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2908 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2909 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 2910 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 2911 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 2912 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2913 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2914 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2915 GEN_VEXT_VF(vfsub_vf_h, 2) 2916 GEN_VEXT_VF(vfsub_vf_w, 4) 2917 GEN_VEXT_VF(vfsub_vf_d, 8) 2918 2919 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2920 { 2921 return float16_sub(b, a, s); 2922 } 2923 2924 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2925 { 2926 return float32_sub(b, a, s); 2927 } 2928 2929 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2930 { 2931 return float64_sub(b, a, s); 2932 } 2933 2934 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2935 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2936 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2937 GEN_VEXT_VF(vfrsub_vf_h, 2) 2938 GEN_VEXT_VF(vfrsub_vf_w, 4) 2939 GEN_VEXT_VF(vfrsub_vf_d, 8) 2940 2941 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2942 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2943 { 2944 return float32_add(float16_to_float32(a, true, s), 2945 float16_to_float32(b, true, s), s); 2946 } 2947 2948 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2949 { 2950 return float64_add(float32_to_float64(a, s), 2951 float32_to_float64(b, s), s); 2952 2953 } 2954 2955 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2956 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2957 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 2958 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 2959 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2960 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2961 GEN_VEXT_VF(vfwadd_vf_h, 4) 2962 GEN_VEXT_VF(vfwadd_vf_w, 8) 2963 2964 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2965 { 2966 return float32_sub(float16_to_float32(a, true, s), 2967 float16_to_float32(b, true, s), s); 2968 } 2969 2970 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2971 { 2972 return float64_sub(float32_to_float64(a, s), 2973 
float32_to_float64(b, s), s); 2974 2975 } 2976 2977 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 2978 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 2979 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 2980 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 2981 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 2982 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 2983 GEN_VEXT_VF(vfwsub_vf_h, 4) 2984 GEN_VEXT_VF(vfwsub_vf_w, 8) 2985 2986 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 2987 { 2988 return float32_add(a, float16_to_float32(b, true, s), s); 2989 } 2990 2991 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 2992 { 2993 return float64_add(a, float32_to_float64(b, s), s); 2994 } 2995 2996 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 2997 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 2998 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 2999 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 3000 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3001 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3002 GEN_VEXT_VF(vfwadd_wf_h, 4) 3003 GEN_VEXT_VF(vfwadd_wf_w, 8) 3004 3005 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3006 { 3007 return float32_sub(a, float16_to_float32(b, true, s), s); 3008 } 3009 3010 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3011 { 3012 return float64_sub(a, float32_to_float64(b, s), s); 3013 } 3014 3015 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3016 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3017 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3018 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3019 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3020 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3021 GEN_VEXT_VF(vfwsub_wf_h, 4) 3022 GEN_VEXT_VF(vfwsub_wf_w, 8) 3023 3024 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3025 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3026 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3027 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3028 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3029 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3030 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3031 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3032 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3033 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3034 GEN_VEXT_VF(vfmul_vf_h, 2) 3035 GEN_VEXT_VF(vfmul_vf_w, 4) 3036 GEN_VEXT_VF(vfmul_vf_d, 8) 3037 3038 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3039 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3040 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3041 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3042 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3043 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3044 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3045 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3046 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3047 GEN_VEXT_VF(vfdiv_vf_h, 2) 3048 GEN_VEXT_VF(vfdiv_vf_w, 4) 3049 GEN_VEXT_VF(vfdiv_vf_d, 8) 3050 3051 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3052 { 3053 return float16_div(b, a, s); 3054 } 3055 3056 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3057 { 3058 return float32_div(b, a, s); 3059 } 3060 3061 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3062 { 3063 return float64_div(b, a, s); 3064 } 3065 
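/*
 * Illustrative sketch, not part of the original source: the *_rsub and
 * *_rdiv wrappers above only swap their operands, so when OPFVF2 expands
 * vfrdiv.vf the per-element result is rs1 / vs2[i] rather than
 * vs2[i] / rs1. Spelled out for a single float32 element:
 */
#if 0   /* example only */
static uint32_t vfrdiv_vf_w_one_element(uint32_t vs2_i, uint32_t rs1,
                                        float_status *s)
{
    /* OPFVF2 calls OP(s2, s1, status) with OP == float32_rdiv ... */
    return float32_rdiv(vs2_i, rs1, s);   /* == float32_div(rs1, vs2_i, s) */
}
#endif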
3066 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3067 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3068 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3069 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3070 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3071 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3072 3073 /* Vector Widening Floating-Point Multiply */ 3074 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3075 { 3076 return float32_mul(float16_to_float32(a, true, s), 3077 float16_to_float32(b, true, s), s); 3078 } 3079 3080 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3081 { 3082 return float64_mul(float32_to_float64(a, s), 3083 float32_to_float64(b, s), s); 3084 3085 } 3086 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3087 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3088 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3089 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3090 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3091 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3092 GEN_VEXT_VF(vfwmul_vf_h, 4) 3093 GEN_VEXT_VF(vfwmul_vf_w, 8) 3094 3095 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3096 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3097 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3098 CPURISCVState *env) \ 3099 { \ 3100 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3101 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3102 TD d = *((TD *)vd + HD(i)); \ 3103 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3104 } 3105 3106 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3107 { 3108 return float16_muladd(a, b, d, 0, s); 3109 } 3110 3111 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3112 { 3113 return float32_muladd(a, b, d, 0, s); 3114 } 3115 3116 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3117 { 3118 return float64_muladd(a, b, d, 0, s); 3119 } 3120 3121 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3122 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3123 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3124 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3125 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3126 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3127 3128 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3129 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3130 CPURISCVState *env) \ 3131 { \ 3132 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3133 TD d = *((TD *)vd + HD(i)); \ 3134 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3135 } 3136 3137 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3138 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3139 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3140 GEN_VEXT_VF(vfmacc_vf_h, 2) 3141 GEN_VEXT_VF(vfmacc_vf_w, 4) 3142 GEN_VEXT_VF(vfmacc_vf_d, 8) 3143 3144 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3145 { 3146 return float16_muladd(a, b, d, float_muladd_negate_c | 3147 float_muladd_negate_product, s); 3148 } 3149 3150 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3151 { 3152 return float32_muladd(a, b, d, float_muladd_negate_c | 3153 float_muladd_negate_product, s); 3154 } 3155 3156 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3157 { 3158 return float64_muladd(a, b, d, float_muladd_negate_c | 3159 float_muladd_negate_product, s); 3160 } 3161 3162 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, 
H2, H2, H2, fnmacc16) 3163 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3164 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3165 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3166 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3167 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3168 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3169 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3170 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3171 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3172 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3173 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3174 3175 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3176 { 3177 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3178 } 3179 3180 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3181 { 3182 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3183 } 3184 3185 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3186 { 3187 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3188 } 3189 3190 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3191 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3192 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3193 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3194 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3195 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3196 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3197 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3198 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3199 GEN_VEXT_VF(vfmsac_vf_h, 2) 3200 GEN_VEXT_VF(vfmsac_vf_w, 4) 3201 GEN_VEXT_VF(vfmsac_vf_d, 8) 3202 3203 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3204 { 3205 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3206 } 3207 3208 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3209 { 3210 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3211 } 3212 3213 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3214 { 3215 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3216 } 3217 3218 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3219 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3220 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3221 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3222 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3223 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3224 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3225 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3226 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3227 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3228 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3229 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3230 3231 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3232 { 3233 return float16_muladd(d, b, a, 0, s); 3234 } 3235 3236 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3237 { 3238 return float32_muladd(d, b, a, 0, s); 3239 } 3240 3241 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3242 { 3243 return float64_muladd(d, b, a, 0, s); 3244 } 3245 3246 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3247 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3248 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3249 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) 3250 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3251 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3252 RVVCALL(OPFVF3, vfmadd_vf_h, 
OP_UUU_H, H2, H2, fmadd16) 3253 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3254 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3255 GEN_VEXT_VF(vfmadd_vf_h, 2) 3256 GEN_VEXT_VF(vfmadd_vf_w, 4) 3257 GEN_VEXT_VF(vfmadd_vf_d, 8) 3258 3259 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3260 { 3261 return float16_muladd(d, b, a, float_muladd_negate_c | 3262 float_muladd_negate_product, s); 3263 } 3264 3265 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3266 { 3267 return float32_muladd(d, b, a, float_muladd_negate_c | 3268 float_muladd_negate_product, s); 3269 } 3270 3271 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3272 { 3273 return float64_muladd(d, b, a, float_muladd_negate_c | 3274 float_muladd_negate_product, s); 3275 } 3276 3277 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3278 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3279 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3280 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3281 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3282 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3283 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3284 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3285 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3286 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3287 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3288 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3289 3290 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3291 { 3292 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3293 } 3294 3295 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3296 { 3297 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3298 } 3299 3300 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3301 { 3302 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3303 } 3304 3305 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3306 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3307 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3308 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3309 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3310 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3311 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3312 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3313 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3314 GEN_VEXT_VF(vfmsub_vf_h, 2) 3315 GEN_VEXT_VF(vfmsub_vf_w, 4) 3316 GEN_VEXT_VF(vfmsub_vf_d, 8) 3317 3318 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3319 { 3320 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3321 } 3322 3323 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3324 { 3325 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3326 } 3327 3328 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3329 { 3330 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3331 } 3332 3333 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3334 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3335 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3336 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3337 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3338 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3339 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3340 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3341 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, 
H8, fnmsub64) 3342 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3343 GEN_VEXT_VF(vfnmsub_vf_w, 4) 3344 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3345 3346 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3347 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3348 { 3349 return float32_muladd(float16_to_float32(a, true, s), 3350 float16_to_float32(b, true, s), d, 0, s); 3351 } 3352 3353 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3354 { 3355 return float64_muladd(float32_to_float64(a, s), 3356 float32_to_float64(b, s), d, 0, s); 3357 } 3358 3359 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3360 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3361 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3362 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3363 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3364 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3365 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3366 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3367 3368 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3369 { 3370 return float32_muladd(bfloat16_to_float32(a, s), 3371 bfloat16_to_float32(b, s), d, 0, s); 3372 } 3373 3374 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16) 3375 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4) 3376 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16) 3377 GEN_VEXT_VF(vfwmaccbf16_vf, 4) 3378 3379 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3380 { 3381 return float32_muladd(float16_to_float32(a, true, s), 3382 float16_to_float32(b, true, s), d, 3383 float_muladd_negate_c | float_muladd_negate_product, 3384 s); 3385 } 3386 3387 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3388 { 3389 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), 3390 d, float_muladd_negate_c | 3391 float_muladd_negate_product, s); 3392 } 3393 3394 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3395 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3396 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3397 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3398 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3399 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3400 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3401 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3402 3403 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3404 { 3405 return float32_muladd(float16_to_float32(a, true, s), 3406 float16_to_float32(b, true, s), d, 3407 float_muladd_negate_c, s); 3408 } 3409 3410 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3411 { 3412 return float64_muladd(float32_to_float64(a, s), 3413 float32_to_float64(b, s), d, 3414 float_muladd_negate_c, s); 3415 } 3416 3417 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3418 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3419 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3420 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3421 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3422 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3423 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3424 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3425 3426 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3427 { 3428 return float32_muladd(float16_to_float32(a, true, s), 3429 float16_to_float32(b, true, s), d, 3430 float_muladd_negate_product, s); 3431 } 3432 3433 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, 
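/*
 * The widening FMA helpers in this section promote both SEW sources to
 * 2*SEW and then perform a single fused multiply-add at the wider
 * precision, so only one rounding happens.  A minimal standalone sketch
 * of the SEW=32 case (illustration only; the float to double conversion
 * is exact):
 *
 *     double vfwmacc_elem(float vs1, float vs2, double vd)
 *     {
 *         return fma((double)vs1, (double)vs2, vd);
 *     }
 *
 * The vfwmaccbf16 variant is identical except that the 16-bit sources
 * are interpreted as bfloat16 rather than binary16.
 */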
float_status *s) 3434 { 3435 return float64_muladd(float32_to_float64(a, s), 3436 float32_to_float64(b, s), d, 3437 float_muladd_negate_product, s); 3438 } 3439 3440 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3441 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3442 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3443 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3444 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3445 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3446 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3447 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3448 3449 /* Vector Floating-Point Square-Root Instruction */ 3450 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3451 static void do_##NAME(void *vd, void *vs2, int i, \ 3452 CPURISCVState *env) \ 3453 { \ 3454 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3455 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3456 } 3457 3458 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3459 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3460 CPURISCVState *env, uint32_t desc) \ 3461 { \ 3462 uint32_t vm = vext_vm(desc); \ 3463 uint32_t vl = env->vl; \ 3464 uint32_t total_elems = \ 3465 vext_get_total_elems(env, desc, ESZ); \ 3466 uint32_t vta = vext_vta(desc); \ 3467 uint32_t vma = vext_vma(desc); \ 3468 uint32_t i; \ 3469 \ 3470 if (vl == 0) { \ 3471 return; \ 3472 } \ 3473 for (i = env->vstart; i < vl; i++) { \ 3474 if (!vm && !vext_elem_mask(v0, i)) { \ 3475 /* set masked-off elements to 1s */ \ 3476 vext_set_elems_1s(vd, vma, i * ESZ, \ 3477 (i + 1) * ESZ); \ 3478 continue; \ 3479 } \ 3480 do_##NAME(vd, vs2, i, env); \ 3481 } \ 3482 env->vstart = 0; \ 3483 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3484 total_elems * ESZ); \ 3485 } 3486 3487 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3488 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3489 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3490 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3491 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3492 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3493 3494 /* 3495 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3496 * 3497 * Adapted from riscv-v-spec recip.c: 3498 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3499 */ 3500 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3501 { 3502 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3503 uint64_t exp = extract64(f, frac_size, exp_size); 3504 uint64_t frac = extract64(f, 0, frac_size); 3505 3506 const uint8_t lookup_table[] = { 3507 52, 51, 50, 48, 47, 46, 44, 43, 3508 42, 41, 40, 39, 38, 36, 35, 34, 3509 33, 32, 31, 30, 30, 29, 28, 27, 3510 26, 25, 24, 23, 23, 22, 21, 20, 3511 19, 19, 18, 17, 16, 16, 15, 14, 3512 14, 13, 12, 12, 11, 10, 10, 9, 3513 9, 8, 7, 7, 6, 6, 5, 4, 3514 4, 3, 3, 2, 2, 1, 1, 0, 3515 127, 125, 123, 121, 119, 118, 116, 114, 3516 113, 111, 109, 108, 106, 105, 103, 102, 3517 100, 99, 97, 96, 95, 93, 92, 91, 3518 90, 88, 87, 86, 85, 84, 83, 82, 3519 80, 79, 78, 77, 76, 75, 74, 73, 3520 72, 71, 70, 70, 69, 68, 67, 66, 3521 65, 64, 63, 63, 62, 61, 60, 59, 3522 59, 58, 57, 56, 56, 55, 54, 53 3523 }; 3524 const int precision = 7; 3525 3526 if (exp == 0 && frac != 0) { /* subnormal */ 3527 /* Normalize the subnormal. 
*/ 3528 while (extract64(frac, frac_size - 1, 1) == 0) { 3529 exp--; 3530 frac <<= 1; 3531 } 3532 3533 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3534 } 3535 3536 int idx = ((exp & 1) << (precision - 1)) | 3537 (frac >> (frac_size - precision + 1)); 3538 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3539 (frac_size - precision); 3540 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3541 3542 uint64_t val = 0; 3543 val = deposit64(val, 0, frac_size, out_frac); 3544 val = deposit64(val, frac_size, exp_size, out_exp); 3545 val = deposit64(val, frac_size + exp_size, 1, sign); 3546 return val; 3547 } 3548 3549 static float16 frsqrt7_h(float16 f, float_status *s) 3550 { 3551 int exp_size = 5, frac_size = 10; 3552 bool sign = float16_is_neg(f); 3553 3554 /* 3555 * frsqrt7(sNaN) = canonical NaN 3556 * frsqrt7(-inf) = canonical NaN 3557 * frsqrt7(-normal) = canonical NaN 3558 * frsqrt7(-subnormal) = canonical NaN 3559 */ 3560 if (float16_is_signaling_nan(f, s) || 3561 (float16_is_infinity(f) && sign) || 3562 (float16_is_normal(f) && sign) || 3563 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3564 s->float_exception_flags |= float_flag_invalid; 3565 return float16_default_nan(s); 3566 } 3567 3568 /* frsqrt7(qNaN) = canonical NaN */ 3569 if (float16_is_quiet_nan(f, s)) { 3570 return float16_default_nan(s); 3571 } 3572 3573 /* frsqrt7(+-0) = +-inf */ 3574 if (float16_is_zero(f)) { 3575 s->float_exception_flags |= float_flag_divbyzero; 3576 return float16_set_sign(float16_infinity, sign); 3577 } 3578 3579 /* frsqrt7(+inf) = +0 */ 3580 if (float16_is_infinity(f) && !sign) { 3581 return float16_set_sign(float16_zero, sign); 3582 } 3583 3584 /* +normal, +subnormal */ 3585 uint64_t val = frsqrt7(f, exp_size, frac_size); 3586 return make_float16(val); 3587 } 3588 3589 static float32 frsqrt7_s(float32 f, float_status *s) 3590 { 3591 int exp_size = 8, frac_size = 23; 3592 bool sign = float32_is_neg(f); 3593 3594 /* 3595 * frsqrt7(sNaN) = canonical NaN 3596 * frsqrt7(-inf) = canonical NaN 3597 * frsqrt7(-normal) = canonical NaN 3598 * frsqrt7(-subnormal) = canonical NaN 3599 */ 3600 if (float32_is_signaling_nan(f, s) || 3601 (float32_is_infinity(f) && sign) || 3602 (float32_is_normal(f) && sign) || 3603 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3604 s->float_exception_flags |= float_flag_invalid; 3605 return float32_default_nan(s); 3606 } 3607 3608 /* frsqrt7(qNaN) = canonical NaN */ 3609 if (float32_is_quiet_nan(f, s)) { 3610 return float32_default_nan(s); 3611 } 3612 3613 /* frsqrt7(+-0) = +-inf */ 3614 if (float32_is_zero(f)) { 3615 s->float_exception_flags |= float_flag_divbyzero; 3616 return float32_set_sign(float32_infinity, sign); 3617 } 3618 3619 /* frsqrt7(+inf) = +0 */ 3620 if (float32_is_infinity(f) && !sign) { 3621 return float32_set_sign(float32_zero, sign); 3622 } 3623 3624 /* +normal, +subnormal */ 3625 uint64_t val = frsqrt7(f, exp_size, frac_size); 3626 return make_float32(val); 3627 } 3628 3629 static float64 frsqrt7_d(float64 f, float_status *s) 3630 { 3631 int exp_size = 11, frac_size = 52; 3632 bool sign = float64_is_neg(f); 3633 3634 /* 3635 * frsqrt7(sNaN) = canonical NaN 3636 * frsqrt7(-inf) = canonical NaN 3637 * frsqrt7(-normal) = canonical NaN 3638 * frsqrt7(-subnormal) = canonical NaN 3639 */ 3640 if (float64_is_signaling_nan(f, s) || 3641 (float64_is_infinity(f) && sign) || 3642 (float64_is_normal(f) && sign) || 3643 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3644 
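/*
 * How frsqrt7() above forms its 7-bit estimate (a worked sketch for
 * binary32, assuming a positive normal input; subnormals are first
 * renormalised as in the code above):
 *
 *     exp  = (bits >> 23) & 0xff;              biased exponent
 *     frac = bits & 0x7fffff;                  23-bit fraction
 *     idx  = ((exp & 1) << 6) | (frac >> 17);  exp LSB + top 6 frac bits
 *     out_exp = (3 * 127 - 1 - exp) / 2;       since ~exp == -exp - 1
 *
 * Example: f = 4.0f has exp = 129 and frac = 0, so idx = 64,
 * lookup_table[64] = 127 and out_exp = 125, giving
 * 2^(125-127) * (1 + 127/128), about 0.498, close to 1/sqrt(4) = 0.5.
 */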
s->float_exception_flags |= float_flag_invalid; 3645 return float64_default_nan(s); 3646 } 3647 3648 /* frsqrt7(qNaN) = canonical NaN */ 3649 if (float64_is_quiet_nan(f, s)) { 3650 return float64_default_nan(s); 3651 } 3652 3653 /* frsqrt7(+-0) = +-inf */ 3654 if (float64_is_zero(f)) { 3655 s->float_exception_flags |= float_flag_divbyzero; 3656 return float64_set_sign(float64_infinity, sign); 3657 } 3658 3659 /* frsqrt7(+inf) = +0 */ 3660 if (float64_is_infinity(f) && !sign) { 3661 return float64_set_sign(float64_zero, sign); 3662 } 3663 3664 /* +normal, +subnormal */ 3665 uint64_t val = frsqrt7(f, exp_size, frac_size); 3666 return make_float64(val); 3667 } 3668 3669 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3670 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3671 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3672 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3673 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3674 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3675 3676 /* 3677 * Vector Floating-Point Reciprocal Estimate Instruction 3678 * 3679 * Adapted from riscv-v-spec recip.c: 3680 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3681 */ 3682 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3683 float_status *s) 3684 { 3685 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3686 uint64_t exp = extract64(f, frac_size, exp_size); 3687 uint64_t frac = extract64(f, 0, frac_size); 3688 3689 const uint8_t lookup_table[] = { 3690 127, 125, 123, 121, 119, 117, 116, 114, 3691 112, 110, 109, 107, 105, 104, 102, 100, 3692 99, 97, 96, 94, 93, 91, 90, 88, 3693 87, 85, 84, 83, 81, 80, 79, 77, 3694 76, 75, 74, 72, 71, 70, 69, 68, 3695 66, 65, 64, 63, 62, 61, 60, 59, 3696 58, 57, 56, 55, 54, 53, 52, 51, 3697 50, 49, 48, 47, 46, 45, 44, 43, 3698 42, 41, 40, 40, 39, 38, 37, 36, 3699 35, 35, 34, 33, 32, 31, 31, 30, 3700 29, 28, 28, 27, 26, 25, 25, 24, 3701 23, 23, 22, 21, 21, 20, 19, 19, 3702 18, 17, 17, 16, 15, 15, 14, 14, 3703 13, 12, 12, 11, 11, 10, 9, 9, 3704 8, 8, 7, 7, 6, 5, 5, 4, 3705 4, 3, 3, 2, 2, 1, 1, 0 3706 }; 3707 const int precision = 7; 3708 3709 if (exp == 0 && frac != 0) { /* subnormal */ 3710 /* Normalize the subnormal. */ 3711 while (extract64(frac, frac_size - 1, 1) == 0) { 3712 exp--; 3713 frac <<= 1; 3714 } 3715 3716 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3717 3718 if (exp != 0 && exp != UINT64_MAX) { 3719 /* 3720 * Overflow to inf or max value of same sign, 3721 * depending on sign and rounding mode. 3722 */ 3723 s->float_exception_flags |= (float_flag_inexact | 3724 float_flag_overflow); 3725 3726 if ((s->float_rounding_mode == float_round_to_zero) || 3727 ((s->float_rounding_mode == float_round_down) && !sign) || 3728 ((s->float_rounding_mode == float_round_up) && sign)) { 3729 /* Return greatest/negative finite value. */ 3730 return (sign << (exp_size + frac_size)) | 3731 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3732 } else { 3733 /* Return +-inf. */ 3734 return (sign << (exp_size + frac_size)) | 3735 MAKE_64BIT_MASK(frac_size, exp_size); 3736 } 3737 } 3738 } 3739 3740 int idx = frac >> (frac_size - precision); 3741 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3742 (frac_size - precision); 3743 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3744 3745 if (out_exp == 0 || out_exp == UINT64_MAX) { 3746 /* 3747 * The result is subnormal, but don't raise the underflow exception, 3748 * because there's no additional loss of precision. 
3749 */ 3750 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3751 if (out_exp == UINT64_MAX) { 3752 out_frac >>= 1; 3753 out_exp = 0; 3754 } 3755 } 3756 3757 uint64_t val = 0; 3758 val = deposit64(val, 0, frac_size, out_frac); 3759 val = deposit64(val, frac_size, exp_size, out_exp); 3760 val = deposit64(val, frac_size + exp_size, 1, sign); 3761 return val; 3762 } 3763 3764 static float16 frec7_h(float16 f, float_status *s) 3765 { 3766 int exp_size = 5, frac_size = 10; 3767 bool sign = float16_is_neg(f); 3768 3769 /* frec7(+-inf) = +-0 */ 3770 if (float16_is_infinity(f)) { 3771 return float16_set_sign(float16_zero, sign); 3772 } 3773 3774 /* frec7(+-0) = +-inf */ 3775 if (float16_is_zero(f)) { 3776 s->float_exception_flags |= float_flag_divbyzero; 3777 return float16_set_sign(float16_infinity, sign); 3778 } 3779 3780 /* frec7(sNaN) = canonical NaN */ 3781 if (float16_is_signaling_nan(f, s)) { 3782 s->float_exception_flags |= float_flag_invalid; 3783 return float16_default_nan(s); 3784 } 3785 3786 /* frec7(qNaN) = canonical NaN */ 3787 if (float16_is_quiet_nan(f, s)) { 3788 return float16_default_nan(s); 3789 } 3790 3791 /* +-normal, +-subnormal */ 3792 uint64_t val = frec7(f, exp_size, frac_size, s); 3793 return make_float16(val); 3794 } 3795 3796 static float32 frec7_s(float32 f, float_status *s) 3797 { 3798 int exp_size = 8, frac_size = 23; 3799 bool sign = float32_is_neg(f); 3800 3801 /* frec7(+-inf) = +-0 */ 3802 if (float32_is_infinity(f)) { 3803 return float32_set_sign(float32_zero, sign); 3804 } 3805 3806 /* frec7(+-0) = +-inf */ 3807 if (float32_is_zero(f)) { 3808 s->float_exception_flags |= float_flag_divbyzero; 3809 return float32_set_sign(float32_infinity, sign); 3810 } 3811 3812 /* frec7(sNaN) = canonical NaN */ 3813 if (float32_is_signaling_nan(f, s)) { 3814 s->float_exception_flags |= float_flag_invalid; 3815 return float32_default_nan(s); 3816 } 3817 3818 /* frec7(qNaN) = canonical NaN */ 3819 if (float32_is_quiet_nan(f, s)) { 3820 return float32_default_nan(s); 3821 } 3822 3823 /* +-normal, +-subnormal */ 3824 uint64_t val = frec7(f, exp_size, frac_size, s); 3825 return make_float32(val); 3826 } 3827 3828 static float64 frec7_d(float64 f, float_status *s) 3829 { 3830 int exp_size = 11, frac_size = 52; 3831 bool sign = float64_is_neg(f); 3832 3833 /* frec7(+-inf) = +-0 */ 3834 if (float64_is_infinity(f)) { 3835 return float64_set_sign(float64_zero, sign); 3836 } 3837 3838 /* frec7(+-0) = +-inf */ 3839 if (float64_is_zero(f)) { 3840 s->float_exception_flags |= float_flag_divbyzero; 3841 return float64_set_sign(float64_infinity, sign); 3842 } 3843 3844 /* frec7(sNaN) = canonical NaN */ 3845 if (float64_is_signaling_nan(f, s)) { 3846 s->float_exception_flags |= float_flag_invalid; 3847 return float64_default_nan(s); 3848 } 3849 3850 /* frec7(qNaN) = canonical NaN */ 3851 if (float64_is_quiet_nan(f, s)) { 3852 return float64_default_nan(s); 3853 } 3854 3855 /* +-normal, +-subnormal */ 3856 uint64_t val = frec7(f, exp_size, frac_size, s); 3857 return make_float64(val); 3858 } 3859 3860 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3861 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3862 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3863 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 3864 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 3865 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 3866 3867 /* Vector Floating-Point MIN/MAX Instructions */ 3868 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3869 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
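/*
 * Likewise for frec7() above (binary32, positive normal input): the
 * index is simply the top 7 fraction bits and the result exponent is
 * reflected around the bias:
 *
 *     idx     = frac >> 16;                    top 7 fraction bits
 *     out_exp = 2 * 127 - 1 - exp;             since ~exp == -exp - 1
 *
 * Example: f = 4.0f (exp = 129, frac = 0) gives idx = 0,
 * lookup_table[0] = 127 and out_exp = 124, i.e.
 * 2^(124-127) * (1 + 127/128), about 0.249, close to 1/4.  Results whose
 * exponent would leave the normal range are handled by the overflow and
 * subnormal-output branches inside frec7() itself.
 *
 * The vfmin/vfmax helpers below use the softfloat *_minimum_number /
 * *_maximum_number operations, which return the other operand when
 * exactly one input is NaN, as RVV requires.
 */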
float32_minimum_number) 3870 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3871 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 3872 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 3873 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 3874 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3875 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3876 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3877 GEN_VEXT_VF(vfmin_vf_h, 2) 3878 GEN_VEXT_VF(vfmin_vf_w, 4) 3879 GEN_VEXT_VF(vfmin_vf_d, 8) 3880 3881 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3882 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3883 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3884 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 3885 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 3886 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 3887 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3888 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3889 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3890 GEN_VEXT_VF(vfmax_vf_h, 2) 3891 GEN_VEXT_VF(vfmax_vf_w, 4) 3892 GEN_VEXT_VF(vfmax_vf_d, 8) 3893 3894 /* Vector Floating-Point Sign-Injection Instructions */ 3895 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3896 { 3897 return deposit64(b, 0, 15, a); 3898 } 3899 3900 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3901 { 3902 return deposit64(b, 0, 31, a); 3903 } 3904 3905 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3906 { 3907 return deposit64(b, 0, 63, a); 3908 } 3909 3910 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3911 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3912 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3913 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 3914 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 3915 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 3916 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3917 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3918 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3919 GEN_VEXT_VF(vfsgnj_vf_h, 2) 3920 GEN_VEXT_VF(vfsgnj_vf_w, 4) 3921 GEN_VEXT_VF(vfsgnj_vf_d, 8) 3922 3923 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3924 { 3925 return deposit64(~b, 0, 15, a); 3926 } 3927 3928 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3929 { 3930 return deposit64(~b, 0, 31, a); 3931 } 3932 3933 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3934 { 3935 return deposit64(~b, 0, 63, a); 3936 } 3937 3938 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3939 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3940 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3941 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 3942 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 3943 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 3944 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3945 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3946 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3947 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 3948 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 3949 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 3950 3951 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3952 { 3953 return deposit64(b ^ a, 0, 15, a); 3954 } 3955 3956 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3957 { 3958 return deposit64(b ^ a, 0, 31, a); 3959 } 3960 3961 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
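/*
 * The sign-injection helpers in this section only move sign bits; no
 * arithmetic or NaN canonicalisation happens.  The deposit64(b, 0,
 * SEW-1, a) form keeps b's sign bit and a's magnitude, which is
 * equivalent to the following mask form (sketch for 32-bit values):
 *
 *     fsgnj:   (a & 0x7fffffff) | ( b & 0x80000000)
 *     fsgnjn:  (a & 0x7fffffff) | (~b & 0x80000000)
 *     fsgnjx:   a ^ (b & 0x80000000)
 */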
float_status *s) 3962 { 3963 return deposit64(b ^ a, 0, 63, a); 3964 } 3965 3966 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3967 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3968 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3969 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 3970 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 3971 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 3972 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3973 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3974 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3975 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 3976 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 3977 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 3978 3979 /* Vector Floating-Point Compare Instructions */ 3980 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3981 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3982 CPURISCVState *env, uint32_t desc) \ 3983 { \ 3984 uint32_t vm = vext_vm(desc); \ 3985 uint32_t vl = env->vl; \ 3986 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 3987 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 3988 uint32_t vma = vext_vma(desc); \ 3989 uint32_t i; \ 3990 \ 3991 for (i = env->vstart; i < vl; i++) { \ 3992 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3993 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3994 if (!vm && !vext_elem_mask(v0, i)) { \ 3995 /* set masked-off elements to 1s */ \ 3996 if (vma) { \ 3997 vext_set_elem_mask(vd, i, 1); \ 3998 } \ 3999 continue; \ 4000 } \ 4001 vext_set_elem_mask(vd, i, \ 4002 DO_OP(s2, s1, &env->fp_status)); \ 4003 } \ 4004 env->vstart = 0; \ 4005 /* 4006 * mask destination register are always tail-agnostic 4007 * set tail elements to 1s 4008 */ \ 4009 if (vta_all_1s) { \ 4010 for (; i < total_elems; i++) { \ 4011 vext_set_elem_mask(vd, i, 1); \ 4012 } \ 4013 } \ 4014 } 4015 4016 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4017 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4018 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4019 4020 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4021 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4022 CPURISCVState *env, uint32_t desc) \ 4023 { \ 4024 uint32_t vm = vext_vm(desc); \ 4025 uint32_t vl = env->vl; \ 4026 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \ 4027 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4028 uint32_t vma = vext_vma(desc); \ 4029 uint32_t i; \ 4030 \ 4031 for (i = env->vstart; i < vl; i++) { \ 4032 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4033 if (!vm && !vext_elem_mask(v0, i)) { \ 4034 /* set masked-off elements to 1s */ \ 4035 if (vma) { \ 4036 vext_set_elem_mask(vd, i, 1); \ 4037 } \ 4038 continue; \ 4039 } \ 4040 vext_set_elem_mask(vd, i, \ 4041 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4042 } \ 4043 env->vstart = 0; \ 4044 /* 4045 * mask destination register are always tail-agnostic 4046 * set tail elements to 1s 4047 */ \ 4048 if (vta_all_1s) { \ 4049 for (; i < total_elems; i++) { \ 4050 vext_set_elem_mask(vd, i, 1); \ 4051 } \ 4052 } \ 4053 } 4054 4055 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4056 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4057 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4058 4059 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4060 { 4061 FloatRelation compare = float16_compare_quiet(a, b, s); 4062 return compare != float_relation_equal; 4063 } 4064 4065 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4066 { 4067 FloatRelation compare = 
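/*
 * NaN behaviour of the compare helpers: vmfeq/vmfne use the quiet
 * comparison, so only signaling NaNs raise the invalid flag, whereas
 * vmflt/vmfle/vmfgt/vmfge below use the signaling comparison, so any
 * NaN operand raises invalid.  An unordered comparison yields 0 for
 * every predicate except vmfne, which yields 1; e.g.
 * vmfne(NaN, 1.0) = 1 while vmflt(NaN, 1.0) = 0 plus the invalid flag.
 */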
float32_compare_quiet(a, b, s); 4068 return compare != float_relation_equal; 4069 } 4070 4071 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4072 { 4073 FloatRelation compare = float64_compare_quiet(a, b, s); 4074 return compare != float_relation_equal; 4075 } 4076 4077 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4078 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4079 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4080 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4081 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4082 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4083 4084 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4085 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4086 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4087 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4088 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4089 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4090 4091 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4092 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4093 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4094 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4095 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4096 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4097 4098 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4099 { 4100 FloatRelation compare = float16_compare(a, b, s); 4101 return compare == float_relation_greater; 4102 } 4103 4104 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4105 { 4106 FloatRelation compare = float32_compare(a, b, s); 4107 return compare == float_relation_greater; 4108 } 4109 4110 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4111 { 4112 FloatRelation compare = float64_compare(a, b, s); 4113 return compare == float_relation_greater; 4114 } 4115 4116 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4117 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4118 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4119 4120 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4121 { 4122 FloatRelation compare = float16_compare(a, b, s); 4123 return compare == float_relation_greater || 4124 compare == float_relation_equal; 4125 } 4126 4127 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4128 { 4129 FloatRelation compare = float32_compare(a, b, s); 4130 return compare == float_relation_greater || 4131 compare == float_relation_equal; 4132 } 4133 4134 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4135 { 4136 FloatRelation compare = float64_compare(a, b, s); 4137 return compare == float_relation_greater || 4138 compare == float_relation_equal; 4139 } 4140 4141 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4142 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4143 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4144 4145 /* Vector Floating-Point Classify Instruction */ 4146 target_ulong fclass_h(uint64_t frs1) 4147 { 4148 float16 f = frs1; 4149 bool sign = float16_is_neg(f); 4150 4151 if (float16_is_infinity(f)) { 4152 return sign ? 1 << 0 : 1 << 7; 4153 } else if (float16_is_zero(f)) { 4154 return sign ? 1 << 3 : 1 << 4; 4155 } else if (float16_is_zero_or_denormal(f)) { 4156 return sign ? 1 << 2 : 1 << 5; 4157 } else if (float16_is_any_nan(f)) { 4158 float_status s = { }; /* for snan_bit_is_one */ 4159 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4160 } else { 4161 return sign ? 
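/*
 * fclass result bit positions used by the fclass helpers in this
 * section (one-hot, matching the scalar fclass.* instructions):
 *
 *     bit 0  negative infinity        bit 5  positive subnormal
 *     bit 1  negative normal          bit 6  positive normal
 *     bit 2  negative subnormal       bit 7  positive infinity
 *     bit 3  negative zero            bit 8  signaling NaN
 *     bit 4  positive zero            bit 9  quiet NaN
 *
 * e.g. fclass_s(0x7fc00000), the default quiet NaN, returns 1 << 9.
 */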
1 << 1 : 1 << 6; 4162 } 4163 } 4164 4165 target_ulong fclass_s(uint64_t frs1) 4166 { 4167 float32 f = frs1; 4168 bool sign = float32_is_neg(f); 4169 4170 if (float32_is_infinity(f)) { 4171 return sign ? 1 << 0 : 1 << 7; 4172 } else if (float32_is_zero(f)) { 4173 return sign ? 1 << 3 : 1 << 4; 4174 } else if (float32_is_zero_or_denormal(f)) { 4175 return sign ? 1 << 2 : 1 << 5; 4176 } else if (float32_is_any_nan(f)) { 4177 float_status s = { }; /* for snan_bit_is_one */ 4178 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4179 } else { 4180 return sign ? 1 << 1 : 1 << 6; 4181 } 4182 } 4183 4184 target_ulong fclass_d(uint64_t frs1) 4185 { 4186 float64 f = frs1; 4187 bool sign = float64_is_neg(f); 4188 4189 if (float64_is_infinity(f)) { 4190 return sign ? 1 << 0 : 1 << 7; 4191 } else if (float64_is_zero(f)) { 4192 return sign ? 1 << 3 : 1 << 4; 4193 } else if (float64_is_zero_or_denormal(f)) { 4194 return sign ? 1 << 2 : 1 << 5; 4195 } else if (float64_is_any_nan(f)) { 4196 float_status s = { }; /* for snan_bit_is_one */ 4197 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4198 } else { 4199 return sign ? 1 << 1 : 1 << 6; 4200 } 4201 } 4202 4203 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4204 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4205 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4206 GEN_VEXT_V(vfclass_v_h, 2) 4207 GEN_VEXT_V(vfclass_v_w, 4) 4208 GEN_VEXT_V(vfclass_v_d, 8) 4209 4210 /* Vector Floating-Point Merge Instruction */ 4211 4212 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4213 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4214 CPURISCVState *env, uint32_t desc) \ 4215 { \ 4216 uint32_t vm = vext_vm(desc); \ 4217 uint32_t vl = env->vl; \ 4218 uint32_t esz = sizeof(ETYPE); \ 4219 uint32_t total_elems = \ 4220 vext_get_total_elems(env, desc, esz); \ 4221 uint32_t vta = vext_vta(desc); \ 4222 uint32_t i; \ 4223 \ 4224 for (i = env->vstart; i < vl; i++) { \ 4225 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4226 *((ETYPE *)vd + H(i)) = \ 4227 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4228 } \ 4229 env->vstart = 0; \ 4230 /* set tail elements to 1s */ \ 4231 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4232 } 4233 4234 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4235 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4236 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4237 4238 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4239 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4240 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4241 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4242 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4243 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4244 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4245 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4246 4247 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4248 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4249 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4250 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4251 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4252 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4253 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4254 4255 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. 
*/ 4256 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4257 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4258 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4259 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4260 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4261 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4262 4263 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4264 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4265 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4266 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4267 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4268 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4269 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4270 4271 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4272 /* (TD, T2, TX2) */ 4273 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4274 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4275 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4276 /* 4277 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4278 */ 4279 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4280 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4281 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4282 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4283 4284 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4285 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4286 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4287 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4288 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4289 4290 /* 4291 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. 4292 */ 4293 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4294 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4295 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4296 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4297 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4298 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4299 4300 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4301 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4302 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4303 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4304 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4305 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4306 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4307 4308 /* 4309 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. 4310 */ 4311 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4312 { 4313 return float16_to_float32(a, true, s); 4314 } 4315 4316 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4317 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4318 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4319 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4320 4321 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32) 4322 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4) 4323 4324 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4325 /* (TD, T2, TX2) */ 4326 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4327 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4328 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4329 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
*/ 4330 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4331 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4332 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4333 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4334 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4335 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4336 4337 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4338 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4339 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4340 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4341 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4342 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4343 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4344 4345 /* 4346 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. 4347 */ 4348 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4349 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4350 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4351 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4352 4353 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4354 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4355 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4356 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4357 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4358 4359 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ 4360 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4361 { 4362 return float32_to_float16(a, true, s); 4363 } 4364 4365 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4366 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4367 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4368 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4369 4370 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16) 4371 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2) 4372 4373 /* 4374 * Vector Reduction Operations 4375 */ 4376 /* Vector Single-Width Integer Reduction Instructions */ 4377 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4378 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4379 void *vs2, CPURISCVState *env, \ 4380 uint32_t desc) \ 4381 { \ 4382 uint32_t vm = vext_vm(desc); \ 4383 uint32_t vl = env->vl; \ 4384 uint32_t esz = sizeof(TD); \ 4385 uint32_t vlenb = simd_maxsz(desc); \ 4386 uint32_t vta = vext_vta(desc); \ 4387 uint32_t i; \ 4388 TD s1 = *((TD *)vs1 + HD(0)); \ 4389 \ 4390 for (i = env->vstart; i < vl; i++) { \ 4391 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4392 if (!vm && !vext_elem_mask(v0, i)) { \ 4393 continue; \ 4394 } \ 4395 s1 = OP(s1, (TD)s2); \ 4396 } \ 4397 *((TD *)vd + HD(0)) = s1; \ 4398 env->vstart = 0; \ 4399 /* set tail elements to 1s */ \ 4400 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4401 } 4402 4403 /* vd[0] = sum(vs1[0], vs2[*]) */ 4404 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4405 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4406 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4407 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4408 4409 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4410 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4411 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4412 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4413 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4414 4415 /* vd[0] = max(vs1[0], vs2[*]) */ 4416 GEN_VEXT_RED(vredmax_vs_b, 
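/*
 * Reference model for the GEN_VEXT_RED reductions above (a standalone
 * sketch at SEW=32, not used by the helpers): the accumulator is seeded
 * from vs1[0], only active elements of vs2 participate, and the scalar
 * result lands in vd[0].
 *
 *     static int32_t vredsum_ref(const int32_t *vs2, const uint64_t *v0,
 *                                int32_t vs1_0, uint32_t vl, bool vm)
 *     {
 *         int32_t acc = vs1_0;
 *         for (uint32_t i = 0; i < vl; i++) {
 *             if (vm || ((v0[i / 64] >> (i % 64)) & 1)) {
 *                 acc += vs2[i];
 *             }
 *         }
 *         return acc;
 *     }
 */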
int8_t, int8_t, H1, H1, DO_MAX) 4417 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4418 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4419 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4420 4421 /* vd[0] = minu(vs1[0], vs2[*]) */ 4422 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4423 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4424 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4425 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4426 4427 /* vd[0] = min(vs1[0], vs2[*]) */ 4428 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4429 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4430 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4431 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4432 4433 /* vd[0] = and(vs1[0], vs2[*]) */ 4434 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4435 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4436 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4437 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4438 4439 /* vd[0] = or(vs1[0], vs2[*]) */ 4440 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4441 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4442 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4443 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4444 4445 /* vd[0] = xor(vs1[0], vs2[*]) */ 4446 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4447 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4448 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4449 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4450 4451 /* Vector Widening Integer Reduction Instructions */ 4452 /* signed sum reduction into double-width accumulator */ 4453 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4454 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4455 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4456 4457 /* Unsigned sum reduction into double-width accumulator */ 4458 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4459 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4460 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4461 4462 /* Vector Single-Width Floating-Point Reduction Instructions */ 4463 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4464 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4465 void *vs2, CPURISCVState *env, \ 4466 uint32_t desc) \ 4467 { \ 4468 uint32_t vm = vext_vm(desc); \ 4469 uint32_t vl = env->vl; \ 4470 uint32_t esz = sizeof(TD); \ 4471 uint32_t vlenb = simd_maxsz(desc); \ 4472 uint32_t vta = vext_vta(desc); \ 4473 uint32_t i; \ 4474 TD s1 = *((TD *)vs1 + HD(0)); \ 4475 \ 4476 for (i = env->vstart; i < vl; i++) { \ 4477 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4478 if (!vm && !vext_elem_mask(v0, i)) { \ 4479 continue; \ 4480 } \ 4481 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4482 } \ 4483 *((TD *)vd + HD(0)) = s1; \ 4484 env->vstart = 0; \ 4485 /* set tail elements to 1s */ \ 4486 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4487 } 4488 4489 /* Unordered sum */ 4490 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4491 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4492 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4493 4494 /* Ordered sum */ 4495 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, 
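/*
 * vfredosum and vfredusum share the same element loop here: it runs
 * strictly in order, which is always a legal evaluation of the
 * unordered form (the spec merely allows it to be reassociated).  The
 * widening variants below (vfwredusum/vfwredosum) promote each SEW
 * element to 2*SEW with fwadd16/fwadd32 before adding it to the 2*SEW
 * accumulator.
 */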
              uint16_t, H2, H2, float16_add)
GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)

/* Maximum value */
GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
              float16_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
              float32_maximum_number)
GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
              float64_maximum_number)

/* Minimum value */
GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
              float16_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
              float32_minimum_number)
GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
              float64_minimum_number)

/* Vector Widening Floating-Point Add Instructions */
static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
{
    return float32_add(a, float16_to_float32(b, true, s), s);
}

static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
{
    return float64_add(a, float32_to_float64(b, s), s);
}

/* Vector Widening Floating-Point Reduction Instructions */
/* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)

/*
 * Vector Mask Operations
 */
/* Vector Mask-Register Logical Instructions */
#define GEN_VEXT_MASK_VV(NAME, OP)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\
    uint32_t vta_all_1s = vext_vta_all_1s(desc);          \
    uint32_t i;                                           \
    int a, b;                                             \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        a = vext_elem_mask(vs1, i);                       \
        b = vext_elem_mask(vs2, i);                       \
        vext_set_elem_mask(vd, i, OP(b, a));              \
    }                                                     \
    env->vstart = 0;                                      \
    /*
     * mask destination registers are always tail-agnostic
     * set tail elements to 1s
     */                                                   \
    if (vta_all_1s) {                                     \
        for (; i < total_elems; i++) {                    \
            vext_set_elem_mask(vd, i, 1);                 \
        }                                                 \
    }                                                     \
}

#define DO_NAND(N, M)  (!(N & M))
#define DO_ANDNOT(N, M) (N & !M)
#define DO_NOR(N, M)  (!(N | M))
#define DO_ORNOT(N, M) (N | !M)
#define DO_XNOR(N, M)  (!(N ^ M))

GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)

/* Vector count population in mask vcpop */
target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
                             uint32_t desc)
{
    target_ulong cnt = 0;
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;

    for (i = env->vstart; i < vl; i++) {
        if (vm || vext_elem_mask(v0, i)) {
            if (vext_elem_mask(vs2, i)) {
                cnt++;
            }
        }
    }
    env->vstart = 0;
    return cnt;
}

/* vfirst find-first-set mask bit */
target_ulong HELPER(vfirst_m)(void *v0, void *vs2,
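/*
 * For the unmasked case with 0 < vl <= 64, vcpop.m and vfirst.m reduce
 * to bit operations on the low vl bits of the source mask register (a
 * sketch, assuming the ctpop64()/ctz64() helpers from
 * "qemu/host-utils.h", which is already included above):
 *
 *     uint64_t bits = *(uint64_t *)vs2 & MAKE_64BIT_MASK(0, vl);
 *     cnt   = ctpop64(bits);
 *     first = bits ? ctz64(bits) : -1;
 *
 * The helper loops here additionally honour v0 masking and a nonzero
 * vstart.
 */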
CPURISCVState *env, 4602 uint32_t desc) 4603 { 4604 uint32_t vm = vext_vm(desc); 4605 uint32_t vl = env->vl; 4606 int i; 4607 4608 for (i = env->vstart; i < vl; i++) { 4609 if (vm || vext_elem_mask(v0, i)) { 4610 if (vext_elem_mask(vs2, i)) { 4611 return i; 4612 } 4613 } 4614 } 4615 env->vstart = 0; 4616 return -1LL; 4617 } 4618 4619 enum set_mask_type { 4620 ONLY_FIRST = 1, 4621 INCLUDE_FIRST, 4622 BEFORE_FIRST, 4623 }; 4624 4625 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4626 uint32_t desc, enum set_mask_type type) 4627 { 4628 uint32_t vm = vext_vm(desc); 4629 uint32_t vl = env->vl; 4630 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; 4631 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4632 uint32_t vma = vext_vma(desc); 4633 int i; 4634 bool first_mask_bit = false; 4635 4636 for (i = env->vstart; i < vl; i++) { 4637 if (!vm && !vext_elem_mask(v0, i)) { 4638 /* set masked-off elements to 1s */ 4639 if (vma) { 4640 vext_set_elem_mask(vd, i, 1); 4641 } 4642 continue; 4643 } 4644 /* write a zero to all following active elements */ 4645 if (first_mask_bit) { 4646 vext_set_elem_mask(vd, i, 0); 4647 continue; 4648 } 4649 if (vext_elem_mask(vs2, i)) { 4650 first_mask_bit = true; 4651 if (type == BEFORE_FIRST) { 4652 vext_set_elem_mask(vd, i, 0); 4653 } else { 4654 vext_set_elem_mask(vd, i, 1); 4655 } 4656 } else { 4657 if (type == ONLY_FIRST) { 4658 vext_set_elem_mask(vd, i, 0); 4659 } else { 4660 vext_set_elem_mask(vd, i, 1); 4661 } 4662 } 4663 } 4664 env->vstart = 0; 4665 /* 4666 * mask destination register are always tail-agnostic 4667 * set tail elements to 1s 4668 */ 4669 if (vta_all_1s) { 4670 for (; i < total_elems; i++) { 4671 vext_set_elem_mask(vd, i, 1); 4672 } 4673 } 4674 } 4675 4676 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4677 uint32_t desc) 4678 { 4679 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4680 } 4681 4682 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4683 uint32_t desc) 4684 { 4685 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4686 } 4687 4688 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4689 uint32_t desc) 4690 { 4691 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4692 } 4693 4694 /* Vector Iota Instruction */ 4695 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4696 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4697 uint32_t desc) \ 4698 { \ 4699 uint32_t vm = vext_vm(desc); \ 4700 uint32_t vl = env->vl; \ 4701 uint32_t esz = sizeof(ETYPE); \ 4702 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4703 uint32_t vta = vext_vta(desc); \ 4704 uint32_t vma = vext_vma(desc); \ 4705 uint32_t sum = 0; \ 4706 int i; \ 4707 \ 4708 for (i = env->vstart; i < vl; i++) { \ 4709 if (!vm && !vext_elem_mask(v0, i)) { \ 4710 /* set masked-off elements to 1s */ \ 4711 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4712 continue; \ 4713 } \ 4714 *((ETYPE *)vd + H(i)) = sum; \ 4715 if (vext_elem_mask(vs2, i)) { \ 4716 sum++; \ 4717 } \ 4718 } \ 4719 env->vstart = 0; \ 4720 /* set tail elements to 1s */ \ 4721 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4722 } 4723 4724 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4725 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4726 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4727 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4728 4729 /* Vector Element Index Instruction */ 4730 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4731 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4732 
{ \ 4733 uint32_t vm = vext_vm(desc); \ 4734 uint32_t vl = env->vl; \ 4735 uint32_t esz = sizeof(ETYPE); \ 4736 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4737 uint32_t vta = vext_vta(desc); \ 4738 uint32_t vma = vext_vma(desc); \ 4739 int i; \ 4740 \ 4741 for (i = env->vstart; i < vl; i++) { \ 4742 if (!vm && !vext_elem_mask(v0, i)) { \ 4743 /* set masked-off elements to 1s */ \ 4744 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4745 continue; \ 4746 } \ 4747 *((ETYPE *)vd + H(i)) = i; \ 4748 } \ 4749 env->vstart = 0; \ 4750 /* set tail elements to 1s */ \ 4751 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4752 } 4753 4754 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4755 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4756 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4757 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4758 4759 /* 4760 * Vector Permutation Instructions 4761 */ 4762 4763 /* Vector Slide Instructions */ 4764 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4765 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4766 CPURISCVState *env, uint32_t desc) \ 4767 { \ 4768 uint32_t vm = vext_vm(desc); \ 4769 uint32_t vl = env->vl; \ 4770 uint32_t esz = sizeof(ETYPE); \ 4771 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4772 uint32_t vta = vext_vta(desc); \ 4773 uint32_t vma = vext_vma(desc); \ 4774 target_ulong offset = s1, i_min, i; \ 4775 \ 4776 i_min = MAX(env->vstart, offset); \ 4777 for (i = i_min; i < vl; i++) { \ 4778 if (!vm && !vext_elem_mask(v0, i)) { \ 4779 /* set masked-off elements to 1s */ \ 4780 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4781 continue; \ 4782 } \ 4783 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4784 } \ 4785 /* set tail elements to 1s */ \ 4786 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4787 } 4788 4789 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4790 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4791 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4792 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4793 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4794 4795 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4796 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4797 CPURISCVState *env, uint32_t desc) \ 4798 { \ 4799 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4800 uint32_t vm = vext_vm(desc); \ 4801 uint32_t vl = env->vl; \ 4802 uint32_t esz = sizeof(ETYPE); \ 4803 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4804 uint32_t vta = vext_vta(desc); \ 4805 uint32_t vma = vext_vma(desc); \ 4806 target_ulong i_max, i_min, i; \ 4807 \ 4808 i_min = MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl); \ 4809 i_max = MAX(i_min, env->vstart); \ 4810 for (i = env->vstart; i < i_max; ++i) { \ 4811 if (!vm && !vext_elem_mask(v0, i)) { \ 4812 /* set masked-off elements to 1s */ \ 4813 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4814 continue; \ 4815 } \ 4816 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4817 } \ 4818 \ 4819 for (i = i_max; i < vl; ++i) { \ 4820 if (vm || vext_elem_mask(v0, i)) { \ 4821 *((ETYPE *)vd + H(i)) = 0; \ 4822 } \ 4823 } \ 4824 \ 4825 env->vstart = 0; \ 4826 /* set tail elements to 1s */ \ 4827 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4828 } 4829 4830 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4831 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4832 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4833 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4834 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4835 4836 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ 4837 static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ 4838 void *vs2, CPURISCVState *env, \ 4839 uint32_t desc) \ 4840 { \ 4841 typedef uint##BITWIDTH##_t ETYPE; \ 4842 uint32_t vm = vext_vm(desc); \ 4843 uint32_t vl = env->vl; \ 4844 uint32_t esz = sizeof(ETYPE); \ 4845 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4846 uint32_t vta = vext_vta(desc); \ 4847 uint32_t vma = vext_vma(desc); \ 4848 uint32_t i; \ 4849 \ 4850 for (i = env->vstart; i < vl; i++) { \ 4851 if (!vm && !vext_elem_mask(v0, i)) { \ 4852 /* set masked-off elements to 1s */ \ 4853 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4854 continue; \ 4855 } \ 4856 if (i == 0) { \ 4857 *((ETYPE *)vd + H(i)) = s1; \ 4858 } else { \ 4859 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4860 } \ 4861 } \ 4862 env->vstart = 0; \ 4863 /* set tail elements to 1s */ \ 4864 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4865 } 4866 4867 GEN_VEXT_VSLIE1UP(8, H1) 4868 GEN_VEXT_VSLIE1UP(16, H2) 4869 GEN_VEXT_VSLIE1UP(32, H4) 4870 GEN_VEXT_VSLIE1UP(64, H8) 4871 4872 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ 4873 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4874 CPURISCVState *env, uint32_t desc) \ 4875 { \ 4876 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4877 } 4878 4879 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4880 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4881 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4882 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4883 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4884 4885 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \ 4886 static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ 4887 void *vs2, CPURISCVState *env, \ 4888 uint32_t desc) \ 4889 { \ 4890 typedef uint##BITWIDTH##_t ETYPE; \ 4891 uint32_t vm = vext_vm(desc); \ 4892 uint32_t vl = env->vl; \ 4893 uint32_t esz = sizeof(ETYPE); \ 4894 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4895 uint32_t vta = vext_vta(desc); \ 4896 uint32_t vma = vext_vma(desc); \ 4897 uint32_t i; \ 4898 \ 4899 for (i = env->vstart; i < vl; i++) { \ 4900 if (!vm && !vext_elem_mask(v0, i)) { \ 4901 /* set masked-off elements to 1s */ \ 4902 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4903 continue; \ 4904 } \ 4905 if (i == vl - 1) { \ 4906 *((ETYPE *)vd + H(i)) = s1; \ 4907 } else { \ 4908 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4909 } \ 4910 } \ 4911 env->vstart = 0; \ 4912 /* set tail elements to 1s */ \ 4913 vext_set_elems_1s(vd, vta, vl 
* esz, total_elems * esz); \ 4914 } 4915 4916 GEN_VEXT_VSLIDE1DOWN(8, H1) 4917 GEN_VEXT_VSLIDE1DOWN(16, H2) 4918 GEN_VEXT_VSLIDE1DOWN(32, H4) 4919 GEN_VEXT_VSLIDE1DOWN(64, H8) 4920 4921 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ 4922 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4923 CPURISCVState *env, uint32_t desc) \ 4924 { \ 4925 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4926 } 4927 4928 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4929 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4930 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4931 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4932 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4933 4934 /* Vector Floating-Point Slide Instructions */ 4935 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \ 4936 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4937 CPURISCVState *env, uint32_t desc) \ 4938 { \ 4939 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4940 } 4941 4942 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */ 4943 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4944 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4945 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4946 4947 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \ 4948 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4949 CPURISCVState *env, uint32_t desc) \ 4950 { \ 4951 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4952 } 4953 4954 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4955 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4956 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4957 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4958 4959 /* Vector Register Gather Instruction */ 4960 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4961 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4962 CPURISCVState *env, uint32_t desc) \ 4963 { \ 4964 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4965 uint32_t vm = vext_vm(desc); \ 4966 uint32_t vl = env->vl; \ 4967 uint32_t esz = sizeof(TS2); \ 4968 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4969 uint32_t vta = vext_vta(desc); \ 4970 uint32_t vma = vext_vma(desc); \ 4971 uint64_t index; \ 4972 uint32_t i; \ 4973 \ 4974 for (i = env->vstart; i < vl; i++) { \ 4975 if (!vm && !vext_elem_mask(v0, i)) { \ 4976 /* set masked-off elements to 1s */ \ 4977 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4978 continue; \ 4979 } \ 4980 index = *((TS1 *)vs1 + HS1(i)); \ 4981 if (index >= vlmax) { \ 4982 *((TS2 *)vd + HS2(i)) = 0; \ 4983 } else { \ 4984 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4985 } \ 4986 } \ 4987 env->vstart = 0; \ 4988 /* set tail elements to 1s */ \ 4989 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4990 } 4991 4992 /* vd[i] = (vs1[i] >= VLMAX) ? 
/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                   \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,              \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));            \
    uint32_t vm = vext_vm(desc);                                         \
    uint32_t vl = env->vl;                                               \
    uint32_t esz = sizeof(TS2);                                          \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t vma = vext_vma(desc);                                       \
    uint64_t index;                                                      \
    uint32_t i;                                                          \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        if (!vm && !vext_elem_mask(v0, i)) {                             \
            /* set masked-off elements to 1s */                          \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);          \
            continue;                                                    \
        }                                                                \
        index = *((TS1 *)vs1 + HS1(i));                                  \
        if (index >= vlmax) {                                            \
            *((TS2 *)vd + HS2(i)) = 0;                                   \
        } else {                                                         \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));          \
        }                                                                \
    }                                                                    \
    env->vstart = 0;                                                     \
    /* set tail elements to 1s */                                        \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));          \
    uint32_t vm = vext_vm(desc);                                         \
    uint32_t vl = env->vl;                                               \
    uint32_t esz = sizeof(ETYPE);                                        \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t vma = vext_vma(desc);                                       \
    uint64_t index = s1;                                                 \
    uint32_t i;                                                          \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        if (!vm && !vext_elem_mask(v0, i)) {                             \
            /* set masked-off elements to 1s */                          \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);          \
            continue;                                                    \
        }                                                                \
        if (index >= vlmax) {                                            \
            *((ETYPE *)vd + H(i)) = 0;                                   \
        } else {                                                         \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));          \
        }                                                                \
    }                                                                    \
    env->vstart = 0;                                                     \
    /* set tail elements to 1s */                                        \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                            \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,              \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t esz = sizeof(ETYPE);                                        \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t num = 0, i;                                                 \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        if (!vext_elem_mask(vs1, i)) {                                   \
            continue;                                                    \
        }                                                                \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                \
        num++;                                                           \
    }                                                                    \
    env->vstart = 0;                                                     \
    /* set tail elements to 1s */                                        \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
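/*
 * Worked examples (illustrative only): with vl = 4 and VLMAX = 8,
 * vrgather.vv with vs1 = { 3, 0, 17, 1 } produces
 *     vd = { vs2[3], vs2[0], 0, vs2[1] }
 * (index 17 >= VLMAX selects 0).  vcompress.vm with mask bits
 * vs1 = 0b1101 packs vs2[0], vs2[2] and vs2[3] into vd[0..2]; vd[3] is
 * not written by the loop above, and elements from vl onward follow vta.
 */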
/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                         \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t vm = vext_vm(desc);                                         \
    uint32_t esz = sizeof(ETYPE);                                        \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);         \
    uint32_t vta = vext_vta(desc);                                       \
    uint32_t vma = vext_vma(desc);                                       \
    uint32_t i;                                                          \
                                                                         \
    for (i = env->vstart; i < vl; i++) {                                 \
        if (!vm && !vext_elem_mask(v0, i)) {                             \
            /* set masked-off elements to 1s */                          \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);          \
            continue;                                                    \
        }                                                                \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));               \
    }                                                                    \
    env->vstart = 0;                                                     \
    /* set tail elements to 1s */                                        \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);             \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
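/*
 * Note: the extension itself is performed by the implicit C conversion in
 * the assignment above: an unsigned DTYPE (vzext_*) is zero-extended to
 * ETYPE, while a signed DTYPE (vsext_*) is sign-extended.  For example,
 * vsext_vf2_h reads the int8_t value 0x80 (-128) and stores the int16_t
 * value 0xff80, whereas vzext_vf2_h stores 0x0080.
 */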