1 /* 2 * RISC-V Vector Extension Helpers for QEMU. 3 * 4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2 or later, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "qemu/host-utils.h" 21 #include "qemu/bitops.h" 22 #include "cpu.h" 23 #include "exec/memop.h" 24 #include "exec/exec-all.h" 25 #include "exec/cpu_ldst.h" 26 #include "exec/helper-proto.h" 27 #include "fpu/softfloat.h" 28 #include "tcg/tcg-gvec-desc.h" 29 #include "internals.h" 30 #include "vector_internals.h" 31 #include <math.h> 32 33 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, 34 target_ulong s2) 35 { 36 int vlmax, vl; 37 RISCVCPU *cpu = env_archcpu(env); 38 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL); 39 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW); 40 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); 41 int xlen = riscv_cpu_xlen(env); 42 bool vill = (s2 >> (xlen - 1)) & 0x1; 43 target_ulong reserved = s2 & 44 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT, 45 xlen - 1 - R_VTYPE_RESERVED_SHIFT); 46 47 if (lmul & 4) { 48 /* Fractional LMUL - check LMUL * VLEN >= SEW */ 49 if (lmul == 4 || 50 cpu->cfg.vlen >> (8 - lmul) < sew) { 51 vill = true; 52 } 53 } 54 55 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) { 56 /* only set vill bit. */ 57 env->vill = 1; 58 env->vtype = 0; 59 env->vl = 0; 60 env->vstart = 0; 61 return 0; 62 } 63 64 vlmax = vext_get_vlmax(cpu, s2); 65 if (s1 <= vlmax) { 66 vl = s1; 67 } else { 68 vl = vlmax; 69 } 70 env->vl = vl; 71 env->vtype = s2; 72 env->vstart = 0; 73 env->vill = 0; 74 return vl; 75 } 76 77 /* 78 * Get the maximum number of elements can be operated. 79 * 80 * log2_esz: log2 of element size in bytes. 81 */ 82 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) 83 { 84 /* 85 * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits. 86 * so vlen in bytes (vlenb) is encoded as maxsz. 87 */ 88 uint32_t vlenb = simd_maxsz(desc); 89 90 /* Return VLMAX */ 91 int scale = vext_lmul(desc) - log2_esz; 92 return scale < 0 ? vlenb >> -scale : vlenb << scale; 93 } 94 95 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) 96 { 97 return (addr & ~env->cur_pmmask) | env->cur_pmbase; 98 } 99 100 /* 101 * This function checks watchpoint before real load operation. 102 * 103 * In softmmu mode, the TLB API probe_access is enough for watchpoint check. 104 * In user mode, there is no watchpoint support now. 105 * 106 * It will trigger an exception if there is no mapping in TLB 107 * and page table walk can't fill the TLB entry. Then the guest 108 * software can return here after process the exception or never return. 
109 */ 110 static void probe_pages(CPURISCVState *env, target_ulong addr, 111 target_ulong len, uintptr_t ra, 112 MMUAccessType access_type) 113 { 114 target_ulong pagelen = -(addr | TARGET_PAGE_MASK); 115 target_ulong curlen = MIN(pagelen, len); 116 117 probe_access(env, adjust_addr(env, addr), curlen, access_type, 118 cpu_mmu_index(env, false), ra); 119 if (len > curlen) { 120 addr += curlen; 121 curlen = len - curlen; 122 probe_access(env, adjust_addr(env, addr), curlen, access_type, 123 cpu_mmu_index(env, false), ra); 124 } 125 } 126 127 static inline void vext_set_elem_mask(void *v0, int index, 128 uint8_t value) 129 { 130 int idx = index / 64; 131 int pos = index % 64; 132 uint64_t old = ((uint64_t *)v0)[idx]; 133 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); 134 } 135 136 /* elements operations for load and store */ 137 typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr, 138 uint32_t idx, void *vd, uintptr_t retaddr); 139 140 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 141 static void NAME(CPURISCVState *env, abi_ptr addr, \ 142 uint32_t idx, void *vd, uintptr_t retaddr)\ 143 { \ 144 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 145 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 146 } \ 147 148 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) 149 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw) 150 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) 151 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq) 152 153 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 154 static void NAME(CPURISCVState *env, abi_ptr addr, \ 155 uint32_t idx, void *vd, uintptr_t retaddr)\ 156 { \ 157 ETYPE data = *((ETYPE *)vd + H(idx)); \ 158 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 159 } 160 161 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) 162 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) 163 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) 164 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) 165 166 static void vext_set_tail_elems_1s(target_ulong vl, void *vd, 167 uint32_t desc, uint32_t nf, 168 uint32_t esz, uint32_t max_elems) 169 { 170 uint32_t vta = vext_vta(desc); 171 int k; 172 173 if (vta == 0) { 174 return; 175 } 176 177 for (k = 0; k < nf; ++k) { 178 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz, 179 (k * max_elems + max_elems) * esz); 180 } 181 } 182 183 /* 184 * stride: access vector element from strided memory 185 */ 186 static void 187 vext_ldst_stride(void *vd, void *v0, target_ulong base, 188 target_ulong stride, CPURISCVState *env, 189 uint32_t desc, uint32_t vm, 190 vext_ldst_elem_fn *ldst_elem, 191 uint32_t log2_esz, uintptr_t ra) 192 { 193 uint32_t i, k; 194 uint32_t nf = vext_nf(desc); 195 uint32_t max_elems = vext_max_elems(desc, log2_esz); 196 uint32_t esz = 1 << log2_esz; 197 uint32_t vma = vext_vma(desc); 198 199 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 200 k = 0; 201 while (k < nf) { 202 if (!vm && !vext_elem_mask(v0, i)) { 203 /* set masked-off elements to 1s */ 204 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 205 (i + k * max_elems + 1) * esz); 206 k++; 207 continue; 208 } 209 target_ulong addr = base + stride * i + (k << log2_esz); 210 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 211 k++; 212 } 213 } 214 env->vstart = 0; 215 216 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 217 } 218 219 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 220 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 221 target_ulong stride, CPURISCVState *env, \ 222 uint32_t desc) \ 223 { \ 224 uint32_t vm = vext_vm(desc); \ 225 
vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 226 ctzl(sizeof(ETYPE)), GETPC()); \ 227 } 228 229 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 230 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 231 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 232 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 233 234 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 235 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 236 target_ulong stride, CPURISCVState *env, \ 237 uint32_t desc) \ 238 { \ 239 uint32_t vm = vext_vm(desc); \ 240 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 241 ctzl(sizeof(ETYPE)), GETPC()); \ 242 } 243 244 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 245 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 246 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 247 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 248 249 /* 250 * unit-stride: access elements stored contiguously in memory 251 */ 252 253 /* unmasked unit-stride load and store operation */ 254 static void 255 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 256 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, 257 uintptr_t ra) 258 { 259 uint32_t i, k; 260 uint32_t nf = vext_nf(desc); 261 uint32_t max_elems = vext_max_elems(desc, log2_esz); 262 uint32_t esz = 1 << log2_esz; 263 264 /* load bytes from guest memory */ 265 for (i = env->vstart; i < evl; i++, env->vstart++) { 266 k = 0; 267 while (k < nf) { 268 target_ulong addr = base + ((i * nf + k) << log2_esz); 269 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 270 k++; 271 } 272 } 273 env->vstart = 0; 274 275 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); 276 } 277 278 /* 279 * masked unit-stride load and store operation will be a special case of 280 * stride, stride = NF * sizeof (ETYPE) 281 */ 282 283 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 284 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 285 CPURISCVState *env, uint32_t desc) \ 286 { \ 287 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 288 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 289 ctzl(sizeof(ETYPE)), GETPC()); \ 290 } \ 291 \ 292 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 293 CPURISCVState *env, uint32_t desc) \ 294 { \ 295 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 296 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 297 } 298 299 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 300 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 301 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 302 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 303 304 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 305 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 306 CPURISCVState *env, uint32_t desc) \ 307 { \ 308 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 309 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 310 ctzl(sizeof(ETYPE)), GETPC()); \ 311 } \ 312 \ 313 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 314 CPURISCVState *env, uint32_t desc) \ 315 { \ 316 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 317 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 318 } 319 320 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 321 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 322 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 323 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 324 325 /* 326 * unit stride mask load and store, EEW = 1 327 */ 328 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 329 CPURISCVState *env, uint32_t desc) 330 { 331 /* evl = ceil(vl/8) */ 332 uint8_t 
evl = (env->vl + 7) >> 3; 333 vext_ldst_us(vd, base, env, desc, lde_b, 334 0, evl, GETPC()); 335 } 336 337 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 338 CPURISCVState *env, uint32_t desc) 339 { 340 /* evl = ceil(vl/8) */ 341 uint8_t evl = (env->vl + 7) >> 3; 342 vext_ldst_us(vd, base, env, desc, ste_b, 343 0, evl, GETPC()); 344 } 345 346 /* 347 * index: access vector element from indexed memory 348 */ 349 typedef target_ulong vext_get_index_addr(target_ulong base, 350 uint32_t idx, void *vs2); 351 352 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 353 static target_ulong NAME(target_ulong base, \ 354 uint32_t idx, void *vs2) \ 355 { \ 356 return (base + *((ETYPE *)vs2 + H(idx))); \ 357 } 358 359 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 360 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 361 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 362 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 363 364 static inline void 365 vext_ldst_index(void *vd, void *v0, target_ulong base, 366 void *vs2, CPURISCVState *env, uint32_t desc, 367 vext_get_index_addr get_index_addr, 368 vext_ldst_elem_fn *ldst_elem, 369 uint32_t log2_esz, uintptr_t ra) 370 { 371 uint32_t i, k; 372 uint32_t nf = vext_nf(desc); 373 uint32_t vm = vext_vm(desc); 374 uint32_t max_elems = vext_max_elems(desc, log2_esz); 375 uint32_t esz = 1 << log2_esz; 376 uint32_t vma = vext_vma(desc); 377 378 /* load bytes from guest memory */ 379 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 380 k = 0; 381 while (k < nf) { 382 if (!vm && !vext_elem_mask(v0, i)) { 383 /* set masked-off elements to 1s */ 384 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 385 (i + k * max_elems + 1) * esz); 386 k++; 387 continue; 388 } 389 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); 390 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 391 k++; 392 } 393 } 394 env->vstart = 0; 395 396 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 397 } 398 399 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 400 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 401 void *vs2, CPURISCVState *env, uint32_t desc) \ 402 { \ 403 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 404 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ 405 } 406 407 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 408 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 409 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 410 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 411 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 412 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 413 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 414 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 415 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 416 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 417 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 418 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 419 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 420 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 421 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 422 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 423 424 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 425 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 426 void *vs2, CPURISCVState *env, uint32_t desc) \ 427 { \ 428 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 429 STORE_FN, ctzl(sizeof(ETYPE)), \ 430 GETPC()); \ 431 } 432 433 
GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b) 434 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h) 435 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w) 436 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d) 437 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b) 438 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h) 439 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w) 440 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d) 441 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b) 442 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h) 443 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w) 444 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d) 445 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b) 446 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h) 447 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w) 448 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d) 449 450 /* 451 * unit-stride fault-only-fisrt load instructions 452 */ 453 static inline void 454 vext_ldff(void *vd, void *v0, target_ulong base, 455 CPURISCVState *env, uint32_t desc, 456 vext_ldst_elem_fn *ldst_elem, 457 uint32_t log2_esz, uintptr_t ra) 458 { 459 void *host; 460 uint32_t i, k, vl = 0; 461 uint32_t nf = vext_nf(desc); 462 uint32_t vm = vext_vm(desc); 463 uint32_t max_elems = vext_max_elems(desc, log2_esz); 464 uint32_t esz = 1 << log2_esz; 465 uint32_t vma = vext_vma(desc); 466 target_ulong addr, offset, remain; 467 468 /* probe every access */ 469 for (i = env->vstart; i < env->vl; i++) { 470 if (!vm && !vext_elem_mask(v0, i)) { 471 continue; 472 } 473 addr = adjust_addr(env, base + i * (nf << log2_esz)); 474 if (i == 0) { 475 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD); 476 } else { 477 /* if it triggers an exception, no need to check watchpoint */ 478 remain = nf << log2_esz; 479 while (remain > 0) { 480 offset = -(addr | TARGET_PAGE_MASK); 481 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, 482 cpu_mmu_index(env, false)); 483 if (host) { 484 #ifdef CONFIG_USER_ONLY 485 if (!page_check_range(addr, offset, PAGE_READ)) { 486 vl = i; 487 goto ProbeSuccess; 488 } 489 #else 490 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD); 491 #endif 492 } else { 493 vl = i; 494 goto ProbeSuccess; 495 } 496 if (remain <= offset) { 497 break; 498 } 499 remain -= offset; 500 addr = adjust_addr(env, addr + offset); 501 } 502 } 503 } 504 ProbeSuccess: 505 /* load bytes from guest memory */ 506 if (vl != 0) { 507 env->vl = vl; 508 } 509 for (i = env->vstart; i < env->vl; i++) { 510 k = 0; 511 while (k < nf) { 512 if (!vm && !vext_elem_mask(v0, i)) { 513 /* set masked-off elements to 1s */ 514 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 515 (i + k * max_elems + 1) * esz); 516 k++; 517 continue; 518 } 519 target_ulong addr = base + ((i * nf + k) << log2_esz); 520 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 521 k++; 522 } 523 } 524 env->vstart = 0; 525 526 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 527 } 528 529 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ 530 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 531 CPURISCVState *env, uint32_t desc) \ 532 { \ 533 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \ 534 ctzl(sizeof(ETYPE)), GETPC()); \ 535 } 536 537 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b) 538 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h) 539 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w) 540 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) 541 542 #define DO_SWAP(N, M) (M) 543 #define DO_AND(N, M) (N & M) 544 #define 
DO_XOR(N, M) (N ^ M) 545 #define DO_OR(N, M) (N | M) 546 #define DO_ADD(N, M) (N + M) 547 548 /* Signed min/max */ 549 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 550 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 551 552 /* 553 * load and store whole register instructions 554 */ 555 static void 556 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 557 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra) 558 { 559 uint32_t i, k, off, pos; 560 uint32_t nf = vext_nf(desc); 561 uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3; 562 uint32_t max_elems = vlenb >> log2_esz; 563 564 k = env->vstart / max_elems; 565 off = env->vstart % max_elems; 566 567 if (off) { 568 /* load/store rest of elements of current segment pointed by vstart */ 569 for (pos = off; pos < max_elems; pos++, env->vstart++) { 570 target_ulong addr = base + ((pos + k * max_elems) << log2_esz); 571 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, 572 ra); 573 } 574 k++; 575 } 576 577 /* load/store elements for rest of segments */ 578 for (; k < nf; k++) { 579 for (i = 0; i < max_elems; i++, env->vstart++) { 580 target_ulong addr = base + ((i + k * max_elems) << log2_esz); 581 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 582 } 583 } 584 585 env->vstart = 0; 586 } 587 588 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 589 void HELPER(NAME)(void *vd, target_ulong base, \ 590 CPURISCVState *env, uint32_t desc) \ 591 { \ 592 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 593 ctzl(sizeof(ETYPE)), GETPC()); \ 594 } 595 596 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 597 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 598 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 599 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 600 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 601 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 602 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 603 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 604 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 605 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 606 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 607 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 608 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 609 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 610 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 611 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 612 613 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 614 void HELPER(NAME)(void *vd, target_ulong base, \ 615 CPURISCVState *env, uint32_t desc) \ 616 { \ 617 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 618 ctzl(sizeof(ETYPE)), GETPC()); \ 619 } 620 621 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 622 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 623 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 624 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 625 626 /* 627 * Vector Integer Arithmetic Instructions 628 */ 629 630 /* (TD, T1, T2, TX1, TX2) */ 631 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 632 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 633 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 634 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 635 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 636 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 637 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 638 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 639 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 640 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 
641 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 642 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 643 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 644 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 645 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 646 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 647 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 648 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 649 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 650 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 651 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 652 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 653 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 654 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 655 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 656 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 657 658 #define DO_SUB(N, M) (N - M) 659 #define DO_RSUB(N, M) (M - N) 660 661 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 662 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 663 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 664 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 665 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 666 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 667 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 668 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 669 670 GEN_VEXT_VV(vadd_vv_b, 1) 671 GEN_VEXT_VV(vadd_vv_h, 2) 672 GEN_VEXT_VV(vadd_vv_w, 4) 673 GEN_VEXT_VV(vadd_vv_d, 8) 674 GEN_VEXT_VV(vsub_vv_b, 1) 675 GEN_VEXT_VV(vsub_vv_h, 2) 676 GEN_VEXT_VV(vsub_vv_w, 4) 677 GEN_VEXT_VV(vsub_vv_d, 8) 678 679 680 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 681 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 682 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 683 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 684 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 685 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 686 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 687 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 688 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 689 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 690 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 691 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 692 693 GEN_VEXT_VX(vadd_vx_b, 1) 694 GEN_VEXT_VX(vadd_vx_h, 2) 695 GEN_VEXT_VX(vadd_vx_w, 4) 696 GEN_VEXT_VX(vadd_vx_d, 8) 697 GEN_VEXT_VX(vsub_vx_b, 1) 698 GEN_VEXT_VX(vsub_vx_h, 2) 699 GEN_VEXT_VX(vsub_vx_w, 4) 700 GEN_VEXT_VX(vsub_vx_d, 8) 701 GEN_VEXT_VX(vrsub_vx_b, 1) 702 GEN_VEXT_VX(vrsub_vx_h, 2) 703 GEN_VEXT_VX(vrsub_vx_w, 4) 704 GEN_VEXT_VX(vrsub_vx_d, 8) 705 706 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 707 { 708 intptr_t oprsz = simd_oprsz(desc); 709 intptr_t i; 710 711 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 712 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 713 } 714 } 715 716 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 717 { 718 intptr_t oprsz = simd_oprsz(desc); 719 intptr_t i; 720 721 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 722 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 723 } 724 } 725 726 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, 
uint32_t desc) 727 { 728 intptr_t oprsz = simd_oprsz(desc); 729 intptr_t i; 730 731 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 732 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 733 } 734 } 735 736 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 737 { 738 intptr_t oprsz = simd_oprsz(desc); 739 intptr_t i; 740 741 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 742 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 743 } 744 } 745 746 /* Vector Widening Integer Add/Subtract */ 747 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 748 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 749 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 750 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 751 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 752 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 753 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 754 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 755 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 756 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 757 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 758 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 759 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 760 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 761 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 762 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 763 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 764 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 765 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 766 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 767 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 768 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 769 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 770 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 771 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 772 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 773 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 774 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 775 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 776 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 777 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 778 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 779 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 780 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 781 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 782 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 783 GEN_VEXT_VV(vwaddu_vv_b, 2) 784 GEN_VEXT_VV(vwaddu_vv_h, 4) 785 GEN_VEXT_VV(vwaddu_vv_w, 8) 786 GEN_VEXT_VV(vwsubu_vv_b, 2) 787 GEN_VEXT_VV(vwsubu_vv_h, 4) 788 GEN_VEXT_VV(vwsubu_vv_w, 8) 789 GEN_VEXT_VV(vwadd_vv_b, 2) 790 GEN_VEXT_VV(vwadd_vv_h, 4) 791 GEN_VEXT_VV(vwadd_vv_w, 8) 792 GEN_VEXT_VV(vwsub_vv_b, 2) 793 GEN_VEXT_VV(vwsub_vv_h, 4) 794 GEN_VEXT_VV(vwsub_vv_w, 8) 795 GEN_VEXT_VV(vwaddu_wv_b, 2) 796 GEN_VEXT_VV(vwaddu_wv_h, 4) 797 GEN_VEXT_VV(vwaddu_wv_w, 8) 798 GEN_VEXT_VV(vwsubu_wv_b, 2) 799 GEN_VEXT_VV(vwsubu_wv_h, 4) 800 GEN_VEXT_VV(vwsubu_wv_w, 8) 801 GEN_VEXT_VV(vwadd_wv_b, 2) 802 GEN_VEXT_VV(vwadd_wv_h, 4) 803 GEN_VEXT_VV(vwadd_wv_w, 8) 804 
GEN_VEXT_VV(vwsub_wv_b, 2) 805 GEN_VEXT_VV(vwsub_wv_h, 4) 806 GEN_VEXT_VV(vwsub_wv_w, 8) 807 808 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 809 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 810 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 811 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 812 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 813 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 814 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 815 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 816 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 817 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 818 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 819 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 820 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 821 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 822 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 823 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 824 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 825 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 826 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 827 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 828 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 829 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 830 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 831 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 832 GEN_VEXT_VX(vwaddu_vx_b, 2) 833 GEN_VEXT_VX(vwaddu_vx_h, 4) 834 GEN_VEXT_VX(vwaddu_vx_w, 8) 835 GEN_VEXT_VX(vwsubu_vx_b, 2) 836 GEN_VEXT_VX(vwsubu_vx_h, 4) 837 GEN_VEXT_VX(vwsubu_vx_w, 8) 838 GEN_VEXT_VX(vwadd_vx_b, 2) 839 GEN_VEXT_VX(vwadd_vx_h, 4) 840 GEN_VEXT_VX(vwadd_vx_w, 8) 841 GEN_VEXT_VX(vwsub_vx_b, 2) 842 GEN_VEXT_VX(vwsub_vx_h, 4) 843 GEN_VEXT_VX(vwsub_vx_w, 8) 844 GEN_VEXT_VX(vwaddu_wx_b, 2) 845 GEN_VEXT_VX(vwaddu_wx_h, 4) 846 GEN_VEXT_VX(vwaddu_wx_w, 8) 847 GEN_VEXT_VX(vwsubu_wx_b, 2) 848 GEN_VEXT_VX(vwsubu_wx_h, 4) 849 GEN_VEXT_VX(vwsubu_wx_w, 8) 850 GEN_VEXT_VX(vwadd_wx_b, 2) 851 GEN_VEXT_VX(vwadd_wx_h, 4) 852 GEN_VEXT_VX(vwadd_wx_w, 8) 853 GEN_VEXT_VX(vwsub_wx_b, 2) 854 GEN_VEXT_VX(vwsub_wx_h, 4) 855 GEN_VEXT_VX(vwsub_wx_w, 8) 856 857 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 858 #define DO_VADC(N, M, C) (N + M + C) 859 #define DO_VSBC(N, M, C) (N - M - C) 860 861 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 862 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 863 CPURISCVState *env, uint32_t desc) \ 864 { \ 865 uint32_t vl = env->vl; \ 866 uint32_t esz = sizeof(ETYPE); \ 867 uint32_t total_elems = \ 868 vext_get_total_elems(env, desc, esz); \ 869 uint32_t vta = vext_vta(desc); \ 870 uint32_t i; \ 871 \ 872 for (i = env->vstart; i < vl; i++) { \ 873 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 874 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 875 ETYPE carry = vext_elem_mask(v0, i); \ 876 \ 877 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 878 } \ 879 env->vstart = 0; \ 880 /* set tail elements to 1s */ \ 881 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 882 } 883 884 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 885 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 886 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 887 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 888 889 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 890 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 891 
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 892 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 893 894 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 895 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 896 CPURISCVState *env, uint32_t desc) \ 897 { \ 898 uint32_t vl = env->vl; \ 899 uint32_t esz = sizeof(ETYPE); \ 900 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 901 uint32_t vta = vext_vta(desc); \ 902 uint32_t i; \ 903 \ 904 for (i = env->vstart; i < vl; i++) { \ 905 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 906 ETYPE carry = vext_elem_mask(v0, i); \ 907 \ 908 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 909 } \ 910 env->vstart = 0; \ 911 /* set tail elements to 1s */ \ 912 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 913 } 914 915 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 916 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 917 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 918 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 919 920 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 921 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 922 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 923 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 924 925 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 926 (__typeof(N))(N + M) < N) 927 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 928 929 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 930 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 931 CPURISCVState *env, uint32_t desc) \ 932 { \ 933 uint32_t vl = env->vl; \ 934 uint32_t vm = vext_vm(desc); \ 935 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 936 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 937 uint32_t i; \ 938 \ 939 for (i = env->vstart; i < vl; i++) { \ 940 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 941 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 942 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 943 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 944 } \ 945 env->vstart = 0; \ 946 /* 947 * mask destination register are always tail-agnostic 948 * set tail elements to 1s 949 */ \ 950 if (vta_all_1s) { \ 951 for (; i < total_elems; i++) { \ 952 vext_set_elem_mask(vd, i, 1); \ 953 } \ 954 } \ 955 } 956 957 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 958 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 959 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 960 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 961 962 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 963 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 964 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 965 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 966 967 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 968 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 969 void *vs2, CPURISCVState *env, uint32_t desc) \ 970 { \ 971 uint32_t vl = env->vl; \ 972 uint32_t vm = vext_vm(desc); \ 973 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 974 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 975 uint32_t i; \ 976 \ 977 for (i = env->vstart; i < vl; i++) { \ 978 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 979 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 980 vext_set_elem_mask(vd, i, \ 981 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 982 } \ 983 env->vstart = 0; \ 984 /* 985 * mask destination register are always tail-agnostic 986 * set tail elements to 1s 987 */ \ 988 if (vta_all_1s) { \ 989 for (; i < total_elems; i++) { \ 
990 vext_set_elem_mask(vd, i, 1); \ 991 } \ 992 } \ 993 } 994 995 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 996 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 997 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 998 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 999 1000 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1001 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1002 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1003 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1004 1005 /* Vector Bitwise Logical Instructions */ 1006 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1007 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1008 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1009 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1010 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1011 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1012 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1013 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1014 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1015 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1016 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1017 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1018 GEN_VEXT_VV(vand_vv_b, 1) 1019 GEN_VEXT_VV(vand_vv_h, 2) 1020 GEN_VEXT_VV(vand_vv_w, 4) 1021 GEN_VEXT_VV(vand_vv_d, 8) 1022 GEN_VEXT_VV(vor_vv_b, 1) 1023 GEN_VEXT_VV(vor_vv_h, 2) 1024 GEN_VEXT_VV(vor_vv_w, 4) 1025 GEN_VEXT_VV(vor_vv_d, 8) 1026 GEN_VEXT_VV(vxor_vv_b, 1) 1027 GEN_VEXT_VV(vxor_vv_h, 2) 1028 GEN_VEXT_VV(vxor_vv_w, 4) 1029 GEN_VEXT_VV(vxor_vv_d, 8) 1030 1031 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1032 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1033 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1034 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1035 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1036 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1037 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1038 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1039 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1040 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1041 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1042 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1043 GEN_VEXT_VX(vand_vx_b, 1) 1044 GEN_VEXT_VX(vand_vx_h, 2) 1045 GEN_VEXT_VX(vand_vx_w, 4) 1046 GEN_VEXT_VX(vand_vx_d, 8) 1047 GEN_VEXT_VX(vor_vx_b, 1) 1048 GEN_VEXT_VX(vor_vx_h, 2) 1049 GEN_VEXT_VX(vor_vx_w, 4) 1050 GEN_VEXT_VX(vor_vx_d, 8) 1051 GEN_VEXT_VX(vxor_vx_b, 1) 1052 GEN_VEXT_VX(vxor_vx_h, 2) 1053 GEN_VEXT_VX(vxor_vx_w, 4) 1054 GEN_VEXT_VX(vxor_vx_d, 8) 1055 1056 /* Vector Single-Width Bit Shift Instructions */ 1057 #define DO_SLL(N, M) (N << (M)) 1058 #define DO_SRL(N, M) (N >> (M)) 1059 1060 /* generate the helpers for shift instructions with two vector operators */ 1061 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1062 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1063 void *vs2, CPURISCVState *env, uint32_t desc) \ 1064 { \ 1065 uint32_t vm = vext_vm(desc); \ 1066 uint32_t vl = env->vl; \ 1067 uint32_t esz = sizeof(TS1); \ 1068 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1069 uint32_t vta = vext_vta(desc); \ 1070 uint32_t vma = vext_vma(desc); \ 1071 uint32_t i; \ 1072 \ 1073 for (i = env->vstart; i < vl; i++) { \ 1074 if (!vm && !vext_elem_mask(v0, i)) { \ 1075 /* set masked-off elements 
to 1s */ \ 1076 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 1077 continue; \ 1078 } \ 1079 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1080 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1081 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1082 } \ 1083 env->vstart = 0; \ 1084 /* set tail elements to 1s */ \ 1085 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1086 } 1087 1088 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1089 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1090 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1091 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1092 1093 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1094 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1095 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1096 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1097 1098 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1099 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1100 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1101 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1102 1103 /* 1104 * generate the helpers for shift instructions with one vector and one scalar 1105 */ 1106 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1107 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1108 void *vs2, CPURISCVState *env, \ 1109 uint32_t desc) \ 1110 { \ 1111 uint32_t vm = vext_vm(desc); \ 1112 uint32_t vl = env->vl; \ 1113 uint32_t esz = sizeof(TD); \ 1114 uint32_t total_elems = \ 1115 vext_get_total_elems(env, desc, esz); \ 1116 uint32_t vta = vext_vta(desc); \ 1117 uint32_t vma = vext_vma(desc); \ 1118 uint32_t i; \ 1119 \ 1120 for (i = env->vstart; i < vl; i++) { \ 1121 if (!vm && !vext_elem_mask(v0, i)) { \ 1122 /* set masked-off elements to 1s */ \ 1123 vext_set_elems_1s(vd, vma, i * esz, \ 1124 (i + 1) * esz); \ 1125 continue; \ 1126 } \ 1127 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1128 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1129 } \ 1130 env->vstart = 0; \ 1131 /* set tail elements to 1s */ \ 1132 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ 1133 } 1134 1135 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1136 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1137 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1138 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1139 1140 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1141 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1142 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1143 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1144 1145 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1146 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1147 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1148 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1149 1150 /* Vector Narrowing Integer Right Shift Instructions */ 1151 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1152 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1153 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1154 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, 
int16_t, H1, H2, DO_SRL, 0xf) 1155 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1156 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1157 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1158 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1159 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1160 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1161 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1162 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1163 1164 /* Vector Integer Comparison Instructions */ 1165 #define DO_MSEQ(N, M) (N == M) 1166 #define DO_MSNE(N, M) (N != M) 1167 #define DO_MSLT(N, M) (N < M) 1168 #define DO_MSLE(N, M) (N <= M) 1169 #define DO_MSGT(N, M) (N > M) 1170 1171 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1172 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1173 CPURISCVState *env, uint32_t desc) \ 1174 { \ 1175 uint32_t vm = vext_vm(desc); \ 1176 uint32_t vl = env->vl; \ 1177 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 1178 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1179 uint32_t vma = vext_vma(desc); \ 1180 uint32_t i; \ 1181 \ 1182 for (i = env->vstart; i < vl; i++) { \ 1183 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1184 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1185 if (!vm && !vext_elem_mask(v0, i)) { \ 1186 /* set masked-off elements to 1s */ \ 1187 if (vma) { \ 1188 vext_set_elem_mask(vd, i, 1); \ 1189 } \ 1190 continue; \ 1191 } \ 1192 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1193 } \ 1194 env->vstart = 0; \ 1195 /* 1196 * mask destination register are always tail-agnostic 1197 * set tail elements to 1s 1198 */ \ 1199 if (vta_all_1s) { \ 1200 for (; i < total_elems; i++) { \ 1201 vext_set_elem_mask(vd, i, 1); \ 1202 } \ 1203 } \ 1204 } 1205 1206 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1207 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1208 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1209 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1210 1211 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1212 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1213 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1214 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1215 1216 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1217 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1218 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1219 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1220 1221 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1222 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1223 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1224 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1225 1226 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1227 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1228 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1229 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1230 1231 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1232 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1233 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1234 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1235 1236 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1237 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1238 CPURISCVState *env, uint32_t desc) \ 1239 { \ 1240 uint32_t vm = vext_vm(desc); \ 1241 uint32_t vl = env->vl; \ 1242 uint32_t total_elems = 
riscv_cpu_cfg(env)->vlen; \ 1243 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1244 uint32_t vma = vext_vma(desc); \ 1245 uint32_t i; \ 1246 \ 1247 for (i = env->vstart; i < vl; i++) { \ 1248 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1249 if (!vm && !vext_elem_mask(v0, i)) { \ 1250 /* set masked-off elements to 1s */ \ 1251 if (vma) { \ 1252 vext_set_elem_mask(vd, i, 1); \ 1253 } \ 1254 continue; \ 1255 } \ 1256 vext_set_elem_mask(vd, i, \ 1257 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1258 } \ 1259 env->vstart = 0; \ 1260 /* 1261 * mask destination register are always tail-agnostic 1262 * set tail elements to 1s 1263 */ \ 1264 if (vta_all_1s) { \ 1265 for (; i < total_elems; i++) { \ 1266 vext_set_elem_mask(vd, i, 1); \ 1267 } \ 1268 } \ 1269 } 1270 1271 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1272 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1273 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1274 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1275 1276 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1277 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1278 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1279 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1280 1281 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1282 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1283 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1284 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1285 1286 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1287 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1288 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1289 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1290 1291 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1292 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1293 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1294 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1295 1296 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1297 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1298 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1299 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1300 1301 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1302 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1303 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1304 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1305 1306 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1307 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1308 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1309 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1310 1311 /* Vector Integer Min/Max Instructions */ 1312 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1313 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1314 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1315 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1316 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1317 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1318 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1319 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1320 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1321 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1322 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1323 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1324 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1325 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1326 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, 
DO_MAX) 1327 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1328 GEN_VEXT_VV(vminu_vv_b, 1) 1329 GEN_VEXT_VV(vminu_vv_h, 2) 1330 GEN_VEXT_VV(vminu_vv_w, 4) 1331 GEN_VEXT_VV(vminu_vv_d, 8) 1332 GEN_VEXT_VV(vmin_vv_b, 1) 1333 GEN_VEXT_VV(vmin_vv_h, 2) 1334 GEN_VEXT_VV(vmin_vv_w, 4) 1335 GEN_VEXT_VV(vmin_vv_d, 8) 1336 GEN_VEXT_VV(vmaxu_vv_b, 1) 1337 GEN_VEXT_VV(vmaxu_vv_h, 2) 1338 GEN_VEXT_VV(vmaxu_vv_w, 4) 1339 GEN_VEXT_VV(vmaxu_vv_d, 8) 1340 GEN_VEXT_VV(vmax_vv_b, 1) 1341 GEN_VEXT_VV(vmax_vv_h, 2) 1342 GEN_VEXT_VV(vmax_vv_w, 4) 1343 GEN_VEXT_VV(vmax_vv_d, 8) 1344 1345 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1346 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1347 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1348 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1349 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1350 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1351 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1352 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1353 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1354 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1355 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1356 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1357 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1358 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1359 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1360 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1361 GEN_VEXT_VX(vminu_vx_b, 1) 1362 GEN_VEXT_VX(vminu_vx_h, 2) 1363 GEN_VEXT_VX(vminu_vx_w, 4) 1364 GEN_VEXT_VX(vminu_vx_d, 8) 1365 GEN_VEXT_VX(vmin_vx_b, 1) 1366 GEN_VEXT_VX(vmin_vx_h, 2) 1367 GEN_VEXT_VX(vmin_vx_w, 4) 1368 GEN_VEXT_VX(vmin_vx_d, 8) 1369 GEN_VEXT_VX(vmaxu_vx_b, 1) 1370 GEN_VEXT_VX(vmaxu_vx_h, 2) 1371 GEN_VEXT_VX(vmaxu_vx_w, 4) 1372 GEN_VEXT_VX(vmaxu_vx_d, 8) 1373 GEN_VEXT_VX(vmax_vx_b, 1) 1374 GEN_VEXT_VX(vmax_vx_h, 2) 1375 GEN_VEXT_VX(vmax_vx_w, 4) 1376 GEN_VEXT_VX(vmax_vx_d, 8) 1377 1378 /* Vector Single-Width Integer Multiply Instructions */ 1379 #define DO_MUL(N, M) (N * M) 1380 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1381 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1382 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1383 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1384 GEN_VEXT_VV(vmul_vv_b, 1) 1385 GEN_VEXT_VV(vmul_vv_h, 2) 1386 GEN_VEXT_VV(vmul_vv_w, 4) 1387 GEN_VEXT_VV(vmul_vv_d, 8) 1388 1389 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1390 { 1391 return (int16_t)s2 * (int16_t)s1 >> 8; 1392 } 1393 1394 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1395 { 1396 return (int32_t)s2 * (int32_t)s1 >> 16; 1397 } 1398 1399 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1400 { 1401 return (int64_t)s2 * (int64_t)s1 >> 32; 1402 } 1403 1404 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1405 { 1406 uint64_t hi_64, lo_64; 1407 1408 muls64(&lo_64, &hi_64, s1, s2); 1409 return hi_64; 1410 } 1411 1412 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1413 { 1414 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1415 } 1416 1417 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1418 { 1419 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1420 } 1421 1422 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1423 { 1424 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1425 } 1426 1427 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1428 { 1429 uint64_t hi_64, lo_64; 1430 1431 mulu64(&lo_64, &hi_64, s2, s1); 1432 return hi_64; 1433 } 1434 1435 static 
int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1436 { 1437 return (int16_t)s2 * (uint16_t)s1 >> 8; 1438 } 1439 1440 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1441 { 1442 return (int32_t)s2 * (uint32_t)s1 >> 16; 1443 } 1444 1445 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1446 { 1447 return (int64_t)s2 * (uint64_t)s1 >> 32; 1448 } 1449 1450 /* 1451 * Let A = signed operand, 1452 * B = unsigned operand 1453 * P = mulu64(A, B), unsigned product 1454 * 1455 * LET X = 2 ** 64 - A, 2's complement of A 1456 * SP = signed product 1457 * THEN 1458 * IF A < 0 1459 * SP = -X * B 1460 * = -(2 ** 64 - A) * B 1461 * = A * B - 2 ** 64 * B 1462 * = P - 2 ** 64 * B 1463 * ELSE 1464 * SP = P 1465 * THEN 1466 * HI_P -= (A < 0 ? B : 0) 1467 */ 1468 1469 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1470 { 1471 uint64_t hi_64, lo_64; 1472 1473 mulu64(&lo_64, &hi_64, s2, s1); 1474 1475 hi_64 -= s2 < 0 ? s1 : 0; 1476 return hi_64; 1477 } 1478 1479 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1480 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1481 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1482 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1483 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1484 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1485 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1486 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1487 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1488 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1489 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1490 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1491 GEN_VEXT_VV(vmulh_vv_b, 1) 1492 GEN_VEXT_VV(vmulh_vv_h, 2) 1493 GEN_VEXT_VV(vmulh_vv_w, 4) 1494 GEN_VEXT_VV(vmulh_vv_d, 8) 1495 GEN_VEXT_VV(vmulhu_vv_b, 1) 1496 GEN_VEXT_VV(vmulhu_vv_h, 2) 1497 GEN_VEXT_VV(vmulhu_vv_w, 4) 1498 GEN_VEXT_VV(vmulhu_vv_d, 8) 1499 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1500 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1501 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1502 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1503 1504 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1505 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1506 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1507 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1508 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1509 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1510 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1511 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1512 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1513 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1514 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1515 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1516 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1517 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1518 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1519 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1520 GEN_VEXT_VX(vmul_vx_b, 1) 1521 GEN_VEXT_VX(vmul_vx_h, 2) 1522 GEN_VEXT_VX(vmul_vx_w, 4) 1523 GEN_VEXT_VX(vmul_vx_d, 8) 1524 GEN_VEXT_VX(vmulh_vx_b, 1) 1525 GEN_VEXT_VX(vmulh_vx_h, 2) 1526 GEN_VEXT_VX(vmulh_vx_w, 4) 1527 GEN_VEXT_VX(vmulh_vx_d, 8) 1528 GEN_VEXT_VX(vmulhu_vx_b, 1) 1529 GEN_VEXT_VX(vmulhu_vx_h, 2) 1530 GEN_VEXT_VX(vmulhu_vx_w, 4) 1531 GEN_VEXT_VX(vmulhu_vx_d, 8) 
1532 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1533 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1534 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1535 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1536 1537 /* Vector Integer Divide Instructions */ 1538 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1539 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1540 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ 1541 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1542 #define DO_REM(N, M) (unlikely(M == 0) ? N : \ 1543 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1544 1545 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1546 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1547 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1548 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1549 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1550 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1551 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1552 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1553 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1554 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1555 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1556 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1557 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1558 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1559 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1560 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1561 GEN_VEXT_VV(vdivu_vv_b, 1) 1562 GEN_VEXT_VV(vdivu_vv_h, 2) 1563 GEN_VEXT_VV(vdivu_vv_w, 4) 1564 GEN_VEXT_VV(vdivu_vv_d, 8) 1565 GEN_VEXT_VV(vdiv_vv_b, 1) 1566 GEN_VEXT_VV(vdiv_vv_h, 2) 1567 GEN_VEXT_VV(vdiv_vv_w, 4) 1568 GEN_VEXT_VV(vdiv_vv_d, 8) 1569 GEN_VEXT_VV(vremu_vv_b, 1) 1570 GEN_VEXT_VV(vremu_vv_h, 2) 1571 GEN_VEXT_VV(vremu_vv_w, 4) 1572 GEN_VEXT_VV(vremu_vv_d, 8) 1573 GEN_VEXT_VV(vrem_vv_b, 1) 1574 GEN_VEXT_VV(vrem_vv_h, 2) 1575 GEN_VEXT_VV(vrem_vv_w, 4) 1576 GEN_VEXT_VV(vrem_vv_d, 8) 1577 1578 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1579 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1580 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1581 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1582 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1583 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1584 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1585 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1586 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1587 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1588 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1589 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1590 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1591 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1592 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1593 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1594 GEN_VEXT_VX(vdivu_vx_b, 1) 1595 GEN_VEXT_VX(vdivu_vx_h, 2) 1596 GEN_VEXT_VX(vdivu_vx_w, 4) 1597 GEN_VEXT_VX(vdivu_vx_d, 8) 1598 GEN_VEXT_VX(vdiv_vx_b, 1) 1599 GEN_VEXT_VX(vdiv_vx_h, 2) 1600 GEN_VEXT_VX(vdiv_vx_w, 4) 1601 GEN_VEXT_VX(vdiv_vx_d, 8) 1602 GEN_VEXT_VX(vremu_vx_b, 1) 1603 GEN_VEXT_VX(vremu_vx_h, 2) 1604 GEN_VEXT_VX(vremu_vx_w, 4) 1605 GEN_VEXT_VX(vremu_vx_d, 8) 1606 GEN_VEXT_VX(vrem_vx_b, 1) 1607 GEN_VEXT_VX(vrem_vx_h, 2) 1608 GEN_VEXT_VX(vrem_vx_w, 4) 1609 GEN_VEXT_VX(vrem_vx_d, 8) 1610 1611 /* 
Vector Widening Integer Multiply Instructions */ 1612 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1613 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1614 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1615 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1616 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1617 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1618 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1619 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1620 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1621 GEN_VEXT_VV(vwmul_vv_b, 2) 1622 GEN_VEXT_VV(vwmul_vv_h, 4) 1623 GEN_VEXT_VV(vwmul_vv_w, 8) 1624 GEN_VEXT_VV(vwmulu_vv_b, 2) 1625 GEN_VEXT_VV(vwmulu_vv_h, 4) 1626 GEN_VEXT_VV(vwmulu_vv_w, 8) 1627 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1628 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1629 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1630 1631 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1632 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1633 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1634 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1635 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1636 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1637 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1638 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1639 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1640 GEN_VEXT_VX(vwmul_vx_b, 2) 1641 GEN_VEXT_VX(vwmul_vx_h, 4) 1642 GEN_VEXT_VX(vwmul_vx_w, 8) 1643 GEN_VEXT_VX(vwmulu_vx_b, 2) 1644 GEN_VEXT_VX(vwmulu_vx_h, 4) 1645 GEN_VEXT_VX(vwmulu_vx_w, 8) 1646 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1647 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1648 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1649 1650 /* Vector Single-Width Integer Multiply-Add Instructions */ 1651 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1652 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1653 { \ 1654 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1655 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1656 TD d = *((TD *)vd + HD(i)); \ 1657 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1658 } 1659 1660 #define DO_MACC(N, M, D) (M * N + D) 1661 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1662 #define DO_MADD(N, M, D) (M * D + N) 1663 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1664 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1665 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1666 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1667 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1668 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1669 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1670 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1671 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1672 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1673 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1674 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1675 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1676 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1677 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1678 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1679 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1680 GEN_VEXT_VV(vmacc_vv_b, 1) 1681 GEN_VEXT_VV(vmacc_vv_h, 2) 1682 GEN_VEXT_VV(vmacc_vv_w, 4) 1683 GEN_VEXT_VV(vmacc_vv_d, 8) 1684 GEN_VEXT_VV(vnmsac_vv_b, 1) 1685 
GEN_VEXT_VV(vnmsac_vv_h, 2) 1686 GEN_VEXT_VV(vnmsac_vv_w, 4) 1687 GEN_VEXT_VV(vnmsac_vv_d, 8) 1688 GEN_VEXT_VV(vmadd_vv_b, 1) 1689 GEN_VEXT_VV(vmadd_vv_h, 2) 1690 GEN_VEXT_VV(vmadd_vv_w, 4) 1691 GEN_VEXT_VV(vmadd_vv_d, 8) 1692 GEN_VEXT_VV(vnmsub_vv_b, 1) 1693 GEN_VEXT_VV(vnmsub_vv_h, 2) 1694 GEN_VEXT_VV(vnmsub_vv_w, 4) 1695 GEN_VEXT_VV(vnmsub_vv_d, 8) 1696 1697 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1698 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1699 { \ 1700 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1701 TD d = *((TD *)vd + HD(i)); \ 1702 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1703 } 1704 1705 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1706 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1707 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1708 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1709 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1710 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1711 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1712 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1713 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1714 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1715 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1716 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1717 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1718 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1719 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1720 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1721 GEN_VEXT_VX(vmacc_vx_b, 1) 1722 GEN_VEXT_VX(vmacc_vx_h, 2) 1723 GEN_VEXT_VX(vmacc_vx_w, 4) 1724 GEN_VEXT_VX(vmacc_vx_d, 8) 1725 GEN_VEXT_VX(vnmsac_vx_b, 1) 1726 GEN_VEXT_VX(vnmsac_vx_h, 2) 1727 GEN_VEXT_VX(vnmsac_vx_w, 4) 1728 GEN_VEXT_VX(vnmsac_vx_d, 8) 1729 GEN_VEXT_VX(vmadd_vx_b, 1) 1730 GEN_VEXT_VX(vmadd_vx_h, 2) 1731 GEN_VEXT_VX(vmadd_vx_w, 4) 1732 GEN_VEXT_VX(vmadd_vx_d, 8) 1733 GEN_VEXT_VX(vnmsub_vx_b, 1) 1734 GEN_VEXT_VX(vnmsub_vx_h, 2) 1735 GEN_VEXT_VX(vnmsub_vx_w, 4) 1736 GEN_VEXT_VX(vnmsub_vx_d, 8) 1737 1738 /* Vector Widening Integer Multiply-Add Instructions */ 1739 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1740 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1741 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1742 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1743 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1744 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1745 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1746 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1747 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1748 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1749 GEN_VEXT_VV(vwmaccu_vv_h, 4) 1750 GEN_VEXT_VV(vwmaccu_vv_w, 8) 1751 GEN_VEXT_VV(vwmacc_vv_b, 2) 1752 GEN_VEXT_VV(vwmacc_vv_h, 4) 1753 GEN_VEXT_VV(vwmacc_vv_w, 8) 1754 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 1755 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 1756 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 1757 1758 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1759 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1760 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1761 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1762 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1763 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1764 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, 
DO_MACC) 1765 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1766 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1767 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1768 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1769 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1770 GEN_VEXT_VX(vwmaccu_vx_b, 2) 1771 GEN_VEXT_VX(vwmaccu_vx_h, 4) 1772 GEN_VEXT_VX(vwmaccu_vx_w, 8) 1773 GEN_VEXT_VX(vwmacc_vx_b, 2) 1774 GEN_VEXT_VX(vwmacc_vx_h, 4) 1775 GEN_VEXT_VX(vwmacc_vx_w, 8) 1776 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 1777 GEN_VEXT_VX(vwmaccsu_vx_h, 4) 1778 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 1779 GEN_VEXT_VX(vwmaccus_vx_b, 2) 1780 GEN_VEXT_VX(vwmaccus_vx_h, 4) 1781 GEN_VEXT_VX(vwmaccus_vx_w, 8) 1782 1783 /* Vector Integer Merge and Move Instructions */ 1784 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1785 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1786 uint32_t desc) \ 1787 { \ 1788 uint32_t vl = env->vl; \ 1789 uint32_t esz = sizeof(ETYPE); \ 1790 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1791 uint32_t vta = vext_vta(desc); \ 1792 uint32_t i; \ 1793 \ 1794 for (i = env->vstart; i < vl; i++) { \ 1795 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1796 *((ETYPE *)vd + H(i)) = s1; \ 1797 } \ 1798 env->vstart = 0; \ 1799 /* set tail elements to 1s */ \ 1800 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1801 } 1802 1803 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1804 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1805 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1806 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1807 1808 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1809 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1810 uint32_t desc) \ 1811 { \ 1812 uint32_t vl = env->vl; \ 1813 uint32_t esz = sizeof(ETYPE); \ 1814 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1815 uint32_t vta = vext_vta(desc); \ 1816 uint32_t i; \ 1817 \ 1818 for (i = env->vstart; i < vl; i++) { \ 1819 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1820 } \ 1821 env->vstart = 0; \ 1822 /* set tail elements to 1s */ \ 1823 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1824 } 1825 1826 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1827 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1828 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1829 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1830 1831 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1832 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1833 CPURISCVState *env, uint32_t desc) \ 1834 { \ 1835 uint32_t vl = env->vl; \ 1836 uint32_t esz = sizeof(ETYPE); \ 1837 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1838 uint32_t vta = vext_vta(desc); \ 1839 uint32_t i; \ 1840 \ 1841 for (i = env->vstart; i < vl; i++) { \ 1842 ETYPE *vt = (!vext_elem_mask(v0, i) ? 
vs2 : vs1); \ 1843 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1844 } \ 1845 env->vstart = 0; \ 1846 /* set tail elements to 1s */ \ 1847 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1848 } 1849 1850 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1851 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1852 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1853 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1854 1855 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1856 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1857 void *vs2, CPURISCVState *env, uint32_t desc) \ 1858 { \ 1859 uint32_t vl = env->vl; \ 1860 uint32_t esz = sizeof(ETYPE); \ 1861 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1862 uint32_t vta = vext_vta(desc); \ 1863 uint32_t i; \ 1864 \ 1865 for (i = env->vstart; i < vl; i++) { \ 1866 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1867 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 1868 (ETYPE)(target_long)s1); \ 1869 *((ETYPE *)vd + H(i)) = d; \ 1870 } \ 1871 env->vstart = 0; \ 1872 /* set tail elements to 1s */ \ 1873 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1874 } 1875 1876 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1877 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1878 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1879 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1880 1881 /* 1882 * Vector Fixed-Point Arithmetic Instructions 1883 */ 1884 1885 /* Vector Single-Width Saturating Add and Subtract */ 1886 1887 /* 1888 * As fixed point instructions probably have round mode and saturation, 1889 * define common macros for fixed point here. 1890 */ 1891 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1892 CPURISCVState *env, int vxrm); 1893 1894 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1895 static inline void \ 1896 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1897 CPURISCVState *env, int vxrm) \ 1898 { \ 1899 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1900 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1901 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1902 } 1903 1904 static inline void 1905 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1906 CPURISCVState *env, 1907 uint32_t vl, uint32_t vm, int vxrm, 1908 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 1909 { 1910 for (uint32_t i = env->vstart; i < vl; i++) { 1911 if (!vm && !vext_elem_mask(v0, i)) { 1912 /* set masked-off elements to 1s */ 1913 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 1914 continue; 1915 } 1916 fn(vd, vs1, vs2, i, env, vxrm); 1917 } 1918 env->vstart = 0; 1919 } 1920 1921 static inline void 1922 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1923 CPURISCVState *env, 1924 uint32_t desc, 1925 opivv2_rm_fn *fn, uint32_t esz) 1926 { 1927 uint32_t vm = vext_vm(desc); 1928 uint32_t vl = env->vl; 1929 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 1930 uint32_t vta = vext_vta(desc); 1931 uint32_t vma = vext_vma(desc); 1932 1933 switch (env->vxrm) { 1934 case 0: /* rnu */ 1935 vext_vv_rm_1(vd, v0, vs1, vs2, 1936 env, vl, vm, 0, fn, vma, esz); 1937 break; 1938 case 1: /* rne */ 1939 vext_vv_rm_1(vd, v0, vs1, vs2, 1940 env, vl, vm, 1, fn, vma, esz); 1941 break; 1942 case 2: /* rdn */ 1943 vext_vv_rm_1(vd, v0, vs1, vs2, 1944 env, vl, vm, 2, fn, vma, esz); 1945 break; 1946 default: /* rod */ 1947 vext_vv_rm_1(vd, v0, vs1, vs2, 1948 env, vl, vm, 3, fn, vma, esz); 1949 break; 1950 } 1951 /* set tail elements to 1s */ 1952 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 1953 } 1954 1955 /* generate helpers for fixed 
point instructions with OPIVV format */ 1956 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 1957 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1958 CPURISCVState *env, uint32_t desc) \ 1959 { \ 1960 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 1961 do_##NAME, ESZ); \ 1962 } 1963 1964 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, 1965 uint8_t b) 1966 { 1967 uint8_t res = a + b; 1968 if (res < a) { 1969 res = UINT8_MAX; 1970 env->vxsat = 0x1; 1971 } 1972 return res; 1973 } 1974 1975 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1976 uint16_t b) 1977 { 1978 uint16_t res = a + b; 1979 if (res < a) { 1980 res = UINT16_MAX; 1981 env->vxsat = 0x1; 1982 } 1983 return res; 1984 } 1985 1986 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1987 uint32_t b) 1988 { 1989 uint32_t res = a + b; 1990 if (res < a) { 1991 res = UINT32_MAX; 1992 env->vxsat = 0x1; 1993 } 1994 return res; 1995 } 1996 1997 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1998 uint64_t b) 1999 { 2000 uint64_t res = a + b; 2001 if (res < a) { 2002 res = UINT64_MAX; 2003 env->vxsat = 0x1; 2004 } 2005 return res; 2006 } 2007 2008 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2009 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2010 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2011 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2012 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2013 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2014 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2015 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2016 2017 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2018 CPURISCVState *env, int vxrm); 2019 2020 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2021 static inline void \ 2022 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2023 CPURISCVState *env, int vxrm) \ 2024 { \ 2025 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2026 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2027 } 2028 2029 static inline void 2030 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2031 CPURISCVState *env, 2032 uint32_t vl, uint32_t vm, int vxrm, 2033 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2034 { 2035 for (uint32_t i = env->vstart; i < vl; i++) { 2036 if (!vm && !vext_elem_mask(v0, i)) { 2037 /* set masked-off elements to 1s */ 2038 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2039 continue; 2040 } 2041 fn(vd, s1, vs2, i, env, vxrm); 2042 } 2043 env->vstart = 0; 2044 } 2045 2046 static inline void 2047 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2048 CPURISCVState *env, 2049 uint32_t desc, 2050 opivx2_rm_fn *fn, uint32_t esz) 2051 { 2052 uint32_t vm = vext_vm(desc); 2053 uint32_t vl = env->vl; 2054 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2055 uint32_t vta = vext_vta(desc); 2056 uint32_t vma = vext_vma(desc); 2057 2058 switch (env->vxrm) { 2059 case 0: /* rnu */ 2060 vext_vx_rm_1(vd, v0, s1, vs2, 2061 env, vl, vm, 0, fn, vma, esz); 2062 break; 2063 case 1: /* rne */ 2064 vext_vx_rm_1(vd, v0, s1, vs2, 2065 env, vl, vm, 1, fn, vma, esz); 2066 break; 2067 case 2: /* rdn */ 2068 vext_vx_rm_1(vd, v0, s1, vs2, 2069 env, vl, vm, 2, fn, vma, esz); 2070 break; 2071 default: /* rod */ 2072 vext_vx_rm_1(vd, v0, s1, vs2, 2073 env, vl, vm, 3, fn, vma, esz); 2074 break; 2075 } 2076 /* set tail elements to 1s */ 2077 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2078 } 2079 2080 /* generate helpers for fixed point 
instructions with OPIVX format */ 2081 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2082 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2083 void *vs2, CPURISCVState *env, \ 2084 uint32_t desc) \ 2085 { \ 2086 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2087 do_##NAME, ESZ); \ 2088 } 2089 2090 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2091 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2092 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2093 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2094 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2095 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2096 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2097 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2098 2099 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2100 { 2101 int8_t res = a + b; 2102 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2103 res = a > 0 ? INT8_MAX : INT8_MIN; 2104 env->vxsat = 0x1; 2105 } 2106 return res; 2107 } 2108 2109 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2110 int16_t b) 2111 { 2112 int16_t res = a + b; 2113 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2114 res = a > 0 ? INT16_MAX : INT16_MIN; 2115 env->vxsat = 0x1; 2116 } 2117 return res; 2118 } 2119 2120 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2121 int32_t b) 2122 { 2123 int32_t res = a + b; 2124 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2125 res = a > 0 ? INT32_MAX : INT32_MIN; 2126 env->vxsat = 0x1; 2127 } 2128 return res; 2129 } 2130 2131 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2132 int64_t b) 2133 { 2134 int64_t res = a + b; 2135 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2136 res = a > 0 ? INT64_MAX : INT64_MIN; 2137 env->vxsat = 0x1; 2138 } 2139 return res; 2140 } 2141 2142 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2143 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2144 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2145 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2146 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2147 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2148 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2149 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2150 2151 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2152 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2153 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2154 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2155 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2156 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2157 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2158 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2159 2160 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2161 uint8_t b) 2162 { 2163 uint8_t res = a - b; 2164 if (res > a) { 2165 res = 0; 2166 env->vxsat = 0x1; 2167 } 2168 return res; 2169 } 2170 2171 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2172 uint16_t b) 2173 { 2174 uint16_t res = a - b; 2175 if (res > a) { 2176 res = 0; 2177 env->vxsat = 0x1; 2178 } 2179 return res; 2180 } 2181 2182 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2183 uint32_t b) 2184 { 2185 uint32_t res = a - b; 2186 if (res > a) { 2187 res = 0; 2188 env->vxsat = 0x1; 2189 } 2190 return res; 2191 } 2192 2193 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2194 uint64_t b) 2195 { 2196 uint64_t res = a - b; 2197 if (res > a) { 2198 res = 0; 2199 env->vxsat = 0x1; 2200 } 2201 return res; 2202 } 2203 2204 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2205 RVVCALL(OPIVV2_RM, vssubu_vv_h, 
OP_UUU_H, H2, H2, H2, ssubu16) 2206 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2207 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2208 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2209 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2210 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2211 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2212 2213 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2214 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2215 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2216 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2217 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2218 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2219 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2220 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2221 2222 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2223 { 2224 int8_t res = a - b; 2225 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2226 res = a >= 0 ? INT8_MAX : INT8_MIN; 2227 env->vxsat = 0x1; 2228 } 2229 return res; 2230 } 2231 2232 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, 2233 int16_t b) 2234 { 2235 int16_t res = a - b; 2236 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2237 res = a >= 0 ? INT16_MAX : INT16_MIN; 2238 env->vxsat = 0x1; 2239 } 2240 return res; 2241 } 2242 2243 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, 2244 int32_t b) 2245 { 2246 int32_t res = a - b; 2247 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2248 res = a >= 0 ? INT32_MAX : INT32_MIN; 2249 env->vxsat = 0x1; 2250 } 2251 return res; 2252 } 2253 2254 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, 2255 int64_t b) 2256 { 2257 int64_t res = a - b; 2258 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2259 res = a >= 0 ? INT64_MAX : INT64_MIN; 2260 env->vxsat = 0x1; 2261 } 2262 return res; 2263 } 2264 2265 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2266 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2267 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2268 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2269 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2270 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2271 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2272 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2273 2274 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2275 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2276 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2277 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2278 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2279 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2280 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2281 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2282 2283 /* Vector Single-Width Averaging Add and Subtract */ 2284 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2285 { 2286 uint8_t d = extract64(v, shift, 1); 2287 uint8_t d1; 2288 uint64_t D1, D2; 2289 2290 if (shift == 0 || shift > 64) { 2291 return 0; 2292 } 2293 2294 d1 = extract64(v, shift - 1, 1); 2295 D1 = extract64(v, 0, shift); 2296 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2297 return d1; 2298 } else if (vxrm == 1) { /* round-to-nearest-even */ 2299 if (shift > 1) { 2300 D2 = extract64(v, 0, shift - 1); 2301 return d1 & ((D2 != 0) | d); 2302 } else { 2303 return d1 & d; 2304 } 2305 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2306 return !d & (D1 != 0); 2307 } 2308 return 0; /* round-down (truncate) */ 2309 } 2310 2311 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, 2312 int32_t b) 2313 { 2314 int64_t res = (int64_t)a + b; 2315 uint8_t round = 
get_round(vxrm, res, 1); 2316 2317 return (res >> 1) + round; 2318 } 2319 2320 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, 2321 int64_t b) 2322 { 2323 int64_t res = a + b; 2324 uint8_t round = get_round(vxrm, res, 1); 2325 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2326 2327 /* With signed overflow, bit 64 is inverse of bit 63. */ 2328 return ((res >> 1) ^ over) + round; 2329 } 2330 2331 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2332 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2333 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2334 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2335 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2336 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2337 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2338 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2339 2340 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2341 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2342 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2343 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2344 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2345 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2346 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2347 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2348 2349 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2350 uint32_t a, uint32_t b) 2351 { 2352 uint64_t res = (uint64_t)a + b; 2353 uint8_t round = get_round(vxrm, res, 1); 2354 2355 return (res >> 1) + round; 2356 } 2357 2358 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2359 uint64_t a, uint64_t b) 2360 { 2361 uint64_t res = a + b; 2362 uint8_t round = get_round(vxrm, res, 1); 2363 uint64_t over = (uint64_t)(res < a) << 63; 2364 2365 return ((res >> 1) | over) + round; 2366 } 2367 2368 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2369 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2370 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2371 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2372 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2373 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2374 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2375 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2376 2377 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2378 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2379 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2380 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2381 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2382 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2383 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2384 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2385 2386 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, 2387 int32_t b) 2388 { 2389 int64_t res = (int64_t)a - b; 2390 uint8_t round = get_round(vxrm, res, 1); 2391 2392 return (res >> 1) + round; 2393 } 2394 2395 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, 2396 int64_t b) 2397 { 2398 int64_t res = (int64_t)a - b; 2399 uint8_t round = get_round(vxrm, res, 1); 2400 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2401 2402 /* With signed overflow, bit 64 is inverse of bit 63. 
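For example (added illustration): a = INT64_MIN, b = 1 gives the 65-bit difference
-(2^63 + 1); res wraps to INT64_MAX and over = INT64_MIN, so ((res >> 1) ^ over)
= -(2^62 + 1), i.e. the difference shifted right one bit, and with vxrm = 0 (rnu)
the round bit (bit 0 of res, here 1) brings the final result to -2^62.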
*/ 2403 return ((res >> 1) ^ over) + round; 2404 } 2405 2406 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2407 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2408 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2409 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2410 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2411 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2412 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2413 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2414 2415 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2416 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2417 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2418 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2419 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2420 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2421 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2422 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2423 2424 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2425 uint32_t a, uint32_t b) 2426 { 2427 int64_t res = (int64_t)a - b; 2428 uint8_t round = get_round(vxrm, res, 1); 2429 2430 return (res >> 1) + round; 2431 } 2432 2433 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2434 uint64_t a, uint64_t b) 2435 { 2436 uint64_t res = (uint64_t)a - b; 2437 uint8_t round = get_round(vxrm, res, 1); 2438 uint64_t over = (uint64_t)(res > a) << 63; 2439 2440 return ((res >> 1) | over) + round; 2441 } 2442 2443 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2444 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2445 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2446 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2447 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2448 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2449 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2450 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2451 2452 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2453 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2454 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2455 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2456 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2457 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2458 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2459 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2460 2461 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2462 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2463 { 2464 uint8_t round; 2465 int16_t res; 2466 2467 res = (int16_t)a * (int16_t)b; 2468 round = get_round(vxrm, res, 7); 2469 res = (res >> 7) + round; 2470 2471 if (res > INT8_MAX) { 2472 env->vxsat = 0x1; 2473 return INT8_MAX; 2474 } else if (res < INT8_MIN) { 2475 env->vxsat = 0x1; 2476 return INT8_MIN; 2477 } else { 2478 return res; 2479 } 2480 } 2481 2482 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2483 { 2484 uint8_t round; 2485 int32_t res; 2486 2487 res = (int32_t)a * (int32_t)b; 2488 round = get_round(vxrm, res, 15); 2489 res = (res >> 15) + round; 2490 2491 if (res > INT16_MAX) { 2492 env->vxsat = 0x1; 2493 return INT16_MAX; 2494 } else if (res < INT16_MIN) { 2495 env->vxsat = 0x1; 2496 return INT16_MIN; 2497 } else { 2498 return res; 2499 } 2500 } 2501 2502 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2503 { 2504 uint8_t round; 2505 int64_t res; 2506 2507 res = (int64_t)a * (int64_t)b; 2508 round = get_round(vxrm, res, 31); 2509 res = (res >> 31) + round; 2510 2511 if (res > INT32_MAX) { 2512 env->vxsat = 0x1; 2513 return INT32_MAX; 2514 } else if (res < INT32_MIN) { 2515 env->vxsat = 0x1; 
2516 return INT32_MIN; 2517 } else { 2518 return res; 2519 } 2520 } 2521 2522 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2523 { 2524 uint8_t round; 2525 uint64_t hi_64, lo_64; 2526 int64_t res; 2527 2528 if (a == INT64_MIN && b == INT64_MIN) { 2529 env->vxsat = 1; 2530 return INT64_MAX; 2531 } 2532 2533 muls64(&lo_64, &hi_64, a, b); 2534 round = get_round(vxrm, lo_64, 63); 2535 /* 2536 * Cannot overflow, as there are always 2537 * 2 sign bits after multiply. 2538 */ 2539 res = (hi_64 << 1) | (lo_64 >> 63); 2540 if (round) { 2541 if (res == INT64_MAX) { 2542 env->vxsat = 1; 2543 } else { 2544 res += 1; 2545 } 2546 } 2547 return res; 2548 } 2549 2550 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2551 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2552 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2553 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2554 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2555 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2556 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2557 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2558 2559 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2560 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2561 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2562 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2563 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2564 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2565 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2566 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2567 2568 /* Vector Single-Width Scaling Shift Instructions */ 2569 static inline uint8_t 2570 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2571 { 2572 uint8_t round, shift = b & 0x7; 2573 uint8_t res; 2574 2575 round = get_round(vxrm, a, shift); 2576 res = (a >> shift) + round; 2577 return res; 2578 } 2579 static inline uint16_t 2580 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2581 { 2582 uint8_t round, shift = b & 0xf; 2583 2584 round = get_round(vxrm, a, shift); 2585 return (a >> shift) + round; 2586 } 2587 static inline uint32_t 2588 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2589 { 2590 uint8_t round, shift = b & 0x1f; 2591 2592 round = get_round(vxrm, a, shift); 2593 return (a >> shift) + round; 2594 } 2595 static inline uint64_t 2596 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2597 { 2598 uint8_t round, shift = b & 0x3f; 2599 2600 round = get_round(vxrm, a, shift); 2601 return (a >> shift) + round; 2602 } 2603 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2604 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2605 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2606 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2607 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2608 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2609 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2610 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2611 2612 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2613 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2614 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2615 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2616 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2617 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2618 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2619 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2620 2621 static inline int8_t 2622 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2623 { 2624 uint8_t round, shift = b & 0x7; 2625 2626 round = get_round(vxrm, a, shift); 2627 return (a >> shift) + round; 2628 } 2629 static inline int16_t 2630 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2631 { 2632 uint8_t round, shift = b & 0xf; 2633 2634 round = get_round(vxrm, a, shift); 2635 return (a >> shift) + round; 2636 } 2637 static inline int32_t 2638 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2639 { 2640 uint8_t round, shift = b & 0x1f; 2641 2642 round = get_round(vxrm, a, shift); 2643 return (a >> shift) + round; 2644 } 2645 static inline int64_t 2646 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2647 { 2648 uint8_t round, shift = b & 0x3f; 2649 2650 round = get_round(vxrm, a, shift); 2651 return (a >> shift) + round; 2652 } 2653 2654 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2655 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2656 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2657 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2658 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2659 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2660 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2661 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2662 2663 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2664 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2665 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2666 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2667 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2668 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2669 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2670 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2671 2672 /* Vector Narrowing Fixed-Point Clip Instructions */ 2673 static inline int8_t 2674 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2675 { 2676 uint8_t round, shift = b & 0xf; 2677 int16_t res; 2678 2679 round = get_round(vxrm, a, shift); 2680 res = (a >> shift) + round; 2681 if (res > INT8_MAX) { 2682 env->vxsat = 0x1; 2683 return INT8_MAX; 2684 } else if (res < INT8_MIN) { 2685 env->vxsat = 0x1; 2686 return INT8_MIN; 2687 } else { 2688 return res; 2689 } 2690 } 2691 2692 static inline int16_t 2693 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2694 { 2695 uint8_t round, shift = b & 0x1f; 2696 int32_t res; 2697 2698 round = get_round(vxrm, a, shift); 2699 res = (a >> shift) + round; 2700 if (res > INT16_MAX) { 2701 env->vxsat = 0x1; 2702 return INT16_MAX; 2703 } else if (res < INT16_MIN) { 2704 env->vxsat = 0x1; 2705 return INT16_MIN; 2706 } else { 2707 return res; 2708 } 2709 } 2710 2711 static inline int32_t 2712 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2713 { 2714 uint8_t round, shift = b & 0x3f; 2715 int64_t res; 2716 2717 round = get_round(vxrm, a, shift); 2718 res = (a >> shift) + round; 2719 if (res > INT32_MAX) { 2720 env->vxsat = 0x1; 2721 return INT32_MAX; 2722 } else if (res < INT32_MIN) { 2723 env->vxsat = 0x1; 2724 return INT32_MIN; 2725 } else { 2726 return res; 2727 } 2728 } 2729 2730 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2731 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2732 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2733 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 2734 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 2735 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 2736 2737 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2738 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2739 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2740 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 2741 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 2742 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 2743 2744 static inline uint8_t 2745 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2746 { 2747 
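    /*
     * Added illustration: the 2*SEW-bit source a is shifted right by the low
     * four bits of b, rounded according to vxrm via get_round(), then
     * saturated to the narrow unsigned range. E.g. a = 0x1234, shift = 4,
     * vxrm = 0 (rnu): a >> 4 = 0x123 = 291 with a zero round bit; 291 exceeds
     * UINT8_MAX, so the result clips to 255 and vxsat is set.
     */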
uint8_t round, shift = b & 0xf; 2748 uint16_t res; 2749 2750 round = get_round(vxrm, a, shift); 2751 res = (a >> shift) + round; 2752 if (res > UINT8_MAX) { 2753 env->vxsat = 0x1; 2754 return UINT8_MAX; 2755 } else { 2756 return res; 2757 } 2758 } 2759 2760 static inline uint16_t 2761 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2762 { 2763 uint8_t round, shift = b & 0x1f; 2764 uint32_t res; 2765 2766 round = get_round(vxrm, a, shift); 2767 res = (a >> shift) + round; 2768 if (res > UINT16_MAX) { 2769 env->vxsat = 0x1; 2770 return UINT16_MAX; 2771 } else { 2772 return res; 2773 } 2774 } 2775 2776 static inline uint32_t 2777 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2778 { 2779 uint8_t round, shift = b & 0x3f; 2780 uint64_t res; 2781 2782 round = get_round(vxrm, a, shift); 2783 res = (a >> shift) + round; 2784 if (res > UINT32_MAX) { 2785 env->vxsat = 0x1; 2786 return UINT32_MAX; 2787 } else { 2788 return res; 2789 } 2790 } 2791 2792 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2793 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2794 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2795 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 2796 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 2797 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 2798 2799 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2800 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2801 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 2802 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 2803 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 2804 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 2805 2806 /* 2807 * Vector Float Point Arithmetic Instructions 2808 */ 2809 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2810 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2811 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2812 CPURISCVState *env) \ 2813 { \ 2814 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2815 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2816 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2817 } 2818 2819 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 2820 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2821 void *vs2, CPURISCVState *env, \ 2822 uint32_t desc) \ 2823 { \ 2824 uint32_t vm = vext_vm(desc); \ 2825 uint32_t vl = env->vl; \ 2826 uint32_t total_elems = \ 2827 vext_get_total_elems(env, desc, ESZ); \ 2828 uint32_t vta = vext_vta(desc); \ 2829 uint32_t vma = vext_vma(desc); \ 2830 uint32_t i; \ 2831 \ 2832 for (i = env->vstart; i < vl; i++) { \ 2833 if (!vm && !vext_elem_mask(v0, i)) { \ 2834 /* set masked-off elements to 1s */ \ 2835 vext_set_elems_1s(vd, vma, i * ESZ, \ 2836 (i + 1) * ESZ); \ 2837 continue; \ 2838 } \ 2839 do_##NAME(vd, vs1, vs2, i, env); \ 2840 } \ 2841 env->vstart = 0; \ 2842 /* set tail elements to 1s */ \ 2843 vext_set_elems_1s(vd, vta, vl * ESZ, \ 2844 total_elems * ESZ); \ 2845 } 2846 2847 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2848 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2849 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2850 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 2851 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 2852 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 2853 2854 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2855 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2856 CPURISCVState *env) \ 2857 { \ 2858 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2859 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 2860 } 2861 2862 #define GEN_VEXT_VF(NAME, 
ESZ) \ 2863 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2864 void *vs2, CPURISCVState *env, \ 2865 uint32_t desc) \ 2866 { \ 2867 uint32_t vm = vext_vm(desc); \ 2868 uint32_t vl = env->vl; \ 2869 uint32_t total_elems = \ 2870 vext_get_total_elems(env, desc, ESZ); \ 2871 uint32_t vta = vext_vta(desc); \ 2872 uint32_t vma = vext_vma(desc); \ 2873 uint32_t i; \ 2874 \ 2875 for (i = env->vstart; i < vl; i++) { \ 2876 if (!vm && !vext_elem_mask(v0, i)) { \ 2877 /* set masked-off elements to 1s */ \ 2878 vext_set_elems_1s(vd, vma, i * ESZ, \ 2879 (i + 1) * ESZ); \ 2880 continue; \ 2881 } \ 2882 do_##NAME(vd, s1, vs2, i, env); \ 2883 } \ 2884 env->vstart = 0; \ 2885 /* set tail elements to 1s */ \ 2886 vext_set_elems_1s(vd, vta, vl * ESZ, \ 2887 total_elems * ESZ); \ 2888 } 2889 2890 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2891 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2892 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2893 GEN_VEXT_VF(vfadd_vf_h, 2) 2894 GEN_VEXT_VF(vfadd_vf_w, 4) 2895 GEN_VEXT_VF(vfadd_vf_d, 8) 2896 2897 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2898 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2899 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2900 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 2901 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 2902 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 2903 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2904 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2905 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2906 GEN_VEXT_VF(vfsub_vf_h, 2) 2907 GEN_VEXT_VF(vfsub_vf_w, 4) 2908 GEN_VEXT_VF(vfsub_vf_d, 8) 2909 2910 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2911 { 2912 return float16_sub(b, a, s); 2913 } 2914 2915 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2916 { 2917 return float32_sub(b, a, s); 2918 } 2919 2920 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2921 { 2922 return float64_sub(b, a, s); 2923 } 2924 2925 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2926 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2927 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2928 GEN_VEXT_VF(vfrsub_vf_h, 2) 2929 GEN_VEXT_VF(vfrsub_vf_w, 4) 2930 GEN_VEXT_VF(vfrsub_vf_d, 8) 2931 2932 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2933 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2934 { 2935 return float32_add(float16_to_float32(a, true, s), 2936 float16_to_float32(b, true, s), s); 2937 } 2938 2939 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2940 { 2941 return float64_add(float32_to_float64(a, s), 2942 float32_to_float64(b, s), s); 2943 2944 } 2945 2946 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2947 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2948 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 2949 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 2950 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2951 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2952 GEN_VEXT_VF(vfwadd_vf_h, 4) 2953 GEN_VEXT_VF(vfwadd_vf_w, 8) 2954 2955 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2956 { 2957 return float32_sub(float16_to_float32(a, true, s), 2958 float16_to_float32(b, true, s), s); 2959 } 2960 2961 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2962 { 2963 return float64_sub(float32_to_float64(a, s), 2964 
float32_to_float64(b, s), s); 2965 2966 } 2967 2968 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 2969 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 2970 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 2971 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 2972 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 2973 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 2974 GEN_VEXT_VF(vfwsub_vf_h, 4) 2975 GEN_VEXT_VF(vfwsub_vf_w, 8) 2976 2977 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 2978 { 2979 return float32_add(a, float16_to_float32(b, true, s), s); 2980 } 2981 2982 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 2983 { 2984 return float64_add(a, float32_to_float64(b, s), s); 2985 } 2986 2987 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 2988 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 2989 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 2990 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 2991 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 2992 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 2993 GEN_VEXT_VF(vfwadd_wf_h, 4) 2994 GEN_VEXT_VF(vfwadd_wf_w, 8) 2995 2996 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 2997 { 2998 return float32_sub(a, float16_to_float32(b, true, s), s); 2999 } 3000 3001 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3002 { 3003 return float64_sub(a, float32_to_float64(b, s), s); 3004 } 3005 3006 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3007 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3008 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3009 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3010 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3011 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3012 GEN_VEXT_VF(vfwsub_wf_h, 4) 3013 GEN_VEXT_VF(vfwsub_wf_w, 8) 3014 3015 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3016 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3017 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3018 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3019 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3020 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3021 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3022 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3023 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3024 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3025 GEN_VEXT_VF(vfmul_vf_h, 2) 3026 GEN_VEXT_VF(vfmul_vf_w, 4) 3027 GEN_VEXT_VF(vfmul_vf_d, 8) 3028 3029 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3030 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3031 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3032 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3033 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3034 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3035 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3036 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3037 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3038 GEN_VEXT_VF(vfdiv_vf_h, 2) 3039 GEN_VEXT_VF(vfdiv_vf_w, 4) 3040 GEN_VEXT_VF(vfdiv_vf_d, 8) 3041 3042 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3043 { 3044 return float16_div(b, a, s); 3045 } 3046 3047 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3048 { 3049 return float32_div(b, a, s); 3050 } 3051 3052 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3053 { 3054 return float64_div(b, a, s); 3055 } 3056 
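/*
 * Added note: the OPFVF2 expansion evaluates OP(vs2[i], f[rs1], &env->fp_status),
 * so the plain vfsub/vfdiv helpers compute vs2[i] - f[rs1] and vs2[i] / f[rs1],
 * while the *_rsub and *_rdiv wrappers above swap their arguments to give the
 * reversed forms used by vfrsub.vf (f[rs1] - vs2[i]) and vfrdiv.vf
 * (f[rs1] / vs2[i]).
 */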
3057 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3058 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3059 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3060 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3061 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3062 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3063 3064 /* Vector Widening Floating-Point Multiply */ 3065 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3066 { 3067 return float32_mul(float16_to_float32(a, true, s), 3068 float16_to_float32(b, true, s), s); 3069 } 3070 3071 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3072 { 3073 return float64_mul(float32_to_float64(a, s), 3074 float32_to_float64(b, s), s); 3075 3076 } 3077 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3078 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3079 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3080 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3081 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3082 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3083 GEN_VEXT_VF(vfwmul_vf_h, 4) 3084 GEN_VEXT_VF(vfwmul_vf_w, 8) 3085 3086 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3087 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3088 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3089 CPURISCVState *env) \ 3090 { \ 3091 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3092 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3093 TD d = *((TD *)vd + HD(i)); \ 3094 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3095 } 3096 3097 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3098 { 3099 return float16_muladd(a, b, d, 0, s); 3100 } 3101 3102 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3103 { 3104 return float32_muladd(a, b, d, 0, s); 3105 } 3106 3107 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3108 { 3109 return float64_muladd(a, b, d, 0, s); 3110 } 3111 3112 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3113 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3114 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3115 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3116 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3117 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3118 3119 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3120 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3121 CPURISCVState *env) \ 3122 { \ 3123 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3124 TD d = *((TD *)vd + HD(i)); \ 3125 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3126 } 3127 3128 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3129 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3130 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3131 GEN_VEXT_VF(vfmacc_vf_h, 2) 3132 GEN_VEXT_VF(vfmacc_vf_w, 4) 3133 GEN_VEXT_VF(vfmacc_vf_d, 8) 3134 3135 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3136 { 3137 return float16_muladd(a, b, d, float_muladd_negate_c | 3138 float_muladd_negate_product, s); 3139 } 3140 3141 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3142 { 3143 return float32_muladd(a, b, d, float_muladd_negate_c | 3144 float_muladd_negate_product, s); 3145 } 3146 3147 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3148 { 3149 return float64_muladd(a, b, d, float_muladd_negate_c | 3150 float_muladd_negate_product, s); 3151 } 3152 3153 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, 
H2, H2, H2, fnmacc16) 3154 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3155 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3156 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3157 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3158 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3159 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3160 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3161 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3162 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3163 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3164 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3165 3166 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3167 { 3168 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3169 } 3170 3171 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3172 { 3173 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3174 } 3175 3176 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3177 { 3178 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3179 } 3180 3181 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3182 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3183 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3184 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3185 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3186 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3187 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3188 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3189 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3190 GEN_VEXT_VF(vfmsac_vf_h, 2) 3191 GEN_VEXT_VF(vfmsac_vf_w, 4) 3192 GEN_VEXT_VF(vfmsac_vf_d, 8) 3193 3194 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3195 { 3196 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3197 } 3198 3199 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3200 { 3201 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3202 } 3203 3204 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3205 { 3206 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3207 } 3208 3209 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3210 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3211 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3212 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3213 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3214 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3215 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3216 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3217 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3218 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3219 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3220 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3221 3222 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3223 { 3224 return float16_muladd(d, b, a, 0, s); 3225 } 3226 3227 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3228 { 3229 return float32_muladd(d, b, a, 0, s); 3230 } 3231 3232 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3233 { 3234 return float64_muladd(d, b, a, 0, s); 3235 } 3236 3237 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3238 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3239 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3240 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) 3241 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3242 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3243 RVVCALL(OPFVF3, vfmadd_vf_h, 
OP_UUU_H, H2, H2, fmadd16) 3244 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3245 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3246 GEN_VEXT_VF(vfmadd_vf_h, 2) 3247 GEN_VEXT_VF(vfmadd_vf_w, 4) 3248 GEN_VEXT_VF(vfmadd_vf_d, 8) 3249 3250 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3251 { 3252 return float16_muladd(d, b, a, float_muladd_negate_c | 3253 float_muladd_negate_product, s); 3254 } 3255 3256 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3257 { 3258 return float32_muladd(d, b, a, float_muladd_negate_c | 3259 float_muladd_negate_product, s); 3260 } 3261 3262 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3263 { 3264 return float64_muladd(d, b, a, float_muladd_negate_c | 3265 float_muladd_negate_product, s); 3266 } 3267 3268 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3269 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3270 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3271 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3272 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3273 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3274 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3275 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3276 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3277 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3278 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3279 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3280 3281 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3282 { 3283 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3284 } 3285 3286 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3287 { 3288 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3289 } 3290 3291 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3292 { 3293 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3294 } 3295 3296 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3297 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3298 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3299 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3300 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3301 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3302 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3303 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3304 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3305 GEN_VEXT_VF(vfmsub_vf_h, 2) 3306 GEN_VEXT_VF(vfmsub_vf_w, 4) 3307 GEN_VEXT_VF(vfmsub_vf_d, 8) 3308 3309 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3310 { 3311 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3312 } 3313 3314 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3315 { 3316 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3317 } 3318 3319 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3320 { 3321 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3322 } 3323 3324 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3325 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3326 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3327 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3328 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3329 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3330 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3331 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3332 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, 
H8, fnmsub64)
GEN_VEXT_VF(vfnmsub_vf_h, 2)
GEN_VEXT_VF(vfnmsub_vf_w, 4)
GEN_VEXT_VF(vfnmsub_vf_d, 8)

/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d, 0, s);
}

static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d, 0, s);
}

RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
GEN_VEXT_VF(vfwmacc_vf_h, 4)
GEN_VEXT_VF(vfwmacc_vf_w, 8)

static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(bfloat16_to_float32(a, s),
                          bfloat16_to_float32(b, s), d, 0, s);
}

RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
GEN_VEXT_VF(vfwmaccbf16_vf, 4)

static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c | float_muladd_negate_product,
                          s);
}

static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
                          d, float_muladd_negate_c |
                          float_muladd_negate_product, s);
}

RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
GEN_VEXT_VF(vfwnmacc_vf_h, 4)
GEN_VEXT_VF(vfwnmacc_vf_w, 8)

static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_c, s);
}

static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
{
    return float64_muladd(float32_to_float64(a, s),
                          float32_to_float64(b, s), d,
                          float_muladd_negate_c, s);
}

RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
GEN_VEXT_VF(vfwmsac_vf_h, 4)
GEN_VEXT_VF(vfwmsac_vf_w, 8)

static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
{
    return float32_muladd(float16_to_float32(a, true, s),
                          float16_to_float32(b, true, s), d,
                          float_muladd_negate_product, s);
}

static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d,
float_status *s) 3425 { 3426 return float64_muladd(float32_to_float64(a, s), 3427 float32_to_float64(b, s), d, 3428 float_muladd_negate_product, s); 3429 } 3430 3431 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3432 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3433 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3434 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3435 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3436 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3437 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3438 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3439 3440 /* Vector Floating-Point Square-Root Instruction */ 3441 /* (TD, T2, TX2) */ 3442 #define OP_UU_H uint16_t, uint16_t, uint16_t 3443 #define OP_UU_W uint32_t, uint32_t, uint32_t 3444 #define OP_UU_D uint64_t, uint64_t, uint64_t 3445 3446 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3447 static void do_##NAME(void *vd, void *vs2, int i, \ 3448 CPURISCVState *env) \ 3449 { \ 3450 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3451 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3452 } 3453 3454 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3455 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3456 CPURISCVState *env, uint32_t desc) \ 3457 { \ 3458 uint32_t vm = vext_vm(desc); \ 3459 uint32_t vl = env->vl; \ 3460 uint32_t total_elems = \ 3461 vext_get_total_elems(env, desc, ESZ); \ 3462 uint32_t vta = vext_vta(desc); \ 3463 uint32_t vma = vext_vma(desc); \ 3464 uint32_t i; \ 3465 \ 3466 if (vl == 0) { \ 3467 return; \ 3468 } \ 3469 for (i = env->vstart; i < vl; i++) { \ 3470 if (!vm && !vext_elem_mask(v0, i)) { \ 3471 /* set masked-off elements to 1s */ \ 3472 vext_set_elems_1s(vd, vma, i * ESZ, \ 3473 (i + 1) * ESZ); \ 3474 continue; \ 3475 } \ 3476 do_##NAME(vd, vs2, i, env); \ 3477 } \ 3478 env->vstart = 0; \ 3479 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3480 total_elems * ESZ); \ 3481 } 3482 3483 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3484 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3485 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3486 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3487 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3488 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3489 3490 /* 3491 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3492 * 3493 * Adapted from riscv-v-spec recip.c: 3494 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3495 */ 3496 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3497 { 3498 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3499 uint64_t exp = extract64(f, frac_size, exp_size); 3500 uint64_t frac = extract64(f, 0, frac_size); 3501 3502 const uint8_t lookup_table[] = { 3503 52, 51, 50, 48, 47, 46, 44, 43, 3504 42, 41, 40, 39, 38, 36, 35, 34, 3505 33, 32, 31, 30, 30, 29, 28, 27, 3506 26, 25, 24, 23, 23, 22, 21, 20, 3507 19, 19, 18, 17, 16, 16, 15, 14, 3508 14, 13, 12, 12, 11, 10, 10, 9, 3509 9, 8, 7, 7, 6, 6, 5, 4, 3510 4, 3, 3, 2, 2, 1, 1, 0, 3511 127, 125, 123, 121, 119, 118, 116, 114, 3512 113, 111, 109, 108, 106, 105, 103, 102, 3513 100, 99, 97, 96, 95, 93, 92, 91, 3514 90, 88, 87, 86, 85, 84, 83, 82, 3515 80, 79, 78, 77, 76, 75, 74, 73, 3516 72, 71, 70, 70, 69, 68, 67, 66, 3517 65, 64, 63, 63, 62, 61, 60, 59, 3518 59, 58, 57, 56, 56, 55, 54, 53 3519 }; 3520 const int precision = 7; 3521 3522 if (exp == 0 && frac != 0) { /* subnormal */ 3523 /* Normalize the subnormal. 
*/ 3524 while (extract64(frac, frac_size - 1, 1) == 0) { 3525 exp--; 3526 frac <<= 1; 3527 } 3528 3529 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3530 } 3531 3532 int idx = ((exp & 1) << (precision - 1)) | 3533 (frac >> (frac_size - precision + 1)); 3534 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3535 (frac_size - precision); 3536 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3537 3538 uint64_t val = 0; 3539 val = deposit64(val, 0, frac_size, out_frac); 3540 val = deposit64(val, frac_size, exp_size, out_exp); 3541 val = deposit64(val, frac_size + exp_size, 1, sign); 3542 return val; 3543 } 3544 3545 static float16 frsqrt7_h(float16 f, float_status *s) 3546 { 3547 int exp_size = 5, frac_size = 10; 3548 bool sign = float16_is_neg(f); 3549 3550 /* 3551 * frsqrt7(sNaN) = canonical NaN 3552 * frsqrt7(-inf) = canonical NaN 3553 * frsqrt7(-normal) = canonical NaN 3554 * frsqrt7(-subnormal) = canonical NaN 3555 */ 3556 if (float16_is_signaling_nan(f, s) || 3557 (float16_is_infinity(f) && sign) || 3558 (float16_is_normal(f) && sign) || 3559 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3560 s->float_exception_flags |= float_flag_invalid; 3561 return float16_default_nan(s); 3562 } 3563 3564 /* frsqrt7(qNaN) = canonical NaN */ 3565 if (float16_is_quiet_nan(f, s)) { 3566 return float16_default_nan(s); 3567 } 3568 3569 /* frsqrt7(+-0) = +-inf */ 3570 if (float16_is_zero(f)) { 3571 s->float_exception_flags |= float_flag_divbyzero; 3572 return float16_set_sign(float16_infinity, sign); 3573 } 3574 3575 /* frsqrt7(+inf) = +0 */ 3576 if (float16_is_infinity(f) && !sign) { 3577 return float16_set_sign(float16_zero, sign); 3578 } 3579 3580 /* +normal, +subnormal */ 3581 uint64_t val = frsqrt7(f, exp_size, frac_size); 3582 return make_float16(val); 3583 } 3584 3585 static float32 frsqrt7_s(float32 f, float_status *s) 3586 { 3587 int exp_size = 8, frac_size = 23; 3588 bool sign = float32_is_neg(f); 3589 3590 /* 3591 * frsqrt7(sNaN) = canonical NaN 3592 * frsqrt7(-inf) = canonical NaN 3593 * frsqrt7(-normal) = canonical NaN 3594 * frsqrt7(-subnormal) = canonical NaN 3595 */ 3596 if (float32_is_signaling_nan(f, s) || 3597 (float32_is_infinity(f) && sign) || 3598 (float32_is_normal(f) && sign) || 3599 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3600 s->float_exception_flags |= float_flag_invalid; 3601 return float32_default_nan(s); 3602 } 3603 3604 /* frsqrt7(qNaN) = canonical NaN */ 3605 if (float32_is_quiet_nan(f, s)) { 3606 return float32_default_nan(s); 3607 } 3608 3609 /* frsqrt7(+-0) = +-inf */ 3610 if (float32_is_zero(f)) { 3611 s->float_exception_flags |= float_flag_divbyzero; 3612 return float32_set_sign(float32_infinity, sign); 3613 } 3614 3615 /* frsqrt7(+inf) = +0 */ 3616 if (float32_is_infinity(f) && !sign) { 3617 return float32_set_sign(float32_zero, sign); 3618 } 3619 3620 /* +normal, +subnormal */ 3621 uint64_t val = frsqrt7(f, exp_size, frac_size); 3622 return make_float32(val); 3623 } 3624 3625 static float64 frsqrt7_d(float64 f, float_status *s) 3626 { 3627 int exp_size = 11, frac_size = 52; 3628 bool sign = float64_is_neg(f); 3629 3630 /* 3631 * frsqrt7(sNaN) = canonical NaN 3632 * frsqrt7(-inf) = canonical NaN 3633 * frsqrt7(-normal) = canonical NaN 3634 * frsqrt7(-subnormal) = canonical NaN 3635 */ 3636 if (float64_is_signaling_nan(f, s) || 3637 (float64_is_infinity(f) && sign) || 3638 (float64_is_normal(f) && sign) || 3639 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3640 
s->float_exception_flags |= float_flag_invalid; 3641 return float64_default_nan(s); 3642 } 3643 3644 /* frsqrt7(qNaN) = canonical NaN */ 3645 if (float64_is_quiet_nan(f, s)) { 3646 return float64_default_nan(s); 3647 } 3648 3649 /* frsqrt7(+-0) = +-inf */ 3650 if (float64_is_zero(f)) { 3651 s->float_exception_flags |= float_flag_divbyzero; 3652 return float64_set_sign(float64_infinity, sign); 3653 } 3654 3655 /* frsqrt7(+inf) = +0 */ 3656 if (float64_is_infinity(f) && !sign) { 3657 return float64_set_sign(float64_zero, sign); 3658 } 3659 3660 /* +normal, +subnormal */ 3661 uint64_t val = frsqrt7(f, exp_size, frac_size); 3662 return make_float64(val); 3663 } 3664 3665 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3666 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3667 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3668 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3669 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3670 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3671 3672 /* 3673 * Vector Floating-Point Reciprocal Estimate Instruction 3674 * 3675 * Adapted from riscv-v-spec recip.c: 3676 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3677 */ 3678 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3679 float_status *s) 3680 { 3681 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3682 uint64_t exp = extract64(f, frac_size, exp_size); 3683 uint64_t frac = extract64(f, 0, frac_size); 3684 3685 const uint8_t lookup_table[] = { 3686 127, 125, 123, 121, 119, 117, 116, 114, 3687 112, 110, 109, 107, 105, 104, 102, 100, 3688 99, 97, 96, 94, 93, 91, 90, 88, 3689 87, 85, 84, 83, 81, 80, 79, 77, 3690 76, 75, 74, 72, 71, 70, 69, 68, 3691 66, 65, 64, 63, 62, 61, 60, 59, 3692 58, 57, 56, 55, 54, 53, 52, 51, 3693 50, 49, 48, 47, 46, 45, 44, 43, 3694 42, 41, 40, 40, 39, 38, 37, 36, 3695 35, 35, 34, 33, 32, 31, 31, 30, 3696 29, 28, 28, 27, 26, 25, 25, 24, 3697 23, 23, 22, 21, 21, 20, 19, 19, 3698 18, 17, 17, 16, 15, 15, 14, 14, 3699 13, 12, 12, 11, 11, 10, 9, 9, 3700 8, 8, 7, 7, 6, 5, 5, 4, 3701 4, 3, 3, 2, 2, 1, 1, 0 3702 }; 3703 const int precision = 7; 3704 3705 if (exp == 0 && frac != 0) { /* subnormal */ 3706 /* Normalize the subnormal. */ 3707 while (extract64(frac, frac_size - 1, 1) == 0) { 3708 exp--; 3709 frac <<= 1; 3710 } 3711 3712 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3713 3714 if (exp != 0 && exp != UINT64_MAX) { 3715 /* 3716 * Overflow to inf or max value of same sign, 3717 * depending on sign and rounding mode. 3718 */ 3719 s->float_exception_flags |= (float_flag_inexact | 3720 float_flag_overflow); 3721 3722 if ((s->float_rounding_mode == float_round_to_zero) || 3723 ((s->float_rounding_mode == float_round_down) && !sign) || 3724 ((s->float_rounding_mode == float_round_up) && sign)) { 3725 /* Return greatest/negative finite value. */ 3726 return (sign << (exp_size + frac_size)) | 3727 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3728 } else { 3729 /* Return +-inf. */ 3730 return (sign << (exp_size + frac_size)) | 3731 MAKE_64BIT_MASK(frac_size, exp_size); 3732 } 3733 } 3734 } 3735 3736 int idx = frac >> (frac_size - precision); 3737 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3738 (frac_size - precision); 3739 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3740 3741 if (out_exp == 0 || out_exp == UINT64_MAX) { 3742 /* 3743 * The result is subnormal, but don't raise the underflow exception, 3744 * because there's no additional loss of precision. 
3745 */ 3746 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3747 if (out_exp == UINT64_MAX) { 3748 out_frac >>= 1; 3749 out_exp = 0; 3750 } 3751 } 3752 3753 uint64_t val = 0; 3754 val = deposit64(val, 0, frac_size, out_frac); 3755 val = deposit64(val, frac_size, exp_size, out_exp); 3756 val = deposit64(val, frac_size + exp_size, 1, sign); 3757 return val; 3758 } 3759 3760 static float16 frec7_h(float16 f, float_status *s) 3761 { 3762 int exp_size = 5, frac_size = 10; 3763 bool sign = float16_is_neg(f); 3764 3765 /* frec7(+-inf) = +-0 */ 3766 if (float16_is_infinity(f)) { 3767 return float16_set_sign(float16_zero, sign); 3768 } 3769 3770 /* frec7(+-0) = +-inf */ 3771 if (float16_is_zero(f)) { 3772 s->float_exception_flags |= float_flag_divbyzero; 3773 return float16_set_sign(float16_infinity, sign); 3774 } 3775 3776 /* frec7(sNaN) = canonical NaN */ 3777 if (float16_is_signaling_nan(f, s)) { 3778 s->float_exception_flags |= float_flag_invalid; 3779 return float16_default_nan(s); 3780 } 3781 3782 /* frec7(qNaN) = canonical NaN */ 3783 if (float16_is_quiet_nan(f, s)) { 3784 return float16_default_nan(s); 3785 } 3786 3787 /* +-normal, +-subnormal */ 3788 uint64_t val = frec7(f, exp_size, frac_size, s); 3789 return make_float16(val); 3790 } 3791 3792 static float32 frec7_s(float32 f, float_status *s) 3793 { 3794 int exp_size = 8, frac_size = 23; 3795 bool sign = float32_is_neg(f); 3796 3797 /* frec7(+-inf) = +-0 */ 3798 if (float32_is_infinity(f)) { 3799 return float32_set_sign(float32_zero, sign); 3800 } 3801 3802 /* frec7(+-0) = +-inf */ 3803 if (float32_is_zero(f)) { 3804 s->float_exception_flags |= float_flag_divbyzero; 3805 return float32_set_sign(float32_infinity, sign); 3806 } 3807 3808 /* frec7(sNaN) = canonical NaN */ 3809 if (float32_is_signaling_nan(f, s)) { 3810 s->float_exception_flags |= float_flag_invalid; 3811 return float32_default_nan(s); 3812 } 3813 3814 /* frec7(qNaN) = canonical NaN */ 3815 if (float32_is_quiet_nan(f, s)) { 3816 return float32_default_nan(s); 3817 } 3818 3819 /* +-normal, +-subnormal */ 3820 uint64_t val = frec7(f, exp_size, frac_size, s); 3821 return make_float32(val); 3822 } 3823 3824 static float64 frec7_d(float64 f, float_status *s) 3825 { 3826 int exp_size = 11, frac_size = 52; 3827 bool sign = float64_is_neg(f); 3828 3829 /* frec7(+-inf) = +-0 */ 3830 if (float64_is_infinity(f)) { 3831 return float64_set_sign(float64_zero, sign); 3832 } 3833 3834 /* frec7(+-0) = +-inf */ 3835 if (float64_is_zero(f)) { 3836 s->float_exception_flags |= float_flag_divbyzero; 3837 return float64_set_sign(float64_infinity, sign); 3838 } 3839 3840 /* frec7(sNaN) = canonical NaN */ 3841 if (float64_is_signaling_nan(f, s)) { 3842 s->float_exception_flags |= float_flag_invalid; 3843 return float64_default_nan(s); 3844 } 3845 3846 /* frec7(qNaN) = canonical NaN */ 3847 if (float64_is_quiet_nan(f, s)) { 3848 return float64_default_nan(s); 3849 } 3850 3851 /* +-normal, +-subnormal */ 3852 uint64_t val = frec7(f, exp_size, frac_size, s); 3853 return make_float64(val); 3854 } 3855 3856 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3857 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3858 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3859 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 3860 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 3861 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 3862 3863 /* Vector Floating-Point MIN/MAX Instructions */ 3864 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3865 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 3866 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3867 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 3868 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 3869 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 3870 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3871 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3872 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3873 GEN_VEXT_VF(vfmin_vf_h, 2) 3874 GEN_VEXT_VF(vfmin_vf_w, 4) 3875 GEN_VEXT_VF(vfmin_vf_d, 8) 3876 3877 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3878 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3879 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3880 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 3881 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 3882 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 3883 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3884 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3885 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3886 GEN_VEXT_VF(vfmax_vf_h, 2) 3887 GEN_VEXT_VF(vfmax_vf_w, 4) 3888 GEN_VEXT_VF(vfmax_vf_d, 8) 3889 3890 /* Vector Floating-Point Sign-Injection Instructions */ 3891 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3892 { 3893 return deposit64(b, 0, 15, a); 3894 } 3895 3896 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3897 { 3898 return deposit64(b, 0, 31, a); 3899 } 3900 3901 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3902 { 3903 return deposit64(b, 0, 63, a); 3904 } 3905 3906 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3907 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3908 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3909 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 3910 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 3911 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 3912 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3913 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3914 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3915 GEN_VEXT_VF(vfsgnj_vf_h, 2) 3916 GEN_VEXT_VF(vfsgnj_vf_w, 4) 3917 GEN_VEXT_VF(vfsgnj_vf_d, 8) 3918 3919 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3920 { 3921 return deposit64(~b, 0, 15, a); 3922 } 3923 3924 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3925 { 3926 return deposit64(~b, 0, 31, a); 3927 } 3928 3929 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3930 { 3931 return deposit64(~b, 0, 63, a); 3932 } 3933 3934 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3935 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3936 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3937 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 3938 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 3939 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 3940 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3941 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3942 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3943 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 3944 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 3945 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 3946 3947 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3948 { 3949 return deposit64(b ^ a, 0, 15, a); 3950 } 3951 3952 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3953 { 3954 return deposit64(b ^ a, 0, 31, a); 3955 } 3956 3957 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
float_status *s) 3958 { 3959 return deposit64(b ^ a, 0, 63, a); 3960 } 3961 3962 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3963 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3964 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3965 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 3966 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 3967 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 3968 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3969 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3970 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3971 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 3972 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 3973 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 3974 3975 /* Vector Floating-Point Compare Instructions */ 3976 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3977 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3978 CPURISCVState *env, uint32_t desc) \ 3979 { \ 3980 uint32_t vm = vext_vm(desc); \ 3981 uint32_t vl = env->vl; \ 3982 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 3983 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 3984 uint32_t vma = vext_vma(desc); \ 3985 uint32_t i; \ 3986 \ 3987 for (i = env->vstart; i < vl; i++) { \ 3988 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3989 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3990 if (!vm && !vext_elem_mask(v0, i)) { \ 3991 /* set masked-off elements to 1s */ \ 3992 if (vma) { \ 3993 vext_set_elem_mask(vd, i, 1); \ 3994 } \ 3995 continue; \ 3996 } \ 3997 vext_set_elem_mask(vd, i, \ 3998 DO_OP(s2, s1, &env->fp_status)); \ 3999 } \ 4000 env->vstart = 0; \ 4001 /* 4002 * mask destination register are always tail-agnostic 4003 * set tail elements to 1s 4004 */ \ 4005 if (vta_all_1s) { \ 4006 for (; i < total_elems; i++) { \ 4007 vext_set_elem_mask(vd, i, 1); \ 4008 } \ 4009 } \ 4010 } 4011 4012 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4013 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4014 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4015 4016 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4017 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4018 CPURISCVState *env, uint32_t desc) \ 4019 { \ 4020 uint32_t vm = vext_vm(desc); \ 4021 uint32_t vl = env->vl; \ 4022 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4023 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4024 uint32_t vma = vext_vma(desc); \ 4025 uint32_t i; \ 4026 \ 4027 for (i = env->vstart; i < vl; i++) { \ 4028 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4029 if (!vm && !vext_elem_mask(v0, i)) { \ 4030 /* set masked-off elements to 1s */ \ 4031 if (vma) { \ 4032 vext_set_elem_mask(vd, i, 1); \ 4033 } \ 4034 continue; \ 4035 } \ 4036 vext_set_elem_mask(vd, i, \ 4037 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4038 } \ 4039 env->vstart = 0; \ 4040 /* 4041 * mask destination register are always tail-agnostic 4042 * set tail elements to 1s 4043 */ \ 4044 if (vta_all_1s) { \ 4045 for (; i < total_elems; i++) { \ 4046 vext_set_elem_mask(vd, i, 1); \ 4047 } \ 4048 } \ 4049 } 4050 4051 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4052 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4053 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4054 4055 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4056 { 4057 FloatRelation compare = float16_compare_quiet(a, b, s); 4058 return compare != float_relation_equal; 4059 } 4060 4061 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4062 { 4063 FloatRelation compare = 
float32_compare_quiet(a, b, s); 4064 return compare != float_relation_equal; 4065 } 4066 4067 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4068 { 4069 FloatRelation compare = float64_compare_quiet(a, b, s); 4070 return compare != float_relation_equal; 4071 } 4072 4073 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4074 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4075 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4076 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4077 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4078 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4079 4080 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4081 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4082 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4083 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4084 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4085 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4086 4087 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4088 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4089 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4090 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4091 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4092 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4093 4094 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4095 { 4096 FloatRelation compare = float16_compare(a, b, s); 4097 return compare == float_relation_greater; 4098 } 4099 4100 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4101 { 4102 FloatRelation compare = float32_compare(a, b, s); 4103 return compare == float_relation_greater; 4104 } 4105 4106 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4107 { 4108 FloatRelation compare = float64_compare(a, b, s); 4109 return compare == float_relation_greater; 4110 } 4111 4112 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4113 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4114 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4115 4116 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4117 { 4118 FloatRelation compare = float16_compare(a, b, s); 4119 return compare == float_relation_greater || 4120 compare == float_relation_equal; 4121 } 4122 4123 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4124 { 4125 FloatRelation compare = float32_compare(a, b, s); 4126 return compare == float_relation_greater || 4127 compare == float_relation_equal; 4128 } 4129 4130 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4131 { 4132 FloatRelation compare = float64_compare(a, b, s); 4133 return compare == float_relation_greater || 4134 compare == float_relation_equal; 4135 } 4136 4137 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4138 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4139 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4140 4141 /* Vector Floating-Point Classify Instruction */ 4142 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4143 static void do_##NAME(void *vd, void *vs2, int i) \ 4144 { \ 4145 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4146 *((TD *)vd + HD(i)) = OP(s2); \ 4147 } 4148 4149 #define GEN_VEXT_V(NAME, ESZ) \ 4150 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4151 CPURISCVState *env, uint32_t desc) \ 4152 { \ 4153 uint32_t vm = vext_vm(desc); \ 4154 uint32_t vl = env->vl; \ 4155 uint32_t total_elems = \ 4156 vext_get_total_elems(env, desc, ESZ); \ 4157 uint32_t vta = vext_vta(desc); \ 4158 
uint32_t vma = vext_vma(desc); \ 4159 uint32_t i; \ 4160 \ 4161 for (i = env->vstart; i < vl; i++) { \ 4162 if (!vm && !vext_elem_mask(v0, i)) { \ 4163 /* set masked-off elements to 1s */ \ 4164 vext_set_elems_1s(vd, vma, i * ESZ, \ 4165 (i + 1) * ESZ); \ 4166 continue; \ 4167 } \ 4168 do_##NAME(vd, vs2, i); \ 4169 } \ 4170 env->vstart = 0; \ 4171 /* set tail elements to 1s */ \ 4172 vext_set_elems_1s(vd, vta, vl * ESZ, \ 4173 total_elems * ESZ); \ 4174 } 4175 4176 target_ulong fclass_h(uint64_t frs1) 4177 { 4178 float16 f = frs1; 4179 bool sign = float16_is_neg(f); 4180 4181 if (float16_is_infinity(f)) { 4182 return sign ? 1 << 0 : 1 << 7; 4183 } else if (float16_is_zero(f)) { 4184 return sign ? 1 << 3 : 1 << 4; 4185 } else if (float16_is_zero_or_denormal(f)) { 4186 return sign ? 1 << 2 : 1 << 5; 4187 } else if (float16_is_any_nan(f)) { 4188 float_status s = { }; /* for snan_bit_is_one */ 4189 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4190 } else { 4191 return sign ? 1 << 1 : 1 << 6; 4192 } 4193 } 4194 4195 target_ulong fclass_s(uint64_t frs1) 4196 { 4197 float32 f = frs1; 4198 bool sign = float32_is_neg(f); 4199 4200 if (float32_is_infinity(f)) { 4201 return sign ? 1 << 0 : 1 << 7; 4202 } else if (float32_is_zero(f)) { 4203 return sign ? 1 << 3 : 1 << 4; 4204 } else if (float32_is_zero_or_denormal(f)) { 4205 return sign ? 1 << 2 : 1 << 5; 4206 } else if (float32_is_any_nan(f)) { 4207 float_status s = { }; /* for snan_bit_is_one */ 4208 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4209 } else { 4210 return sign ? 1 << 1 : 1 << 6; 4211 } 4212 } 4213 4214 target_ulong fclass_d(uint64_t frs1) 4215 { 4216 float64 f = frs1; 4217 bool sign = float64_is_neg(f); 4218 4219 if (float64_is_infinity(f)) { 4220 return sign ? 1 << 0 : 1 << 7; 4221 } else if (float64_is_zero(f)) { 4222 return sign ? 1 << 3 : 1 << 4; 4223 } else if (float64_is_zero_or_denormal(f)) { 4224 return sign ? 1 << 2 : 1 << 5; 4225 } else if (float64_is_any_nan(f)) { 4226 float_status s = { }; /* for snan_bit_is_one */ 4227 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4228 } else { 4229 return sign ? 1 << 1 : 1 << 6; 4230 } 4231 } 4232 4233 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4234 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4235 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4236 GEN_VEXT_V(vfclass_v_h, 2) 4237 GEN_VEXT_V(vfclass_v_w, 4) 4238 GEN_VEXT_V(vfclass_v_d, 8) 4239 4240 /* Vector Floating-Point Merge Instruction */ 4241 4242 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4243 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4244 CPURISCVState *env, uint32_t desc) \ 4245 { \ 4246 uint32_t vm = vext_vm(desc); \ 4247 uint32_t vl = env->vl; \ 4248 uint32_t esz = sizeof(ETYPE); \ 4249 uint32_t total_elems = \ 4250 vext_get_total_elems(env, desc, esz); \ 4251 uint32_t vta = vext_vta(desc); \ 4252 uint32_t i; \ 4253 \ 4254 for (i = env->vstart; i < vl; i++) { \ 4255 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4256 *((ETYPE *)vd + H(i)) = \ 4257 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4258 } \ 4259 env->vstart = 0; \ 4260 /* set tail elements to 1s */ \ 4261 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4262 } 4263 4264 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4265 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4266 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4267 4268 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4269 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
*/ 4270 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4271 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4272 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4273 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4274 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4275 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4276 4277 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4278 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4279 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4280 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4281 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4282 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4283 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4284 4285 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4286 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4287 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4288 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4289 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4290 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4291 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4292 4293 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4294 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4295 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4296 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4297 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4298 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4299 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4300 4301 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4302 /* (TD, T2, TX2) */ 4303 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4304 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4305 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4306 /* 4307 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4308 */ 4309 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4310 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4311 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4312 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4313 4314 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4315 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4316 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4317 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4318 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4319 4320 /* 4321 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. 4322 */ 4323 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4324 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4325 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4326 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4327 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4328 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4329 4330 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4331 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4332 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4333 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4334 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4335 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4336 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4337 4338 /* 4339 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. 
4340 */ 4341 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4342 { 4343 return float16_to_float32(a, true, s); 4344 } 4345 4346 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4347 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4348 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4349 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4350 4351 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32) 4352 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4) 4353 4354 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4355 /* (TD, T2, TX2) */ 4356 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4357 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4358 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4359 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4360 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4361 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4362 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4363 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4364 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4365 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4366 4367 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4368 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4369 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4370 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4371 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4372 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4373 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4374 4375 /* 4376 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. 4377 */ 4378 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4379 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4380 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4381 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4382 4383 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4384 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4385 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4386 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4387 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4388 4389 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4390 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4391 { 4392 return float32_to_float16(a, true, s); 4393 } 4394 4395 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4396 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4397 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4398 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4399 4400 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16) 4401 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2) 4402 4403 /* 4404 * Vector Reduction Operations 4405 */ 4406 /* Vector Single-Width Integer Reduction Instructions */ 4407 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4408 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4409 void *vs2, CPURISCVState *env, \ 4410 uint32_t desc) \ 4411 { \ 4412 uint32_t vm = vext_vm(desc); \ 4413 uint32_t vl = env->vl; \ 4414 uint32_t esz = sizeof(TD); \ 4415 uint32_t vlenb = simd_maxsz(desc); \ 4416 uint32_t vta = vext_vta(desc); \ 4417 uint32_t i; \ 4418 TD s1 = *((TD *)vs1 + HD(0)); \ 4419 \ 4420 for (i = env->vstart; i < vl; i++) { \ 4421 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4422 if (!vm && !vext_elem_mask(v0, i)) { \ 4423 continue; \ 4424 } \ 4425 s1 = OP(s1, (TD)s2); \ 4426 } \ 4427 *((TD *)vd + HD(0)) = s1; \ 4428 env->vstart = 0; \ 4429 /* set tail elements to 1s */ \ 4430 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4431 } 4432 4433 /* vd[0] = sum(vs1[0], vs2[*]) */ 4434 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4435 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4436 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4437 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4438 4439 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4440 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4441 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4442 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4443 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4444 4445 /* vd[0] = max(vs1[0], vs2[*]) */ 4446 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4447 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4448 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4449 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4450 4451 /* vd[0] = minu(vs1[0], vs2[*]) */ 4452 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4453 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4454 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4455 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4456 4457 /* vd[0] = min(vs1[0], vs2[*]) */ 4458 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4459 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4460 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4461 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4462 4463 /* vd[0] = and(vs1[0], vs2[*]) */ 4464 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4465 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4466 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4467 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4468 4469 /* vd[0] = or(vs1[0], vs2[*]) */ 4470 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4471 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4472 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4473 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4474 4475 /* vd[0] = xor(vs1[0], vs2[*]) 
*/ 4476 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4477 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4478 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4479 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4480 4481 /* Vector Widening Integer Reduction Instructions */ 4482 /* signed sum reduction into double-width accumulator */ 4483 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4484 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4485 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4486 4487 /* Unsigned sum reduction into double-width accumulator */ 4488 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4489 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4490 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4491 4492 /* Vector Single-Width Floating-Point Reduction Instructions */ 4493 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4494 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4495 void *vs2, CPURISCVState *env, \ 4496 uint32_t desc) \ 4497 { \ 4498 uint32_t vm = vext_vm(desc); \ 4499 uint32_t vl = env->vl; \ 4500 uint32_t esz = sizeof(TD); \ 4501 uint32_t vlenb = simd_maxsz(desc); \ 4502 uint32_t vta = vext_vta(desc); \ 4503 uint32_t i; \ 4504 TD s1 = *((TD *)vs1 + HD(0)); \ 4505 \ 4506 for (i = env->vstart; i < vl; i++) { \ 4507 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4508 if (!vm && !vext_elem_mask(v0, i)) { \ 4509 continue; \ 4510 } \ 4511 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4512 } \ 4513 *((TD *)vd + HD(0)) = s1; \ 4514 env->vstart = 0; \ 4515 /* set tail elements to 1s */ \ 4516 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4517 } 4518 4519 /* Unordered sum */ 4520 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4521 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4522 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4523 4524 /* Ordered sum */ 4525 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4526 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4527 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4528 4529 /* Maximum value */ 4530 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, 4531 float16_maximum_number) 4532 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, 4533 float32_maximum_number) 4534 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, 4535 float64_maximum_number) 4536 4537 /* Minimum value */ 4538 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, 4539 float16_minimum_number) 4540 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, 4541 float32_minimum_number) 4542 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, 4543 float64_minimum_number) 4544 4545 /* Vector Widening Floating-Point Add Instructions */ 4546 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4547 { 4548 return float32_add(a, float16_to_float32(b, true, s), s); 4549 } 4550 4551 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4552 { 4553 return float64_add(a, float32_to_float64(b, s), s); 4554 } 4555 4556 /* Vector Widening Floating-Point Reduction Instructions */ 4557 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4558 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4559 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4560 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, 
fwadd16) 4561 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4562 4563 /* 4564 * Vector Mask Operations 4565 */ 4566 /* Vector Mask-Register Logical Instructions */ 4567 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4568 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4569 void *vs2, CPURISCVState *env, \ 4570 uint32_t desc) \ 4571 { \ 4572 uint32_t vl = env->vl; \ 4573 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4574 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4575 uint32_t i; \ 4576 int a, b; \ 4577 \ 4578 for (i = env->vstart; i < vl; i++) { \ 4579 a = vext_elem_mask(vs1, i); \ 4580 b = vext_elem_mask(vs2, i); \ 4581 vext_set_elem_mask(vd, i, OP(b, a)); \ 4582 } \ 4583 env->vstart = 0; \ 4584 /* 4585 * mask destination register are always tail-agnostic 4586 * set tail elements to 1s 4587 */ \ 4588 if (vta_all_1s) { \ 4589 for (; i < total_elems; i++) { \ 4590 vext_set_elem_mask(vd, i, 1); \ 4591 } \ 4592 } \ 4593 } 4594 4595 #define DO_NAND(N, M) (!(N & M)) 4596 #define DO_ANDNOT(N, M) (N & !M) 4597 #define DO_NOR(N, M) (!(N | M)) 4598 #define DO_ORNOT(N, M) (N | !M) 4599 #define DO_XNOR(N, M) (!(N ^ M)) 4600 4601 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4602 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4603 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4604 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4605 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4606 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4607 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4608 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4609 4610 /* Vector count population in mask vcpop */ 4611 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4612 uint32_t desc) 4613 { 4614 target_ulong cnt = 0; 4615 uint32_t vm = vext_vm(desc); 4616 uint32_t vl = env->vl; 4617 int i; 4618 4619 for (i = env->vstart; i < vl; i++) { 4620 if (vm || vext_elem_mask(v0, i)) { 4621 if (vext_elem_mask(vs2, i)) { 4622 cnt++; 4623 } 4624 } 4625 } 4626 env->vstart = 0; 4627 return cnt; 4628 } 4629 4630 /* vfirst find-first-set mask bit */ 4631 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4632 uint32_t desc) 4633 { 4634 uint32_t vm = vext_vm(desc); 4635 uint32_t vl = env->vl; 4636 int i; 4637 4638 for (i = env->vstart; i < vl; i++) { 4639 if (vm || vext_elem_mask(v0, i)) { 4640 if (vext_elem_mask(vs2, i)) { 4641 return i; 4642 } 4643 } 4644 } 4645 env->vstart = 0; 4646 return -1LL; 4647 } 4648 4649 enum set_mask_type { 4650 ONLY_FIRST = 1, 4651 INCLUDE_FIRST, 4652 BEFORE_FIRST, 4653 }; 4654 4655 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4656 uint32_t desc, enum set_mask_type type) 4657 { 4658 uint32_t vm = vext_vm(desc); 4659 uint32_t vl = env->vl; 4660 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; 4661 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4662 uint32_t vma = vext_vma(desc); 4663 int i; 4664 bool first_mask_bit = false; 4665 4666 for (i = env->vstart; i < vl; i++) { 4667 if (!vm && !vext_elem_mask(v0, i)) { 4668 /* set masked-off elements to 1s */ 4669 if (vma) { 4670 vext_set_elem_mask(vd, i, 1); 4671 } 4672 continue; 4673 } 4674 /* write a zero to all following active elements */ 4675 if (first_mask_bit) { 4676 vext_set_elem_mask(vd, i, 0); 4677 continue; 4678 } 4679 if (vext_elem_mask(vs2, i)) { 4680 first_mask_bit = true; 4681 if (type == BEFORE_FIRST) { 4682 vext_set_elem_mask(vd, i, 0); 4683 } else { 4684 vext_set_elem_mask(vd, i, 1); 4685 } 4686 } else { 4687 if (type == ONLY_FIRST) { 4688 vext_set_elem_mask(vd, i, 0); 4689 } else { 4690 vext_set_elem_mask(vd, i, 1); 4691 } 4692 } 4693 } 4694 env->vstart 
= 0; 4695 /* 4696 * mask destination register are always tail-agnostic 4697 * set tail elements to 1s 4698 */ 4699 if (vta_all_1s) { 4700 for (; i < total_elems; i++) { 4701 vext_set_elem_mask(vd, i, 1); 4702 } 4703 } 4704 } 4705 4706 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4707 uint32_t desc) 4708 { 4709 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4710 } 4711 4712 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4713 uint32_t desc) 4714 { 4715 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4716 } 4717 4718 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4719 uint32_t desc) 4720 { 4721 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4722 } 4723 4724 /* Vector Iota Instruction */ 4725 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4726 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4727 uint32_t desc) \ 4728 { \ 4729 uint32_t vm = vext_vm(desc); \ 4730 uint32_t vl = env->vl; \ 4731 uint32_t esz = sizeof(ETYPE); \ 4732 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4733 uint32_t vta = vext_vta(desc); \ 4734 uint32_t vma = vext_vma(desc); \ 4735 uint32_t sum = 0; \ 4736 int i; \ 4737 \ 4738 for (i = env->vstart; i < vl; i++) { \ 4739 if (!vm && !vext_elem_mask(v0, i)) { \ 4740 /* set masked-off elements to 1s */ \ 4741 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4742 continue; \ 4743 } \ 4744 *((ETYPE *)vd + H(i)) = sum; \ 4745 if (vext_elem_mask(vs2, i)) { \ 4746 sum++; \ 4747 } \ 4748 } \ 4749 env->vstart = 0; \ 4750 /* set tail elements to 1s */ \ 4751 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4752 } 4753 4754 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4755 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4756 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4757 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4758 4759 /* Vector Element Index Instruction */ 4760 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4761 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4762 { \ 4763 uint32_t vm = vext_vm(desc); \ 4764 uint32_t vl = env->vl; \ 4765 uint32_t esz = sizeof(ETYPE); \ 4766 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4767 uint32_t vta = vext_vta(desc); \ 4768 uint32_t vma = vext_vma(desc); \ 4769 int i; \ 4770 \ 4771 for (i = env->vstart; i < vl; i++) { \ 4772 if (!vm && !vext_elem_mask(v0, i)) { \ 4773 /* set masked-off elements to 1s */ \ 4774 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4775 continue; \ 4776 } \ 4777 *((ETYPE *)vd + H(i)) = i; \ 4778 } \ 4779 env->vstart = 0; \ 4780 /* set tail elements to 1s */ \ 4781 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4782 } 4783 4784 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4785 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4786 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4787 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4788 4789 /* 4790 * Vector Permutation Instructions 4791 */ 4792 4793 /* Vector Slide Instructions */ 4794 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4795 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4796 CPURISCVState *env, uint32_t desc) \ 4797 { \ 4798 uint32_t vm = vext_vm(desc); \ 4799 uint32_t vl = env->vl; \ 4800 uint32_t esz = sizeof(ETYPE); \ 4801 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4802 uint32_t vta = vext_vta(desc); \ 4803 uint32_t vma = vext_vma(desc); \ 4804 target_ulong offset = s1, i_min, i; \ 4805 \ 4806 i_min = MAX(env->vstart, offset); \ 4807 for (i = i_min; i < vl; i++) { \ 4808 if (!vm && 
!vext_elem_mask(v0, i)) { \ 4809 /* set masked-off elements to 1s */ \ 4810 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4811 continue; \ 4812 } \ 4813 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4814 } \ 4815 /* set tail elements to 1s */ \ 4816 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4817 } 4818 4819 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4820 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4821 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4822 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4823 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4824 4825 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4826 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4827 CPURISCVState *env, uint32_t desc) \ 4828 { \ 4829 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4830 uint32_t vm = vext_vm(desc); \ 4831 uint32_t vl = env->vl; \ 4832 uint32_t esz = sizeof(ETYPE); \ 4833 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4834 uint32_t vta = vext_vta(desc); \ 4835 uint32_t vma = vext_vma(desc); \ 4836 target_ulong i_max, i; \ 4837 \ 4838 i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \ 4839 for (i = env->vstart; i < i_max; ++i) { \ 4840 if (!vm && !vext_elem_mask(v0, i)) { \ 4841 /* set masked-off elements to 1s */ \ 4842 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4843 continue; \ 4844 } \ 4845 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4846 } \ 4847 \ 4848 for (i = i_max; i < vl; ++i) { \ 4849 if (vm || vext_elem_mask(v0, i)) { \ 4850 *((ETYPE *)vd + H(i)) = 0; \ 4851 } \ 4852 } \ 4853 \ 4854 env->vstart = 0; \ 4855 /* set tail elements to 1s */ \ 4856 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4857 } 4858 4859 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4860 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4861 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4862 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4863 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4864 4865 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ 4866 static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ 4867 void *vs2, CPURISCVState *env, \ 4868 uint32_t desc) \ 4869 { \ 4870 typedef uint##BITWIDTH##_t ETYPE; \ 4871 uint32_t vm = vext_vm(desc); \ 4872 uint32_t vl = env->vl; \ 4873 uint32_t esz = sizeof(ETYPE); \ 4874 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4875 uint32_t vta = vext_vta(desc); \ 4876 uint32_t vma = vext_vma(desc); \ 4877 uint32_t i; \ 4878 \ 4879 for (i = env->vstart; i < vl; i++) { \ 4880 if (!vm && !vext_elem_mask(v0, i)) { \ 4881 /* set masked-off elements to 1s */ \ 4882 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4883 continue; \ 4884 } \ 4885 if (i == 0) { \ 4886 *((ETYPE *)vd + H(i)) = s1; \ 4887 } else { \ 4888 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4889 } \ 4890 } \ 4891 env->vstart = 0; \ 4892 /* set tail elements to 1s */ \ 4893 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4894 } 4895 4896 GEN_VEXT_VSLIE1UP(8, H1) 4897 GEN_VEXT_VSLIE1UP(16, H2) 4898 GEN_VEXT_VSLIE1UP(32, H4) 4899 GEN_VEXT_VSLIE1UP(64, H8) 4900 4901 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ 4902 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4903 CPURISCVState *env, uint32_t desc) \ 4904 { \ 4905 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4906 } 4907 4908 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 
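/*
 * Illustrative sketch only (assumed values, not part of the helpers): for
 * vl = 4 and an unmasked vslide1up, the element-level effect is
 *     vd[0] = s1;  vd[1] = vs2[0];  vd[2] = vs2[1];  vd[3] = vs2[2];
 * where s1 holds x[rs1]. Masked-off elements follow the vma policy and the
 * tail follows vta, exactly as in the loop of the macro above.
 */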
4909 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4910 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4911 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4912 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4913 4914 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \ 4915 static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ 4916 void *vs2, CPURISCVState *env, \ 4917 uint32_t desc) \ 4918 { \ 4919 typedef uint##BITWIDTH##_t ETYPE; \ 4920 uint32_t vm = vext_vm(desc); \ 4921 uint32_t vl = env->vl; \ 4922 uint32_t esz = sizeof(ETYPE); \ 4923 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4924 uint32_t vta = vext_vta(desc); \ 4925 uint32_t vma = vext_vma(desc); \ 4926 uint32_t i; \ 4927 \ 4928 for (i = env->vstart; i < vl; i++) { \ 4929 if (!vm && !vext_elem_mask(v0, i)) { \ 4930 /* set masked-off elements to 1s */ \ 4931 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4932 continue; \ 4933 } \ 4934 if (i == vl - 1) { \ 4935 *((ETYPE *)vd + H(i)) = s1; \ 4936 } else { \ 4937 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4938 } \ 4939 } \ 4940 env->vstart = 0; \ 4941 /* set tail elements to 1s */ \ 4942 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4943 } 4944 4945 GEN_VEXT_VSLIDE1DOWN(8, H1) 4946 GEN_VEXT_VSLIDE1DOWN(16, H2) 4947 GEN_VEXT_VSLIDE1DOWN(32, H4) 4948 GEN_VEXT_VSLIDE1DOWN(64, H8) 4949 4950 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ 4951 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4952 CPURISCVState *env, uint32_t desc) \ 4953 { \ 4954 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4955 } 4956 4957 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4958 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4959 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4960 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4961 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4962 4963 /* Vector Floating-Point Slide Instructions */ 4964 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \ 4965 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4966 CPURISCVState *env, uint32_t desc) \ 4967 { \ 4968 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4969 } 4970 4971 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */ 4972 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4973 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4974 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4975 4976 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \ 4977 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4978 CPURISCVState *env, uint32_t desc) \ 4979 { \ 4980 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4981 } 4982 4983 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4984 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4985 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4986 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4987 4988 /* Vector Register Gather Instruction */ 4989 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4990 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4991 CPURISCVState *env, uint32_t desc) \ 4992 { \ 4993 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4994 uint32_t vm = vext_vm(desc); \ 4995 uint32_t vl = env->vl; \ 4996 uint32_t esz = sizeof(TS2); \ 4997 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4998 uint32_t vta = vext_vta(desc); \ 4999 uint32_t vma = vext_vma(desc); \ 5000 uint64_t index; \ 5001 uint32_t i; \ 5002 \ 5003 for (i = env->vstart; i < vl; i++) { \ 5004 if (!vm && 
/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
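
/*
 * Illustrative note for vrgatherei16.vv above: the index element type
 * (TS1) is fixed at uint16_t while the data element type (TS2) follows
 * SEW, and any index >= VLMAX selects 0.  For example, with VLMAX = 16
 * and active indices vs1 = { 3, 0, 20, 1 }, the result is
 * vd = { vs2[3], vs2[0], 0, vs2[1] }.
 */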
#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
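
/*
 * Illustrative note for the integer extension helpers above: each
 * active element is read as DTYPE and stored as ETYPE through a plain
 * C assignment, so the vzext_* variants zero-extend (unsigned types)
 * and the vsext_* variants sign-extend (signed types).  For example,
 * vsext_vf4_w reads an int8_t of 0x80 and stores int32_t 0xffffff80,
 * while vzext_vf4_w stores 0x00000080.
 */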