1 /* 2 * RISC-V Vector Extension Helpers for QEMU. 3 * 4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2 or later, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "qemu/host-utils.h" 21 #include "qemu/bitops.h" 22 #include "cpu.h" 23 #include "exec/memop.h" 24 #include "exec/exec-all.h" 25 #include "exec/cpu_ldst.h" 26 #include "exec/helper-proto.h" 27 #include "fpu/softfloat.h" 28 #include "tcg/tcg-gvec-desc.h" 29 #include "internals.h" 30 #include "vector_internals.h" 31 #include <math.h> 32 33 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, 34 target_ulong s2) 35 { 36 int vlmax, vl; 37 RISCVCPU *cpu = env_archcpu(env); 38 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL); 39 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW); 40 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); 41 int xlen = riscv_cpu_xlen(env); 42 bool vill = (s2 >> (xlen - 1)) & 0x1; 43 target_ulong reserved = s2 & 44 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT, 45 xlen - 1 - R_VTYPE_RESERVED_SHIFT); 46 47 if (lmul & 4) { 48 /* Fractional LMUL - check LMUL * VLEN >= SEW */ 49 if (lmul == 4 || 50 cpu->cfg.vlen >> (8 - lmul) < sew) { 51 vill = true; 52 } 53 } 54 55 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) { 56 /* only set vill bit. 
*/ 57 env->vill = 1; 58 env->vtype = 0; 59 env->vl = 0; 60 env->vstart = 0; 61 return 0; 62 } 63 64 vlmax = vext_get_vlmax(cpu, s2); 65 if (s1 <= vlmax) { 66 vl = s1; 67 } else { 68 vl = vlmax; 69 } 70 env->vl = vl; 71 env->vtype = s2; 72 env->vstart = 0; 73 env->vill = 0; 74 return vl; 75 } 76 77 /* 78 * Get the maximum number of elements can be operated. 79 * 80 * log2_esz: log2 of element size in bytes. 81 */ 82 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) 83 { 84 /* 85 * As simd_desc support at most 2048 bytes, the max vlen is 1024 bits. 86 * so vlen in bytes (vlenb) is encoded as maxsz. 87 */ 88 uint32_t vlenb = simd_maxsz(desc); 89 90 /* Return VLMAX */ 91 int scale = vext_lmul(desc) - log2_esz; 92 return scale < 0 ? vlenb >> -scale : vlenb << scale; 93 } 94 95 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) 96 { 97 return (addr & ~env->cur_pmmask) | env->cur_pmbase; 98 } 99 100 /* 101 * This function checks watchpoint before real load operation. 102 * 103 * In system mode, the TLB API probe_access is enough for watchpoint check. 104 * In user mode, there is no watchpoint support now. 105 * 106 * It will trigger an exception if there is no mapping in TLB 107 * and page table walk can't fill the TLB entry. Then the guest 108 * software can return here after process the exception or never return. 
109 */ 110 static void probe_pages(CPURISCVState *env, target_ulong addr, 111 target_ulong len, uintptr_t ra, 112 MMUAccessType access_type) 113 { 114 target_ulong pagelen = -(addr | TARGET_PAGE_MASK); 115 target_ulong curlen = MIN(pagelen, len); 116 117 probe_access(env, adjust_addr(env, addr), curlen, access_type, 118 cpu_mmu_index(env, false), ra); 119 if (len > curlen) { 120 addr += curlen; 121 curlen = len - curlen; 122 probe_access(env, adjust_addr(env, addr), curlen, access_type, 123 cpu_mmu_index(env, false), ra); 124 } 125 } 126 127 static inline void vext_set_elem_mask(void *v0, int index, 128 uint8_t value) 129 { 130 int idx = index / 64; 131 int pos = index % 64; 132 uint64_t old = ((uint64_t *)v0)[idx]; 133 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); 134 } 135 136 /* elements operations for load and store */ 137 typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr, 138 uint32_t idx, void *vd, uintptr_t retaddr); 139 140 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 141 static void NAME(CPURISCVState *env, abi_ptr addr, \ 142 uint32_t idx, void *vd, uintptr_t retaddr)\ 143 { \ 144 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 145 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 146 } \ 147 148 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) 149 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw) 150 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) 151 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq) 152 153 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 154 static void NAME(CPURISCVState *env, abi_ptr addr, \ 155 uint32_t idx, void *vd, uintptr_t retaddr)\ 156 { \ 157 ETYPE data = *((ETYPE *)vd + H(idx)); \ 158 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 159 } 160 161 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) 162 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) 163 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) 164 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) 165 166 static void vext_set_tail_elems_1s(target_ulong vl, void *vd, 167 uint32_t desc, uint32_t nf, 168 
uint32_t esz, uint32_t max_elems) 169 { 170 uint32_t vta = vext_vta(desc); 171 int k; 172 173 if (vta == 0) { 174 return; 175 } 176 177 for (k = 0; k < nf; ++k) { 178 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz, 179 (k * max_elems + max_elems) * esz); 180 } 181 } 182 183 /* 184 * stride: access vector element from strided memory 185 */ 186 static void 187 vext_ldst_stride(void *vd, void *v0, target_ulong base, 188 target_ulong stride, CPURISCVState *env, 189 uint32_t desc, uint32_t vm, 190 vext_ldst_elem_fn *ldst_elem, 191 uint32_t log2_esz, uintptr_t ra) 192 { 193 uint32_t i, k; 194 uint32_t nf = vext_nf(desc); 195 uint32_t max_elems = vext_max_elems(desc, log2_esz); 196 uint32_t esz = 1 << log2_esz; 197 uint32_t vma = vext_vma(desc); 198 199 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 200 k = 0; 201 while (k < nf) { 202 if (!vm && !vext_elem_mask(v0, i)) { 203 /* set masked-off elements to 1s */ 204 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 205 (i + k * max_elems + 1) * esz); 206 k++; 207 continue; 208 } 209 target_ulong addr = base + stride * i + (k << log2_esz); 210 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 211 k++; 212 } 213 } 214 env->vstart = 0; 215 216 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 217 } 218 219 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 220 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 221 target_ulong stride, CPURISCVState *env, \ 222 uint32_t desc) \ 223 { \ 224 uint32_t vm = vext_vm(desc); \ 225 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 226 ctzl(sizeof(ETYPE)), GETPC()); \ 227 } 228 229 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 230 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 231 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 232 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 233 234 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 235 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 236 target_ulong stride, 
CPURISCVState *env, \ 237 uint32_t desc) \ 238 { \ 239 uint32_t vm = vext_vm(desc); \ 240 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 241 ctzl(sizeof(ETYPE)), GETPC()); \ 242 } 243 244 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 245 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 246 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 247 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 248 249 /* 250 * unit-stride: access elements stored contiguously in memory 251 */ 252 253 /* unmasked unit-stride load and store operation */ 254 static void 255 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 256 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, 257 uintptr_t ra) 258 { 259 uint32_t i, k; 260 uint32_t nf = vext_nf(desc); 261 uint32_t max_elems = vext_max_elems(desc, log2_esz); 262 uint32_t esz = 1 << log2_esz; 263 264 /* load bytes from guest memory */ 265 for (i = env->vstart; i < evl; i++, env->vstart++) { 266 k = 0; 267 while (k < nf) { 268 target_ulong addr = base + ((i * nf + k) << log2_esz); 269 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 270 k++; 271 } 272 } 273 env->vstart = 0; 274 275 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); 276 } 277 278 /* 279 * masked unit-stride load and store operation will be a special case of 280 * stride, stride = NF * sizeof (ETYPE) 281 */ 282 283 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 284 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 285 CPURISCVState *env, uint32_t desc) \ 286 { \ 287 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 288 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 289 ctzl(sizeof(ETYPE)), GETPC()); \ 290 } \ 291 \ 292 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 293 CPURISCVState *env, uint32_t desc) \ 294 { \ 295 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 296 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 297 } 298 299 GEN_VEXT_LD_US(vle8_v, int8_t, 
lde_b) 300 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 301 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 302 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 303 304 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 305 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 306 CPURISCVState *env, uint32_t desc) \ 307 { \ 308 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 309 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 310 ctzl(sizeof(ETYPE)), GETPC()); \ 311 } \ 312 \ 313 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 314 CPURISCVState *env, uint32_t desc) \ 315 { \ 316 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 317 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 318 } 319 320 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 321 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 322 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 323 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 324 325 /* 326 * unit stride mask load and store, EEW = 1 327 */ 328 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 329 CPURISCVState *env, uint32_t desc) 330 { 331 /* evl = ceil(vl/8) */ 332 uint8_t evl = (env->vl + 7) >> 3; 333 vext_ldst_us(vd, base, env, desc, lde_b, 334 0, evl, GETPC()); 335 } 336 337 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 338 CPURISCVState *env, uint32_t desc) 339 { 340 /* evl = ceil(vl/8) */ 341 uint8_t evl = (env->vl + 7) >> 3; 342 vext_ldst_us(vd, base, env, desc, ste_b, 343 0, evl, GETPC()); 344 } 345 346 /* 347 * index: access vector element from indexed memory 348 */ 349 typedef target_ulong vext_get_index_addr(target_ulong base, 350 uint32_t idx, void *vs2); 351 352 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 353 static target_ulong NAME(target_ulong base, \ 354 uint32_t idx, void *vs2) \ 355 { \ 356 return (base + *((ETYPE *)vs2 + H(idx))); \ 357 } 358 359 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 360 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 361 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 362 
GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 363 364 static inline void 365 vext_ldst_index(void *vd, void *v0, target_ulong base, 366 void *vs2, CPURISCVState *env, uint32_t desc, 367 vext_get_index_addr get_index_addr, 368 vext_ldst_elem_fn *ldst_elem, 369 uint32_t log2_esz, uintptr_t ra) 370 { 371 uint32_t i, k; 372 uint32_t nf = vext_nf(desc); 373 uint32_t vm = vext_vm(desc); 374 uint32_t max_elems = vext_max_elems(desc, log2_esz); 375 uint32_t esz = 1 << log2_esz; 376 uint32_t vma = vext_vma(desc); 377 378 /* load bytes from guest memory */ 379 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 380 k = 0; 381 while (k < nf) { 382 if (!vm && !vext_elem_mask(v0, i)) { 383 /* set masked-off elements to 1s */ 384 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 385 (i + k * max_elems + 1) * esz); 386 k++; 387 continue; 388 } 389 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); 390 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 391 k++; 392 } 393 } 394 env->vstart = 0; 395 396 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 397 } 398 399 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 400 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 401 void *vs2, CPURISCVState *env, uint32_t desc) \ 402 { \ 403 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 404 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ 405 } 406 407 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 408 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 409 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 410 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 411 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 412 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 413 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 414 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 415 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 416 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 417 
GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 418 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 419 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 420 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 421 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 422 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 423 424 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 425 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 426 void *vs2, CPURISCVState *env, uint32_t desc) \ 427 { \ 428 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 429 STORE_FN, ctzl(sizeof(ETYPE)), \ 430 GETPC()); \ 431 } 432 433 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b) 434 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h) 435 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w) 436 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d) 437 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b) 438 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h) 439 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w) 440 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d) 441 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b) 442 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h) 443 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w) 444 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d) 445 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b) 446 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h) 447 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w) 448 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d) 449 450 /* 451 * unit-stride fault-only-fisrt load instructions 452 */ 453 static inline void 454 vext_ldff(void *vd, void *v0, target_ulong base, 455 CPURISCVState *env, uint32_t desc, 456 vext_ldst_elem_fn *ldst_elem, 457 uint32_t log2_esz, uintptr_t ra) 458 { 459 void *host; 460 uint32_t i, k, vl = 0; 461 uint32_t nf = vext_nf(desc); 462 uint32_t vm = vext_vm(desc); 463 uint32_t max_elems = 
vext_max_elems(desc, log2_esz); 464 uint32_t esz = 1 << log2_esz; 465 uint32_t vma = vext_vma(desc); 466 target_ulong addr, offset, remain; 467 468 /* probe every access */ 469 for (i = env->vstart; i < env->vl; i++) { 470 if (!vm && !vext_elem_mask(v0, i)) { 471 continue; 472 } 473 addr = adjust_addr(env, base + i * (nf << log2_esz)); 474 if (i == 0) { 475 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD); 476 } else { 477 /* if it triggers an exception, no need to check watchpoint */ 478 remain = nf << log2_esz; 479 while (remain > 0) { 480 offset = -(addr | TARGET_PAGE_MASK); 481 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, 482 cpu_mmu_index(env, false)); 483 if (host) { 484 #ifdef CONFIG_USER_ONLY 485 if (!page_check_range(addr, offset, PAGE_READ)) { 486 vl = i; 487 goto ProbeSuccess; 488 } 489 #else 490 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD); 491 #endif 492 } else { 493 vl = i; 494 goto ProbeSuccess; 495 } 496 if (remain <= offset) { 497 break; 498 } 499 remain -= offset; 500 addr = adjust_addr(env, addr + offset); 501 } 502 } 503 } 504 ProbeSuccess: 505 /* load bytes from guest memory */ 506 if (vl != 0) { 507 env->vl = vl; 508 } 509 for (i = env->vstart; i < env->vl; i++) { 510 k = 0; 511 while (k < nf) { 512 if (!vm && !vext_elem_mask(v0, i)) { 513 /* set masked-off elements to 1s */ 514 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 515 (i + k * max_elems + 1) * esz); 516 k++; 517 continue; 518 } 519 addr = base + ((i * nf + k) << log2_esz); 520 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 521 k++; 522 } 523 } 524 env->vstart = 0; 525 526 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 527 } 528 529 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \ 530 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 531 CPURISCVState *env, uint32_t desc) \ 532 { \ 533 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \ 534 ctzl(sizeof(ETYPE)), GETPC()); \ 535 } 536 537 GEN_VEXT_LDFF(vle8ff_v, int8_t, 
lde_b) 538 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h) 539 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w) 540 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d) 541 542 #define DO_SWAP(N, M) (M) 543 #define DO_AND(N, M) (N & M) 544 #define DO_XOR(N, M) (N ^ M) 545 #define DO_OR(N, M) (N | M) 546 #define DO_ADD(N, M) (N + M) 547 548 /* Signed min/max */ 549 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 550 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 551 552 /* 553 * load and store whole register instructions 554 */ 555 static void 556 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 557 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra) 558 { 559 uint32_t i, k, off, pos; 560 uint32_t nf = vext_nf(desc); 561 uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3; 562 uint32_t max_elems = vlenb >> log2_esz; 563 564 k = env->vstart / max_elems; 565 off = env->vstart % max_elems; 566 567 if (off) { 568 /* load/store rest of elements of current segment pointed by vstart */ 569 for (pos = off; pos < max_elems; pos++, env->vstart++) { 570 target_ulong addr = base + ((pos + k * max_elems) << log2_esz); 571 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, 572 ra); 573 } 574 k++; 575 } 576 577 /* load/store elements for rest of segments */ 578 for (; k < nf; k++) { 579 for (i = 0; i < max_elems; i++, env->vstart++) { 580 target_ulong addr = base + ((i + k * max_elems) << log2_esz); 581 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 582 } 583 } 584 585 env->vstart = 0; 586 } 587 588 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 589 void HELPER(NAME)(void *vd, target_ulong base, \ 590 CPURISCVState *env, uint32_t desc) \ 591 { \ 592 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 593 ctzl(sizeof(ETYPE)), GETPC()); \ 594 } 595 596 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 597 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 598 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 599 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 600 
GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 601 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 602 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 603 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 604 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 605 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 606 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 607 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 608 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 609 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 610 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 611 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 612 613 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 614 void HELPER(NAME)(void *vd, target_ulong base, \ 615 CPURISCVState *env, uint32_t desc) \ 616 { \ 617 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 618 ctzl(sizeof(ETYPE)), GETPC()); \ 619 } 620 621 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 622 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 623 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 624 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 625 626 /* 627 * Vector Integer Arithmetic Instructions 628 */ 629 630 /* (TD, T1, T2, TX1, TX2) */ 631 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 632 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 633 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 634 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 635 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 636 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 637 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 638 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 639 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 640 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 641 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 642 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 643 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 644 #define WOP_SUS_W int64_t, 
uint32_t, int32_t, uint64_t, int64_t 645 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 646 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 647 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 648 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 649 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 650 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 651 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 652 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 653 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 654 655 #define DO_SUB(N, M) (N - M) 656 #define DO_RSUB(N, M) (M - N) 657 658 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 659 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 660 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 661 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 662 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 663 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 664 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 665 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 666 667 GEN_VEXT_VV(vadd_vv_b, 1) 668 GEN_VEXT_VV(vadd_vv_h, 2) 669 GEN_VEXT_VV(vadd_vv_w, 4) 670 GEN_VEXT_VV(vadd_vv_d, 8) 671 GEN_VEXT_VV(vsub_vv_b, 1) 672 GEN_VEXT_VV(vsub_vv_h, 2) 673 GEN_VEXT_VV(vsub_vv_w, 4) 674 GEN_VEXT_VV(vsub_vv_d, 8) 675 676 677 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 678 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 679 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 680 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 681 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 682 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 683 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 684 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 685 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 686 RVVCALL(OPIVX2, 
vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 687 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 688 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 689 690 GEN_VEXT_VX(vadd_vx_b, 1) 691 GEN_VEXT_VX(vadd_vx_h, 2) 692 GEN_VEXT_VX(vadd_vx_w, 4) 693 GEN_VEXT_VX(vadd_vx_d, 8) 694 GEN_VEXT_VX(vsub_vx_b, 1) 695 GEN_VEXT_VX(vsub_vx_h, 2) 696 GEN_VEXT_VX(vsub_vx_w, 4) 697 GEN_VEXT_VX(vsub_vx_d, 8) 698 GEN_VEXT_VX(vrsub_vx_b, 1) 699 GEN_VEXT_VX(vrsub_vx_h, 2) 700 GEN_VEXT_VX(vrsub_vx_w, 4) 701 GEN_VEXT_VX(vrsub_vx_d, 8) 702 703 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 704 { 705 intptr_t oprsz = simd_oprsz(desc); 706 intptr_t i; 707 708 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 709 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 710 } 711 } 712 713 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 714 { 715 intptr_t oprsz = simd_oprsz(desc); 716 intptr_t i; 717 718 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 719 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 720 } 721 } 722 723 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 724 { 725 intptr_t oprsz = simd_oprsz(desc); 726 intptr_t i; 727 728 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 729 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 730 } 731 } 732 733 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 734 { 735 intptr_t oprsz = simd_oprsz(desc); 736 intptr_t i; 737 738 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 739 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 740 } 741 } 742 743 /* Vector Widening Integer Add/Subtract */ 744 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 745 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 746 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 747 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 748 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 749 #define 
WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 750 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 751 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 752 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 753 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 754 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 755 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 756 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 757 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 758 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 759 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 760 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 761 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 762 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 763 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 764 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 765 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 766 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 767 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 768 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 769 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 770 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 771 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 772 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 773 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 774 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 775 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 776 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 777 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 778 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 779 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, 
DO_SUB) 780 GEN_VEXT_VV(vwaddu_vv_b, 2) 781 GEN_VEXT_VV(vwaddu_vv_h, 4) 782 GEN_VEXT_VV(vwaddu_vv_w, 8) 783 GEN_VEXT_VV(vwsubu_vv_b, 2) 784 GEN_VEXT_VV(vwsubu_vv_h, 4) 785 GEN_VEXT_VV(vwsubu_vv_w, 8) 786 GEN_VEXT_VV(vwadd_vv_b, 2) 787 GEN_VEXT_VV(vwadd_vv_h, 4) 788 GEN_VEXT_VV(vwadd_vv_w, 8) 789 GEN_VEXT_VV(vwsub_vv_b, 2) 790 GEN_VEXT_VV(vwsub_vv_h, 4) 791 GEN_VEXT_VV(vwsub_vv_w, 8) 792 GEN_VEXT_VV(vwaddu_wv_b, 2) 793 GEN_VEXT_VV(vwaddu_wv_h, 4) 794 GEN_VEXT_VV(vwaddu_wv_w, 8) 795 GEN_VEXT_VV(vwsubu_wv_b, 2) 796 GEN_VEXT_VV(vwsubu_wv_h, 4) 797 GEN_VEXT_VV(vwsubu_wv_w, 8) 798 GEN_VEXT_VV(vwadd_wv_b, 2) 799 GEN_VEXT_VV(vwadd_wv_h, 4) 800 GEN_VEXT_VV(vwadd_wv_w, 8) 801 GEN_VEXT_VV(vwsub_wv_b, 2) 802 GEN_VEXT_VV(vwsub_wv_h, 4) 803 GEN_VEXT_VV(vwsub_wv_w, 8) 804 805 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 806 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 807 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 808 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 809 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 810 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 811 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 812 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 813 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 814 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 815 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 816 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 817 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 818 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 819 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 820 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 821 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 822 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 823 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 824 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 825 RVVCALL(OPIVX2, vwadd_wx_w, 
WOP_WSSS_W, H8, H4, DO_ADD) 826 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 827 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 828 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 829 GEN_VEXT_VX(vwaddu_vx_b, 2) 830 GEN_VEXT_VX(vwaddu_vx_h, 4) 831 GEN_VEXT_VX(vwaddu_vx_w, 8) 832 GEN_VEXT_VX(vwsubu_vx_b, 2) 833 GEN_VEXT_VX(vwsubu_vx_h, 4) 834 GEN_VEXT_VX(vwsubu_vx_w, 8) 835 GEN_VEXT_VX(vwadd_vx_b, 2) 836 GEN_VEXT_VX(vwadd_vx_h, 4) 837 GEN_VEXT_VX(vwadd_vx_w, 8) 838 GEN_VEXT_VX(vwsub_vx_b, 2) 839 GEN_VEXT_VX(vwsub_vx_h, 4) 840 GEN_VEXT_VX(vwsub_vx_w, 8) 841 GEN_VEXT_VX(vwaddu_wx_b, 2) 842 GEN_VEXT_VX(vwaddu_wx_h, 4) 843 GEN_VEXT_VX(vwaddu_wx_w, 8) 844 GEN_VEXT_VX(vwsubu_wx_b, 2) 845 GEN_VEXT_VX(vwsubu_wx_h, 4) 846 GEN_VEXT_VX(vwsubu_wx_w, 8) 847 GEN_VEXT_VX(vwadd_wx_b, 2) 848 GEN_VEXT_VX(vwadd_wx_h, 4) 849 GEN_VEXT_VX(vwadd_wx_w, 8) 850 GEN_VEXT_VX(vwsub_wx_b, 2) 851 GEN_VEXT_VX(vwsub_wx_h, 4) 852 GEN_VEXT_VX(vwsub_wx_w, 8) 853 854 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 855 #define DO_VADC(N, M, C) (N + M + C) 856 #define DO_VSBC(N, M, C) (N - M - C) 857 858 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 859 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 860 CPURISCVState *env, uint32_t desc) \ 861 { \ 862 uint32_t vl = env->vl; \ 863 uint32_t esz = sizeof(ETYPE); \ 864 uint32_t total_elems = \ 865 vext_get_total_elems(env, desc, esz); \ 866 uint32_t vta = vext_vta(desc); \ 867 uint32_t i; \ 868 \ 869 for (i = env->vstart; i < vl; i++) { \ 870 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 871 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 872 ETYPE carry = vext_elem_mask(v0, i); \ 873 \ 874 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 875 } \ 876 env->vstart = 0; \ 877 /* set tail elements to 1s */ \ 878 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 879 } 880 881 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 882 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 883 
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 884 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 885 886 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 887 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 888 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 889 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 890 891 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 892 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 893 CPURISCVState *env, uint32_t desc) \ 894 { \ 895 uint32_t vl = env->vl; \ 896 uint32_t esz = sizeof(ETYPE); \ 897 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 898 uint32_t vta = vext_vta(desc); \ 899 uint32_t i; \ 900 \ 901 for (i = env->vstart; i < vl; i++) { \ 902 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 903 ETYPE carry = vext_elem_mask(v0, i); \ 904 \ 905 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 906 } \ 907 env->vstart = 0; \ 908 /* set tail elements to 1s */ \ 909 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 910 } 911 912 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 913 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 914 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 915 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 916 917 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 918 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 919 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 920 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 921 922 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 923 (__typeof(N))(N + M) < N) 924 #define DO_MSBC(N, M, C) (C ? 
N <= M : N < M) 925 926 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 927 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 928 CPURISCVState *env, uint32_t desc) \ 929 { \ 930 uint32_t vl = env->vl; \ 931 uint32_t vm = vext_vm(desc); \ 932 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 933 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 934 uint32_t i; \ 935 \ 936 for (i = env->vstart; i < vl; i++) { \ 937 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 938 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 939 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 940 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 941 } \ 942 env->vstart = 0; \ 943 /* 944 * mask destination register are always tail-agnostic 945 * set tail elements to 1s 946 */ \ 947 if (vta_all_1s) { \ 948 for (; i < total_elems; i++) { \ 949 vext_set_elem_mask(vd, i, 1); \ 950 } \ 951 } \ 952 } 953 954 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 955 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 956 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 957 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 958 959 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 960 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 961 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 962 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 963 964 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 965 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 966 void *vs2, CPURISCVState *env, uint32_t desc) \ 967 { \ 968 uint32_t vl = env->vl; \ 969 uint32_t vm = vext_vm(desc); \ 970 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 971 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 972 uint32_t i; \ 973 \ 974 for (i = env->vstart; i < vl; i++) { \ 975 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 976 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 977 vext_set_elem_mask(vd, i, \ 978 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 979 } \ 980 env->vstart = 0; \ 981 /* 982 * mask destination 
register are always tail-agnostic 983 * set tail elements to 1s 984 */ \ 985 if (vta_all_1s) { \ 986 for (; i < total_elems; i++) { \ 987 vext_set_elem_mask(vd, i, 1); \ 988 } \ 989 } \ 990 } 991 992 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 993 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 994 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 995 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 996 997 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 998 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 999 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1000 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1001 1002 /* Vector Bitwise Logical Instructions */ 1003 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1004 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1005 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1006 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1007 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1008 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1009 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1010 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1011 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1012 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1013 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1014 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1015 GEN_VEXT_VV(vand_vv_b, 1) 1016 GEN_VEXT_VV(vand_vv_h, 2) 1017 GEN_VEXT_VV(vand_vv_w, 4) 1018 GEN_VEXT_VV(vand_vv_d, 8) 1019 GEN_VEXT_VV(vor_vv_b, 1) 1020 GEN_VEXT_VV(vor_vv_h, 2) 1021 GEN_VEXT_VV(vor_vv_w, 4) 1022 GEN_VEXT_VV(vor_vv_d, 8) 1023 GEN_VEXT_VV(vxor_vv_b, 1) 1024 GEN_VEXT_VV(vxor_vv_h, 2) 1025 GEN_VEXT_VV(vxor_vv_w, 4) 1026 GEN_VEXT_VV(vxor_vv_d, 8) 1027 1028 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1029 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1030 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, 
DO_AND) 1031 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1032 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1033 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1034 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1035 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1036 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1037 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1038 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1039 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1040 GEN_VEXT_VX(vand_vx_b, 1) 1041 GEN_VEXT_VX(vand_vx_h, 2) 1042 GEN_VEXT_VX(vand_vx_w, 4) 1043 GEN_VEXT_VX(vand_vx_d, 8) 1044 GEN_VEXT_VX(vor_vx_b, 1) 1045 GEN_VEXT_VX(vor_vx_h, 2) 1046 GEN_VEXT_VX(vor_vx_w, 4) 1047 GEN_VEXT_VX(vor_vx_d, 8) 1048 GEN_VEXT_VX(vxor_vx_b, 1) 1049 GEN_VEXT_VX(vxor_vx_h, 2) 1050 GEN_VEXT_VX(vxor_vx_w, 4) 1051 GEN_VEXT_VX(vxor_vx_d, 8) 1052 1053 /* Vector Single-Width Bit Shift Instructions */ 1054 #define DO_SLL(N, M) (N << (M)) 1055 #define DO_SRL(N, M) (N >> (M)) 1056 1057 /* generate the helpers for shift instructions with two vector operators */ 1058 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1059 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1060 void *vs2, CPURISCVState *env, uint32_t desc) \ 1061 { \ 1062 uint32_t vm = vext_vm(desc); \ 1063 uint32_t vl = env->vl; \ 1064 uint32_t esz = sizeof(TS1); \ 1065 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1066 uint32_t vta = vext_vta(desc); \ 1067 uint32_t vma = vext_vma(desc); \ 1068 uint32_t i; \ 1069 \ 1070 for (i = env->vstart; i < vl; i++) { \ 1071 if (!vm && !vext_elem_mask(v0, i)) { \ 1072 /* set masked-off elements to 1s */ \ 1073 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 1074 continue; \ 1075 } \ 1076 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1077 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1078 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1079 } \ 1080 env->vstart = 0; \ 1081 /* set tail elements to 1s */ \ 1082 
vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1083 } 1084 1085 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1086 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1087 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1088 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1089 1090 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1091 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1092 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1093 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1094 1095 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1096 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1097 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1098 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1099 1100 /* 1101 * generate the helpers for shift instructions with one vector and one scalar 1102 */ 1103 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1104 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1105 void *vs2, CPURISCVState *env, \ 1106 uint32_t desc) \ 1107 { \ 1108 uint32_t vm = vext_vm(desc); \ 1109 uint32_t vl = env->vl; \ 1110 uint32_t esz = sizeof(TD); \ 1111 uint32_t total_elems = \ 1112 vext_get_total_elems(env, desc, esz); \ 1113 uint32_t vta = vext_vta(desc); \ 1114 uint32_t vma = vext_vma(desc); \ 1115 uint32_t i; \ 1116 \ 1117 for (i = env->vstart; i < vl; i++) { \ 1118 if (!vm && !vext_elem_mask(v0, i)) { \ 1119 /* set masked-off elements to 1s */ \ 1120 vext_set_elems_1s(vd, vma, i * esz, \ 1121 (i + 1) * esz); \ 1122 continue; \ 1123 } \ 1124 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1125 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1126 } \ 1127 env->vstart = 0; \ 1128 /* set tail elements to 1s */ \ 1129 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ 
1130 } 1131 1132 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1133 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1134 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1135 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1136 1137 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1138 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1139 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1140 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1141 1142 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1143 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1144 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1145 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1146 1147 /* Vector Narrowing Integer Right Shift Instructions */ 1148 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1149 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1150 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1151 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1152 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1153 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1154 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1155 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1156 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1157 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1158 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1159 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1160 1161 /* Vector Integer Comparison Instructions */ 1162 #define DO_MSEQ(N, M) (N == M) 1163 #define DO_MSNE(N, 
M) (N != M) 1164 #define DO_MSLT(N, M) (N < M) 1165 #define DO_MSLE(N, M) (N <= M) 1166 #define DO_MSGT(N, M) (N > M) 1167 1168 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1169 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1170 CPURISCVState *env, uint32_t desc) \ 1171 { \ 1172 uint32_t vm = vext_vm(desc); \ 1173 uint32_t vl = env->vl; \ 1174 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 1175 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1176 uint32_t vma = vext_vma(desc); \ 1177 uint32_t i; \ 1178 \ 1179 for (i = env->vstart; i < vl; i++) { \ 1180 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1181 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1182 if (!vm && !vext_elem_mask(v0, i)) { \ 1183 /* set masked-off elements to 1s */ \ 1184 if (vma) { \ 1185 vext_set_elem_mask(vd, i, 1); \ 1186 } \ 1187 continue; \ 1188 } \ 1189 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1190 } \ 1191 env->vstart = 0; \ 1192 /* 1193 * mask destination register are always tail-agnostic 1194 * set tail elements to 1s 1195 */ \ 1196 if (vta_all_1s) { \ 1197 for (; i < total_elems; i++) { \ 1198 vext_set_elem_mask(vd, i, 1); \ 1199 } \ 1200 } \ 1201 } 1202 1203 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1204 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1205 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1206 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1207 1208 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1209 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1210 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1211 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1212 1213 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1214 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1215 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1216 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1217 1218 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1219 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1220 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, 
DO_MSLT) 1221 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1222 1223 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1224 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1225 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1226 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1227 1228 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1229 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1230 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1231 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1232 1233 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1234 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1235 CPURISCVState *env, uint32_t desc) \ 1236 { \ 1237 uint32_t vm = vext_vm(desc); \ 1238 uint32_t vl = env->vl; \ 1239 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 1240 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1241 uint32_t vma = vext_vma(desc); \ 1242 uint32_t i; \ 1243 \ 1244 for (i = env->vstart; i < vl; i++) { \ 1245 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1246 if (!vm && !vext_elem_mask(v0, i)) { \ 1247 /* set masked-off elements to 1s */ \ 1248 if (vma) { \ 1249 vext_set_elem_mask(vd, i, 1); \ 1250 } \ 1251 continue; \ 1252 } \ 1253 vext_set_elem_mask(vd, i, \ 1254 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1255 } \ 1256 env->vstart = 0; \ 1257 /* 1258 * mask destination register are always tail-agnostic 1259 * set tail elements to 1s 1260 */ \ 1261 if (vta_all_1s) { \ 1262 for (; i < total_elems; i++) { \ 1263 vext_set_elem_mask(vd, i, 1); \ 1264 } \ 1265 } \ 1266 } 1267 1268 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1269 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1270 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1271 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1272 1273 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1274 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1275 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1276 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, 
DO_MSNE) 1277 1278 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1279 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1280 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1281 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1282 1283 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1284 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1285 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1286 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1287 1288 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1289 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1290 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1291 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1292 1293 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1294 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1295 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1296 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1297 1298 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1299 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1300 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1301 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1302 1303 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1304 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1305 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1306 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1307 1308 /* Vector Integer Min/Max Instructions */ 1309 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1310 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1311 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1312 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1313 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1314 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1315 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1316 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1317 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1318 
/*
 * Remaining instantiations of the integer min/max family.
 *
 * Each RVVCALL(OPIVV2 / OPIVX2, ...) line names an operation, a type bundle
 * (OP_UUU_* on the vminu/vmaxu variants, OP_SSS_* on the vmin/vmax ones),
 * the H index macros for its operands and DO_MIN or DO_MAX.  Each
 * GEN_VEXT_VV / GEN_VEXT_VX line names the same operation together with
 * 1, 2, 4 or 8 for the _b, _h, _w and _d variants.
 *
 * NOTE(review): RVVCALL, OPIVV2, OPIVX2, GEN_VEXT_VV, GEN_VEXT_VX, DO_MIN
 * and DO_MAX are defined outside this part of the file; the numeric
 * argument is presumably the destination element size in bytes - confirm.
 */
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1)
GEN_VEXT_VV(vminu_vv_h, 2)
GEN_VEXT_VV(vminu_vv_w, 4)
GEN_VEXT_VV(vminu_vv_d, 8)
GEN_VEXT_VV(vmin_vv_b, 1)
GEN_VEXT_VV(vmin_vv_h, 2)
GEN_VEXT_VV(vmin_vv_w, 4)
GEN_VEXT_VV(vmin_vv_d, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8)
GEN_VEXT_VV(vmax_vv_b, 1)
GEN_VEXT_VV(vmax_vv_h, 2)
GEN_VEXT_VV(vmax_vv_w, 4)
GEN_VEXT_VV(vmax_vv_d, 8)

/* Vector-scalar forms of the same operations. */
RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1)
GEN_VEXT_VX(vminu_vx_h, 2)
GEN_VEXT_VX(vminu_vx_w, 4)
GEN_VEXT_VX(vminu_vx_d, 8) 1362 GEN_VEXT_VX(vmin_vx_b, 1) 1363 GEN_VEXT_VX(vmin_vx_h, 2) 1364 GEN_VEXT_VX(vmin_vx_w, 4) 1365 GEN_VEXT_VX(vmin_vx_d, 8) 1366 GEN_VEXT_VX(vmaxu_vx_b, 1) 1367 GEN_VEXT_VX(vmaxu_vx_h, 2) 1368 GEN_VEXT_VX(vmaxu_vx_w, 4) 1369 GEN_VEXT_VX(vmaxu_vx_d, 8) 1370 GEN_VEXT_VX(vmax_vx_b, 1) 1371 GEN_VEXT_VX(vmax_vx_h, 2) 1372 GEN_VEXT_VX(vmax_vx_w, 4) 1373 GEN_VEXT_VX(vmax_vx_d, 8) 1374 1375 /* Vector Single-Width Integer Multiply Instructions */ 1376 #define DO_MUL(N, M) (N * M) 1377 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1378 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1379 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1380 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1381 GEN_VEXT_VV(vmul_vv_b, 1) 1382 GEN_VEXT_VV(vmul_vv_h, 2) 1383 GEN_VEXT_VV(vmul_vv_w, 4) 1384 GEN_VEXT_VV(vmul_vv_d, 8) 1385 1386 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1387 { 1388 return (int16_t)s2 * (int16_t)s1 >> 8; 1389 } 1390 1391 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1392 { 1393 return (int32_t)s2 * (int32_t)s1 >> 16; 1394 } 1395 1396 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1397 { 1398 return (int64_t)s2 * (int64_t)s1 >> 32; 1399 } 1400 1401 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1402 { 1403 uint64_t hi_64, lo_64; 1404 1405 muls64(&lo_64, &hi_64, s1, s2); 1406 return hi_64; 1407 } 1408 1409 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1410 { 1411 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1412 } 1413 1414 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1415 { 1416 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1417 } 1418 1419 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1420 { 1421 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1422 } 1423 1424 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1425 { 1426 uint64_t hi_64, lo_64; 1427 1428 mulu64(&lo_64, &hi_64, s2, s1); 1429 return hi_64; 1430 } 1431 1432 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1433 { 1434 return 
(int16_t)s2 * (uint16_t)s1 >> 8; 1435 } 1436 1437 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1438 { 1439 return (int32_t)s2 * (uint32_t)s1 >> 16; 1440 } 1441 1442 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1443 { 1444 return (int64_t)s2 * (uint64_t)s1 >> 32; 1445 } 1446 1447 /* 1448 * Let A = signed operand, 1449 * B = unsigned operand 1450 * P = mulu64(A, B), unsigned product 1451 * 1452 * LET X = 2 ** 64 - A, 2's complement of A 1453 * SP = signed product 1454 * THEN 1455 * IF A < 0 1456 * SP = -X * B 1457 * = -(2 ** 64 - A) * B 1458 * = A * B - 2 ** 64 * B 1459 * = P - 2 ** 64 * B 1460 * ELSE 1461 * SP = P 1462 * THEN 1463 * HI_P -= (A < 0 ? B : 0) 1464 */ 1465 1466 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1467 { 1468 uint64_t hi_64, lo_64; 1469 1470 mulu64(&lo_64, &hi_64, s2, s1); 1471 1472 hi_64 -= s2 < 0 ? s1 : 0; 1473 return hi_64; 1474 } 1475 1476 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1477 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1478 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1479 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1480 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1481 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1482 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1483 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1484 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1485 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1486 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1487 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1488 GEN_VEXT_VV(vmulh_vv_b, 1) 1489 GEN_VEXT_VV(vmulh_vv_h, 2) 1490 GEN_VEXT_VV(vmulh_vv_w, 4) 1491 GEN_VEXT_VV(vmulh_vv_d, 8) 1492 GEN_VEXT_VV(vmulhu_vv_b, 1) 1493 GEN_VEXT_VV(vmulhu_vv_h, 2) 1494 GEN_VEXT_VV(vmulhu_vv_w, 4) 1495 GEN_VEXT_VV(vmulhu_vv_d, 8) 1496 GEN_VEXT_VV(vmulhsu_vv_b, 
1) 1497 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1498 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1499 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1500 1501 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1502 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1503 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1504 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1505 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1506 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1507 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1508 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1509 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1510 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1511 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1512 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1513 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1514 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1515 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1516 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1517 GEN_VEXT_VX(vmul_vx_b, 1) 1518 GEN_VEXT_VX(vmul_vx_h, 2) 1519 GEN_VEXT_VX(vmul_vx_w, 4) 1520 GEN_VEXT_VX(vmul_vx_d, 8) 1521 GEN_VEXT_VX(vmulh_vx_b, 1) 1522 GEN_VEXT_VX(vmulh_vx_h, 2) 1523 GEN_VEXT_VX(vmulh_vx_w, 4) 1524 GEN_VEXT_VX(vmulh_vx_d, 8) 1525 GEN_VEXT_VX(vmulhu_vx_b, 1) 1526 GEN_VEXT_VX(vmulhu_vx_h, 2) 1527 GEN_VEXT_VX(vmulhu_vx_w, 4) 1528 GEN_VEXT_VX(vmulhu_vx_d, 8) 1529 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1530 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1531 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1532 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1533 1534 /* Vector Integer Divide Instructions */ 1535 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1536 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1537 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ 1538 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1539 #define DO_REM(N, M) (unlikely(M == 0) ? 
N : \ 1540 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1541 1542 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1543 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1544 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1545 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1546 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1547 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1548 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1549 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1550 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1551 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1552 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1553 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1554 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1555 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1556 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1557 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1558 GEN_VEXT_VV(vdivu_vv_b, 1) 1559 GEN_VEXT_VV(vdivu_vv_h, 2) 1560 GEN_VEXT_VV(vdivu_vv_w, 4) 1561 GEN_VEXT_VV(vdivu_vv_d, 8) 1562 GEN_VEXT_VV(vdiv_vv_b, 1) 1563 GEN_VEXT_VV(vdiv_vv_h, 2) 1564 GEN_VEXT_VV(vdiv_vv_w, 4) 1565 GEN_VEXT_VV(vdiv_vv_d, 8) 1566 GEN_VEXT_VV(vremu_vv_b, 1) 1567 GEN_VEXT_VV(vremu_vv_h, 2) 1568 GEN_VEXT_VV(vremu_vv_w, 4) 1569 GEN_VEXT_VV(vremu_vv_d, 8) 1570 GEN_VEXT_VV(vrem_vv_b, 1) 1571 GEN_VEXT_VV(vrem_vv_h, 2) 1572 GEN_VEXT_VV(vrem_vv_w, 4) 1573 GEN_VEXT_VV(vrem_vv_d, 8) 1574 1575 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1576 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1577 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1578 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1579 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1580 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1581 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, 
DO_DIV) 1582 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1583 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1584 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1585 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1586 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1587 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1588 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1589 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1590 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1591 GEN_VEXT_VX(vdivu_vx_b, 1) 1592 GEN_VEXT_VX(vdivu_vx_h, 2) 1593 GEN_VEXT_VX(vdivu_vx_w, 4) 1594 GEN_VEXT_VX(vdivu_vx_d, 8) 1595 GEN_VEXT_VX(vdiv_vx_b, 1) 1596 GEN_VEXT_VX(vdiv_vx_h, 2) 1597 GEN_VEXT_VX(vdiv_vx_w, 4) 1598 GEN_VEXT_VX(vdiv_vx_d, 8) 1599 GEN_VEXT_VX(vremu_vx_b, 1) 1600 GEN_VEXT_VX(vremu_vx_h, 2) 1601 GEN_VEXT_VX(vremu_vx_w, 4) 1602 GEN_VEXT_VX(vremu_vx_d, 8) 1603 GEN_VEXT_VX(vrem_vx_b, 1) 1604 GEN_VEXT_VX(vrem_vx_h, 2) 1605 GEN_VEXT_VX(vrem_vx_w, 4) 1606 GEN_VEXT_VX(vrem_vx_d, 8) 1607 1608 /* Vector Widening Integer Multiply Instructions */ 1609 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1610 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1611 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1612 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1613 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1614 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1615 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1616 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1617 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1618 GEN_VEXT_VV(vwmul_vv_b, 2) 1619 GEN_VEXT_VV(vwmul_vv_h, 4) 1620 GEN_VEXT_VV(vwmul_vv_w, 8) 1621 GEN_VEXT_VV(vwmulu_vv_b, 2) 1622 GEN_VEXT_VV(vwmulu_vv_h, 4) 1623 GEN_VEXT_VV(vwmulu_vv_w, 8) 1624 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1625 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1626 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1627 1628 
RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1629 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1630 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1631 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1632 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1633 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1634 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1635 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1636 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1637 GEN_VEXT_VX(vwmul_vx_b, 2) 1638 GEN_VEXT_VX(vwmul_vx_h, 4) 1639 GEN_VEXT_VX(vwmul_vx_w, 8) 1640 GEN_VEXT_VX(vwmulu_vx_b, 2) 1641 GEN_VEXT_VX(vwmulu_vx_h, 4) 1642 GEN_VEXT_VX(vwmulu_vx_w, 8) 1643 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1644 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1645 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1646 1647 /* Vector Single-Width Integer Multiply-Add Instructions */ 1648 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1649 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1650 { \ 1651 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1652 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1653 TD d = *((TD *)vd + HD(i)); \ 1654 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1655 } 1656 1657 #define DO_MACC(N, M, D) (M * N + D) 1658 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1659 #define DO_MADD(N, M, D) (M * D + N) 1660 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1661 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1662 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1663 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1664 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1665 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1666 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1667 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1668 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1669 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1670 
/*
 * Remaining vector-vector multiply-add instantiations, followed by the
 * GEN_VEXT_VV lines for the whole vmacc / vnmsac / vmadd / vnmsub family.
 * NOTE(review): RVVCALL and GEN_VEXT_VV are defined outside this part of
 * the file; the numeric argument is presumably the element size in bytes.
 */
RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
GEN_VEXT_VV(vmacc_vv_b, 1)
GEN_VEXT_VV(vmacc_vv_h, 2)
GEN_VEXT_VV(vmacc_vv_w, 4)
GEN_VEXT_VV(vmacc_vv_d, 8)
GEN_VEXT_VV(vnmsac_vv_b, 1)
GEN_VEXT_VV(vnmsac_vv_h, 2)
GEN_VEXT_VV(vnmsac_vv_w, 4)
GEN_VEXT_VV(vnmsac_vv_d, 8)
GEN_VEXT_VV(vmadd_vv_b, 1)
GEN_VEXT_VV(vmadd_vv_h, 2)
GEN_VEXT_VV(vmadd_vv_w, 4)
GEN_VEXT_VV(vmadd_vv_d, 8)
GEN_VEXT_VV(vnmsub_vv_b, 1)
GEN_VEXT_VV(vnmsub_vv_h, 2)
GEN_VEXT_VV(vnmsub_vv_w, 4)
GEN_VEXT_VV(vnmsub_vv_d, 8)

/*
 * Define static do_NAME(vd, s1, vs2, i), the per-element body of a
 * three-operand vector-scalar operation.
 *
 * Element i of vs2 is loaded as T2 and converted to TX2, and the current
 * element i of vd is loaded as TD.  The scalar s1 is first converted to T1
 * and then to TX1.  OP(s2, (TX1)(T1)s1, d) is stored back to element i of
 * vd.  HD and HS2 map the element index to the host index of each operand.
 */
#define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
{ \
    TX2 s2 = *((T2 *)vs2 + HS2(i)); \
    TD d = *((TD *)vd + HD(i)); \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
}

RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
/*
 * Remaining vector-scalar multiply-add instantiations, followed by the
 * GEN_VEXT_VX lines for the whole vmacc / vnmsac / vmadd / vnmsub family.
 * NOTE(review): RVVCALL, OPIVX3 and GEN_VEXT_VX are defined outside this
 * part of the file.
 */
RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
GEN_VEXT_VX(vmacc_vx_b, 1)
GEN_VEXT_VX(vmacc_vx_h, 2)
GEN_VEXT_VX(vmacc_vx_w, 4)
GEN_VEXT_VX(vmacc_vx_d, 8)
GEN_VEXT_VX(vnmsac_vx_b, 1)
GEN_VEXT_VX(vnmsac_vx_h, 2)
GEN_VEXT_VX(vnmsac_vx_w, 4)
GEN_VEXT_VX(vnmsac_vx_d, 8)
GEN_VEXT_VX(vmadd_vx_b, 1)
GEN_VEXT_VX(vmadd_vx_h, 2)
GEN_VEXT_VX(vmadd_vx_w, 4)
GEN_VEXT_VX(vmadd_vx_d, 8)
GEN_VEXT_VX(vnmsub_vx_b, 1)
GEN_VEXT_VX(vnmsub_vx_h, 2)
GEN_VEXT_VX(vnmsub_vx_w, 4)
GEN_VEXT_VX(vnmsub_vx_d, 8)

/*
 * Vector Widening Integer Multiply-Add Instructions
 *
 * All variants use DO_MACC.  The destination index macro is one step wider
 * than the source ones (H2 with H1, H4 with H2, H8 with H4) and the
 * GEN_VEXT_* size argument is 2, 4 or 8 for the _b, _h and _w variants.
 * NOTE(review): the WOP_* type bundles are defined outside this part of
 * the file.
 */
RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
GEN_VEXT_VV(vwmaccu_vv_b, 2)
GEN_VEXT_VV(vwmaccu_vv_h, 4)
GEN_VEXT_VV(vwmaccu_vv_w, 8)
GEN_VEXT_VV(vwmacc_vv_b, 2)
GEN_VEXT_VV(vwmacc_vv_h, 4)
GEN_VEXT_VV(vwmacc_vv_w, 8)
GEN_VEXT_VV(vwmaccsu_vv_b, 2)
GEN_VEXT_VV(vwmaccsu_vv_h, 4)
GEN_VEXT_VV(vwmaccsu_vv_w, 8)

RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
/*
 * Remaining widening multiply-add .vx per-element bodies (signed,
 * signed-unsigned, unsigned-signed) and the helper definitions for all
 * four .vx variants.
 */
RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
GEN_VEXT_VX(vwmaccu_vx_b, 2)
GEN_VEXT_VX(vwmaccu_vx_h, 4)
GEN_VEXT_VX(vwmaccu_vx_w, 8)
GEN_VEXT_VX(vwmacc_vx_b, 2)
GEN_VEXT_VX(vwmacc_vx_h, 4)
GEN_VEXT_VX(vwmacc_vx_w, 8)
GEN_VEXT_VX(vwmaccsu_vx_b, 2)
GEN_VEXT_VX(vwmaccsu_vx_h, 4)
GEN_VEXT_VX(vwmaccsu_vx_w, 8)
GEN_VEXT_VX(vwmaccus_vx_b, 2)
GEN_VEXT_VX(vwmaccus_vx_h, 4)
GEN_VEXT_VX(vwmaccus_vx_w, 8)

/* Vector Integer Merge and Move Instructions */

/*
 * Define helper NAME that copies elements [env->vstart, env->vl) of vs1
 * into vd. ETYPE is the element type and H the element-index mapping
 * macro. After the loop vstart is reset to 0 and the byte range
 * [vl * esz, total_elems * esz) of vd is handed to vext_set_elems_1s
 * together with the vta flag taken from desc. No mask is consulted.
 */
#define GEN_VEXT_VMV_VV(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env,           \
                  uint32_t desc)                                     \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                           \
        *((ETYPE *)vd + H(i)) = s1;                                  \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)

/*
 * Define helper NAME that stores the scalar s1, converted to ETYPE, into
 * every element [env->vstart, env->vl) of vd. Tail handling and the
 * vstart reset are the same as in GEN_VEXT_VMV_VV.
 */
#define GEN_VEXT_VMV_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env,         \
                  uint32_t desc)                                     \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        *((ETYPE *)vd + H(i)) = (ETYPE)s1;                           \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)

/*
 * Define helper NAME that, for each element i in [env->vstart, env->vl),
 * copies element i from vs1 when mask bit i of v0 is set and from vs2
 * when it is clear. Every element in that range is written; the mask
 * only selects the source.
 */
#define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,          \
                  CPURISCVState *env, uint32_t desc)                 \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1);            \
        *((ETYPE *)vd + H(i)) = *(vt + H(i));                        \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)

/*
 * Scalar variant of GEN_VEXT_VMERGE_VV: where mask bit i of v0 is set the
 * element receives s1, converted through target_long and then ETYPE;
 * otherwise it receives element i of vs2.
 */
#define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H)                           \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,               \
                  void *vs2, CPURISCVState *env, uint32_t desc)      \
{                                                                    \
    uint32_t vl = env->vl;                                           \
    uint32_t esz = sizeof(ETYPE);                                    \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);     \
    uint32_t vta = vext_vta(desc);                                   \
    uint32_t i;                                                      \
                                                                     \
    for (i = env->vstart; i < vl; i++) {                             \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                           \
        ETYPE d = (!vext_elem_mask(v0, i) ? s2 :                     \
                   (ETYPE)(target_long)s1);                          \
        *((ETYPE *)vd + H(i)) = d;                                   \
    }                                                                \
    env->vstart = 0;                                                 \
    /* set tail elements to 1s */                                    \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);         \
}

GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)

/*
 * Vector Fixed-Point Arithmetic Instructions
 */

/* Vector Single-Width Saturating Add and Subtract */

/*
 * Fixed-point instructions may need a rounding mode and may saturate, so
 * the common macros for them are defined here. Every per-element
 * operation receives env (so that it can set env->vxsat) and the rounding
 * mode vxrm.
 */
typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
                          CPURISCVState *env, int vxrm);

/*
 * Define do_NAME, the per-element body of a vector-vector fixed-point
 * operation: load element i of vs1 (as T1) and of vs2 (as T2), and store
 * OP(env, vxrm, s2, s1) into element i of vd (as TD). Note that OP takes
 * the vs2 operand first.
 */
#define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)     \
static inline void                                                  \
do_##NAME(void *vd, void *vs1, void *vs2, int i,                    \
          CPURISCVState *env, int vxrm)                             \
{                                                                   \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                                 \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                    \
}

/*
 * Element loop of a vector-vector fixed-point helper.
 *
 * Applies fn to each element in [env->vstart, vl). When vm is 0 and mask
 * bit i of v0 is clear, the element is not computed; its bytes are passed
 * to vext_set_elems_1s with the vma flag instead. env->vstart is reset to
 * 0 once the loop completes.
 */
static inline void
vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
             uint32_t vl, uint32_t vm, int vxrm,
             opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
{
    for (uint32_t i = env->vstart; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            /* set masked-off elements to 1s */
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
            continue;
        }
        fn(vd, vs1, vs2, i, env, vxrm);
    }
    env->vstart = 0;
}

/*
 * Common body of the vector-vector fixed-point helpers.
 *
 * Reads vm/vta/vma from desc and vl from env, runs vext_vv_rm_1 with the
 * rounding mode selected by env->vxrm, then applies the tail policy to
 * bytes [vl * esz, total_elems * esz) of vd.
 *
 * NOTE(review): each case passes vxrm as a literal rather than forwarding
 * env->vxrm; presumably this lets the inlined callee be specialised per
 * rounding mode -- confirm before collapsing the switch.
 */
static inline void
vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
             CPURISCVState *env,
             uint32_t desc,
             opivv2_rm_fn *fn, uint32_t esz)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);
    uint32_t vta = vext_vta(desc);
    uint32_t vma = vext_vma(desc);

    switch (env->vxrm) {
    case 0: /* rnu */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 0, fn, vma, esz);
        break;
    case 1: /* rne */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 1, fn, vma, esz);
        break;
    case 2: /* rdn */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 2, fn, vma, esz);
        break;
    default: /* rod */
        vext_vv_rm_1(vd, v0, vs1, vs2,
                     env, vl, vm, 3, fn, vma, esz);
        break;
    }
    /* set tail elements to 1s */
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
}

/* generate helpers for fixed point instructions with OPIVV format */
#define GEN_VEXT_VV_RM(NAME, ESZ)                               \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,     \
                  CPURISCVState *env, uint32_t desc)            \
{                                                               \
    vext_vv_rm_2(vd, v0, vs1, vs2, env, desc,                   \
                 do_##NAME, ESZ);                               \
}

/*
 * Unsigned saturating add, one function per element width.
 *
 * The sum is computed in the element type, so it wraps on overflow; a
 * wrapped result is smaller than a. In that case the result is clamped to
 * the type maximum and env->vxsat is set. vxrm is accepted only to match
 * the common operation signature and is not used.
 */
static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
                             uint8_t b)
{
    uint8_t res = a + b;
    if (res < a) {
        res = UINT8_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
                               uint16_t b)
{
    uint16_t res = a + b;
    if (res < a) {
        res = UINT16_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
                               uint32_t b)
{
    uint32_t res = a + b;
    if (res < a) {
        res = UINT32_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
                               uint64_t b)
{
    uint64_t res = a + b;
    if (res < a) {
        res = UINT64_MAX;
        env->vxsat = 0x1;
    }
    return res;
}

/* vsaddu.vv per-element bodies (the list continues below) */
RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2008 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2009 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2010 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2011 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2012 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2013 2014 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2015 CPURISCVState *env, int vxrm); 2016 2017 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2018 static inline void \ 2019 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2020 CPURISCVState *env, int vxrm) \ 2021 { \ 2022 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2023 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2024 } 2025 2026 static inline void 2027 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2028 CPURISCVState *env, 2029 uint32_t vl, uint32_t vm, int vxrm, 2030 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2031 { 2032 for (uint32_t i = env->vstart; i < vl; i++) { 2033 if (!vm && !vext_elem_mask(v0, i)) { 2034 /* set masked-off elements to 1s */ 2035 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2036 continue; 2037 } 2038 fn(vd, s1, vs2, i, env, vxrm); 2039 } 2040 env->vstart = 0; 2041 } 2042 2043 static inline void 2044 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2045 CPURISCVState *env, 2046 uint32_t desc, 2047 opivx2_rm_fn *fn, uint32_t esz) 2048 { 2049 uint32_t vm = vext_vm(desc); 2050 uint32_t vl = env->vl; 2051 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2052 uint32_t vta = vext_vta(desc); 2053 uint32_t vma = vext_vma(desc); 2054 2055 switch (env->vxrm) { 2056 case 0: /* rnu */ 2057 vext_vx_rm_1(vd, v0, s1, vs2, 2058 env, vl, vm, 0, fn, vma, esz); 2059 break; 2060 case 1: /* rne */ 2061 vext_vx_rm_1(vd, v0, s1, vs2, 2062 env, vl, vm, 1, fn, vma, esz); 2063 break; 2064 case 2: /* rdn */ 2065 vext_vx_rm_1(vd, v0, s1, vs2, 2066 env, vl, vm, 2, fn, vma, esz); 2067 break; 2068 default: /* rod */ 2069 vext_vx_rm_1(vd, v0, s1, vs2, 
2070 env, vl, vm, 3, fn, vma, esz); 2071 break; 2072 } 2073 /* set tail elements to 1s */ 2074 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2075 } 2076 2077 /* generate helpers for fixed point instructions with OPIVX format */ 2078 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2079 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2080 void *vs2, CPURISCVState *env, \ 2081 uint32_t desc) \ 2082 { \ 2083 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2084 do_##NAME, ESZ); \ 2085 } 2086 2087 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2088 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2089 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2090 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2091 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2092 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2093 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2094 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2095 2096 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2097 { 2098 int8_t res = a + b; 2099 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2100 res = a > 0 ? INT8_MAX : INT8_MIN; 2101 env->vxsat = 0x1; 2102 } 2103 return res; 2104 } 2105 2106 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2107 int16_t b) 2108 { 2109 int16_t res = a + b; 2110 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2111 res = a > 0 ? INT16_MAX : INT16_MIN; 2112 env->vxsat = 0x1; 2113 } 2114 return res; 2115 } 2116 2117 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2118 int32_t b) 2119 { 2120 int32_t res = a + b; 2121 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2122 res = a > 0 ? INT32_MAX : INT32_MIN; 2123 env->vxsat = 0x1; 2124 } 2125 return res; 2126 } 2127 2128 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2129 int64_t b) 2130 { 2131 int64_t res = a + b; 2132 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2133 res = a > 0 ? 
INT64_MAX : INT64_MIN; 2134 env->vxsat = 0x1; 2135 } 2136 return res; 2137 } 2138 2139 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2140 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2141 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2142 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2143 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2144 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2145 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2146 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2147 2148 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2149 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2150 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2151 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2152 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2153 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2154 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2155 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2156 2157 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2158 uint8_t b) 2159 { 2160 uint8_t res = a - b; 2161 if (res > a) { 2162 res = 0; 2163 env->vxsat = 0x1; 2164 } 2165 return res; 2166 } 2167 2168 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2169 uint16_t b) 2170 { 2171 uint16_t res = a - b; 2172 if (res > a) { 2173 res = 0; 2174 env->vxsat = 0x1; 2175 } 2176 return res; 2177 } 2178 2179 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2180 uint32_t b) 2181 { 2182 uint32_t res = a - b; 2183 if (res > a) { 2184 res = 0; 2185 env->vxsat = 0x1; 2186 } 2187 return res; 2188 } 2189 2190 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2191 uint64_t b) 2192 { 2193 uint64_t res = a - b; 2194 if (res > a) { 2195 res = 0; 2196 env->vxsat = 0x1; 2197 } 2198 return res; 2199 } 2200 2201 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2202 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2203 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2204 RVVCALL(OPIVV2_RM, 
vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2205 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2206 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2207 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2208 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2209 2210 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2211 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2212 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2213 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2214 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2215 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2216 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2217 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2218 2219 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2220 { 2221 int8_t res = a - b; 2222 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2223 res = a >= 0 ? INT8_MAX : INT8_MIN; 2224 env->vxsat = 0x1; 2225 } 2226 return res; 2227 } 2228 2229 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, 2230 int16_t b) 2231 { 2232 int16_t res = a - b; 2233 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2234 res = a >= 0 ? INT16_MAX : INT16_MIN; 2235 env->vxsat = 0x1; 2236 } 2237 return res; 2238 } 2239 2240 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, 2241 int32_t b) 2242 { 2243 int32_t res = a - b; 2244 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2245 res = a >= 0 ? INT32_MAX : INT32_MIN; 2246 env->vxsat = 0x1; 2247 } 2248 return res; 2249 } 2250 2251 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, 2252 int64_t b) 2253 { 2254 int64_t res = a - b; 2255 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2256 res = a >= 0 ? 
INT64_MAX : INT64_MIN; 2257 env->vxsat = 0x1; 2258 } 2259 return res; 2260 } 2261 2262 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2263 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2264 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2265 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2266 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2267 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2268 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2269 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2270 2271 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2272 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2273 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2274 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2275 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2276 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2277 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2278 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2279 2280 /* Vector Single-Width Averaging Add and Subtract */ 2281 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2282 { 2283 uint8_t d = extract64(v, shift, 1); 2284 uint8_t d1; 2285 uint64_t D1, D2; 2286 2287 if (shift == 0 || shift > 64) { 2288 return 0; 2289 } 2290 2291 d1 = extract64(v, shift - 1, 1); 2292 D1 = extract64(v, 0, shift); 2293 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2294 return d1; 2295 } else if (vxrm == 1) { /* round-to-nearest-even */ 2296 if (shift > 1) { 2297 D2 = extract64(v, 0, shift - 1); 2298 return d1 & ((D2 != 0) | d); 2299 } else { 2300 return d1 & d; 2301 } 2302 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2303 return !d & (D1 != 0); 2304 } 2305 return 0; /* round-down (truncate) */ 2306 } 2307 2308 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, 2309 int32_t b) 2310 { 2311 int64_t res = (int64_t)a + b; 2312 uint8_t round = get_round(vxrm, res, 1); 2313 2314 return (res >> 1) + round; 2315 } 2316 2317 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, 2318 int64_t b) 
2319 { 2320 int64_t res = a + b; 2321 uint8_t round = get_round(vxrm, res, 1); 2322 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2323 2324 /* With signed overflow, bit 64 is inverse of bit 63. */ 2325 return ((res >> 1) ^ over) + round; 2326 } 2327 2328 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2329 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2330 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2331 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2332 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2333 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2334 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2335 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2336 2337 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2338 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2339 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2340 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2341 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2342 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2343 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2344 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2345 2346 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2347 uint32_t a, uint32_t b) 2348 { 2349 uint64_t res = (uint64_t)a + b; 2350 uint8_t round = get_round(vxrm, res, 1); 2351 2352 return (res >> 1) + round; 2353 } 2354 2355 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2356 uint64_t a, uint64_t b) 2357 { 2358 uint64_t res = a + b; 2359 uint8_t round = get_round(vxrm, res, 1); 2360 uint64_t over = (uint64_t)(res < a) << 63; 2361 2362 return ((res >> 1) | over) + round; 2363 } 2364 2365 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2366 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2367 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2368 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2369 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2370 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2371 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2372 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2373 2374 
/* vaaddu.vx: 8- and 16-bit elements reuse the 32-bit routine */
RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
GEN_VEXT_VX_RM(vaaddu_vx_d, 8)

/*
 * Signed averaging subtract: (a - b) >> 1 plus the rounding increment
 * from get_round(). The difference is formed in 64 bits. env is unused.
 */
static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
                             int32_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

/*
 * 64-bit signed averaging subtract. No wider type is used for the
 * difference, so the lost top bit is reconstructed from the
 * signed-overflow condition. env is unused.
 */
static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
                             int64_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63. */
    return ((res >> 1) ^ over) + round;
}

/* 8- and 16-bit elements reuse the 32-bit routine */
RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
GEN_VEXT_VV_RM(vasub_vv_b, 1)
GEN_VEXT_VV_RM(vasub_vv_h, 2)
GEN_VEXT_VV_RM(vasub_vv_w, 4)
GEN_VEXT_VV_RM(vasub_vv_d, 8)

RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
GEN_VEXT_VX_RM(vasub_vx_b, 1)
GEN_VEXT_VX_RM(vasub_vx_h, 2)
GEN_VEXT_VX_RM(vasub_vx_w, 4)
GEN_VEXT_VX_RM(vasub_vx_d, 8)

/*
 * Unsigned averaging subtract. The difference is formed as a signed
 * 64-bit value so that a borrow is preserved through the arithmetic
 * right shift; the result is then truncated to 32 bits. env is unused.
 */
static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
                               uint32_t a, uint32_t b)
{
    int64_t res = (int64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

/*
 * 64-bit unsigned averaging subtract. A borrow out of the 64-bit
 * difference (detected as res > a) is re-inserted as bit 63 of the
 * halved result. env is unused.
 */
static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
                               uint64_t a, uint64_t b)
{
    uint64_t res = (uint64_t)a - b;
    uint8_t round = get_round(vxrm, res, 1);
    uint64_t over = (uint64_t)(res > a) << 63;

    return ((res >> 1) | over) + round;
}

/* 8- and 16-bit elements reuse the 32-bit routine */
RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
GEN_VEXT_VV_RM(vasubu_vv_b, 1)
GEN_VEXT_VV_RM(vasubu_vv_h, 2)
GEN_VEXT_VV_RM(vasubu_vv_w, 4)
GEN_VEXT_VV_RM(vasubu_vv_d, 8)

RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
GEN_VEXT_VX_RM(vasubu_vx_b, 1)
GEN_VEXT_VX_RM(vasubu_vx_h, 2)
GEN_VEXT_VX_RM(vasubu_vx_w, 4)
GEN_VEXT_VX_RM(vasubu_vx_d, 8)

/* Vector Single-Width Fractional Multiply with Rounding and Saturation */

/*
 * vsmul8/16/32: multiply in a type twice as wide, shift right by
 * (element bits - 1) with the rounding increment from get_round(), then
 * clamp to the element range. env->vxsat is set whenever the value is
 * clamped.
 */
static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    uint8_t round;
    int16_t res;

    res = (int16_t)a * (int16_t)b;
    round = get_round(vxrm, res, 7);
    res = (res >> 7) + round;

    if (res > INT8_MAX) {
        env->vxsat = 0x1;
        return INT8_MAX;
    } else if (res < INT8_MIN) {
        env->vxsat = 0x1;
        return INT8_MIN;
    } else {
        return res;
    }
}

static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
{
    uint8_t round;
    int32_t res;

    res = (int32_t)a * (int32_t)b;
    round = get_round(vxrm, res, 15);
    res = (res >> 15) + round;

    if (res > INT16_MAX) {
        env->vxsat = 0x1;
        return INT16_MAX;
    } else if (res < INT16_MIN) {
        env->vxsat = 0x1;
        return INT16_MIN;
    } else {
        return res;
    }
}

static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    uint8_t round;
    int64_t res;

    res = (int64_t)a * (int64_t)b;
    round = get_round(vxrm, res, 31);
    res = (res >> 31) + round;

    if (res > INT32_MAX) {
        env->vxsat = 0x1;
        return INT32_MAX;
    } else if (res < INT32_MIN) {
        env->vxsat = 0x1;
        return INT32_MIN;
    } else {
        return res;
    }
}

/*
 * 64-bit variant: the 128-bit product comes from muls64() as a hi/lo
 * pair. INT64_MIN * INT64_MIN is special-cased up front and saturates to
 * INT64_MAX. Otherwise the result is product bits [126:63]; the rounding
 * increment is added unless that would step past INT64_MAX, in which case
 * the result stays at INT64_MAX and vxsat is set.
 */
static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    uint8_t round;
    uint64_t hi_64, lo_64;
    int64_t res;

    if (a == INT64_MIN && b == INT64_MIN) {
        env->vxsat = 1;
        return INT64_MAX;
    }

    muls64(&lo_64, &hi_64, a, b);
    round = get_round(vxrm, lo_64, 63);
    /*
     * Cannot overflow, as there are always
     * 2 sign bits after multiply.
     */
    res = (hi_64 << 1) | (lo_64 >> 63);
    if (round) {
        if (res == INT64_MAX) {
            env->vxsat = 1;
        } else {
            res += 1;
        }
    }
    return res;
}

RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
GEN_VEXT_VV_RM(vsmul_vv_b, 1)
GEN_VEXT_VV_RM(vsmul_vv_h, 2)
GEN_VEXT_VV_RM(vsmul_vv_w, 4)
GEN_VEXT_VV_RM(vsmul_vv_d, 8)

RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
GEN_VEXT_VX_RM(vsmul_vx_b, 1)
GEN_VEXT_VX_RM(vsmul_vx_h, 2)
GEN_VEXT_VX_RM(vsmul_vx_w, 4)
GEN_VEXT_VX_RM(vsmul_vx_d, 8)

/* Vector Single-Width Scaling Shift Instructions */

/*
 * vssrl8/16/32/64: logical right shift of a by the low bits of b (the
 * shift amount is masked to element bits - 1), plus the rounding
 * increment from get_round(). env is unused; nothing saturates.
 */
static inline uint8_t
vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
{
    uint8_t round, shift = b & 0x7;
    uint8_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    return res;
}
static inline uint16_t
vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
{
    uint8_t round, shift = b & 0xf;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
static inline uint32_t
vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x1f;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
static inline uint64_t
vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
{
    uint8_t round, shift = b & 0x3f;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
GEN_VEXT_VV_RM(vssrl_vv_b, 1)
GEN_VEXT_VV_RM(vssrl_vv_h, 2)
GEN_VEXT_VV_RM(vssrl_vv_w, 4)
GEN_VEXT_VV_RM(vssrl_vv_d, 8)

RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
GEN_VEXT_VX_RM(vssrl_vx_b, 1)
GEN_VEXT_VX_RM(vssrl_vx_h, 2)
GEN_VEXT_VX_RM(vssrl_vx_w, 4)
GEN_VEXT_VX_RM(vssrl_vx_d, 8)

/*
 * vssra8/16/32/64: same as vssrl* but operating on signed element types,
 * so "a >> shift" is applied to a signed value.
 */
static inline int8_t
vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
{
    uint8_t round, shift = b & 0x7;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
static inline int16_t
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
{
    uint8_t round, shift = b & 0xf;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
static inline int32_t
vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    uint8_t round, shift = b & 0x1f;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}
static inline int64_t
vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    uint8_t round, shift = b & 0x3f;

    round = get_round(vxrm, a, shift);
    return (a >> shift) + round;
}

RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
GEN_VEXT_VV_RM(vssra_vv_b, 1)
GEN_VEXT_VV_RM(vssra_vv_h, 2)
GEN_VEXT_VV_RM(vssra_vv_w, 4)
GEN_VEXT_VV_RM(vssra_vv_d, 8)

RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
GEN_VEXT_VX_RM(vssra_vx_b, 1)
GEN_VEXT_VX_RM(vssra_vx_h, 2)
GEN_VEXT_VX_RM(vssra_vx_w, 4)
GEN_VEXT_VX_RM(vssra_vx_d, 8)

/* Vector Narrowing Fixed-Point Clip Instructions */

/*
 * vnclip8/16/32: a is a source element twice as wide as the result. It is
 * shifted right by the low bits of b (masked to the source width - 1),
 * rounded via get_round(), and clamped to the signed range of the narrow
 * result type. env->vxsat is set whenever the value is clamped.
 */
static inline int8_t
vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
{
    uint8_t round, shift = b & 0xf;
    int16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > INT8_MAX) {
        env->vxsat = 0x1;
        return INT8_MAX;
    } else if (res < INT8_MIN) {
        env->vxsat = 0x1;
        return INT8_MIN;
    } else {
        return res;
    }
}

static inline int16_t
vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
{
    uint8_t round, shift = b & 0x1f;
    int32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > INT16_MAX) {
        env->vxsat = 0x1;
        return INT16_MAX;
    } else if (res < INT16_MIN) {
        env->vxsat = 0x1;
        return INT16_MIN;
    } else {
        return res;
    }
}

static inline int32_t
vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
{
    uint8_t round, shift = b & 0x3f;
    int64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > INT32_MAX) {
        env->vxsat = 0x1;
        return INT32_MAX;
    } else if (res < INT32_MIN) {
        env->vxsat = 0x1;
        return INT32_MIN;
    } else {
        return res;
    }
}

/* the vs2 index macro is one size up from the vd/vs1 index macro */
RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
GEN_VEXT_VV_RM(vnclip_wv_b, 1)
GEN_VEXT_VV_RM(vnclip_wv_h, 2)
GEN_VEXT_VV_RM(vnclip_wv_w, 4)

RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
GEN_VEXT_VX_RM(vnclip_wx_b, 1)
GEN_VEXT_VX_RM(vnclip_wx_h, 2)
GEN_VEXT_VX_RM(vnclip_wx_w, 4)

/*
 * vnclipu8/16/32: unsigned counterparts of vnclip*; only the upper bound
 * of the narrow result type needs checking.
 */
static inline uint8_t
vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
{
    uint8_t round, shift = b & 0xf;
    uint16_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT8_MAX) {
        env->vxsat = 0x1;
        return UINT8_MAX;
    } else {
        return res;
    }
}

static inline uint16_t
vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
{
    uint8_t round, shift = b & 0x1f;
    uint32_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT16_MAX) {
        env->vxsat = 0x1;
        return UINT16_MAX;
    } else {
        return res;
    }
}

static inline uint32_t
vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
{
    uint8_t round, shift = b & 0x3f;
    uint64_t res;

    round = get_round(vxrm, a, shift);
    res = (a >> shift) + round;
    if (res > UINT32_MAX) {
        env->vxsat = 0x1;
        return UINT32_MAX;
    } else {
        return res;
    }
}

RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
GEN_VEXT_VV_RM(vnclipu_wv_w, 4)

RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
GEN_VEXT_VX_RM(vnclipu_wx_w, 4)

/*
 * Vector Floating-Point Arithmetic Instructions
 */
/* Vector Single-Width Floating-Point Add/Subtract Instructions */

/*
 * Define do_NAME, the per-element body of a vector-vector floating-point
 * operation: load element i of vs1 and vs2 and store
 * OP(s2, s1, &env->fp_status) into element i of vd. Note that OP takes
 * the vs2 operand first.
 */
#define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)   \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i,   \
                      CPURISCVState *env)                      \
{                                                              \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                            \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);         \
}

/*
 * Define helper NAME for a vector-vector floating-point instruction with
 * element size ESZ bytes. Elements [env->vstart, env->vl) are processed
 * with do_NAME; when vm is 0, elements whose mask bit in v0 is clear are
 * handed to vext_set_elems_1s with the vma flag instead. Afterwards
 * vstart is reset to 0 and the tail bytes [vl * ESZ, total_elems * ESZ)
 * are handed to vext_set_elems_1s with the vta flag.
 */
#define GEN_VEXT_VV_ENV(NAME, ESZ)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems =                                \
        vext_get_total_elems(env, desc, ESZ);             \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t vma = vext_vma(desc);                        \
    uint32_t i;                                           \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            /* set masked-off elements to 1s */           \
            vext_set_elems_1s(vd, vma, i * ESZ,           \
                              (i + 1) * ESZ);             \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, vs1, vs2, i, env);                  \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, vl * ESZ,                  \
                      total_elems * ESZ);                 \
}

RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
GEN_VEXT_VV_ENV(vfadd_vv_d, 8)

/*
 * Scalar-operand counterpart of OPFVV2: s1 arrives as a uint64_t and is
 * converted to T1 and then TX1 before being passed as the second operand
 * of OP.
 */
#define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)        \
static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
                      CPURISCVState *env)                      \
{                                                              \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                            \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
}

/*
 * Scalar-operand counterpart of GEN_VEXT_VV_ENV; the masking, vstart and
 * tail handling are the same.
 */
#define GEN_VEXT_VF(NAME, ESZ)                            \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1,        \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    uint32_t vm = vext_vm(desc);                          \
    uint32_t vl = env->vl;                                \
    uint32_t total_elems =                                \
        vext_get_total_elems(env, desc, ESZ);             \
    uint32_t vta = vext_vta(desc);                        \
    uint32_t vma = vext_vma(desc);                        \
    uint32_t i;                                           \
                                                          \
    for (i = env->vstart; i < vl; i++) {                  \
        if (!vm && !vext_elem_mask(v0, i)) {              \
            /* set masked-off elements to 1s */           \
            vext_set_elems_1s(vd, vma, i * ESZ,           \
                              (i + 1) * ESZ);             \
            continue;                                     \
        }                                                 \
        do_##NAME(vd, s1, vs2, i, env);                   \
    }                                                     \
    env->vstart = 0;                                      \
    /* set tail elements to 1s */                         \
    vext_set_elems_1s(vd, vta, vl * ESZ,                  \
                      total_elems * ESZ);                 \
}

RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
GEN_VEXT_VF(vfadd_vf_h, 2)
GEN_VEXT_VF(vfadd_vf_w, 4)
GEN_VEXT_VF(vfadd_vf_d, 8)

RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
GEN_VEXT_VF(vfsub_vf_h, 2)
GEN_VEXT_VF(vfsub_vf_w, 4)
GEN_VEXT_VF(vfsub_vf_d, 8)

/*
 * Reverse subtract: computes b - a. OPFVF2 calls OP(s2, s1, ...), so the
 * generated vfrsub.vf bodies evaluate s1 - s2.
 */
static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
{
    return float16_sub(b, a, s);
}

static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
{
    return float32_sub(b, a, s);
}

static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
{
    return float64_sub(b, a, s);
}

RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
GEN_VEXT_VF(vfrsub_vf_h, 2)
GEN_VEXT_VF(vfrsub_vf_w, 4)
GEN_VEXT_VF(vfrsub_vf_d, 8)

/* Vector Widening Floating-Point Add/Subtract Instructions */

/*
 * Widening add: both operands are converted to the next wider
 * floating-point format and added there. The half-precision conversion
 * is called with its second argument set to true.
 */
static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
{
    return float32_add(float16_to_float32(a, true, s),
                       float16_to_float32(b, true, s), s);
}

static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
{
    return float64_add(float32_to_float64(a, s),
                       float32_to_float64(b, s), s);

}

RVVCALL(OPFVV2,
vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2944 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2945 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 2946 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 2947 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2948 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2949 GEN_VEXT_VF(vfwadd_vf_h, 4) 2950 GEN_VEXT_VF(vfwadd_vf_w, 8) 2951 2952 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2953 { 2954 return float32_sub(float16_to_float32(a, true, s), 2955 float16_to_float32(b, true, s), s); 2956 } 2957 2958 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2959 { 2960 return float64_sub(float32_to_float64(a, s), 2961 float32_to_float64(b, s), s); 2962 2963 } 2964 2965 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 2966 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 2967 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 2968 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 2969 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 2970 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 2971 GEN_VEXT_VF(vfwsub_vf_h, 4) 2972 GEN_VEXT_VF(vfwsub_vf_w, 8) 2973 2974 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 2975 { 2976 return float32_add(a, float16_to_float32(b, true, s), s); 2977 } 2978 2979 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 2980 { 2981 return float64_add(a, float32_to_float64(b, s), s); 2982 } 2983 2984 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 2985 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 2986 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 2987 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 2988 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 2989 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 2990 GEN_VEXT_VF(vfwadd_wf_h, 4) 2991 GEN_VEXT_VF(vfwadd_wf_w, 8) 2992 2993 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 2994 { 2995 return float32_sub(a, float16_to_float32(b, 
true, s), s); 2996 } 2997 2998 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 2999 { 3000 return float64_sub(a, float32_to_float64(b, s), s); 3001 } 3002 3003 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3004 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3005 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3006 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3007 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3008 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3009 GEN_VEXT_VF(vfwsub_wf_h, 4) 3010 GEN_VEXT_VF(vfwsub_wf_w, 8) 3011 3012 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3013 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3014 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3015 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3016 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3017 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3018 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3019 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3020 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3021 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3022 GEN_VEXT_VF(vfmul_vf_h, 2) 3023 GEN_VEXT_VF(vfmul_vf_w, 4) 3024 GEN_VEXT_VF(vfmul_vf_d, 8) 3025 3026 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3027 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3028 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3029 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3030 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3031 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3032 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3033 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3034 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3035 GEN_VEXT_VF(vfdiv_vf_h, 2) 3036 GEN_VEXT_VF(vfdiv_vf_w, 4) 3037 GEN_VEXT_VF(vfdiv_vf_d, 8) 3038 3039 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3040 { 3041 return float16_div(b, a, s); 3042 } 3043 
3044 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3045 { 3046 return float32_div(b, a, s); 3047 } 3048 3049 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3050 { 3051 return float64_div(b, a, s); 3052 } 3053 3054 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3055 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3056 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3057 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3058 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3059 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3060 3061 /* Vector Widening Floating-Point Multiply */ 3062 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3063 { 3064 return float32_mul(float16_to_float32(a, true, s), 3065 float16_to_float32(b, true, s), s); 3066 } 3067 3068 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3069 { 3070 return float64_mul(float32_to_float64(a, s), 3071 float32_to_float64(b, s), s); 3072 3073 } 3074 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3075 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3076 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3077 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3078 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3079 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3080 GEN_VEXT_VF(vfwmul_vf_h, 4) 3081 GEN_VEXT_VF(vfwmul_vf_w, 8) 3082 3083 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3084 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3085 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3086 CPURISCVState *env) \ 3087 { \ 3088 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3089 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3090 TD d = *((TD *)vd + HD(i)); \ 3091 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3092 } 3093 3094 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3095 { 3096 return float16_muladd(a, b, d, 0, s); 3097 } 3098 3099 static uint32_t fmacc32(uint32_t a, 
uint32_t b, uint32_t d, float_status *s) 3100 { 3101 return float32_muladd(a, b, d, 0, s); 3102 } 3103 3104 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3105 { 3106 return float64_muladd(a, b, d, 0, s); 3107 } 3108 3109 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3110 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3111 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3112 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3113 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3114 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3115 3116 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3117 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3118 CPURISCVState *env) \ 3119 { \ 3120 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3121 TD d = *((TD *)vd + HD(i)); \ 3122 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3123 } 3124 3125 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3126 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3127 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3128 GEN_VEXT_VF(vfmacc_vf_h, 2) 3129 GEN_VEXT_VF(vfmacc_vf_w, 4) 3130 GEN_VEXT_VF(vfmacc_vf_d, 8) 3131 3132 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3133 { 3134 return float16_muladd(a, b, d, float_muladd_negate_c | 3135 float_muladd_negate_product, s); 3136 } 3137 3138 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3139 { 3140 return float32_muladd(a, b, d, float_muladd_negate_c | 3141 float_muladd_negate_product, s); 3142 } 3143 3144 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3145 { 3146 return float64_muladd(a, b, d, float_muladd_negate_c | 3147 float_muladd_negate_product, s); 3148 } 3149 3150 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3151 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3152 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3153 
GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3154 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3155 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3156 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3157 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3158 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3159 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3160 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3161 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3162 3163 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3164 { 3165 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3166 } 3167 3168 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3169 { 3170 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3171 } 3172 3173 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3174 { 3175 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3176 } 3177 3178 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3179 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3180 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3181 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3182 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3183 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3184 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3185 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3186 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3187 GEN_VEXT_VF(vfmsac_vf_h, 2) 3188 GEN_VEXT_VF(vfmsac_vf_w, 4) 3189 GEN_VEXT_VF(vfmsac_vf_d, 8) 3190 3191 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3192 { 3193 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3194 } 3195 3196 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3197 { 3198 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3199 } 3200 3201 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3202 { 3203 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3204 } 
3205 3206 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3207 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3208 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3209 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3210 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3211 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3212 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3213 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3214 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3215 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3216 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3217 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3218 3219 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3220 { 3221 return float16_muladd(d, b, a, 0, s); 3222 } 3223 3224 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3225 { 3226 return float32_muladd(d, b, a, 0, s); 3227 } 3228 3229 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3230 { 3231 return float64_muladd(d, b, a, 0, s); 3232 } 3233 3234 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3235 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3236 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3237 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) 3238 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3239 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3240 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3241 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3242 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3243 GEN_VEXT_VF(vfmadd_vf_h, 2) 3244 GEN_VEXT_VF(vfmadd_vf_w, 4) 3245 GEN_VEXT_VF(vfmadd_vf_d, 8) 3246 3247 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3248 { 3249 return float16_muladd(d, b, a, float_muladd_negate_c | 3250 float_muladd_negate_product, s); 3251 } 3252 3253 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3254 { 3255 return float32_muladd(d, b, a, float_muladd_negate_c | 3256 
float_muladd_negate_product, s); 3257 } 3258 3259 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3260 { 3261 return float64_muladd(d, b, a, float_muladd_negate_c | 3262 float_muladd_negate_product, s); 3263 } 3264 3265 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3266 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3267 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3268 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3269 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3270 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3271 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3272 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3273 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3274 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3275 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3276 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3277 3278 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3279 { 3280 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3281 } 3282 3283 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3284 { 3285 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3286 } 3287 3288 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3289 { 3290 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3291 } 3292 3293 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3294 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3295 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3296 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3297 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3298 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3299 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3300 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3301 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3302 GEN_VEXT_VF(vfmsub_vf_h, 2) 3303 GEN_VEXT_VF(vfmsub_vf_w, 4) 3304 GEN_VEXT_VF(vfmsub_vf_d, 8) 3305 3306 static uint16_t fnmsub16(uint16_t a, uint16_t b, 
uint16_t d, float_status *s) 3307 { 3308 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3309 } 3310 3311 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3312 { 3313 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3314 } 3315 3316 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3317 { 3318 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3319 } 3320 3321 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3322 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3323 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3324 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3325 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3326 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3327 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3328 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3329 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3330 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3331 GEN_VEXT_VF(vfnmsub_vf_w, 4) 3332 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3333 3334 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3335 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3336 { 3337 return float32_muladd(float16_to_float32(a, true, s), 3338 float16_to_float32(b, true, s), d, 0, s); 3339 } 3340 3341 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3342 { 3343 return float64_muladd(float32_to_float64(a, s), 3344 float32_to_float64(b, s), d, 0, s); 3345 } 3346 3347 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3348 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3349 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3350 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3351 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3352 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3353 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3354 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3355 3356 static uint32_t 
fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3357 { 3358 return float32_muladd(bfloat16_to_float32(a, s), 3359 bfloat16_to_float32(b, s), d, 0, s); 3360 } 3361 3362 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16) 3363 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4) 3364 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16) 3365 GEN_VEXT_VF(vfwmaccbf16_vf, 4) 3366 3367 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3368 { 3369 return float32_muladd(float16_to_float32(a, true, s), 3370 float16_to_float32(b, true, s), d, 3371 float_muladd_negate_c | float_muladd_negate_product, 3372 s); 3373 } 3374 3375 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3376 { 3377 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), 3378 d, float_muladd_negate_c | 3379 float_muladd_negate_product, s); 3380 } 3381 3382 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3383 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3384 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3385 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3386 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3387 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3388 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3389 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3390 3391 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3392 { 3393 return float32_muladd(float16_to_float32(a, true, s), 3394 float16_to_float32(b, true, s), d, 3395 float_muladd_negate_c, s); 3396 } 3397 3398 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3399 { 3400 return float64_muladd(float32_to_float64(a, s), 3401 float32_to_float64(b, s), d, 3402 float_muladd_negate_c, s); 3403 } 3404 3405 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3406 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3407 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3408 
GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3409 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3410 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3411 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3412 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3413 3414 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3415 { 3416 return float32_muladd(float16_to_float32(a, true, s), 3417 float16_to_float32(b, true, s), d, 3418 float_muladd_negate_product, s); 3419 } 3420 3421 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3422 { 3423 return float64_muladd(float32_to_float64(a, s), 3424 float32_to_float64(b, s), d, 3425 float_muladd_negate_product, s); 3426 } 3427 3428 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3429 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3430 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3431 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3432 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3433 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3434 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3435 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3436 3437 /* Vector Floating-Point Square-Root Instruction */ 3438 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3439 static void do_##NAME(void *vd, void *vs2, int i, \ 3440 CPURISCVState *env) \ 3441 { \ 3442 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3443 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3444 } 3445 3446 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3447 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3448 CPURISCVState *env, uint32_t desc) \ 3449 { \ 3450 uint32_t vm = vext_vm(desc); \ 3451 uint32_t vl = env->vl; \ 3452 uint32_t total_elems = \ 3453 vext_get_total_elems(env, desc, ESZ); \ 3454 uint32_t vta = vext_vta(desc); \ 3455 uint32_t vma = vext_vma(desc); \ 3456 uint32_t i; \ 3457 \ 3458 if (vl == 0) { \ 3459 return; \ 3460 } \ 3461 for (i = env->vstart; i < vl; i++) { \ 3462 if (!vm && !vext_elem_mask(v0, i)) { \ 3463 /* set masked-off elements to 1s 
*/ \ 3464 vext_set_elems_1s(vd, vma, i * ESZ, \ 3465 (i + 1) * ESZ); \ 3466 continue; \ 3467 } \ 3468 do_##NAME(vd, vs2, i, env); \ 3469 } \ 3470 env->vstart = 0; \ 3471 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3472 total_elems * ESZ); \ 3473 } 3474 3475 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3476 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3477 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3478 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3479 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3480 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3481 3482 /* 3483 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3484 * 3485 * Adapted from riscv-v-spec recip.c: 3486 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3487 */ 3488 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3489 { 3490 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3491 uint64_t exp = extract64(f, frac_size, exp_size); 3492 uint64_t frac = extract64(f, 0, frac_size); 3493 3494 const uint8_t lookup_table[] = { 3495 52, 51, 50, 48, 47, 46, 44, 43, 3496 42, 41, 40, 39, 38, 36, 35, 34, 3497 33, 32, 31, 30, 30, 29, 28, 27, 3498 26, 25, 24, 23, 23, 22, 21, 20, 3499 19, 19, 18, 17, 16, 16, 15, 14, 3500 14, 13, 12, 12, 11, 10, 10, 9, 3501 9, 8, 7, 7, 6, 6, 5, 4, 3502 4, 3, 3, 2, 2, 1, 1, 0, 3503 127, 125, 123, 121, 119, 118, 116, 114, 3504 113, 111, 109, 108, 106, 105, 103, 102, 3505 100, 99, 97, 96, 95, 93, 92, 91, 3506 90, 88, 87, 86, 85, 84, 83, 82, 3507 80, 79, 78, 77, 76, 75, 74, 73, 3508 72, 71, 70, 70, 69, 68, 67, 66, 3509 65, 64, 63, 63, 62, 61, 60, 59, 3510 59, 58, 57, 56, 56, 55, 54, 53 3511 }; 3512 const int precision = 7; 3513 3514 if (exp == 0 && frac != 0) { /* subnormal */ 3515 /* Normalize the subnormal. 
*/ 3516 while (extract64(frac, frac_size - 1, 1) == 0) { 3517 exp--; 3518 frac <<= 1; 3519 } 3520 3521 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3522 } 3523 3524 int idx = ((exp & 1) << (precision - 1)) | 3525 (frac >> (frac_size - precision + 1)); 3526 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3527 (frac_size - precision); 3528 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3529 3530 uint64_t val = 0; 3531 val = deposit64(val, 0, frac_size, out_frac); 3532 val = deposit64(val, frac_size, exp_size, out_exp); 3533 val = deposit64(val, frac_size + exp_size, 1, sign); 3534 return val; 3535 } 3536 3537 static float16 frsqrt7_h(float16 f, float_status *s) 3538 { 3539 int exp_size = 5, frac_size = 10; 3540 bool sign = float16_is_neg(f); 3541 3542 /* 3543 * frsqrt7(sNaN) = canonical NaN 3544 * frsqrt7(-inf) = canonical NaN 3545 * frsqrt7(-normal) = canonical NaN 3546 * frsqrt7(-subnormal) = canonical NaN 3547 */ 3548 if (float16_is_signaling_nan(f, s) || 3549 (float16_is_infinity(f) && sign) || 3550 (float16_is_normal(f) && sign) || 3551 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3552 s->float_exception_flags |= float_flag_invalid; 3553 return float16_default_nan(s); 3554 } 3555 3556 /* frsqrt7(qNaN) = canonical NaN */ 3557 if (float16_is_quiet_nan(f, s)) { 3558 return float16_default_nan(s); 3559 } 3560 3561 /* frsqrt7(+-0) = +-inf */ 3562 if (float16_is_zero(f)) { 3563 s->float_exception_flags |= float_flag_divbyzero; 3564 return float16_set_sign(float16_infinity, sign); 3565 } 3566 3567 /* frsqrt7(+inf) = +0 */ 3568 if (float16_is_infinity(f) && !sign) { 3569 return float16_set_sign(float16_zero, sign); 3570 } 3571 3572 /* +normal, +subnormal */ 3573 uint64_t val = frsqrt7(f, exp_size, frac_size); 3574 return make_float16(val); 3575 } 3576 3577 static float32 frsqrt7_s(float32 f, float_status *s) 3578 { 3579 int exp_size = 8, frac_size = 23; 3580 bool sign = float32_is_neg(f); 3581 3582 /* 3583 * 
frsqrt7(sNaN) = canonical NaN 3584 * frsqrt7(-inf) = canonical NaN 3585 * frsqrt7(-normal) = canonical NaN 3586 * frsqrt7(-subnormal) = canonical NaN 3587 */ 3588 if (float32_is_signaling_nan(f, s) || 3589 (float32_is_infinity(f) && sign) || 3590 (float32_is_normal(f) && sign) || 3591 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3592 s->float_exception_flags |= float_flag_invalid; 3593 return float32_default_nan(s); 3594 } 3595 3596 /* frsqrt7(qNaN) = canonical NaN */ 3597 if (float32_is_quiet_nan(f, s)) { 3598 return float32_default_nan(s); 3599 } 3600 3601 /* frsqrt7(+-0) = +-inf */ 3602 if (float32_is_zero(f)) { 3603 s->float_exception_flags |= float_flag_divbyzero; 3604 return float32_set_sign(float32_infinity, sign); 3605 } 3606 3607 /* frsqrt7(+inf) = +0 */ 3608 if (float32_is_infinity(f) && !sign) { 3609 return float32_set_sign(float32_zero, sign); 3610 } 3611 3612 /* +normal, +subnormal */ 3613 uint64_t val = frsqrt7(f, exp_size, frac_size); 3614 return make_float32(val); 3615 } 3616 3617 static float64 frsqrt7_d(float64 f, float_status *s) 3618 { 3619 int exp_size = 11, frac_size = 52; 3620 bool sign = float64_is_neg(f); 3621 3622 /* 3623 * frsqrt7(sNaN) = canonical NaN 3624 * frsqrt7(-inf) = canonical NaN 3625 * frsqrt7(-normal) = canonical NaN 3626 * frsqrt7(-subnormal) = canonical NaN 3627 */ 3628 if (float64_is_signaling_nan(f, s) || 3629 (float64_is_infinity(f) && sign) || 3630 (float64_is_normal(f) && sign) || 3631 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3632 s->float_exception_flags |= float_flag_invalid; 3633 return float64_default_nan(s); 3634 } 3635 3636 /* frsqrt7(qNaN) = canonical NaN */ 3637 if (float64_is_quiet_nan(f, s)) { 3638 return float64_default_nan(s); 3639 } 3640 3641 /* frsqrt7(+-0) = +-inf */ 3642 if (float64_is_zero(f)) { 3643 s->float_exception_flags |= float_flag_divbyzero; 3644 return float64_set_sign(float64_infinity, sign); 3645 } 3646 3647 /* frsqrt7(+inf) = +0 */ 3648 if 
(float64_is_infinity(f) && !sign) { 3649 return float64_set_sign(float64_zero, sign); 3650 } 3651 3652 /* +normal, +subnormal */ 3653 uint64_t val = frsqrt7(f, exp_size, frac_size); 3654 return make_float64(val); 3655 } 3656 3657 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3658 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3659 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3660 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3661 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3662 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3663 3664 /* 3665 * Vector Floating-Point Reciprocal Estimate Instruction 3666 * 3667 * Adapted from riscv-v-spec recip.c: 3668 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3669 */ 3670 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3671 float_status *s) 3672 { 3673 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3674 uint64_t exp = extract64(f, frac_size, exp_size); 3675 uint64_t frac = extract64(f, 0, frac_size); 3676 3677 const uint8_t lookup_table[] = { 3678 127, 125, 123, 121, 119, 117, 116, 114, 3679 112, 110, 109, 107, 105, 104, 102, 100, 3680 99, 97, 96, 94, 93, 91, 90, 88, 3681 87, 85, 84, 83, 81, 80, 79, 77, 3682 76, 75, 74, 72, 71, 70, 69, 68, 3683 66, 65, 64, 63, 62, 61, 60, 59, 3684 58, 57, 56, 55, 54, 53, 52, 51, 3685 50, 49, 48, 47, 46, 45, 44, 43, 3686 42, 41, 40, 40, 39, 38, 37, 36, 3687 35, 35, 34, 33, 32, 31, 31, 30, 3688 29, 28, 28, 27, 26, 25, 25, 24, 3689 23, 23, 22, 21, 21, 20, 19, 19, 3690 18, 17, 17, 16, 15, 15, 14, 14, 3691 13, 12, 12, 11, 11, 10, 9, 9, 3692 8, 8, 7, 7, 6, 5, 5, 4, 3693 4, 3, 3, 2, 2, 1, 1, 0 3694 }; 3695 const int precision = 7; 3696 3697 if (exp == 0 && frac != 0) { /* subnormal */ 3698 /* Normalize the subnormal. 
*/ 3699 while (extract64(frac, frac_size - 1, 1) == 0) { 3700 exp--; 3701 frac <<= 1; 3702 } 3703 3704 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3705 3706 if (exp != 0 && exp != UINT64_MAX) { 3707 /* 3708 * Overflow to inf or max value of same sign, 3709 * depending on sign and rounding mode. 3710 */ 3711 s->float_exception_flags |= (float_flag_inexact | 3712 float_flag_overflow); 3713 3714 if ((s->float_rounding_mode == float_round_to_zero) || 3715 ((s->float_rounding_mode == float_round_down) && !sign) || 3716 ((s->float_rounding_mode == float_round_up) && sign)) { 3717 /* Return greatest/negative finite value. */ 3718 return (sign << (exp_size + frac_size)) | 3719 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3720 } else { 3721 /* Return +-inf. */ 3722 return (sign << (exp_size + frac_size)) | 3723 MAKE_64BIT_MASK(frac_size, exp_size); 3724 } 3725 } 3726 } 3727 3728 int idx = frac >> (frac_size - precision); 3729 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3730 (frac_size - precision); 3731 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3732 3733 if (out_exp == 0 || out_exp == UINT64_MAX) { 3734 /* 3735 * The result is subnormal, but don't raise the underflow exception, 3736 * because there's no additional loss of precision. 
3737 */ 3738 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3739 if (out_exp == UINT64_MAX) { 3740 out_frac >>= 1; 3741 out_exp = 0; 3742 } 3743 } 3744 3745 uint64_t val = 0; 3746 val = deposit64(val, 0, frac_size, out_frac); 3747 val = deposit64(val, frac_size, exp_size, out_exp); 3748 val = deposit64(val, frac_size + exp_size, 1, sign); 3749 return val; 3750 } 3751 3752 static float16 frec7_h(float16 f, float_status *s) 3753 { 3754 int exp_size = 5, frac_size = 10; 3755 bool sign = float16_is_neg(f); 3756 3757 /* frec7(+-inf) = +-0 */ 3758 if (float16_is_infinity(f)) { 3759 return float16_set_sign(float16_zero, sign); 3760 } 3761 3762 /* frec7(+-0) = +-inf */ 3763 if (float16_is_zero(f)) { 3764 s->float_exception_flags |= float_flag_divbyzero; 3765 return float16_set_sign(float16_infinity, sign); 3766 } 3767 3768 /* frec7(sNaN) = canonical NaN */ 3769 if (float16_is_signaling_nan(f, s)) { 3770 s->float_exception_flags |= float_flag_invalid; 3771 return float16_default_nan(s); 3772 } 3773 3774 /* frec7(qNaN) = canonical NaN */ 3775 if (float16_is_quiet_nan(f, s)) { 3776 return float16_default_nan(s); 3777 } 3778 3779 /* +-normal, +-subnormal */ 3780 uint64_t val = frec7(f, exp_size, frac_size, s); 3781 return make_float16(val); 3782 } 3783 3784 static float32 frec7_s(float32 f, float_status *s) 3785 { 3786 int exp_size = 8, frac_size = 23; 3787 bool sign = float32_is_neg(f); 3788 3789 /* frec7(+-inf) = +-0 */ 3790 if (float32_is_infinity(f)) { 3791 return float32_set_sign(float32_zero, sign); 3792 } 3793 3794 /* frec7(+-0) = +-inf */ 3795 if (float32_is_zero(f)) { 3796 s->float_exception_flags |= float_flag_divbyzero; 3797 return float32_set_sign(float32_infinity, sign); 3798 } 3799 3800 /* frec7(sNaN) = canonical NaN */ 3801 if (float32_is_signaling_nan(f, s)) { 3802 s->float_exception_flags |= float_flag_invalid; 3803 return float32_default_nan(s); 3804 } 3805 3806 /* frec7(qNaN) = canonical NaN */ 3807 if (float32_is_quiet_nan(f, s)) { 3808 
return float32_default_nan(s); 3809 } 3810 3811 /* +-normal, +-subnormal */ 3812 uint64_t val = frec7(f, exp_size, frac_size, s); 3813 return make_float32(val); 3814 } 3815 3816 static float64 frec7_d(float64 f, float_status *s) 3817 { 3818 int exp_size = 11, frac_size = 52; 3819 bool sign = float64_is_neg(f); 3820 3821 /* frec7(+-inf) = +-0 */ 3822 if (float64_is_infinity(f)) { 3823 return float64_set_sign(float64_zero, sign); 3824 } 3825 3826 /* frec7(+-0) = +-inf */ 3827 if (float64_is_zero(f)) { 3828 s->float_exception_flags |= float_flag_divbyzero; 3829 return float64_set_sign(float64_infinity, sign); 3830 } 3831 3832 /* frec7(sNaN) = canonical NaN */ 3833 if (float64_is_signaling_nan(f, s)) { 3834 s->float_exception_flags |= float_flag_invalid; 3835 return float64_default_nan(s); 3836 } 3837 3838 /* frec7(qNaN) = canonical NaN */ 3839 if (float64_is_quiet_nan(f, s)) { 3840 return float64_default_nan(s); 3841 } 3842 3843 /* +-normal, +-subnormal */ 3844 uint64_t val = frec7(f, exp_size, frac_size, s); 3845 return make_float64(val); 3846 } 3847 3848 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3849 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3850 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3851 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 3852 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 3853 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 3854 3855 /* Vector Floating-Point MIN/MAX Instructions */ 3856 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3857 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number) 3858 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3859 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 3860 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 3861 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 3862 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3863 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3864 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3865 
GEN_VEXT_VF(vfmin_vf_h, 2) 3866 GEN_VEXT_VF(vfmin_vf_w, 4) 3867 GEN_VEXT_VF(vfmin_vf_d, 8) 3868 3869 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3870 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3871 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3872 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 3873 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 3874 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 3875 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3876 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3877 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3878 GEN_VEXT_VF(vfmax_vf_h, 2) 3879 GEN_VEXT_VF(vfmax_vf_w, 4) 3880 GEN_VEXT_VF(vfmax_vf_d, 8) 3881 3882 /* Vector Floating-Point Sign-Injection Instructions */ 3883 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3884 { 3885 return deposit64(b, 0, 15, a); 3886 } 3887 3888 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3889 { 3890 return deposit64(b, 0, 31, a); 3891 } 3892 3893 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3894 { 3895 return deposit64(b, 0, 63, a); 3896 } 3897 3898 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3899 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3900 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3901 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 3902 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 3903 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 3904 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3905 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3906 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3907 GEN_VEXT_VF(vfsgnj_vf_h, 2) 3908 GEN_VEXT_VF(vfsgnj_vf_w, 4) 3909 GEN_VEXT_VF(vfsgnj_vf_d, 8) 3910 3911 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3912 { 3913 return deposit64(~b, 0, 15, a); 3914 } 3915 3916 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3917 
{ 3918 return deposit64(~b, 0, 31, a); 3919 } 3920 3921 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3922 { 3923 return deposit64(~b, 0, 63, a); 3924 } 3925 3926 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3927 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3928 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3929 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 3930 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 3931 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 3932 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3933 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3934 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3935 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 3936 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 3937 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 3938 3939 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3940 { 3941 return deposit64(b ^ a, 0, 15, a); 3942 } 3943 3944 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3945 { 3946 return deposit64(b ^ a, 0, 31, a); 3947 } 3948 3949 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3950 { 3951 return deposit64(b ^ a, 0, 63, a); 3952 } 3953 3954 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3955 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3956 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3957 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 3958 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 3959 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 3960 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3961 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3962 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3963 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 3964 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 3965 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 3966 3967 /* Vector Floating-Point Compare Instructions */ 3968 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3969 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3970 CPURISCVState *env, 
/*
 * Generate a helper that compares every element of vs2 with the scalar
 * s1 and stores one result bit per element in the mask register vd.
 *
 * NAME:  helper name.
 * ETYPE: integer type that carries one source element; s1 is cast to it.
 * H:     element index adjustment macro (H2/H4/H8 in the users below).
 * DO_OP: predicate, called as DO_OP(element, scalar, &env->fp_status).
 */
#define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP)                        \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    const uint32_t unmasked = vext_vm(desc);                          \
    const uint32_t nr_body = env->vl;                                 \
    const uint32_t nr_mask_bits = riscv_cpu_cfg(env)->vlen;           \
    const uint32_t fill_tail = vext_vta_all_1s(desc);                 \
    const uint32_t fill_inactive = vext_vma(desc);                    \
    uint32_t idx;                                                     \
                                                                      \
    for (idx = env->vstart; idx < nr_body; idx++) {                   \
        ETYPE elem = *((ETYPE *)vs2 + H(idx));                        \
                                                                      \
        if (unmasked || vext_elem_mask(v0, idx)) {                    \
            vext_set_elem_mask(vd, idx,                               \
                               DO_OP(elem, (ETYPE)s1,                 \
                                     &env->fp_status));               \
        } else if (fill_inactive) {                                   \
            /* set masked-off elements to 1s */                       \
            vext_set_elem_mask(vd, idx, 1);                           \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
    /* The mask destination register is always tail-agnostic; */      \
    /* set tail elements to 1s when requested. */                     \
    while (fill_tail && idx < nr_mask_bits) {                         \
        vext_set_elem_mask(vd, idx, 1);                               \
        idx++;                                                        \
    }                                                                 \
}
uint64_t, H8, float64_le) 4085 4086 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4087 { 4088 FloatRelation compare = float16_compare(a, b, s); 4089 return compare == float_relation_greater; 4090 } 4091 4092 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4093 { 4094 FloatRelation compare = float32_compare(a, b, s); 4095 return compare == float_relation_greater; 4096 } 4097 4098 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4099 { 4100 FloatRelation compare = float64_compare(a, b, s); 4101 return compare == float_relation_greater; 4102 } 4103 4104 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4105 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4106 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4107 4108 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4109 { 4110 FloatRelation compare = float16_compare(a, b, s); 4111 return compare == float_relation_greater || 4112 compare == float_relation_equal; 4113 } 4114 4115 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4116 { 4117 FloatRelation compare = float32_compare(a, b, s); 4118 return compare == float_relation_greater || 4119 compare == float_relation_equal; 4120 } 4121 4122 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4123 { 4124 FloatRelation compare = float64_compare(a, b, s); 4125 return compare == float_relation_greater || 4126 compare == float_relation_equal; 4127 } 4128 4129 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4130 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4131 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4132 4133 /* Vector Floating-Point Classify Instruction */ 4134 target_ulong fclass_h(uint64_t frs1) 4135 { 4136 float16 f = frs1; 4137 bool sign = float16_is_neg(f); 4138 4139 if (float16_is_infinity(f)) { 4140 return sign ? 1 << 0 : 1 << 7; 4141 } else if (float16_is_zero(f)) { 4142 return sign ? 1 << 3 : 1 << 4; 4143 } else if (float16_is_zero_or_denormal(f)) { 4144 return sign ? 
1 << 2 : 1 << 5; 4145 } else if (float16_is_any_nan(f)) { 4146 float_status s = { }; /* for snan_bit_is_one */ 4147 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4148 } else { 4149 return sign ? 1 << 1 : 1 << 6; 4150 } 4151 } 4152 4153 target_ulong fclass_s(uint64_t frs1) 4154 { 4155 float32 f = frs1; 4156 bool sign = float32_is_neg(f); 4157 4158 if (float32_is_infinity(f)) { 4159 return sign ? 1 << 0 : 1 << 7; 4160 } else if (float32_is_zero(f)) { 4161 return sign ? 1 << 3 : 1 << 4; 4162 } else if (float32_is_zero_or_denormal(f)) { 4163 return sign ? 1 << 2 : 1 << 5; 4164 } else if (float32_is_any_nan(f)) { 4165 float_status s = { }; /* for snan_bit_is_one */ 4166 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4167 } else { 4168 return sign ? 1 << 1 : 1 << 6; 4169 } 4170 } 4171 4172 target_ulong fclass_d(uint64_t frs1) 4173 { 4174 float64 f = frs1; 4175 bool sign = float64_is_neg(f); 4176 4177 if (float64_is_infinity(f)) { 4178 return sign ? 1 << 0 : 1 << 7; 4179 } else if (float64_is_zero(f)) { 4180 return sign ? 1 << 3 : 1 << 4; 4181 } else if (float64_is_zero_or_denormal(f)) { 4182 return sign ? 1 << 2 : 1 << 5; 4183 } else if (float64_is_any_nan(f)) { 4184 float_status s = { }; /* for snan_bit_is_one */ 4185 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4186 } else { 4187 return sign ? 
/* Vector Floating-Point Merge Instruction */

/*
 * Generate a merge helper: for each body element, vd receives the
 * scalar s1 (converted to ETYPE) when the element is selected, i.e.
 * vm is set or its v0 mask bit is set, and the vs2 element otherwise.
 *
 * NAME:  helper name.
 * ETYPE: integer type that carries one element.
 * H:     element index adjustment macro (H2/H4/H8 in the users below).
 */
#define GEN_VFMERGE_VF(NAME, ETYPE, H)                                \
void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                  \
{                                                                     \
    const uint32_t unmasked = vext_vm(desc);                          \
    const uint32_t nr_body = env->vl;                                 \
    const uint32_t elem_bytes = sizeof(ETYPE);                        \
    const uint32_t nr_total =                                         \
        vext_get_total_elems(env, desc, elem_bytes);                  \
    const uint32_t tail_agnostic = vext_vta(desc);                    \
    uint32_t idx;                                                     \
                                                                      \
    for (idx = env->vstart; idx < nr_body; idx++) {                   \
        /* read the source before the destination is written */       \
        ETYPE from_vs2 = *((ETYPE *)vs2 + H(idx));                    \
        ETYPE *dst = (ETYPE *)vd + H(idx);                            \
                                                                      \
        if (unmasked || vext_elem_mask(v0, idx)) {                    \
            *dst = s1;                                                \
        } else {                                                      \
            *dst = from_vs2;                                          \
        }                                                             \
    }                                                                 \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, tail_agnostic, nr_body * elem_bytes,        \
                      nr_total * elem_bytes);                         \
}
*/ 4236 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4237 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4238 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4239 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4240 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4241 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4242 4243 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4244 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4245 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4246 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4247 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4248 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4249 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4250 4251 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4252 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4253 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4254 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4255 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4256 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4257 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4258 4259 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4260 /* (TD, T2, TX2) */ 4261 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4262 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4263 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4264 /* 4265 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4266 */ 4267 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4268 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4269 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4270 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4271 4272 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. 
/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/*
 * Type triples (TD, T2, TX2) for the narrowing conversions: destination
 * element type, source element type, and the type of the temporary that
 * holds the loaded source element.  The source is twice as wide as the
 * destination, and T2 and TX2 are the same type in every row.
 */
#define NOP_UU_B uint8_t,  uint16_t, uint16_t
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
*/ 4318 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4319 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4320 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4321 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4322 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4323 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4324 4325 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4326 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4327 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4328 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4329 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4330 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4331 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4332 4333 /* 4334 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. 4335 */ 4336 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4337 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4338 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4339 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4340 4341 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4342 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4343 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4344 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4345 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4346 4347 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
/*
 * Vector Reduction Operations
 */
/* Vector Single-Width Integer Reduction Instructions */
/*
 * Generate an integer reduction helper.  The accumulator starts as
 * element 0 of vs1, is combined through OP with every selected body
 * element of vs2 (cast to TD), and is stored to element 0 of vd.
 *
 * NAME:    helper name.
 * TD/TS2:  accumulator (destination) and source element types.
 * HD/HS2:  element index adjustment macros for vd/vs1 and for vs2.
 * OP:      binary operation, used as OP(accumulator, element).
 */
#define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP)                      \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                      \
                  void *vs2, CPURISCVState *env,                      \
                  uint32_t desc)                                      \
{                                                                     \
    const uint32_t unmasked = vext_vm(desc);                          \
    const uint32_t nr_body = env->vl;                                 \
    const uint32_t elem_bytes = sizeof(TD);                           \
    const uint32_t reg_bytes = simd_maxsz(desc);                      \
    const uint32_t tail_agnostic = vext_vta(desc);                    \
    TD acc = *((TD *)vs1 + HD(0));                                    \
    uint32_t idx;                                                     \
                                                                      \
    for (idx = env->vstart; idx < nr_body; idx++) {                   \
        TS2 elem = *((TS2 *)vs2 + HS2(idx));                          \
                                                                      \
        /* masked-off elements do not take part in the reduction */   \
        if (unmasked || vext_elem_mask(v0, idx)) {                    \
            acc = OP(acc, (TD)elem);                                  \
        }                                                             \
    }                                                                 \
    *((TD *)vd + HD(0)) = acc;                                        \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, tail_agnostic, elem_bytes, reg_bytes);      \
}
max(vs1[0], vs2[*]) */ 4404 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4405 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4406 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4407 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4408 4409 /* vd[0] = minu(vs1[0], vs2[*]) */ 4410 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4411 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4412 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4413 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4414 4415 /* vd[0] = min(vs1[0], vs2[*]) */ 4416 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4417 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4418 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4419 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4420 4421 /* vd[0] = and(vs1[0], vs2[*]) */ 4422 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4423 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4424 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4425 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4426 4427 /* vd[0] = or(vs1[0], vs2[*]) */ 4428 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4429 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4430 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4431 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4432 4433 /* vd[0] = xor(vs1[0], vs2[*]) */ 4434 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4435 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4436 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4437 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4438 4439 /* Vector Widening Integer Reduction Instructions */ 4440 /* signed sum reduction into double-width accumulator */ 4441 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, 
/* Vector Single-Width Floating-Point Reduction Instructions */
/*
 * Generate a floating-point reduction helper.  The accumulator starts
 * as element 0 of vs1, is combined through OP with every selected body
 * element of vs2 (cast to TD), and is stored to element 0 of vd.
 *
 * NAME:    helper name.
 * TD/TS2:  accumulator (destination) and source element types.
 * HD/HS2:  element index adjustment macros for vd/vs1 and for vs2.
 * OP:      operation, called as OP(accumulator, element, &env->fp_status).
 */
#define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                      \
                  void *vs2, CPURISCVState *env,                      \
                  uint32_t desc)                                      \
{                                                                     \
    const uint32_t unmasked = vext_vm(desc);                          \
    const uint32_t nr_body = env->vl;                                 \
    const uint32_t elem_bytes = sizeof(TD);                           \
    const uint32_t reg_bytes = simd_maxsz(desc);                      \
    const uint32_t tail_agnostic = vext_vta(desc);                    \
    TD acc = *((TD *)vs1 + HD(0));                                    \
    uint32_t idx;                                                     \
                                                                      \
    for (idx = env->vstart; idx < nr_body; idx++) {                   \
        TS2 elem = *((TS2 *)vs2 + HS2(idx));                          \
                                                                      \
        /* masked-off elements do not take part in the reduction */   \
        if (unmasked || vext_elem_mask(v0, idx)) {                    \
            acc = OP(acc, (TD)elem, &env->fp_status);                 \
        }                                                             \
    }                                                                 \
    *((TD *)vd + HD(0)) = acc;                                        \
    env->vstart = 0;                                                  \
    /* set tail elements to 1s */                                     \
    vext_set_elems_1s(vd, tail_agnostic, elem_bytes, reg_bytes);      \
}
float32_maximum_number) 4492 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, 4493 float64_maximum_number) 4494 4495 /* Minimum value */ 4496 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, 4497 float16_minimum_number) 4498 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, 4499 float32_minimum_number) 4500 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, 4501 float64_minimum_number) 4502 4503 /* Vector Widening Floating-Point Add Instructions */ 4504 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4505 { 4506 return float32_add(a, float16_to_float32(b, true, s), s); 4507 } 4508 4509 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4510 { 4511 return float64_add(a, float32_to_float64(b, s), s); 4512 } 4513 4514 /* Vector Widening Floating-Point Reduction Instructions */ 4515 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4516 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4517 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4518 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4519 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4520 4521 /* 4522 * Vector Mask Operations 4523 */ 4524 /* Vector Mask-Register Logical Instructions */ 4525 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4526 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4527 void *vs2, CPURISCVState *env, \ 4528 uint32_t desc) \ 4529 { \ 4530 uint32_t vl = env->vl; \ 4531 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4532 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4533 uint32_t i; \ 4534 int a, b; \ 4535 \ 4536 for (i = env->vstart; i < vl; i++) { \ 4537 a = vext_elem_mask(vs1, i); \ 4538 b = vext_elem_mask(vs2, i); \ 4539 vext_set_elem_mask(vd, i, OP(b, a)); \ 4540 } \ 4541 env->vstart = 0; \ 4542 /* 4543 * mask destination register are always tail-agnostic 4544 * set tail elements to 1s 4545 */ \ 4546 if (vta_all_1s) { \ 4547 for (; i < 
/*
 * Single-bit logical operations for the mask-register instructions.
 * The '!' forms treat each operand as a truth value, so operands are
 * expected to be 0 or 1.  Every argument is parenthesized so that a
 * compound expression passed as N or M is evaluated as a unit.
 */
#define DO_NAND(N, M)  (!((N) & (M)))
#define DO_ANDNOT(N, M)  ((N) & !(M))
#define DO_NOR(N, M)  (!((N) | (M)))
#define DO_ORNOT(N, M)  ((N) | !(M))
#define DO_XNOR(N, M)  (!((N) ^ (M)))
env->vstart; i < vl; i++) { 4625 if (!vm && !vext_elem_mask(v0, i)) { 4626 /* set masked-off elements to 1s */ 4627 if (vma) { 4628 vext_set_elem_mask(vd, i, 1); 4629 } 4630 continue; 4631 } 4632 /* write a zero to all following active elements */ 4633 if (first_mask_bit) { 4634 vext_set_elem_mask(vd, i, 0); 4635 continue; 4636 } 4637 if (vext_elem_mask(vs2, i)) { 4638 first_mask_bit = true; 4639 if (type == BEFORE_FIRST) { 4640 vext_set_elem_mask(vd, i, 0); 4641 } else { 4642 vext_set_elem_mask(vd, i, 1); 4643 } 4644 } else { 4645 if (type == ONLY_FIRST) { 4646 vext_set_elem_mask(vd, i, 0); 4647 } else { 4648 vext_set_elem_mask(vd, i, 1); 4649 } 4650 } 4651 } 4652 env->vstart = 0; 4653 /* 4654 * mask destination register are always tail-agnostic 4655 * set tail elements to 1s 4656 */ 4657 if (vta_all_1s) { 4658 for (; i < total_elems; i++) { 4659 vext_set_elem_mask(vd, i, 1); 4660 } 4661 } 4662 } 4663 4664 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4665 uint32_t desc) 4666 { 4667 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4668 } 4669 4670 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4671 uint32_t desc) 4672 { 4673 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4674 } 4675 4676 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4677 uint32_t desc) 4678 { 4679 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4680 } 4681 4682 /* Vector Iota Instruction */ 4683 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4684 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4685 uint32_t desc) \ 4686 { \ 4687 uint32_t vm = vext_vm(desc); \ 4688 uint32_t vl = env->vl; \ 4689 uint32_t esz = sizeof(ETYPE); \ 4690 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4691 uint32_t vta = vext_vta(desc); \ 4692 uint32_t vma = vext_vma(desc); \ 4693 uint32_t sum = 0; \ 4694 int i; \ 4695 \ 4696 for (i = env->vstart; i < vl; i++) { \ 4697 if (!vm && !vext_elem_mask(v0, i)) { \ 4698 /* set 
masked-off elements to 1s */ \ 4699 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4700 continue; \ 4701 } \ 4702 *((ETYPE *)vd + H(i)) = sum; \ 4703 if (vext_elem_mask(vs2, i)) { \ 4704 sum++; \ 4705 } \ 4706 } \ 4707 env->vstart = 0; \ 4708 /* set tail elements to 1s */ \ 4709 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4710 } 4711 4712 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4713 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4714 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4715 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4716 4717 /* Vector Element Index Instruction */ 4718 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4719 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4720 { \ 4721 uint32_t vm = vext_vm(desc); \ 4722 uint32_t vl = env->vl; \ 4723 uint32_t esz = sizeof(ETYPE); \ 4724 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4725 uint32_t vta = vext_vta(desc); \ 4726 uint32_t vma = vext_vma(desc); \ 4727 int i; \ 4728 \ 4729 for (i = env->vstart; i < vl; i++) { \ 4730 if (!vm && !vext_elem_mask(v0, i)) { \ 4731 /* set masked-off elements to 1s */ \ 4732 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4733 continue; \ 4734 } \ 4735 *((ETYPE *)vd + H(i)) = i; \ 4736 } \ 4737 env->vstart = 0; \ 4738 /* set tail elements to 1s */ \ 4739 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4740 } 4741 4742 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4743 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4744 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4745 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4746 4747 /* 4748 * Vector Permutation Instructions 4749 */ 4750 4751 /* Vector Slide Instructions */ 4752 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4753 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4754 CPURISCVState *env, uint32_t desc) \ 4755 { \ 4756 uint32_t vm = vext_vm(desc); \ 4757 uint32_t vl = env->vl; \ 4758 uint32_t esz = sizeof(ETYPE); \ 4759 uint32_t total_elems = vext_get_total_elems(env, 
desc, esz); \ 4760 uint32_t vta = vext_vta(desc); \ 4761 uint32_t vma = vext_vma(desc); \ 4762 target_ulong offset = s1, i_min, i; \ 4763 \ 4764 i_min = MAX(env->vstart, offset); \ 4765 for (i = i_min; i < vl; i++) { \ 4766 if (!vm && !vext_elem_mask(v0, i)) { \ 4767 /* set masked-off elements to 1s */ \ 4768 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4769 continue; \ 4770 } \ 4771 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4772 } \ 4773 /* set tail elements to 1s */ \ 4774 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4775 } 4776 4777 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4778 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4779 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4780 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4781 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4782 4783 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4784 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4785 CPURISCVState *env, uint32_t desc) \ 4786 { \ 4787 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4788 uint32_t vm = vext_vm(desc); \ 4789 uint32_t vl = env->vl; \ 4790 uint32_t esz = sizeof(ETYPE); \ 4791 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4792 uint32_t vta = vext_vta(desc); \ 4793 uint32_t vma = vext_vma(desc); \ 4794 target_ulong i_max, i_min, i; \ 4795 \ 4796 i_min = MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl); \ 4797 i_max = MAX(i_min, env->vstart); \ 4798 for (i = env->vstart; i < i_max; ++i) { \ 4799 if (!vm && !vext_elem_mask(v0, i)) { \ 4800 /* set masked-off elements to 1s */ \ 4801 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4802 continue; \ 4803 } \ 4804 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4805 } \ 4806 \ 4807 for (i = i_max; i < vl; ++i) { \ 4808 if (vm || vext_elem_mask(v0, i)) { \ 4809 *((ETYPE *)vd + H(i)) = 0; \ 4810 } \ 4811 } \ 4812 \ 4813 env->vstart = 0; \ 4814 /* set tail elements to 1s */ \ 4815 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4816 } 4817 4818 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4819 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4820 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4821 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4822 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4823 4824 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ 4825 static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ 4826 void *vs2, CPURISCVState *env, \ 4827 uint32_t desc) \ 4828 { \ 4829 typedef uint##BITWIDTH##_t ETYPE; \ 4830 uint32_t vm = vext_vm(desc); \ 4831 uint32_t vl = env->vl; \ 4832 uint32_t esz = sizeof(ETYPE); \ 4833 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4834 uint32_t vta = vext_vta(desc); \ 4835 uint32_t vma = vext_vma(desc); \ 4836 uint32_t i; \ 4837 \ 4838 for (i = env->vstart; i < vl; i++) { \ 4839 if (!vm && !vext_elem_mask(v0, i)) { \ 4840 /* set masked-off elements to 1s */ \ 4841 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4842 continue; \ 4843 } \ 4844 if (i == 0) { \ 4845 *((ETYPE *)vd + H(i)) = s1; \ 4846 } else { \ 4847 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4848 } \ 4849 } \ 4850 env->vstart = 0; \ 4851 /* set tail elements to 1s */ \ 4852 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4853 } 4854 4855 GEN_VEXT_VSLIE1UP(8, H1) 4856 GEN_VEXT_VSLIE1UP(16, 
H2) 4857 GEN_VEXT_VSLIE1UP(32, H4) 4858 GEN_VEXT_VSLIE1UP(64, H8) 4859 4860 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ 4861 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4862 CPURISCVState *env, uint32_t desc) \ 4863 { \ 4864 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4865 } 4866 4867 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4868 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4869 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4870 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4871 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4872 4873 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \ 4874 static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ 4875 void *vs2, CPURISCVState *env, \ 4876 uint32_t desc) \ 4877 { \ 4878 typedef uint##BITWIDTH##_t ETYPE; \ 4879 uint32_t vm = vext_vm(desc); \ 4880 uint32_t vl = env->vl; \ 4881 uint32_t esz = sizeof(ETYPE); \ 4882 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4883 uint32_t vta = vext_vta(desc); \ 4884 uint32_t vma = vext_vma(desc); \ 4885 uint32_t i; \ 4886 \ 4887 for (i = env->vstart; i < vl; i++) { \ 4888 if (!vm && !vext_elem_mask(v0, i)) { \ 4889 /* set masked-off elements to 1s */ \ 4890 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4891 continue; \ 4892 } \ 4893 if (i == vl - 1) { \ 4894 *((ETYPE *)vd + H(i)) = s1; \ 4895 } else { \ 4896 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4897 } \ 4898 } \ 4899 env->vstart = 0; \ 4900 /* set tail elements to 1s */ \ 4901 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4902 } 4903 4904 GEN_VEXT_VSLIDE1DOWN(8, H1) 4905 GEN_VEXT_VSLIDE1DOWN(16, H2) 4906 GEN_VEXT_VSLIDE1DOWN(32, H4) 4907 GEN_VEXT_VSLIDE1DOWN(64, H8) 4908 4909 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ 4910 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4911 CPURISCVState *env, uint32_t desc) \ 4912 { \ 4913 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4914 } 4915 4916 /* 
vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4917 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4918 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4919 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4920 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4921 4922 /* Vector Floating-Point Slide Instructions */ 4923 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \ 4924 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4925 CPURISCVState *env, uint32_t desc) \ 4926 { \ 4927 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4928 } 4929 4930 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */ 4931 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4932 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4933 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4934 4935 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \ 4936 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4937 CPURISCVState *env, uint32_t desc) \ 4938 { \ 4939 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4940 } 4941 4942 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4943 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4944 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4945 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4946 4947 /* Vector Register Gather Instruction */ 4948 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4949 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4950 CPURISCVState *env, uint32_t desc) \ 4951 { \ 4952 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4953 uint32_t vm = vext_vm(desc); \ 4954 uint32_t vl = env->vl; \ 4955 uint32_t esz = sizeof(TS2); \ 4956 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4957 uint32_t vta = vext_vta(desc); \ 4958 uint32_t vma = vext_vma(desc); \ 4959 uint64_t index; \ 4960 uint32_t i; \ 4961 \ 4962 for (i = env->vstart; i < vl; i++) { \ 4963 if (!vm && !vext_elem_mask(v0, i)) { \ 4964 /* set masked-off elements to 
1s */ \ 4965 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4966 continue; \ 4967 } \ 4968 index = *((TS1 *)vs1 + HS1(i)); \ 4969 if (index >= vlmax) { \ 4970 *((TS2 *)vd + HS2(i)) = 0; \ 4971 } else { \ 4972 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4973 } \ 4974 } \ 4975 env->vstart = 0; \ 4976 /* set tail elements to 1s */ \ 4977 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4978 } 4979 4980 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */ 4981 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1) 4982 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2) 4983 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4) 4984 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8) 4985 4986 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1) 4987 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2) 4988 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4) 4989 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8) 4990 4991 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4992 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4993 CPURISCVState *env, uint32_t desc) \ 4994 { \ 4995 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4996 uint32_t vm = vext_vm(desc); \ 4997 uint32_t vl = env->vl; \ 4998 uint32_t esz = sizeof(ETYPE); \ 4999 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5000 uint32_t vta = vext_vta(desc); \ 5001 uint32_t vma = vext_vma(desc); \ 5002 uint64_t index = s1; \ 5003 uint32_t i; \ 5004 \ 5005 for (i = env->vstart; i < vl; i++) { \ 5006 if (!vm && !vext_elem_mask(v0, i)) { \ 5007 /* set masked-off elements to 1s */ \ 5008 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 5009 continue; \ 5010 } \ 5011 if (index >= vlmax) { \ 5012 *((ETYPE *)vd + H(i)) = 0; \ 5013 } else { \ 5014 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 5015 } \ 5016 } \ 5017 env->vstart = 0; \ 
5018 /* set tail elements to 1s */ \ 5019 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5020 } 5021 5022 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ 5023 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 5024 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 5025 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 5026 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 5027 5028 /* Vector Compress Instruction */ 5029 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 5030 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 5031 CPURISCVState *env, uint32_t desc) \ 5032 { \ 5033 uint32_t vl = env->vl; \ 5034 uint32_t esz = sizeof(ETYPE); \ 5035 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5036 uint32_t vta = vext_vta(desc); \ 5037 uint32_t num = 0, i; \ 5038 \ 5039 for (i = env->vstart; i < vl; i++) { \ 5040 if (!vext_elem_mask(vs1, i)) { \ 5041 continue; \ 5042 } \ 5043 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 5044 num++; \ 5045 } \ 5046 env->vstart = 0; \ 5047 /* set tail elements to 1s */ \ 5048 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5049 } 5050 5051 /* Compress into vd elements of vs2 where vs1 is enabled */ 5052 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 5053 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 5054 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 5055 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 5056 5057 /* Vector Whole Register Move */ 5058 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) 5059 { 5060 /* EEW = SEW */ 5061 uint32_t maxsz = simd_maxsz(desc); 5062 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); 5063 uint32_t startb = env->vstart * sewb; 5064 uint32_t i = startb; 5065 5066 memcpy((uint8_t *)vd + H1(i), 5067 (uint8_t *)vs2 + H1(i), 5068 maxsz - startb); 5069 5070 env->vstart = 0; 5071 } 5072 5073 /* Vector Integer Extension */ 5074 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \ 5075 void 
HELPER(NAME)(void *vd, void *v0, void *vs2, \ 5076 CPURISCVState *env, uint32_t desc) \ 5077 { \ 5078 uint32_t vl = env->vl; \ 5079 uint32_t vm = vext_vm(desc); \ 5080 uint32_t esz = sizeof(ETYPE); \ 5081 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 5082 uint32_t vta = vext_vta(desc); \ 5083 uint32_t vma = vext_vma(desc); \ 5084 uint32_t i; \ 5085 \ 5086 for (i = env->vstart; i < vl; i++) { \ 5087 if (!vm && !vext_elem_mask(v0, i)) { \ 5088 /* set masked-off elements to 1s */ \ 5089 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 5090 continue; \ 5091 } \ 5092 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \ 5093 } \ 5094 env->vstart = 0; \ 5095 /* set tail elements to 1s */ \ 5096 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 5097 } 5098 5099 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1) 5100 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2) 5101 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4) 5102 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1) 5103 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2) 5104 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1) 5105 5106 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1) 5107 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2) 5108 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4) 5109 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1) 5110 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2) 5111 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1) 5112