/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "qemu/bitops.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include "vector_internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    int xlen = riscv_cpu_xlen(env);
    bool vill = (s2 >> (xlen - 1)) & 0x1;
    target_ulong reserved = s2 &
                            MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
                                            xlen - 1 - R_VTYPE_RESERVED_SHIFT);

    if (lmul & 4) {
        /* Fractional LMUL - check LMUL * VLEN >= SEW */
        if (lmul == 4 ||
            cpu->cfg.vlen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vill = 1;
        env->vtype = 0;
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    env->vill = 0;
    return vl;
}

/*
 * Get the maximum number of elements that can be operated on.
 *
 * log2_esz: log2 of element size in bytes.
 */
static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
{
    /*
     * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
     * so vlen in bytes (vlenb) is encoded as maxsz.
     */
    uint32_t vlenb = simd_maxsz(desc);

    /* Return VLMAX */
    int scale = vext_lmul(desc) - log2_esz;
    return scale < 0 ? vlenb >> -scale : vlenb << scale;
}

static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
{
    return (addr & ~env->cur_pmmask) | env->cur_pmbase;
}

/*
 * This function checks watchpoints before the real load operation.
 *
 * In system mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support for now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
109 */ 110 static void probe_pages(CPURISCVState *env, target_ulong addr, 111 target_ulong len, uintptr_t ra, 112 MMUAccessType access_type) 113 { 114 target_ulong pagelen = -(addr | TARGET_PAGE_MASK); 115 target_ulong curlen = MIN(pagelen, len); 116 int mmu_index = riscv_env_mmu_index(env, false); 117 118 probe_access(env, adjust_addr(env, addr), curlen, access_type, 119 mmu_index, ra); 120 if (len > curlen) { 121 addr += curlen; 122 curlen = len - curlen; 123 probe_access(env, adjust_addr(env, addr), curlen, access_type, 124 mmu_index, ra); 125 } 126 } 127 128 static inline void vext_set_elem_mask(void *v0, int index, 129 uint8_t value) 130 { 131 int idx = index / 64; 132 int pos = index % 64; 133 uint64_t old = ((uint64_t *)v0)[idx]; 134 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); 135 } 136 137 /* elements operations for load and store */ 138 typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr, 139 uint32_t idx, void *vd, uintptr_t retaddr); 140 141 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 142 static void NAME(CPURISCVState *env, abi_ptr addr, \ 143 uint32_t idx, void *vd, uintptr_t retaddr)\ 144 { \ 145 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 146 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 147 } \ 148 149 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) 150 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw) 151 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) 152 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq) 153 154 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 155 static void NAME(CPURISCVState *env, abi_ptr addr, \ 156 uint32_t idx, void *vd, uintptr_t retaddr)\ 157 { \ 158 ETYPE data = *((ETYPE *)vd + H(idx)); \ 159 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 160 } 161 162 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) 163 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) 164 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) 165 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) 166 167 static void vext_set_tail_elems_1s(target_ulong vl, void *vd, 168 uint32_t desc, uint32_t nf, 169 uint32_t esz, uint32_t max_elems) 170 { 171 uint32_t vta = vext_vta(desc); 172 int k; 173 174 if (vta == 0) { 175 return; 176 } 177 178 for (k = 0; k < nf; ++k) { 179 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz, 180 (k * max_elems + max_elems) * esz); 181 } 182 } 183 184 /* 185 * stride: access vector element from strided memory 186 */ 187 static void 188 vext_ldst_stride(void *vd, void *v0, target_ulong base, 189 target_ulong stride, CPURISCVState *env, 190 uint32_t desc, uint32_t vm, 191 vext_ldst_elem_fn *ldst_elem, 192 uint32_t log2_esz, uintptr_t ra) 193 { 194 uint32_t i, k; 195 uint32_t nf = vext_nf(desc); 196 uint32_t max_elems = vext_max_elems(desc, log2_esz); 197 uint32_t esz = 1 << log2_esz; 198 uint32_t vma = vext_vma(desc); 199 200 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 201 k = 0; 202 while (k < nf) { 203 if (!vm && !vext_elem_mask(v0, i)) { 204 /* set masked-off elements to 1s */ 205 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 206 (i + k * max_elems + 1) * esz); 207 k++; 208 continue; 209 } 210 target_ulong addr = base + stride * i + (k << log2_esz); 211 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 212 k++; 213 } 214 } 215 env->vstart = 0; 216 217 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 218 } 219 220 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 221 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 222 target_ulong stride, CPURISCVState *env, \ 223 uint32_t desc) \ 224 { \ 225 uint32_t vm = 
vext_vm(desc); \ 226 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 227 ctzl(sizeof(ETYPE)), GETPC()); \ 228 } 229 230 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 231 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 232 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 233 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 234 235 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 236 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 237 target_ulong stride, CPURISCVState *env, \ 238 uint32_t desc) \ 239 { \ 240 uint32_t vm = vext_vm(desc); \ 241 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 242 ctzl(sizeof(ETYPE)), GETPC()); \ 243 } 244 245 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 246 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 247 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 248 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 249 250 /* 251 * unit-stride: access elements stored contiguously in memory 252 */ 253 254 /* unmasked unit-stride load and store operation */ 255 static void 256 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 257 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl, 258 uintptr_t ra) 259 { 260 uint32_t i, k; 261 uint32_t nf = vext_nf(desc); 262 uint32_t max_elems = vext_max_elems(desc, log2_esz); 263 uint32_t esz = 1 << log2_esz; 264 265 /* load bytes from guest memory */ 266 for (i = env->vstart; i < evl; i++, env->vstart++) { 267 k = 0; 268 while (k < nf) { 269 target_ulong addr = base + ((i * nf + k) << log2_esz); 270 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 271 k++; 272 } 273 } 274 env->vstart = 0; 275 276 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems); 277 } 278 279 /* 280 * masked unit-stride load and store operation will be a special case of 281 * stride, stride = NF * sizeof (ETYPE) 282 */ 283 284 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 285 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 286 CPURISCVState *env, uint32_t desc) \ 287 { \ 288 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 289 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 290 ctzl(sizeof(ETYPE)), GETPC()); \ 291 } \ 292 \ 293 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 294 CPURISCVState *env, uint32_t desc) \ 295 { \ 296 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 297 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 298 } 299 300 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 301 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 302 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 303 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 304 305 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 306 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 307 CPURISCVState *env, uint32_t desc) \ 308 { \ 309 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \ 310 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 311 ctzl(sizeof(ETYPE)), GETPC()); \ 312 } \ 313 \ 314 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 315 CPURISCVState *env, uint32_t desc) \ 316 { \ 317 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 318 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \ 319 } 320 321 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 322 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 323 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 324 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 325 326 /* 327 * unit stride mask load and store, EEW = 1 328 */ 329 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base, 330 CPURISCVState *env, uint32_t desc) 331 { 332 /* evl = 
ceil(vl/8) */ 333 uint8_t evl = (env->vl + 7) >> 3; 334 vext_ldst_us(vd, base, env, desc, lde_b, 335 0, evl, GETPC()); 336 } 337 338 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base, 339 CPURISCVState *env, uint32_t desc) 340 { 341 /* evl = ceil(vl/8) */ 342 uint8_t evl = (env->vl + 7) >> 3; 343 vext_ldst_us(vd, base, env, desc, ste_b, 344 0, evl, GETPC()); 345 } 346 347 /* 348 * index: access vector element from indexed memory 349 */ 350 typedef target_ulong vext_get_index_addr(target_ulong base, 351 uint32_t idx, void *vs2); 352 353 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 354 static target_ulong NAME(target_ulong base, \ 355 uint32_t idx, void *vs2) \ 356 { \ 357 return (base + *((ETYPE *)vs2 + H(idx))); \ 358 } 359 360 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 361 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 362 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 363 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 364 365 static inline void 366 vext_ldst_index(void *vd, void *v0, target_ulong base, 367 void *vs2, CPURISCVState *env, uint32_t desc, 368 vext_get_index_addr get_index_addr, 369 vext_ldst_elem_fn *ldst_elem, 370 uint32_t log2_esz, uintptr_t ra) 371 { 372 uint32_t i, k; 373 uint32_t nf = vext_nf(desc); 374 uint32_t vm = vext_vm(desc); 375 uint32_t max_elems = vext_max_elems(desc, log2_esz); 376 uint32_t esz = 1 << log2_esz; 377 uint32_t vma = vext_vma(desc); 378 379 /* load bytes from guest memory */ 380 for (i = env->vstart; i < env->vl; i++, env->vstart++) { 381 k = 0; 382 while (k < nf) { 383 if (!vm && !vext_elem_mask(v0, i)) { 384 /* set masked-off elements to 1s */ 385 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz, 386 (i + k * max_elems + 1) * esz); 387 k++; 388 continue; 389 } 390 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz); 391 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 392 k++; 393 } 394 } 395 env->vstart = 0; 396 397 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems); 398 } 399 400 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 401 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 402 void *vs2, CPURISCVState *env, uint32_t desc) \ 403 { \ 404 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 405 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \ 406 } 407 408 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 409 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 410 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 411 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 412 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 413 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 414 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 415 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 416 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 417 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 418 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 419 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 420 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 421 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 422 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 423 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 424 425 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 426 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 427 void *vs2, CPURISCVState *env, uint32_t desc) \ 428 { \ 429 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 430 STORE_FN, ctzl(sizeof(ETYPE)), \ 431 GETPC()); \ 432 
}

GEN_VEXT_ST_INDEX(vsxei8_8_v,   int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxei8_16_v,  int16_t, idx_b, ste_h)
GEN_VEXT_ST_INDEX(vsxei8_32_v,  int32_t, idx_b, ste_w)
GEN_VEXT_ST_INDEX(vsxei8_64_v,  int64_t, idx_b, ste_d)
GEN_VEXT_ST_INDEX(vsxei16_8_v,  int8_t,  idx_h, ste_b)
GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
GEN_VEXT_ST_INDEX(vsxei32_8_v,  int8_t,  idx_w, ste_b)
GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
GEN_VEXT_ST_INDEX(vsxei64_8_v,  int8_t,  idx_d, ste_b)
GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)

/*
 * unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t log2_esz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t max_elems = vext_max_elems(desc, log2_esz);
    uint32_t esz = 1 << log2_esz;
    uint32_t vma = vext_vma(desc);
    target_ulong addr, offset, remain;
    int mmu_index = riscv_env_mmu_index(env, false);

    /* probe every access */
    for (i = env->vstart; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = adjust_addr(env, base + i * (nf << log2_esz));
        if (i == 0) {
            probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf << log2_esz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_index);
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (!page_check_range(addr, offset, PAGE_READ)) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr = adjust_addr(env, addr + offset);
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = env->vstart; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            if (!vm && !vext_elem_mask(v0, i)) {
                /* set masked-off elements to 1s */
                vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
                                  (i + k * max_elems + 1) * esz);
                k++;
                continue;
            }
            addr = base + ((i * nf + k) << log2_esz);
            ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
            k++;
        }
    }
    env->vstart = 0;

    vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
}

#define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,  \
                  CPURISCVState *env, uint32_t desc)      \
{                                                         \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,           \
              ctzl(sizeof(ETYPE)), GETPC());              \
}

GEN_VEXT_LDFF(vle8ff_v,  int8_t,  lde_b)
GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)

#define DO_SWAP(N, M) (M)
#define
DO_AND(N, M) (N & M) 545 #define DO_XOR(N, M) (N ^ M) 546 #define DO_OR(N, M) (N | M) 547 #define DO_ADD(N, M) (N + M) 548 549 /* Signed min/max */ 550 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 551 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 552 553 /* 554 * load and store whole register instructions 555 */ 556 static void 557 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 558 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra) 559 { 560 uint32_t i, k, off, pos; 561 uint32_t nf = vext_nf(desc); 562 uint32_t vlenb = riscv_cpu_cfg(env)->vlen >> 3; 563 uint32_t max_elems = vlenb >> log2_esz; 564 565 k = env->vstart / max_elems; 566 off = env->vstart % max_elems; 567 568 if (off) { 569 /* load/store rest of elements of current segment pointed by vstart */ 570 for (pos = off; pos < max_elems; pos++, env->vstart++) { 571 target_ulong addr = base + ((pos + k * max_elems) << log2_esz); 572 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, 573 ra); 574 } 575 k++; 576 } 577 578 /* load/store elements for rest of segments */ 579 for (; k < nf; k++) { 580 for (i = 0; i < max_elems; i++, env->vstart++) { 581 target_ulong addr = base + ((i + k * max_elems) << log2_esz); 582 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra); 583 } 584 } 585 586 env->vstart = 0; 587 } 588 589 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \ 590 void HELPER(NAME)(void *vd, target_ulong base, \ 591 CPURISCVState *env, uint32_t desc) \ 592 { \ 593 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \ 594 ctzl(sizeof(ETYPE)), GETPC()); \ 595 } 596 597 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b) 598 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h) 599 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w) 600 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d) 601 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b) 602 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h) 603 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w) 604 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d) 605 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b) 606 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h) 607 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w) 608 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d) 609 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b) 610 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h) 611 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w) 612 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d) 613 614 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \ 615 void HELPER(NAME)(void *vd, target_ulong base, \ 616 CPURISCVState *env, uint32_t desc) \ 617 { \ 618 vext_ldst_whole(vd, base, env, desc, STORE_FN, \ 619 ctzl(sizeof(ETYPE)), GETPC()); \ 620 } 621 622 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b) 623 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b) 624 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b) 625 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) 626 627 /* 628 * Vector Integer Arithmetic Instructions 629 */ 630 631 /* (TD, T1, T2, TX1, TX2) */ 632 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 633 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 634 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 635 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 636 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 637 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 638 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 639 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 640 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 641 #define WOP_SSS_H int32_t, int16_t, 
int16_t, int32_t, int32_t 642 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 643 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 644 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 645 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 646 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 647 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 648 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 649 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 650 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 651 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 652 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 653 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 654 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 655 656 #define DO_SUB(N, M) (N - M) 657 #define DO_RSUB(N, M) (M - N) 658 659 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 660 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 661 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 662 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 663 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 664 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 665 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 666 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 667 668 GEN_VEXT_VV(vadd_vv_b, 1) 669 GEN_VEXT_VV(vadd_vv_h, 2) 670 GEN_VEXT_VV(vadd_vv_w, 4) 671 GEN_VEXT_VV(vadd_vv_d, 8) 672 GEN_VEXT_VV(vsub_vv_b, 1) 673 GEN_VEXT_VV(vsub_vv_h, 2) 674 GEN_VEXT_VV(vsub_vv_w, 4) 675 GEN_VEXT_VV(vsub_vv_d, 8) 676 677 678 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 679 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 680 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 681 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 682 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 683 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 684 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 685 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 686 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 687 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 688 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 689 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 690 691 GEN_VEXT_VX(vadd_vx_b, 1) 692 GEN_VEXT_VX(vadd_vx_h, 2) 693 GEN_VEXT_VX(vadd_vx_w, 4) 694 GEN_VEXT_VX(vadd_vx_d, 8) 695 GEN_VEXT_VX(vsub_vx_b, 1) 696 GEN_VEXT_VX(vsub_vx_h, 2) 697 GEN_VEXT_VX(vsub_vx_w, 4) 698 GEN_VEXT_VX(vsub_vx_d, 8) 699 GEN_VEXT_VX(vrsub_vx_b, 1) 700 GEN_VEXT_VX(vrsub_vx_h, 2) 701 GEN_VEXT_VX(vrsub_vx_w, 4) 702 GEN_VEXT_VX(vrsub_vx_d, 8) 703 704 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 705 { 706 intptr_t oprsz = simd_oprsz(desc); 707 intptr_t i; 708 709 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 710 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 711 } 712 } 713 714 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 715 { 716 intptr_t oprsz = simd_oprsz(desc); 717 intptr_t i; 718 719 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 720 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 721 } 722 } 723 724 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 725 { 726 intptr_t oprsz = simd_oprsz(desc); 727 intptr_t i; 728 729 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 730 *(uint32_t *)(d + i) = (uint32_t)b - 
*(uint32_t *)(a + i); 731 } 732 } 733 734 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 735 { 736 intptr_t oprsz = simd_oprsz(desc); 737 intptr_t i; 738 739 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 740 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 741 } 742 } 743 744 /* Vector Widening Integer Add/Subtract */ 745 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 746 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 747 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 748 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 749 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 750 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 751 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 752 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 753 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 754 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 755 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 756 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 757 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 758 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 759 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 760 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 761 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 762 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 763 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 764 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 765 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 766 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 767 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 768 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 769 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 770 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 771 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 772 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 773 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 774 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 775 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 776 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 777 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 778 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 779 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 780 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 781 GEN_VEXT_VV(vwaddu_vv_b, 2) 782 GEN_VEXT_VV(vwaddu_vv_h, 4) 783 GEN_VEXT_VV(vwaddu_vv_w, 8) 784 GEN_VEXT_VV(vwsubu_vv_b, 2) 785 GEN_VEXT_VV(vwsubu_vv_h, 4) 786 GEN_VEXT_VV(vwsubu_vv_w, 8) 787 GEN_VEXT_VV(vwadd_vv_b, 2) 788 GEN_VEXT_VV(vwadd_vv_h, 4) 789 GEN_VEXT_VV(vwadd_vv_w, 8) 790 GEN_VEXT_VV(vwsub_vv_b, 2) 791 GEN_VEXT_VV(vwsub_vv_h, 4) 792 GEN_VEXT_VV(vwsub_vv_w, 8) 793 GEN_VEXT_VV(vwaddu_wv_b, 2) 794 GEN_VEXT_VV(vwaddu_wv_h, 4) 795 GEN_VEXT_VV(vwaddu_wv_w, 8) 796 GEN_VEXT_VV(vwsubu_wv_b, 2) 797 GEN_VEXT_VV(vwsubu_wv_h, 4) 798 GEN_VEXT_VV(vwsubu_wv_w, 8) 799 GEN_VEXT_VV(vwadd_wv_b, 2) 800 GEN_VEXT_VV(vwadd_wv_h, 4) 801 GEN_VEXT_VV(vwadd_wv_w, 8) 802 GEN_VEXT_VV(vwsub_wv_b, 2) 803 GEN_VEXT_VV(vwsub_wv_h, 4) 804 GEN_VEXT_VV(vwsub_wv_w, 8) 805 806 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 807 RVVCALL(OPIVX2, 
vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 808 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 809 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 810 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 811 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 812 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 813 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 814 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 815 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 816 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 817 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 818 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 819 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 820 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 821 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 822 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 823 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 824 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 825 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 826 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 827 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 828 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 829 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 830 GEN_VEXT_VX(vwaddu_vx_b, 2) 831 GEN_VEXT_VX(vwaddu_vx_h, 4) 832 GEN_VEXT_VX(vwaddu_vx_w, 8) 833 GEN_VEXT_VX(vwsubu_vx_b, 2) 834 GEN_VEXT_VX(vwsubu_vx_h, 4) 835 GEN_VEXT_VX(vwsubu_vx_w, 8) 836 GEN_VEXT_VX(vwadd_vx_b, 2) 837 GEN_VEXT_VX(vwadd_vx_h, 4) 838 GEN_VEXT_VX(vwadd_vx_w, 8) 839 GEN_VEXT_VX(vwsub_vx_b, 2) 840 GEN_VEXT_VX(vwsub_vx_h, 4) 841 GEN_VEXT_VX(vwsub_vx_w, 8) 842 GEN_VEXT_VX(vwaddu_wx_b, 2) 843 GEN_VEXT_VX(vwaddu_wx_h, 4) 844 GEN_VEXT_VX(vwaddu_wx_w, 8) 845 GEN_VEXT_VX(vwsubu_wx_b, 2) 846 GEN_VEXT_VX(vwsubu_wx_h, 4) 847 GEN_VEXT_VX(vwsubu_wx_w, 8) 848 GEN_VEXT_VX(vwadd_wx_b, 2) 849 GEN_VEXT_VX(vwadd_wx_h, 4) 850 GEN_VEXT_VX(vwadd_wx_w, 8) 851 GEN_VEXT_VX(vwsub_wx_b, 2) 852 GEN_VEXT_VX(vwsub_wx_h, 4) 853 GEN_VEXT_VX(vwsub_wx_w, 8) 854 855 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 856 #define DO_VADC(N, M, C) (N + M + C) 857 #define DO_VSBC(N, M, C) (N - M - C) 858 859 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 860 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 861 CPURISCVState *env, uint32_t desc) \ 862 { \ 863 uint32_t vl = env->vl; \ 864 uint32_t esz = sizeof(ETYPE); \ 865 uint32_t total_elems = \ 866 vext_get_total_elems(env, desc, esz); \ 867 uint32_t vta = vext_vta(desc); \ 868 uint32_t i; \ 869 \ 870 for (i = env->vstart; i < vl; i++) { \ 871 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 872 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 873 ETYPE carry = vext_elem_mask(v0, i); \ 874 \ 875 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 876 } \ 877 env->vstart = 0; \ 878 /* set tail elements to 1s */ \ 879 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 880 } 881 882 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 883 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 884 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 885 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 886 887 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 888 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 889 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 890 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 891 892 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 893 void 
HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 894 CPURISCVState *env, uint32_t desc) \ 895 { \ 896 uint32_t vl = env->vl; \ 897 uint32_t esz = sizeof(ETYPE); \ 898 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 899 uint32_t vta = vext_vta(desc); \ 900 uint32_t i; \ 901 \ 902 for (i = env->vstart; i < vl; i++) { \ 903 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 904 ETYPE carry = vext_elem_mask(v0, i); \ 905 \ 906 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 907 } \ 908 env->vstart = 0; \ 909 /* set tail elements to 1s */ \ 910 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 911 } 912 913 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 914 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 915 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 916 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 917 918 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 919 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 920 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 921 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 922 923 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 924 (__typeof(N))(N + M) < N) 925 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 926 927 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 928 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 929 CPURISCVState *env, uint32_t desc) \ 930 { \ 931 uint32_t vl = env->vl; \ 932 uint32_t vm = vext_vm(desc); \ 933 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 934 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 935 uint32_t i; \ 936 \ 937 for (i = env->vstart; i < vl; i++) { \ 938 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 939 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 940 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 941 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 942 } \ 943 env->vstart = 0; \ 944 /* 945 * mask destination register are always tail-agnostic 946 * set tail elements to 1s 947 */ \ 948 if (vta_all_1s) { \ 949 for (; i < total_elems; i++) { \ 950 vext_set_elem_mask(vd, i, 1); \ 951 } \ 952 } \ 953 } 954 955 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 956 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 957 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 958 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 959 960 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 961 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 962 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 963 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 964 965 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 966 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 967 void *vs2, CPURISCVState *env, uint32_t desc) \ 968 { \ 969 uint32_t vl = env->vl; \ 970 uint32_t vm = vext_vm(desc); \ 971 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 972 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 973 uint32_t i; \ 974 \ 975 for (i = env->vstart; i < vl; i++) { \ 976 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 977 ETYPE carry = !vm && vext_elem_mask(v0, i); \ 978 vext_set_elem_mask(vd, i, \ 979 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 980 } \ 981 env->vstart = 0; \ 982 /* 983 * mask destination register are always tail-agnostic 984 * set tail elements to 1s 985 */ \ 986 if (vta_all_1s) { \ 987 for (; i < total_elems; i++) { \ 988 vext_set_elem_mask(vd, i, 1); \ 989 } \ 990 } \ 991 } 992 993 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 994 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 
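/*
 * Note on the carry/borrow-out macros above: the ETYPE operands are
 * unsigned, so N + M wraps modulo 2^SEW and the truncated sum is less
 * than N exactly when a carry-out occurred; with a carry-in the test
 * becomes N + M + 1 <= N.  For example, with SEW=8, N=200 and M=100,
 * (uint8_t)(N + M) == 44 < 200, so DO_MADC reports carry-out 1.
 * DO_MSBC likewise reports a borrow-out when N < M (N <= M with a
 * borrow-in).
 */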
995 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 996 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 997 998 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 999 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1000 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1001 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1002 1003 /* Vector Bitwise Logical Instructions */ 1004 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1005 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1006 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1007 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1008 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1009 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1010 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1011 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1012 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1013 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1014 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1015 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1016 GEN_VEXT_VV(vand_vv_b, 1) 1017 GEN_VEXT_VV(vand_vv_h, 2) 1018 GEN_VEXT_VV(vand_vv_w, 4) 1019 GEN_VEXT_VV(vand_vv_d, 8) 1020 GEN_VEXT_VV(vor_vv_b, 1) 1021 GEN_VEXT_VV(vor_vv_h, 2) 1022 GEN_VEXT_VV(vor_vv_w, 4) 1023 GEN_VEXT_VV(vor_vv_d, 8) 1024 GEN_VEXT_VV(vxor_vv_b, 1) 1025 GEN_VEXT_VV(vxor_vv_h, 2) 1026 GEN_VEXT_VV(vxor_vv_w, 4) 1027 GEN_VEXT_VV(vxor_vv_d, 8) 1028 1029 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1030 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1031 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1032 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1033 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1034 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1035 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1036 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1037 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1038 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1039 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1040 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1041 GEN_VEXT_VX(vand_vx_b, 1) 1042 GEN_VEXT_VX(vand_vx_h, 2) 1043 GEN_VEXT_VX(vand_vx_w, 4) 1044 GEN_VEXT_VX(vand_vx_d, 8) 1045 GEN_VEXT_VX(vor_vx_b, 1) 1046 GEN_VEXT_VX(vor_vx_h, 2) 1047 GEN_VEXT_VX(vor_vx_w, 4) 1048 GEN_VEXT_VX(vor_vx_d, 8) 1049 GEN_VEXT_VX(vxor_vx_b, 1) 1050 GEN_VEXT_VX(vxor_vx_h, 2) 1051 GEN_VEXT_VX(vxor_vx_w, 4) 1052 GEN_VEXT_VX(vxor_vx_d, 8) 1053 1054 /* Vector Single-Width Bit Shift Instructions */ 1055 #define DO_SLL(N, M) (N << (M)) 1056 #define DO_SRL(N, M) (N >> (M)) 1057 1058 /* generate the helpers for shift instructions with two vector operators */ 1059 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1060 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1061 void *vs2, CPURISCVState *env, uint32_t desc) \ 1062 { \ 1063 uint32_t vm = vext_vm(desc); \ 1064 uint32_t vl = env->vl; \ 1065 uint32_t esz = sizeof(TS1); \ 1066 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1067 uint32_t vta = vext_vta(desc); \ 1068 uint32_t vma = vext_vma(desc); \ 1069 uint32_t i; \ 1070 \ 1071 for (i = env->vstart; i < vl; i++) { \ 1072 if (!vm && !vext_elem_mask(v0, i)) { \ 1073 /* set masked-off elements to 1s */ \ 1074 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 1075 continue; \ 1076 } \ 1077 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1078 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1079 
*((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1080 } \ 1081 env->vstart = 0; \ 1082 /* set tail elements to 1s */ \ 1083 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1084 } 1085 1086 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1087 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1088 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1089 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1090 1091 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1092 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1093 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1094 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1095 1096 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1097 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1098 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1099 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1100 1101 /* 1102 * generate the helpers for shift instructions with one vector and one scalar 1103 */ 1104 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1105 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1106 void *vs2, CPURISCVState *env, \ 1107 uint32_t desc) \ 1108 { \ 1109 uint32_t vm = vext_vm(desc); \ 1110 uint32_t vl = env->vl; \ 1111 uint32_t esz = sizeof(TD); \ 1112 uint32_t total_elems = \ 1113 vext_get_total_elems(env, desc, esz); \ 1114 uint32_t vta = vext_vta(desc); \ 1115 uint32_t vma = vext_vma(desc); \ 1116 uint32_t i; \ 1117 \ 1118 for (i = env->vstart; i < vl; i++) { \ 1119 if (!vm && !vext_elem_mask(v0, i)) { \ 1120 /* set masked-off elements to 1s */ \ 1121 vext_set_elems_1s(vd, vma, i * esz, \ 1122 (i + 1) * esz); \ 1123 continue; \ 1124 } \ 1125 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1126 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1127 } \ 1128 env->vstart = 0; \ 1129 /* set tail elements to 1s */ \ 1130 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\ 1131 } 1132 1133 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1134 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1135 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1136 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1137 1138 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1139 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1140 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1141 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1142 1143 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1144 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1145 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1146 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1147 1148 /* Vector Narrowing Integer Right Shift Instructions */ 1149 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1150 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1151 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1152 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1153 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1154 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1155 
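/*
 * The narrowing right shifts reuse the shift generators above: the
 * source type TS2 is 2*SEW wide while the destination is SEW wide, and
 * the shift amount is masked with 2*SEW - 1 (0xf/0x1f/0x3f).  For vnsra
 * the source type is signed, so DO_SRL becomes an arithmetic shift with
 * the compilers QEMU supports.
 */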
GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1156 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1157 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1158 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1159 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1160 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1161 1162 /* Vector Integer Comparison Instructions */ 1163 #define DO_MSEQ(N, M) (N == M) 1164 #define DO_MSNE(N, M) (N != M) 1165 #define DO_MSLT(N, M) (N < M) 1166 #define DO_MSLE(N, M) (N <= M) 1167 #define DO_MSGT(N, M) (N > M) 1168 1169 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1170 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1171 CPURISCVState *env, uint32_t desc) \ 1172 { \ 1173 uint32_t vm = vext_vm(desc); \ 1174 uint32_t vl = env->vl; \ 1175 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 1176 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1177 uint32_t vma = vext_vma(desc); \ 1178 uint32_t i; \ 1179 \ 1180 for (i = env->vstart; i < vl; i++) { \ 1181 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1182 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1183 if (!vm && !vext_elem_mask(v0, i)) { \ 1184 /* set masked-off elements to 1s */ \ 1185 if (vma) { \ 1186 vext_set_elem_mask(vd, i, 1); \ 1187 } \ 1188 continue; \ 1189 } \ 1190 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1191 } \ 1192 env->vstart = 0; \ 1193 /* 1194 * mask destination register are always tail-agnostic 1195 * set tail elements to 1s 1196 */ \ 1197 if (vta_all_1s) { \ 1198 for (; i < total_elems; i++) { \ 1199 vext_set_elem_mask(vd, i, 1); \ 1200 } \ 1201 } \ 1202 } 1203 1204 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1205 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1206 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1207 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1208 1209 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1210 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1211 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1212 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1213 1214 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1215 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1216 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1217 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1218 1219 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1220 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1221 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1222 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1223 1224 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1225 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1226 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1227 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1228 1229 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1230 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1231 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1232 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1233 1234 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1235 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1236 CPURISCVState *env, uint32_t desc) \ 1237 { \ 1238 uint32_t vm = vext_vm(desc); \ 1239 uint32_t vl = env->vl; \ 1240 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 1241 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 1242 uint32_t vma = vext_vma(desc); \ 1243 uint32_t i; \ 1244 \ 1245 for (i = env->vstart; i < vl; i++) { \ 1246 
ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1247 if (!vm && !vext_elem_mask(v0, i)) { \ 1248 /* set masked-off elements to 1s */ \ 1249 if (vma) { \ 1250 vext_set_elem_mask(vd, i, 1); \ 1251 } \ 1252 continue; \ 1253 } \ 1254 vext_set_elem_mask(vd, i, \ 1255 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1256 } \ 1257 env->vstart = 0; \ 1258 /* 1259 * mask destination register are always tail-agnostic 1260 * set tail elements to 1s 1261 */ \ 1262 if (vta_all_1s) { \ 1263 for (; i < total_elems; i++) { \ 1264 vext_set_elem_mask(vd, i, 1); \ 1265 } \ 1266 } \ 1267 } 1268 1269 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1270 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1271 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1272 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1273 1274 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1275 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1276 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1277 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1278 1279 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1280 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1281 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1282 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1283 1284 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1285 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1286 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1287 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1288 1289 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1290 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1291 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1292 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1293 1294 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1295 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1296 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1297 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1298 1299 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1300 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1301 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1302 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1303 1304 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1305 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1306 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1307 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1308 1309 /* Vector Integer Min/Max Instructions */ 1310 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1311 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1312 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1313 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1314 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1315 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1316 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1317 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1318 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1319 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1320 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1321 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1322 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1323 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1324 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1325 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1326 GEN_VEXT_VV(vminu_vv_b, 1) 1327 GEN_VEXT_VV(vminu_vv_h, 2) 1328 GEN_VEXT_VV(vminu_vv_w, 4) 1329 
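/*
 * The RVVCALL(OPIVV2/OPIVX2, ...) lines expand to per-element helpers
 * analogous to the OPIVV3/OPIVX3 definitions further below (the
 * two-operand macros are defined in vector_internals.h).  Roughly, for
 * vminu_vv_b:
 *
 *   static void do_vminu_vv_b(void *vd, void *vs1, void *vs2, int i)
 *   {
 *       uint8_t s1 = *((uint8_t *)vs1 + H1(i));
 *       uint8_t s2 = *((uint8_t *)vs2 + H1(i));
 *       *((uint8_t *)vd + H1(i)) = DO_MIN(s2, s1);
 *   }
 *
 * GEN_VEXT_VV/GEN_VEXT_VX then wrap these per-element functions in the
 * common loop that handles masking, vstart and the tail policy.
 */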
GEN_VEXT_VV(vminu_vv_d, 8) 1330 GEN_VEXT_VV(vmin_vv_b, 1) 1331 GEN_VEXT_VV(vmin_vv_h, 2) 1332 GEN_VEXT_VV(vmin_vv_w, 4) 1333 GEN_VEXT_VV(vmin_vv_d, 8) 1334 GEN_VEXT_VV(vmaxu_vv_b, 1) 1335 GEN_VEXT_VV(vmaxu_vv_h, 2) 1336 GEN_VEXT_VV(vmaxu_vv_w, 4) 1337 GEN_VEXT_VV(vmaxu_vv_d, 8) 1338 GEN_VEXT_VV(vmax_vv_b, 1) 1339 GEN_VEXT_VV(vmax_vv_h, 2) 1340 GEN_VEXT_VV(vmax_vv_w, 4) 1341 GEN_VEXT_VV(vmax_vv_d, 8) 1342 1343 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1344 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1345 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1346 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1347 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1348 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1349 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1350 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1351 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1352 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1353 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1354 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1355 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1356 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1357 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1358 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1359 GEN_VEXT_VX(vminu_vx_b, 1) 1360 GEN_VEXT_VX(vminu_vx_h, 2) 1361 GEN_VEXT_VX(vminu_vx_w, 4) 1362 GEN_VEXT_VX(vminu_vx_d, 8) 1363 GEN_VEXT_VX(vmin_vx_b, 1) 1364 GEN_VEXT_VX(vmin_vx_h, 2) 1365 GEN_VEXT_VX(vmin_vx_w, 4) 1366 GEN_VEXT_VX(vmin_vx_d, 8) 1367 GEN_VEXT_VX(vmaxu_vx_b, 1) 1368 GEN_VEXT_VX(vmaxu_vx_h, 2) 1369 GEN_VEXT_VX(vmaxu_vx_w, 4) 1370 GEN_VEXT_VX(vmaxu_vx_d, 8) 1371 GEN_VEXT_VX(vmax_vx_b, 1) 1372 GEN_VEXT_VX(vmax_vx_h, 2) 1373 GEN_VEXT_VX(vmax_vx_w, 4) 1374 GEN_VEXT_VX(vmax_vx_d, 8) 1375 1376 /* Vector Single-Width Integer Multiply Instructions */ 1377 #define DO_MUL(N, M) (N * M) 1378 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1379 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1380 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1381 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1382 GEN_VEXT_VV(vmul_vv_b, 1) 1383 GEN_VEXT_VV(vmul_vv_h, 2) 1384 GEN_VEXT_VV(vmul_vv_w, 4) 1385 GEN_VEXT_VV(vmul_vv_d, 8) 1386 1387 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1388 { 1389 return (int16_t)s2 * (int16_t)s1 >> 8; 1390 } 1391 1392 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1393 { 1394 return (int32_t)s2 * (int32_t)s1 >> 16; 1395 } 1396 1397 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1398 { 1399 return (int64_t)s2 * (int64_t)s1 >> 32; 1400 } 1401 1402 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1403 { 1404 uint64_t hi_64, lo_64; 1405 1406 muls64(&lo_64, &hi_64, s1, s2); 1407 return hi_64; 1408 } 1409 1410 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1411 { 1412 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1413 } 1414 1415 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1416 { 1417 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1418 } 1419 1420 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1421 { 1422 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1423 } 1424 1425 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1426 { 1427 uint64_t hi_64, lo_64; 1428 1429 mulu64(&lo_64, &hi_64, s2, s1); 1430 return hi_64; 1431 } 1432 1433 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1434 { 1435 return (int16_t)s2 * (uint16_t)s1 >> 8; 1436 } 1437 1438 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1439 { 
1440 return (int32_t)s2 * (uint32_t)s1 >> 16; 1441 } 1442 1443 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1444 { 1445 return (int64_t)s2 * (uint64_t)s1 >> 32; 1446 } 1447 1448 /* 1449 * Let A = signed operand, 1450 * B = unsigned operand 1451 * P = mulu64(A, B), unsigned product 1452 * 1453 * LET X = 2 ** 64 - A, 2's complement of A 1454 * SP = signed product 1455 * THEN 1456 * IF A < 0 1457 * SP = -X * B 1458 * = -(2 ** 64 - A) * B 1459 * = A * B - 2 ** 64 * B 1460 * = P - 2 ** 64 * B 1461 * ELSE 1462 * SP = P 1463 * THEN 1464 * HI_P -= (A < 0 ? B : 0) 1465 */ 1466 1467 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1468 { 1469 uint64_t hi_64, lo_64; 1470 1471 mulu64(&lo_64, &hi_64, s2, s1); 1472 1473 hi_64 -= s2 < 0 ? s1 : 0; 1474 return hi_64; 1475 } 1476 1477 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1478 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1479 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1480 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1481 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1482 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1483 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1484 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1485 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1486 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1487 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1488 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1489 GEN_VEXT_VV(vmulh_vv_b, 1) 1490 GEN_VEXT_VV(vmulh_vv_h, 2) 1491 GEN_VEXT_VV(vmulh_vv_w, 4) 1492 GEN_VEXT_VV(vmulh_vv_d, 8) 1493 GEN_VEXT_VV(vmulhu_vv_b, 1) 1494 GEN_VEXT_VV(vmulhu_vv_h, 2) 1495 GEN_VEXT_VV(vmulhu_vv_w, 4) 1496 GEN_VEXT_VV(vmulhu_vv_d, 8) 1497 GEN_VEXT_VV(vmulhsu_vv_b, 1) 1498 GEN_VEXT_VV(vmulhsu_vv_h, 2) 1499 GEN_VEXT_VV(vmulhsu_vv_w, 4) 1500 GEN_VEXT_VV(vmulhsu_vv_d, 8) 1501 1502 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1503 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1504 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1505 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1506 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1507 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1508 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1509 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1510 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1511 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1512 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1513 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1514 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1515 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1516 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1517 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1518 GEN_VEXT_VX(vmul_vx_b, 1) 1519 GEN_VEXT_VX(vmul_vx_h, 2) 1520 GEN_VEXT_VX(vmul_vx_w, 4) 1521 GEN_VEXT_VX(vmul_vx_d, 8) 1522 GEN_VEXT_VX(vmulh_vx_b, 1) 1523 GEN_VEXT_VX(vmulh_vx_h, 2) 1524 GEN_VEXT_VX(vmulh_vx_w, 4) 1525 GEN_VEXT_VX(vmulh_vx_d, 8) 1526 GEN_VEXT_VX(vmulhu_vx_b, 1) 1527 GEN_VEXT_VX(vmulhu_vx_h, 2) 1528 GEN_VEXT_VX(vmulhu_vx_w, 4) 1529 GEN_VEXT_VX(vmulhu_vx_d, 8) 1530 GEN_VEXT_VX(vmulhsu_vx_b, 1) 1531 GEN_VEXT_VX(vmulhsu_vx_h, 2) 1532 GEN_VEXT_VX(vmulhsu_vx_w, 4) 1533 GEN_VEXT_VX(vmulhsu_vx_d, 8) 1534 1535 /* Vector Integer Divide 
Instructions */ 1536 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1537 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1538 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \ 1539 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1540 #define DO_REM(N, M) (unlikely(M == 0) ? N : \ 1541 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1542 1543 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1544 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1545 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1546 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1547 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1548 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1549 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1550 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1551 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1552 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1553 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1554 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1555 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1556 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1557 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1558 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1559 GEN_VEXT_VV(vdivu_vv_b, 1) 1560 GEN_VEXT_VV(vdivu_vv_h, 2) 1561 GEN_VEXT_VV(vdivu_vv_w, 4) 1562 GEN_VEXT_VV(vdivu_vv_d, 8) 1563 GEN_VEXT_VV(vdiv_vv_b, 1) 1564 GEN_VEXT_VV(vdiv_vv_h, 2) 1565 GEN_VEXT_VV(vdiv_vv_w, 4) 1566 GEN_VEXT_VV(vdiv_vv_d, 8) 1567 GEN_VEXT_VV(vremu_vv_b, 1) 1568 GEN_VEXT_VV(vremu_vv_h, 2) 1569 GEN_VEXT_VV(vremu_vv_w, 4) 1570 GEN_VEXT_VV(vremu_vv_d, 8) 1571 GEN_VEXT_VV(vrem_vv_b, 1) 1572 GEN_VEXT_VV(vrem_vv_h, 2) 1573 GEN_VEXT_VV(vrem_vv_w, 4) 1574 GEN_VEXT_VV(vrem_vv_d, 8) 1575 1576 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1577 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1578 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1579 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1580 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1581 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1582 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1583 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1584 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1585 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1586 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1587 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1588 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1589 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1590 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1591 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1592 GEN_VEXT_VX(vdivu_vx_b, 1) 1593 GEN_VEXT_VX(vdivu_vx_h, 2) 1594 GEN_VEXT_VX(vdivu_vx_w, 4) 1595 GEN_VEXT_VX(vdivu_vx_d, 8) 1596 GEN_VEXT_VX(vdiv_vx_b, 1) 1597 GEN_VEXT_VX(vdiv_vx_h, 2) 1598 GEN_VEXT_VX(vdiv_vx_w, 4) 1599 GEN_VEXT_VX(vdiv_vx_d, 8) 1600 GEN_VEXT_VX(vremu_vx_b, 1) 1601 GEN_VEXT_VX(vremu_vx_h, 2) 1602 GEN_VEXT_VX(vremu_vx_w, 4) 1603 GEN_VEXT_VX(vremu_vx_d, 8) 1604 GEN_VEXT_VX(vrem_vx_b, 1) 1605 GEN_VEXT_VX(vrem_vx_h, 2) 1606 GEN_VEXT_VX(vrem_vx_w, 4) 1607 GEN_VEXT_VX(vrem_vx_d, 8) 1608 1609 /* Vector Widening Integer Multiply Instructions */ 1610 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1611 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, 
DO_MUL) 1612 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1613 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1614 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1615 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1616 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1617 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1618 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1619 GEN_VEXT_VV(vwmul_vv_b, 2) 1620 GEN_VEXT_VV(vwmul_vv_h, 4) 1621 GEN_VEXT_VV(vwmul_vv_w, 8) 1622 GEN_VEXT_VV(vwmulu_vv_b, 2) 1623 GEN_VEXT_VV(vwmulu_vv_h, 4) 1624 GEN_VEXT_VV(vwmulu_vv_w, 8) 1625 GEN_VEXT_VV(vwmulsu_vv_b, 2) 1626 GEN_VEXT_VV(vwmulsu_vv_h, 4) 1627 GEN_VEXT_VV(vwmulsu_vv_w, 8) 1628 1629 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1630 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1631 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1632 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1633 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1634 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1635 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1636 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1637 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1638 GEN_VEXT_VX(vwmul_vx_b, 2) 1639 GEN_VEXT_VX(vwmul_vx_h, 4) 1640 GEN_VEXT_VX(vwmul_vx_w, 8) 1641 GEN_VEXT_VX(vwmulu_vx_b, 2) 1642 GEN_VEXT_VX(vwmulu_vx_h, 4) 1643 GEN_VEXT_VX(vwmulu_vx_w, 8) 1644 GEN_VEXT_VX(vwmulsu_vx_b, 2) 1645 GEN_VEXT_VX(vwmulsu_vx_h, 4) 1646 GEN_VEXT_VX(vwmulsu_vx_w, 8) 1647 1648 /* Vector Single-Width Integer Multiply-Add Instructions */ 1649 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1650 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1651 { \ 1652 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1653 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1654 TD d = *((TD *)vd + HD(i)); \ 1655 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1656 } 1657 1658 #define DO_MACC(N, M, D) (M * N + D) 1659 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1660 #define DO_MADD(N, M, D) (M * D + N) 1661 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1662 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1663 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1664 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1665 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1666 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1667 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1668 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1669 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1670 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1671 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1672 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1673 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1674 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1675 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1676 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1677 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1678 GEN_VEXT_VV(vmacc_vv_b, 1) 1679 GEN_VEXT_VV(vmacc_vv_h, 2) 1680 GEN_VEXT_VV(vmacc_vv_w, 4) 1681 GEN_VEXT_VV(vmacc_vv_d, 8) 1682 GEN_VEXT_VV(vnmsac_vv_b, 1) 1683 GEN_VEXT_VV(vnmsac_vv_h, 2) 1684 GEN_VEXT_VV(vnmsac_vv_w, 4) 1685 GEN_VEXT_VV(vnmsac_vv_d, 8) 1686 GEN_VEXT_VV(vmadd_vv_b, 1) 1687 GEN_VEXT_VV(vmadd_vv_h, 2) 1688 
GEN_VEXT_VV(vmadd_vv_w, 4) 1689 GEN_VEXT_VV(vmadd_vv_d, 8) 1690 GEN_VEXT_VV(vnmsub_vv_b, 1) 1691 GEN_VEXT_VV(vnmsub_vv_h, 2) 1692 GEN_VEXT_VV(vnmsub_vv_w, 4) 1693 GEN_VEXT_VV(vnmsub_vv_d, 8) 1694 1695 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1696 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1697 { \ 1698 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1699 TD d = *((TD *)vd + HD(i)); \ 1700 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1701 } 1702 1703 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1704 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1705 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1706 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1707 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1708 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1709 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1710 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1711 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1712 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1713 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1714 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1715 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1716 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1717 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1718 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1719 GEN_VEXT_VX(vmacc_vx_b, 1) 1720 GEN_VEXT_VX(vmacc_vx_h, 2) 1721 GEN_VEXT_VX(vmacc_vx_w, 4) 1722 GEN_VEXT_VX(vmacc_vx_d, 8) 1723 GEN_VEXT_VX(vnmsac_vx_b, 1) 1724 GEN_VEXT_VX(vnmsac_vx_h, 2) 1725 GEN_VEXT_VX(vnmsac_vx_w, 4) 1726 GEN_VEXT_VX(vnmsac_vx_d, 8) 1727 GEN_VEXT_VX(vmadd_vx_b, 1) 1728 GEN_VEXT_VX(vmadd_vx_h, 2) 1729 GEN_VEXT_VX(vmadd_vx_w, 4) 1730 GEN_VEXT_VX(vmadd_vx_d, 8) 1731 GEN_VEXT_VX(vnmsub_vx_b, 1) 1732 GEN_VEXT_VX(vnmsub_vx_h, 2) 1733 GEN_VEXT_VX(vnmsub_vx_w, 4) 1734 GEN_VEXT_VX(vnmsub_vx_d, 8) 1735 1736 /* Vector Widening Integer Multiply-Add Instructions */ 1737 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1738 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1739 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1740 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1741 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1742 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1743 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1744 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1745 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1746 GEN_VEXT_VV(vwmaccu_vv_b, 2) 1747 GEN_VEXT_VV(vwmaccu_vv_h, 4) 1748 GEN_VEXT_VV(vwmaccu_vv_w, 8) 1749 GEN_VEXT_VV(vwmacc_vv_b, 2) 1750 GEN_VEXT_VV(vwmacc_vv_h, 4) 1751 GEN_VEXT_VV(vwmacc_vv_w, 8) 1752 GEN_VEXT_VV(vwmaccsu_vv_b, 2) 1753 GEN_VEXT_VV(vwmaccsu_vv_h, 4) 1754 GEN_VEXT_VV(vwmaccsu_vv_w, 8) 1755 1756 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1757 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1758 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1759 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1760 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1761 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1762 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1763 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1764 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1765 RVVCALL(OPIVX3, 
vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1766 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1767 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1768 GEN_VEXT_VX(vwmaccu_vx_b, 2) 1769 GEN_VEXT_VX(vwmaccu_vx_h, 4) 1770 GEN_VEXT_VX(vwmaccu_vx_w, 8) 1771 GEN_VEXT_VX(vwmacc_vx_b, 2) 1772 GEN_VEXT_VX(vwmacc_vx_h, 4) 1773 GEN_VEXT_VX(vwmacc_vx_w, 8) 1774 GEN_VEXT_VX(vwmaccsu_vx_b, 2) 1775 GEN_VEXT_VX(vwmaccsu_vx_h, 4) 1776 GEN_VEXT_VX(vwmaccsu_vx_w, 8) 1777 GEN_VEXT_VX(vwmaccus_vx_b, 2) 1778 GEN_VEXT_VX(vwmaccus_vx_h, 4) 1779 GEN_VEXT_VX(vwmaccus_vx_w, 8) 1780 1781 /* Vector Integer Merge and Move Instructions */ 1782 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1783 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1784 uint32_t desc) \ 1785 { \ 1786 uint32_t vl = env->vl; \ 1787 uint32_t esz = sizeof(ETYPE); \ 1788 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1789 uint32_t vta = vext_vta(desc); \ 1790 uint32_t i; \ 1791 \ 1792 for (i = env->vstart; i < vl; i++) { \ 1793 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1794 *((ETYPE *)vd + H(i)) = s1; \ 1795 } \ 1796 env->vstart = 0; \ 1797 /* set tail elements to 1s */ \ 1798 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1799 } 1800 1801 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1802 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1803 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1804 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1805 1806 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1807 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1808 uint32_t desc) \ 1809 { \ 1810 uint32_t vl = env->vl; \ 1811 uint32_t esz = sizeof(ETYPE); \ 1812 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1813 uint32_t vta = vext_vta(desc); \ 1814 uint32_t i; \ 1815 \ 1816 for (i = env->vstart; i < vl; i++) { \ 1817 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1818 } \ 1819 env->vstart = 0; \ 1820 /* set tail elements to 1s */ \ 1821 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1822 } 1823 1824 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1825 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1826 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1827 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1828 1829 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1830 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1831 CPURISCVState *env, uint32_t desc) \ 1832 { \ 1833 uint32_t vl = env->vl; \ 1834 uint32_t esz = sizeof(ETYPE); \ 1835 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1836 uint32_t vta = vext_vta(desc); \ 1837 uint32_t i; \ 1838 \ 1839 for (i = env->vstart; i < vl; i++) { \ 1840 ETYPE *vt = (!vext_elem_mask(v0, i) ? 
vs2 : vs1); \ 1841 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1842 } \ 1843 env->vstart = 0; \ 1844 /* set tail elements to 1s */ \ 1845 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1846 } 1847 1848 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1849 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1850 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1851 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1852 1853 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1854 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1855 void *vs2, CPURISCVState *env, uint32_t desc) \ 1856 { \ 1857 uint32_t vl = env->vl; \ 1858 uint32_t esz = sizeof(ETYPE); \ 1859 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 1860 uint32_t vta = vext_vta(desc); \ 1861 uint32_t i; \ 1862 \ 1863 for (i = env->vstart; i < vl; i++) { \ 1864 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1865 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 1866 (ETYPE)(target_long)s1); \ 1867 *((ETYPE *)vd + H(i)) = d; \ 1868 } \ 1869 env->vstart = 0; \ 1870 /* set tail elements to 1s */ \ 1871 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 1872 } 1873 1874 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1875 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1876 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1877 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1878 1879 /* 1880 * Vector Fixed-Point Arithmetic Instructions 1881 */ 1882 1883 /* Vector Single-Width Saturating Add and Subtract */ 1884 1885 /* 1886 * As fixed point instructions probably have round mode and saturation, 1887 * define common macros for fixed point here. 1888 */ 1889 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1890 CPURISCVState *env, int vxrm); 1891 1892 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1893 static inline void \ 1894 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1895 CPURISCVState *env, int vxrm) \ 1896 { \ 1897 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1898 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1899 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1900 } 1901 1902 static inline void 1903 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1904 CPURISCVState *env, 1905 uint32_t vl, uint32_t vm, int vxrm, 1906 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz) 1907 { 1908 for (uint32_t i = env->vstart; i < vl; i++) { 1909 if (!vm && !vext_elem_mask(v0, i)) { 1910 /* set masked-off elements to 1s */ 1911 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 1912 continue; 1913 } 1914 fn(vd, vs1, vs2, i, env, vxrm); 1915 } 1916 env->vstart = 0; 1917 } 1918 1919 static inline void 1920 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1921 CPURISCVState *env, 1922 uint32_t desc, 1923 opivv2_rm_fn *fn, uint32_t esz) 1924 { 1925 uint32_t vm = vext_vm(desc); 1926 uint32_t vl = env->vl; 1927 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 1928 uint32_t vta = vext_vta(desc); 1929 uint32_t vma = vext_vma(desc); 1930 1931 switch (env->vxrm) { 1932 case 0: /* rnu */ 1933 vext_vv_rm_1(vd, v0, vs1, vs2, 1934 env, vl, vm, 0, fn, vma, esz); 1935 break; 1936 case 1: /* rne */ 1937 vext_vv_rm_1(vd, v0, vs1, vs2, 1938 env, vl, vm, 1, fn, vma, esz); 1939 break; 1940 case 2: /* rdn */ 1941 vext_vv_rm_1(vd, v0, vs1, vs2, 1942 env, vl, vm, 2, fn, vma, esz); 1943 break; 1944 default: /* rod */ 1945 vext_vv_rm_1(vd, v0, vs1, vs2, 1946 env, vl, vm, 3, fn, vma, esz); 1947 break; 1948 } 1949 /* set tail elements to 1s */ 1950 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 1951 } 1952 1953 /* generate helpers for fixed 
point instructions with OPIVV format */ 1954 #define GEN_VEXT_VV_RM(NAME, ESZ) \ 1955 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1956 CPURISCVState *env, uint32_t desc) \ 1957 { \ 1958 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \ 1959 do_##NAME, ESZ); \ 1960 } 1961 1962 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, 1963 uint8_t b) 1964 { 1965 uint8_t res = a + b; 1966 if (res < a) { 1967 res = UINT8_MAX; 1968 env->vxsat = 0x1; 1969 } 1970 return res; 1971 } 1972 1973 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1974 uint16_t b) 1975 { 1976 uint16_t res = a + b; 1977 if (res < a) { 1978 res = UINT16_MAX; 1979 env->vxsat = 0x1; 1980 } 1981 return res; 1982 } 1983 1984 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1985 uint32_t b) 1986 { 1987 uint32_t res = a + b; 1988 if (res < a) { 1989 res = UINT32_MAX; 1990 env->vxsat = 0x1; 1991 } 1992 return res; 1993 } 1994 1995 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1996 uint64_t b) 1997 { 1998 uint64_t res = a + b; 1999 if (res < a) { 2000 res = UINT64_MAX; 2001 env->vxsat = 0x1; 2002 } 2003 return res; 2004 } 2005 2006 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2007 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2008 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2009 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2010 GEN_VEXT_VV_RM(vsaddu_vv_b, 1) 2011 GEN_VEXT_VV_RM(vsaddu_vv_h, 2) 2012 GEN_VEXT_VV_RM(vsaddu_vv_w, 4) 2013 GEN_VEXT_VV_RM(vsaddu_vv_d, 8) 2014 2015 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2016 CPURISCVState *env, int vxrm); 2017 2018 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2019 static inline void \ 2020 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2021 CPURISCVState *env, int vxrm) \ 2022 { \ 2023 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2024 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2025 } 2026 2027 static inline void 2028 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2029 CPURISCVState *env, 2030 uint32_t vl, uint32_t vm, int vxrm, 2031 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz) 2032 { 2033 for (uint32_t i = env->vstart; i < vl; i++) { 2034 if (!vm && !vext_elem_mask(v0, i)) { 2035 /* set masked-off elements to 1s */ 2036 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); 2037 continue; 2038 } 2039 fn(vd, s1, vs2, i, env, vxrm); 2040 } 2041 env->vstart = 0; 2042 } 2043 2044 static inline void 2045 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2046 CPURISCVState *env, 2047 uint32_t desc, 2048 opivx2_rm_fn *fn, uint32_t esz) 2049 { 2050 uint32_t vm = vext_vm(desc); 2051 uint32_t vl = env->vl; 2052 uint32_t total_elems = vext_get_total_elems(env, desc, esz); 2053 uint32_t vta = vext_vta(desc); 2054 uint32_t vma = vext_vma(desc); 2055 2056 switch (env->vxrm) { 2057 case 0: /* rnu */ 2058 vext_vx_rm_1(vd, v0, s1, vs2, 2059 env, vl, vm, 0, fn, vma, esz); 2060 break; 2061 case 1: /* rne */ 2062 vext_vx_rm_1(vd, v0, s1, vs2, 2063 env, vl, vm, 1, fn, vma, esz); 2064 break; 2065 case 2: /* rdn */ 2066 vext_vx_rm_1(vd, v0, s1, vs2, 2067 env, vl, vm, 2, fn, vma, esz); 2068 break; 2069 default: /* rod */ 2070 vext_vx_rm_1(vd, v0, s1, vs2, 2071 env, vl, vm, 3, fn, vma, esz); 2072 break; 2073 } 2074 /* set tail elements to 1s */ 2075 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); 2076 } 2077 2078 /* generate helpers for fixed point 
instructions with OPIVX format */ 2079 #define GEN_VEXT_VX_RM(NAME, ESZ) \ 2080 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2081 void *vs2, CPURISCVState *env, \ 2082 uint32_t desc) \ 2083 { \ 2084 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \ 2085 do_##NAME, ESZ); \ 2086 } 2087 2088 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2089 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2090 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2091 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2092 GEN_VEXT_VX_RM(vsaddu_vx_b, 1) 2093 GEN_VEXT_VX_RM(vsaddu_vx_h, 2) 2094 GEN_VEXT_VX_RM(vsaddu_vx_w, 4) 2095 GEN_VEXT_VX_RM(vsaddu_vx_d, 8) 2096 2097 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2098 { 2099 int8_t res = a + b; 2100 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2101 res = a > 0 ? INT8_MAX : INT8_MIN; 2102 env->vxsat = 0x1; 2103 } 2104 return res; 2105 } 2106 2107 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, 2108 int16_t b) 2109 { 2110 int16_t res = a + b; 2111 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2112 res = a > 0 ? INT16_MAX : INT16_MIN; 2113 env->vxsat = 0x1; 2114 } 2115 return res; 2116 } 2117 2118 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, 2119 int32_t b) 2120 { 2121 int32_t res = a + b; 2122 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2123 res = a > 0 ? INT32_MAX : INT32_MIN; 2124 env->vxsat = 0x1; 2125 } 2126 return res; 2127 } 2128 2129 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, 2130 int64_t b) 2131 { 2132 int64_t res = a + b; 2133 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2134 res = a > 0 ? INT64_MAX : INT64_MIN; 2135 env->vxsat = 0x1; 2136 } 2137 return res; 2138 } 2139 2140 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2141 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2142 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2143 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2144 GEN_VEXT_VV_RM(vsadd_vv_b, 1) 2145 GEN_VEXT_VV_RM(vsadd_vv_h, 2) 2146 GEN_VEXT_VV_RM(vsadd_vv_w, 4) 2147 GEN_VEXT_VV_RM(vsadd_vv_d, 8) 2148 2149 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2150 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2151 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2152 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2153 GEN_VEXT_VX_RM(vsadd_vx_b, 1) 2154 GEN_VEXT_VX_RM(vsadd_vx_h, 2) 2155 GEN_VEXT_VX_RM(vsadd_vx_w, 4) 2156 GEN_VEXT_VX_RM(vsadd_vx_d, 8) 2157 2158 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, 2159 uint8_t b) 2160 { 2161 uint8_t res = a - b; 2162 if (res > a) { 2163 res = 0; 2164 env->vxsat = 0x1; 2165 } 2166 return res; 2167 } 2168 2169 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2170 uint16_t b) 2171 { 2172 uint16_t res = a - b; 2173 if (res > a) { 2174 res = 0; 2175 env->vxsat = 0x1; 2176 } 2177 return res; 2178 } 2179 2180 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2181 uint32_t b) 2182 { 2183 uint32_t res = a - b; 2184 if (res > a) { 2185 res = 0; 2186 env->vxsat = 0x1; 2187 } 2188 return res; 2189 } 2190 2191 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2192 uint64_t b) 2193 { 2194 uint64_t res = a - b; 2195 if (res > a) { 2196 res = 0; 2197 env->vxsat = 0x1; 2198 } 2199 return res; 2200 } 2201 2202 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2203 RVVCALL(OPIVV2_RM, vssubu_vv_h, 
OP_UUU_H, H2, H2, H2, ssubu16) 2204 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2205 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2206 GEN_VEXT_VV_RM(vssubu_vv_b, 1) 2207 GEN_VEXT_VV_RM(vssubu_vv_h, 2) 2208 GEN_VEXT_VV_RM(vssubu_vv_w, 4) 2209 GEN_VEXT_VV_RM(vssubu_vv_d, 8) 2210 2211 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2212 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2213 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2214 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2215 GEN_VEXT_VX_RM(vssubu_vx_b, 1) 2216 GEN_VEXT_VX_RM(vssubu_vx_h, 2) 2217 GEN_VEXT_VX_RM(vssubu_vx_w, 4) 2218 GEN_VEXT_VX_RM(vssubu_vx_d, 8) 2219 2220 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2221 { 2222 int8_t res = a - b; 2223 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2224 res = a >= 0 ? INT8_MAX : INT8_MIN; 2225 env->vxsat = 0x1; 2226 } 2227 return res; 2228 } 2229 2230 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, 2231 int16_t b) 2232 { 2233 int16_t res = a - b; 2234 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2235 res = a >= 0 ? INT16_MAX : INT16_MIN; 2236 env->vxsat = 0x1; 2237 } 2238 return res; 2239 } 2240 2241 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, 2242 int32_t b) 2243 { 2244 int32_t res = a - b; 2245 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2246 res = a >= 0 ? INT32_MAX : INT32_MIN; 2247 env->vxsat = 0x1; 2248 } 2249 return res; 2250 } 2251 2252 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, 2253 int64_t b) 2254 { 2255 int64_t res = a - b; 2256 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2257 res = a >= 0 ? INT64_MAX : INT64_MIN; 2258 env->vxsat = 0x1; 2259 } 2260 return res; 2261 } 2262 2263 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2264 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2265 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2266 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2267 GEN_VEXT_VV_RM(vssub_vv_b, 1) 2268 GEN_VEXT_VV_RM(vssub_vv_h, 2) 2269 GEN_VEXT_VV_RM(vssub_vv_w, 4) 2270 GEN_VEXT_VV_RM(vssub_vv_d, 8) 2271 2272 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2273 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2274 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2275 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2276 GEN_VEXT_VX_RM(vssub_vx_b, 1) 2277 GEN_VEXT_VX_RM(vssub_vx_h, 2) 2278 GEN_VEXT_VX_RM(vssub_vx_w, 4) 2279 GEN_VEXT_VX_RM(vssub_vx_d, 8) 2280 2281 /* Vector Single-Width Averaging Add and Subtract */ 2282 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2283 { 2284 uint8_t d = extract64(v, shift, 1); 2285 uint8_t d1; 2286 uint64_t D1, D2; 2287 2288 if (shift == 0 || shift > 64) { 2289 return 0; 2290 } 2291 2292 d1 = extract64(v, shift - 1, 1); 2293 D1 = extract64(v, 0, shift); 2294 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2295 return d1; 2296 } else if (vxrm == 1) { /* round-to-nearest-even */ 2297 if (shift > 1) { 2298 D2 = extract64(v, 0, shift - 1); 2299 return d1 & ((D2 != 0) | d); 2300 } else { 2301 return d1 & d; 2302 } 2303 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2304 return !d & (D1 != 0); 2305 } 2306 return 0; /* round-down (truncate) */ 2307 } 2308 2309 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, 2310 int32_t b) 2311 { 2312 int64_t res = (int64_t)a + b; 2313 uint8_t round = 
get_round(vxrm, res, 1); 2314 2315 return (res >> 1) + round; 2316 } 2317 2318 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, 2319 int64_t b) 2320 { 2321 int64_t res = a + b; 2322 uint8_t round = get_round(vxrm, res, 1); 2323 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2324 2325 /* With signed overflow, bit 64 is inverse of bit 63. */ 2326 return ((res >> 1) ^ over) + round; 2327 } 2328 2329 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2330 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2331 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2332 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2333 GEN_VEXT_VV_RM(vaadd_vv_b, 1) 2334 GEN_VEXT_VV_RM(vaadd_vv_h, 2) 2335 GEN_VEXT_VV_RM(vaadd_vv_w, 4) 2336 GEN_VEXT_VV_RM(vaadd_vv_d, 8) 2337 2338 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2339 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2340 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2341 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2342 GEN_VEXT_VX_RM(vaadd_vx_b, 1) 2343 GEN_VEXT_VX_RM(vaadd_vx_h, 2) 2344 GEN_VEXT_VX_RM(vaadd_vx_w, 4) 2345 GEN_VEXT_VX_RM(vaadd_vx_d, 8) 2346 2347 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm, 2348 uint32_t a, uint32_t b) 2349 { 2350 uint64_t res = (uint64_t)a + b; 2351 uint8_t round = get_round(vxrm, res, 1); 2352 2353 return (res >> 1) + round; 2354 } 2355 2356 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm, 2357 uint64_t a, uint64_t b) 2358 { 2359 uint64_t res = a + b; 2360 uint8_t round = get_round(vxrm, res, 1); 2361 uint64_t over = (uint64_t)(res < a) << 63; 2362 2363 return ((res >> 1) | over) + round; 2364 } 2365 2366 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32) 2367 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32) 2368 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32) 2369 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64) 2370 GEN_VEXT_VV_RM(vaaddu_vv_b, 1) 2371 GEN_VEXT_VV_RM(vaaddu_vv_h, 2) 2372 GEN_VEXT_VV_RM(vaaddu_vv_w, 4) 2373 GEN_VEXT_VV_RM(vaaddu_vv_d, 8) 2374 2375 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32) 2376 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32) 2377 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32) 2378 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64) 2379 GEN_VEXT_VX_RM(vaaddu_vx_b, 1) 2380 GEN_VEXT_VX_RM(vaaddu_vx_h, 2) 2381 GEN_VEXT_VX_RM(vaaddu_vx_w, 4) 2382 GEN_VEXT_VX_RM(vaaddu_vx_d, 8) 2383 2384 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, 2385 int32_t b) 2386 { 2387 int64_t res = (int64_t)a - b; 2388 uint8_t round = get_round(vxrm, res, 1); 2389 2390 return (res >> 1) + round; 2391 } 2392 2393 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, 2394 int64_t b) 2395 { 2396 int64_t res = (int64_t)a - b; 2397 uint8_t round = get_round(vxrm, res, 1); 2398 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2399 2400 /* With signed overflow, bit 64 is inverse of bit 63. 
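XOR-ing (res >> 1) with 'over' flips bit 63 back to the sign of the true 65-bit difference when overflow occurred, before the rounding increment is added (the same trick is used in aadd64 above).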
*/ 2401 return ((res >> 1) ^ over) + round; 2402 } 2403 2404 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2405 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2406 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2407 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2408 GEN_VEXT_VV_RM(vasub_vv_b, 1) 2409 GEN_VEXT_VV_RM(vasub_vv_h, 2) 2410 GEN_VEXT_VV_RM(vasub_vv_w, 4) 2411 GEN_VEXT_VV_RM(vasub_vv_d, 8) 2412 2413 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2414 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2415 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2416 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2417 GEN_VEXT_VX_RM(vasub_vx_b, 1) 2418 GEN_VEXT_VX_RM(vasub_vx_h, 2) 2419 GEN_VEXT_VX_RM(vasub_vx_w, 4) 2420 GEN_VEXT_VX_RM(vasub_vx_d, 8) 2421 2422 static inline uint32_t asubu32(CPURISCVState *env, int vxrm, 2423 uint32_t a, uint32_t b) 2424 { 2425 int64_t res = (int64_t)a - b; 2426 uint8_t round = get_round(vxrm, res, 1); 2427 2428 return (res >> 1) + round; 2429 } 2430 2431 static inline uint64_t asubu64(CPURISCVState *env, int vxrm, 2432 uint64_t a, uint64_t b) 2433 { 2434 uint64_t res = (uint64_t)a - b; 2435 uint8_t round = get_round(vxrm, res, 1); 2436 uint64_t over = (uint64_t)(res > a) << 63; 2437 2438 return ((res >> 1) | over) + round; 2439 } 2440 2441 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32) 2442 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32) 2443 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32) 2444 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64) 2445 GEN_VEXT_VV_RM(vasubu_vv_b, 1) 2446 GEN_VEXT_VV_RM(vasubu_vv_h, 2) 2447 GEN_VEXT_VV_RM(vasubu_vv_w, 4) 2448 GEN_VEXT_VV_RM(vasubu_vv_d, 8) 2449 2450 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32) 2451 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32) 2452 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32) 2453 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64) 2454 GEN_VEXT_VX_RM(vasubu_vx_b, 1) 2455 GEN_VEXT_VX_RM(vasubu_vx_h, 2) 2456 GEN_VEXT_VX_RM(vasubu_vx_w, 4) 2457 GEN_VEXT_VX_RM(vasubu_vx_d, 8) 2458 2459 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2460 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2461 { 2462 uint8_t round; 2463 int16_t res; 2464 2465 res = (int16_t)a * (int16_t)b; 2466 round = get_round(vxrm, res, 7); 2467 res = (res >> 7) + round; 2468 2469 if (res > INT8_MAX) { 2470 env->vxsat = 0x1; 2471 return INT8_MAX; 2472 } else if (res < INT8_MIN) { 2473 env->vxsat = 0x1; 2474 return INT8_MIN; 2475 } else { 2476 return res; 2477 } 2478 } 2479 2480 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2481 { 2482 uint8_t round; 2483 int32_t res; 2484 2485 res = (int32_t)a * (int32_t)b; 2486 round = get_round(vxrm, res, 15); 2487 res = (res >> 15) + round; 2488 2489 if (res > INT16_MAX) { 2490 env->vxsat = 0x1; 2491 return INT16_MAX; 2492 } else if (res < INT16_MIN) { 2493 env->vxsat = 0x1; 2494 return INT16_MIN; 2495 } else { 2496 return res; 2497 } 2498 } 2499 2500 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2501 { 2502 uint8_t round; 2503 int64_t res; 2504 2505 res = (int64_t)a * (int64_t)b; 2506 round = get_round(vxrm, res, 31); 2507 res = (res >> 31) + round; 2508 2509 if (res > INT32_MAX) { 2510 env->vxsat = 0x1; 2511 return INT32_MAX; 2512 } else if (res < INT32_MIN) { 2513 env->vxsat = 0x1; 
2514 return INT32_MIN; 2515 } else { 2516 return res; 2517 } 2518 } 2519 2520 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2521 { 2522 uint8_t round; 2523 uint64_t hi_64, lo_64; 2524 int64_t res; 2525 2526 if (a == INT64_MIN && b == INT64_MIN) { 2527 env->vxsat = 1; 2528 return INT64_MAX; 2529 } 2530 2531 muls64(&lo_64, &hi_64, a, b); 2532 round = get_round(vxrm, lo_64, 63); 2533 /* 2534 * Cannot overflow, as there are always 2535 * 2 sign bits after multiply. 2536 */ 2537 res = (hi_64 << 1) | (lo_64 >> 63); 2538 if (round) { 2539 if (res == INT64_MAX) { 2540 env->vxsat = 1; 2541 } else { 2542 res += 1; 2543 } 2544 } 2545 return res; 2546 } 2547 2548 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2549 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2550 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2551 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2552 GEN_VEXT_VV_RM(vsmul_vv_b, 1) 2553 GEN_VEXT_VV_RM(vsmul_vv_h, 2) 2554 GEN_VEXT_VV_RM(vsmul_vv_w, 4) 2555 GEN_VEXT_VV_RM(vsmul_vv_d, 8) 2556 2557 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2558 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2559 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2560 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2561 GEN_VEXT_VX_RM(vsmul_vx_b, 1) 2562 GEN_VEXT_VX_RM(vsmul_vx_h, 2) 2563 GEN_VEXT_VX_RM(vsmul_vx_w, 4) 2564 GEN_VEXT_VX_RM(vsmul_vx_d, 8) 2565 2566 /* Vector Single-Width Scaling Shift Instructions */ 2567 static inline uint8_t 2568 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2569 { 2570 uint8_t round, shift = b & 0x7; 2571 uint8_t res; 2572 2573 round = get_round(vxrm, a, shift); 2574 res = (a >> shift) + round; 2575 return res; 2576 } 2577 static inline uint16_t 2578 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2579 { 2580 uint8_t round, shift = b & 0xf; 2581 2582 round = get_round(vxrm, a, shift); 2583 return (a >> shift) + round; 2584 } 2585 static inline uint32_t 2586 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2587 { 2588 uint8_t round, shift = b & 0x1f; 2589 2590 round = get_round(vxrm, a, shift); 2591 return (a >> shift) + round; 2592 } 2593 static inline uint64_t 2594 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2595 { 2596 uint8_t round, shift = b & 0x3f; 2597 2598 round = get_round(vxrm, a, shift); 2599 return (a >> shift) + round; 2600 } 2601 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2602 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2603 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2604 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2605 GEN_VEXT_VV_RM(vssrl_vv_b, 1) 2606 GEN_VEXT_VV_RM(vssrl_vv_h, 2) 2607 GEN_VEXT_VV_RM(vssrl_vv_w, 4) 2608 GEN_VEXT_VV_RM(vssrl_vv_d, 8) 2609 2610 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2611 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2612 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2613 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2614 GEN_VEXT_VX_RM(vssrl_vx_b, 1) 2615 GEN_VEXT_VX_RM(vssrl_vx_h, 2) 2616 GEN_VEXT_VX_RM(vssrl_vx_w, 4) 2617 GEN_VEXT_VX_RM(vssrl_vx_d, 8) 2618 2619 static inline int8_t 2620 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2621 { 2622 uint8_t round, shift = b & 0x7; 2623 2624 round = get_round(vxrm, a, shift); 2625 return (a >> shift) + round; 2626 } 2627 static inline int16_t 2628 
vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2629 { 2630 uint8_t round, shift = b & 0xf; 2631 2632 round = get_round(vxrm, a, shift); 2633 return (a >> shift) + round; 2634 } 2635 static inline int32_t 2636 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2637 { 2638 uint8_t round, shift = b & 0x1f; 2639 2640 round = get_round(vxrm, a, shift); 2641 return (a >> shift) + round; 2642 } 2643 static inline int64_t 2644 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2645 { 2646 uint8_t round, shift = b & 0x3f; 2647 2648 round = get_round(vxrm, a, shift); 2649 return (a >> shift) + round; 2650 } 2651 2652 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2653 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2654 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2655 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2656 GEN_VEXT_VV_RM(vssra_vv_b, 1) 2657 GEN_VEXT_VV_RM(vssra_vv_h, 2) 2658 GEN_VEXT_VV_RM(vssra_vv_w, 4) 2659 GEN_VEXT_VV_RM(vssra_vv_d, 8) 2660 2661 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2662 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2663 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2664 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2665 GEN_VEXT_VX_RM(vssra_vx_b, 1) 2666 GEN_VEXT_VX_RM(vssra_vx_h, 2) 2667 GEN_VEXT_VX_RM(vssra_vx_w, 4) 2668 GEN_VEXT_VX_RM(vssra_vx_d, 8) 2669 2670 /* Vector Narrowing Fixed-Point Clip Instructions */ 2671 static inline int8_t 2672 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2673 { 2674 uint8_t round, shift = b & 0xf; 2675 int16_t res; 2676 2677 round = get_round(vxrm, a, shift); 2678 res = (a >> shift) + round; 2679 if (res > INT8_MAX) { 2680 env->vxsat = 0x1; 2681 return INT8_MAX; 2682 } else if (res < INT8_MIN) { 2683 env->vxsat = 0x1; 2684 return INT8_MIN; 2685 } else { 2686 return res; 2687 } 2688 } 2689 2690 static inline int16_t 2691 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2692 { 2693 uint8_t round, shift = b & 0x1f; 2694 int32_t res; 2695 2696 round = get_round(vxrm, a, shift); 2697 res = (a >> shift) + round; 2698 if (res > INT16_MAX) { 2699 env->vxsat = 0x1; 2700 return INT16_MAX; 2701 } else if (res < INT16_MIN) { 2702 env->vxsat = 0x1; 2703 return INT16_MIN; 2704 } else { 2705 return res; 2706 } 2707 } 2708 2709 static inline int32_t 2710 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2711 { 2712 uint8_t round, shift = b & 0x3f; 2713 int64_t res; 2714 2715 round = get_round(vxrm, a, shift); 2716 res = (a >> shift) + round; 2717 if (res > INT32_MAX) { 2718 env->vxsat = 0x1; 2719 return INT32_MAX; 2720 } else if (res < INT32_MIN) { 2721 env->vxsat = 0x1; 2722 return INT32_MIN; 2723 } else { 2724 return res; 2725 } 2726 } 2727 2728 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2729 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2730 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2731 GEN_VEXT_VV_RM(vnclip_wv_b, 1) 2732 GEN_VEXT_VV_RM(vnclip_wv_h, 2) 2733 GEN_VEXT_VV_RM(vnclip_wv_w, 4) 2734 2735 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8) 2736 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16) 2737 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32) 2738 GEN_VEXT_VX_RM(vnclip_wx_b, 1) 2739 GEN_VEXT_VX_RM(vnclip_wx_h, 2) 2740 GEN_VEXT_VX_RM(vnclip_wx_w, 4) 2741 2742 static inline uint8_t 2743 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 2744 { 2745 
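/*
 * Unsigned narrowing clip, 16-bit source to 8-bit destination: only the
 * low 4 bits of 'b' select the shift amount, the shifted value is rounded
 * according to vxrm, and anything above UINT8_MAX saturates to UINT8_MAX
 * with vxsat set.  vnclipu16 and vnclipu32 below follow the same pattern
 * with wider types and shift masks (0x1f, 0x3f).
 */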
uint8_t round, shift = b & 0xf; 2746 uint16_t res; 2747 2748 round = get_round(vxrm, a, shift); 2749 res = (a >> shift) + round; 2750 if (res > UINT8_MAX) { 2751 env->vxsat = 0x1; 2752 return UINT8_MAX; 2753 } else { 2754 return res; 2755 } 2756 } 2757 2758 static inline uint16_t 2759 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2760 { 2761 uint8_t round, shift = b & 0x1f; 2762 uint32_t res; 2763 2764 round = get_round(vxrm, a, shift); 2765 res = (a >> shift) + round; 2766 if (res > UINT16_MAX) { 2767 env->vxsat = 0x1; 2768 return UINT16_MAX; 2769 } else { 2770 return res; 2771 } 2772 } 2773 2774 static inline uint32_t 2775 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2776 { 2777 uint8_t round, shift = b & 0x3f; 2778 uint64_t res; 2779 2780 round = get_round(vxrm, a, shift); 2781 res = (a >> shift) + round; 2782 if (res > UINT32_MAX) { 2783 env->vxsat = 0x1; 2784 return UINT32_MAX; 2785 } else { 2786 return res; 2787 } 2788 } 2789 2790 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2791 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2792 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2793 GEN_VEXT_VV_RM(vnclipu_wv_b, 1) 2794 GEN_VEXT_VV_RM(vnclipu_wv_h, 2) 2795 GEN_VEXT_VV_RM(vnclipu_wv_w, 4) 2796 2797 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8) 2798 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16) 2799 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32) 2800 GEN_VEXT_VX_RM(vnclipu_wx_b, 1) 2801 GEN_VEXT_VX_RM(vnclipu_wx_h, 2) 2802 GEN_VEXT_VX_RM(vnclipu_wx_w, 4) 2803 2804 /* 2805 * Vector Float Point Arithmetic Instructions 2806 */ 2807 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2808 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2809 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2810 CPURISCVState *env) \ 2811 { \ 2812 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2813 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2814 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2815 } 2816 2817 #define GEN_VEXT_VV_ENV(NAME, ESZ) \ 2818 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2819 void *vs2, CPURISCVState *env, \ 2820 uint32_t desc) \ 2821 { \ 2822 uint32_t vm = vext_vm(desc); \ 2823 uint32_t vl = env->vl; \ 2824 uint32_t total_elems = \ 2825 vext_get_total_elems(env, desc, ESZ); \ 2826 uint32_t vta = vext_vta(desc); \ 2827 uint32_t vma = vext_vma(desc); \ 2828 uint32_t i; \ 2829 \ 2830 for (i = env->vstart; i < vl; i++) { \ 2831 if (!vm && !vext_elem_mask(v0, i)) { \ 2832 /* set masked-off elements to 1s */ \ 2833 vext_set_elems_1s(vd, vma, i * ESZ, \ 2834 (i + 1) * ESZ); \ 2835 continue; \ 2836 } \ 2837 do_##NAME(vd, vs1, vs2, i, env); \ 2838 } \ 2839 env->vstart = 0; \ 2840 /* set tail elements to 1s */ \ 2841 vext_set_elems_1s(vd, vta, vl * ESZ, \ 2842 total_elems * ESZ); \ 2843 } 2844 2845 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2846 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2847 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2848 GEN_VEXT_VV_ENV(vfadd_vv_h, 2) 2849 GEN_VEXT_VV_ENV(vfadd_vv_w, 4) 2850 GEN_VEXT_VV_ENV(vfadd_vv_d, 8) 2851 2852 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2853 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2854 CPURISCVState *env) \ 2855 { \ 2856 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2857 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 2858 } 2859 2860 #define GEN_VEXT_VF(NAME, 
ESZ) \ 2861 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2862 void *vs2, CPURISCVState *env, \ 2863 uint32_t desc) \ 2864 { \ 2865 uint32_t vm = vext_vm(desc); \ 2866 uint32_t vl = env->vl; \ 2867 uint32_t total_elems = \ 2868 vext_get_total_elems(env, desc, ESZ); \ 2869 uint32_t vta = vext_vta(desc); \ 2870 uint32_t vma = vext_vma(desc); \ 2871 uint32_t i; \ 2872 \ 2873 for (i = env->vstart; i < vl; i++) { \ 2874 if (!vm && !vext_elem_mask(v0, i)) { \ 2875 /* set masked-off elements to 1s */ \ 2876 vext_set_elems_1s(vd, vma, i * ESZ, \ 2877 (i + 1) * ESZ); \ 2878 continue; \ 2879 } \ 2880 do_##NAME(vd, s1, vs2, i, env); \ 2881 } \ 2882 env->vstart = 0; \ 2883 /* set tail elements to 1s */ \ 2884 vext_set_elems_1s(vd, vta, vl * ESZ, \ 2885 total_elems * ESZ); \ 2886 } 2887 2888 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2889 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2890 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2891 GEN_VEXT_VF(vfadd_vf_h, 2) 2892 GEN_VEXT_VF(vfadd_vf_w, 4) 2893 GEN_VEXT_VF(vfadd_vf_d, 8) 2894 2895 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2896 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2897 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2898 GEN_VEXT_VV_ENV(vfsub_vv_h, 2) 2899 GEN_VEXT_VV_ENV(vfsub_vv_w, 4) 2900 GEN_VEXT_VV_ENV(vfsub_vv_d, 8) 2901 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2902 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2903 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2904 GEN_VEXT_VF(vfsub_vf_h, 2) 2905 GEN_VEXT_VF(vfsub_vf_w, 4) 2906 GEN_VEXT_VF(vfsub_vf_d, 8) 2907 2908 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2909 { 2910 return float16_sub(b, a, s); 2911 } 2912 2913 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2914 { 2915 return float32_sub(b, a, s); 2916 } 2917 2918 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2919 { 2920 return float64_sub(b, a, s); 2921 } 2922 2923 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2924 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2925 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2926 GEN_VEXT_VF(vfrsub_vf_h, 2) 2927 GEN_VEXT_VF(vfrsub_vf_w, 4) 2928 GEN_VEXT_VF(vfrsub_vf_d, 8) 2929 2930 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2931 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2932 { 2933 return float32_add(float16_to_float32(a, true, s), 2934 float16_to_float32(b, true, s), s); 2935 } 2936 2937 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2938 { 2939 return float64_add(float32_to_float64(a, s), 2940 float32_to_float64(b, s), s); 2941 2942 } 2943 2944 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2945 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2946 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4) 2947 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8) 2948 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2949 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2950 GEN_VEXT_VF(vfwadd_vf_h, 4) 2951 GEN_VEXT_VF(vfwadd_vf_w, 8) 2952 2953 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2954 { 2955 return float32_sub(float16_to_float32(a, true, s), 2956 float16_to_float32(b, true, s), s); 2957 } 2958 2959 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2960 { 2961 return float64_sub(float32_to_float64(a, s), 2962 
float32_to_float64(b, s), s); 2963 2964 } 2965 2966 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 2967 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 2968 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4) 2969 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8) 2970 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 2971 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 2972 GEN_VEXT_VF(vfwsub_vf_h, 4) 2973 GEN_VEXT_VF(vfwsub_vf_w, 8) 2974 2975 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 2976 { 2977 return float32_add(a, float16_to_float32(b, true, s), s); 2978 } 2979 2980 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 2981 { 2982 return float64_add(a, float32_to_float64(b, s), s); 2983 } 2984 2985 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 2986 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 2987 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4) 2988 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8) 2989 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 2990 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 2991 GEN_VEXT_VF(vfwadd_wf_h, 4) 2992 GEN_VEXT_VF(vfwadd_wf_w, 8) 2993 2994 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 2995 { 2996 return float32_sub(a, float16_to_float32(b, true, s), s); 2997 } 2998 2999 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3000 { 3001 return float64_sub(a, float32_to_float64(b, s), s); 3002 } 3003 3004 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3005 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3006 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4) 3007 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8) 3008 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3009 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3010 GEN_VEXT_VF(vfwsub_wf_h, 4) 3011 GEN_VEXT_VF(vfwsub_wf_w, 8) 3012 3013 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3014 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3015 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3016 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3017 GEN_VEXT_VV_ENV(vfmul_vv_h, 2) 3018 GEN_VEXT_VV_ENV(vfmul_vv_w, 4) 3019 GEN_VEXT_VV_ENV(vfmul_vv_d, 8) 3020 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3021 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3022 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3023 GEN_VEXT_VF(vfmul_vf_h, 2) 3024 GEN_VEXT_VF(vfmul_vf_w, 4) 3025 GEN_VEXT_VF(vfmul_vf_d, 8) 3026 3027 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3028 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3029 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3030 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2) 3031 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4) 3032 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8) 3033 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3034 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3035 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3036 GEN_VEXT_VF(vfdiv_vf_h, 2) 3037 GEN_VEXT_VF(vfdiv_vf_w, 4) 3038 GEN_VEXT_VF(vfdiv_vf_d, 8) 3039 3040 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3041 { 3042 return float16_div(b, a, s); 3043 } 3044 3045 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3046 { 3047 return float32_div(b, a, s); 3048 } 3049 3050 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3051 { 3052 return float64_div(b, a, s); 3053 } 3054 
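/*
 * Reverse-divide helpers: float16/32/64_rdiv above simply swap their
 * operands, so the vfrdiv.vf forms below compute f[rs1] / vs2[i] rather
 * than vs2[i] / f[rs1].  As an illustration only (a sketch, not generated
 * output), and assuming RVVCALL(macro, ...) merely forwards its arguments
 * and OP_UUU_W is the usual all-uint32_t type list, the vfrdiv_vf_w line
 * below would expand via OPFVF2 to roughly:
 *
 *   static void do_vfrdiv_vf_w(void *vd, uint64_t s1, void *vs2, int i,
 *                              CPURISCVState *env)
 *   {
 *       uint32_t s2 = *((uint32_t *)vs2 + H4(i));
 *       *((uint32_t *)vd + H4(i)) =
 *           float32_rdiv(s2, (uint32_t)s1, &env->fp_status);
 *   }
 *
 * GEN_VEXT_VF(vfrdiv_vf_w, 4) then wraps do_vfrdiv_vf_w in the usual
 * masked per-element loop with tail handling.
 */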
3055 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3056 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3057 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3058 GEN_VEXT_VF(vfrdiv_vf_h, 2) 3059 GEN_VEXT_VF(vfrdiv_vf_w, 4) 3060 GEN_VEXT_VF(vfrdiv_vf_d, 8) 3061 3062 /* Vector Widening Floating-Point Multiply */ 3063 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3064 { 3065 return float32_mul(float16_to_float32(a, true, s), 3066 float16_to_float32(b, true, s), s); 3067 } 3068 3069 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3070 { 3071 return float64_mul(float32_to_float64(a, s), 3072 float32_to_float64(b, s), s); 3073 3074 } 3075 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3076 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3077 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4) 3078 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8) 3079 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3080 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3081 GEN_VEXT_VF(vfwmul_vf_h, 4) 3082 GEN_VEXT_VF(vfwmul_vf_w, 8) 3083 3084 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3085 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3086 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3087 CPURISCVState *env) \ 3088 { \ 3089 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3090 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3091 TD d = *((TD *)vd + HD(i)); \ 3092 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3093 } 3094 3095 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3096 { 3097 return float16_muladd(a, b, d, 0, s); 3098 } 3099 3100 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3101 { 3102 return float32_muladd(a, b, d, 0, s); 3103 } 3104 3105 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3106 { 3107 return float64_muladd(a, b, d, 0, s); 3108 } 3109 3110 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3111 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3112 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3113 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2) 3114 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4) 3115 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8) 3116 3117 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3118 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3119 CPURISCVState *env) \ 3120 { \ 3121 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3122 TD d = *((TD *)vd + HD(i)); \ 3123 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3124 } 3125 3126 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3127 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3128 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3129 GEN_VEXT_VF(vfmacc_vf_h, 2) 3130 GEN_VEXT_VF(vfmacc_vf_w, 4) 3131 GEN_VEXT_VF(vfmacc_vf_d, 8) 3132 3133 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3134 { 3135 return float16_muladd(a, b, d, float_muladd_negate_c | 3136 float_muladd_negate_product, s); 3137 } 3138 3139 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3140 { 3141 return float32_muladd(a, b, d, float_muladd_negate_c | 3142 float_muladd_negate_product, s); 3143 } 3144 3145 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3146 { 3147 return float64_muladd(a, b, d, float_muladd_negate_c | 3148 float_muladd_negate_product, s); 3149 } 3150 3151 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, 
H2, H2, H2, fnmacc16) 3152 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3153 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3154 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2) 3155 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4) 3156 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8) 3157 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3158 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3159 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3160 GEN_VEXT_VF(vfnmacc_vf_h, 2) 3161 GEN_VEXT_VF(vfnmacc_vf_w, 4) 3162 GEN_VEXT_VF(vfnmacc_vf_d, 8) 3163 3164 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3165 { 3166 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3167 } 3168 3169 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3170 { 3171 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3172 } 3173 3174 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3175 { 3176 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3177 } 3178 3179 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3180 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3181 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3182 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2) 3183 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4) 3184 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8) 3185 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3186 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3187 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3188 GEN_VEXT_VF(vfmsac_vf_h, 2) 3189 GEN_VEXT_VF(vfmsac_vf_w, 4) 3190 GEN_VEXT_VF(vfmsac_vf_d, 8) 3191 3192 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3193 { 3194 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3195 } 3196 3197 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3198 { 3199 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3200 } 3201 3202 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3203 { 3204 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3205 } 3206 3207 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3208 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3209 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3210 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2) 3211 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4) 3212 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8) 3213 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3214 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3215 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3216 GEN_VEXT_VF(vfnmsac_vf_h, 2) 3217 GEN_VEXT_VF(vfnmsac_vf_w, 4) 3218 GEN_VEXT_VF(vfnmsac_vf_d, 8) 3219 3220 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3221 { 3222 return float16_muladd(d, b, a, 0, s); 3223 } 3224 3225 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3226 { 3227 return float32_muladd(d, b, a, 0, s); 3228 } 3229 3230 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3231 { 3232 return float64_muladd(d, b, a, 0, s); 3233 } 3234 3235 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3236 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3237 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3238 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2) 3239 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4) 3240 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8) 3241 RVVCALL(OPFVF3, vfmadd_vf_h, 
OP_UUU_H, H2, H2, fmadd16) 3242 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3243 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3244 GEN_VEXT_VF(vfmadd_vf_h, 2) 3245 GEN_VEXT_VF(vfmadd_vf_w, 4) 3246 GEN_VEXT_VF(vfmadd_vf_d, 8) 3247 3248 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3249 { 3250 return float16_muladd(d, b, a, float_muladd_negate_c | 3251 float_muladd_negate_product, s); 3252 } 3253 3254 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3255 { 3256 return float32_muladd(d, b, a, float_muladd_negate_c | 3257 float_muladd_negate_product, s); 3258 } 3259 3260 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3261 { 3262 return float64_muladd(d, b, a, float_muladd_negate_c | 3263 float_muladd_negate_product, s); 3264 } 3265 3266 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3267 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3268 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3269 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2) 3270 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4) 3271 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8) 3272 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3273 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3274 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3275 GEN_VEXT_VF(vfnmadd_vf_h, 2) 3276 GEN_VEXT_VF(vfnmadd_vf_w, 4) 3277 GEN_VEXT_VF(vfnmadd_vf_d, 8) 3278 3279 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3280 { 3281 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3282 } 3283 3284 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3285 { 3286 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3287 } 3288 3289 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3290 { 3291 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3292 } 3293 3294 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3295 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3296 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3297 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2) 3298 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4) 3299 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8) 3300 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3301 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3302 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3303 GEN_VEXT_VF(vfmsub_vf_h, 2) 3304 GEN_VEXT_VF(vfmsub_vf_w, 4) 3305 GEN_VEXT_VF(vfmsub_vf_d, 8) 3306 3307 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3308 { 3309 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3310 } 3311 3312 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3313 { 3314 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3315 } 3316 3317 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3318 { 3319 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3320 } 3321 3322 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3323 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3324 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3325 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2) 3326 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4) 3327 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8) 3328 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3329 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3330 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, 
H8, fnmsub64) 3331 GEN_VEXT_VF(vfnmsub_vf_h, 2) 3332 GEN_VEXT_VF(vfnmsub_vf_w, 4) 3333 GEN_VEXT_VF(vfnmsub_vf_d, 8) 3334 3335 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3336 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3337 { 3338 return float32_muladd(float16_to_float32(a, true, s), 3339 float16_to_float32(b, true, s), d, 0, s); 3340 } 3341 3342 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3343 { 3344 return float64_muladd(float32_to_float64(a, s), 3345 float32_to_float64(b, s), d, 0, s); 3346 } 3347 3348 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3349 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3350 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4) 3351 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8) 3352 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3353 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3354 GEN_VEXT_VF(vfwmacc_vf_h, 4) 3355 GEN_VEXT_VF(vfwmacc_vf_w, 8) 3356 3357 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3358 { 3359 return float32_muladd(bfloat16_to_float32(a, s), 3360 bfloat16_to_float32(b, s), d, 0, s); 3361 } 3362 3363 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16) 3364 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4) 3365 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16) 3366 GEN_VEXT_VF(vfwmaccbf16_vf, 4) 3367 3368 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3369 { 3370 return float32_muladd(float16_to_float32(a, true, s), 3371 float16_to_float32(b, true, s), d, 3372 float_muladd_negate_c | float_muladd_negate_product, 3373 s); 3374 } 3375 3376 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3377 { 3378 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s), 3379 d, float_muladd_negate_c | 3380 float_muladd_negate_product, s); 3381 } 3382 3383 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3384 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3385 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4) 3386 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8) 3387 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3388 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3389 GEN_VEXT_VF(vfwnmacc_vf_h, 4) 3390 GEN_VEXT_VF(vfwnmacc_vf_w, 8) 3391 3392 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3393 { 3394 return float32_muladd(float16_to_float32(a, true, s), 3395 float16_to_float32(b, true, s), d, 3396 float_muladd_negate_c, s); 3397 } 3398 3399 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3400 { 3401 return float64_muladd(float32_to_float64(a, s), 3402 float32_to_float64(b, s), d, 3403 float_muladd_negate_c, s); 3404 } 3405 3406 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3407 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3408 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4) 3409 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8) 3410 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3411 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3412 GEN_VEXT_VF(vfwmsac_vf_h, 4) 3413 GEN_VEXT_VF(vfwmsac_vf_w, 8) 3414 3415 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3416 { 3417 return float32_muladd(float16_to_float32(a, true, s), 3418 float16_to_float32(b, true, s), d, 3419 float_muladd_negate_product, s); 3420 } 3421 3422 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, 
float_status *s) 3423 { 3424 return float64_muladd(float32_to_float64(a, s), 3425 float32_to_float64(b, s), d, 3426 float_muladd_negate_product, s); 3427 } 3428 3429 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3430 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3431 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4) 3432 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8) 3433 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3434 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3435 GEN_VEXT_VF(vfwnmsac_vf_h, 4) 3436 GEN_VEXT_VF(vfwnmsac_vf_w, 8) 3437 3438 /* Vector Floating-Point Square-Root Instruction */ 3439 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3440 static void do_##NAME(void *vd, void *vs2, int i, \ 3441 CPURISCVState *env) \ 3442 { \ 3443 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3444 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3445 } 3446 3447 #define GEN_VEXT_V_ENV(NAME, ESZ) \ 3448 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3449 CPURISCVState *env, uint32_t desc) \ 3450 { \ 3451 uint32_t vm = vext_vm(desc); \ 3452 uint32_t vl = env->vl; \ 3453 uint32_t total_elems = \ 3454 vext_get_total_elems(env, desc, ESZ); \ 3455 uint32_t vta = vext_vta(desc); \ 3456 uint32_t vma = vext_vma(desc); \ 3457 uint32_t i; \ 3458 \ 3459 if (vl == 0) { \ 3460 return; \ 3461 } \ 3462 for (i = env->vstart; i < vl; i++) { \ 3463 if (!vm && !vext_elem_mask(v0, i)) { \ 3464 /* set masked-off elements to 1s */ \ 3465 vext_set_elems_1s(vd, vma, i * ESZ, \ 3466 (i + 1) * ESZ); \ 3467 continue; \ 3468 } \ 3469 do_##NAME(vd, vs2, i, env); \ 3470 } \ 3471 env->vstart = 0; \ 3472 vext_set_elems_1s(vd, vta, vl * ESZ, \ 3473 total_elems * ESZ); \ 3474 } 3475 3476 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3477 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3478 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3479 GEN_VEXT_V_ENV(vfsqrt_v_h, 2) 3480 GEN_VEXT_V_ENV(vfsqrt_v_w, 4) 3481 GEN_VEXT_V_ENV(vfsqrt_v_d, 8) 3482 3483 /* 3484 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction 3485 * 3486 * Adapted from riscv-v-spec recip.c: 3487 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3488 */ 3489 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size) 3490 { 3491 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3492 uint64_t exp = extract64(f, frac_size, exp_size); 3493 uint64_t frac = extract64(f, 0, frac_size); 3494 3495 const uint8_t lookup_table[] = { 3496 52, 51, 50, 48, 47, 46, 44, 43, 3497 42, 41, 40, 39, 38, 36, 35, 34, 3498 33, 32, 31, 30, 30, 29, 28, 27, 3499 26, 25, 24, 23, 23, 22, 21, 20, 3500 19, 19, 18, 17, 16, 16, 15, 14, 3501 14, 13, 12, 12, 11, 10, 10, 9, 3502 9, 8, 7, 7, 6, 6, 5, 4, 3503 4, 3, 3, 2, 2, 1, 1, 0, 3504 127, 125, 123, 121, 119, 118, 116, 114, 3505 113, 111, 109, 108, 106, 105, 103, 102, 3506 100, 99, 97, 96, 95, 93, 92, 91, 3507 90, 88, 87, 86, 85, 84, 83, 82, 3508 80, 79, 78, 77, 76, 75, 74, 73, 3509 72, 71, 70, 70, 69, 68, 67, 66, 3510 65, 64, 63, 63, 62, 61, 60, 59, 3511 59, 58, 57, 56, 56, 55, 54, 53 3512 }; 3513 const int precision = 7; 3514 3515 if (exp == 0 && frac != 0) { /* subnormal */ 3516 /* Normalize the subnormal. 
*/ 3517 while (extract64(frac, frac_size - 1, 1) == 0) { 3518 exp--; 3519 frac <<= 1; 3520 } 3521 3522 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3523 } 3524 3525 int idx = ((exp & 1) << (precision - 1)) | 3526 (frac >> (frac_size - precision + 1)); 3527 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3528 (frac_size - precision); 3529 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2; 3530 3531 uint64_t val = 0; 3532 val = deposit64(val, 0, frac_size, out_frac); 3533 val = deposit64(val, frac_size, exp_size, out_exp); 3534 val = deposit64(val, frac_size + exp_size, 1, sign); 3535 return val; 3536 } 3537 3538 static float16 frsqrt7_h(float16 f, float_status *s) 3539 { 3540 int exp_size = 5, frac_size = 10; 3541 bool sign = float16_is_neg(f); 3542 3543 /* 3544 * frsqrt7(sNaN) = canonical NaN 3545 * frsqrt7(-inf) = canonical NaN 3546 * frsqrt7(-normal) = canonical NaN 3547 * frsqrt7(-subnormal) = canonical NaN 3548 */ 3549 if (float16_is_signaling_nan(f, s) || 3550 (float16_is_infinity(f) && sign) || 3551 (float16_is_normal(f) && sign) || 3552 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) { 3553 s->float_exception_flags |= float_flag_invalid; 3554 return float16_default_nan(s); 3555 } 3556 3557 /* frsqrt7(qNaN) = canonical NaN */ 3558 if (float16_is_quiet_nan(f, s)) { 3559 return float16_default_nan(s); 3560 } 3561 3562 /* frsqrt7(+-0) = +-inf */ 3563 if (float16_is_zero(f)) { 3564 s->float_exception_flags |= float_flag_divbyzero; 3565 return float16_set_sign(float16_infinity, sign); 3566 } 3567 3568 /* frsqrt7(+inf) = +0 */ 3569 if (float16_is_infinity(f) && !sign) { 3570 return float16_set_sign(float16_zero, sign); 3571 } 3572 3573 /* +normal, +subnormal */ 3574 uint64_t val = frsqrt7(f, exp_size, frac_size); 3575 return make_float16(val); 3576 } 3577 3578 static float32 frsqrt7_s(float32 f, float_status *s) 3579 { 3580 int exp_size = 8, frac_size = 23; 3581 bool sign = float32_is_neg(f); 3582 3583 /* 3584 * frsqrt7(sNaN) = canonical NaN 3585 * frsqrt7(-inf) = canonical NaN 3586 * frsqrt7(-normal) = canonical NaN 3587 * frsqrt7(-subnormal) = canonical NaN 3588 */ 3589 if (float32_is_signaling_nan(f, s) || 3590 (float32_is_infinity(f) && sign) || 3591 (float32_is_normal(f) && sign) || 3592 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) { 3593 s->float_exception_flags |= float_flag_invalid; 3594 return float32_default_nan(s); 3595 } 3596 3597 /* frsqrt7(qNaN) = canonical NaN */ 3598 if (float32_is_quiet_nan(f, s)) { 3599 return float32_default_nan(s); 3600 } 3601 3602 /* frsqrt7(+-0) = +-inf */ 3603 if (float32_is_zero(f)) { 3604 s->float_exception_flags |= float_flag_divbyzero; 3605 return float32_set_sign(float32_infinity, sign); 3606 } 3607 3608 /* frsqrt7(+inf) = +0 */ 3609 if (float32_is_infinity(f) && !sign) { 3610 return float32_set_sign(float32_zero, sign); 3611 } 3612 3613 /* +normal, +subnormal */ 3614 uint64_t val = frsqrt7(f, exp_size, frac_size); 3615 return make_float32(val); 3616 } 3617 3618 static float64 frsqrt7_d(float64 f, float_status *s) 3619 { 3620 int exp_size = 11, frac_size = 52; 3621 bool sign = float64_is_neg(f); 3622 3623 /* 3624 * frsqrt7(sNaN) = canonical NaN 3625 * frsqrt7(-inf) = canonical NaN 3626 * frsqrt7(-normal) = canonical NaN 3627 * frsqrt7(-subnormal) = canonical NaN 3628 */ 3629 if (float64_is_signaling_nan(f, s) || 3630 (float64_is_infinity(f) && sign) || 3631 (float64_is_normal(f) && sign) || 3632 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) { 3633 
s->float_exception_flags |= float_flag_invalid; 3634 return float64_default_nan(s); 3635 } 3636 3637 /* frsqrt7(qNaN) = canonical NaN */ 3638 if (float64_is_quiet_nan(f, s)) { 3639 return float64_default_nan(s); 3640 } 3641 3642 /* frsqrt7(+-0) = +-inf */ 3643 if (float64_is_zero(f)) { 3644 s->float_exception_flags |= float_flag_divbyzero; 3645 return float64_set_sign(float64_infinity, sign); 3646 } 3647 3648 /* frsqrt7(+inf) = +0 */ 3649 if (float64_is_infinity(f) && !sign) { 3650 return float64_set_sign(float64_zero, sign); 3651 } 3652 3653 /* +normal, +subnormal */ 3654 uint64_t val = frsqrt7(f, exp_size, frac_size); 3655 return make_float64(val); 3656 } 3657 3658 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h) 3659 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s) 3660 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d) 3661 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2) 3662 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4) 3663 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8) 3664 3665 /* 3666 * Vector Floating-Point Reciprocal Estimate Instruction 3667 * 3668 * Adapted from riscv-v-spec recip.c: 3669 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c 3670 */ 3671 static uint64_t frec7(uint64_t f, int exp_size, int frac_size, 3672 float_status *s) 3673 { 3674 uint64_t sign = extract64(f, frac_size + exp_size, 1); 3675 uint64_t exp = extract64(f, frac_size, exp_size); 3676 uint64_t frac = extract64(f, 0, frac_size); 3677 3678 const uint8_t lookup_table[] = { 3679 127, 125, 123, 121, 119, 117, 116, 114, 3680 112, 110, 109, 107, 105, 104, 102, 100, 3681 99, 97, 96, 94, 93, 91, 90, 88, 3682 87, 85, 84, 83, 81, 80, 79, 77, 3683 76, 75, 74, 72, 71, 70, 69, 68, 3684 66, 65, 64, 63, 62, 61, 60, 59, 3685 58, 57, 56, 55, 54, 53, 52, 51, 3686 50, 49, 48, 47, 46, 45, 44, 43, 3687 42, 41, 40, 40, 39, 38, 37, 36, 3688 35, 35, 34, 33, 32, 31, 31, 30, 3689 29, 28, 28, 27, 26, 25, 25, 24, 3690 23, 23, 22, 21, 21, 20, 19, 19, 3691 18, 17, 17, 16, 15, 15, 14, 14, 3692 13, 12, 12, 11, 11, 10, 9, 9, 3693 8, 8, 7, 7, 6, 5, 5, 4, 3694 4, 3, 3, 2, 2, 1, 1, 0 3695 }; 3696 const int precision = 7; 3697 3698 if (exp == 0 && frac != 0) { /* subnormal */ 3699 /* Normalize the subnormal. */ 3700 while (extract64(frac, frac_size - 1, 1) == 0) { 3701 exp--; 3702 frac <<= 1; 3703 } 3704 3705 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size); 3706 3707 if (exp != 0 && exp != UINT64_MAX) { 3708 /* 3709 * Overflow to inf or max value of same sign, 3710 * depending on sign and rounding mode. 3711 */ 3712 s->float_exception_flags |= (float_flag_inexact | 3713 float_flag_overflow); 3714 3715 if ((s->float_rounding_mode == float_round_to_zero) || 3716 ((s->float_rounding_mode == float_round_down) && !sign) || 3717 ((s->float_rounding_mode == float_round_up) && sign)) { 3718 /* Return greatest/negative finite value. */ 3719 return (sign << (exp_size + frac_size)) | 3720 (MAKE_64BIT_MASK(frac_size, exp_size) - 1); 3721 } else { 3722 /* Return +-inf. */ 3723 return (sign << (exp_size + frac_size)) | 3724 MAKE_64BIT_MASK(frac_size, exp_size); 3725 } 3726 } 3727 } 3728 3729 int idx = frac >> (frac_size - precision); 3730 uint64_t out_frac = (uint64_t)(lookup_table[idx]) << 3731 (frac_size - precision); 3732 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp; 3733 3734 if (out_exp == 0 || out_exp == UINT64_MAX) { 3735 /* 3736 * The result is subnormal, but don't raise the underflow exception, 3737 * because there's no additional loss of precision. 
3738 */ 3739 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1); 3740 if (out_exp == UINT64_MAX) { 3741 out_frac >>= 1; 3742 out_exp = 0; 3743 } 3744 } 3745 3746 uint64_t val = 0; 3747 val = deposit64(val, 0, frac_size, out_frac); 3748 val = deposit64(val, frac_size, exp_size, out_exp); 3749 val = deposit64(val, frac_size + exp_size, 1, sign); 3750 return val; 3751 } 3752 3753 static float16 frec7_h(float16 f, float_status *s) 3754 { 3755 int exp_size = 5, frac_size = 10; 3756 bool sign = float16_is_neg(f); 3757 3758 /* frec7(+-inf) = +-0 */ 3759 if (float16_is_infinity(f)) { 3760 return float16_set_sign(float16_zero, sign); 3761 } 3762 3763 /* frec7(+-0) = +-inf */ 3764 if (float16_is_zero(f)) { 3765 s->float_exception_flags |= float_flag_divbyzero; 3766 return float16_set_sign(float16_infinity, sign); 3767 } 3768 3769 /* frec7(sNaN) = canonical NaN */ 3770 if (float16_is_signaling_nan(f, s)) { 3771 s->float_exception_flags |= float_flag_invalid; 3772 return float16_default_nan(s); 3773 } 3774 3775 /* frec7(qNaN) = canonical NaN */ 3776 if (float16_is_quiet_nan(f, s)) { 3777 return float16_default_nan(s); 3778 } 3779 3780 /* +-normal, +-subnormal */ 3781 uint64_t val = frec7(f, exp_size, frac_size, s); 3782 return make_float16(val); 3783 } 3784 3785 static float32 frec7_s(float32 f, float_status *s) 3786 { 3787 int exp_size = 8, frac_size = 23; 3788 bool sign = float32_is_neg(f); 3789 3790 /* frec7(+-inf) = +-0 */ 3791 if (float32_is_infinity(f)) { 3792 return float32_set_sign(float32_zero, sign); 3793 } 3794 3795 /* frec7(+-0) = +-inf */ 3796 if (float32_is_zero(f)) { 3797 s->float_exception_flags |= float_flag_divbyzero; 3798 return float32_set_sign(float32_infinity, sign); 3799 } 3800 3801 /* frec7(sNaN) = canonical NaN */ 3802 if (float32_is_signaling_nan(f, s)) { 3803 s->float_exception_flags |= float_flag_invalid; 3804 return float32_default_nan(s); 3805 } 3806 3807 /* frec7(qNaN) = canonical NaN */ 3808 if (float32_is_quiet_nan(f, s)) { 3809 return float32_default_nan(s); 3810 } 3811 3812 /* +-normal, +-subnormal */ 3813 uint64_t val = frec7(f, exp_size, frac_size, s); 3814 return make_float32(val); 3815 } 3816 3817 static float64 frec7_d(float64 f, float_status *s) 3818 { 3819 int exp_size = 11, frac_size = 52; 3820 bool sign = float64_is_neg(f); 3821 3822 /* frec7(+-inf) = +-0 */ 3823 if (float64_is_infinity(f)) { 3824 return float64_set_sign(float64_zero, sign); 3825 } 3826 3827 /* frec7(+-0) = +-inf */ 3828 if (float64_is_zero(f)) { 3829 s->float_exception_flags |= float_flag_divbyzero; 3830 return float64_set_sign(float64_infinity, sign); 3831 } 3832 3833 /* frec7(sNaN) = canonical NaN */ 3834 if (float64_is_signaling_nan(f, s)) { 3835 s->float_exception_flags |= float_flag_invalid; 3836 return float64_default_nan(s); 3837 } 3838 3839 /* frec7(qNaN) = canonical NaN */ 3840 if (float64_is_quiet_nan(f, s)) { 3841 return float64_default_nan(s); 3842 } 3843 3844 /* +-normal, +-subnormal */ 3845 uint64_t val = frec7(f, exp_size, frac_size, s); 3846 return make_float64(val); 3847 } 3848 3849 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h) 3850 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s) 3851 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d) 3852 GEN_VEXT_V_ENV(vfrec7_v_h, 2) 3853 GEN_VEXT_V_ENV(vfrec7_v_w, 4) 3854 GEN_VEXT_V_ENV(vfrec7_v_d, 8) 3855 3856 /* Vector Floating-Point MIN/MAX Instructions */ 3857 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number) 3858 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, 
float32_minimum_number) 3859 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number) 3860 GEN_VEXT_VV_ENV(vfmin_vv_h, 2) 3861 GEN_VEXT_VV_ENV(vfmin_vv_w, 4) 3862 GEN_VEXT_VV_ENV(vfmin_vv_d, 8) 3863 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number) 3864 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number) 3865 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number) 3866 GEN_VEXT_VF(vfmin_vf_h, 2) 3867 GEN_VEXT_VF(vfmin_vf_w, 4) 3868 GEN_VEXT_VF(vfmin_vf_d, 8) 3869 3870 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number) 3871 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number) 3872 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number) 3873 GEN_VEXT_VV_ENV(vfmax_vv_h, 2) 3874 GEN_VEXT_VV_ENV(vfmax_vv_w, 4) 3875 GEN_VEXT_VV_ENV(vfmax_vv_d, 8) 3876 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number) 3877 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number) 3878 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number) 3879 GEN_VEXT_VF(vfmax_vf_h, 2) 3880 GEN_VEXT_VF(vfmax_vf_w, 4) 3881 GEN_VEXT_VF(vfmax_vf_d, 8) 3882 3883 /* Vector Floating-Point Sign-Injection Instructions */ 3884 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3885 { 3886 return deposit64(b, 0, 15, a); 3887 } 3888 3889 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3890 { 3891 return deposit64(b, 0, 31, a); 3892 } 3893 3894 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3895 { 3896 return deposit64(b, 0, 63, a); 3897 } 3898 3899 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3900 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3901 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3902 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2) 3903 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4) 3904 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8) 3905 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3906 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3907 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3908 GEN_VEXT_VF(vfsgnj_vf_h, 2) 3909 GEN_VEXT_VF(vfsgnj_vf_w, 4) 3910 GEN_VEXT_VF(vfsgnj_vf_d, 8) 3911 3912 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3913 { 3914 return deposit64(~b, 0, 15, a); 3915 } 3916 3917 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3918 { 3919 return deposit64(~b, 0, 31, a); 3920 } 3921 3922 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3923 { 3924 return deposit64(~b, 0, 63, a); 3925 } 3926 3927 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3928 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3929 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3930 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2) 3931 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4) 3932 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8) 3933 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3934 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3935 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3936 GEN_VEXT_VF(vfsgnjn_vf_h, 2) 3937 GEN_VEXT_VF(vfsgnjn_vf_w, 4) 3938 GEN_VEXT_VF(vfsgnjn_vf_d, 8) 3939 3940 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3941 { 3942 return deposit64(b ^ a, 0, 15, a); 3943 } 3944 3945 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3946 { 3947 return deposit64(b ^ a, 0, 31, a); 3948 } 3949 3950 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
float_status *s) 3951 { 3952 return deposit64(b ^ a, 0, 63, a); 3953 } 3954 3955 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3956 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3957 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3958 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2) 3959 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4) 3960 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8) 3961 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3962 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3963 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3964 GEN_VEXT_VF(vfsgnjx_vf_h, 2) 3965 GEN_VEXT_VF(vfsgnjx_vf_w, 4) 3966 GEN_VEXT_VF(vfsgnjx_vf_d, 8) 3967 3968 /* Vector Floating-Point Compare Instructions */ 3969 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3970 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3971 CPURISCVState *env, uint32_t desc) \ 3972 { \ 3973 uint32_t vm = vext_vm(desc); \ 3974 uint32_t vl = env->vl; \ 3975 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 3976 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 3977 uint32_t vma = vext_vma(desc); \ 3978 uint32_t i; \ 3979 \ 3980 for (i = env->vstart; i < vl; i++) { \ 3981 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3982 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3983 if (!vm && !vext_elem_mask(v0, i)) { \ 3984 /* set masked-off elements to 1s */ \ 3985 if (vma) { \ 3986 vext_set_elem_mask(vd, i, 1); \ 3987 } \ 3988 continue; \ 3989 } \ 3990 vext_set_elem_mask(vd, i, \ 3991 DO_OP(s2, s1, &env->fp_status)); \ 3992 } \ 3993 env->vstart = 0; \ 3994 /* 3995 * mask destination register are always tail-agnostic 3996 * set tail elements to 1s 3997 */ \ 3998 if (vta_all_1s) { \ 3999 for (; i < total_elems; i++) { \ 4000 vext_set_elem_mask(vd, i, 1); \ 4001 } \ 4002 } \ 4003 } 4004 4005 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 4006 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 4007 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 4008 4009 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 4010 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4011 CPURISCVState *env, uint32_t desc) \ 4012 { \ 4013 uint32_t vm = vext_vm(desc); \ 4014 uint32_t vl = env->vl; \ 4015 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4016 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4017 uint32_t vma = vext_vma(desc); \ 4018 uint32_t i; \ 4019 \ 4020 for (i = env->vstart; i < vl; i++) { \ 4021 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4022 if (!vm && !vext_elem_mask(v0, i)) { \ 4023 /* set masked-off elements to 1s */ \ 4024 if (vma) { \ 4025 vext_set_elem_mask(vd, i, 1); \ 4026 } \ 4027 continue; \ 4028 } \ 4029 vext_set_elem_mask(vd, i, \ 4030 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 4031 } \ 4032 env->vstart = 0; \ 4033 /* 4034 * mask destination register are always tail-agnostic 4035 * set tail elements to 1s 4036 */ \ 4037 if (vta_all_1s) { \ 4038 for (; i < total_elems; i++) { \ 4039 vext_set_elem_mask(vd, i, 1); \ 4040 } \ 4041 } \ 4042 } 4043 4044 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 4045 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 4046 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 4047 4048 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 4049 { 4050 FloatRelation compare = float16_compare_quiet(a, b, s); 4051 return compare != float_relation_equal; 4052 } 4053 4054 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4055 { 4056 FloatRelation compare = 
float32_compare_quiet(a, b, s); 4057 return compare != float_relation_equal; 4058 } 4059 4060 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4061 { 4062 FloatRelation compare = float64_compare_quiet(a, b, s); 4063 return compare != float_relation_equal; 4064 } 4065 4066 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4067 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 4068 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4069 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4070 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4071 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4072 4073 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4074 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4075 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4076 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4077 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4078 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4079 4080 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4081 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4082 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4083 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4084 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4085 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4086 4087 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4088 { 4089 FloatRelation compare = float16_compare(a, b, s); 4090 return compare == float_relation_greater; 4091 } 4092 4093 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4094 { 4095 FloatRelation compare = float32_compare(a, b, s); 4096 return compare == float_relation_greater; 4097 } 4098 4099 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4100 { 4101 FloatRelation compare = float64_compare(a, b, s); 4102 return compare == float_relation_greater; 4103 } 4104 4105 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4106 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4107 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4108 4109 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4110 { 4111 FloatRelation compare = float16_compare(a, b, s); 4112 return compare == float_relation_greater || 4113 compare == float_relation_equal; 4114 } 4115 4116 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4117 { 4118 FloatRelation compare = float32_compare(a, b, s); 4119 return compare == float_relation_greater || 4120 compare == float_relation_equal; 4121 } 4122 4123 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4124 { 4125 FloatRelation compare = float64_compare(a, b, s); 4126 return compare == float_relation_greater || 4127 compare == float_relation_equal; 4128 } 4129 4130 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4131 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4132 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4133 4134 /* Vector Floating-Point Classify Instruction */ 4135 target_ulong fclass_h(uint64_t frs1) 4136 { 4137 float16 f = frs1; 4138 bool sign = float16_is_neg(f); 4139 4140 if (float16_is_infinity(f)) { 4141 return sign ? 1 << 0 : 1 << 7; 4142 } else if (float16_is_zero(f)) { 4143 return sign ? 1 << 3 : 1 << 4; 4144 } else if (float16_is_zero_or_denormal(f)) { 4145 return sign ? 1 << 2 : 1 << 5; 4146 } else if (float16_is_any_nan(f)) { 4147 float_status s = { }; /* for snan_bit_is_one */ 4148 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4149 } else { 4150 return sign ? 
1 << 1 : 1 << 6; 4151 } 4152 } 4153 4154 target_ulong fclass_s(uint64_t frs1) 4155 { 4156 float32 f = frs1; 4157 bool sign = float32_is_neg(f); 4158 4159 if (float32_is_infinity(f)) { 4160 return sign ? 1 << 0 : 1 << 7; 4161 } else if (float32_is_zero(f)) { 4162 return sign ? 1 << 3 : 1 << 4; 4163 } else if (float32_is_zero_or_denormal(f)) { 4164 return sign ? 1 << 2 : 1 << 5; 4165 } else if (float32_is_any_nan(f)) { 4166 float_status s = { }; /* for snan_bit_is_one */ 4167 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4168 } else { 4169 return sign ? 1 << 1 : 1 << 6; 4170 } 4171 } 4172 4173 target_ulong fclass_d(uint64_t frs1) 4174 { 4175 float64 f = frs1; 4176 bool sign = float64_is_neg(f); 4177 4178 if (float64_is_infinity(f)) { 4179 return sign ? 1 << 0 : 1 << 7; 4180 } else if (float64_is_zero(f)) { 4181 return sign ? 1 << 3 : 1 << 4; 4182 } else if (float64_is_zero_or_denormal(f)) { 4183 return sign ? 1 << 2 : 1 << 5; 4184 } else if (float64_is_any_nan(f)) { 4185 float_status s = { }; /* for snan_bit_is_one */ 4186 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4187 } else { 4188 return sign ? 1 << 1 : 1 << 6; 4189 } 4190 } 4191 4192 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4193 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4194 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4195 GEN_VEXT_V(vfclass_v_h, 2) 4196 GEN_VEXT_V(vfclass_v_w, 4) 4197 GEN_VEXT_V(vfclass_v_d, 8) 4198 4199 /* Vector Floating-Point Merge Instruction */ 4200 4201 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4202 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4203 CPURISCVState *env, uint32_t desc) \ 4204 { \ 4205 uint32_t vm = vext_vm(desc); \ 4206 uint32_t vl = env->vl; \ 4207 uint32_t esz = sizeof(ETYPE); \ 4208 uint32_t total_elems = \ 4209 vext_get_total_elems(env, desc, esz); \ 4210 uint32_t vta = vext_vta(desc); \ 4211 uint32_t i; \ 4212 \ 4213 for (i = env->vstart; i < vl; i++) { \ 4214 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4215 *((ETYPE *)vd + H(i)) = \ 4216 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \ 4217 } \ 4218 env->vstart = 0; \ 4219 /* set tail elements to 1s */ \ 4220 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4221 } 4222 4223 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4224 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4225 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4226 4227 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4228 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4229 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4230 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4231 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4232 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2) 4233 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4) 4234 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8) 4235 4236 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4237 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4238 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4239 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4240 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2) 4241 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4) 4242 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8) 4243 4244 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. 
*/ 4245 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4246 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4247 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4248 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2) 4249 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4) 4250 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8) 4251 4252 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4253 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4254 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4255 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4256 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2) 4257 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4) 4258 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8) 4259 4260 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4261 /* (TD, T2, TX2) */ 4262 #define WOP_UU_B uint16_t, uint8_t, uint8_t 4263 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4264 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4265 /* 4266 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer. 4267 */ 4268 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4269 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4270 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4) 4271 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8) 4272 4273 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4274 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4275 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4276 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4) 4277 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8) 4278 4279 /* 4280 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float. 4281 */ 4282 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16) 4283 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4284 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4285 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2) 4286 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4) 4287 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8) 4288 4289 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4290 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16) 4291 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4292 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4293 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2) 4294 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4) 4295 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8) 4296 4297 /* 4298 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float. 4299 */ 4300 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4301 { 4302 return float16_to_float32(a, true, s); 4303 } 4304 4305 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4306 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4307 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4) 4308 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8) 4309 4310 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32) 4311 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4) 4312 4313 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4314 /* (TD, T2, TX2) */ 4315 #define NOP_UU_B uint8_t, uint16_t, uint32_t 4316 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4317 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4318 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. 
*/ 4319 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8) 4320 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16) 4321 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32) 4322 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1) 4323 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2) 4324 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4) 4325 4326 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4327 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8) 4328 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16) 4329 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32) 4330 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1) 4331 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2) 4332 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4) 4333 4334 /* 4335 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. 4336 */ 4337 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16) 4338 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32) 4339 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2) 4340 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4) 4341 4342 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4343 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16) 4344 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32) 4345 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2) 4346 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4) 4347 4348 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. */ 4349 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4350 { 4351 return float32_to_float16(a, true, s); 4352 } 4353 4354 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16) 4355 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32) 4356 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2) 4357 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4) 4358 4359 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16) 4360 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2) 4361 4362 /* 4363 * Vector Reduction Operations 4364 */ 4365 /* Vector Single-Width Integer Reduction Instructions */ 4366 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4367 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4368 void *vs2, CPURISCVState *env, \ 4369 uint32_t desc) \ 4370 { \ 4371 uint32_t vm = vext_vm(desc); \ 4372 uint32_t vl = env->vl; \ 4373 uint32_t esz = sizeof(TD); \ 4374 uint32_t vlenb = simd_maxsz(desc); \ 4375 uint32_t vta = vext_vta(desc); \ 4376 uint32_t i; \ 4377 TD s1 = *((TD *)vs1 + HD(0)); \ 4378 \ 4379 for (i = env->vstart; i < vl; i++) { \ 4380 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4381 if (!vm && !vext_elem_mask(v0, i)) { \ 4382 continue; \ 4383 } \ 4384 s1 = OP(s1, (TD)s2); \ 4385 } \ 4386 *((TD *)vd + HD(0)) = s1; \ 4387 env->vstart = 0; \ 4388 /* set tail elements to 1s */ \ 4389 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4390 } 4391 4392 /* vd[0] = sum(vs1[0], vs2[*]) */ 4393 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4394 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4395 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4396 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4397 4398 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4399 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4400 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4401 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4402 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4403 4404 /* vd[0] = max(vs1[0], vs2[*]) */ 4405 GEN_VEXT_RED(vredmax_vs_b, 
int8_t, int8_t, H1, H1, DO_MAX) 4406 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4407 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4408 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4409 4410 /* vd[0] = minu(vs1[0], vs2[*]) */ 4411 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4412 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4413 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4414 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4415 4416 /* vd[0] = min(vs1[0], vs2[*]) */ 4417 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4418 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4419 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4420 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4421 4422 /* vd[0] = and(vs1[0], vs2[*]) */ 4423 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4424 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4425 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4426 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4427 4428 /* vd[0] = or(vs1[0], vs2[*]) */ 4429 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4430 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4431 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4432 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4433 4434 /* vd[0] = xor(vs1[0], vs2[*]) */ 4435 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4436 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4437 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4438 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4439 4440 /* Vector Widening Integer Reduction Instructions */ 4441 /* signed sum reduction into double-width accumulator */ 4442 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4443 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4444 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4445 4446 /* Unsigned sum reduction into double-width accumulator */ 4447 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4448 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4449 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4450 4451 /* Vector Single-Width Floating-Point Reduction Instructions */ 4452 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4453 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4454 void *vs2, CPURISCVState *env, \ 4455 uint32_t desc) \ 4456 { \ 4457 uint32_t vm = vext_vm(desc); \ 4458 uint32_t vl = env->vl; \ 4459 uint32_t esz = sizeof(TD); \ 4460 uint32_t vlenb = simd_maxsz(desc); \ 4461 uint32_t vta = vext_vta(desc); \ 4462 uint32_t i; \ 4463 TD s1 = *((TD *)vs1 + HD(0)); \ 4464 \ 4465 for (i = env->vstart; i < vl; i++) { \ 4466 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4467 if (!vm && !vext_elem_mask(v0, i)) { \ 4468 continue; \ 4469 } \ 4470 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4471 } \ 4472 *((TD *)vd + HD(0)) = s1; \ 4473 env->vstart = 0; \ 4474 /* set tail elements to 1s */ \ 4475 vext_set_elems_1s(vd, vta, esz, vlenb); \ 4476 } 4477 4478 /* Unordered sum */ 4479 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4480 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4481 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4482 4483 /* Ordered sum */ 4484 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, 
uint16_t, H2, H2, float16_add) 4485 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4486 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4487 4488 /* Maximum value */ 4489 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, 4490 float16_maximum_number) 4491 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, 4492 float32_maximum_number) 4493 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, 4494 float64_maximum_number) 4495 4496 /* Minimum value */ 4497 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, 4498 float16_minimum_number) 4499 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, 4500 float32_minimum_number) 4501 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, 4502 float64_minimum_number) 4503 4504 /* Vector Widening Floating-Point Add Instructions */ 4505 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s) 4506 { 4507 return float32_add(a, float16_to_float32(b, true, s), s); 4508 } 4509 4510 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s) 4511 { 4512 return float64_add(a, float32_to_float64(b, s), s); 4513 } 4514 4515 /* Vector Widening Floating-Point Reduction Instructions */ 4516 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4517 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4518 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4519 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16) 4520 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32) 4521 4522 /* 4523 * Vector Mask Operations 4524 */ 4525 /* Vector Mask-Register Logical Instructions */ 4526 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4527 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4528 void *vs2, CPURISCVState *env, \ 4529 uint32_t desc) \ 4530 { \ 4531 uint32_t vl = env->vl; \ 4532 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; \ 4533 uint32_t vta_all_1s = vext_vta_all_1s(desc); \ 4534 uint32_t i; \ 4535 int a, b; \ 4536 \ 4537 for (i = env->vstart; i < vl; i++) { \ 4538 a = vext_elem_mask(vs1, i); \ 4539 b = vext_elem_mask(vs2, i); \ 4540 vext_set_elem_mask(vd, i, OP(b, a)); \ 4541 } \ 4542 env->vstart = 0; \ 4543 /* 4544 * mask destination register are always tail-agnostic 4545 * set tail elements to 1s 4546 */ \ 4547 if (vta_all_1s) { \ 4548 for (; i < total_elems; i++) { \ 4549 vext_set_elem_mask(vd, i, 1); \ 4550 } \ 4551 } \ 4552 } 4553 4554 #define DO_NAND(N, M) (!(N & M)) 4555 #define DO_ANDNOT(N, M) (N & !M) 4556 #define DO_NOR(N, M) (!(N | M)) 4557 #define DO_ORNOT(N, M) (N | !M) 4558 #define DO_XNOR(N, M) (!(N ^ M)) 4559 4560 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4561 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4562 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT) 4563 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4564 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4565 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4566 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT) 4567 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4568 4569 /* Vector count population in mask vcpop */ 4570 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env, 4571 uint32_t desc) 4572 { 4573 target_ulong cnt = 0; 4574 uint32_t vm = vext_vm(desc); 4575 uint32_t vl = env->vl; 4576 int i; 4577 4578 for (i = env->vstart; i < vl; i++) { 4579 if (vm || vext_elem_mask(v0, i)) { 4580 if (vext_elem_mask(vs2, i)) { 4581 cnt++; 4582 } 4583 } 4584 } 4585 env->vstart = 0; 4586 return cnt; 4587 } 4588 4589 /* vfirst find-first-set mask bit */ 4590 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState 
*env, 4591 uint32_t desc) 4592 { 4593 uint32_t vm = vext_vm(desc); 4594 uint32_t vl = env->vl; 4595 int i; 4596 4597 for (i = env->vstart; i < vl; i++) { 4598 if (vm || vext_elem_mask(v0, i)) { 4599 if (vext_elem_mask(vs2, i)) { 4600 return i; 4601 } 4602 } 4603 } 4604 env->vstart = 0; 4605 return -1LL; 4606 } 4607 4608 enum set_mask_type { 4609 ONLY_FIRST = 1, 4610 INCLUDE_FIRST, 4611 BEFORE_FIRST, 4612 }; 4613 4614 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4615 uint32_t desc, enum set_mask_type type) 4616 { 4617 uint32_t vm = vext_vm(desc); 4618 uint32_t vl = env->vl; 4619 uint32_t total_elems = riscv_cpu_cfg(env)->vlen; 4620 uint32_t vta_all_1s = vext_vta_all_1s(desc); 4621 uint32_t vma = vext_vma(desc); 4622 int i; 4623 bool first_mask_bit = false; 4624 4625 for (i = env->vstart; i < vl; i++) { 4626 if (!vm && !vext_elem_mask(v0, i)) { 4627 /* set masked-off elements to 1s */ 4628 if (vma) { 4629 vext_set_elem_mask(vd, i, 1); 4630 } 4631 continue; 4632 } 4633 /* write a zero to all following active elements */ 4634 if (first_mask_bit) { 4635 vext_set_elem_mask(vd, i, 0); 4636 continue; 4637 } 4638 if (vext_elem_mask(vs2, i)) { 4639 first_mask_bit = true; 4640 if (type == BEFORE_FIRST) { 4641 vext_set_elem_mask(vd, i, 0); 4642 } else { 4643 vext_set_elem_mask(vd, i, 1); 4644 } 4645 } else { 4646 if (type == ONLY_FIRST) { 4647 vext_set_elem_mask(vd, i, 0); 4648 } else { 4649 vext_set_elem_mask(vd, i, 1); 4650 } 4651 } 4652 } 4653 env->vstart = 0; 4654 /* 4655 * mask destination register are always tail-agnostic 4656 * set tail elements to 1s 4657 */ 4658 if (vta_all_1s) { 4659 for (; i < total_elems; i++) { 4660 vext_set_elem_mask(vd, i, 1); 4661 } 4662 } 4663 } 4664 4665 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4666 uint32_t desc) 4667 { 4668 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4669 } 4670 4671 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4672 uint32_t desc) 4673 { 4674 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4675 } 4676 4677 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4678 uint32_t desc) 4679 { 4680 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4681 } 4682 4683 /* Vector Iota Instruction */ 4684 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4685 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4686 uint32_t desc) \ 4687 { \ 4688 uint32_t vm = vext_vm(desc); \ 4689 uint32_t vl = env->vl; \ 4690 uint32_t esz = sizeof(ETYPE); \ 4691 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4692 uint32_t vta = vext_vta(desc); \ 4693 uint32_t vma = vext_vma(desc); \ 4694 uint32_t sum = 0; \ 4695 int i; \ 4696 \ 4697 for (i = env->vstart; i < vl; i++) { \ 4698 if (!vm && !vext_elem_mask(v0, i)) { \ 4699 /* set masked-off elements to 1s */ \ 4700 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4701 continue; \ 4702 } \ 4703 *((ETYPE *)vd + H(i)) = sum; \ 4704 if (vext_elem_mask(vs2, i)) { \ 4705 sum++; \ 4706 } \ 4707 } \ 4708 env->vstart = 0; \ 4709 /* set tail elements to 1s */ \ 4710 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4711 } 4712 4713 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4714 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4715 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4716 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4717 4718 /* Vector Element Index Instruction */ 4719 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4720 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4721 { \ 4722 uint32_t vm 
= vext_vm(desc); \ 4723 uint32_t vl = env->vl; \ 4724 uint32_t esz = sizeof(ETYPE); \ 4725 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4726 uint32_t vta = vext_vta(desc); \ 4727 uint32_t vma = vext_vma(desc); \ 4728 int i; \ 4729 \ 4730 for (i = env->vstart; i < vl; i++) { \ 4731 if (!vm && !vext_elem_mask(v0, i)) { \ 4732 /* set masked-off elements to 1s */ \ 4733 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4734 continue; \ 4735 } \ 4736 *((ETYPE *)vd + H(i)) = i; \ 4737 } \ 4738 env->vstart = 0; \ 4739 /* set tail elements to 1s */ \ 4740 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4741 } 4742 4743 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4744 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4745 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4746 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4747 4748 /* 4749 * Vector Permutation Instructions 4750 */ 4751 4752 /* Vector Slide Instructions */ 4753 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4754 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4755 CPURISCVState *env, uint32_t desc) \ 4756 { \ 4757 uint32_t vm = vext_vm(desc); \ 4758 uint32_t vl = env->vl; \ 4759 uint32_t esz = sizeof(ETYPE); \ 4760 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4761 uint32_t vta = vext_vta(desc); \ 4762 uint32_t vma = vext_vma(desc); \ 4763 target_ulong offset = s1, i_min, i; \ 4764 \ 4765 i_min = MAX(env->vstart, offset); \ 4766 for (i = i_min; i < vl; i++) { \ 4767 if (!vm && !vext_elem_mask(v0, i)) { \ 4768 /* set masked-off elements to 1s */ \ 4769 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4770 continue; \ 4771 } \ 4772 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4773 } \ 4774 /* set tail elements to 1s */ \ 4775 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4776 } 4777 4778 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4779 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4780 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4781 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4782 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4783 4784 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4785 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4786 CPURISCVState *env, uint32_t desc) \ 4787 { \ 4788 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \ 4789 uint32_t vm = vext_vm(desc); \ 4790 uint32_t vl = env->vl; \ 4791 uint32_t esz = sizeof(ETYPE); \ 4792 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4793 uint32_t vta = vext_vta(desc); \ 4794 uint32_t vma = vext_vma(desc); \ 4795 target_ulong i_max, i_min, i; \ 4796 \ 4797 i_min = MIN(s1 < vlmax ? 
vlmax - s1 : 0, vl); \ 4798 i_max = MAX(i_min, env->vstart); \ 4799 for (i = env->vstart; i < i_max; ++i) { \ 4800 if (!vm && !vext_elem_mask(v0, i)) { \ 4801 /* set masked-off elements to 1s */ \ 4802 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4803 continue; \ 4804 } \ 4805 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \ 4806 } \ 4807 \ 4808 for (i = i_max; i < vl; ++i) { \ 4809 if (vm || vext_elem_mask(v0, i)) { \ 4810 *((ETYPE *)vd + H(i)) = 0; \ 4811 } \ 4812 } \ 4813 \ 4814 env->vstart = 0; \ 4815 /* set tail elements to 1s */ \ 4816 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4817 } 4818 4819 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4820 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4821 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4822 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4823 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4824 4825 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \ 4826 static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ 4827 void *vs2, CPURISCVState *env, \ 4828 uint32_t desc) \ 4829 { \ 4830 typedef uint##BITWIDTH##_t ETYPE; \ 4831 uint32_t vm = vext_vm(desc); \ 4832 uint32_t vl = env->vl; \ 4833 uint32_t esz = sizeof(ETYPE); \ 4834 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4835 uint32_t vta = vext_vta(desc); \ 4836 uint32_t vma = vext_vma(desc); \ 4837 uint32_t i; \ 4838 \ 4839 for (i = env->vstart; i < vl; i++) { \ 4840 if (!vm && !vext_elem_mask(v0, i)) { \ 4841 /* set masked-off elements to 1s */ \ 4842 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4843 continue; \ 4844 } \ 4845 if (i == 0) { \ 4846 *((ETYPE *)vd + H(i)) = s1; \ 4847 } else { \ 4848 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4849 } \ 4850 } \ 4851 env->vstart = 0; \ 4852 /* set tail elements to 1s */ \ 4853 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4854 } 4855 4856 GEN_VEXT_VSLIE1UP(8, H1) 4857 GEN_VEXT_VSLIE1UP(16, H2) 4858 GEN_VEXT_VSLIE1UP(32, H4) 4859 GEN_VEXT_VSLIE1UP(64, H8) 4860 4861 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \ 4862 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4863 CPURISCVState *env, uint32_t desc) \ 4864 { \ 4865 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4866 } 4867 4868 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4869 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8) 4870 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16) 4871 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32) 4872 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64) 4873 4874 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \ 4875 static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \ 4876 void *vs2, CPURISCVState *env, \ 4877 uint32_t desc) \ 4878 { \ 4879 typedef uint##BITWIDTH##_t ETYPE; \ 4880 uint32_t vm = vext_vm(desc); \ 4881 uint32_t vl = env->vl; \ 4882 uint32_t esz = sizeof(ETYPE); \ 4883 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4884 uint32_t vta = vext_vta(desc); \ 4885 uint32_t vma = vext_vma(desc); \ 4886 uint32_t i; \ 4887 \ 4888 for (i = env->vstart; i < vl; i++) { \ 4889 if (!vm && !vext_elem_mask(v0, i)) { \ 4890 /* set masked-off elements to 1s */ \ 4891 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4892 continue; \ 4893 } \ 4894 if (i == vl - 1) { \ 4895 *((ETYPE *)vd + H(i)) = s1; \ 4896 } else { \ 4897 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4898 } \ 4899 } \ 4900 env->vstart = 0; \ 4901 /* set tail elements to 1s */ \ 4902 vext_set_elems_1s(vd, vta, vl 
* esz, total_elems * esz); \ 4903 } 4904 4905 GEN_VEXT_VSLIDE1DOWN(8, H1) 4906 GEN_VEXT_VSLIDE1DOWN(16, H2) 4907 GEN_VEXT_VSLIDE1DOWN(32, H4) 4908 GEN_VEXT_VSLIDE1DOWN(64, H8) 4909 4910 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \ 4911 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4912 CPURISCVState *env, uint32_t desc) \ 4913 { \ 4914 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4915 } 4916 4917 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4918 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8) 4919 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16) 4920 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32) 4921 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64) 4922 4923 /* Vector Floating-Point Slide Instructions */ 4924 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \ 4925 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4926 CPURISCVState *env, uint32_t desc) \ 4927 { \ 4928 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4929 } 4930 4931 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */ 4932 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16) 4933 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32) 4934 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64) 4935 4936 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \ 4937 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4938 CPURISCVState *env, uint32_t desc) \ 4939 { \ 4940 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \ 4941 } 4942 4943 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */ 4944 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16) 4945 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32) 4946 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64) 4947 4948 /* Vector Register Gather Instruction */ 4949 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \ 4950 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4951 CPURISCVState *env, uint32_t desc) \ 4952 { \ 4953 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \ 4954 uint32_t vm = vext_vm(desc); \ 4955 uint32_t vl = env->vl; \ 4956 uint32_t esz = sizeof(TS2); \ 4957 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \ 4958 uint32_t vta = vext_vta(desc); \ 4959 uint32_t vma = vext_vma(desc); \ 4960 uint64_t index; \ 4961 uint32_t i; \ 4962 \ 4963 for (i = env->vstart; i < vl; i++) { \ 4964 if (!vm && !vext_elem_mask(v0, i)) { \ 4965 /* set masked-off elements to 1s */ \ 4966 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \ 4967 continue; \ 4968 } \ 4969 index = *((TS1 *)vs1 + HS1(i)); \ 4970 if (index >= vlmax) { \ 4971 *((TS2 *)vd + HS2(i)) = 0; \ 4972 } else { \ 4973 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \ 4974 } \ 4975 } \ 4976 env->vstart = 0; \ 4977 /* set tail elements to 1s */ \ 4978 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ 4979 } 4980 4981 /* vd[i] = (vs1[i] >= VLMAX) ? 

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2)));             \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(TS2);                                           \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index;                                                       \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        index = *((TS1 *)vs1 + HS1(i));                                   \
        if (index >= vlmax) {                                             \
            *((TS2 *)vd + HS2(i)) = 0;                                    \
        } else {                                                          \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t,  uint8_t,  H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t,  H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H)                              \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE)));           \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint64_t index = s1;                                                  \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t,  H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H)                             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vext_elem_mask(vs1, i)) {                                    \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t,  H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - startb);

    env->vstart = 0;
}
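
/*
 * Note on vmvr_v above: vmv<nr>r.v copies whole registers, so the helper
 * can operate on raw bytes.  startb converts the element-indexed vstart
 * into a byte offset, letting an interrupted copy resume where it
 * stopped; maxsz (from simd_maxsz(desc)) is assumed to cover the bytes
 * of all NF source registers.
 */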

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1)                     \
void HELPER(NAME)(void *vd, void *v0, void *vs2,                          \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t vl = env->vl;                                                \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t esz = sizeof(ETYPE);                                         \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz);          \
    uint32_t vta = vext_vta(desc);                                        \
    uint32_t vma = vext_vma(desc);                                        \
    uint32_t i;                                                           \
                                                                          \
    for (i = env->vstart; i < vl; i++) {                                  \
        if (!vm && !vext_elem_mask(v0, i)) {                              \
            /* set masked-off elements to 1s */                           \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);           \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i));                \
    }                                                                     \
    env->vstart = 0;                                                      \
    /* set tail elements to 1s */                                         \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);              \
}

GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t,  H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t,  H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t,  H8, H1)

GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t,  H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t,  H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t,  H8, H1)
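
/*
 * Illustrative example (byte values are made up): widening the source
 * byte 0x80 with vzext.vf2 at SEW=16 yields 0x0080, while vsext.vf2
 * yields 0xFF80; the signedness of DTYPE/ETYPE in the instantiations
 * above is what selects zero- versus sign-extension in the assignment.
 */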