1 /* 2 * RISC-V Vector Extension Helpers for QEMU. 3 * 4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms and conditions of the GNU General Public License, 8 * version 2 or later, as published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program. If not, see <http://www.gnu.org/licenses/>. 17 */ 18 19 #include "qemu/osdep.h" 20 #include "cpu.h" 21 #include "exec/memop.h" 22 #include "exec/exec-all.h" 23 #include "exec/helper-proto.h" 24 #include "fpu/softfloat.h" 25 #include "tcg/tcg-gvec-desc.h" 26 #include "internals.h" 27 #include <math.h> 28 29 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, 30 target_ulong s2) 31 { 32 int vlmax, vl; 33 RISCVCPU *cpu = env_archcpu(env); 34 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL); 35 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW); 36 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV); 37 bool vill = FIELD_EX64(s2, VTYPE, VILL); 38 target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED); 39 40 if (lmul & 4) { 41 /* Fractional LMUL. */ 42 if (lmul == 4 || 43 cpu->cfg.elen >> (8 - lmul) < sew) { 44 vill = true; 45 } 46 } 47 48 if ((sew > cpu->cfg.elen) 49 || vill 50 || (ediv != 0) 51 || (reserved != 0)) { 52 /* only set vill bit. */ 53 env->vtype = FIELD_DP64(0, VTYPE, VILL, 1); 54 env->vl = 0; 55 env->vstart = 0; 56 return 0; 57 } 58 59 vlmax = vext_get_vlmax(cpu, s2); 60 if (s1 <= vlmax) { 61 vl = s1; 62 } else { 63 vl = vlmax; 64 } 65 env->vl = vl; 66 env->vtype = s2; 67 env->vstart = 0; 68 return vl; 69 } 70 71 /* 72 * Note that vector data is stored in host-endian 64-bit chunks, 73 * so addressing units smaller than that needs a host-endian fixup. 74 */ 75 #ifdef HOST_WORDS_BIGENDIAN 76 #define H1(x) ((x) ^ 7) 77 #define H1_2(x) ((x) ^ 6) 78 #define H1_4(x) ((x) ^ 4) 79 #define H2(x) ((x) ^ 3) 80 #define H4(x) ((x) ^ 1) 81 #define H8(x) ((x)) 82 #else 83 #define H1(x) (x) 84 #define H1_2(x) (x) 85 #define H1_4(x) (x) 86 #define H2(x) (x) 87 #define H4(x) (x) 88 #define H8(x) (x) 89 #endif 90 91 static inline uint32_t vext_nf(uint32_t desc) 92 { 93 return FIELD_EX32(simd_data(desc), VDATA, NF); 94 } 95 96 static inline uint32_t vext_vm(uint32_t desc) 97 { 98 return FIELD_EX32(simd_data(desc), VDATA, VM); 99 } 100 101 /* 102 * Encode LMUL to lmul as following: 103 * LMUL vlmul lmul 104 * 1 000 0 105 * 2 001 1 106 * 4 010 2 107 * 8 011 3 108 * - 100 - 109 * 1/8 101 -3 110 * 1/4 110 -2 111 * 1/2 111 -1 112 */ 113 static inline int32_t vext_lmul(uint32_t desc) 114 { 115 return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); 116 } 117 118 /* 119 * Get vector group length in bytes. Its range is [64, 2048]. 120 * 121 * As simd_desc support at most 256, the max vlen is 512 bits. 122 * So vlen in bytes is encoded as maxsz. 123 */ 124 static inline uint32_t vext_maxsz(uint32_t desc) 125 { 126 return simd_maxsz(desc) << vext_lmul(desc); 127 } 128 129 /* 130 * This function checks watchpoint before real load operation. 131 * 132 * In softmmu mode, the TLB API probe_access is enough for watchpoint check. 133 * In user mode, there is no watchpoint support now. 
134 * 135 * It will trigger an exception if there is no mapping in TLB 136 * and page table walk can't fill the TLB entry. Then the guest 137 * software can return here after process the exception or never return. 138 */ 139 static void probe_pages(CPURISCVState *env, target_ulong addr, 140 target_ulong len, uintptr_t ra, 141 MMUAccessType access_type) 142 { 143 target_ulong pagelen = -(addr | TARGET_PAGE_MASK); 144 target_ulong curlen = MIN(pagelen, len); 145 146 probe_access(env, addr, curlen, access_type, 147 cpu_mmu_index(env, false), ra); 148 if (len > curlen) { 149 addr += curlen; 150 curlen = len - curlen; 151 probe_access(env, addr, curlen, access_type, 152 cpu_mmu_index(env, false), ra); 153 } 154 } 155 156 static inline void vext_set_elem_mask(void *v0, int index, 157 uint8_t value) 158 { 159 int idx = index / 64; 160 int pos = index % 64; 161 uint64_t old = ((uint64_t *)v0)[idx]; 162 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); 163 } 164 165 /* 166 * Earlier designs (pre-0.9) had a varying number of bits 167 * per mask value (MLEN). In the 0.9 design, MLEN=1. 168 * (Section 4.5) 169 */ 170 static inline int vext_elem_mask(void *v0, int index) 171 { 172 int idx = index / 64; 173 int pos = index % 64; 174 return (((uint64_t *)v0)[idx] >> pos) & 1; 175 } 176 177 /* elements operations for load and store */ 178 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr, 179 uint32_t idx, void *vd, uintptr_t retaddr); 180 181 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ 182 static void NAME(CPURISCVState *env, abi_ptr addr, \ 183 uint32_t idx, void *vd, uintptr_t retaddr)\ 184 { \ 185 ETYPE *cur = ((ETYPE *)vd + H(idx)); \ 186 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ 187 } \ 188 189 GEN_VEXT_LD_ELEM(ldb_b, int8_t, H1, ldsb) 190 GEN_VEXT_LD_ELEM(ldb_h, int16_t, H2, ldsb) 191 GEN_VEXT_LD_ELEM(ldb_w, int32_t, H4, ldsb) 192 GEN_VEXT_LD_ELEM(ldb_d, int64_t, H8, ldsb) 193 GEN_VEXT_LD_ELEM(ldh_h, int16_t, H2, ldsw) 194 GEN_VEXT_LD_ELEM(ldh_w, int32_t, H4, ldsw) 195 GEN_VEXT_LD_ELEM(ldh_d, int64_t, H8, ldsw) 196 GEN_VEXT_LD_ELEM(ldw_w, int32_t, H4, ldl) 197 GEN_VEXT_LD_ELEM(ldw_d, int64_t, H8, ldl) 198 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb) 199 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw) 200 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl) 201 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq) 202 GEN_VEXT_LD_ELEM(ldbu_b, uint8_t, H1, ldub) 203 GEN_VEXT_LD_ELEM(ldbu_h, uint16_t, H2, ldub) 204 GEN_VEXT_LD_ELEM(ldbu_w, uint32_t, H4, ldub) 205 GEN_VEXT_LD_ELEM(ldbu_d, uint64_t, H8, ldub) 206 GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, H2, lduw) 207 GEN_VEXT_LD_ELEM(ldhu_w, uint32_t, H4, lduw) 208 GEN_VEXT_LD_ELEM(ldhu_d, uint64_t, H8, lduw) 209 GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, H4, ldl) 210 GEN_VEXT_LD_ELEM(ldwu_d, uint64_t, H8, ldl) 211 212 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ 213 static void NAME(CPURISCVState *env, abi_ptr addr, \ 214 uint32_t idx, void *vd, uintptr_t retaddr)\ 215 { \ 216 ETYPE data = *((ETYPE *)vd + H(idx)); \ 217 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ 218 } 219 220 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb) 221 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw) 222 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl) 223 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq) 224 225 /* 226 *** stride: access vector element from strided memory 227 */ 228 static void 229 vext_ldst_stride(void *vd, void *v0, target_ulong base, 230 target_ulong stride, CPURISCVState *env, 231 uint32_t desc, uint32_t vm, 232 vext_ldst_elem_fn *ldst_elem, 233 uint32_t esz, uintptr_t ra, 
MMUAccessType access_type) 234 { 235 uint32_t i, k; 236 uint32_t nf = vext_nf(desc); 237 uint32_t vlmax = vext_maxsz(desc) / esz; 238 239 /* probe every access*/ 240 for (i = 0; i < env->vl; i++) { 241 if (!vm && !vext_elem_mask(v0, i)) { 242 continue; 243 } 244 probe_pages(env, base + stride * i, nf * esz, ra, access_type); 245 } 246 /* do real access */ 247 for (i = 0; i < env->vl; i++) { 248 k = 0; 249 if (!vm && !vext_elem_mask(v0, i)) { 250 continue; 251 } 252 while (k < nf) { 253 target_ulong addr = base + stride * i + k * esz; 254 ldst_elem(env, addr, i + k * vlmax, vd, ra); 255 k++; 256 } 257 } 258 } 259 260 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \ 261 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \ 262 target_ulong stride, CPURISCVState *env, \ 263 uint32_t desc) \ 264 { \ 265 uint32_t vm = vext_vm(desc); \ 266 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \ 267 sizeof(ETYPE), GETPC(), MMU_DATA_LOAD); \ 268 } 269 270 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b) 271 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h) 272 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w) 273 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d) 274 275 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \ 276 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 277 target_ulong stride, CPURISCVState *env, \ 278 uint32_t desc) \ 279 { \ 280 uint32_t vm = vext_vm(desc); \ 281 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \ 282 sizeof(ETYPE), GETPC(), MMU_DATA_STORE); \ 283 } 284 285 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b) 286 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h) 287 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w) 288 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d) 289 290 /* 291 *** unit-stride: access elements stored contiguously in memory 292 */ 293 294 /* unmasked unit-stride load and store operation*/ 295 static void 296 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, 297 vext_ldst_elem_fn *ldst_elem, 298 uint32_t esz, uintptr_t ra, MMUAccessType access_type) 299 { 300 uint32_t i, k; 301 uint32_t nf = vext_nf(desc); 302 uint32_t vlmax = vext_maxsz(desc) / esz; 303 304 /* probe every access */ 305 probe_pages(env, base, env->vl * nf * esz, ra, access_type); 306 /* load bytes from guest memory */ 307 for (i = 0; i < env->vl; i++) { 308 k = 0; 309 while (k < nf) { 310 target_ulong addr = base + (i * nf + k) * esz; 311 ldst_elem(env, addr, i + k * vlmax, vd, ra); 312 k++; 313 } 314 } 315 } 316 317 /* 318 * masked unit-stride load and store operation will be a special case of stride, 319 * stride = NF * sizeof (MTYPE) 320 */ 321 322 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \ 323 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 324 CPURISCVState *env, uint32_t desc) \ 325 { \ 326 uint32_t stride = vext_nf(desc) * sizeof(ETYPE); \ 327 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \ 328 sizeof(ETYPE), GETPC(), MMU_DATA_LOAD); \ 329 } \ 330 \ 331 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 332 CPURISCVState *env, uint32_t desc) \ 333 { \ 334 vext_ldst_us(vd, base, env, desc, LOAD_FN, \ 335 sizeof(ETYPE), GETPC(), MMU_DATA_LOAD); \ 336 } 337 338 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b) 339 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h) 340 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w) 341 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d) 342 343 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \ 344 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \ 345 CPURISCVState *env, uint32_t desc) \ 346 { 
\ 347 uint32_t stride = vext_nf(desc) * sizeof(ETYPE); \ 348 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \ 349 sizeof(ETYPE), GETPC(), MMU_DATA_STORE); \ 350 } \ 351 \ 352 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 353 CPURISCVState *env, uint32_t desc) \ 354 { \ 355 vext_ldst_us(vd, base, env, desc, STORE_FN, \ 356 sizeof(ETYPE), GETPC(), MMU_DATA_STORE); \ 357 } 358 359 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b) 360 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h) 361 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w) 362 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d) 363 364 /* 365 *** index: access vector element from indexed memory 366 */ 367 typedef target_ulong vext_get_index_addr(target_ulong base, 368 uint32_t idx, void *vs2); 369 370 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \ 371 static target_ulong NAME(target_ulong base, \ 372 uint32_t idx, void *vs2) \ 373 { \ 374 return (base + *((ETYPE *)vs2 + H(idx))); \ 375 } 376 377 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1) 378 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2) 379 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4) 380 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8) 381 382 static inline void 383 vext_ldst_index(void *vd, void *v0, target_ulong base, 384 void *vs2, CPURISCVState *env, uint32_t desc, 385 vext_get_index_addr get_index_addr, 386 vext_ldst_elem_fn *ldst_elem, 387 uint32_t esz, uintptr_t ra, MMUAccessType access_type) 388 { 389 uint32_t i, k; 390 uint32_t nf = vext_nf(desc); 391 uint32_t vm = vext_vm(desc); 392 uint32_t vlmax = vext_maxsz(desc) / esz; 393 394 /* probe every access*/ 395 for (i = 0; i < env->vl; i++) { 396 if (!vm && !vext_elem_mask(v0, i)) { 397 continue; 398 } 399 probe_pages(env, get_index_addr(base, i, vs2), nf * esz, ra, 400 access_type); 401 } 402 /* load bytes from guest memory */ 403 for (i = 0; i < env->vl; i++) { 404 k = 0; 405 if (!vm && !vext_elem_mask(v0, i)) { 406 continue; 407 } 408 while (k < nf) { 409 abi_ptr addr = get_index_addr(base, i, vs2) + k * esz; 410 ldst_elem(env, addr, i + k * vlmax, vd, ra); 411 k++; 412 } 413 } 414 } 415 416 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \ 417 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 418 void *vs2, CPURISCVState *env, uint32_t desc) \ 419 { \ 420 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \ 421 LOAD_FN, sizeof(ETYPE), GETPC(), MMU_DATA_LOAD); \ 422 } 423 424 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b) 425 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h) 426 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w) 427 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d) 428 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b) 429 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h) 430 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w) 431 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d) 432 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b) 433 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h) 434 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w) 435 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d) 436 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b) 437 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h) 438 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w) 439 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d) 440 441 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \ 442 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 443 void *vs2, CPURISCVState *env, uint32_t desc) \ 444 { \ 445 vext_ldst_index(vd, v0, base, 
vs2, env, desc, INDEX_FN, \ 446 STORE_FN, sizeof(ETYPE), \ 447 GETPC(), MMU_DATA_STORE); \ 448 } 449 450 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b) 451 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h) 452 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w) 453 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d) 454 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b) 455 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h) 456 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w) 457 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d) 458 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b) 459 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h) 460 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w) 461 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d) 462 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b) 463 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h) 464 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w) 465 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d) 466 467 /* 468 *** unit-stride fault-only-fisrt load instructions 469 */ 470 static inline void 471 vext_ldff(void *vd, void *v0, target_ulong base, 472 CPURISCVState *env, uint32_t desc, 473 vext_ldst_elem_fn *ldst_elem, 474 uint32_t esz, uint32_t msz, uintptr_t ra) 475 { 476 void *host; 477 uint32_t i, k, vl = 0; 478 uint32_t nf = vext_nf(desc); 479 uint32_t vm = vext_vm(desc); 480 uint32_t vlmax = vext_maxsz(desc) / esz; 481 target_ulong addr, offset, remain; 482 483 /* probe every access*/ 484 for (i = 0; i < env->vl; i++) { 485 if (!vm && !vext_elem_mask(v0, i)) { 486 continue; 487 } 488 addr = base + nf * i * msz; 489 if (i == 0) { 490 probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD); 491 } else { 492 /* if it triggers an exception, no need to check watchpoint */ 493 remain = nf * msz; 494 while (remain > 0) { 495 offset = -(addr | TARGET_PAGE_MASK); 496 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, 497 cpu_mmu_index(env, false)); 498 if (host) { 499 #ifdef CONFIG_USER_ONLY 500 if (page_check_range(addr, nf * msz, PAGE_READ) < 0) { 501 vl = i; 502 goto ProbeSuccess; 503 } 504 #else 505 probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD); 506 #endif 507 } else { 508 vl = i; 509 goto ProbeSuccess; 510 } 511 if (remain <= offset) { 512 break; 513 } 514 remain -= offset; 515 addr += offset; 516 } 517 } 518 } 519 ProbeSuccess: 520 /* load bytes from guest memory */ 521 if (vl != 0) { 522 env->vl = vl; 523 } 524 for (i = 0; i < env->vl; i++) { 525 k = 0; 526 if (!vm && !vext_elem_mask(v0, i)) { 527 continue; 528 } 529 while (k < nf) { 530 target_ulong addr = base + (i * nf + k) * msz; 531 ldst_elem(env, addr, i + k * vlmax, vd, ra); 532 k++; 533 } 534 } 535 } 536 537 #define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN) \ 538 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \ 539 CPURISCVState *env, uint32_t desc) \ 540 { \ 541 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \ 542 sizeof(ETYPE), sizeof(MTYPE), GETPC()); \ 543 } 544 545 GEN_VEXT_LDFF(vlbff_v_b, int8_t, int8_t, ldb_b) 546 GEN_VEXT_LDFF(vlbff_v_h, int8_t, int16_t, ldb_h) 547 GEN_VEXT_LDFF(vlbff_v_w, int8_t, int32_t, ldb_w) 548 GEN_VEXT_LDFF(vlbff_v_d, int8_t, int64_t, ldb_d) 549 GEN_VEXT_LDFF(vlhff_v_h, int16_t, int16_t, ldh_h) 550 GEN_VEXT_LDFF(vlhff_v_w, int16_t, int32_t, ldh_w) 551 GEN_VEXT_LDFF(vlhff_v_d, int16_t, int64_t, ldh_d) 552 GEN_VEXT_LDFF(vlwff_v_w, int32_t, int32_t, ldw_w) 553 GEN_VEXT_LDFF(vlwff_v_d, int32_t, int64_t, ldw_d) 554 GEN_VEXT_LDFF(vleff_v_b, int8_t, int8_t, lde_b) 555 
GEN_VEXT_LDFF(vleff_v_h, int16_t, int16_t, lde_h) 556 GEN_VEXT_LDFF(vleff_v_w, int32_t, int32_t, lde_w) 557 GEN_VEXT_LDFF(vleff_v_d, int64_t, int64_t, lde_d) 558 GEN_VEXT_LDFF(vlbuff_v_b, uint8_t, uint8_t, ldbu_b) 559 GEN_VEXT_LDFF(vlbuff_v_h, uint8_t, uint16_t, ldbu_h) 560 GEN_VEXT_LDFF(vlbuff_v_w, uint8_t, uint32_t, ldbu_w) 561 GEN_VEXT_LDFF(vlbuff_v_d, uint8_t, uint64_t, ldbu_d) 562 GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h) 563 GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w) 564 GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d) 565 GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w) 566 GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d) 567 568 #define DO_SWAP(N, M) (M) 569 #define DO_AND(N, M) (N & M) 570 #define DO_XOR(N, M) (N ^ M) 571 #define DO_OR(N, M) (N | M) 572 #define DO_ADD(N, M) (N + M) 573 574 /* Signed min/max */ 575 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 576 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 577 578 /* Unsigned min/max */ 579 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 580 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 581 582 /* 583 *** Vector Integer Arithmetic Instructions 584 */ 585 586 /* expand macro args before macro */ 587 #define RVVCALL(macro, ...) macro(__VA_ARGS__) 588 589 /* (TD, T1, T2, TX1, TX2) */ 590 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 591 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 592 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 593 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 594 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 595 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 596 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 597 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 598 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 599 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 600 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 601 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 602 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 603 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 604 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 605 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 606 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 607 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 608 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 609 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 610 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 611 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 612 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 613 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 614 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 615 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 616 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 617 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 618 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 619 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 620 621 /* operation of two vector elements */ 622 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 623 624 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 625 static void do_##NAME(void *vd, void *vs1, void 
*vs2, int i) \ 626 { \ 627 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 628 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 629 *((TD *)vd + HD(i)) = OP(s2, s1); \ 630 } 631 #define DO_SUB(N, M) (N - M) 632 #define DO_RSUB(N, M) (M - N) 633 634 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 635 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 636 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 637 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 638 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 639 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 640 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 641 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 642 643 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 644 CPURISCVState *env, uint32_t desc, 645 uint32_t esz, uint32_t dsz, 646 opivv2_fn *fn) 647 { 648 uint32_t vm = vext_vm(desc); 649 uint32_t vl = env->vl; 650 uint32_t i; 651 652 for (i = 0; i < vl; i++) { 653 if (!vm && !vext_elem_mask(v0, i)) { 654 continue; 655 } 656 fn(vd, vs1, vs2, i); 657 } 658 } 659 660 /* generate the helpers for OPIVV */ 661 #define GEN_VEXT_VV(NAME, ESZ, DSZ) \ 662 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 663 void *vs2, CPURISCVState *env, \ 664 uint32_t desc) \ 665 { \ 666 do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 667 do_##NAME); \ 668 } 669 670 GEN_VEXT_VV(vadd_vv_b, 1, 1) 671 GEN_VEXT_VV(vadd_vv_h, 2, 2) 672 GEN_VEXT_VV(vadd_vv_w, 4, 4) 673 GEN_VEXT_VV(vadd_vv_d, 8, 8) 674 GEN_VEXT_VV(vsub_vv_b, 1, 1) 675 GEN_VEXT_VV(vsub_vv_h, 2, 2) 676 GEN_VEXT_VV(vsub_vv_w, 4, 4) 677 GEN_VEXT_VV(vsub_vv_d, 8, 8) 678 679 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 680 681 /* 682 * (T1)s1 gives the real operator type. 683 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 
684 */ 685 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 686 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 687 { \ 688 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 689 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 690 } 691 692 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 693 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 694 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 695 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 696 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 697 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 698 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 699 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 700 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 701 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 702 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 703 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 704 705 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 706 CPURISCVState *env, uint32_t desc, 707 uint32_t esz, uint32_t dsz, 708 opivx2_fn fn) 709 { 710 uint32_t vm = vext_vm(desc); 711 uint32_t vl = env->vl; 712 uint32_t i; 713 714 for (i = 0; i < vl; i++) { 715 if (!vm && !vext_elem_mask(v0, i)) { 716 continue; 717 } 718 fn(vd, s1, vs2, i); 719 } 720 } 721 722 /* generate the helpers for OPIVX */ 723 #define GEN_VEXT_VX(NAME, ESZ, DSZ) \ 724 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 725 void *vs2, CPURISCVState *env, \ 726 uint32_t desc) \ 727 { \ 728 do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 729 do_##NAME); \ 730 } 731 732 GEN_VEXT_VX(vadd_vx_b, 1, 1) 733 GEN_VEXT_VX(vadd_vx_h, 2, 2) 734 GEN_VEXT_VX(vadd_vx_w, 4, 4) 735 GEN_VEXT_VX(vadd_vx_d, 8, 8) 736 GEN_VEXT_VX(vsub_vx_b, 1, 1) 737 GEN_VEXT_VX(vsub_vx_h, 2, 2) 738 GEN_VEXT_VX(vsub_vx_w, 4, 4) 739 GEN_VEXT_VX(vsub_vx_d, 8, 8) 740 GEN_VEXT_VX(vrsub_vx_b, 1, 1) 741 GEN_VEXT_VX(vrsub_vx_h, 2, 2) 742 GEN_VEXT_VX(vrsub_vx_w, 4, 4) 743 GEN_VEXT_VX(vrsub_vx_d, 8, 8) 744 745 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 746 { 747 intptr_t oprsz = simd_oprsz(desc); 748 intptr_t i; 749 750 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 751 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 752 } 753 } 754 755 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 756 { 757 intptr_t oprsz = simd_oprsz(desc); 758 intptr_t i; 759 760 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 761 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 762 } 763 } 764 765 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 766 { 767 intptr_t oprsz = simd_oprsz(desc); 768 intptr_t i; 769 770 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 771 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 772 } 773 } 774 775 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 776 { 777 intptr_t oprsz = simd_oprsz(desc); 778 intptr_t i; 779 780 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 781 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 782 } 783 } 784 785 /* Vector Widening Integer Add/Subtract */ 786 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 787 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 788 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 789 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 790 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 791 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 792 #define 
WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 793 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 794 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 795 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 796 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 797 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 798 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 799 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 800 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 801 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 802 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 803 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 804 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 805 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 806 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 807 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 808 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 809 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 810 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 811 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 812 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 813 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 814 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 815 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 816 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 817 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 818 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 819 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 820 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 821 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 822 GEN_VEXT_VV(vwaddu_vv_b, 1, 2) 823 GEN_VEXT_VV(vwaddu_vv_h, 2, 4) 824 GEN_VEXT_VV(vwaddu_vv_w, 4, 8) 825 GEN_VEXT_VV(vwsubu_vv_b, 1, 2) 826 GEN_VEXT_VV(vwsubu_vv_h, 2, 4) 827 GEN_VEXT_VV(vwsubu_vv_w, 4, 8) 828 GEN_VEXT_VV(vwadd_vv_b, 1, 2) 829 GEN_VEXT_VV(vwadd_vv_h, 2, 4) 830 GEN_VEXT_VV(vwadd_vv_w, 4, 8) 831 GEN_VEXT_VV(vwsub_vv_b, 1, 2) 832 GEN_VEXT_VV(vwsub_vv_h, 2, 4) 833 GEN_VEXT_VV(vwsub_vv_w, 4, 8) 834 GEN_VEXT_VV(vwaddu_wv_b, 1, 2) 835 GEN_VEXT_VV(vwaddu_wv_h, 2, 4) 836 GEN_VEXT_VV(vwaddu_wv_w, 4, 8) 837 GEN_VEXT_VV(vwsubu_wv_b, 1, 2) 838 GEN_VEXT_VV(vwsubu_wv_h, 2, 4) 839 GEN_VEXT_VV(vwsubu_wv_w, 4, 8) 840 GEN_VEXT_VV(vwadd_wv_b, 1, 2) 841 GEN_VEXT_VV(vwadd_wv_h, 2, 4) 842 GEN_VEXT_VV(vwadd_wv_w, 4, 8) 843 GEN_VEXT_VV(vwsub_wv_b, 1, 2) 844 GEN_VEXT_VV(vwsub_wv_h, 2, 4) 845 GEN_VEXT_VV(vwsub_wv_w, 4, 8) 846 847 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 848 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 849 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 850 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 851 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 852 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 853 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 854 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 855 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 856 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 857 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 858 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 859 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 860 
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 861 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 862 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 863 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 864 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 865 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 866 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 867 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 868 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 869 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 870 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 871 GEN_VEXT_VX(vwaddu_vx_b, 1, 2) 872 GEN_VEXT_VX(vwaddu_vx_h, 2, 4) 873 GEN_VEXT_VX(vwaddu_vx_w, 4, 8) 874 GEN_VEXT_VX(vwsubu_vx_b, 1, 2) 875 GEN_VEXT_VX(vwsubu_vx_h, 2, 4) 876 GEN_VEXT_VX(vwsubu_vx_w, 4, 8) 877 GEN_VEXT_VX(vwadd_vx_b, 1, 2) 878 GEN_VEXT_VX(vwadd_vx_h, 2, 4) 879 GEN_VEXT_VX(vwadd_vx_w, 4, 8) 880 GEN_VEXT_VX(vwsub_vx_b, 1, 2) 881 GEN_VEXT_VX(vwsub_vx_h, 2, 4) 882 GEN_VEXT_VX(vwsub_vx_w, 4, 8) 883 GEN_VEXT_VX(vwaddu_wx_b, 1, 2) 884 GEN_VEXT_VX(vwaddu_wx_h, 2, 4) 885 GEN_VEXT_VX(vwaddu_wx_w, 4, 8) 886 GEN_VEXT_VX(vwsubu_wx_b, 1, 2) 887 GEN_VEXT_VX(vwsubu_wx_h, 2, 4) 888 GEN_VEXT_VX(vwsubu_wx_w, 4, 8) 889 GEN_VEXT_VX(vwadd_wx_b, 1, 2) 890 GEN_VEXT_VX(vwadd_wx_h, 2, 4) 891 GEN_VEXT_VX(vwadd_wx_w, 4, 8) 892 GEN_VEXT_VX(vwsub_wx_b, 1, 2) 893 GEN_VEXT_VX(vwsub_wx_h, 2, 4) 894 GEN_VEXT_VX(vwsub_wx_w, 4, 8) 895 896 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 897 #define DO_VADC(N, M, C) (N + M + C) 898 #define DO_VSBC(N, M, C) (N - M - C) 899 900 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 901 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 902 CPURISCVState *env, uint32_t desc) \ 903 { \ 904 uint32_t vl = env->vl; \ 905 uint32_t i; \ 906 \ 907 for (i = 0; i < vl; i++) { \ 908 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 909 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 910 uint8_t carry = vext_elem_mask(v0, i); \ 911 \ 912 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 913 } \ 914 } 915 916 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 917 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 918 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 919 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 920 921 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 922 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 923 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 924 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 925 926 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 927 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 928 CPURISCVState *env, uint32_t desc) \ 929 { \ 930 uint32_t vl = env->vl; \ 931 uint32_t i; \ 932 \ 933 for (i = 0; i < vl; i++) { \ 934 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 935 uint8_t carry = vext_elem_mask(v0, i); \ 936 \ 937 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 938 } \ 939 } 940 941 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 942 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 943 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 944 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 945 946 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 947 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 948 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 949 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 950 951 #define DO_MADC(N, M, C) (C ? 
(__typeof(N))(N + M + 1) <= N : \ 952 (__typeof(N))(N + M) < N) 953 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 954 955 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 956 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 957 CPURISCVState *env, uint32_t desc) \ 958 { \ 959 uint32_t vl = env->vl; \ 960 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 961 uint32_t i; \ 962 \ 963 for (i = 0; i < vl; i++) { \ 964 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 965 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 966 uint8_t carry = vext_elem_mask(v0, i); \ 967 \ 968 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 969 } \ 970 for (; i < vlmax; i++) { \ 971 vext_set_elem_mask(vd, i, 0); \ 972 } \ 973 } 974 975 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 976 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 977 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 978 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 979 980 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 981 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 982 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 983 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 984 985 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 986 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 987 void *vs2, CPURISCVState *env, uint32_t desc) \ 988 { \ 989 uint32_t vl = env->vl; \ 990 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 991 uint32_t i; \ 992 \ 993 for (i = 0; i < vl; i++) { \ 994 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 995 uint8_t carry = vext_elem_mask(v0, i); \ 996 \ 997 vext_set_elem_mask(vd, i, \ 998 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 999 } \ 1000 for (; i < vlmax; i++) { \ 1001 vext_set_elem_mask(vd, i, 0); \ 1002 } \ 1003 } 1004 1005 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1006 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1007 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1008 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1009 1010 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1011 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1012 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1013 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1014 1015 /* Vector Bitwise Logical Instructions */ 1016 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1017 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1018 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1019 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1020 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1021 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1022 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1023 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1024 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1025 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1026 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1027 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1028 GEN_VEXT_VV(vand_vv_b, 1, 1) 1029 GEN_VEXT_VV(vand_vv_h, 2, 2) 1030 GEN_VEXT_VV(vand_vv_w, 4, 4) 1031 GEN_VEXT_VV(vand_vv_d, 8, 8) 1032 GEN_VEXT_VV(vor_vv_b, 1, 1) 1033 GEN_VEXT_VV(vor_vv_h, 2, 2) 1034 GEN_VEXT_VV(vor_vv_w, 4, 4) 1035 GEN_VEXT_VV(vor_vv_d, 8, 8) 1036 GEN_VEXT_VV(vxor_vv_b, 1, 1) 1037 GEN_VEXT_VV(vxor_vv_h, 2, 2) 1038 GEN_VEXT_VV(vxor_vv_w, 4, 4) 1039 GEN_VEXT_VV(vxor_vv_d, 8, 8) 1040 1041 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1042 RVVCALL(OPIVX2, vand_vx_h, 
OP_SSS_H, H2, H2, DO_AND) 1043 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1044 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1045 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1046 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1047 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1048 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1049 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1050 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1051 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1052 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1053 GEN_VEXT_VX(vand_vx_b, 1, 1) 1054 GEN_VEXT_VX(vand_vx_h, 2, 2) 1055 GEN_VEXT_VX(vand_vx_w, 4, 4) 1056 GEN_VEXT_VX(vand_vx_d, 8, 8) 1057 GEN_VEXT_VX(vor_vx_b, 1, 1) 1058 GEN_VEXT_VX(vor_vx_h, 2, 2) 1059 GEN_VEXT_VX(vor_vx_w, 4, 4) 1060 GEN_VEXT_VX(vor_vx_d, 8, 8) 1061 GEN_VEXT_VX(vxor_vx_b, 1, 1) 1062 GEN_VEXT_VX(vxor_vx_h, 2, 2) 1063 GEN_VEXT_VX(vxor_vx_w, 4, 4) 1064 GEN_VEXT_VX(vxor_vx_d, 8, 8) 1065 1066 /* Vector Single-Width Bit Shift Instructions */ 1067 #define DO_SLL(N, M) (N << (M)) 1068 #define DO_SRL(N, M) (N >> (M)) 1069 1070 /* generate the helpers for shift instructions with two vector operators */ 1071 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1072 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1073 void *vs2, CPURISCVState *env, uint32_t desc) \ 1074 { \ 1075 uint32_t vm = vext_vm(desc); \ 1076 uint32_t vl = env->vl; \ 1077 uint32_t i; \ 1078 \ 1079 for (i = 0; i < vl; i++) { \ 1080 if (!vm && !vext_elem_mask(v0, i)) { \ 1081 continue; \ 1082 } \ 1083 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1084 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1085 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1086 } \ 1087 } 1088 1089 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1090 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1091 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1092 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1093 1094 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1095 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1096 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1097 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1098 1099 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1100 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1101 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1102 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1103 1104 /* generate the helpers for shift instructions with one vector and one scalar */ 1105 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1106 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1107 void *vs2, CPURISCVState *env, uint32_t desc) \ 1108 { \ 1109 uint32_t vm = vext_vm(desc); \ 1110 uint32_t vl = env->vl; \ 1111 uint32_t i; \ 1112 \ 1113 for (i = 0; i < vl; i++) { \ 1114 if (!vm && !vext_elem_mask(v0, i)) { \ 1115 continue; \ 1116 } \ 1117 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1118 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1119 } \ 1120 } 1121 1122 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1123 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1124 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1125 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1126 1127 
GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1128 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1129 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1130 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1131 1132 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1133 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1134 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1135 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1136 1137 /* Vector Narrowing Integer Right Shift Instructions */ 1138 GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1139 GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1140 GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1141 GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1142 GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1143 GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1144 GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1145 GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1146 GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1147 GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1148 GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1149 GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1150 1151 /* Vector Integer Comparison Instructions */ 1152 #define DO_MSEQ(N, M) (N == M) 1153 #define DO_MSNE(N, M) (N != M) 1154 #define DO_MSLT(N, M) (N < M) 1155 #define DO_MSLE(N, M) (N <= M) 1156 #define DO_MSGT(N, M) (N > M) 1157 1158 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1159 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1160 CPURISCVState *env, uint32_t desc) \ 1161 { \ 1162 uint32_t vm = vext_vm(desc); \ 1163 uint32_t vl = env->vl; \ 1164 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 1165 uint32_t i; \ 1166 \ 1167 for (i = 0; i < vl; i++) { \ 1168 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1169 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1170 if (!vm && !vext_elem_mask(v0, i)) { \ 1171 continue; \ 1172 } \ 1173 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1174 } \ 1175 for (; i < vlmax; i++) { \ 1176 vext_set_elem_mask(vd, i, 0); \ 1177 } \ 1178 } 1179 1180 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1181 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1182 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1183 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1184 1185 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1186 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1187 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1188 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1189 1190 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1191 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1192 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1193 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1194 1195 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1196 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1197 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1198 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1199 1200 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1201 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1202 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1203 
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1204 1205 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1206 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1207 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1208 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1209 1210 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1211 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1212 CPURISCVState *env, uint32_t desc) \ 1213 { \ 1214 uint32_t vm = vext_vm(desc); \ 1215 uint32_t vl = env->vl; \ 1216 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 1217 uint32_t i; \ 1218 \ 1219 for (i = 0; i < vl; i++) { \ 1220 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1221 if (!vm && !vext_elem_mask(v0, i)) { \ 1222 continue; \ 1223 } \ 1224 vext_set_elem_mask(vd, i, \ 1225 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1226 } \ 1227 for (; i < vlmax; i++) { \ 1228 vext_set_elem_mask(vd, i, 0); \ 1229 } \ 1230 } 1231 1232 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1233 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1234 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1235 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1236 1237 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1238 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1239 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1240 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1241 1242 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1243 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1244 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1245 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1246 1247 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1248 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1249 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1250 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1251 1252 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1253 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1254 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1255 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1256 1257 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1258 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1259 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1260 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1261 1262 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1263 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1264 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1265 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1266 1267 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1268 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1269 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1270 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1271 1272 /* Vector Integer Min/Max Instructions */ 1273 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1274 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1275 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1276 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1277 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1278 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1279 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1280 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1281 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1282 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1283 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1284 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 
1285 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1286 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1287 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1288 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1289 GEN_VEXT_VV(vminu_vv_b, 1, 1) 1290 GEN_VEXT_VV(vminu_vv_h, 2, 2) 1291 GEN_VEXT_VV(vminu_vv_w, 4, 4) 1292 GEN_VEXT_VV(vminu_vv_d, 8, 8) 1293 GEN_VEXT_VV(vmin_vv_b, 1, 1) 1294 GEN_VEXT_VV(vmin_vv_h, 2, 2) 1295 GEN_VEXT_VV(vmin_vv_w, 4, 4) 1296 GEN_VEXT_VV(vmin_vv_d, 8, 8) 1297 GEN_VEXT_VV(vmaxu_vv_b, 1, 1) 1298 GEN_VEXT_VV(vmaxu_vv_h, 2, 2) 1299 GEN_VEXT_VV(vmaxu_vv_w, 4, 4) 1300 GEN_VEXT_VV(vmaxu_vv_d, 8, 8) 1301 GEN_VEXT_VV(vmax_vv_b, 1, 1) 1302 GEN_VEXT_VV(vmax_vv_h, 2, 2) 1303 GEN_VEXT_VV(vmax_vv_w, 4, 4) 1304 GEN_VEXT_VV(vmax_vv_d, 8, 8) 1305 1306 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1307 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1308 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1309 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1310 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1311 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1312 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1313 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1314 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1315 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1316 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1317 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1318 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1319 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1320 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1321 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1322 GEN_VEXT_VX(vminu_vx_b, 1, 1) 1323 GEN_VEXT_VX(vminu_vx_h, 2, 2) 1324 GEN_VEXT_VX(vminu_vx_w, 4, 4) 1325 GEN_VEXT_VX(vminu_vx_d, 8, 8) 1326 GEN_VEXT_VX(vmin_vx_b, 1, 1) 1327 GEN_VEXT_VX(vmin_vx_h, 2, 2) 1328 GEN_VEXT_VX(vmin_vx_w, 4, 4) 1329 GEN_VEXT_VX(vmin_vx_d, 8, 8) 1330 GEN_VEXT_VX(vmaxu_vx_b, 1, 1) 1331 GEN_VEXT_VX(vmaxu_vx_h, 2, 2) 1332 GEN_VEXT_VX(vmaxu_vx_w, 4, 4) 1333 GEN_VEXT_VX(vmaxu_vx_d, 8, 8) 1334 GEN_VEXT_VX(vmax_vx_b, 1, 1) 1335 GEN_VEXT_VX(vmax_vx_h, 2, 2) 1336 GEN_VEXT_VX(vmax_vx_w, 4, 4) 1337 GEN_VEXT_VX(vmax_vx_d, 8, 8) 1338 1339 /* Vector Single-Width Integer Multiply Instructions */ 1340 #define DO_MUL(N, M) (N * M) 1341 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1342 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1343 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1344 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1345 GEN_VEXT_VV(vmul_vv_b, 1, 1) 1346 GEN_VEXT_VV(vmul_vv_h, 2, 2) 1347 GEN_VEXT_VV(vmul_vv_w, 4, 4) 1348 GEN_VEXT_VV(vmul_vv_d, 8, 8) 1349 1350 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1351 { 1352 return (int16_t)s2 * (int16_t)s1 >> 8; 1353 } 1354 1355 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1356 { 1357 return (int32_t)s2 * (int32_t)s1 >> 16; 1358 } 1359 1360 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1361 { 1362 return (int64_t)s2 * (int64_t)s1 >> 32; 1363 } 1364 1365 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1366 { 1367 uint64_t hi_64, lo_64; 1368 1369 muls64(&lo_64, &hi_64, s1, s2); 1370 return hi_64; 1371 } 1372 1373 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1374 { 1375 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1376 } 1377 1378 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1379 { 1380 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1381 } 1382 1383 static 
uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1384 { 1385 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1386 } 1387 1388 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1389 { 1390 uint64_t hi_64, lo_64; 1391 1392 mulu64(&lo_64, &hi_64, s2, s1); 1393 return hi_64; 1394 } 1395 1396 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1397 { 1398 return (int16_t)s2 * (uint16_t)s1 >> 8; 1399 } 1400 1401 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1402 { 1403 return (int32_t)s2 * (uint32_t)s1 >> 16; 1404 } 1405 1406 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1407 { 1408 return (int64_t)s2 * (uint64_t)s1 >> 32; 1409 } 1410 1411 /* 1412 * Let A = signed operand, 1413 * B = unsigned operand 1414 * P = mulu64(A, B), unsigned product 1415 * 1416 * LET X = 2 ** 64 - A, 2's complement of A 1417 * SP = signed product 1418 * THEN 1419 * IF A < 0 1420 * SP = -X * B 1421 * = -(2 ** 64 - A) * B 1422 * = A * B - 2 ** 64 * B 1423 * = P - 2 ** 64 * B 1424 * ELSE 1425 * SP = P 1426 * THEN 1427 * HI_P -= (A < 0 ? B : 0) 1428 */ 1429 1430 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1431 { 1432 uint64_t hi_64, lo_64; 1433 1434 mulu64(&lo_64, &hi_64, s2, s1); 1435 1436 hi_64 -= s2 < 0 ? s1 : 0; 1437 return hi_64; 1438 } 1439 1440 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1441 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1442 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1443 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1444 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1445 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1446 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1447 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1448 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1449 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1450 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1451 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1452 GEN_VEXT_VV(vmulh_vv_b, 1, 1) 1453 GEN_VEXT_VV(vmulh_vv_h, 2, 2) 1454 GEN_VEXT_VV(vmulh_vv_w, 4, 4) 1455 GEN_VEXT_VV(vmulh_vv_d, 8, 8) 1456 GEN_VEXT_VV(vmulhu_vv_b, 1, 1) 1457 GEN_VEXT_VV(vmulhu_vv_h, 2, 2) 1458 GEN_VEXT_VV(vmulhu_vv_w, 4, 4) 1459 GEN_VEXT_VV(vmulhu_vv_d, 8, 8) 1460 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) 1461 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) 1462 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) 1463 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) 1464 1465 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1466 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1467 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1468 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1469 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1470 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1471 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1472 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1473 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1474 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1475 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1476 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1477 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1478 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1479 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1480 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1481 GEN_VEXT_VX(vmul_vx_b, 1, 1) 1482 
GEN_VEXT_VX(vmul_vx_h, 2, 2) 1483 GEN_VEXT_VX(vmul_vx_w, 4, 4) 1484 GEN_VEXT_VX(vmul_vx_d, 8, 8) 1485 GEN_VEXT_VX(vmulh_vx_b, 1, 1) 1486 GEN_VEXT_VX(vmulh_vx_h, 2, 2) 1487 GEN_VEXT_VX(vmulh_vx_w, 4, 4) 1488 GEN_VEXT_VX(vmulh_vx_d, 8, 8) 1489 GEN_VEXT_VX(vmulhu_vx_b, 1, 1) 1490 GEN_VEXT_VX(vmulhu_vx_h, 2, 2) 1491 GEN_VEXT_VX(vmulhu_vx_w, 4, 4) 1492 GEN_VEXT_VX(vmulhu_vx_d, 8, 8) 1493 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) 1494 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) 1495 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) 1496 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) 1497 1498 /* Vector Integer Divide Instructions */ 1499 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1500 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1501 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1502 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1503 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1504 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1505 1506 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1507 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1508 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1509 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1510 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1511 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1512 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1513 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1514 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1515 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1516 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1517 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1518 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1519 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1520 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1521 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1522 GEN_VEXT_VV(vdivu_vv_b, 1, 1) 1523 GEN_VEXT_VV(vdivu_vv_h, 2, 2) 1524 GEN_VEXT_VV(vdivu_vv_w, 4, 4) 1525 GEN_VEXT_VV(vdivu_vv_d, 8, 8) 1526 GEN_VEXT_VV(vdiv_vv_b, 1, 1) 1527 GEN_VEXT_VV(vdiv_vv_h, 2, 2) 1528 GEN_VEXT_VV(vdiv_vv_w, 4, 4) 1529 GEN_VEXT_VV(vdiv_vv_d, 8, 8) 1530 GEN_VEXT_VV(vremu_vv_b, 1, 1) 1531 GEN_VEXT_VV(vremu_vv_h, 2, 2) 1532 GEN_VEXT_VV(vremu_vv_w, 4, 4) 1533 GEN_VEXT_VV(vremu_vv_d, 8, 8) 1534 GEN_VEXT_VV(vrem_vv_b, 1, 1) 1535 GEN_VEXT_VV(vrem_vv_h, 2, 2) 1536 GEN_VEXT_VV(vrem_vv_w, 4, 4) 1537 GEN_VEXT_VV(vrem_vv_d, 8, 8) 1538 1539 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1540 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1541 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1542 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1543 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1544 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1545 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1546 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1547 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1548 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1549 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1550 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1551 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1552 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1553 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1554 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1555 GEN_VEXT_VX(vdivu_vx_b, 1, 1) 1556 GEN_VEXT_VX(vdivu_vx_h, 2, 2) 1557 
GEN_VEXT_VX(vdivu_vx_w, 4, 4) 1558 GEN_VEXT_VX(vdivu_vx_d, 8, 8) 1559 GEN_VEXT_VX(vdiv_vx_b, 1, 1) 1560 GEN_VEXT_VX(vdiv_vx_h, 2, 2) 1561 GEN_VEXT_VX(vdiv_vx_w, 4, 4) 1562 GEN_VEXT_VX(vdiv_vx_d, 8, 8) 1563 GEN_VEXT_VX(vremu_vx_b, 1, 1) 1564 GEN_VEXT_VX(vremu_vx_h, 2, 2) 1565 GEN_VEXT_VX(vremu_vx_w, 4, 4) 1566 GEN_VEXT_VX(vremu_vx_d, 8, 8) 1567 GEN_VEXT_VX(vrem_vx_b, 1, 1) 1568 GEN_VEXT_VX(vrem_vx_h, 2, 2) 1569 GEN_VEXT_VX(vrem_vx_w, 4, 4) 1570 GEN_VEXT_VX(vrem_vx_d, 8, 8) 1571 1572 /* Vector Widening Integer Multiply Instructions */ 1573 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1574 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1575 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1576 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1577 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1578 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1579 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1580 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1581 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1582 GEN_VEXT_VV(vwmul_vv_b, 1, 2) 1583 GEN_VEXT_VV(vwmul_vv_h, 2, 4) 1584 GEN_VEXT_VV(vwmul_vv_w, 4, 8) 1585 GEN_VEXT_VV(vwmulu_vv_b, 1, 2) 1586 GEN_VEXT_VV(vwmulu_vv_h, 2, 4) 1587 GEN_VEXT_VV(vwmulu_vv_w, 4, 8) 1588 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) 1589 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) 1590 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) 1591 1592 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1593 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1594 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1595 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1596 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1597 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1598 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1599 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1600 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1601 GEN_VEXT_VX(vwmul_vx_b, 1, 2) 1602 GEN_VEXT_VX(vwmul_vx_h, 2, 4) 1603 GEN_VEXT_VX(vwmul_vx_w, 4, 8) 1604 GEN_VEXT_VX(vwmulu_vx_b, 1, 2) 1605 GEN_VEXT_VX(vwmulu_vx_h, 2, 4) 1606 GEN_VEXT_VX(vwmulu_vx_w, 4, 8) 1607 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) 1608 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) 1609 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) 1610 1611 /* Vector Single-Width Integer Multiply-Add Instructions */ 1612 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1613 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1614 { \ 1615 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1616 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1617 TD d = *((TD *)vd + HD(i)); \ 1618 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1619 } 1620 1621 #define DO_MACC(N, M, D) (M * N + D) 1622 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1623 #define DO_MADD(N, M, D) (M * D + N) 1624 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1625 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1626 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1627 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1628 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1629 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1630 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1631 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1632 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1633 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1634 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1635 RVVCALL(OPIVV3, 
vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1636 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1637 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1638 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1639 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1640 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1641 GEN_VEXT_VV(vmacc_vv_b, 1, 1) 1642 GEN_VEXT_VV(vmacc_vv_h, 2, 2) 1643 GEN_VEXT_VV(vmacc_vv_w, 4, 4) 1644 GEN_VEXT_VV(vmacc_vv_d, 8, 8) 1645 GEN_VEXT_VV(vnmsac_vv_b, 1, 1) 1646 GEN_VEXT_VV(vnmsac_vv_h, 2, 2) 1647 GEN_VEXT_VV(vnmsac_vv_w, 4, 4) 1648 GEN_VEXT_VV(vnmsac_vv_d, 8, 8) 1649 GEN_VEXT_VV(vmadd_vv_b, 1, 1) 1650 GEN_VEXT_VV(vmadd_vv_h, 2, 2) 1651 GEN_VEXT_VV(vmadd_vv_w, 4, 4) 1652 GEN_VEXT_VV(vmadd_vv_d, 8, 8) 1653 GEN_VEXT_VV(vnmsub_vv_b, 1, 1) 1654 GEN_VEXT_VV(vnmsub_vv_h, 2, 2) 1655 GEN_VEXT_VV(vnmsub_vv_w, 4, 4) 1656 GEN_VEXT_VV(vnmsub_vv_d, 8, 8) 1657 1658 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1659 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1660 { \ 1661 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1662 TD d = *((TD *)vd + HD(i)); \ 1663 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1664 } 1665 1666 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1667 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1668 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1669 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1670 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1671 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1672 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1673 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1674 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1675 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1676 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1677 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1678 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1679 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1680 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1681 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1682 GEN_VEXT_VX(vmacc_vx_b, 1, 1) 1683 GEN_VEXT_VX(vmacc_vx_h, 2, 2) 1684 GEN_VEXT_VX(vmacc_vx_w, 4, 4) 1685 GEN_VEXT_VX(vmacc_vx_d, 8, 8) 1686 GEN_VEXT_VX(vnmsac_vx_b, 1, 1) 1687 GEN_VEXT_VX(vnmsac_vx_h, 2, 2) 1688 GEN_VEXT_VX(vnmsac_vx_w, 4, 4) 1689 GEN_VEXT_VX(vnmsac_vx_d, 8, 8) 1690 GEN_VEXT_VX(vmadd_vx_b, 1, 1) 1691 GEN_VEXT_VX(vmadd_vx_h, 2, 2) 1692 GEN_VEXT_VX(vmadd_vx_w, 4, 4) 1693 GEN_VEXT_VX(vmadd_vx_d, 8, 8) 1694 GEN_VEXT_VX(vnmsub_vx_b, 1, 1) 1695 GEN_VEXT_VX(vnmsub_vx_h, 2, 2) 1696 GEN_VEXT_VX(vnmsub_vx_w, 4, 4) 1697 GEN_VEXT_VX(vnmsub_vx_d, 8, 8) 1698 1699 /* Vector Widening Integer Multiply-Add Instructions */ 1700 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1701 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1702 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1703 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1704 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1705 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1706 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1707 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1708 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1709 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) 1710 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) 1711 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) 
1712 GEN_VEXT_VV(vwmacc_vv_b, 1, 2) 1713 GEN_VEXT_VV(vwmacc_vv_h, 2, 4) 1714 GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 1715 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) 1716 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) 1717 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) 1718 1719 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1720 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1721 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1722 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1723 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1724 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1725 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1726 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1727 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1728 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1729 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1730 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1731 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) 1732 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) 1733 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) 1734 GEN_VEXT_VX(vwmacc_vx_b, 1, 2) 1735 GEN_VEXT_VX(vwmacc_vx_h, 2, 4) 1736 GEN_VEXT_VX(vwmacc_vx_w, 4, 8) 1737 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) 1738 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) 1739 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) 1740 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) 1741 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) 1742 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) 1743 1744 /* Vector Integer Merge and Move Instructions */ 1745 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1746 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1747 uint32_t desc) \ 1748 { \ 1749 uint32_t vl = env->vl; \ 1750 uint32_t i; \ 1751 \ 1752 for (i = 0; i < vl; i++) { \ 1753 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1754 *((ETYPE *)vd + H(i)) = s1; \ 1755 } \ 1756 } 1757 1758 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1759 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1760 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1761 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1762 1763 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1764 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1765 uint32_t desc) \ 1766 { \ 1767 uint32_t vl = env->vl; \ 1768 uint32_t i; \ 1769 \ 1770 for (i = 0; i < vl; i++) { \ 1771 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1772 } \ 1773 } 1774 1775 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1776 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1777 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1778 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1779 1780 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1781 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1782 CPURISCVState *env, uint32_t desc) \ 1783 { \ 1784 uint32_t vl = env->vl; \ 1785 uint32_t i; \ 1786 \ 1787 for (i = 0; i < vl; i++) { \ 1788 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 1789 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1790 } \ 1791 } 1792 1793 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1794 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1795 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1796 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1797 1798 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1799 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1800 void *vs2, CPURISCVState *env, uint32_t desc) \ 1801 { \ 1802 uint32_t vl = env->vl; \ 1803 uint32_t i; \ 1804 \ 1805 for (i = 0; i < vl; i++) { \ 1806 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1807 ETYPE d = (!vext_elem_mask(v0, i) ? 
s2 : \ 1808 (ETYPE)(target_long)s1); \ 1809 *((ETYPE *)vd + H(i)) = d; \ 1810 } \ 1811 } 1812 1813 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1814 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1815 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1816 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1817 1818 /* 1819 *** Vector Fixed-Point Arithmetic Instructions 1820 */ 1821 1822 /* Vector Single-Width Saturating Add and Subtract */ 1823 1824 /* 1825 * As fixed point instructions probably have round mode and saturation, 1826 * define common macros for fixed point here. 1827 */ 1828 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1829 CPURISCVState *env, int vxrm); 1830 1831 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1832 static inline void \ 1833 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1834 CPURISCVState *env, int vxrm) \ 1835 { \ 1836 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1837 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1838 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1839 } 1840 1841 static inline void 1842 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1843 CPURISCVState *env, 1844 uint32_t vl, uint32_t vm, int vxrm, 1845 opivv2_rm_fn *fn) 1846 { 1847 for (uint32_t i = 0; i < vl; i++) { 1848 if (!vm && !vext_elem_mask(v0, i)) { 1849 continue; 1850 } 1851 fn(vd, vs1, vs2, i, env, vxrm); 1852 } 1853 } 1854 1855 static inline void 1856 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1857 CPURISCVState *env, 1858 uint32_t desc, uint32_t esz, uint32_t dsz, 1859 opivv2_rm_fn *fn) 1860 { 1861 uint32_t vm = vext_vm(desc); 1862 uint32_t vl = env->vl; 1863 1864 switch (env->vxrm) { 1865 case 0: /* rnu */ 1866 vext_vv_rm_1(vd, v0, vs1, vs2, 1867 env, vl, vm, 0, fn); 1868 break; 1869 case 1: /* rne */ 1870 vext_vv_rm_1(vd, v0, vs1, vs2, 1871 env, vl, vm, 1, fn); 1872 break; 1873 case 2: /* rdn */ 1874 vext_vv_rm_1(vd, v0, vs1, vs2, 1875 env, vl, vm, 2, fn); 1876 break; 1877 default: /* rod */ 1878 vext_vv_rm_1(vd, v0, vs1, vs2, 1879 env, vl, vm, 3, fn); 1880 break; 1881 } 1882 } 1883 1884 /* generate helpers for fixed point instructions with OPIVV format */ 1885 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ 1886 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1887 CPURISCVState *env, uint32_t desc) \ 1888 { \ 1889 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 1890 do_##NAME); \ 1891 } 1892 1893 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 1894 { 1895 uint8_t res = a + b; 1896 if (res < a) { 1897 res = UINT8_MAX; 1898 env->vxsat = 0x1; 1899 } 1900 return res; 1901 } 1902 1903 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1904 uint16_t b) 1905 { 1906 uint16_t res = a + b; 1907 if (res < a) { 1908 res = UINT16_MAX; 1909 env->vxsat = 0x1; 1910 } 1911 return res; 1912 } 1913 1914 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1915 uint32_t b) 1916 { 1917 uint32_t res = a + b; 1918 if (res < a) { 1919 res = UINT32_MAX; 1920 env->vxsat = 0x1; 1921 } 1922 return res; 1923 } 1924 1925 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1926 uint64_t b) 1927 { 1928 uint64_t res = a + b; 1929 if (res < a) { 1930 res = UINT64_MAX; 1931 env->vxsat = 0x1; 1932 } 1933 return res; 1934 } 1935 1936 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 1937 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 1938 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 1939 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, 
H8, H8, H8, saddu64) 1940 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) 1941 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) 1942 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) 1943 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) 1944 1945 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 1946 CPURISCVState *env, int vxrm); 1947 1948 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1949 static inline void \ 1950 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 1951 CPURISCVState *env, int vxrm) \ 1952 { \ 1953 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1954 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 1955 } 1956 1957 static inline void 1958 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 1959 CPURISCVState *env, 1960 uint32_t vl, uint32_t vm, int vxrm, 1961 opivx2_rm_fn *fn) 1962 { 1963 for (uint32_t i = 0; i < vl; i++) { 1964 if (!vm && !vext_elem_mask(v0, i)) { 1965 continue; 1966 } 1967 fn(vd, s1, vs2, i, env, vxrm); 1968 } 1969 } 1970 1971 static inline void 1972 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 1973 CPURISCVState *env, 1974 uint32_t desc, uint32_t esz, uint32_t dsz, 1975 opivx2_rm_fn *fn) 1976 { 1977 uint32_t vm = vext_vm(desc); 1978 uint32_t vl = env->vl; 1979 1980 switch (env->vxrm) { 1981 case 0: /* rnu */ 1982 vext_vx_rm_1(vd, v0, s1, vs2, 1983 env, vl, vm, 0, fn); 1984 break; 1985 case 1: /* rne */ 1986 vext_vx_rm_1(vd, v0, s1, vs2, 1987 env, vl, vm, 1, fn); 1988 break; 1989 case 2: /* rdn */ 1990 vext_vx_rm_1(vd, v0, s1, vs2, 1991 env, vl, vm, 2, fn); 1992 break; 1993 default: /* rod */ 1994 vext_vx_rm_1(vd, v0, s1, vs2, 1995 env, vl, vm, 3, fn); 1996 break; 1997 } 1998 } 1999 2000 /* generate helpers for fixed point instructions with OPIVX format */ 2001 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ 2002 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2003 void *vs2, CPURISCVState *env, uint32_t desc) \ 2004 { \ 2005 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 2006 do_##NAME); \ 2007 } 2008 2009 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2010 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2011 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2012 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2013 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 2014 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) 2015 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) 2016 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) 2017 2018 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2019 { 2020 int8_t res = a + b; 2021 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2022 res = a > 0 ? INT8_MAX : INT8_MIN; 2023 env->vxsat = 0x1; 2024 } 2025 return res; 2026 } 2027 2028 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2029 { 2030 int16_t res = a + b; 2031 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2032 res = a > 0 ? INT16_MAX : INT16_MIN; 2033 env->vxsat = 0x1; 2034 } 2035 return res; 2036 } 2037 2038 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2039 { 2040 int32_t res = a + b; 2041 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2042 res = a > 0 ? INT32_MAX : INT32_MIN; 2043 env->vxsat = 0x1; 2044 } 2045 return res; 2046 } 2047 2048 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2049 { 2050 int64_t res = a + b; 2051 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2052 res = a > 0 ? 
INT64_MAX : INT64_MIN; 2053 env->vxsat = 0x1; 2054 } 2055 return res; 2056 } 2057 2058 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2059 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2060 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2061 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2062 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) 2063 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) 2064 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) 2065 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) 2066 2067 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2068 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2069 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2070 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2071 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) 2072 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) 2073 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) 2074 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) 2075 2076 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2077 { 2078 uint8_t res = a - b; 2079 if (res > a) { 2080 res = 0; 2081 env->vxsat = 0x1; 2082 } 2083 return res; 2084 } 2085 2086 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2087 uint16_t b) 2088 { 2089 uint16_t res = a - b; 2090 if (res > a) { 2091 res = 0; 2092 env->vxsat = 0x1; 2093 } 2094 return res; 2095 } 2096 2097 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2098 uint32_t b) 2099 { 2100 uint32_t res = a - b; 2101 if (res > a) { 2102 res = 0; 2103 env->vxsat = 0x1; 2104 } 2105 return res; 2106 } 2107 2108 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2109 uint64_t b) 2110 { 2111 uint64_t res = a - b; 2112 if (res > a) { 2113 res = 0; 2114 env->vxsat = 0x1; 2115 } 2116 return res; 2117 } 2118 2119 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2120 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2121 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2122 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2123 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) 2124 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) 2125 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) 2126 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) 2127 2128 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2129 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2130 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2131 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2132 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) 2133 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) 2134 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) 2135 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) 2136 2137 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2138 { 2139 int8_t res = a - b; 2140 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2141 res = a >= 0 ? INT8_MAX : INT8_MIN; 2142 env->vxsat = 0x1; 2143 } 2144 return res; 2145 } 2146 2147 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2148 { 2149 int16_t res = a - b; 2150 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2151 res = a >= 0 ? INT16_MAX : INT16_MIN; 2152 env->vxsat = 0x1; 2153 } 2154 return res; 2155 } 2156 2157 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2158 { 2159 int32_t res = a - b; 2160 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2161 res = a >= 0 ? 
INT32_MAX : INT32_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    int64_t res = a - b;
    if ((res ^ a) & (a ^ b) & INT64_MIN) {
        res = a >= 0 ? INT64_MAX : INT64_MIN;
        env->vxsat = 0x1;
    }
    return res;
}

RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
GEN_VEXT_VV_RM(vssub_vv_b, 1, 1)
GEN_VEXT_VV_RM(vssub_vv_h, 2, 2)
GEN_VEXT_VV_RM(vssub_vv_w, 4, 4)
GEN_VEXT_VV_RM(vssub_vv_d, 8, 8)

RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
GEN_VEXT_VX_RM(vssub_vx_b, 1, 1)
GEN_VEXT_VX_RM(vssub_vx_h, 2, 2)
GEN_VEXT_VX_RM(vssub_vx_w, 4, 4)
GEN_VEXT_VX_RM(vssub_vx_d, 8, 8)

/* Vector Single-Width Averaging Add and Subtract */
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
{
    uint8_t d = extract64(v, shift, 1);
    uint8_t d1;
    uint64_t D1, D2;

    if (shift == 0 || shift > 64) {
        return 0;
    }

    d1 = extract64(v, shift - 1, 1);
    D1 = extract64(v, 0, shift);
    if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
        return d1;
    } else if (vxrm == 1) { /* round-to-nearest-even */
        if (shift > 1) {
            D2 = extract64(v, 0, shift - 1);
            return d1 & ((D2 != 0) | d);
        } else {
            return d1 & d;
        }
    } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
        return !d & (D1 != 0);
    }
    return 0; /* round-down (truncate) */
}

static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
{
    int64_t res = (int64_t)a + b;
    uint8_t round = get_round(vxrm, res, 1);

    return (res >> 1) + round;
}

static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
{
    int64_t res = a + b;
    uint8_t round = get_round(vxrm, res, 1);
    int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;

    /* With signed overflow, bit 64 is inverse of bit 63.
*/ 2238 return ((res >> 1) ^ over) + round; 2239 } 2240 2241 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2242 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2243 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2244 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2245 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) 2246 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 2247 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) 2248 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) 2249 2250 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2251 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2252 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2253 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2254 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) 2255 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) 2256 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) 2257 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) 2258 2259 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2260 { 2261 int64_t res = (int64_t)a - b; 2262 uint8_t round = get_round(vxrm, res, 1); 2263 2264 return (res >> 1) + round; 2265 } 2266 2267 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2268 { 2269 int64_t res = (int64_t)a - b; 2270 uint8_t round = get_round(vxrm, res, 1); 2271 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2272 2273 /* With signed overflow, bit 64 is inverse of bit 63. */ 2274 return ((res >> 1) ^ over) + round; 2275 } 2276 2277 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2278 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2279 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2280 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2281 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) 2282 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) 2283 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) 2284 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) 2285 2286 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2287 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2288 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2289 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2290 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) 2291 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) 2292 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) 2293 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) 2294 2295 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2296 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2297 { 2298 uint8_t round; 2299 int16_t res; 2300 2301 res = (int16_t)a * (int16_t)b; 2302 round = get_round(vxrm, res, 7); 2303 res = (res >> 7) + round; 2304 2305 if (res > INT8_MAX) { 2306 env->vxsat = 0x1; 2307 return INT8_MAX; 2308 } else if (res < INT8_MIN) { 2309 env->vxsat = 0x1; 2310 return INT8_MIN; 2311 } else { 2312 return res; 2313 } 2314 } 2315 2316 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2317 { 2318 uint8_t round; 2319 int32_t res; 2320 2321 res = (int32_t)a * (int32_t)b; 2322 round = get_round(vxrm, res, 15); 2323 res = (res >> 15) + round; 2324 2325 if (res > INT16_MAX) { 2326 env->vxsat = 0x1; 2327 return INT16_MAX; 2328 } else if (res < INT16_MIN) { 2329 env->vxsat = 0x1; 2330 return INT16_MIN; 2331 } else { 2332 return res; 2333 } 2334 } 2335 2336 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2337 { 2338 uint8_t round; 2339 int64_t res; 2340 2341 res = (int64_t)a * (int64_t)b; 2342 round = get_round(vxrm, res, 31); 2343 res = (res >> 31) + round; 2344 2345 if (res > INT32_MAX) { 2346 env->vxsat = 0x1; 2347 return 
INT32_MAX; 2348 } else if (res < INT32_MIN) { 2349 env->vxsat = 0x1; 2350 return INT32_MIN; 2351 } else { 2352 return res; 2353 } 2354 } 2355 2356 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2357 { 2358 uint8_t round; 2359 uint64_t hi_64, lo_64; 2360 int64_t res; 2361 2362 if (a == INT64_MIN && b == INT64_MIN) { 2363 env->vxsat = 1; 2364 return INT64_MAX; 2365 } 2366 2367 muls64(&lo_64, &hi_64, a, b); 2368 round = get_round(vxrm, lo_64, 63); 2369 /* 2370 * Cannot overflow, as there are always 2371 * 2 sign bits after multiply. 2372 */ 2373 res = (hi_64 << 1) | (lo_64 >> 63); 2374 if (round) { 2375 if (res == INT64_MAX) { 2376 env->vxsat = 1; 2377 } else { 2378 res += 1; 2379 } 2380 } 2381 return res; 2382 } 2383 2384 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2385 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2386 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2387 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2388 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) 2389 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) 2390 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) 2391 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) 2392 2393 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2394 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2395 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2396 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2397 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) 2398 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) 2399 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) 2400 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) 2401 2402 /* Vector Widening Saturating Scaled Multiply-Add */ 2403 static inline uint16_t 2404 vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, 2405 uint16_t c) 2406 { 2407 uint8_t round; 2408 uint16_t res = (uint16_t)a * b; 2409 2410 round = get_round(vxrm, res, 4); 2411 res = (res >> 4) + round; 2412 return saddu16(env, vxrm, c, res); 2413 } 2414 2415 static inline uint32_t 2416 vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, 2417 uint32_t c) 2418 { 2419 uint8_t round; 2420 uint32_t res = (uint32_t)a * b; 2421 2422 round = get_round(vxrm, res, 8); 2423 res = (res >> 8) + round; 2424 return saddu32(env, vxrm, c, res); 2425 } 2426 2427 static inline uint64_t 2428 vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, 2429 uint64_t c) 2430 { 2431 uint8_t round; 2432 uint64_t res = (uint64_t)a * b; 2433 2434 round = get_round(vxrm, res, 16); 2435 res = (res >> 16) + round; 2436 return saddu64(env, vxrm, c, res); 2437 } 2438 2439 #define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2440 static inline void \ 2441 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2442 CPURISCVState *env, int vxrm) \ 2443 { \ 2444 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2445 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2446 TD d = *((TD *)vd + HD(i)); \ 2447 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ 2448 } 2449 2450 RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) 2451 RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) 2452 RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) 2453 GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2) 2454 GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4) 2455 GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8) 2456 2457 #define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2458 static inline void \ 2459 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2460 CPURISCVState *env, int vxrm) \ 2461 { \ 2462 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2463 TD d = *((TD *)vd + 
HD(i)); \ 2464 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ 2465 } 2466 2467 RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) 2468 RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) 2469 RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) 2470 GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2) 2471 GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4) 2472 GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8) 2473 2474 static inline int16_t 2475 vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) 2476 { 2477 uint8_t round; 2478 int16_t res = (int16_t)a * b; 2479 2480 round = get_round(vxrm, res, 4); 2481 res = (res >> 4) + round; 2482 return sadd16(env, vxrm, c, res); 2483 } 2484 2485 static inline int32_t 2486 vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) 2487 { 2488 uint8_t round; 2489 int32_t res = (int32_t)a * b; 2490 2491 round = get_round(vxrm, res, 8); 2492 res = (res >> 8) + round; 2493 return sadd32(env, vxrm, c, res); 2494 2495 } 2496 2497 static inline int64_t 2498 vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) 2499 { 2500 uint8_t round; 2501 int64_t res = (int64_t)a * b; 2502 2503 round = get_round(vxrm, res, 16); 2504 res = (res >> 16) + round; 2505 return sadd64(env, vxrm, c, res); 2506 } 2507 2508 RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) 2509 RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) 2510 RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) 2511 GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2) 2512 GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4) 2513 GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8) 2514 RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) 2515 RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) 2516 RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) 2517 GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2) 2518 GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4) 2519 GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8) 2520 2521 static inline int16_t 2522 vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) 2523 { 2524 uint8_t round; 2525 int16_t res = a * (int16_t)b; 2526 2527 round = get_round(vxrm, res, 4); 2528 res = (res >> 4) + round; 2529 return ssub16(env, vxrm, c, res); 2530 } 2531 2532 static inline int32_t 2533 vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) 2534 { 2535 uint8_t round; 2536 int32_t res = a * (int32_t)b; 2537 2538 round = get_round(vxrm, res, 8); 2539 res = (res >> 8) + round; 2540 return ssub32(env, vxrm, c, res); 2541 } 2542 2543 static inline int64_t 2544 vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) 2545 { 2546 uint8_t round; 2547 int64_t res = a * (int64_t)b; 2548 2549 round = get_round(vxrm, res, 16); 2550 res = (res >> 16) + round; 2551 return ssub64(env, vxrm, c, res); 2552 } 2553 2554 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) 2555 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) 2556 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) 2557 GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2) 2558 GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4) 2559 GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8) 2560 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) 2561 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) 2562 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) 2563 GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2) 2564 GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4) 2565 GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8) 2566 2567 
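/*
 * Worked example (illustrative only) of the rounding/scaling step shared
 * by these widening saturating scaled multiply-add helpers: the 2*SEW
 * product is shifted right by SEW/2 with vxrm-controlled rounding before
 * the saturating accumulate.  For vwsmaccu8 with a = 200, b = 100 and
 * vxrm = 0 (rnu):
 *
 *     res   = 200 * 100 = 20000             (0x4e20)
 *     round = get_round(0, 20000, 4) = 0    (bit 3 of 0x4e20 is clear)
 *     res   = (20000 >> 4) + 0 = 1250
 *
 * and 1250 is then folded into the 16-bit accumulator element via
 * saddu16().
 */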
static inline int16_t 2568 vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) 2569 { 2570 uint8_t round; 2571 int16_t res = (int16_t)a * b; 2572 2573 round = get_round(vxrm, res, 4); 2574 res = (res >> 4) + round; 2575 return ssub16(env, vxrm, c, res); 2576 } 2577 2578 static inline int32_t 2579 vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) 2580 { 2581 uint8_t round; 2582 int32_t res = (int32_t)a * b; 2583 2584 round = get_round(vxrm, res, 8); 2585 res = (res >> 8) + round; 2586 return ssub32(env, vxrm, c, res); 2587 } 2588 2589 static inline int64_t 2590 vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) 2591 { 2592 uint8_t round; 2593 int64_t res = (int64_t)a * b; 2594 2595 round = get_round(vxrm, res, 16); 2596 res = (res >> 16) + round; 2597 return ssub64(env, vxrm, c, res); 2598 } 2599 2600 RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8) 2601 RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) 2602 RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) 2603 GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2) 2604 GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4) 2605 GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8) 2606 2607 /* Vector Single-Width Scaling Shift Instructions */ 2608 static inline uint8_t 2609 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2610 { 2611 uint8_t round, shift = b & 0x7; 2612 uint8_t res; 2613 2614 round = get_round(vxrm, a, shift); 2615 res = (a >> shift) + round; 2616 return res; 2617 } 2618 static inline uint16_t 2619 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2620 { 2621 uint8_t round, shift = b & 0xf; 2622 uint16_t res; 2623 2624 round = get_round(vxrm, a, shift); 2625 res = (a >> shift) + round; 2626 return res; 2627 } 2628 static inline uint32_t 2629 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2630 { 2631 uint8_t round, shift = b & 0x1f; 2632 uint32_t res; 2633 2634 round = get_round(vxrm, a, shift); 2635 res = (a >> shift) + round; 2636 return res; 2637 } 2638 static inline uint64_t 2639 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2640 { 2641 uint8_t round, shift = b & 0x3f; 2642 uint64_t res; 2643 2644 round = get_round(vxrm, a, shift); 2645 res = (a >> shift) + round; 2646 return res; 2647 } 2648 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2649 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2650 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2651 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2652 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) 2653 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) 2654 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) 2655 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) 2656 2657 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2658 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2659 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2660 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2661 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) 2662 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) 2663 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) 2664 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) 2665 2666 static inline int8_t 2667 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2668 { 2669 uint8_t round, shift = b & 0x7; 2670 int8_t res; 2671 2672 round = get_round(vxrm, a, shift); 2673 res = (a >> shift) + round; 2674 return res; 2675 } 2676 static inline int16_t 2677 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2678 { 2679 uint8_t round, shift = b & 0xf; 2680 int16_t 
res; 2681 2682 round = get_round(vxrm, a, shift); 2683 res = (a >> shift) + round; 2684 return res; 2685 } 2686 static inline int32_t 2687 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2688 { 2689 uint8_t round, shift = b & 0x1f; 2690 int32_t res; 2691 2692 round = get_round(vxrm, a, shift); 2693 res = (a >> shift) + round; 2694 return res; 2695 } 2696 static inline int64_t 2697 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2698 { 2699 uint8_t round, shift = b & 0x3f; 2700 int64_t res; 2701 2702 round = get_round(vxrm, a, shift); 2703 res = (a >> shift) + round; 2704 return res; 2705 } 2706 2707 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2708 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2709 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2710 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2711 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) 2712 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) 2713 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) 2714 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) 2715 2716 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2717 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2718 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2719 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2720 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) 2721 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) 2722 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) 2723 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) 2724 2725 /* Vector Narrowing Fixed-Point Clip Instructions */ 2726 static inline int8_t 2727 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2728 { 2729 uint8_t round, shift = b & 0xf; 2730 int16_t res; 2731 2732 round = get_round(vxrm, a, shift); 2733 res = (a >> shift) + round; 2734 if (res > INT8_MAX) { 2735 env->vxsat = 0x1; 2736 return INT8_MAX; 2737 } else if (res < INT8_MIN) { 2738 env->vxsat = 0x1; 2739 return INT8_MIN; 2740 } else { 2741 return res; 2742 } 2743 } 2744 2745 static inline int16_t 2746 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2747 { 2748 uint8_t round, shift = b & 0x1f; 2749 int32_t res; 2750 2751 round = get_round(vxrm, a, shift); 2752 res = (a >> shift) + round; 2753 if (res > INT16_MAX) { 2754 env->vxsat = 0x1; 2755 return INT16_MAX; 2756 } else if (res < INT16_MIN) { 2757 env->vxsat = 0x1; 2758 return INT16_MIN; 2759 } else { 2760 return res; 2761 } 2762 } 2763 2764 static inline int32_t 2765 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2766 { 2767 uint8_t round, shift = b & 0x3f; 2768 int64_t res; 2769 2770 round = get_round(vxrm, a, shift); 2771 res = (a >> shift) + round; 2772 if (res > INT32_MAX) { 2773 env->vxsat = 0x1; 2774 return INT32_MAX; 2775 } else if (res < INT32_MIN) { 2776 env->vxsat = 0x1; 2777 return INT32_MIN; 2778 } else { 2779 return res; 2780 } 2781 } 2782 2783 RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2784 RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2785 RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2786 GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1) 2787 GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2) 2788 GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4) 2789 2790 RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8) 2791 RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16) 2792 RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32) 2793 GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1) 2794 GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2) 2795 GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4) 2796 2797 static inline uint8_t 2798 vnclipu8(CPURISCVState *env, int vxrm, uint16_t 
a, uint8_t b) 2799 { 2800 uint8_t round, shift = b & 0xf; 2801 uint16_t res; 2802 2803 round = get_round(vxrm, a, shift); 2804 res = (a >> shift) + round; 2805 if (res > UINT8_MAX) { 2806 env->vxsat = 0x1; 2807 return UINT8_MAX; 2808 } else { 2809 return res; 2810 } 2811 } 2812 2813 static inline uint16_t 2814 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2815 { 2816 uint8_t round, shift = b & 0x1f; 2817 uint32_t res; 2818 2819 round = get_round(vxrm, a, shift); 2820 res = (a >> shift) + round; 2821 if (res > UINT16_MAX) { 2822 env->vxsat = 0x1; 2823 return UINT16_MAX; 2824 } else { 2825 return res; 2826 } 2827 } 2828 2829 static inline uint32_t 2830 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2831 { 2832 uint8_t round, shift = b & 0x3f; 2833 int64_t res; 2834 2835 round = get_round(vxrm, a, shift); 2836 res = (a >> shift) + round; 2837 if (res > UINT32_MAX) { 2838 env->vxsat = 0x1; 2839 return UINT32_MAX; 2840 } else { 2841 return res; 2842 } 2843 } 2844 2845 RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2846 RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2847 RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2848 GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1) 2849 GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2) 2850 GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4) 2851 2852 RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8) 2853 RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16) 2854 RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32) 2855 GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1) 2856 GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2) 2857 GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4) 2858 2859 /* 2860 *** Vector Float Point Arithmetic Instructions 2861 */ 2862 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2863 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2864 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2865 CPURISCVState *env) \ 2866 { \ 2867 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2868 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2869 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2870 } 2871 2872 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \ 2873 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2874 void *vs2, CPURISCVState *env, \ 2875 uint32_t desc) \ 2876 { \ 2877 uint32_t vm = vext_vm(desc); \ 2878 uint32_t vl = env->vl; \ 2879 uint32_t i; \ 2880 \ 2881 for (i = 0; i < vl; i++) { \ 2882 if (!vm && !vext_elem_mask(v0, i)) { \ 2883 continue; \ 2884 } \ 2885 do_##NAME(vd, vs1, vs2, i, env); \ 2886 } \ 2887 } 2888 2889 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2890 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2891 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2892 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) 2893 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4) 2894 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8) 2895 2896 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2897 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2898 CPURISCVState *env) \ 2899 { \ 2900 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2901 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 2902 } 2903 2904 #define GEN_VEXT_VF(NAME, ESZ, DSZ) \ 2905 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2906 void *vs2, CPURISCVState *env, \ 2907 uint32_t desc) \ 2908 { \ 2909 uint32_t vm = vext_vm(desc); \ 2910 uint32_t vl = env->vl; \ 2911 uint32_t i; \ 2912 \ 2913 for (i = 0; i < vl; i++) { \ 2914 if (!vm && !vext_elem_mask(v0, i)) { \ 2915 continue; \ 2916 } \ 2917 
do_##NAME(vd, s1, vs2, i, env); \ 2918 } \ 2919 } 2920 2921 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2922 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2923 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2924 GEN_VEXT_VF(vfadd_vf_h, 2, 2) 2925 GEN_VEXT_VF(vfadd_vf_w, 4, 4) 2926 GEN_VEXT_VF(vfadd_vf_d, 8, 8) 2927 2928 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2929 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2930 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2931 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) 2932 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) 2933 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) 2934 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2935 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2936 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 2937 GEN_VEXT_VF(vfsub_vf_h, 2, 2) 2938 GEN_VEXT_VF(vfsub_vf_w, 4, 4) 2939 GEN_VEXT_VF(vfsub_vf_d, 8, 8) 2940 2941 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 2942 { 2943 return float16_sub(b, a, s); 2944 } 2945 2946 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 2947 { 2948 return float32_sub(b, a, s); 2949 } 2950 2951 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 2952 { 2953 return float64_sub(b, a, s); 2954 } 2955 2956 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 2957 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 2958 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 2959 GEN_VEXT_VF(vfrsub_vf_h, 2, 2) 2960 GEN_VEXT_VF(vfrsub_vf_w, 4, 4) 2961 GEN_VEXT_VF(vfrsub_vf_d, 8, 8) 2962 2963 /* Vector Widening Floating-Point Add/Subtract Instructions */ 2964 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 2965 { 2966 return float32_add(float16_to_float32(a, true, s), 2967 float16_to_float32(b, true, s), s); 2968 } 2969 2970 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 2971 { 2972 return float64_add(float32_to_float64(a, s), 2973 float32_to_float64(b, s), s); 2974 2975 } 2976 2977 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 2978 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 2979 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) 2980 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) 2981 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 2982 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 2983 GEN_VEXT_VF(vfwadd_vf_h, 2, 4) 2984 GEN_VEXT_VF(vfwadd_vf_w, 4, 8) 2985 2986 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 2987 { 2988 return float32_sub(float16_to_float32(a, true, s), 2989 float16_to_float32(b, true, s), s); 2990 } 2991 2992 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 2993 { 2994 return float64_sub(float32_to_float64(a, s), 2995 float32_to_float64(b, s), s); 2996 2997 } 2998 2999 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3000 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3001 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) 3002 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) 3003 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3004 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3005 GEN_VEXT_VF(vfwsub_vf_h, 2, 4) 3006 GEN_VEXT_VF(vfwsub_vf_w, 4, 8) 3007 3008 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3009 { 3010 return float32_add(a, float16_to_float32(b, true, s), s); 3011 } 3012 3013 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3014 { 3015 
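    /*
     * ".wv"/".wf" form: operand a already has the wide (2*SEW) type,
     * so only b needs to be converted up before the wide add below.
     */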
return float64_add(a, float32_to_float64(b, s), s); 3016 } 3017 3018 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3019 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3020 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) 3021 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 3022 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3023 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3024 GEN_VEXT_VF(vfwadd_wf_h, 2, 4) 3025 GEN_VEXT_VF(vfwadd_wf_w, 4, 8) 3026 3027 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3028 { 3029 return float32_sub(a, float16_to_float32(b, true, s), s); 3030 } 3031 3032 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3033 { 3034 return float64_sub(a, float32_to_float64(b, s), s); 3035 } 3036 3037 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3038 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3039 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) 3040 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) 3041 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3042 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3043 GEN_VEXT_VF(vfwsub_wf_h, 2, 4) 3044 GEN_VEXT_VF(vfwsub_wf_w, 4, 8) 3045 3046 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3047 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3048 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3049 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3050 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) 3051 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) 3052 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) 3053 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3054 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3055 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3056 GEN_VEXT_VF(vfmul_vf_h, 2, 2) 3057 GEN_VEXT_VF(vfmul_vf_w, 4, 4) 3058 GEN_VEXT_VF(vfmul_vf_d, 8, 8) 3059 3060 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3061 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3062 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3063 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 3064 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) 3065 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) 3066 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3067 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3068 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3069 GEN_VEXT_VF(vfdiv_vf_h, 2, 2) 3070 GEN_VEXT_VF(vfdiv_vf_w, 4, 4) 3071 GEN_VEXT_VF(vfdiv_vf_d, 8, 8) 3072 3073 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3074 { 3075 return float16_div(b, a, s); 3076 } 3077 3078 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3079 { 3080 return float32_div(b, a, s); 3081 } 3082 3083 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3084 { 3085 return float64_div(b, a, s); 3086 } 3087 3088 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3089 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3090 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3091 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) 3092 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) 3093 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) 3094 3095 /* Vector Widening Floating-Point Multiply */ 3096 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3097 { 3098 return float32_mul(float16_to_float32(a, true, s), 3099 float16_to_float32(b, true, s), s); 3100 } 3101 3102 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3103 { 3104 
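    /*
     * float32_to_float64() is exact for every finite input, so the only
     * rounding in this widening multiply is the single float64_mul()
     * call below.
     */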
return float64_mul(float32_to_float64(a, s), 3105 float32_to_float64(b, s), s); 3106 3107 } 3108 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3109 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3110 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) 3111 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) 3112 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3113 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3114 GEN_VEXT_VF(vfwmul_vf_h, 2, 4) 3115 GEN_VEXT_VF(vfwmul_vf_w, 4, 8) 3116 3117 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3118 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3119 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3120 CPURISCVState *env) \ 3121 { \ 3122 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3123 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3124 TD d = *((TD *)vd + HD(i)); \ 3125 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3126 } 3127 3128 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3129 { 3130 return float16_muladd(a, b, d, 0, s); 3131 } 3132 3133 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3134 { 3135 return float32_muladd(a, b, d, 0, s); 3136 } 3137 3138 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3139 { 3140 return float64_muladd(a, b, d, 0, s); 3141 } 3142 3143 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3144 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3145 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3146 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) 3147 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) 3148 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) 3149 3150 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3151 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3152 CPURISCVState *env) \ 3153 { \ 3154 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3155 TD d = *((TD *)vd + HD(i)); \ 3156 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3157 } 3158 3159 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3160 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3161 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3162 GEN_VEXT_VF(vfmacc_vf_h, 2, 2) 3163 GEN_VEXT_VF(vfmacc_vf_w, 4, 4) 3164 GEN_VEXT_VF(vfmacc_vf_d, 8, 8) 3165 3166 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3167 { 3168 return float16_muladd(a, b, d, 3169 float_muladd_negate_c | float_muladd_negate_product, s); 3170 } 3171 3172 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3173 { 3174 return float32_muladd(a, b, d, 3175 float_muladd_negate_c | float_muladd_negate_product, s); 3176 } 3177 3178 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3179 { 3180 return float64_muladd(a, b, d, 3181 float_muladd_negate_c | float_muladd_negate_product, s); 3182 } 3183 3184 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3185 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3186 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3187 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) 3188 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) 3189 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) 3190 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3191 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3192 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3193 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) 3194 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) 3195 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) 3196 3197 
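/*
 * For reference, the softfloat muladd flag combinations used by the
 * single-width fused multiply-add helpers map to (with d the destination
 * operand, a = vs2 and b = vs1 or the scalar):
 *
 *     fmacc*:   (a * b) + d        no flags
 *     fnmacc*: -(a * b) - d        float_muladd_negate_product | negate_c
 *     fmsac*:   (a * b) - d        float_muladd_negate_c
 *     fnmsac*: -(a * b) + d        float_muladd_negate_product
 *
 * The fmadd*/fnmadd*/fmsub*/fnmsub* helpers further down instead pass
 * (d, b, a) to the muladd, i.e. they compute +/-(d * b) +/- a.
 */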
static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3198 { 3199 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3200 } 3201 3202 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3203 { 3204 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3205 } 3206 3207 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3208 { 3209 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3210 } 3211 3212 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3213 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3214 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3215 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) 3216 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) 3217 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) 3218 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3219 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3220 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3221 GEN_VEXT_VF(vfmsac_vf_h, 2, 2) 3222 GEN_VEXT_VF(vfmsac_vf_w, 4, 4) 3223 GEN_VEXT_VF(vfmsac_vf_d, 8, 8) 3224 3225 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3226 { 3227 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3228 } 3229 3230 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3231 { 3232 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3233 } 3234 3235 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3236 { 3237 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3238 } 3239 3240 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3241 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3242 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3243 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) 3244 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) 3245 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) 3246 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3247 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3248 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3249 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) 3250 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) 3251 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) 3252 3253 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3254 { 3255 return float16_muladd(d, b, a, 0, s); 3256 } 3257 3258 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3259 { 3260 return float32_muladd(d, b, a, 0, s); 3261 } 3262 3263 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3264 { 3265 return float64_muladd(d, b, a, 0, s); 3266 } 3267 3268 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3269 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3270 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3271 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) 3272 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) 3273 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) 3274 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3275 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3276 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3277 GEN_VEXT_VF(vfmadd_vf_h, 2, 2) 3278 GEN_VEXT_VF(vfmadd_vf_w, 4, 4) 3279 GEN_VEXT_VF(vfmadd_vf_d, 8, 8) 3280 3281 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3282 { 3283 return float16_muladd(d, b, a, 3284 float_muladd_negate_c | float_muladd_negate_product, s); 3285 } 3286 3287 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, 
float_status *s) 3288 { 3289 return float32_muladd(d, b, a, 3290 float_muladd_negate_c | float_muladd_negate_product, s); 3291 } 3292 3293 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3294 { 3295 return float64_muladd(d, b, a, 3296 float_muladd_negate_c | float_muladd_negate_product, s); 3297 } 3298 3299 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3300 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3301 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3302 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) 3303 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) 3304 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) 3305 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3306 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3307 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3308 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) 3309 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) 3310 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) 3311 3312 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3313 { 3314 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3315 } 3316 3317 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3318 { 3319 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3320 } 3321 3322 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3323 { 3324 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3325 } 3326 3327 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3328 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3329 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3330 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) 3331 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 3332 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) 3333 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3334 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3335 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3336 GEN_VEXT_VF(vfmsub_vf_h, 2, 2) 3337 GEN_VEXT_VF(vfmsub_vf_w, 4, 4) 3338 GEN_VEXT_VF(vfmsub_vf_d, 8, 8) 3339 3340 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3341 { 3342 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3343 } 3344 3345 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3346 { 3347 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3348 } 3349 3350 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3351 { 3352 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3353 } 3354 3355 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3356 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3357 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3358 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) 3359 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) 3360 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) 3361 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3362 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3363 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3364 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) 3365 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) 3366 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) 3367 3368 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3369 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3370 { 3371 return float32_muladd(float16_to_float32(a, true, s), 3372 float16_to_float32(b, true, s), d, 0, s); 3373 } 3374 3375 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t 
d, float_status *s) 3376 { 3377 return float64_muladd(float32_to_float64(a, s), 3378 float32_to_float64(b, s), d, 0, s); 3379 } 3380 3381 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3382 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3383 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) 3384 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) 3385 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3386 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3387 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) 3388 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) 3389 3390 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3391 { 3392 return float32_muladd(float16_to_float32(a, true, s), 3393 float16_to_float32(b, true, s), d, 3394 float_muladd_negate_c | float_muladd_negate_product, s); 3395 } 3396 3397 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3398 { 3399 return float64_muladd(float32_to_float64(a, s), 3400 float32_to_float64(b, s), d, 3401 float_muladd_negate_c | float_muladd_negate_product, s); 3402 } 3403 3404 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3405 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3406 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) 3407 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) 3408 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3409 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3410 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) 3411 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) 3412 3413 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3414 { 3415 return float32_muladd(float16_to_float32(a, true, s), 3416 float16_to_float32(b, true, s), d, 3417 float_muladd_negate_c, s); 3418 } 3419 3420 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3421 { 3422 return float64_muladd(float32_to_float64(a, s), 3423 float32_to_float64(b, s), d, 3424 float_muladd_negate_c, s); 3425 } 3426 3427 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3428 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3429 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) 3430 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) 3431 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3432 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3433 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) 3434 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) 3435 3436 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3437 { 3438 return float32_muladd(float16_to_float32(a, true, s), 3439 float16_to_float32(b, true, s), d, 3440 float_muladd_negate_product, s); 3441 } 3442 3443 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3444 { 3445 return float64_muladd(float32_to_float64(a, s), 3446 float32_to_float64(b, s), d, 3447 float_muladd_negate_product, s); 3448 } 3449 3450 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3451 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3452 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) 3453 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 3454 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3455 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3456 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) 3457 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) 3458 3459 /* Vector Floating-Point Square-Root Instruction */ 3460 /* (TD, T2, TX2) */ 3461 #define OP_UU_H uint16_t, uint16_t, uint16_t 3462 #define OP_UU_W uint32_t, uint32_t, uint32_t 3463 #define OP_UU_D uint64_t, uint64_t, uint64_t 3464 3465 #define 
OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3466 static void do_##NAME(void *vd, void *vs2, int i, \ 3467 CPURISCVState *env) \ 3468 { \ 3469 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3470 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3471 } 3472 3473 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ 3474 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3475 CPURISCVState *env, uint32_t desc) \ 3476 { \ 3477 uint32_t vm = vext_vm(desc); \ 3478 uint32_t vl = env->vl; \ 3479 uint32_t i; \ 3480 \ 3481 if (vl == 0) { \ 3482 return; \ 3483 } \ 3484 for (i = 0; i < vl; i++) { \ 3485 if (!vm && !vext_elem_mask(v0, i)) { \ 3486 continue; \ 3487 } \ 3488 do_##NAME(vd, vs2, i, env); \ 3489 } \ 3490 } 3491 3492 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3493 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3494 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3495 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) 3496 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) 3497 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) 3498 3499 /* Vector Floating-Point MIN/MAX Instructions */ 3500 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum) 3501 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum) 3502 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum) 3503 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) 3504 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) 3505 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) 3506 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum) 3507 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum) 3508 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum) 3509 GEN_VEXT_VF(vfmin_vf_h, 2, 2) 3510 GEN_VEXT_VF(vfmin_vf_w, 4, 4) 3511 GEN_VEXT_VF(vfmin_vf_d, 8, 8) 3512 3513 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum) 3514 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum) 3515 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum) 3516 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) 3517 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) 3518 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) 3519 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum) 3520 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum) 3521 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) 3522 GEN_VEXT_VF(vfmax_vf_h, 2, 2) 3523 GEN_VEXT_VF(vfmax_vf_w, 4, 4) 3524 GEN_VEXT_VF(vfmax_vf_d, 8, 8) 3525 3526 /* Vector Floating-Point Sign-Injection Instructions */ 3527 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3528 { 3529 return deposit64(b, 0, 15, a); 3530 } 3531 3532 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3533 { 3534 return deposit64(b, 0, 31, a); 3535 } 3536 3537 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3538 { 3539 return deposit64(b, 0, 63, a); 3540 } 3541 3542 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3543 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3544 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3545 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) 3546 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) 3547 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) 3548 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3549 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3550 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3551 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) 3552 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) 3553 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) 3554 3555 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3556 { 3557 return deposit64(~b, 0, 15, a); 3558 } 3559 3560 static uint32_t fsgnjn32(uint32_t a, 
uint32_t b, float_status *s) 3561 { 3562 return deposit64(~b, 0, 31, a); 3563 } 3564 3565 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3566 { 3567 return deposit64(~b, 0, 63, a); 3568 } 3569 3570 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3571 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3572 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3573 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) 3574 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) 3575 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) 3576 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3577 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3578 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3579 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) 3580 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) 3581 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) 3582 3583 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3584 { 3585 return deposit64(b ^ a, 0, 15, a); 3586 } 3587 3588 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3589 { 3590 return deposit64(b ^ a, 0, 31, a); 3591 } 3592 3593 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3594 { 3595 return deposit64(b ^ a, 0, 63, a); 3596 } 3597 3598 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3599 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3600 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3601 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) 3602 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) 3603 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) 3604 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3605 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3606 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3607 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) 3608 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) 3609 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) 3610 3611 /* Vector Floating-Point Compare Instructions */ 3612 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3613 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3614 CPURISCVState *env, uint32_t desc) \ 3615 { \ 3616 uint32_t vm = vext_vm(desc); \ 3617 uint32_t vl = env->vl; \ 3618 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 3619 uint32_t i; \ 3620 \ 3621 for (i = 0; i < vl; i++) { \ 3622 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3623 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3624 if (!vm && !vext_elem_mask(v0, i)) { \ 3625 continue; \ 3626 } \ 3627 vext_set_elem_mask(vd, i, \ 3628 DO_OP(s2, s1, &env->fp_status)); \ 3629 } \ 3630 for (; i < vlmax; i++) { \ 3631 vext_set_elem_mask(vd, i, 0); \ 3632 } \ 3633 } 3634 3635 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3636 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3637 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3638 3639 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3640 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3641 CPURISCVState *env, uint32_t desc) \ 3642 { \ 3643 uint32_t vm = vext_vm(desc); \ 3644 uint32_t vl = env->vl; \ 3645 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 3646 uint32_t i; \ 3647 \ 3648 for (i = 0; i < vl; i++) { \ 3649 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3650 if (!vm && !vext_elem_mask(v0, i)) { \ 3651 continue; \ 3652 } \ 3653 vext_set_elem_mask(vd, i, \ 3654 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3655 } \ 3656 for (; i < vlmax; i++) { \ 3657 vext_set_elem_mask(vd, i, 0); \ 3658 } \ 3659 } 3660 3661 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3662 
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3663 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3664 3665 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3666 { 3667 FloatRelation compare = float16_compare_quiet(a, b, s); 3668 return compare != float_relation_equal; 3669 } 3670 3671 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 3672 { 3673 FloatRelation compare = float32_compare_quiet(a, b, s); 3674 return compare != float_relation_equal; 3675 } 3676 3677 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 3678 { 3679 FloatRelation compare = float64_compare_quiet(a, b, s); 3680 return compare != float_relation_equal; 3681 } 3682 3683 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 3684 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 3685 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 3686 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 3687 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 3688 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 3689 3690 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 3691 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 3692 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 3693 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 3694 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 3695 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 3696 3697 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 3698 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 3699 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 3700 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 3701 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 3702 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 3703 3704 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 3705 { 3706 FloatRelation compare = float16_compare(a, b, s); 3707 return compare == float_relation_greater; 3708 } 3709 3710 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 3711 { 3712 FloatRelation compare = float32_compare(a, b, s); 3713 return compare == float_relation_greater; 3714 } 3715 3716 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 3717 { 3718 FloatRelation compare = float64_compare(a, b, s); 3719 return compare == float_relation_greater; 3720 } 3721 3722 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 3723 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 3724 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 3725 3726 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 3727 { 3728 FloatRelation compare = float16_compare(a, b, s); 3729 return compare == float_relation_greater || 3730 compare == float_relation_equal; 3731 } 3732 3733 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 3734 { 3735 FloatRelation compare = float32_compare(a, b, s); 3736 return compare == float_relation_greater || 3737 compare == float_relation_equal; 3738 } 3739 3740 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 3741 { 3742 FloatRelation compare = float64_compare(a, b, s); 3743 return compare == float_relation_greater || 3744 compare == float_relation_equal; 3745 } 3746 3747 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 3748 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 3749 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 3750 3751 GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet) 3752 GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet) 3753 
GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet) 3754 GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet) 3755 GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet) 3756 GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet) 3757 3758 /* Vector Floating-Point Classify Instruction */ 3759 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3760 static void do_##NAME(void *vd, void *vs2, int i) \ 3761 { \ 3762 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3763 *((TD *)vd + HD(i)) = OP(s2); \ 3764 } 3765 3766 #define GEN_VEXT_V(NAME, ESZ, DSZ) \ 3767 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3768 CPURISCVState *env, uint32_t desc) \ 3769 { \ 3770 uint32_t vm = vext_vm(desc); \ 3771 uint32_t vl = env->vl; \ 3772 uint32_t i; \ 3773 \ 3774 for (i = 0; i < vl; i++) { \ 3775 if (!vm && !vext_elem_mask(v0, i)) { \ 3776 continue; \ 3777 } \ 3778 do_##NAME(vd, vs2, i); \ 3779 } \ 3780 } 3781 3782 target_ulong fclass_h(uint64_t frs1) 3783 { 3784 float16 f = frs1; 3785 bool sign = float16_is_neg(f); 3786 3787 if (float16_is_infinity(f)) { 3788 return sign ? 1 << 0 : 1 << 7; 3789 } else if (float16_is_zero(f)) { 3790 return sign ? 1 << 3 : 1 << 4; 3791 } else if (float16_is_zero_or_denormal(f)) { 3792 return sign ? 1 << 2 : 1 << 5; 3793 } else if (float16_is_any_nan(f)) { 3794 float_status s = { }; /* for snan_bit_is_one */ 3795 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3796 } else { 3797 return sign ? 1 << 1 : 1 << 6; 3798 } 3799 } 3800 3801 target_ulong fclass_s(uint64_t frs1) 3802 { 3803 float32 f = frs1; 3804 bool sign = float32_is_neg(f); 3805 3806 if (float32_is_infinity(f)) { 3807 return sign ? 1 << 0 : 1 << 7; 3808 } else if (float32_is_zero(f)) { 3809 return sign ? 1 << 3 : 1 << 4; 3810 } else if (float32_is_zero_or_denormal(f)) { 3811 return sign ? 1 << 2 : 1 << 5; 3812 } else if (float32_is_any_nan(f)) { 3813 float_status s = { }; /* for snan_bit_is_one */ 3814 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3815 } else { 3816 return sign ? 1 << 1 : 1 << 6; 3817 } 3818 } 3819 3820 target_ulong fclass_d(uint64_t frs1) 3821 { 3822 float64 f = frs1; 3823 bool sign = float64_is_neg(f); 3824 3825 if (float64_is_infinity(f)) { 3826 return sign ? 1 << 0 : 1 << 7; 3827 } else if (float64_is_zero(f)) { 3828 return sign ? 1 << 3 : 1 << 4; 3829 } else if (float64_is_zero_or_denormal(f)) { 3830 return sign ? 1 << 2 : 1 << 5; 3831 } else if (float64_is_any_nan(f)) { 3832 float_status s = { }; /* for snan_bit_is_one */ 3833 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3834 } else { 3835 return sign ? 1 << 1 : 1 << 6; 3836 } 3837 } 3838 3839 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 3840 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 3841 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 3842 GEN_VEXT_V(vfclass_v_h, 2, 2) 3843 GEN_VEXT_V(vfclass_v_w, 4, 4) 3844 GEN_VEXT_V(vfclass_v_d, 8, 8) 3845 3846 /* Vector Floating-Point Merge Instruction */ 3847 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 3848 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3849 CPURISCVState *env, uint32_t desc) \ 3850 { \ 3851 uint32_t vm = vext_vm(desc); \ 3852 uint32_t vl = env->vl; \ 3853 uint32_t i; \ 3854 \ 3855 for (i = 0; i < vl; i++) { \ 3856 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3857 *((ETYPE *)vd + H(i)) \ 3858 = (!vm && !vext_elem_mask(v0, i) ? 
s2 : s1); \ 3859 } \ 3860 } 3861 3862 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 3863 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 3864 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 3865 3866 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 3867 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 3868 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 3869 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 3870 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 3871 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) 3872 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) 3873 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) 3874 3875 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 3876 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 3877 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 3878 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 3879 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) 3880 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) 3881 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) 3882 3883 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 3884 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 3885 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 3886 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 3887 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) 3888 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) 3889 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) 3890 3891 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 3892 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 3893 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 3894 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 3895 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) 3896 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) 3897 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) 3898 3899 /* Widening Floating-Point/Integer Type-Convert Instructions */ 3900 /* (TD, T2, TX2) */ 3901 #define WOP_UU_H uint32_t, uint16_t, uint16_t 3902 #define WOP_UU_W uint64_t, uint32_t, uint32_t 3903 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 3904 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 3905 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 3906 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) 3907 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) 3908 3909 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 3910 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 3911 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 3912 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) 3913 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) 3914 3915 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 3916 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 3917 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 3918 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) 3919 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) 3920 3921 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 3922 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 3923 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 3924 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) 3925 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) 3926 3927 /* 3928 * vfwcvt.f.f.v vd, vs2, vm # 3929 * Convert single-width float to double-width float. 
 */
static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
{
    /* the 'true' flag selects IEEE half-precision input format */
    return float16_to_float32(a, true, s);
}

RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)

/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4)

/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4)

/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4)

/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4)

/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float.
*/ 3970 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 3971 { 3972 return float32_to_float16(a, true, s); 3973 } 3974 3975 RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) 3976 RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) 3977 GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2) 3978 GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4) 3979 3980 /* 3981 *** Vector Reduction Operations 3982 */ 3983 /* Vector Single-Width Integer Reduction Instructions */ 3984 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 3985 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3986 void *vs2, CPURISCVState *env, uint32_t desc) \ 3987 { \ 3988 uint32_t vm = vext_vm(desc); \ 3989 uint32_t vl = env->vl; \ 3990 uint32_t i; \ 3991 TD s1 = *((TD *)vs1 + HD(0)); \ 3992 \ 3993 for (i = 0; i < vl; i++) { \ 3994 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 3995 if (!vm && !vext_elem_mask(v0, i)) { \ 3996 continue; \ 3997 } \ 3998 s1 = OP(s1, (TD)s2); \ 3999 } \ 4000 *((TD *)vd + HD(0)) = s1; \ 4001 } 4002 4003 /* vd[0] = sum(vs1[0], vs2[*]) */ 4004 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4005 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4006 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4007 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4008 4009 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4010 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4011 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4012 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4013 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4014 4015 /* vd[0] = max(vs1[0], vs2[*]) */ 4016 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4017 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4018 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4019 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4020 4021 /* vd[0] = minu(vs1[0], vs2[*]) */ 4022 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4023 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4024 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4025 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4026 4027 /* vd[0] = min(vs1[0], vs2[*]) */ 4028 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4029 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4030 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4031 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4032 4033 /* vd[0] = and(vs1[0], vs2[*]) */ 4034 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4035 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4036 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4037 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4038 4039 /* vd[0] = or(vs1[0], vs2[*]) */ 4040 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4041 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4042 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4043 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4044 4045 /* vd[0] = xor(vs1[0], vs2[*]) */ 4046 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4047 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4048 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4049 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4050 4051 /* Vector Widening Integer Reduction Instructions */ 4052 /* signed sum 
reduction into double-width accumulator */ 4053 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4054 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4055 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4056 4057 /* Unsigned sum reduction into double-width accumulator */ 4058 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4059 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4060 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4061 4062 /* Vector Single-Width Floating-Point Reduction Instructions */ 4063 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4064 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4065 void *vs2, CPURISCVState *env, \ 4066 uint32_t desc) \ 4067 { \ 4068 uint32_t vm = vext_vm(desc); \ 4069 uint32_t vl = env->vl; \ 4070 uint32_t i; \ 4071 TD s1 = *((TD *)vs1 + HD(0)); \ 4072 \ 4073 for (i = 0; i < vl; i++) { \ 4074 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4075 if (!vm && !vext_elem_mask(v0, i)) { \ 4076 continue; \ 4077 } \ 4078 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4079 } \ 4080 *((TD *)vd + HD(0)) = s1; \ 4081 } 4082 4083 /* Unordered sum */ 4084 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4085 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4086 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4087 4088 /* Maximum value */ 4089 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum) 4090 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum) 4091 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum) 4092 4093 /* Minimum value */ 4094 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum) 4095 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum) 4096 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum) 4097 4098 /* Vector Widening Floating-Point Reduction Instructions */ 4099 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4100 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4101 void *vs2, CPURISCVState *env, uint32_t desc) 4102 { 4103 uint32_t vm = vext_vm(desc); 4104 uint32_t vl = env->vl; 4105 uint32_t i; 4106 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4107 4108 for (i = 0; i < vl; i++) { 4109 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4110 if (!vm && !vext_elem_mask(v0, i)) { 4111 continue; 4112 } 4113 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4114 &env->fp_status); 4115 } 4116 *((uint32_t *)vd + H4(0)) = s1; 4117 } 4118 4119 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4120 void *vs2, CPURISCVState *env, uint32_t desc) 4121 { 4122 uint32_t vm = vext_vm(desc); 4123 uint32_t vl = env->vl; 4124 uint32_t i; 4125 uint64_t s1 = *((uint64_t *)vs1); 4126 4127 for (i = 0; i < vl; i++) { 4128 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4129 if (!vm && !vext_elem_mask(v0, i)) { 4130 continue; 4131 } 4132 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4133 &env->fp_status); 4134 } 4135 *((uint64_t *)vd) = s1; 4136 } 4137 4138 /* 4139 *** Vector Mask Operations 4140 */ 4141 /* Vector Mask-Register Logical Instructions */ 4142 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4143 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4144 void *vs2, CPURISCVState *env, \ 4145 uint32_t desc) \ 4146 { \ 4147 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4148 uint32_t vl = env->vl; \ 4149 uint32_t i; \ 4150 int a, b; \ 4151 \ 4152 for (i = 0; i < vl; 
i++) { \ 4153 a = vext_elem_mask(vs1, i); \ 4154 b = vext_elem_mask(vs2, i); \ 4155 vext_set_elem_mask(vd, i, OP(b, a)); \ 4156 } \ 4157 for (; i < vlmax; i++) { \ 4158 vext_set_elem_mask(vd, i, 0); \ 4159 } \ 4160 } 4161 4162 #define DO_NAND(N, M) (!(N & M)) 4163 #define DO_ANDNOT(N, M) (N & !M) 4164 #define DO_NOR(N, M) (!(N | M)) 4165 #define DO_ORNOT(N, M) (N | !M) 4166 #define DO_XNOR(N, M) (!(N ^ M)) 4167 4168 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4169 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4170 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) 4171 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4172 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4173 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4174 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) 4175 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4176 4177 /* Vector mask population count vmpopc */ 4178 target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env, 4179 uint32_t desc) 4180 { 4181 target_ulong cnt = 0; 4182 uint32_t vm = vext_vm(desc); 4183 uint32_t vl = env->vl; 4184 int i; 4185 4186 for (i = 0; i < vl; i++) { 4187 if (vm || vext_elem_mask(v0, i)) { 4188 if (vext_elem_mask(vs2, i)) { 4189 cnt++; 4190 } 4191 } 4192 } 4193 return cnt; 4194 } 4195 4196 /* vmfirst find-first-set mask bit*/ 4197 target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4198 uint32_t desc) 4199 { 4200 uint32_t vm = vext_vm(desc); 4201 uint32_t vl = env->vl; 4202 int i; 4203 4204 for (i = 0; i < vl; i++) { 4205 if (vm || vext_elem_mask(v0, i)) { 4206 if (vext_elem_mask(vs2, i)) { 4207 return i; 4208 } 4209 } 4210 } 4211 return -1LL; 4212 } 4213 4214 enum set_mask_type { 4215 ONLY_FIRST = 1, 4216 INCLUDE_FIRST, 4217 BEFORE_FIRST, 4218 }; 4219 4220 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4221 uint32_t desc, enum set_mask_type type) 4222 { 4223 uint32_t vlmax = env_archcpu(env)->cfg.vlen; 4224 uint32_t vm = vext_vm(desc); 4225 uint32_t vl = env->vl; 4226 int i; 4227 bool first_mask_bit = false; 4228 4229 for (i = 0; i < vl; i++) { 4230 if (!vm && !vext_elem_mask(v0, i)) { 4231 continue; 4232 } 4233 /* write a zero to all following active elements */ 4234 if (first_mask_bit) { 4235 vext_set_elem_mask(vd, i, 0); 4236 continue; 4237 } 4238 if (vext_elem_mask(vs2, i)) { 4239 first_mask_bit = true; 4240 if (type == BEFORE_FIRST) { 4241 vext_set_elem_mask(vd, i, 0); 4242 } else { 4243 vext_set_elem_mask(vd, i, 1); 4244 } 4245 } else { 4246 if (type == ONLY_FIRST) { 4247 vext_set_elem_mask(vd, i, 0); 4248 } else { 4249 vext_set_elem_mask(vd, i, 1); 4250 } 4251 } 4252 } 4253 for (; i < vlmax; i++) { 4254 vext_set_elem_mask(vd, i, 0); 4255 } 4256 } 4257 4258 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4259 uint32_t desc) 4260 { 4261 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4262 } 4263 4264 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4265 uint32_t desc) 4266 { 4267 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4268 } 4269 4270 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4271 uint32_t desc) 4272 { 4273 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4274 } 4275 4276 /* Vector Iota Instruction */ 4277 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4278 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4279 uint32_t desc) \ 4280 { \ 4281 uint32_t vm = vext_vm(desc); \ 4282 uint32_t vl = env->vl; \ 4283 uint32_t sum = 0; \ 4284 int i; \ 4285 \ 4286 for (i = 0; i < vl; i++) { \ 4287 if (!vm && !vext_elem_mask(v0, i)) { \ 4288 continue; \ 4289 } \ 4290 *((ETYPE *)vd + H(i)) 
= sum; \ 4291 if (vext_elem_mask(vs2, i)) { \ 4292 sum++; \ 4293 } \ 4294 } \ 4295 } 4296 4297 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4298 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4299 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4300 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4301 4302 /* Vector Element Index Instruction */ 4303 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4304 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4305 { \ 4306 uint32_t vm = vext_vm(desc); \ 4307 uint32_t vl = env->vl; \ 4308 int i; \ 4309 \ 4310 for (i = 0; i < vl; i++) { \ 4311 if (!vm && !vext_elem_mask(v0, i)) { \ 4312 continue; \ 4313 } \ 4314 *((ETYPE *)vd + H(i)) = i; \ 4315 } \ 4316 } 4317 4318 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4319 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4320 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4321 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4322 4323 /* 4324 *** Vector Permutation Instructions 4325 */ 4326 4327 /* Vector Slide Instructions */ 4328 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4329 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4330 CPURISCVState *env, uint32_t desc) \ 4331 { \ 4332 uint32_t vm = vext_vm(desc); \ 4333 uint32_t vl = env->vl; \ 4334 target_ulong offset = s1, i; \ 4335 \ 4336 for (i = offset; i < vl; i++) { \ 4337 if (!vm && !vext_elem_mask(v0, i)) { \ 4338 continue; \ 4339 } \ 4340 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4341 } \ 4342 } 4343 4344 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4345 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4346 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4347 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4348 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4349 4350 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4351 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4352 CPURISCVState *env, uint32_t desc) \ 4353 { \ 4354 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4355 uint32_t vm = vext_vm(desc); \ 4356 uint32_t vl = env->vl; \ 4357 target_ulong offset = s1, i; \ 4358 \ 4359 for (i = 0; i < vl; ++i) { \ 4360 target_ulong j = i + offset; \ 4361 if (!vm && !vext_elem_mask(v0, i)) { \ 4362 continue; \ 4363 } \ 4364 *((ETYPE *)vd + H(i)) = j >= vlmax ? 
0 : *((ETYPE *)vs2 + H(j)); \ 4365 } \ 4366 } 4367 4368 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4369 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4370 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4371 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4372 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4373 4374 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H) \ 4375 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4376 CPURISCVState *env, uint32_t desc) \ 4377 { \ 4378 uint32_t vm = vext_vm(desc); \ 4379 uint32_t vl = env->vl; \ 4380 uint32_t i; \ 4381 \ 4382 for (i = 0; i < vl; i++) { \ 4383 if (!vm && !vext_elem_mask(v0, i)) { \ 4384 continue; \ 4385 } \ 4386 if (i == 0) { \ 4387 *((ETYPE *)vd + H(i)) = s1; \ 4388 } else { \ 4389 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4390 } \ 4391 } \ 4392 } 4393 4394 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4395 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1) 4396 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2) 4397 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4) 4398 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8) 4399 4400 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H) \ 4401 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4402 CPURISCVState *env, uint32_t desc) \ 4403 { \ 4404 uint32_t vm = vext_vm(desc); \ 4405 uint32_t vl = env->vl; \ 4406 uint32_t i; \ 4407 \ 4408 for (i = 0; i < vl; i++) { \ 4409 if (!vm && !vext_elem_mask(v0, i)) { \ 4410 continue; \ 4411 } \ 4412 if (i == vl - 1) { \ 4413 *((ETYPE *)vd + H(i)) = s1; \ 4414 } else { \ 4415 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4416 } \ 4417 } \ 4418 } 4419 4420 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4421 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1) 4422 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2) 4423 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4) 4424 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8) 4425 4426 /* Vector Register Gather Instruction */ 4427 #define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H) \ 4428 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4429 CPURISCVState *env, uint32_t desc) \ 4430 { \ 4431 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4432 uint32_t vm = vext_vm(desc); \ 4433 uint32_t vl = env->vl; \ 4434 uint64_t index; \ 4435 uint32_t i; \ 4436 \ 4437 for (i = 0; i < vl; i++) { \ 4438 if (!vm && !vext_elem_mask(v0, i)) { \ 4439 continue; \ 4440 } \ 4441 index = *((ETYPE *)vs1 + H(i)); \ 4442 if (index >= vlmax) { \ 4443 *((ETYPE *)vd + H(i)) = 0; \ 4444 } else { \ 4445 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4446 } \ 4447 } \ 4448 } 4449 4450 /* vd[i] = (vs1[i] >= VLMAX) ? 
0 : vs2[vs1[i]]; */ 4451 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, H1) 4452 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2) 4453 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4) 4454 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8) 4455 4456 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4457 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4458 CPURISCVState *env, uint32_t desc) \ 4459 { \ 4460 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4461 uint32_t vm = vext_vm(desc); \ 4462 uint32_t vl = env->vl; \ 4463 uint64_t index = s1; \ 4464 uint32_t i; \ 4465 \ 4466 for (i = 0; i < vl; i++) { \ 4467 if (!vm && !vext_elem_mask(v0, i)) { \ 4468 continue; \ 4469 } \ 4470 if (index >= vlmax) { \ 4471 *((ETYPE *)vd + H(i)) = 0; \ 4472 } else { \ 4473 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4474 } \ 4475 } \ 4476 } 4477 4478 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ 4479 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4480 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4481 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4482 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4483 4484 /* Vector Compress Instruction */ 4485 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4486 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4487 CPURISCVState *env, uint32_t desc) \ 4488 { \ 4489 uint32_t vl = env->vl; \ 4490 uint32_t num = 0, i; \ 4491 \ 4492 for (i = 0; i < vl; i++) { \ 4493 if (!vext_elem_mask(vs1, i)) { \ 4494 continue; \ 4495 } \ 4496 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4497 num++; \ 4498 } \ 4499 } 4500 4501 /* Compress into vd elements of vs2 where vs1 is enabled */ 4502 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4503 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4504 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4505 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4506
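/*
 * Note that vcompress is never masked: v0 is unused, vs1 provides the
 * selection mask instead, and destination elements past the last packed
 * element are left unchanged.  For example, with vl = 8 and a vs1 mask
 * of 0b10110010, elements 1, 4, 5 and 7 of vs2 are packed into vd[0..3].
 */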