/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    bool vill = FIELD_EX64(s2, VTYPE, VILL);
    target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);

    if (lmul & 4) {
        /* Fractional LMUL. */
        if (lmul == 4 ||
            cpu->cfg.elen >> (8 - lmul) < sew) {
            vill = true;
        }
    }

    if ((sew > cpu->cfg.elen)
        || vill
        || (ediv != 0)
        || (reserved != 0)) {
        /* only set vill bit. */
        env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that needs a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

/*
 * Get vector group length in bytes. Its range is [64, 2048].
 *
 * As simd_desc supports at most 256, the max vlen is 512 bits.
 * So vlen in bytes is encoded as maxsz.
 */
static inline uint32_t vext_maxsz(uint32_t desc)
{
    return simd_maxsz(desc) << vext_lmul(desc);
}
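/*
 * Worked example for the helpers above (illustrative values only, the
 * arithmetic follows directly from the code): with vlen = 128 bits the
 * translator encodes maxsz = 16 bytes, so for LMUL = 4 (vlmul = 010,
 * lmul = 2) vext_maxsz() returns 16 << 2 = 64 bytes, the size of the whole
 * four-register group.  In vsetvl, a fractional LMUL of 1/4 (vlmul = 110,
 * raw value 6) is only accepted while elen >> (8 - 6) = elen / 4 >= sew;
 * with elen = 64 that limits SEW to 16 bits, otherwise only the vill bit
 * is written back.
 */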
/*
 * This function checks watchpoint before real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in TLB
 * and page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, addr, curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, addr, curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, MTYPE, ETYPE, H, LDSUF)     \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    MTYPE data;                                            \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    data = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
    *cur = data;                                           \
}                                                          \

GEN_VEXT_LD_ELEM(ldb_b,  int8_t,   int8_t,   H1, ldsb)
GEN_VEXT_LD_ELEM(ldb_h,  int8_t,   int16_t,  H2, ldsb)
GEN_VEXT_LD_ELEM(ldb_w,  int8_t,   int32_t,  H4, ldsb)
GEN_VEXT_LD_ELEM(ldb_d,  int8_t,   int64_t,  H8, ldsb)
GEN_VEXT_LD_ELEM(ldh_h,  int16_t,  int16_t,  H2, ldsw)
GEN_VEXT_LD_ELEM(ldh_w,  int16_t,  int32_t,  H4, ldsw)
GEN_VEXT_LD_ELEM(ldh_d,  int16_t,  int64_t,  H8, ldsw)
GEN_VEXT_LD_ELEM(ldw_w,  int32_t,  int32_t,  H4, ldl)
GEN_VEXT_LD_ELEM(ldw_d,  int32_t,  int64_t,  H8, ldl)
GEN_VEXT_LD_ELEM(lde_b,  int8_t,   int8_t,   H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h,  int16_t,  int16_t,  H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w,  int32_t,  int32_t,  H4, ldl)
GEN_VEXT_LD_ELEM(lde_d,  int64_t,  int64_t,  H8, ldq)
GEN_VEXT_LD_ELEM(ldbu_b, uint8_t,  uint8_t,  H1, ldub)
GEN_VEXT_LD_ELEM(ldbu_h, uint8_t,  uint16_t, H2, ldub)
GEN_VEXT_LD_ELEM(ldbu_w, uint8_t,  uint32_t, H4, ldub)
GEN_VEXT_LD_ELEM(ldbu_d, uint8_t,  uint64_t, H8, ldub)
GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, uint16_t, H2, lduw)
GEN_VEXT_LD_ELEM(ldhu_w, uint16_t, uint32_t, H4, lduw)
GEN_VEXT_LD_ELEM(ldhu_d, uint16_t, uint64_t, H8, lduw)
GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, uint32_t, H4, ldl)
GEN_VEXT_LD_ELEM(ldwu_d, uint32_t, uint64_t, H8, ldl)
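/*
 * Naming convention used above (worked example): MTYPE is the type read
 * from memory and ETYPE the element type written to the register file, so
 * GEN_VEXT_LD_ELEM(ldb_w, int8_t, int32_t, H4, ldsb) produces ldb_w(),
 * which loads one signed byte with cpu_ldsb_data_ra() and sign-extends it
 * into a 32-bit element at host-endian index H4(idx).  The lde_* variants
 * keep MTYPE == ETYPE, and the u-suffixed forms zero-extend instead of
 * sign-extending.
 */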
#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(stb_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(stb_h, int16_t, H2, stb)
GEN_VEXT_ST_ELEM(stb_w, int32_t, H4, stb)
GEN_VEXT_ST_ELEM(stb_d, int64_t, H8, stb)
GEN_VEXT_ST_ELEM(sth_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(sth_w, int32_t, H4, stw)
GEN_VEXT_ST_ELEM(sth_d, int64_t, H8, stw)
GEN_VEXT_ST_ELEM(stw_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(stw_d, int64_t, H8, stl)
GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uint32_t msz, uintptr_t ra,
                 MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        probe_pages(env, base + stride * i, nf * msz, ra, access_type);
    }
    /* do real access */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + stride * i + k * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
}

#define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, LOAD_FN)                 \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     sizeof(ETYPE), sizeof(MTYPE),                      \
                     GETPC(), MMU_DATA_LOAD);                           \
}

GEN_VEXT_LD_STRIDE(vlsb_v_b,  int8_t,   int8_t,   ldb_b)
GEN_VEXT_LD_STRIDE(vlsb_v_h,  int8_t,   int16_t,  ldb_h)
GEN_VEXT_LD_STRIDE(vlsb_v_w,  int8_t,   int32_t,  ldb_w)
GEN_VEXT_LD_STRIDE(vlsb_v_d,  int8_t,   int64_t,  ldb_d)
GEN_VEXT_LD_STRIDE(vlsh_v_h,  int16_t,  int16_t,  ldh_h)
GEN_VEXT_LD_STRIDE(vlsh_v_w,  int16_t,  int32_t,  ldh_w)
GEN_VEXT_LD_STRIDE(vlsh_v_d,  int16_t,  int64_t,  ldh_d)
GEN_VEXT_LD_STRIDE(vlsw_v_w,  int32_t,  int32_t,  ldw_w)
GEN_VEXT_LD_STRIDE(vlsw_v_d,  int32_t,  int64_t,  ldw_d)
GEN_VEXT_LD_STRIDE(vlse_v_b,  int8_t,   int8_t,   lde_b)
GEN_VEXT_LD_STRIDE(vlse_v_h,  int16_t,  int16_t,  lde_h)
GEN_VEXT_LD_STRIDE(vlse_v_w,  int32_t,  int32_t,  lde_w)
GEN_VEXT_LD_STRIDE(vlse_v_d,  int64_t,  int64_t,  lde_d)
GEN_VEXT_LD_STRIDE(vlsbu_v_b, uint8_t,  uint8_t,  ldbu_b)
GEN_VEXT_LD_STRIDE(vlsbu_v_h, uint8_t,  uint16_t, ldbu_h)
GEN_VEXT_LD_STRIDE(vlsbu_v_w, uint8_t,  uint32_t, ldbu_w)
GEN_VEXT_LD_STRIDE(vlsbu_v_d, uint8_t,  uint64_t, ldbu_d)
GEN_VEXT_LD_STRIDE(vlshu_v_h, uint16_t, uint16_t, ldhu_h)
GEN_VEXT_LD_STRIDE(vlshu_v_w, uint16_t, uint32_t, ldhu_w)
GEN_VEXT_LD_STRIDE(vlshu_v_d, uint16_t, uint64_t, ldhu_d)
GEN_VEXT_LD_STRIDE(vlswu_v_w, uint32_t, uint32_t, ldwu_w)
GEN_VEXT_LD_STRIDE(vlswu_v_d, uint32_t, uint64_t, ldwu_d)
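/*
 * Worked example (illustrative values): for a strided segment load with
 * nf = 2, msz = 2 and stride = 16, field k of element i is read from
 * base + 16 * i + 2 * k and written to register slot i + k * vlmax,
 * i.e. field 0 fills the first destination register group and field 1
 * starts at element index vlmax in the next group.
 */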
#define GEN_VEXT_ST_STRIDE(NAME, MTYPE, ETYPE, STORE_FN)                \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     sizeof(ETYPE), sizeof(MTYPE),                      \
                     GETPC(), MMU_DATA_STORE);                          \
}

GEN_VEXT_ST_STRIDE(vssb_v_b, int8_t,  int8_t,  stb_b)
GEN_VEXT_ST_STRIDE(vssb_v_h, int8_t,  int16_t, stb_h)
GEN_VEXT_ST_STRIDE(vssb_v_w, int8_t,  int32_t, stb_w)
GEN_VEXT_ST_STRIDE(vssb_v_d, int8_t,  int64_t, stb_d)
GEN_VEXT_ST_STRIDE(vssh_v_h, int16_t, int16_t, sth_h)
GEN_VEXT_ST_STRIDE(vssh_v_w, int16_t, int32_t, sth_w)
GEN_VEXT_ST_STRIDE(vssh_v_d, int16_t, int64_t, sth_d)
GEN_VEXT_ST_STRIDE(vssw_v_w, int32_t, int32_t, stw_w)
GEN_VEXT_ST_STRIDE(vssw_v_d, int32_t, int64_t, stw_d)
GEN_VEXT_ST_STRIDE(vsse_v_b, int8_t,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse_v_h, int16_t, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse_v_w, int32_t, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t msz,
             uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    /* probe every access */
    probe_pages(env, base, env->vl * nf * msz, ra, access_type);
    /* load bytes from guest memory */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + (i * nf + k) * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
}

/*
 * Masked unit-stride load and store operations are a special case of
 * strided operations, with stride = NF * sizeof(MTYPE).
 */

#define GEN_VEXT_LD_US(NAME, MTYPE, ETYPE, LOAD_FN)                     \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) * sizeof(MTYPE);                    \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     sizeof(ETYPE), sizeof(MTYPE),                      \
                     GETPC(), MMU_DATA_LOAD);                           \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                          \
                 sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_LOAD); \
}

GEN_VEXT_LD_US(vlb_v_b,  int8_t,   int8_t,   ldb_b)
GEN_VEXT_LD_US(vlb_v_h,  int8_t,   int16_t,  ldb_h)
GEN_VEXT_LD_US(vlb_v_w,  int8_t,   int32_t,  ldb_w)
GEN_VEXT_LD_US(vlb_v_d,  int8_t,   int64_t,  ldb_d)
GEN_VEXT_LD_US(vlh_v_h,  int16_t,  int16_t,  ldh_h)
GEN_VEXT_LD_US(vlh_v_w,  int16_t,  int32_t,  ldh_w)
GEN_VEXT_LD_US(vlh_v_d,  int16_t,  int64_t,  ldh_d)
GEN_VEXT_LD_US(vlw_v_w,  int32_t,  int32_t,  ldw_w)
GEN_VEXT_LD_US(vlw_v_d,  int32_t,  int64_t,  ldw_d)
GEN_VEXT_LD_US(vle_v_b,  int8_t,   int8_t,   lde_b)
GEN_VEXT_LD_US(vle_v_h,  int16_t,  int16_t,  lde_h)
GEN_VEXT_LD_US(vle_v_w,  int32_t,  int32_t,  lde_w)
GEN_VEXT_LD_US(vle_v_d,  int64_t,  int64_t,  lde_d)
GEN_VEXT_LD_US(vlbu_v_b, uint8_t,  uint8_t,  ldbu_b)
GEN_VEXT_LD_US(vlbu_v_h, uint8_t,  uint16_t, ldbu_h)
GEN_VEXT_LD_US(vlbu_v_w, uint8_t,  uint32_t, ldbu_w)
GEN_VEXT_LD_US(vlbu_v_d, uint8_t,  uint64_t, ldbu_d)
GEN_VEXT_LD_US(vlhu_v_h, uint16_t, uint16_t, ldhu_h)
GEN_VEXT_LD_US(vlhu_v_w, uint16_t, uint32_t, ldhu_w)
GEN_VEXT_LD_US(vlhu_v_d, uint16_t, uint64_t, ldhu_d)
GEN_VEXT_LD_US(vlwu_v_w, uint32_t, uint32_t, ldwu_w)
GEN_VEXT_LD_US(vlwu_v_d, uint32_t, uint64_t, ldwu_d)
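/*
 * Worked example (illustrative): an unmasked unit-stride segment load with
 * nf = 3 and msz = 4 reads field k of element i from
 * base + (i * 3 + k) * 4, so the fields of one element are adjacent in
 * memory, while the masked variants above reuse vext_ldst_stride() with an
 * implicit stride of nf * msz so that inactive elements can be skipped.
 */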
#define GEN_VEXT_ST_US(NAME, MTYPE, ETYPE, STORE_FN)                    \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) * sizeof(MTYPE);                    \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     sizeof(ETYPE), sizeof(MTYPE),                      \
                     GETPC(), MMU_DATA_STORE);                          \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                         \
                 sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_STORE);\
}

GEN_VEXT_ST_US(vsb_v_b, int8_t,  int8_t,  stb_b)
GEN_VEXT_ST_US(vsb_v_h, int8_t,  int16_t, stb_h)
GEN_VEXT_ST_US(vsb_v_w, int8_t,  int32_t, stb_w)
GEN_VEXT_ST_US(vsb_v_d, int8_t,  int64_t, stb_d)
GEN_VEXT_ST_US(vsh_v_h, int16_t, int16_t, sth_h)
GEN_VEXT_ST_US(vsh_v_w, int16_t, int32_t, sth_w)
GEN_VEXT_ST_US(vsh_v_d, int16_t, int64_t, sth_d)
GEN_VEXT_ST_US(vsw_v_w, int32_t, int32_t, stw_w)
GEN_VEXT_ST_US(vsw_v_d, int32_t, int64_t, stw_d)
GEN_VEXT_ST_US(vse_v_b, int8_t,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h)
GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w)
GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d)

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, int8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, int16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, int32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, int64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t esz, uint32_t msz, uintptr_t ra,
                MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra,
                    access_type);
    }
    /* load bytes from guest memory */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + k * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
}
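/*
 * Worked example (illustrative): for an indexed access using idx_w, field
 * k of element i is addressed at base + (int32_t)vs2[i] + k * msz; the
 * index is read sign-extended from the index register group vs2, so
 * negative offsets from base are possible, and the nf fields of one
 * segment are laid out msz bytes apart at that indexed address.
 */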
#define GEN_VEXT_LD_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, LOAD_FN)        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  void *vs2, CPURISCVState *env, uint32_t desc)         \
{                                                                       \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,             \
                    LOAD_FN, sizeof(ETYPE), sizeof(MTYPE),              \
                    GETPC(), MMU_DATA_LOAD);                            \
}

GEN_VEXT_LD_INDEX(vlxb_v_b,  int8_t,   int8_t,   idx_b, ldb_b)
GEN_VEXT_LD_INDEX(vlxb_v_h,  int8_t,   int16_t,  idx_h, ldb_h)
GEN_VEXT_LD_INDEX(vlxb_v_w,  int8_t,   int32_t,  idx_w, ldb_w)
GEN_VEXT_LD_INDEX(vlxb_v_d,  int8_t,   int64_t,  idx_d, ldb_d)
GEN_VEXT_LD_INDEX(vlxh_v_h,  int16_t,  int16_t,  idx_h, ldh_h)
GEN_VEXT_LD_INDEX(vlxh_v_w,  int16_t,  int32_t,  idx_w, ldh_w)
GEN_VEXT_LD_INDEX(vlxh_v_d,  int16_t,  int64_t,  idx_d, ldh_d)
GEN_VEXT_LD_INDEX(vlxw_v_w,  int32_t,  int32_t,  idx_w, ldw_w)
GEN_VEXT_LD_INDEX(vlxw_v_d,  int32_t,  int64_t,  idx_d, ldw_d)
GEN_VEXT_LD_INDEX(vlxe_v_b,  int8_t,   int8_t,   idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxe_v_h,  int16_t,  int16_t,  idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxe_v_w,  int32_t,  int32_t,  idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxe_v_d,  int64_t,  int64_t,  idx_d, lde_d)
GEN_VEXT_LD_INDEX(vlxbu_v_b, uint8_t,  uint8_t,  idx_b, ldbu_b)
GEN_VEXT_LD_INDEX(vlxbu_v_h, uint8_t,  uint16_t, idx_h, ldbu_h)
GEN_VEXT_LD_INDEX(vlxbu_v_w, uint8_t,  uint32_t, idx_w, ldbu_w)
GEN_VEXT_LD_INDEX(vlxbu_v_d, uint8_t,  uint64_t, idx_d, ldbu_d)
GEN_VEXT_LD_INDEX(vlxhu_v_h, uint16_t, uint16_t, idx_h, ldhu_h)
GEN_VEXT_LD_INDEX(vlxhu_v_w, uint16_t, uint32_t, idx_w, ldhu_w)
GEN_VEXT_LD_INDEX(vlxhu_v_d, uint16_t, uint64_t, idx_d, ldhu_d)
GEN_VEXT_LD_INDEX(vlxwu_v_w, uint32_t, uint32_t, idx_w, ldwu_w)
GEN_VEXT_LD_INDEX(vlxwu_v_d, uint32_t, uint64_t, idx_d, ldwu_d)

#define GEN_VEXT_ST_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, STORE_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  void *vs2, CPURISCVState *env, uint32_t desc)         \
{                                                                       \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,             \
                    STORE_FN, sizeof(ETYPE), sizeof(MTYPE),             \
                    GETPC(), MMU_DATA_STORE);                           \
}

GEN_VEXT_ST_INDEX(vsxb_v_b, int8_t,  int8_t,  idx_b, stb_b)
GEN_VEXT_ST_INDEX(vsxb_v_h, int8_t,  int16_t, idx_h, stb_h)
GEN_VEXT_ST_INDEX(vsxb_v_w, int8_t,  int32_t, idx_w, stb_w)
GEN_VEXT_ST_INDEX(vsxb_v_d, int8_t,  int64_t, idx_d, stb_d)
GEN_VEXT_ST_INDEX(vsxh_v_h, int16_t, int16_t, idx_h, sth_h)
GEN_VEXT_ST_INDEX(vsxh_v_w, int16_t, int32_t, idx_w, sth_w)
GEN_VEXT_ST_INDEX(vsxh_v_d, int16_t, int64_t, idx_d, sth_d)
GEN_VEXT_ST_INDEX(vsxw_v_w, int32_t, int32_t, idx_w, stw_w)
GEN_VEXT_ST_INDEX(vsxw_v_d, int32_t, int64_t, idx_d, stw_d)
GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t,  int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uint32_t msz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = base + nf * i * msz;
        if (i == 0) {
            probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf * msz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, nf * msz, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr += offset;
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + (i * nf + k) * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
}
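/*
 * Example of the fault-only-first contract implemented above: element 0 is
 * probed normally, so a fault there still traps.  If, say, element 3 is
 * the first one whose page cannot be mapped, vl is reduced to 3 before the
 * load loop runs, elements 0..2 are written back and no exception is
 * raised; the guest then observes the shortened vl.
 */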
#define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN)               \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  CPURISCVState *env, uint32_t desc)             \
{                                                                \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN,                  \
              sizeof(ETYPE), sizeof(MTYPE), GETPC());            \
}

GEN_VEXT_LDFF(vlbff_v_b,  int8_t,   int8_t,   ldb_b)
GEN_VEXT_LDFF(vlbff_v_h,  int8_t,   int16_t,  ldb_h)
GEN_VEXT_LDFF(vlbff_v_w,  int8_t,   int32_t,  ldb_w)
GEN_VEXT_LDFF(vlbff_v_d,  int8_t,   int64_t,  ldb_d)
GEN_VEXT_LDFF(vlhff_v_h,  int16_t,  int16_t,  ldh_h)
GEN_VEXT_LDFF(vlhff_v_w,  int16_t,  int32_t,  ldh_w)
GEN_VEXT_LDFF(vlhff_v_d,  int16_t,  int64_t,  ldh_d)
GEN_VEXT_LDFF(vlwff_v_w,  int32_t,  int32_t,  ldw_w)
GEN_VEXT_LDFF(vlwff_v_d,  int32_t,  int64_t,  ldw_d)
GEN_VEXT_LDFF(vleff_v_b,  int8_t,   int8_t,   lde_b)
GEN_VEXT_LDFF(vleff_v_h,  int16_t,  int16_t,  lde_h)
GEN_VEXT_LDFF(vleff_v_w,  int32_t,  int32_t,  lde_w)
GEN_VEXT_LDFF(vleff_v_d,  int64_t,  int64_t,  lde_d)
GEN_VEXT_LDFF(vlbuff_v_b, uint8_t,  uint8_t,  ldbu_b)
GEN_VEXT_LDFF(vlbuff_v_h, uint8_t,  uint16_t, ldbu_h)
GEN_VEXT_LDFF(vlbuff_v_w, uint8_t,  uint32_t, ldbu_w)
GEN_VEXT_LDFF(vlbuff_v_d, uint8_t,  uint64_t, ldbu_d)
GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h)
GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w)
GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d)
GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w)
GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d)

#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/* Unsigned min/max */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)

/*
 *** Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)
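/*
 * Worked example of the indirection above: the OP_SSS_B-style type-tuple
 * names defined below each expand to five comma-separated types, so
 *     RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
 * first expands OP_SSS_B and only then invokes
 *     OPIVV2(vadd_vv_b, int8_t, int8_t, int8_t, int8_t, int8_t,
 *            H1, H1, H1, DO_ADD),
 * which is what generates do_vadd_vv_b().
 */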
/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
}
#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       uint32_t esz, uint32_t dsz,
                       opivv2_fn *fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = 0; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
}
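/*
 * Note on the loop above: when vm == 0 and the mask bit for element i is
 * clear, fn() is simply not called, so the old value of that destination
 * element is left in place; tail elements past vl are likewise untouched.
 * For example, do_vadd_vv_b generated above computes
 * vd[i] = vs2[i] + vs1[i] for each active element i < vl.
 */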
/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME, ESZ, DSZ)                       \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
               do_##NAME);                                \
}

GEN_VEXT_VV(vadd_vv_b, 1, 1)
GEN_VEXT_VV(vadd_vv_h, 2, 2)
GEN_VEXT_VV(vadd_vv_w, 4, 4)
GEN_VEXT_VV(vadd_vv_d, 8, 8)
GEN_VEXT_VV(vsub_vv_b, 1, 1)
GEN_VEXT_VV(vsub_vv_h, 2, 2)
GEN_VEXT_VV(vsub_vv_w, 4, 4)
GEN_VEXT_VV(vsub_vv_d, 8, 8)

typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operand type.
 * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
 */
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
}

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       uint32_t esz, uint32_t dsz,
                       opivx2_fn fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = 0; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, s1, vs2, i);
    }
}

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME, ESZ, DSZ)                       \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
               do_##NAME);                                \
}

GEN_VEXT_VX(vadd_vx_b, 1, 1)
GEN_VEXT_VX(vadd_vx_h, 2, 2)
GEN_VEXT_VX(vadd_vx_w, 4, 4)
GEN_VEXT_VX(vadd_vx_d, 8, 8)
GEN_VEXT_VX(vsub_vx_b, 1, 1)
GEN_VEXT_VX(vsub_vx_h, 2, 2)
GEN_VEXT_VX(vsub_vx_w, 4, 4)
GEN_VEXT_VX(vsub_vx_d, 8, 8)
GEN_VEXT_VX(vrsub_vx_b, 1, 1)
GEN_VEXT_VX(vrsub_vx_h, 2, 2)
GEN_VEXT_VX(vrsub_vx_w, 4, 4)
GEN_VEXT_VX(vrsub_vx_d, 8, 8)
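/*
 * The vec_rsubs* helpers below follow the generic gvec calling convention
 * (note the simd_oprsz() descriptor) rather than the vector-helper one:
 * they compute d[i] = b - a[i] for every element of the operand, with no
 * masking.  For example, with b = 1 and a[i] = 3 the byte result is
 * (uint8_t)(1 - 3) = 0xfe.
 */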
void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
GEN_VEXT_VV(vwadd_vv_b, 1, 2)
GEN_VEXT_VV(vwadd_vv_h, 2, 4)
GEN_VEXT_VV(vwadd_vv_w, 4, 8)
GEN_VEXT_VV(vwsub_vv_b, 1, 2)
GEN_VEXT_VV(vwsub_vv_h, 2, 4)
GEN_VEXT_VV(vwsub_vv_w, 4, 8)
GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
GEN_VEXT_VV(vwadd_wv_b, 1, 2)
GEN_VEXT_VV(vwadd_wv_h, 2, 4)
GEN_VEXT_VV(vwadd_wv_w, 4, 8)
GEN_VEXT_VV(vwsub_wv_b, 1, 2)
GEN_VEXT_VV(vwsub_wv_h, 2, 4)
GEN_VEXT_VV(vwsub_wv_w, 4, 8)
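/*
 * Naming note (worked example): the type tuples spell out the operand
 * widths, e.g. WOP_WUUU_B is (uint16_t, uint8_t, uint16_t, ...), so the
 * "wv"/"wx" forms such as vwaddu_wv_b add a single-width vs1 element to an
 * already double-width vs2 element, while the plain "vv"/"vx" forms widen
 * both sources; the byte-sized invocations accordingly pass (ESZ, DSZ) =
 * (1, 2) to GEN_VEXT_VV / GEN_VEXT_VX.
 */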
RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
GEN_VEXT_VX(vwadd_vx_b, 1, 2)
GEN_VEXT_VX(vwadd_vx_h, 2, 4)
GEN_VEXT_VX(vwadd_vx_w, 4, 8)
GEN_VEXT_VX(vwsub_vx_b, 1, 2)
GEN_VEXT_VX(vwsub_vx_h, 2, 4)
GEN_VEXT_VX(vwsub_vx_w, 4, 8)
GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
GEN_VEXT_VX(vwadd_wx_b, 1, 2)
GEN_VEXT_VX(vwadd_wx_h, 2, 4)
GEN_VEXT_VX(vwadd_wx_w, 4, 8)
GEN_VEXT_VX(vwsub_wx_b, 1, 2)
GEN_VEXT_VX(vwsub_wx_h, 2, 4)
GEN_VEXT_VX(vwsub_wx_w, 4, 8)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP)              \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t i;                                               \
                                                              \
    for (i = 0; i < vl; i++) {                                \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        uint8_t carry = vext_elem_mask(v0, i);                \
                                                              \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry);         \
    }                                                         \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP)                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,        \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    uint32_t vl = env->vl;                                               \
    uint32_t i;                                                          \
                                                                         \
    for (i = 0; i < vl; i++) {                                           \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                               \
        uint8_t carry = vext_elem_mask(v0, i);                           \
                                                                         \
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
    }                                                                    \
}
GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t,  H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t,  H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)

#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N :           \
                          (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)

#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vl = env->vl;                                    \
    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
    uint32_t i;                                               \
                                                              \
    for (i = 0; i < vl; i++) {                                \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        uint8_t carry = vext_elem_mask(v0, i);                \
                                                              \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry));      \
    }                                                         \
    for (; i < vlmax; i++) {                                  \
        vext_set_elem_mask(vd, i, 0);                         \
    }                                                         \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP)                     \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,                \
                  void *vs2, CPURISCVState *env, uint32_t desc)       \
{                                                                     \
    uint32_t vl = env->vl;                                            \
    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);                \
    uint32_t i;                                                       \
                                                                      \
    for (i = 0; i < vl; i++) {                                        \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                            \
        uint8_t carry = vext_elem_mask(v0, i);                        \
                                                                      \
        vext_set_elem_mask(vd, i,                                     \
                           DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
    }                                                                 \
    for (; i < vlmax; i++) {                                          \
        vext_set_elem_mask(vd, i, 0);                                 \
    }                                                                 \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t,  H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t,  H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
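/*
 * How DO_MADC / DO_MSBC detect carry and borrow (worked example with
 * uint8_t operands): without carry-in, N + M overflows exactly when the
 * truncated sum (uint8_t)(N + M) is smaller than N, e.g. 200 + 100 wraps
 * to 44 < 200.  With carry-in, N + M + 1 overflows when the truncated sum
 * is <= N, which also covers the all-ones case 255 + 0 + 1 -> 0.  For the
 * borrow side, N - M - C underflows when N < M (or N <= M with borrow-in).
 * The generated vmadc/vmsbc helpers write one mask bit per element and
 * clear the remaining bits up to vlmax.
 */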
/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1, 1)
GEN_VEXT_VV(vand_vv_h, 2, 2)
GEN_VEXT_VV(vand_vv_w, 4, 4)
GEN_VEXT_VV(vand_vv_d, 8, 8)
GEN_VEXT_VV(vor_vv_b, 1, 1)
GEN_VEXT_VV(vor_vv_h, 2, 2)
GEN_VEXT_VV(vor_vv_w, 4, 4)
GEN_VEXT_VV(vor_vv_d, 8, 8)
GEN_VEXT_VV(vxor_vv_b, 1, 1)
GEN_VEXT_VV(vxor_vv_h, 2, 2)
GEN_VEXT_VV(vxor_vv_w, 4, 4)
GEN_VEXT_VV(vxor_vv_d, 8, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1, 1)
GEN_VEXT_VX(vand_vx_h, 2, 2)
GEN_VEXT_VX(vand_vx_w, 4, 4)
GEN_VEXT_VX(vand_vx_d, 8, 8)
GEN_VEXT_VX(vor_vx_b, 1, 1)
GEN_VEXT_VX(vor_vx_h, 2, 2)
GEN_VEXT_VX(vor_vx_w, 4, 4)
GEN_VEXT_VX(vor_vx_d, 8, 8)
GEN_VEXT_VX(vxor_vx_b, 1, 1)
GEN_VEXT_VX(vxor_vx_h, 2, 2)
GEN_VEXT_VX(vxor_vx_w, 4, 4)
GEN_VEXT_VX(vxor_vx_d, 8, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M)  (N << (M))
#define DO_SRL(N, M)  (N >> (M))

/* generate the helpers for shift instructions with two vector operands */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK)     \
void HELPER(NAME)(void *vd, void *v0, void *vs1,                  \
                  void *vs2, CPURISCVState *env, uint32_t desc)   \
{                                                                 \
    uint32_t vm = vext_vm(desc);                                  \
    uint32_t vl = env->vl;                                        \
    uint32_t i;                                                   \
                                                                  \
    for (i = 0; i < vl; i++) {                                    \
        if (!vm && !vext_elem_mask(v0, i)) {                      \
            continue;                                             \
        }                                                         \
        TS1 s1 = *((TS1 *)vs1 + HS1(i));                          \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                          \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK);                \
    }                                                             \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t,  uint8_t,  H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t,  uint8_t,  H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t,  int8_t,  H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
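/*
 * Two details of the shift helpers above, shown by example: the shift
 * amount is masked to log2(SEW) bits, so for vsll_vv_w an s1 value of 35
 * shifts by 35 & 0x1f = 3; and vsra reuses DO_SRL but declares TS2 as a
 * signed type, so "s2 >> n" acts as an arithmetic right shift of the
 * source element before the result is stored to the destination.
 */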
/* generate the helpers for shift instructions with one vector and one scalar */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,            \
                  void *vs2, CPURISCVState *env, uint32_t desc)   \
{                                                                 \
    uint32_t vm = vext_vm(desc);                                  \
    uint32_t vl = env->vl;                                        \
    uint32_t i;                                                   \
                                                                  \
    for (i = 0; i < vl; i++) {                                    \
        if (!vm && !vext_elem_mask(v0, i)) {                      \
            continue;                                             \
        }                                                         \
        TS2 s2 = *((TS2 *)vs2 + HS2(i));                          \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK);                  \
    }                                                             \
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t,  int8_t,  H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t,  uint8_t,  H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t,  int8_t,  H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t,  int16_t,  H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t,  H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t,  H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t,  uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t,  int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
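/*
 * The narrowing shifts reuse the same macros with a double-width source
 * type: for vnsrl_vv_b the source elements are uint16_t, the shift amount
 * is masked with 0xf (i.e. modulo twice the narrow SEW), and only the low
 * 8 bits of the shifted value end up in the uint8_t destination element.
 */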
/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP)                \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)          \
{                                                             \
    uint32_t vm = vext_vm(desc);                              \
    uint32_t vl = env->vl;                                    \
    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);        \
    uint32_t i;                                               \
                                                              \
    for (i = 0; i < vl; i++) {                                \
        ETYPE s1 = *((ETYPE *)vs1 + H(i));                    \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                    \
        if (!vm && !vext_elem_mask(v0, i)) {                  \
            continue;                                         \
        }                                                     \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1));             \
    }                                                         \
    for (; i < vlmax; i++) {                                  \
        vext_set_elem_mask(vd, i, 0);                         \
    }                                                         \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP)                      \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,   \
                  CPURISCVState *env, uint32_t desc)                \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    uint32_t vl = env->vl;                                          \
    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE);              \
    uint32_t i;                                                     \
                                                                    \
    for (i = 0; i < vl; i++) {                                      \
        ETYPE s2 = *((ETYPE *)vs2 + H(i));                          \
        if (!vm && !vext_elem_mask(v0, i)) {                        \
            continue;                                               \
        }                                                           \
        vext_set_elem_mask(vd, i,                                   \
                           DO_OP(s2, (ETYPE)(target_long)s1));      \
    }                                                               \
    for (; i < vlmax; i++) {                                        \
        vext_set_elem_mask(vd, i, 0);                               \
    }                                                               \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t,  H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t,  H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t,  H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t,  H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t,  H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
GEN_VEXT_VV(vminu_vv_b, 1, 1)
GEN_VEXT_VV(vminu_vv_h, 2, 2)
GEN_VEXT_VV(vminu_vv_w, 4, 4)
GEN_VEXT_VV(vminu_vv_d, 8, 8)
GEN_VEXT_VV(vmin_vv_b, 1, 1)
GEN_VEXT_VV(vmin_vv_h, 2, 2)
GEN_VEXT_VV(vmin_vv_w, 4, 4)
GEN_VEXT_VV(vmin_vv_d, 8, 8)
GEN_VEXT_VV(vmaxu_vv_b, 1, 1)
GEN_VEXT_VV(vmaxu_vv_h, 2, 2)
GEN_VEXT_VV(vmaxu_vv_w, 4, 4)
GEN_VEXT_VV(vmaxu_vv_d, 8, 8)
GEN_VEXT_VV(vmax_vv_b, 1, 1)
GEN_VEXT_VV(vmax_vv_h, 2, 2)
GEN_VEXT_VV(vmax_vv_w, 4, 4)
GEN_VEXT_VV(vmax_vv_d, 8, 8)

RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
GEN_VEXT_VX(vminu_vx_b, 1, 1)
GEN_VEXT_VX(vminu_vx_h, 2, 2)
GEN_VEXT_VX(vminu_vx_w, 4, 4)
GEN_VEXT_VX(vminu_vx_d, 8, 8)
GEN_VEXT_VX(vmin_vx_b, 1, 1)
GEN_VEXT_VX(vmin_vx_h, 2, 2)
GEN_VEXT_VX(vmin_vx_w, 4, 4)
GEN_VEXT_VX(vmin_vx_d, 8, 8)
GEN_VEXT_VX(vmaxu_vx_b, 1, 1)
GEN_VEXT_VX(vmaxu_vx_h, 2, 2)
GEN_VEXT_VX(vmaxu_vx_w, 4, 4)
GEN_VEXT_VX(vmaxu_vx_d, 8, 8)
GEN_VEXT_VX(vmax_vx_b, 1, 1)
GEN_VEXT_VX(vmax_vx_h, 2, 2)
GEN_VEXT_VX(vmax_vx_w, 4, 4)
GEN_VEXT_VX(vmax_vx_d, 8, 8)

/* Vector Single-Width Integer Multiply Instructions */
#define DO_MUL(N, M) (N * M)
RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
GEN_VEXT_VV(vmul_vv_b, 1, 1)
GEN_VEXT_VV(vmul_vv_h, 2, 2)
GEN_VEXT_VV(vmul_vv_w, 4, 4)
GEN_VEXT_VV(vmul_vv_d, 8, 8)
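/*
 * The do_mulh* helpers below return the high half of the product.  For
 * element widths up to 32 bits the product simply fits in the next wider C
 * type, e.g. do_mulh_b(-2, 3) computes (int16_t)-6 >> 8 = -1; for 64-bit
 * elements there is no wider type, so muls64()/mulu64() are used to obtain
 * the 128-bit product explicitly.
 */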
static int8_t do_mulh_b(int8_t s2, int8_t s1)
{
    return (int16_t)s2 * (int16_t)s1 >> 8;
}

static int16_t do_mulh_h(int16_t s2, int16_t s1)
{
    return (int32_t)s2 * (int32_t)s1 >> 16;
}

static int32_t do_mulh_w(int32_t s2, int32_t s1)
{
    return (int64_t)s2 * (int64_t)s1 >> 32;
}

static int64_t do_mulh_d(int64_t s2, int64_t s1)
{
    uint64_t hi_64, lo_64;

    muls64(&lo_64, &hi_64, s1, s2);
    return hi_64;
}

static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
{
    return (uint16_t)s2 * (uint16_t)s1 >> 8;
}

static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
{
    return (uint32_t)s2 * (uint32_t)s1 >> 16;
}

static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
{
    return (uint64_t)s2 * (uint64_t)s1 >> 32;
}

static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);
    return hi_64;
}

static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
{
    return (int16_t)s2 * (uint16_t)s1 >> 8;
}

static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
{
    return (int32_t)s2 * (uint32_t)s1 >> 16;
}

static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
{
    return (int64_t)s2 * (uint64_t)s1 >> 32;
}

/*
 * Let  A = signed operand,
 *      B = unsigned operand,
 *      P = mulu64(A, B), unsigned product
 *
 * LET  X = 2 ** 64 - A, 2's complement of A
 *      SP = signed product
 * THEN
 *      IF A < 0
 *          SP = -X * B
 *             = -(2 ** 64 - A) * B
 *             = A * B - 2 ** 64 * B
 *             = P - 2 ** 64 * B
 *      ELSE
 *          SP = P
 * THEN
 *      HI_P -= (A < 0 ? B : 0)
 */

static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
{
    uint64_t hi_64, lo_64;

    mulu64(&lo_64, &hi_64, s2, s1);

    hi_64 -= s2 < 0 ? s1 : 0;
    return hi_64;
}
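/*
 * Worked example for the correction above: for do_mulhsu_d(-1, 2),
 * mulu64() sees 0xffffffffffffffff * 2, whose high half is 1.  Since
 * s2 < 0 we subtract s1 = 2, giving -1, which is indeed the high half of
 * the true signed result -2.
 */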
s1 : 0; 1512 return hi_64; 1513 } 1514 1515 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1516 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1517 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1518 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1519 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1520 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1521 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1522 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1523 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1524 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1525 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1526 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1527 GEN_VEXT_VV(vmulh_vv_b, 1, 1) 1528 GEN_VEXT_VV(vmulh_vv_h, 2, 2) 1529 GEN_VEXT_VV(vmulh_vv_w, 4, 4) 1530 GEN_VEXT_VV(vmulh_vv_d, 8, 8) 1531 GEN_VEXT_VV(vmulhu_vv_b, 1, 1) 1532 GEN_VEXT_VV(vmulhu_vv_h, 2, 2) 1533 GEN_VEXT_VV(vmulhu_vv_w, 4, 4) 1534 GEN_VEXT_VV(vmulhu_vv_d, 8, 8) 1535 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) 1536 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) 1537 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) 1538 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) 1539 1540 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1541 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1542 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1543 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1544 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1545 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1546 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1547 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1548 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1549 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1550 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1551 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1552 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1553 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1554 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1555 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1556 GEN_VEXT_VX(vmul_vx_b, 1, 1) 1557 GEN_VEXT_VX(vmul_vx_h, 2, 2) 1558 GEN_VEXT_VX(vmul_vx_w, 4, 4) 1559 GEN_VEXT_VX(vmul_vx_d, 8, 8) 1560 GEN_VEXT_VX(vmulh_vx_b, 1, 1) 1561 GEN_VEXT_VX(vmulh_vx_h, 2, 2) 1562 GEN_VEXT_VX(vmulh_vx_w, 4, 4) 1563 GEN_VEXT_VX(vmulh_vx_d, 8, 8) 1564 GEN_VEXT_VX(vmulhu_vx_b, 1, 1) 1565 GEN_VEXT_VX(vmulhu_vx_h, 2, 2) 1566 GEN_VEXT_VX(vmulhu_vx_w, 4, 4) 1567 GEN_VEXT_VX(vmulhu_vx_d, 8, 8) 1568 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) 1569 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) 1570 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) 1571 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) 1572 1573 /* Vector Integer Divide Instructions */ 1574 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1575 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1576 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1577 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1578 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1579 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1580 1581 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1582 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1583 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1584 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1585 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1586 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1587 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1588 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1589 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1590 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1591 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1592 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1593 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1594 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1595 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1596 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1597 GEN_VEXT_VV(vdivu_vv_b, 1, 1) 1598 GEN_VEXT_VV(vdivu_vv_h, 2, 2) 1599 GEN_VEXT_VV(vdivu_vv_w, 4, 4) 1600 GEN_VEXT_VV(vdivu_vv_d, 8, 8) 1601 GEN_VEXT_VV(vdiv_vv_b, 1, 1) 1602 GEN_VEXT_VV(vdiv_vv_h, 2, 2) 1603 GEN_VEXT_VV(vdiv_vv_w, 4, 4) 1604 GEN_VEXT_VV(vdiv_vv_d, 8, 8) 1605 GEN_VEXT_VV(vremu_vv_b, 1, 1) 1606 GEN_VEXT_VV(vremu_vv_h, 2, 2) 1607 GEN_VEXT_VV(vremu_vv_w, 4, 4) 1608 GEN_VEXT_VV(vremu_vv_d, 8, 8) 1609 GEN_VEXT_VV(vrem_vv_b, 1, 1) 1610 GEN_VEXT_VV(vrem_vv_h, 2, 2) 1611 GEN_VEXT_VV(vrem_vv_w, 4, 4) 1612 GEN_VEXT_VV(vrem_vv_d, 8, 8) 1613 1614 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1615 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1616 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1617 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1618 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1619 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1620 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1621 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1622 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1623 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1624 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1625 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1626 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1627 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1628 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1629 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1630 GEN_VEXT_VX(vdivu_vx_b, 1, 1) 1631 GEN_VEXT_VX(vdivu_vx_h, 2, 2) 1632 GEN_VEXT_VX(vdivu_vx_w, 4, 4) 1633 GEN_VEXT_VX(vdivu_vx_d, 8, 8) 1634 GEN_VEXT_VX(vdiv_vx_b, 1, 1) 1635 GEN_VEXT_VX(vdiv_vx_h, 2, 2) 1636 GEN_VEXT_VX(vdiv_vx_w, 4, 4) 1637 GEN_VEXT_VX(vdiv_vx_d, 8, 8) 1638 GEN_VEXT_VX(vremu_vx_b, 1, 1) 1639 GEN_VEXT_VX(vremu_vx_h, 2, 2) 1640 GEN_VEXT_VX(vremu_vx_w, 4, 4) 1641 GEN_VEXT_VX(vremu_vx_d, 8, 8) 1642 GEN_VEXT_VX(vrem_vx_b, 1, 1) 1643 GEN_VEXT_VX(vrem_vx_h, 2, 2) 1644 GEN_VEXT_VX(vrem_vx_w, 4, 4) 1645 GEN_VEXT_VX(vrem_vx_d, 8, 8) 1646 1647 /* Vector Widening Integer Multiply Instructions */ 1648 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1649 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1650 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1651 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1652 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1653 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1654 RVVCALL(OPIVV2, 
vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1655 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1656 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1657 GEN_VEXT_VV(vwmul_vv_b, 1, 2) 1658 GEN_VEXT_VV(vwmul_vv_h, 2, 4) 1659 GEN_VEXT_VV(vwmul_vv_w, 4, 8) 1660 GEN_VEXT_VV(vwmulu_vv_b, 1, 2) 1661 GEN_VEXT_VV(vwmulu_vv_h, 2, 4) 1662 GEN_VEXT_VV(vwmulu_vv_w, 4, 8) 1663 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) 1664 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) 1665 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) 1666 1667 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1668 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1669 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1670 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1671 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1672 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1673 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1674 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1675 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1676 GEN_VEXT_VX(vwmul_vx_b, 1, 2) 1677 GEN_VEXT_VX(vwmul_vx_h, 2, 4) 1678 GEN_VEXT_VX(vwmul_vx_w, 4, 8) 1679 GEN_VEXT_VX(vwmulu_vx_b, 1, 2) 1680 GEN_VEXT_VX(vwmulu_vx_h, 2, 4) 1681 GEN_VEXT_VX(vwmulu_vx_w, 4, 8) 1682 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) 1683 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) 1684 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) 1685 1686 /* Vector Single-Width Integer Multiply-Add Instructions */ 1687 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1688 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1689 { \ 1690 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1691 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1692 TD d = *((TD *)vd + HD(i)); \ 1693 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1694 } 1695 1696 #define DO_MACC(N, M, D) (M * N + D) 1697 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1698 #define DO_MADD(N, M, D) (M * D + N) 1699 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1700 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1701 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1702 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1703 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1704 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1705 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1706 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1707 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1708 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1709 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1710 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1711 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1712 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1713 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1714 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1715 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1716 GEN_VEXT_VV(vmacc_vv_b, 1, 1) 1717 GEN_VEXT_VV(vmacc_vv_h, 2, 2) 1718 GEN_VEXT_VV(vmacc_vv_w, 4, 4) 1719 GEN_VEXT_VV(vmacc_vv_d, 8, 8) 1720 GEN_VEXT_VV(vnmsac_vv_b, 1, 1) 1721 GEN_VEXT_VV(vnmsac_vv_h, 2, 2) 1722 GEN_VEXT_VV(vnmsac_vv_w, 4, 4) 1723 GEN_VEXT_VV(vnmsac_vv_d, 8, 8) 1724 GEN_VEXT_VV(vmadd_vv_b, 1, 1) 1725 GEN_VEXT_VV(vmadd_vv_h, 2, 2) 1726 GEN_VEXT_VV(vmadd_vv_w, 4, 4) 1727 GEN_VEXT_VV(vmadd_vv_d, 8, 8) 1728 GEN_VEXT_VV(vnmsub_vv_b, 1, 1) 1729 GEN_VEXT_VV(vnmsub_vv_h, 2, 2) 1730 GEN_VEXT_VV(vnmsub_vv_w, 4, 4) 1731 GEN_VEXT_VV(vnmsub_vv_d, 8, 8) 1732 
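/*
 * Illustrative sketch, not part of the helper itself: the DO_MACC and
 * DO_MADD callbacks above differ only in operand order.  With the
 * OPIVV3 calling convention OP(s2, s1, d), DO_MACC yields
 * vd = (vs1 * vs2) + vd while DO_MADD yields vd = (vs1 * vd) + vs2.
 * The standalone demo below (guarded out of the build; the DEMO_* names
 * are local to the example) shows the difference on plain C integers.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

/* Same shapes as DO_MACC / DO_MADD above, with N = vs2, M = vs1, D = vd. */
#define DEMO_MACC(N, M, D) ((M) * (N) + (D))   /* vd = vs1 * vs2 + vd  */
#define DEMO_MADD(N, M, D) ((M) * (D) + (N))   /* vd = vs1 * vd  + vs2 */

int main(void)
{
    int32_t vs2 = 3, vs1 = 5, vd = 7;

    printf("vmacc: %d\n", DEMO_MACC(vs2, vs1, vd));   /* 5 * 3 + 7 = 22 */
    printf("vmadd: %d\n", DEMO_MADD(vs2, vs1, vd));   /* 5 * 7 + 3 = 38 */
    return 0;
}
#endif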
1733 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1734 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1735 { \ 1736 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1737 TD d = *((TD *)vd + HD(i)); \ 1738 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1739 } 1740 1741 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1742 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1743 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1744 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1745 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1746 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1747 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1748 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1749 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1750 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1751 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1752 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1753 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1754 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1755 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1756 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1757 GEN_VEXT_VX(vmacc_vx_b, 1, 1) 1758 GEN_VEXT_VX(vmacc_vx_h, 2, 2) 1759 GEN_VEXT_VX(vmacc_vx_w, 4, 4) 1760 GEN_VEXT_VX(vmacc_vx_d, 8, 8) 1761 GEN_VEXT_VX(vnmsac_vx_b, 1, 1) 1762 GEN_VEXT_VX(vnmsac_vx_h, 2, 2) 1763 GEN_VEXT_VX(vnmsac_vx_w, 4, 4) 1764 GEN_VEXT_VX(vnmsac_vx_d, 8, 8) 1765 GEN_VEXT_VX(vmadd_vx_b, 1, 1) 1766 GEN_VEXT_VX(vmadd_vx_h, 2, 2) 1767 GEN_VEXT_VX(vmadd_vx_w, 4, 4) 1768 GEN_VEXT_VX(vmadd_vx_d, 8, 8) 1769 GEN_VEXT_VX(vnmsub_vx_b, 1, 1) 1770 GEN_VEXT_VX(vnmsub_vx_h, 2, 2) 1771 GEN_VEXT_VX(vnmsub_vx_w, 4, 4) 1772 GEN_VEXT_VX(vnmsub_vx_d, 8, 8) 1773 1774 /* Vector Widening Integer Multiply-Add Instructions */ 1775 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1776 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1777 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1778 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1779 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1780 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1781 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1782 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1783 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1784 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) 1785 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) 1786 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) 1787 GEN_VEXT_VV(vwmacc_vv_b, 1, 2) 1788 GEN_VEXT_VV(vwmacc_vv_h, 2, 4) 1789 GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 1790 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) 1791 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) 1792 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) 1793 1794 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1795 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1796 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1797 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1798 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1799 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1800 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1801 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1802 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1803 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1804 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1805 RVVCALL(OPIVX3, 
vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1806 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) 1807 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) 1808 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) 1809 GEN_VEXT_VX(vwmacc_vx_b, 1, 2) 1810 GEN_VEXT_VX(vwmacc_vx_h, 2, 4) 1811 GEN_VEXT_VX(vwmacc_vx_w, 4, 8) 1812 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) 1813 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) 1814 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) 1815 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) 1816 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) 1817 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) 1818 1819 /* Vector Integer Merge and Move Instructions */ 1820 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1821 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1822 uint32_t desc) \ 1823 { \ 1824 uint32_t vl = env->vl; \ 1825 uint32_t i; \ 1826 \ 1827 for (i = 0; i < vl; i++) { \ 1828 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1829 *((ETYPE *)vd + H(i)) = s1; \ 1830 } \ 1831 } 1832 1833 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1834 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1835 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1836 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1837 1838 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1839 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1840 uint32_t desc) \ 1841 { \ 1842 uint32_t vl = env->vl; \ 1843 uint32_t i; \ 1844 \ 1845 for (i = 0; i < vl; i++) { \ 1846 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1847 } \ 1848 } 1849 1850 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1851 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1852 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1853 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1854 1855 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1856 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1857 CPURISCVState *env, uint32_t desc) \ 1858 { \ 1859 uint32_t vl = env->vl; \ 1860 uint32_t i; \ 1861 \ 1862 for (i = 0; i < vl; i++) { \ 1863 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 1864 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1865 } \ 1866 } 1867 1868 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1869 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1870 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1871 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1872 1873 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1874 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1875 void *vs2, CPURISCVState *env, uint32_t desc) \ 1876 { \ 1877 uint32_t vl = env->vl; \ 1878 uint32_t i; \ 1879 \ 1880 for (i = 0; i < vl; i++) { \ 1881 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1882 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 1883 (ETYPE)(target_long)s1); \ 1884 *((ETYPE *)vd + H(i)) = d; \ 1885 } \ 1886 } 1887 1888 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1889 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1890 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1891 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1892 1893 /* 1894 *** Vector Fixed-Point Arithmetic Instructions 1895 */ 1896 1897 /* Vector Single-Width Saturating Add and Subtract */ 1898 1899 /* 1900 * As fixed point instructions probably have round mode and saturation, 1901 * define common macros for fixed point here. 
1902 */ 1903 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1904 CPURISCVState *env, int vxrm); 1905 1906 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1907 static inline void \ 1908 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1909 CPURISCVState *env, int vxrm) \ 1910 { \ 1911 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1912 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1913 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1914 } 1915 1916 static inline void 1917 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1918 CPURISCVState *env, 1919 uint32_t vl, uint32_t vm, int vxrm, 1920 opivv2_rm_fn *fn) 1921 { 1922 for (uint32_t i = 0; i < vl; i++) { 1923 if (!vm && !vext_elem_mask(v0, i)) { 1924 continue; 1925 } 1926 fn(vd, vs1, vs2, i, env, vxrm); 1927 } 1928 } 1929 1930 static inline void 1931 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1932 CPURISCVState *env, 1933 uint32_t desc, uint32_t esz, uint32_t dsz, 1934 opivv2_rm_fn *fn) 1935 { 1936 uint32_t vm = vext_vm(desc); 1937 uint32_t vl = env->vl; 1938 1939 switch (env->vxrm) { 1940 case 0: /* rnu */ 1941 vext_vv_rm_1(vd, v0, vs1, vs2, 1942 env, vl, vm, 0, fn); 1943 break; 1944 case 1: /* rne */ 1945 vext_vv_rm_1(vd, v0, vs1, vs2, 1946 env, vl, vm, 1, fn); 1947 break; 1948 case 2: /* rdn */ 1949 vext_vv_rm_1(vd, v0, vs1, vs2, 1950 env, vl, vm, 2, fn); 1951 break; 1952 default: /* rod */ 1953 vext_vv_rm_1(vd, v0, vs1, vs2, 1954 env, vl, vm, 3, fn); 1955 break; 1956 } 1957 } 1958 1959 /* generate helpers for fixed point instructions with OPIVV format */ 1960 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ 1961 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1962 CPURISCVState *env, uint32_t desc) \ 1963 { \ 1964 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 1965 do_##NAME); \ 1966 } 1967 1968 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 1969 { 1970 uint8_t res = a + b; 1971 if (res < a) { 1972 res = UINT8_MAX; 1973 env->vxsat = 0x1; 1974 } 1975 return res; 1976 } 1977 1978 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1979 uint16_t b) 1980 { 1981 uint16_t res = a + b; 1982 if (res < a) { 1983 res = UINT16_MAX; 1984 env->vxsat = 0x1; 1985 } 1986 return res; 1987 } 1988 1989 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1990 uint32_t b) 1991 { 1992 uint32_t res = a + b; 1993 if (res < a) { 1994 res = UINT32_MAX; 1995 env->vxsat = 0x1; 1996 } 1997 return res; 1998 } 1999 2000 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2001 uint64_t b) 2002 { 2003 uint64_t res = a + b; 2004 if (res < a) { 2005 res = UINT64_MAX; 2006 env->vxsat = 0x1; 2007 } 2008 return res; 2009 } 2010 2011 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2012 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2013 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2014 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2015 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) 2016 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) 2017 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) 2018 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) 2019 2020 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2021 CPURISCVState *env, int vxrm); 2022 2023 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2024 static inline void \ 2025 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2026 CPURISCVState *env, int vxrm) \ 2027 { \ 2028 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2029 *((TD *)vd + HD(i)) = OP(env, 
vxrm, s2, (TX1)(T1)s1); \ 2030 } 2031 2032 static inline void 2033 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2034 CPURISCVState *env, 2035 uint32_t vl, uint32_t vm, int vxrm, 2036 opivx2_rm_fn *fn) 2037 { 2038 for (uint32_t i = 0; i < vl; i++) { 2039 if (!vm && !vext_elem_mask(v0, i)) { 2040 continue; 2041 } 2042 fn(vd, s1, vs2, i, env, vxrm); 2043 } 2044 } 2045 2046 static inline void 2047 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2048 CPURISCVState *env, 2049 uint32_t desc, uint32_t esz, uint32_t dsz, 2050 opivx2_rm_fn *fn) 2051 { 2052 uint32_t vm = vext_vm(desc); 2053 uint32_t vl = env->vl; 2054 2055 switch (env->vxrm) { 2056 case 0: /* rnu */ 2057 vext_vx_rm_1(vd, v0, s1, vs2, 2058 env, vl, vm, 0, fn); 2059 break; 2060 case 1: /* rne */ 2061 vext_vx_rm_1(vd, v0, s1, vs2, 2062 env, vl, vm, 1, fn); 2063 break; 2064 case 2: /* rdn */ 2065 vext_vx_rm_1(vd, v0, s1, vs2, 2066 env, vl, vm, 2, fn); 2067 break; 2068 default: /* rod */ 2069 vext_vx_rm_1(vd, v0, s1, vs2, 2070 env, vl, vm, 3, fn); 2071 break; 2072 } 2073 } 2074 2075 /* generate helpers for fixed point instructions with OPIVX format */ 2076 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ 2077 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2078 void *vs2, CPURISCVState *env, uint32_t desc) \ 2079 { \ 2080 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 2081 do_##NAME); \ 2082 } 2083 2084 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2085 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2086 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2087 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2088 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 2089 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) 2090 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) 2091 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) 2092 2093 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2094 { 2095 int8_t res = a + b; 2096 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2097 res = a > 0 ? INT8_MAX : INT8_MIN; 2098 env->vxsat = 0x1; 2099 } 2100 return res; 2101 } 2102 2103 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2104 { 2105 int16_t res = a + b; 2106 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2107 res = a > 0 ? INT16_MAX : INT16_MIN; 2108 env->vxsat = 0x1; 2109 } 2110 return res; 2111 } 2112 2113 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2114 { 2115 int32_t res = a + b; 2116 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2117 res = a > 0 ? INT32_MAX : INT32_MIN; 2118 env->vxsat = 0x1; 2119 } 2120 return res; 2121 } 2122 2123 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2124 { 2125 int64_t res = a + b; 2126 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2127 res = a > 0 ? 
INT64_MAX : INT64_MIN; 2128 env->vxsat = 0x1; 2129 } 2130 return res; 2131 } 2132 2133 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2134 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2135 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2136 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2137 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) 2138 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) 2139 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) 2140 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) 2141 2142 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2143 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2144 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2145 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2146 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) 2147 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) 2148 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) 2149 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) 2150 2151 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2152 { 2153 uint8_t res = a - b; 2154 if (res > a) { 2155 res = 0; 2156 env->vxsat = 0x1; 2157 } 2158 return res; 2159 } 2160 2161 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2162 uint16_t b) 2163 { 2164 uint16_t res = a - b; 2165 if (res > a) { 2166 res = 0; 2167 env->vxsat = 0x1; 2168 } 2169 return res; 2170 } 2171 2172 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2173 uint32_t b) 2174 { 2175 uint32_t res = a - b; 2176 if (res > a) { 2177 res = 0; 2178 env->vxsat = 0x1; 2179 } 2180 return res; 2181 } 2182 2183 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2184 uint64_t b) 2185 { 2186 uint64_t res = a - b; 2187 if (res > a) { 2188 res = 0; 2189 env->vxsat = 0x1; 2190 } 2191 return res; 2192 } 2193 2194 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2195 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2196 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2197 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2198 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) 2199 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) 2200 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) 2201 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) 2202 2203 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2204 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2205 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2206 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2207 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) 2208 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) 2209 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) 2210 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) 2211 2212 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2213 { 2214 int8_t res = a - b; 2215 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2216 res = a >= 0 ? INT8_MAX : INT8_MIN; 2217 env->vxsat = 0x1; 2218 } 2219 return res; 2220 } 2221 2222 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2223 { 2224 int16_t res = a - b; 2225 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2226 res = a >= 0 ? INT16_MAX : INT16_MIN; 2227 env->vxsat = 0x1; 2228 } 2229 return res; 2230 } 2231 2232 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2233 { 2234 int32_t res = a - b; 2235 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2236 res = a >= 0 ? 
INT32_MAX : INT32_MIN; 2237 env->vxsat = 0x1; 2238 } 2239 return res; 2240 } 2241 2242 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2243 { 2244 int64_t res = a - b; 2245 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2246 res = a >= 0 ? INT64_MAX : INT64_MIN; 2247 env->vxsat = 0x1; 2248 } 2249 return res; 2250 } 2251 2252 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2253 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2254 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2255 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2256 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) 2257 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) 2258 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) 2259 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) 2260 2261 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2262 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2263 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2264 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2265 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) 2266 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) 2267 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) 2268 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) 2269 2270 /* Vector Single-Width Averaging Add and Subtract */ 2271 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2272 { 2273 uint8_t d = extract64(v, shift, 1); 2274 uint8_t d1; 2275 uint64_t D1, D2; 2276 2277 if (shift == 0 || shift > 64) { 2278 return 0; 2279 } 2280 2281 d1 = extract64(v, shift - 1, 1); 2282 D1 = extract64(v, 0, shift); 2283 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2284 return d1; 2285 } else if (vxrm == 1) { /* round-to-nearest-even */ 2286 if (shift > 1) { 2287 D2 = extract64(v, 0, shift - 1); 2288 return d1 & ((D2 != 0) | d); 2289 } else { 2290 return d1 & d; 2291 } 2292 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2293 return !d & (D1 != 0); 2294 } 2295 return 0; /* round-down (truncate) */ 2296 } 2297 2298 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2299 { 2300 int64_t res = (int64_t)a + b; 2301 uint8_t round = get_round(vxrm, res, 1); 2302 2303 return (res >> 1) + round; 2304 } 2305 2306 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2307 { 2308 int64_t res = a + b; 2309 uint8_t round = get_round(vxrm, res, 1); 2310 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2311 2312 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2313 return ((res >> 1) ^ over) + round; 2314 } 2315 2316 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2317 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2318 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2319 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2320 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) 2321 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 2322 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) 2323 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) 2324 2325 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2326 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2327 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2328 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2329 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) 2330 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) 2331 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) 2332 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) 2333 2334 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2335 { 2336 int64_t res = (int64_t)a - b; 2337 uint8_t round = get_round(vxrm, res, 1); 2338 2339 return (res >> 1) + round; 2340 } 2341 2342 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2343 { 2344 int64_t res = (int64_t)a - b; 2345 uint8_t round = get_round(vxrm, res, 1); 2346 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2347 2348 /* With signed overflow, bit 64 is inverse of bit 63. */ 2349 return ((res >> 1) ^ over) + round; 2350 } 2351 2352 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2353 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2354 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2355 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2356 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) 2357 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) 2358 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) 2359 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) 2360 2361 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2362 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2363 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2364 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2365 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) 2366 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) 2367 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) 2368 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) 2369 2370 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2371 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2372 { 2373 uint8_t round; 2374 int16_t res; 2375 2376 res = (int16_t)a * (int16_t)b; 2377 round = get_round(vxrm, res, 7); 2378 res = (res >> 7) + round; 2379 2380 if (res > INT8_MAX) { 2381 env->vxsat = 0x1; 2382 return INT8_MAX; 2383 } else if (res < INT8_MIN) { 2384 env->vxsat = 0x1; 2385 return INT8_MIN; 2386 } else { 2387 return res; 2388 } 2389 } 2390 2391 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2392 { 2393 uint8_t round; 2394 int32_t res; 2395 2396 res = (int32_t)a * (int32_t)b; 2397 round = get_round(vxrm, res, 15); 2398 res = (res >> 15) + round; 2399 2400 if (res > INT16_MAX) { 2401 env->vxsat = 0x1; 2402 return INT16_MAX; 2403 } else if (res < INT16_MIN) { 2404 env->vxsat = 0x1; 2405 return INT16_MIN; 2406 } else { 2407 return res; 2408 } 2409 } 2410 2411 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2412 { 2413 uint8_t round; 2414 int64_t res; 2415 2416 res = (int64_t)a * (int64_t)b; 2417 round = get_round(vxrm, res, 31); 2418 res = (res >> 31) + round; 2419 2420 if (res > INT32_MAX) { 2421 env->vxsat = 0x1; 2422 return 
INT32_MAX; 2423 } else if (res < INT32_MIN) { 2424 env->vxsat = 0x1; 2425 return INT32_MIN; 2426 } else { 2427 return res; 2428 } 2429 } 2430 2431 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2432 { 2433 uint8_t round; 2434 uint64_t hi_64, lo_64; 2435 int64_t res; 2436 2437 if (a == INT64_MIN && b == INT64_MIN) { 2438 env->vxsat = 1; 2439 return INT64_MAX; 2440 } 2441 2442 muls64(&lo_64, &hi_64, a, b); 2443 round = get_round(vxrm, lo_64, 63); 2444 /* 2445 * Cannot overflow, as there are always 2446 * 2 sign bits after multiply. 2447 */ 2448 res = (hi_64 << 1) | (lo_64 >> 63); 2449 if (round) { 2450 if (res == INT64_MAX) { 2451 env->vxsat = 1; 2452 } else { 2453 res += 1; 2454 } 2455 } 2456 return res; 2457 } 2458 2459 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2460 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2461 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2462 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2463 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) 2464 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) 2465 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) 2466 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) 2467 2468 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2469 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2470 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2471 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2472 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) 2473 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) 2474 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) 2475 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) 2476 2477 /* Vector Widening Saturating Scaled Multiply-Add */ 2478 static inline uint16_t 2479 vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, 2480 uint16_t c) 2481 { 2482 uint8_t round; 2483 uint16_t res = (uint16_t)a * b; 2484 2485 round = get_round(vxrm, res, 4); 2486 res = (res >> 4) + round; 2487 return saddu16(env, vxrm, c, res); 2488 } 2489 2490 static inline uint32_t 2491 vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, 2492 uint32_t c) 2493 { 2494 uint8_t round; 2495 uint32_t res = (uint32_t)a * b; 2496 2497 round = get_round(vxrm, res, 8); 2498 res = (res >> 8) + round; 2499 return saddu32(env, vxrm, c, res); 2500 } 2501 2502 static inline uint64_t 2503 vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, 2504 uint64_t c) 2505 { 2506 uint8_t round; 2507 uint64_t res = (uint64_t)a * b; 2508 2509 round = get_round(vxrm, res, 16); 2510 res = (res >> 16) + round; 2511 return saddu64(env, vxrm, c, res); 2512 } 2513 2514 #define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2515 static inline void \ 2516 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2517 CPURISCVState *env, int vxrm) \ 2518 { \ 2519 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2520 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2521 TD d = *((TD *)vd + HD(i)); \ 2522 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ 2523 } 2524 2525 RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) 2526 RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) 2527 RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) 2528 GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2) 2529 GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4) 2530 GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8) 2531 2532 #define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2533 static inline void \ 2534 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2535 CPURISCVState *env, int vxrm) \ 2536 { \ 2537 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2538 TD d = *((TD *)vd + 
HD(i)); \ 2539 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ 2540 } 2541 2542 RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) 2543 RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) 2544 RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) 2545 GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2) 2546 GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4) 2547 GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8) 2548 2549 static inline int16_t 2550 vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) 2551 { 2552 uint8_t round; 2553 int16_t res = (int16_t)a * b; 2554 2555 round = get_round(vxrm, res, 4); 2556 res = (res >> 4) + round; 2557 return sadd16(env, vxrm, c, res); 2558 } 2559 2560 static inline int32_t 2561 vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) 2562 { 2563 uint8_t round; 2564 int32_t res = (int32_t)a * b; 2565 2566 round = get_round(vxrm, res, 8); 2567 res = (res >> 8) + round; 2568 return sadd32(env, vxrm, c, res); 2569 2570 } 2571 2572 static inline int64_t 2573 vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) 2574 { 2575 uint8_t round; 2576 int64_t res = (int64_t)a * b; 2577 2578 round = get_round(vxrm, res, 16); 2579 res = (res >> 16) + round; 2580 return sadd64(env, vxrm, c, res); 2581 } 2582 2583 RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) 2584 RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) 2585 RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) 2586 GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2) 2587 GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4) 2588 GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8) 2589 RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) 2590 RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) 2591 RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) 2592 GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2) 2593 GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4) 2594 GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8) 2595 2596 static inline int16_t 2597 vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) 2598 { 2599 uint8_t round; 2600 int16_t res = a * (int16_t)b; 2601 2602 round = get_round(vxrm, res, 4); 2603 res = (res >> 4) + round; 2604 return ssub16(env, vxrm, c, res); 2605 } 2606 2607 static inline int32_t 2608 vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) 2609 { 2610 uint8_t round; 2611 int32_t res = a * (int32_t)b; 2612 2613 round = get_round(vxrm, res, 8); 2614 res = (res >> 8) + round; 2615 return ssub32(env, vxrm, c, res); 2616 } 2617 2618 static inline int64_t 2619 vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) 2620 { 2621 uint8_t round; 2622 int64_t res = a * (int64_t)b; 2623 2624 round = get_round(vxrm, res, 16); 2625 res = (res >> 16) + round; 2626 return ssub64(env, vxrm, c, res); 2627 } 2628 2629 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) 2630 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) 2631 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) 2632 GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2) 2633 GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4) 2634 GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8) 2635 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) 2636 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) 2637 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) 2638 GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2) 2639 GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4) 2640 GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8) 2641 2642 
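/*
 * Illustrative sketch, not part of the helper itself: every scaled
 * multiply-add helper above shifts the widened product right by half the
 * source element width (4, 8 or 16 bits) and asks get_round() for the
 * rounding increment selected by vxrm.  The standalone demo below
 * (guarded out of the build; demo_round is a local restatement of that
 * rule, not a QEMU API) rounds 18 >> 2, i.e. 4.5, under the four
 * rounding modes.  It assumes 1 <= shift < 64.
 */
#if 0
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint8_t demo_round(int vxrm, uint64_t v, unsigned shift)
{
    uint64_t lsb = (v >> shift) & 1;          /* new least significant bit */
    uint64_t guard = (v >> (shift - 1)) & 1;  /* most significant dropped bit */
    uint64_t dropped = v & ((UINT64_C(1) << shift) - 1);
    uint64_t sticky = v & ((UINT64_C(1) << (shift - 1)) - 1);

    switch (vxrm) {
    case 0: /* rnu: round half up */
        return guard;
    case 1: /* rne: round half to even */
        return guard && (sticky != 0 || lsb);
    case 3: /* rod: force the result odd if anything was dropped */
        return !lsb && dropped != 0;
    default: /* rdn: truncate */
        return 0;
    }
}

int main(void)
{
    uint64_t v = 18;
    unsigned shift = 2;

    for (int vxrm = 0; vxrm < 4; vxrm++) {
        printf("vxrm=%d -> %" PRIu64 "\n",
               vxrm, (v >> shift) + demo_round(vxrm, v, shift));
    }
    /* Prints 5 (rnu), 4 (rne, tie to even), 4 (rdn), 5 (rod, jammed odd). */
    return 0;
}
#endif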
static inline int16_t 2643 vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) 2644 { 2645 uint8_t round; 2646 int16_t res = (int16_t)a * b; 2647 2648 round = get_round(vxrm, res, 4); 2649 res = (res >> 4) + round; 2650 return ssub16(env, vxrm, c, res); 2651 } 2652 2653 static inline int32_t 2654 vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) 2655 { 2656 uint8_t round; 2657 int32_t res = (int32_t)a * b; 2658 2659 round = get_round(vxrm, res, 8); 2660 res = (res >> 8) + round; 2661 return ssub32(env, vxrm, c, res); 2662 } 2663 2664 static inline int64_t 2665 vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) 2666 { 2667 uint8_t round; 2668 int64_t res = (int64_t)a * b; 2669 2670 round = get_round(vxrm, res, 16); 2671 res = (res >> 16) + round; 2672 return ssub64(env, vxrm, c, res); 2673 } 2674 2675 RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8) 2676 RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) 2677 RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) 2678 GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2) 2679 GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4) 2680 GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8) 2681 2682 /* Vector Single-Width Scaling Shift Instructions */ 2683 static inline uint8_t 2684 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2685 { 2686 uint8_t round, shift = b & 0x7; 2687 uint8_t res; 2688 2689 round = get_round(vxrm, a, shift); 2690 res = (a >> shift) + round; 2691 return res; 2692 } 2693 static inline uint16_t 2694 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2695 { 2696 uint8_t round, shift = b & 0xf; 2697 uint16_t res; 2698 2699 round = get_round(vxrm, a, shift); 2700 res = (a >> shift) + round; 2701 return res; 2702 } 2703 static inline uint32_t 2704 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2705 { 2706 uint8_t round, shift = b & 0x1f; 2707 uint32_t res; 2708 2709 round = get_round(vxrm, a, shift); 2710 res = (a >> shift) + round; 2711 return res; 2712 } 2713 static inline uint64_t 2714 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2715 { 2716 uint8_t round, shift = b & 0x3f; 2717 uint64_t res; 2718 2719 round = get_round(vxrm, a, shift); 2720 res = (a >> shift) + round; 2721 return res; 2722 } 2723 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2724 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2725 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2726 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2727 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) 2728 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) 2729 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) 2730 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) 2731 2732 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2733 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2734 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2735 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2736 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) 2737 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) 2738 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) 2739 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) 2740 2741 static inline int8_t 2742 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2743 { 2744 uint8_t round, shift = b & 0x7; 2745 int8_t res; 2746 2747 round = get_round(vxrm, a, shift); 2748 res = (a >> shift) + round; 2749 return res; 2750 } 2751 static inline int16_t 2752 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2753 { 2754 uint8_t round, shift = b & 0xf; 2755 int16_t 
res; 2756 2757 round = get_round(vxrm, a, shift); 2758 res = (a >> shift) + round; 2759 return res; 2760 } 2761 static inline int32_t 2762 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2763 { 2764 uint8_t round, shift = b & 0x1f; 2765 int32_t res; 2766 2767 round = get_round(vxrm, a, shift); 2768 res = (a >> shift) + round; 2769 return res; 2770 } 2771 static inline int64_t 2772 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2773 { 2774 uint8_t round, shift = b & 0x3f; 2775 int64_t res; 2776 2777 round = get_round(vxrm, a, shift); 2778 res = (a >> shift) + round; 2779 return res; 2780 } 2781 2782 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2783 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2784 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2785 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2786 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) 2787 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) 2788 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) 2789 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) 2790 2791 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2792 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2793 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2794 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2795 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) 2796 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) 2797 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) 2798 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) 2799 2800 /* Vector Narrowing Fixed-Point Clip Instructions */ 2801 static inline int8_t 2802 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2803 { 2804 uint8_t round, shift = b & 0xf; 2805 int16_t res; 2806 2807 round = get_round(vxrm, a, shift); 2808 res = (a >> shift) + round; 2809 if (res > INT8_MAX) { 2810 env->vxsat = 0x1; 2811 return INT8_MAX; 2812 } else if (res < INT8_MIN) { 2813 env->vxsat = 0x1; 2814 return INT8_MIN; 2815 } else { 2816 return res; 2817 } 2818 } 2819 2820 static inline int16_t 2821 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2822 { 2823 uint8_t round, shift = b & 0x1f; 2824 int32_t res; 2825 2826 round = get_round(vxrm, a, shift); 2827 res = (a >> shift) + round; 2828 if (res > INT16_MAX) { 2829 env->vxsat = 0x1; 2830 return INT16_MAX; 2831 } else if (res < INT16_MIN) { 2832 env->vxsat = 0x1; 2833 return INT16_MIN; 2834 } else { 2835 return res; 2836 } 2837 } 2838 2839 static inline int32_t 2840 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2841 { 2842 uint8_t round, shift = b & 0x3f; 2843 int64_t res; 2844 2845 round = get_round(vxrm, a, shift); 2846 res = (a >> shift) + round; 2847 if (res > INT32_MAX) { 2848 env->vxsat = 0x1; 2849 return INT32_MAX; 2850 } else if (res < INT32_MIN) { 2851 env->vxsat = 0x1; 2852 return INT32_MIN; 2853 } else { 2854 return res; 2855 } 2856 } 2857 2858 RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2859 RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2860 RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2861 GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1) 2862 GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2) 2863 GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4) 2864 2865 RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8) 2866 RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16) 2867 RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32) 2868 GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1) 2869 GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2) 2870 GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4) 2871 2872 static inline uint8_t 2873 vnclipu8(CPURISCVState *env, int vxrm, uint16_t 
a, uint8_t b) 2874 { 2875 uint8_t round, shift = b & 0xf; 2876 uint16_t res; 2877 2878 round = get_round(vxrm, a, shift); 2879 res = (a >> shift) + round; 2880 if (res > UINT8_MAX) { 2881 env->vxsat = 0x1; 2882 return UINT8_MAX; 2883 } else { 2884 return res; 2885 } 2886 } 2887 2888 static inline uint16_t 2889 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2890 { 2891 uint8_t round, shift = b & 0x1f; 2892 uint32_t res; 2893 2894 round = get_round(vxrm, a, shift); 2895 res = (a >> shift) + round; 2896 if (res > UINT16_MAX) { 2897 env->vxsat = 0x1; 2898 return UINT16_MAX; 2899 } else { 2900 return res; 2901 } 2902 } 2903 2904 static inline uint32_t 2905 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2906 { 2907 uint8_t round, shift = b & 0x3f; 2908 int64_t res; 2909 2910 round = get_round(vxrm, a, shift); 2911 res = (a >> shift) + round; 2912 if (res > UINT32_MAX) { 2913 env->vxsat = 0x1; 2914 return UINT32_MAX; 2915 } else { 2916 return res; 2917 } 2918 } 2919 2920 RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2921 RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2922 RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2923 GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1) 2924 GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2) 2925 GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4) 2926 2927 RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8) 2928 RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16) 2929 RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32) 2930 GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1) 2931 GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2) 2932 GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4) 2933 2934 /* 2935 *** Vector Float Point Arithmetic Instructions 2936 */ 2937 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2938 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2939 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2940 CPURISCVState *env) \ 2941 { \ 2942 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2943 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2944 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2945 } 2946 2947 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \ 2948 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2949 void *vs2, CPURISCVState *env, \ 2950 uint32_t desc) \ 2951 { \ 2952 uint32_t vm = vext_vm(desc); \ 2953 uint32_t vl = env->vl; \ 2954 uint32_t i; \ 2955 \ 2956 for (i = 0; i < vl; i++) { \ 2957 if (!vm && !vext_elem_mask(v0, i)) { \ 2958 continue; \ 2959 } \ 2960 do_##NAME(vd, vs1, vs2, i, env); \ 2961 } \ 2962 } 2963 2964 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2965 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2966 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2967 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) 2968 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4) 2969 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8) 2970 2971 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2972 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2973 CPURISCVState *env) \ 2974 { \ 2975 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2976 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 2977 } 2978 2979 #define GEN_VEXT_VF(NAME, ESZ, DSZ) \ 2980 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2981 void *vs2, CPURISCVState *env, \ 2982 uint32_t desc) \ 2983 { \ 2984 uint32_t vm = vext_vm(desc); \ 2985 uint32_t vl = env->vl; \ 2986 uint32_t i; \ 2987 \ 2988 for (i = 0; i < vl; i++) { \ 2989 if (!vm && !vext_elem_mask(v0, i)) { \ 2990 continue; \ 2991 } \ 2992 
do_##NAME(vd, s1, vs2, i, env); \ 2993 } \ 2994 } 2995 2996 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2997 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2998 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2999 GEN_VEXT_VF(vfadd_vf_h, 2, 2) 3000 GEN_VEXT_VF(vfadd_vf_w, 4, 4) 3001 GEN_VEXT_VF(vfadd_vf_d, 8, 8) 3002 3003 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3004 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3005 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3006 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) 3007 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) 3008 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) 3009 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3010 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3011 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3012 GEN_VEXT_VF(vfsub_vf_h, 2, 2) 3013 GEN_VEXT_VF(vfsub_vf_w, 4, 4) 3014 GEN_VEXT_VF(vfsub_vf_d, 8, 8) 3015 3016 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3017 { 3018 return float16_sub(b, a, s); 3019 } 3020 3021 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3022 { 3023 return float32_sub(b, a, s); 3024 } 3025 3026 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3027 { 3028 return float64_sub(b, a, s); 3029 } 3030 3031 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3032 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3033 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3034 GEN_VEXT_VF(vfrsub_vf_h, 2, 2) 3035 GEN_VEXT_VF(vfrsub_vf_w, 4, 4) 3036 GEN_VEXT_VF(vfrsub_vf_d, 8, 8) 3037 3038 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3039 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3040 { 3041 return float32_add(float16_to_float32(a, true, s), 3042 float16_to_float32(b, true, s), s); 3043 } 3044 3045 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3046 { 3047 return float64_add(float32_to_float64(a, s), 3048 float32_to_float64(b, s), s); 3049 3050 } 3051 3052 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3053 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3054 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) 3055 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) 3056 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3057 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3058 GEN_VEXT_VF(vfwadd_vf_h, 2, 4) 3059 GEN_VEXT_VF(vfwadd_vf_w, 4, 8) 3060 3061 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3062 { 3063 return float32_sub(float16_to_float32(a, true, s), 3064 float16_to_float32(b, true, s), s); 3065 } 3066 3067 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3068 { 3069 return float64_sub(float32_to_float64(a, s), 3070 float32_to_float64(b, s), s); 3071 3072 } 3073 3074 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3075 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3076 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) 3077 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) 3078 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3079 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3080 GEN_VEXT_VF(vfwsub_vf_h, 2, 4) 3081 GEN_VEXT_VF(vfwsub_vf_w, 4, 8) 3082 3083 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3084 { 3085 return float32_add(a, float16_to_float32(b, true, s), s); 3086 } 3087 3088 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3089 { 3090 
return float64_add(a, float32_to_float64(b, s), s); 3091 } 3092 3093 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3094 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3095 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) 3096 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 3097 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3098 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3099 GEN_VEXT_VF(vfwadd_wf_h, 2, 4) 3100 GEN_VEXT_VF(vfwadd_wf_w, 4, 8) 3101 3102 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3103 { 3104 return float32_sub(a, float16_to_float32(b, true, s), s); 3105 } 3106 3107 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3108 { 3109 return float64_sub(a, float32_to_float64(b, s), s); 3110 } 3111 3112 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3113 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3114 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) 3115 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) 3116 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3117 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3118 GEN_VEXT_VF(vfwsub_wf_h, 2, 4) 3119 GEN_VEXT_VF(vfwsub_wf_w, 4, 8) 3120 3121 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3122 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3123 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3124 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3125 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) 3126 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) 3127 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) 3128 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3129 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3130 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3131 GEN_VEXT_VF(vfmul_vf_h, 2, 2) 3132 GEN_VEXT_VF(vfmul_vf_w, 4, 4) 3133 GEN_VEXT_VF(vfmul_vf_d, 8, 8) 3134 3135 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3136 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3137 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3138 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 3139 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) 3140 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) 3141 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3142 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3143 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3144 GEN_VEXT_VF(vfdiv_vf_h, 2, 2) 3145 GEN_VEXT_VF(vfdiv_vf_w, 4, 4) 3146 GEN_VEXT_VF(vfdiv_vf_d, 8, 8) 3147 3148 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3149 { 3150 return float16_div(b, a, s); 3151 } 3152 3153 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3154 { 3155 return float32_div(b, a, s); 3156 } 3157 3158 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3159 { 3160 return float64_div(b, a, s); 3161 } 3162 3163 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3164 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3165 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3166 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) 3167 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) 3168 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) 3169 3170 /* Vector Widening Floating-Point Multiply */ 3171 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3172 { 3173 return float32_mul(float16_to_float32(a, true, s), 3174 float16_to_float32(b, true, s), s); 3175 } 3176 3177 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3178 { 3179 
return float64_mul(float32_to_float64(a, s), 3180 float32_to_float64(b, s), s); 3181 3182 } 3183 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3184 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3185 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) 3186 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) 3187 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3188 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3189 GEN_VEXT_VF(vfwmul_vf_h, 2, 4) 3190 GEN_VEXT_VF(vfwmul_vf_w, 4, 8) 3191 3192 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3193 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3194 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3195 CPURISCVState *env) \ 3196 { \ 3197 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3198 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3199 TD d = *((TD *)vd + HD(i)); \ 3200 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3201 } 3202 3203 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3204 { 3205 return float16_muladd(a, b, d, 0, s); 3206 } 3207 3208 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3209 { 3210 return float32_muladd(a, b, d, 0, s); 3211 } 3212 3213 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3214 { 3215 return float64_muladd(a, b, d, 0, s); 3216 } 3217 3218 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3219 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3220 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3221 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) 3222 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) 3223 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) 3224 3225 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3226 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3227 CPURISCVState *env) \ 3228 { \ 3229 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3230 TD d = *((TD *)vd + HD(i)); \ 3231 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3232 } 3233 3234 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3235 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3236 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3237 GEN_VEXT_VF(vfmacc_vf_h, 2, 2) 3238 GEN_VEXT_VF(vfmacc_vf_w, 4, 4) 3239 GEN_VEXT_VF(vfmacc_vf_d, 8, 8) 3240 3241 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3242 { 3243 return float16_muladd(a, b, d, 3244 float_muladd_negate_c | float_muladd_negate_product, s); 3245 } 3246 3247 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3248 { 3249 return float32_muladd(a, b, d, 3250 float_muladd_negate_c | float_muladd_negate_product, s); 3251 } 3252 3253 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3254 { 3255 return float64_muladd(a, b, d, 3256 float_muladd_negate_c | float_muladd_negate_product, s); 3257 } 3258 3259 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3260 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3261 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3262 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) 3263 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) 3264 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) 3265 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3266 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3267 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3268 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) 3269 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) 3270 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) 3271 3272 
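/*
 * Operand convention for the fused multiply-add helpers in this section:
 * OPFVV3/OPFVF3 invoke OP(s2, s1, d) with s2 = vs2[i], s1 = vs1[i] (or the
 * scalar f[rs1] for the _vf forms) and d = the old vd[i].  With the
 * softfloat float*_muladd(a, b, c, flags, s) computing (a * b) + c, the
 * helpers implement:
 *
 *   vfmacc:   vd =  (vs1 * vs2) + vd     (no negation flags)
 *   vfnmacc:  vd = -(vs1 * vs2) - vd     (negate product and addend)
 *   vfmsac:   vd =  (vs1 * vs2) - vd     (negate addend)
 *   vfnmsac:  vd = -(vs1 * vs2) + vd     (negate product)
 *   vfmadd:   vd =  (vs1 * vd) + vs2     (old vd is a multiplicand)
 *   vfnmadd:  vd = -(vs1 * vd) - vs2
 *   vfmsub:   vd =  (vs1 * vd) - vs2
 *   vfnmsub:  vd = -(vs1 * vd) + vs2
 */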
static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3273 { 3274 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3275 } 3276 3277 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3278 { 3279 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3280 } 3281 3282 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3283 { 3284 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3285 } 3286 3287 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3288 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3289 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3290 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) 3291 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) 3292 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) 3293 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3294 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3295 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3296 GEN_VEXT_VF(vfmsac_vf_h, 2, 2) 3297 GEN_VEXT_VF(vfmsac_vf_w, 4, 4) 3298 GEN_VEXT_VF(vfmsac_vf_d, 8, 8) 3299 3300 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3301 { 3302 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3303 } 3304 3305 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3306 { 3307 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3308 } 3309 3310 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3311 { 3312 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3313 } 3314 3315 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3316 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3317 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3318 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) 3319 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) 3320 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) 3321 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3322 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3323 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3324 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) 3325 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) 3326 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) 3327 3328 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3329 { 3330 return float16_muladd(d, b, a, 0, s); 3331 } 3332 3333 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3334 { 3335 return float32_muladd(d, b, a, 0, s); 3336 } 3337 3338 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3339 { 3340 return float64_muladd(d, b, a, 0, s); 3341 } 3342 3343 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3344 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3345 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3346 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) 3347 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) 3348 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) 3349 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3350 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3351 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3352 GEN_VEXT_VF(vfmadd_vf_h, 2, 2) 3353 GEN_VEXT_VF(vfmadd_vf_w, 4, 4) 3354 GEN_VEXT_VF(vfmadd_vf_d, 8, 8) 3355 3356 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3357 { 3358 return float16_muladd(d, b, a, 3359 float_muladd_negate_c | float_muladd_negate_product, s); 3360 } 3361 3362 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, 
float_status *s) 3363 { 3364 return float32_muladd(d, b, a, 3365 float_muladd_negate_c | float_muladd_negate_product, s); 3366 } 3367 3368 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3369 { 3370 return float64_muladd(d, b, a, 3371 float_muladd_negate_c | float_muladd_negate_product, s); 3372 } 3373 3374 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3375 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3376 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3377 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) 3378 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) 3379 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) 3380 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3381 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3382 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3383 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) 3384 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) 3385 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) 3386 3387 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3388 { 3389 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3390 } 3391 3392 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3393 { 3394 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3395 } 3396 3397 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3398 { 3399 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3400 } 3401 3402 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3403 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3404 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3405 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) 3406 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 3407 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) 3408 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3409 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3410 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3411 GEN_VEXT_VF(vfmsub_vf_h, 2, 2) 3412 GEN_VEXT_VF(vfmsub_vf_w, 4, 4) 3413 GEN_VEXT_VF(vfmsub_vf_d, 8, 8) 3414 3415 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3416 { 3417 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3418 } 3419 3420 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3421 { 3422 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3423 } 3424 3425 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3426 { 3427 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3428 } 3429 3430 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3431 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3432 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3433 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) 3434 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) 3435 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) 3436 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3437 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3438 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3439 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) 3440 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) 3441 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) 3442 3443 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3444 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3445 { 3446 return float32_muladd(float16_to_float32(a, true, s), 3447 float16_to_float32(b, true, s), d, 0, s); 3448 } 3449 3450 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t 
d, float_status *s) 3451 { 3452 return float64_muladd(float32_to_float64(a, s), 3453 float32_to_float64(b, s), d, 0, s); 3454 } 3455 3456 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3457 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3458 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) 3459 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) 3460 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3461 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3462 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) 3463 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) 3464 3465 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3466 { 3467 return float32_muladd(float16_to_float32(a, true, s), 3468 float16_to_float32(b, true, s), d, 3469 float_muladd_negate_c | float_muladd_negate_product, s); 3470 } 3471 3472 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3473 { 3474 return float64_muladd(float32_to_float64(a, s), 3475 float32_to_float64(b, s), d, 3476 float_muladd_negate_c | float_muladd_negate_product, s); 3477 } 3478 3479 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3480 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3481 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) 3482 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) 3483 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3484 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3485 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) 3486 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) 3487 3488 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3489 { 3490 return float32_muladd(float16_to_float32(a, true, s), 3491 float16_to_float32(b, true, s), d, 3492 float_muladd_negate_c, s); 3493 } 3494 3495 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3496 { 3497 return float64_muladd(float32_to_float64(a, s), 3498 float32_to_float64(b, s), d, 3499 float_muladd_negate_c, s); 3500 } 3501 3502 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3503 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3504 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) 3505 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) 3506 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3507 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3508 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) 3509 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) 3510 3511 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3512 { 3513 return float32_muladd(float16_to_float32(a, true, s), 3514 float16_to_float32(b, true, s), d, 3515 float_muladd_negate_product, s); 3516 } 3517 3518 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3519 { 3520 return float64_muladd(float32_to_float64(a, s), 3521 float32_to_float64(b, s), d, 3522 float_muladd_negate_product, s); 3523 } 3524 3525 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3526 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3527 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) 3528 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 3529 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3530 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3531 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) 3532 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) 3533 3534 /* Vector Floating-Point Square-Root Instruction */ 3535 /* (TD, T2, TX2) */ 3536 #define OP_UU_H uint16_t, uint16_t, uint16_t 3537 #define OP_UU_W uint32_t, uint32_t, uint32_t 3538 #define OP_UU_D uint64_t, uint64_t, uint64_t 3539 3540 #define 
OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3541 static void do_##NAME(void *vd, void *vs2, int i, \ 3542 CPURISCVState *env) \ 3543 { \ 3544 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3545 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3546 } 3547 3548 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ 3549 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3550 CPURISCVState *env, uint32_t desc) \ 3551 { \ 3552 uint32_t vm = vext_vm(desc); \ 3553 uint32_t vl = env->vl; \ 3554 uint32_t i; \ 3555 \ 3556 if (vl == 0) { \ 3557 return; \ 3558 } \ 3559 for (i = 0; i < vl; i++) { \ 3560 if (!vm && !vext_elem_mask(v0, i)) { \ 3561 continue; \ 3562 } \ 3563 do_##NAME(vd, vs2, i, env); \ 3564 } \ 3565 } 3566 3567 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3568 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3569 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3570 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) 3571 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) 3572 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) 3573 3574 /* Vector Floating-Point MIN/MAX Instructions */ 3575 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum) 3576 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum) 3577 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum) 3578 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) 3579 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) 3580 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) 3581 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum) 3582 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum) 3583 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum) 3584 GEN_VEXT_VF(vfmin_vf_h, 2, 2) 3585 GEN_VEXT_VF(vfmin_vf_w, 4, 4) 3586 GEN_VEXT_VF(vfmin_vf_d, 8, 8) 3587 3588 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum) 3589 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum) 3590 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum) 3591 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) 3592 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) 3593 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) 3594 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum) 3595 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum) 3596 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) 3597 GEN_VEXT_VF(vfmax_vf_h, 2, 2) 3598 GEN_VEXT_VF(vfmax_vf_w, 4, 4) 3599 GEN_VEXT_VF(vfmax_vf_d, 8, 8) 3600 3601 /* Vector Floating-Point Sign-Injection Instructions */ 3602 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3603 { 3604 return deposit64(b, 0, 15, a); 3605 } 3606 3607 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3608 { 3609 return deposit64(b, 0, 31, a); 3610 } 3611 3612 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3613 { 3614 return deposit64(b, 0, 63, a); 3615 } 3616 3617 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3618 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3619 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3620 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) 3621 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) 3622 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) 3623 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3624 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3625 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3626 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) 3627 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) 3628 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) 3629 3630 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3631 { 3632 return deposit64(~b, 0, 15, a); 3633 } 3634 3635 static uint32_t fsgnjn32(uint32_t a, 
uint32_t b, float_status *s) 3636 { 3637 return deposit64(~b, 0, 31, a); 3638 } 3639 3640 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3641 { 3642 return deposit64(~b, 0, 63, a); 3643 } 3644 3645 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3646 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3647 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3648 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) 3649 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) 3650 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) 3651 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3652 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3653 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3654 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) 3655 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) 3656 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) 3657 3658 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3659 { 3660 return deposit64(b ^ a, 0, 15, a); 3661 } 3662 3663 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3664 { 3665 return deposit64(b ^ a, 0, 31, a); 3666 } 3667 3668 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3669 { 3670 return deposit64(b ^ a, 0, 63, a); 3671 } 3672 3673 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3674 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3675 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3676 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) 3677 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) 3678 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) 3679 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3680 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3681 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3682 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) 3683 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) 3684 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) 3685 3686 /* Vector Floating-Point Compare Instructions */ 3687 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3688 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3689 CPURISCVState *env, uint32_t desc) \ 3690 { \ 3691 uint32_t vm = vext_vm(desc); \ 3692 uint32_t vl = env->vl; \ 3693 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 3694 uint32_t i; \ 3695 \ 3696 for (i = 0; i < vl; i++) { \ 3697 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3698 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3699 if (!vm && !vext_elem_mask(v0, i)) { \ 3700 continue; \ 3701 } \ 3702 vext_set_elem_mask(vd, i, \ 3703 DO_OP(s2, s1, &env->fp_status)); \ 3704 } \ 3705 for (; i < vlmax; i++) { \ 3706 vext_set_elem_mask(vd, i, 0); \ 3707 } \ 3708 } 3709 3710 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3711 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3712 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3713 3714 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3715 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3716 CPURISCVState *env, uint32_t desc) \ 3717 { \ 3718 uint32_t vm = vext_vm(desc); \ 3719 uint32_t vl = env->vl; \ 3720 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 3721 uint32_t i; \ 3722 \ 3723 for (i = 0; i < vl; i++) { \ 3724 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3725 if (!vm && !vext_elem_mask(v0, i)) { \ 3726 continue; \ 3727 } \ 3728 vext_set_elem_mask(vd, i, \ 3729 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3730 } \ 3731 for (; i < vlmax; i++) { \ 3732 vext_set_elem_mask(vd, i, 0); \ 3733 } \ 3734 } 3735 3736 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3737 
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)

static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare_quiet(a, b, s);
    return compare != float_relation_equal;
}

GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)

GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)

GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)

static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare(a, b, s);
    return compare == float_relation_greater;
}

static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare(a, b, s);
    return compare == float_relation_greater;
}

static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare(a, b, s);
    return compare == float_relation_greater;
}

GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)

static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
{
    FloatRelation compare = float16_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
{
    FloatRelation compare = float32_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
{
    FloatRelation compare = float64_compare(a, b, s);
    return compare == float_relation_greater ||
           compare == float_relation_equal;
}

GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)

GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet)
GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet)
GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet) 3829 GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet) 3830 GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet) 3831 GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet) 3832 3833 /* Vector Floating-Point Classify Instruction */ 3834 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3835 static void do_##NAME(void *vd, void *vs2, int i) \ 3836 { \ 3837 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3838 *((TD *)vd + HD(i)) = OP(s2); \ 3839 } 3840 3841 #define GEN_VEXT_V(NAME, ESZ, DSZ) \ 3842 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3843 CPURISCVState *env, uint32_t desc) \ 3844 { \ 3845 uint32_t vm = vext_vm(desc); \ 3846 uint32_t vl = env->vl; \ 3847 uint32_t i; \ 3848 \ 3849 for (i = 0; i < vl; i++) { \ 3850 if (!vm && !vext_elem_mask(v0, i)) { \ 3851 continue; \ 3852 } \ 3853 do_##NAME(vd, vs2, i); \ 3854 } \ 3855 } 3856 3857 target_ulong fclass_h(uint64_t frs1) 3858 { 3859 float16 f = frs1; 3860 bool sign = float16_is_neg(f); 3861 3862 if (float16_is_infinity(f)) { 3863 return sign ? 1 << 0 : 1 << 7; 3864 } else if (float16_is_zero(f)) { 3865 return sign ? 1 << 3 : 1 << 4; 3866 } else if (float16_is_zero_or_denormal(f)) { 3867 return sign ? 1 << 2 : 1 << 5; 3868 } else if (float16_is_any_nan(f)) { 3869 float_status s = { }; /* for snan_bit_is_one */ 3870 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3871 } else { 3872 return sign ? 1 << 1 : 1 << 6; 3873 } 3874 } 3875 3876 target_ulong fclass_s(uint64_t frs1) 3877 { 3878 float32 f = frs1; 3879 bool sign = float32_is_neg(f); 3880 3881 if (float32_is_infinity(f)) { 3882 return sign ? 1 << 0 : 1 << 7; 3883 } else if (float32_is_zero(f)) { 3884 return sign ? 1 << 3 : 1 << 4; 3885 } else if (float32_is_zero_or_denormal(f)) { 3886 return sign ? 1 << 2 : 1 << 5; 3887 } else if (float32_is_any_nan(f)) { 3888 float_status s = { }; /* for snan_bit_is_one */ 3889 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3890 } else { 3891 return sign ? 1 << 1 : 1 << 6; 3892 } 3893 } 3894 3895 target_ulong fclass_d(uint64_t frs1) 3896 { 3897 float64 f = frs1; 3898 bool sign = float64_is_neg(f); 3899 3900 if (float64_is_infinity(f)) { 3901 return sign ? 1 << 0 : 1 << 7; 3902 } else if (float64_is_zero(f)) { 3903 return sign ? 1 << 3 : 1 << 4; 3904 } else if (float64_is_zero_or_denormal(f)) { 3905 return sign ? 1 << 2 : 1 << 5; 3906 } else if (float64_is_any_nan(f)) { 3907 float_status s = { }; /* for snan_bit_is_one */ 3908 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3909 } else { 3910 return sign ? 1 << 1 : 1 << 6; 3911 } 3912 } 3913 3914 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 3915 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 3916 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 3917 GEN_VEXT_V(vfclass_v_h, 2, 2) 3918 GEN_VEXT_V(vfclass_v_w, 4, 4) 3919 GEN_VEXT_V(vfclass_v_d, 8, 8) 3920 3921 /* Vector Floating-Point Merge Instruction */ 3922 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 3923 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3924 CPURISCVState *env, uint32_t desc) \ 3925 { \ 3926 uint32_t vm = vext_vm(desc); \ 3927 uint32_t vl = env->vl; \ 3928 uint32_t i; \ 3929 \ 3930 for (i = 0; i < vl; i++) { \ 3931 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3932 *((ETYPE *)vd + H(i)) \ 3933 = (!vm && !vext_elem_mask(v0, i) ? 
s2 : s1); \ 3934 } \ 3935 } 3936 3937 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 3938 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 3939 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 3940 3941 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 3942 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 3943 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 3944 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 3945 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 3946 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) 3947 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) 3948 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) 3949 3950 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 3951 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 3952 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 3953 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 3954 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) 3955 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) 3956 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) 3957 3958 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 3959 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 3960 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 3961 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 3962 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) 3963 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) 3964 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) 3965 3966 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 3967 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 3968 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 3969 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 3970 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) 3971 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) 3972 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) 3973 3974 /* Widening Floating-Point/Integer Type-Convert Instructions */ 3975 /* (TD, T2, TX2) */ 3976 #define WOP_UU_H uint32_t, uint16_t, uint16_t 3977 #define WOP_UU_W uint64_t, uint32_t, uint32_t 3978 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 3979 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 3980 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 3981 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) 3982 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) 3983 3984 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 3985 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 3986 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 3987 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) 3988 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) 3989 3990 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 3991 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 3992 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 3993 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) 3994 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) 3995 3996 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 3997 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 3998 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 3999 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) 4000 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) 4001 4002 /* 4003 * vfwcvt.f.f.v vd, vs2, vm # 4004 * Convert single-width float to double-width float. 
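 * Every single-width value is exactly representable at double width,
 * so this conversion never rounds.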
4005 */ 4006 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4007 { 4008 return float16_to_float32(a, true, s); 4009 } 4010 4011 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4012 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4013 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4) 4014 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8) 4015 4016 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4017 /* (TD, T2, TX2) */ 4018 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4019 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4020 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4021 RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16) 4022 RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32) 4023 GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2) 4024 GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4) 4025 4026 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4027 RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16) 4028 RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32) 4029 GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2) 4030 GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4) 4031 4032 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4033 RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16) 4034 RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32) 4035 GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2) 4036 GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4) 4037 4038 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4039 RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16) 4040 RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32) 4041 GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2) 4042 GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4) 4043 4044 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
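 * Unlike the widening form, narrowing can round and can overflow or
 * underflow, so the result depends on the rounding mode in env->fp_status.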
*/ 4045 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4046 { 4047 return float32_to_float16(a, true, s); 4048 } 4049 4050 RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) 4051 RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) 4052 GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2) 4053 GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4) 4054 4055 /* 4056 *** Vector Reduction Operations 4057 */ 4058 /* Vector Single-Width Integer Reduction Instructions */ 4059 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4060 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4061 void *vs2, CPURISCVState *env, uint32_t desc) \ 4062 { \ 4063 uint32_t vm = vext_vm(desc); \ 4064 uint32_t vl = env->vl; \ 4065 uint32_t i; \ 4066 TD s1 = *((TD *)vs1 + HD(0)); \ 4067 \ 4068 for (i = 0; i < vl; i++) { \ 4069 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4070 if (!vm && !vext_elem_mask(v0, i)) { \ 4071 continue; \ 4072 } \ 4073 s1 = OP(s1, (TD)s2); \ 4074 } \ 4075 *((TD *)vd + HD(0)) = s1; \ 4076 } 4077 4078 /* vd[0] = sum(vs1[0], vs2[*]) */ 4079 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4080 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4081 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4082 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4083 4084 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4085 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4086 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4087 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4088 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4089 4090 /* vd[0] = max(vs1[0], vs2[*]) */ 4091 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4092 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4093 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4094 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4095 4096 /* vd[0] = minu(vs1[0], vs2[*]) */ 4097 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4098 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4099 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4100 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4101 4102 /* vd[0] = min(vs1[0], vs2[*]) */ 4103 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4104 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4105 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4106 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4107 4108 /* vd[0] = and(vs1[0], vs2[*]) */ 4109 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4110 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4111 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4112 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4113 4114 /* vd[0] = or(vs1[0], vs2[*]) */ 4115 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4116 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4117 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4118 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4119 4120 /* vd[0] = xor(vs1[0], vs2[*]) */ 4121 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4122 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4123 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4124 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4125 4126 /* Vector Widening Integer Reduction Instructions */ 4127 /* signed sum 
reduction into double-width accumulator */ 4128 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4129 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4130 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4131 4132 /* Unsigned sum reduction into double-width accumulator */ 4133 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4134 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4135 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4136 4137 /* Vector Single-Width Floating-Point Reduction Instructions */ 4138 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4139 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4140 void *vs2, CPURISCVState *env, \ 4141 uint32_t desc) \ 4142 { \ 4143 uint32_t vm = vext_vm(desc); \ 4144 uint32_t vl = env->vl; \ 4145 uint32_t i; \ 4146 TD s1 = *((TD *)vs1 + HD(0)); \ 4147 \ 4148 for (i = 0; i < vl; i++) { \ 4149 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4150 if (!vm && !vext_elem_mask(v0, i)) { \ 4151 continue; \ 4152 } \ 4153 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4154 } \ 4155 *((TD *)vd + HD(0)) = s1; \ 4156 } 4157 4158 /* Unordered sum */ 4159 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4160 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4161 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4162 4163 /* Maximum value */ 4164 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum) 4165 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum) 4166 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum) 4167 4168 /* Minimum value */ 4169 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum) 4170 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum) 4171 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum) 4172 4173 /* Vector Widening Floating-Point Reduction Instructions */ 4174 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4175 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4176 void *vs2, CPURISCVState *env, uint32_t desc) 4177 { 4178 uint32_t vm = vext_vm(desc); 4179 uint32_t vl = env->vl; 4180 uint32_t i; 4181 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4182 4183 for (i = 0; i < vl; i++) { 4184 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4185 if (!vm && !vext_elem_mask(v0, i)) { 4186 continue; 4187 } 4188 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4189 &env->fp_status); 4190 } 4191 *((uint32_t *)vd + H4(0)) = s1; 4192 } 4193 4194 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4195 void *vs2, CPURISCVState *env, uint32_t desc) 4196 { 4197 uint32_t vm = vext_vm(desc); 4198 uint32_t vl = env->vl; 4199 uint32_t i; 4200 uint64_t s1 = *((uint64_t *)vs1); 4201 4202 for (i = 0; i < vl; i++) { 4203 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4204 if (!vm && !vext_elem_mask(v0, i)) { 4205 continue; 4206 } 4207 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4208 &env->fp_status); 4209 } 4210 *((uint64_t *)vd) = s1; 4211 } 4212 4213 /* 4214 *** Vector Mask Operations 4215 */ 4216 /* Vector Mask-Register Logical Instructions */ 4217 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4218 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4219 void *vs2, CPURISCVState *env, \ 4220 uint32_t desc) \ 4221 { \ 4222 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4223 uint32_t vl = env->vl; \ 4224 uint32_t i; \ 4225 int a, b; \ 4226 \ 4227 for (i = 0; i < vl; 
i++) { \ 4228 a = vext_elem_mask(vs1, i); \ 4229 b = vext_elem_mask(vs2, i); \ 4230 vext_set_elem_mask(vd, i, OP(b, a)); \ 4231 } \ 4232 for (; i < vlmax; i++) { \ 4233 vext_set_elem_mask(vd, i, 0); \ 4234 } \ 4235 } 4236 4237 #define DO_NAND(N, M) (!(N & M)) 4238 #define DO_ANDNOT(N, M) (N & !M) 4239 #define DO_NOR(N, M) (!(N | M)) 4240 #define DO_ORNOT(N, M) (N | !M) 4241 #define DO_XNOR(N, M) (!(N ^ M)) 4242 4243 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4244 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4245 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) 4246 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4247 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4248 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4249 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) 4250 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4251 4252 /* Vector mask population count vmpopc */ 4253 target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env, 4254 uint32_t desc) 4255 { 4256 target_ulong cnt = 0; 4257 uint32_t vm = vext_vm(desc); 4258 uint32_t vl = env->vl; 4259 int i; 4260 4261 for (i = 0; i < vl; i++) { 4262 if (vm || vext_elem_mask(v0, i)) { 4263 if (vext_elem_mask(vs2, i)) { 4264 cnt++; 4265 } 4266 } 4267 } 4268 return cnt; 4269 } 4270 4271 /* vmfirst find-first-set mask bit*/ 4272 target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4273 uint32_t desc) 4274 { 4275 uint32_t vm = vext_vm(desc); 4276 uint32_t vl = env->vl; 4277 int i; 4278 4279 for (i = 0; i < vl; i++) { 4280 if (vm || vext_elem_mask(v0, i)) { 4281 if (vext_elem_mask(vs2, i)) { 4282 return i; 4283 } 4284 } 4285 } 4286 return -1LL; 4287 } 4288 4289 enum set_mask_type { 4290 ONLY_FIRST = 1, 4291 INCLUDE_FIRST, 4292 BEFORE_FIRST, 4293 }; 4294 4295 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4296 uint32_t desc, enum set_mask_type type) 4297 { 4298 uint32_t vlmax = env_archcpu(env)->cfg.vlen; 4299 uint32_t vm = vext_vm(desc); 4300 uint32_t vl = env->vl; 4301 int i; 4302 bool first_mask_bit = false; 4303 4304 for (i = 0; i < vl; i++) { 4305 if (!vm && !vext_elem_mask(v0, i)) { 4306 continue; 4307 } 4308 /* write a zero to all following active elements */ 4309 if (first_mask_bit) { 4310 vext_set_elem_mask(vd, i, 0); 4311 continue; 4312 } 4313 if (vext_elem_mask(vs2, i)) { 4314 first_mask_bit = true; 4315 if (type == BEFORE_FIRST) { 4316 vext_set_elem_mask(vd, i, 0); 4317 } else { 4318 vext_set_elem_mask(vd, i, 1); 4319 } 4320 } else { 4321 if (type == ONLY_FIRST) { 4322 vext_set_elem_mask(vd, i, 0); 4323 } else { 4324 vext_set_elem_mask(vd, i, 1); 4325 } 4326 } 4327 } 4328 for (; i < vlmax; i++) { 4329 vext_set_elem_mask(vd, i, 0); 4330 } 4331 } 4332 4333 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4334 uint32_t desc) 4335 { 4336 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4337 } 4338 4339 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4340 uint32_t desc) 4341 { 4342 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4343 } 4344 4345 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4346 uint32_t desc) 4347 { 4348 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4349 } 4350 4351 /* Vector Iota Instruction */ 4352 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4353 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4354 uint32_t desc) \ 4355 { \ 4356 uint32_t vm = vext_vm(desc); \ 4357 uint32_t vl = env->vl; \ 4358 uint32_t sum = 0; \ 4359 int i; \ 4360 \ 4361 for (i = 0; i < vl; i++) { \ 4362 if (!vm && !vext_elem_mask(v0, i)) { \ 4363 continue; \ 4364 } \ 4365 *((ETYPE *)vd + H(i)) 
= sum; \ 4366 if (vext_elem_mask(vs2, i)) { \ 4367 sum++; \ 4368 } \ 4369 } \ 4370 } 4371 4372 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4373 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4374 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4375 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4376 4377 /* Vector Element Index Instruction */ 4378 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4379 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4380 { \ 4381 uint32_t vm = vext_vm(desc); \ 4382 uint32_t vl = env->vl; \ 4383 int i; \ 4384 \ 4385 for (i = 0; i < vl; i++) { \ 4386 if (!vm && !vext_elem_mask(v0, i)) { \ 4387 continue; \ 4388 } \ 4389 *((ETYPE *)vd + H(i)) = i; \ 4390 } \ 4391 } 4392 4393 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4394 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4395 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4396 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4397 4398 /* 4399 *** Vector Permutation Instructions 4400 */ 4401 4402 /* Vector Slide Instructions */ 4403 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4404 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4405 CPURISCVState *env, uint32_t desc) \ 4406 { \ 4407 uint32_t vm = vext_vm(desc); \ 4408 uint32_t vl = env->vl; \ 4409 target_ulong offset = s1, i; \ 4410 \ 4411 for (i = offset; i < vl; i++) { \ 4412 if (!vm && !vext_elem_mask(v0, i)) { \ 4413 continue; \ 4414 } \ 4415 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4416 } \ 4417 } 4418 4419 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4420 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4421 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4422 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4423 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4424 4425 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4426 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4427 CPURISCVState *env, uint32_t desc) \ 4428 { \ 4429 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4430 uint32_t vm = vext_vm(desc); \ 4431 uint32_t vl = env->vl; \ 4432 target_ulong offset = s1, i; \ 4433 \ 4434 for (i = 0; i < vl; ++i) { \ 4435 target_ulong j = i + offset; \ 4436 if (!vm && !vext_elem_mask(v0, i)) { \ 4437 continue; \ 4438 } \ 4439 *((ETYPE *)vd + H(i)) = j >= vlmax ? 
0 : *((ETYPE *)vs2 + H(j)); \ 4440 } \ 4441 } 4442 4443 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4444 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4445 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4446 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4447 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4448 4449 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H) \ 4450 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4451 CPURISCVState *env, uint32_t desc) \ 4452 { \ 4453 uint32_t vm = vext_vm(desc); \ 4454 uint32_t vl = env->vl; \ 4455 uint32_t i; \ 4456 \ 4457 for (i = 0; i < vl; i++) { \ 4458 if (!vm && !vext_elem_mask(v0, i)) { \ 4459 continue; \ 4460 } \ 4461 if (i == 0) { \ 4462 *((ETYPE *)vd + H(i)) = s1; \ 4463 } else { \ 4464 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4465 } \ 4466 } \ 4467 } 4468 4469 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4470 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1) 4471 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2) 4472 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4) 4473 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8) 4474 4475 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H) \ 4476 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4477 CPURISCVState *env, uint32_t desc) \ 4478 { \ 4479 uint32_t vm = vext_vm(desc); \ 4480 uint32_t vl = env->vl; \ 4481 uint32_t i; \ 4482 \ 4483 for (i = 0; i < vl; i++) { \ 4484 if (!vm && !vext_elem_mask(v0, i)) { \ 4485 continue; \ 4486 } \ 4487 if (i == vl - 1) { \ 4488 *((ETYPE *)vd + H(i)) = s1; \ 4489 } else { \ 4490 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4491 } \ 4492 } \ 4493 } 4494 4495 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4496 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1) 4497 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2) 4498 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4) 4499 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8) 4500 4501 /* Vector Register Gather Instruction */ 4502 #define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H) \ 4503 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4504 CPURISCVState *env, uint32_t desc) \ 4505 { \ 4506 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4507 uint32_t vm = vext_vm(desc); \ 4508 uint32_t vl = env->vl; \ 4509 uint64_t index; \ 4510 uint32_t i; \ 4511 \ 4512 for (i = 0; i < vl; i++) { \ 4513 if (!vm && !vext_elem_mask(v0, i)) { \ 4514 continue; \ 4515 } \ 4516 index = *((ETYPE *)vs1 + H(i)); \ 4517 if (index >= vlmax) { \ 4518 *((ETYPE *)vd + H(i)) = 0; \ 4519 } else { \ 4520 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4521 } \ 4522 } \ 4523 } 4524 4525 /* vd[i] = (vs1[i] >= VLMAX) ? 
0 : vs2[vs1[i]]; */ 4526 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, H1) 4527 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2) 4528 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4) 4529 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8) 4530 4531 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4532 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4533 CPURISCVState *env, uint32_t desc) \ 4534 { \ 4535 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4536 uint32_t vm = vext_vm(desc); \ 4537 uint32_t vl = env->vl; \ 4538 uint64_t index = s1; \ 4539 uint32_t i; \ 4540 \ 4541 for (i = 0; i < vl; i++) { \ 4542 if (!vm && !vext_elem_mask(v0, i)) { \ 4543 continue; \ 4544 } \ 4545 if (index >= vlmax) { \ 4546 *((ETYPE *)vd + H(i)) = 0; \ 4547 } else { \ 4548 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4549 } \ 4550 } \ 4551 } 4552 4553 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ 4554 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4555 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4556 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4557 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4558 4559 /* Vector Compress Instruction */ 4560 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4561 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4562 CPURISCVState *env, uint32_t desc) \ 4563 { \ 4564 uint32_t vl = env->vl; \ 4565 uint32_t num = 0, i; \ 4566 \ 4567 for (i = 0; i < vl; i++) { \ 4568 if (!vext_elem_mask(vs1, i)) { \ 4569 continue; \ 4570 } \ 4571 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4572 num++; \ 4573 } \ 4574 } 4575 4576 /* Compress into vd elements of vs2 where vs1 is enabled */ 4577 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4578 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4579 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4580 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4581
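
/*
 * Illustration of the compress semantics above: with vl = 8, a vs1 mask
 * of 0b01010101 and vs2 = {a, b, c, d, e, f, g, h}, the loop packs the
 * selected elements into vd = {a, c, e, g}, leaving the remaining
 * elements of vd unwritten by this helper.  The v0 argument is unused;
 * vcompress takes its selection mask from vs1.
 */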