/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    bool vill = FIELD_EX64(s2, VTYPE, VILL);
    target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that needs a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

/*
 * Get vector group length in bytes. Its range is [64, 2048].
 *
 * As simd_desc supports at most 256, the max vlen is 512 bits.
 * So vlen in bytes is encoded as maxsz.
 */
static inline uint32_t vext_maxsz(uint32_t desc)
{
    return simd_maxsz(desc) << vext_lmul(desc);
}

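/*
 * Illustrative example only (the names vlen_in_bytes/esz are local to this
 * comment, not part of the helpers): the element count of a vector group,
 * vlmax, follows from the descriptor as
 *
 *     vlmax = vext_maxsz(desc) / esz = (vlen_in_bytes << lmul) / esz
 *
 * e.g. for VLEN = 128 bits (16 bytes), SEW = 16 bits (esz = 2) and LMUL = 4
 * (lmul = 2): vlmax = (16 << 2) / 2 = 32.  The load/store helpers below use
 * this value when they place field k of element i at register slot
 * "i + k * vlmax".
 */
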
/*
 * This function checks the watchpoint before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, addr, curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, addr, curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, MTYPE, ETYPE, H, LDSUF)     \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    MTYPE data;                                            \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    data = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
    *cur = data;                                           \
}                                                          \

GEN_VEXT_LD_ELEM(ldb_b,  int8_t,   int8_t,   H1, ldsb)
GEN_VEXT_LD_ELEM(ldb_h,  int8_t,   int16_t,  H2, ldsb)
GEN_VEXT_LD_ELEM(ldb_w,  int8_t,   int32_t,  H4, ldsb)
GEN_VEXT_LD_ELEM(ldb_d,  int8_t,   int64_t,  H8, ldsb)
GEN_VEXT_LD_ELEM(ldh_h,  int16_t,  int16_t,  H2, ldsw)
GEN_VEXT_LD_ELEM(ldh_w,  int16_t,  int32_t,  H4, ldsw)
GEN_VEXT_LD_ELEM(ldh_d,  int16_t,  int64_t,  H8, ldsw)
GEN_VEXT_LD_ELEM(ldw_w,  int32_t,  int32_t,  H4, ldl)
GEN_VEXT_LD_ELEM(ldw_d,  int32_t,  int64_t,  H8, ldl)
GEN_VEXT_LD_ELEM(lde_b,  int8_t,   int8_t,   H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h,  int16_t,  int16_t,  H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w,  int32_t,  int32_t,  H4, ldl)
GEN_VEXT_LD_ELEM(lde_d,  int64_t,  int64_t,  H8, ldq)
GEN_VEXT_LD_ELEM(ldbu_b, uint8_t,  uint8_t,  H1, ldub)
GEN_VEXT_LD_ELEM(ldbu_h, uint8_t,  uint16_t, H2, ldub)
GEN_VEXT_LD_ELEM(ldbu_w, uint8_t,  uint32_t, H4, ldub)
GEN_VEXT_LD_ELEM(ldbu_d, uint8_t,  uint64_t, H8, ldub)
GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, uint16_t, H2, lduw)
GEN_VEXT_LD_ELEM(ldhu_w, uint16_t, uint32_t, H4, lduw)
GEN_VEXT_LD_ELEM(ldhu_d, uint16_t, uint64_t, H8, lduw)
GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, uint32_t, H4, ldl)
GEN_VEXT_LD_ELEM(ldwu_d, uint32_t, uint64_t, H8, ldl)

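/*
 * For reference, a sketch of what one expansion of the GEN_VEXT_LD_ELEM
 * macro above produces (ldb_h: load a signed byte from memory and widen it
 * into a 16-bit vector element):
 *
 *   static void ldb_h(CPURISCVState *env, abi_ptr addr,
 *                     uint32_t idx, void *vd, uintptr_t retaddr)
 *   {
 *       int8_t data;
 *       int16_t *cur = ((int16_t *)vd + H2(idx));
 *       data = cpu_ldsb_data_ra(env, addr, retaddr);
 *       *cur = data;
 *   }
 *
 * MTYPE is the type used for the memory access and ETYPE the in-register
 * element type; the widening happens in the implicit int8_t -> int16_t
 * assignment.
 */
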
#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(stb_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(stb_h, int16_t, H2, stb)
GEN_VEXT_ST_ELEM(stb_w, int32_t, H4, stb)
GEN_VEXT_ST_ELEM(stb_d, int64_t, H8, stb)
GEN_VEXT_ST_ELEM(sth_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(sth_w, int32_t, H4, stw)
GEN_VEXT_ST_ELEM(sth_d, int64_t, H8, stw)
GEN_VEXT_ST_ELEM(stw_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(stw_d, int64_t, H8, stl)
GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uint32_t msz, uintptr_t ra,
                 MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        probe_pages(env, base + stride * i, nf * msz, ra, access_type);
    }
    /* do real access */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + stride * i + k * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
}

#define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, LOAD_FN)                 \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                         \
{                                                                        \
    uint32_t vm = vext_vm(desc);                                         \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,       \
                     sizeof(ETYPE), sizeof(MTYPE),                       \
                     GETPC(), MMU_DATA_LOAD);                            \
}

GEN_VEXT_LD_STRIDE(vlsb_v_b,  int8_t,   int8_t,   ldb_b)
GEN_VEXT_LD_STRIDE(vlsb_v_h,  int8_t,   int16_t,  ldb_h)
GEN_VEXT_LD_STRIDE(vlsb_v_w,  int8_t,   int32_t,  ldb_w)
GEN_VEXT_LD_STRIDE(vlsb_v_d,  int8_t,   int64_t,  ldb_d)
GEN_VEXT_LD_STRIDE(vlsh_v_h,  int16_t,  int16_t,  ldh_h)
GEN_VEXT_LD_STRIDE(vlsh_v_w,  int16_t,  int32_t,  ldh_w)
GEN_VEXT_LD_STRIDE(vlsh_v_d,  int16_t,  int64_t,  ldh_d)
GEN_VEXT_LD_STRIDE(vlsw_v_w,  int32_t,  int32_t,  ldw_w)
GEN_VEXT_LD_STRIDE(vlsw_v_d,  int32_t,  int64_t,  ldw_d)
GEN_VEXT_LD_STRIDE(vlse_v_b,  int8_t,   int8_t,   lde_b)
GEN_VEXT_LD_STRIDE(vlse_v_h,  int16_t,  int16_t,  lde_h)
GEN_VEXT_LD_STRIDE(vlse_v_w,  int32_t,  int32_t,  lde_w)
GEN_VEXT_LD_STRIDE(vlse_v_d,  int64_t,  int64_t,  lde_d)
GEN_VEXT_LD_STRIDE(vlsbu_v_b, uint8_t,  uint8_t,  ldbu_b)
GEN_VEXT_LD_STRIDE(vlsbu_v_h, uint8_t,  uint16_t, ldbu_h)
GEN_VEXT_LD_STRIDE(vlsbu_v_w, uint8_t,  uint32_t, ldbu_w)
GEN_VEXT_LD_STRIDE(vlsbu_v_d, uint8_t,  uint64_t, ldbu_d)
GEN_VEXT_LD_STRIDE(vlshu_v_h, uint16_t, uint16_t, ldhu_h)
GEN_VEXT_LD_STRIDE(vlshu_v_w, uint16_t, uint32_t, ldhu_w)
GEN_VEXT_LD_STRIDE(vlshu_v_d, uint16_t, uint64_t, ldhu_d)
GEN_VEXT_LD_STRIDE(vlswu_v_w, uint32_t, uint32_t, ldwu_w)
GEN_VEXT_LD_STRIDE(vlswu_v_d, uint32_t, uint64_t, ldwu_d)

#define GEN_VEXT_ST_STRIDE(NAME, MTYPE, ETYPE, STORE_FN)                \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                         \
{                                                                        \
    uint32_t vm = vext_vm(desc);                                         \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,      \
                     sizeof(ETYPE), sizeof(MTYPE),                       \
                     GETPC(), MMU_DATA_STORE);                           \
}

GEN_VEXT_ST_STRIDE(vssb_v_b, int8_t,  int8_t,  stb_b)
GEN_VEXT_ST_STRIDE(vssb_v_h, int8_t,  int16_t, stb_h)
GEN_VEXT_ST_STRIDE(vssb_v_w, int8_t,  int32_t, stb_w)
GEN_VEXT_ST_STRIDE(vssb_v_d, int8_t,  int64_t, stb_d)
GEN_VEXT_ST_STRIDE(vssh_v_h, int16_t, int16_t, sth_h)
GEN_VEXT_ST_STRIDE(vssh_v_w, int16_t, int32_t, sth_w)
GEN_VEXT_ST_STRIDE(vssh_v_d, int16_t, int64_t, sth_d)
GEN_VEXT_ST_STRIDE(vssw_v_w, int32_t, int32_t, stw_w)
GEN_VEXT_ST_STRIDE(vssw_v_d, int32_t, int64_t, stw_d)
GEN_VEXT_ST_STRIDE(vsse_v_b, int8_t,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse_v_h, int16_t, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse_v_w, int32_t, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d)

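/*
 * Worked example of the stride addressing above (illustrative numbers, not
 * taken from any particular instruction): with nf = 2 fields, msz = 4 bytes
 * in memory, stride = 16 and vlmax = 8, element i = 3, field k = 1 is
 * accessed at
 *
 *     addr = base + stride * i + k * msz = base + 16 * 3 + 4 = base + 52
 *
 * and mapped to register slot i + k * vlmax = 3 + 8 = 11, i.e. each field
 * of a segment occupies its own group of vlmax elements in vd.
 */
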
/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t msz,
             uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    /* probe every access */
    probe_pages(env, base, env->vl * nf * msz, ra, access_type);
    /* load bytes from guest memory */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + (i * nf + k) * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
}

/*
 * A masked unit-stride load or store is handled as a special case of the
 * strided operation, with stride = NF * sizeof(MTYPE).
 */

#define GEN_VEXT_LD_US(NAME, MTYPE, ETYPE, LOAD_FN)                      \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
                         CPURISCVState *env, uint32_t desc)              \
{                                                                        \
    uint32_t stride = vext_nf(desc) * sizeof(MTYPE);                     \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,    \
                     sizeof(ETYPE), sizeof(MTYPE),                       \
                     GETPC(), MMU_DATA_LOAD);                            \
}                                                                        \
                                                                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    vext_ldst_us(vd, base, env, desc, LOAD_FN,                           \
                 sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_LOAD);  \
}

GEN_VEXT_LD_US(vlb_v_b,  int8_t,   int8_t,   ldb_b)
GEN_VEXT_LD_US(vlb_v_h,  int8_t,   int16_t,  ldb_h)
GEN_VEXT_LD_US(vlb_v_w,  int8_t,   int32_t,  ldb_w)
GEN_VEXT_LD_US(vlb_v_d,  int8_t,   int64_t,  ldb_d)
GEN_VEXT_LD_US(vlh_v_h,  int16_t,  int16_t,  ldh_h)
GEN_VEXT_LD_US(vlh_v_w,  int16_t,  int32_t,  ldh_w)
GEN_VEXT_LD_US(vlh_v_d,  int16_t,  int64_t,  ldh_d)
GEN_VEXT_LD_US(vlw_v_w,  int32_t,  int32_t,  ldw_w)
GEN_VEXT_LD_US(vlw_v_d,  int32_t,  int64_t,  ldw_d)
GEN_VEXT_LD_US(vle_v_b,  int8_t,   int8_t,   lde_b)
GEN_VEXT_LD_US(vle_v_h,  int16_t,  int16_t,  lde_h)
GEN_VEXT_LD_US(vle_v_w,  int32_t,  int32_t,  lde_w)
GEN_VEXT_LD_US(vle_v_d,  int64_t,  int64_t,  lde_d)
GEN_VEXT_LD_US(vlbu_v_b, uint8_t,  uint8_t,  ldbu_b)
GEN_VEXT_LD_US(vlbu_v_h, uint8_t,  uint16_t, ldbu_h)
GEN_VEXT_LD_US(vlbu_v_w, uint8_t,  uint32_t, ldbu_w)
GEN_VEXT_LD_US(vlbu_v_d, uint8_t,  uint64_t, ldbu_d)
GEN_VEXT_LD_US(vlhu_v_h, uint16_t, uint16_t, ldhu_h)
GEN_VEXT_LD_US(vlhu_v_w, uint16_t, uint32_t, ldhu_w)
GEN_VEXT_LD_US(vlhu_v_d, uint16_t, uint64_t, ldhu_d)
GEN_VEXT_LD_US(vlwu_v_w, uint32_t, uint32_t, ldwu_w)
GEN_VEXT_LD_US(vlwu_v_d, uint32_t, uint64_t, ldwu_d)

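/*
 * The equivalence used by the masked forms above: the unmasked path computes
 * base + (i * nf + k) * msz, which equals the strided path's
 * base + stride * i + k * msz once stride = nf * msz, so vext_ldst_stride()
 * called with vm = false reproduces the unit-stride layout while honouring
 * the mask register v0.
 */
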
#define GEN_VEXT_ST_US(NAME, MTYPE, ETYPE, STORE_FN)                     \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,          \
                         CPURISCVState *env, uint32_t desc)              \
{                                                                        \
    uint32_t stride = vext_nf(desc) * sizeof(MTYPE);                     \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,   \
                     sizeof(ETYPE), sizeof(MTYPE),                       \
                     GETPC(), MMU_DATA_STORE);                           \
}                                                                        \
                                                                         \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
                  CPURISCVState *env, uint32_t desc)                     \
{                                                                        \
    vext_ldst_us(vd, base, env, desc, STORE_FN,                          \
                 sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_STORE); \
}

GEN_VEXT_ST_US(vsb_v_b, int8_t,  int8_t,  stb_b)
GEN_VEXT_ST_US(vsb_v_h, int8_t,  int16_t, stb_h)
GEN_VEXT_ST_US(vsb_v_w, int8_t,  int32_t, stb_w)
GEN_VEXT_ST_US(vsb_v_d, int8_t,  int64_t, stb_d)
GEN_VEXT_ST_US(vsh_v_h, int16_t, int16_t, sth_h)
GEN_VEXT_ST_US(vsh_v_w, int16_t, int32_t, sth_w)
GEN_VEXT_ST_US(vsh_v_d, int16_t, int64_t, sth_d)
GEN_VEXT_ST_US(vsw_v_w, int32_t, int32_t, stw_w)
GEN_VEXT_ST_US(vsw_v_d, int32_t, int64_t, stw_d)
GEN_VEXT_ST_US(vse_v_b, int8_t,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h)
GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w)
GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d)

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, int8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, int16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, int32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, int64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t esz, uint32_t msz, uintptr_t ra,
                MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra,
                    access_type);
    }
    /* load bytes from guest memory */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + k * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
}

#define GEN_VEXT_LD_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, LOAD_FN)         \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
                  void *vs2, CPURISCVState *env, uint32_t desc)          \
{                                                                        \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,              \
                    LOAD_FN, sizeof(ETYPE), sizeof(MTYPE),               \
                    GETPC(), MMU_DATA_LOAD);                             \
}

GEN_VEXT_LD_INDEX(vlxb_v_b,  int8_t,   int8_t,   idx_b, ldb_b)
GEN_VEXT_LD_INDEX(vlxb_v_h,  int8_t,   int16_t,  idx_h, ldb_h)
GEN_VEXT_LD_INDEX(vlxb_v_w,  int8_t,   int32_t,  idx_w, ldb_w)
GEN_VEXT_LD_INDEX(vlxb_v_d,  int8_t,   int64_t,  idx_d, ldb_d)
GEN_VEXT_LD_INDEX(vlxh_v_h,  int16_t,  int16_t,  idx_h, ldh_h)
GEN_VEXT_LD_INDEX(vlxh_v_w,  int16_t,  int32_t,  idx_w, ldh_w)
GEN_VEXT_LD_INDEX(vlxh_v_d,  int16_t,  int64_t,  idx_d, ldh_d)
GEN_VEXT_LD_INDEX(vlxw_v_w,  int32_t,  int32_t,  idx_w, ldw_w)
GEN_VEXT_LD_INDEX(vlxw_v_d,  int32_t,  int64_t,  idx_d, ldw_d)
GEN_VEXT_LD_INDEX(vlxe_v_b,  int8_t,   int8_t,   idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxe_v_h,  int16_t,  int16_t,  idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxe_v_w,  int32_t,  int32_t,  idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxe_v_d,  int64_t,  int64_t,  idx_d, lde_d)
GEN_VEXT_LD_INDEX(vlxbu_v_b, uint8_t,  uint8_t,  idx_b, ldbu_b)
GEN_VEXT_LD_INDEX(vlxbu_v_h, uint8_t,  uint16_t, idx_h, ldbu_h)
GEN_VEXT_LD_INDEX(vlxbu_v_w, uint8_t,  uint32_t, idx_w, ldbu_w)
GEN_VEXT_LD_INDEX(vlxbu_v_d, uint8_t,  uint64_t, idx_d, ldbu_d)
GEN_VEXT_LD_INDEX(vlxhu_v_h, uint16_t, uint16_t, idx_h, ldhu_h)
GEN_VEXT_LD_INDEX(vlxhu_v_w, uint16_t, uint32_t, idx_w, ldhu_w)
GEN_VEXT_LD_INDEX(vlxhu_v_d, uint16_t, uint64_t, idx_d, ldhu_d)
GEN_VEXT_LD_INDEX(vlxwu_v_w, uint32_t, uint32_t, idx_w, ldwu_w)
GEN_VEXT_LD_INDEX(vlxwu_v_d, uint32_t, uint64_t, idx_d, ldwu_d)

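/*
 * Illustrative note on the indexed (gather/scatter) form generated above:
 * the address of element i, field k is
 *
 *     get_index_addr(base, i, vs2) + k * msz
 *       = base + (ETYPE)vs2[i] + k * msz
 *
 * so with byte indices vs2 = {0, 64, 8, ...} and msz = 4, element 1,
 * field 0 is accessed at base + 64.  The index element is read with its
 * signed ETYPE, so negative offsets are possible.
 */
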
#define GEN_VEXT_ST_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, STORE_FN)        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                 \
                  void *vs2, CPURISCVState *env, uint32_t desc)          \
{                                                                        \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,              \
                    STORE_FN, sizeof(ETYPE), sizeof(MTYPE),              \
                    GETPC(), MMU_DATA_STORE);                            \
}

GEN_VEXT_ST_INDEX(vsxb_v_b, int8_t,  int8_t,  idx_b, stb_b)
GEN_VEXT_ST_INDEX(vsxb_v_h, int8_t,  int16_t, idx_h, stb_h)
GEN_VEXT_ST_INDEX(vsxb_v_w, int8_t,  int32_t, idx_w, stb_w)
GEN_VEXT_ST_INDEX(vsxb_v_d, int8_t,  int64_t, idx_d, stb_d)
GEN_VEXT_ST_INDEX(vsxh_v_h, int16_t, int16_t, idx_h, sth_h)
GEN_VEXT_ST_INDEX(vsxh_v_w, int16_t, int32_t, idx_w, sth_w)
GEN_VEXT_ST_INDEX(vsxh_v_d, int16_t, int64_t, idx_d, sth_d)
GEN_VEXT_ST_INDEX(vsxw_v_w, int32_t, int32_t, idx_w, stw_w)
GEN_VEXT_ST_INDEX(vsxw_v_d, int32_t, int64_t, idx_d, stw_d)
GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t,  int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uint32_t msz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = base + nf * i * msz;
        if (i == 0) {
            probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf * msz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, nf * msz, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr += offset;
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + (i * nf + k) * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
}

#define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN)      \
void HELPER(NAME)(void *vd, void *v0, target_ulong
base, \ 602 CPURISCVState *env, uint32_t desc) \ 603 { \ 604 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \ 605 sizeof(ETYPE), sizeof(MTYPE), GETPC()); \ 606 } 607 608 GEN_VEXT_LDFF(vlbff_v_b, int8_t, int8_t, ldb_b) 609 GEN_VEXT_LDFF(vlbff_v_h, int8_t, int16_t, ldb_h) 610 GEN_VEXT_LDFF(vlbff_v_w, int8_t, int32_t, ldb_w) 611 GEN_VEXT_LDFF(vlbff_v_d, int8_t, int64_t, ldb_d) 612 GEN_VEXT_LDFF(vlhff_v_h, int16_t, int16_t, ldh_h) 613 GEN_VEXT_LDFF(vlhff_v_w, int16_t, int32_t, ldh_w) 614 GEN_VEXT_LDFF(vlhff_v_d, int16_t, int64_t, ldh_d) 615 GEN_VEXT_LDFF(vlwff_v_w, int32_t, int32_t, ldw_w) 616 GEN_VEXT_LDFF(vlwff_v_d, int32_t, int64_t, ldw_d) 617 GEN_VEXT_LDFF(vleff_v_b, int8_t, int8_t, lde_b) 618 GEN_VEXT_LDFF(vleff_v_h, int16_t, int16_t, lde_h) 619 GEN_VEXT_LDFF(vleff_v_w, int32_t, int32_t, lde_w) 620 GEN_VEXT_LDFF(vleff_v_d, int64_t, int64_t, lde_d) 621 GEN_VEXT_LDFF(vlbuff_v_b, uint8_t, uint8_t, ldbu_b) 622 GEN_VEXT_LDFF(vlbuff_v_h, uint8_t, uint16_t, ldbu_h) 623 GEN_VEXT_LDFF(vlbuff_v_w, uint8_t, uint32_t, ldbu_w) 624 GEN_VEXT_LDFF(vlbuff_v_d, uint8_t, uint64_t, ldbu_d) 625 GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h) 626 GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w) 627 GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d) 628 GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w) 629 GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d) 630 631 #define DO_SWAP(N, M) (M) 632 #define DO_AND(N, M) (N & M) 633 #define DO_XOR(N, M) (N ^ M) 634 #define DO_OR(N, M) (N | M) 635 #define DO_ADD(N, M) (N + M) 636 637 /* Signed min/max */ 638 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M)) 639 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N)) 640 641 /* Unsigned min/max */ 642 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M) 643 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M) 644 645 /* 646 *** Vector Integer Arithmetic Instructions 647 */ 648 649 /* expand macro args before macro */ 650 #define RVVCALL(macro, ...) 
macro(__VA_ARGS__) 651 652 /* (TD, T1, T2, TX1, TX2) */ 653 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t 654 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t 655 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t 656 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t 657 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t 658 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t 659 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t 660 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t 661 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t 662 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t 663 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t 664 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t 665 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 666 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 667 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 668 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 669 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 670 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 671 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t 672 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t 673 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t 674 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t 675 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t 676 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t 677 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t 678 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t 679 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t 680 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 681 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 682 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 683 684 /* operation of two vector elements */ 685 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 686 687 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 688 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 689 { \ 690 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 691 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 692 *((TD *)vd + HD(i)) = OP(s2, s1); \ 693 } 694 #define DO_SUB(N, M) (N - M) 695 #define DO_RSUB(N, M) (M - N) 696 697 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 698 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 699 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 700 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 701 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 702 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 703 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 704 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 705 706 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 707 CPURISCVState *env, uint32_t desc, 708 uint32_t esz, uint32_t dsz, 709 opivv2_fn *fn) 710 { 711 uint32_t vm = vext_vm(desc); 712 uint32_t vl = env->vl; 713 uint32_t i; 714 715 for (i = 0; i < vl; i++) { 716 if (!vm && !vext_elem_mask(v0, i)) { 717 continue; 718 } 719 fn(vd, vs1, vs2, i); 720 } 721 } 722 723 /* generate the helpers for OPIVV */ 724 #define GEN_VEXT_VV(NAME, ESZ, DSZ) \ 725 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 726 void *vs2, CPURISCVState 
*env, \ 727 uint32_t desc) \ 728 { \ 729 do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 730 do_##NAME); \ 731 } 732 733 GEN_VEXT_VV(vadd_vv_b, 1, 1) 734 GEN_VEXT_VV(vadd_vv_h, 2, 2) 735 GEN_VEXT_VV(vadd_vv_w, 4, 4) 736 GEN_VEXT_VV(vadd_vv_d, 8, 8) 737 GEN_VEXT_VV(vsub_vv_b, 1, 1) 738 GEN_VEXT_VV(vsub_vv_h, 2, 2) 739 GEN_VEXT_VV(vsub_vv_w, 4, 4) 740 GEN_VEXT_VV(vsub_vv_d, 8, 8) 741 742 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 743 744 /* 745 * (T1)s1 gives the real operator type. 746 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 747 */ 748 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 749 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 750 { \ 751 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 752 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 753 } 754 755 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 756 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 757 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 758 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 759 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 760 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 761 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 762 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 763 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 764 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 765 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 766 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 767 768 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 769 CPURISCVState *env, uint32_t desc, 770 uint32_t esz, uint32_t dsz, 771 opivx2_fn fn) 772 { 773 uint32_t vm = vext_vm(desc); 774 uint32_t vl = env->vl; 775 uint32_t i; 776 777 for (i = 0; i < vl; i++) { 778 if (!vm && !vext_elem_mask(v0, i)) { 779 continue; 780 } 781 fn(vd, s1, vs2, i); 782 } 783 } 784 785 /* generate the helpers for OPIVX */ 786 #define GEN_VEXT_VX(NAME, ESZ, DSZ) \ 787 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 788 void *vs2, CPURISCVState *env, \ 789 uint32_t desc) \ 790 { \ 791 do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 792 do_##NAME); \ 793 } 794 795 GEN_VEXT_VX(vadd_vx_b, 1, 1) 796 GEN_VEXT_VX(vadd_vx_h, 2, 2) 797 GEN_VEXT_VX(vadd_vx_w, 4, 4) 798 GEN_VEXT_VX(vadd_vx_d, 8, 8) 799 GEN_VEXT_VX(vsub_vx_b, 1, 1) 800 GEN_VEXT_VX(vsub_vx_h, 2, 2) 801 GEN_VEXT_VX(vsub_vx_w, 4, 4) 802 GEN_VEXT_VX(vsub_vx_d, 8, 8) 803 GEN_VEXT_VX(vrsub_vx_b, 1, 1) 804 GEN_VEXT_VX(vrsub_vx_h, 2, 2) 805 GEN_VEXT_VX(vrsub_vx_w, 4, 4) 806 GEN_VEXT_VX(vrsub_vx_d, 8, 8) 807 808 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 809 { 810 intptr_t oprsz = simd_oprsz(desc); 811 intptr_t i; 812 813 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 814 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 815 } 816 } 817 818 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 819 { 820 intptr_t oprsz = simd_oprsz(desc); 821 intptr_t i; 822 823 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 824 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 825 } 826 } 827 828 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 829 { 830 intptr_t oprsz = simd_oprsz(desc); 831 intptr_t i; 832 833 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 834 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 835 } 836 } 837 838 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 839 { 840 intptr_t oprsz = simd_oprsz(desc); 841 intptr_t i; 842 
843 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 844 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 845 } 846 } 847 848 /* Vector Widening Integer Add/Subtract */ 849 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 850 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 851 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 852 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 853 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 854 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 855 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 856 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 857 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 858 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 859 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 860 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 861 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 862 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 863 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 864 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 865 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 866 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 867 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 868 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 869 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 870 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 871 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 872 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 873 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 874 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 875 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 876 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 877 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 878 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 879 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 880 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 881 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 882 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 883 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 884 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 885 GEN_VEXT_VV(vwaddu_vv_b, 1, 2) 886 GEN_VEXT_VV(vwaddu_vv_h, 2, 4) 887 GEN_VEXT_VV(vwaddu_vv_w, 4, 8) 888 GEN_VEXT_VV(vwsubu_vv_b, 1, 2) 889 GEN_VEXT_VV(vwsubu_vv_h, 2, 4) 890 GEN_VEXT_VV(vwsubu_vv_w, 4, 8) 891 GEN_VEXT_VV(vwadd_vv_b, 1, 2) 892 GEN_VEXT_VV(vwadd_vv_h, 2, 4) 893 GEN_VEXT_VV(vwadd_vv_w, 4, 8) 894 GEN_VEXT_VV(vwsub_vv_b, 1, 2) 895 GEN_VEXT_VV(vwsub_vv_h, 2, 4) 896 GEN_VEXT_VV(vwsub_vv_w, 4, 8) 897 GEN_VEXT_VV(vwaddu_wv_b, 1, 2) 898 GEN_VEXT_VV(vwaddu_wv_h, 2, 4) 899 GEN_VEXT_VV(vwaddu_wv_w, 4, 8) 900 GEN_VEXT_VV(vwsubu_wv_b, 1, 2) 901 GEN_VEXT_VV(vwsubu_wv_h, 2, 4) 902 GEN_VEXT_VV(vwsubu_wv_w, 4, 8) 903 GEN_VEXT_VV(vwadd_wv_b, 1, 2) 904 GEN_VEXT_VV(vwadd_wv_h, 2, 4) 905 GEN_VEXT_VV(vwadd_wv_w, 4, 8) 906 GEN_VEXT_VV(vwsub_wv_b, 1, 2) 907 GEN_VEXT_VV(vwsub_wv_h, 2, 4) 908 GEN_VEXT_VV(vwsub_wv_w, 4, 8) 909 910 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 911 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 912 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 913 
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 914 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 915 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 916 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 917 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 918 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 919 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 920 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 921 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 922 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 923 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 924 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 925 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 926 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 927 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 928 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 929 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 930 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 931 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 932 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 933 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 934 GEN_VEXT_VX(vwaddu_vx_b, 1, 2) 935 GEN_VEXT_VX(vwaddu_vx_h, 2, 4) 936 GEN_VEXT_VX(vwaddu_vx_w, 4, 8) 937 GEN_VEXT_VX(vwsubu_vx_b, 1, 2) 938 GEN_VEXT_VX(vwsubu_vx_h, 2, 4) 939 GEN_VEXT_VX(vwsubu_vx_w, 4, 8) 940 GEN_VEXT_VX(vwadd_vx_b, 1, 2) 941 GEN_VEXT_VX(vwadd_vx_h, 2, 4) 942 GEN_VEXT_VX(vwadd_vx_w, 4, 8) 943 GEN_VEXT_VX(vwsub_vx_b, 1, 2) 944 GEN_VEXT_VX(vwsub_vx_h, 2, 4) 945 GEN_VEXT_VX(vwsub_vx_w, 4, 8) 946 GEN_VEXT_VX(vwaddu_wx_b, 1, 2) 947 GEN_VEXT_VX(vwaddu_wx_h, 2, 4) 948 GEN_VEXT_VX(vwaddu_wx_w, 4, 8) 949 GEN_VEXT_VX(vwsubu_wx_b, 1, 2) 950 GEN_VEXT_VX(vwsubu_wx_h, 2, 4) 951 GEN_VEXT_VX(vwsubu_wx_w, 4, 8) 952 GEN_VEXT_VX(vwadd_wx_b, 1, 2) 953 GEN_VEXT_VX(vwadd_wx_h, 2, 4) 954 GEN_VEXT_VX(vwadd_wx_w, 4, 8) 955 GEN_VEXT_VX(vwsub_wx_b, 1, 2) 956 GEN_VEXT_VX(vwsub_wx_h, 2, 4) 957 GEN_VEXT_VX(vwsub_wx_w, 4, 8) 958 959 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 960 #define DO_VADC(N, M, C) (N + M + C) 961 #define DO_VSBC(N, M, C) (N - M - C) 962 963 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \ 964 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 965 CPURISCVState *env, uint32_t desc) \ 966 { \ 967 uint32_t vl = env->vl; \ 968 uint32_t i; \ 969 \ 970 for (i = 0; i < vl; i++) { \ 971 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 972 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 973 uint8_t carry = vext_elem_mask(v0, i); \ 974 \ 975 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 976 } \ 977 } 978 979 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC) 980 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC) 981 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC) 982 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC) 983 984 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC) 985 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC) 986 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC) 987 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC) 988 989 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \ 990 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 991 CPURISCVState *env, uint32_t desc) \ 992 { \ 993 uint32_t vl = env->vl; \ 994 uint32_t i; \ 995 \ 996 for (i = 0; i < vl; i++) { \ 997 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 998 uint8_t carry = vext_elem_mask(v0, i); \ 999 \ 1000 *((ETYPE *)vd + H(i)) = 
DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1001 } \ 1002 } 1003 1004 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC) 1005 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC) 1006 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC) 1007 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC) 1008 1009 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC) 1010 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC) 1011 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC) 1012 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC) 1013 1014 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1015 (__typeof(N))(N + M) < N) 1016 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 1017 1018 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1019 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1020 CPURISCVState *env, uint32_t desc) \ 1021 { \ 1022 uint32_t vl = env->vl; \ 1023 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 1024 uint32_t i; \ 1025 \ 1026 for (i = 0; i < vl; i++) { \ 1027 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1028 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1029 uint8_t carry = vext_elem_mask(v0, i); \ 1030 \ 1031 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \ 1032 } \ 1033 for (; i < vlmax; i++) { \ 1034 vext_set_elem_mask(vd, i, 0); \ 1035 } \ 1036 } 1037 1038 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1039 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1040 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1041 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1042 1043 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1044 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1045 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1046 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1047 1048 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1049 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1050 void *vs2, CPURISCVState *env, uint32_t desc) \ 1051 { \ 1052 uint32_t vl = env->vl; \ 1053 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 1054 uint32_t i; \ 1055 \ 1056 for (i = 0; i < vl; i++) { \ 1057 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1058 uint8_t carry = vext_elem_mask(v0, i); \ 1059 \ 1060 vext_set_elem_mask(vd, i, \ 1061 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1062 } \ 1063 for (; i < vlmax; i++) { \ 1064 vext_set_elem_mask(vd, i, 0); \ 1065 } \ 1066 } 1067 1068 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1069 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1070 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1071 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1072 1073 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1074 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1075 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1076 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1077 1078 /* Vector Bitwise Logical Instructions */ 1079 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1080 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1081 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1082 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1083 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1084 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1085 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1086 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1087 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1088 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1089 
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1090 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1091 GEN_VEXT_VV(vand_vv_b, 1, 1) 1092 GEN_VEXT_VV(vand_vv_h, 2, 2) 1093 GEN_VEXT_VV(vand_vv_w, 4, 4) 1094 GEN_VEXT_VV(vand_vv_d, 8, 8) 1095 GEN_VEXT_VV(vor_vv_b, 1, 1) 1096 GEN_VEXT_VV(vor_vv_h, 2, 2) 1097 GEN_VEXT_VV(vor_vv_w, 4, 4) 1098 GEN_VEXT_VV(vor_vv_d, 8, 8) 1099 GEN_VEXT_VV(vxor_vv_b, 1, 1) 1100 GEN_VEXT_VV(vxor_vv_h, 2, 2) 1101 GEN_VEXT_VV(vxor_vv_w, 4, 4) 1102 GEN_VEXT_VV(vxor_vv_d, 8, 8) 1103 1104 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1105 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1106 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1107 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1108 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1109 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1110 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1111 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1112 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1113 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1114 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1115 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1116 GEN_VEXT_VX(vand_vx_b, 1, 1) 1117 GEN_VEXT_VX(vand_vx_h, 2, 2) 1118 GEN_VEXT_VX(vand_vx_w, 4, 4) 1119 GEN_VEXT_VX(vand_vx_d, 8, 8) 1120 GEN_VEXT_VX(vor_vx_b, 1, 1) 1121 GEN_VEXT_VX(vor_vx_h, 2, 2) 1122 GEN_VEXT_VX(vor_vx_w, 4, 4) 1123 GEN_VEXT_VX(vor_vx_d, 8, 8) 1124 GEN_VEXT_VX(vxor_vx_b, 1, 1) 1125 GEN_VEXT_VX(vxor_vx_h, 2, 2) 1126 GEN_VEXT_VX(vxor_vx_w, 4, 4) 1127 GEN_VEXT_VX(vxor_vx_d, 8, 8) 1128 1129 /* Vector Single-Width Bit Shift Instructions */ 1130 #define DO_SLL(N, M) (N << (M)) 1131 #define DO_SRL(N, M) (N >> (M)) 1132 1133 /* generate the helpers for shift instructions with two vector operators */ 1134 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \ 1135 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1136 void *vs2, CPURISCVState *env, uint32_t desc) \ 1137 { \ 1138 uint32_t vm = vext_vm(desc); \ 1139 uint32_t vl = env->vl; \ 1140 uint32_t i; \ 1141 \ 1142 for (i = 0; i < vl; i++) { \ 1143 if (!vm && !vext_elem_mask(v0, i)) { \ 1144 continue; \ 1145 } \ 1146 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1147 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1148 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \ 1149 } \ 1150 } 1151 1152 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7) 1153 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf) 1154 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f) 1155 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f) 1156 1157 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1158 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1159 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1160 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1161 1162 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7) 1163 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf) 1164 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1165 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1166 1167 /* generate the helpers for shift instructions with one vector and one scalar */ 1168 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \ 1169 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1170 void *vs2, CPURISCVState *env, uint32_t desc) \ 
1171 { \ 1172 uint32_t vm = vext_vm(desc); \ 1173 uint32_t vl = env->vl; \ 1174 uint32_t i; \ 1175 \ 1176 for (i = 0; i < vl; i++) { \ 1177 if (!vm && !vext_elem_mask(v0, i)) { \ 1178 continue; \ 1179 } \ 1180 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1181 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1182 } \ 1183 } 1184 1185 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7) 1186 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf) 1187 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f) 1188 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f) 1189 1190 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7) 1191 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf) 1192 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f) 1193 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f) 1194 1195 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7) 1196 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf) 1197 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f) 1198 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f) 1199 1200 /* Vector Narrowing Integer Right Shift Instructions */ 1201 GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1202 GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1203 GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1204 GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf) 1205 GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1206 GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1207 GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf) 1208 GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f) 1209 GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f) 1210 GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf) 1211 GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f) 1212 GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f) 1213 1214 /* Vector Integer Comparison Instructions */ 1215 #define DO_MSEQ(N, M) (N == M) 1216 #define DO_MSNE(N, M) (N != M) 1217 #define DO_MSLT(N, M) (N < M) 1218 #define DO_MSLE(N, M) (N <= M) 1219 #define DO_MSGT(N, M) (N > M) 1220 1221 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1222 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1223 CPURISCVState *env, uint32_t desc) \ 1224 { \ 1225 uint32_t vm = vext_vm(desc); \ 1226 uint32_t vl = env->vl; \ 1227 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 1228 uint32_t i; \ 1229 \ 1230 for (i = 0; i < vl; i++) { \ 1231 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1232 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1233 if (!vm && !vext_elem_mask(v0, i)) { \ 1234 continue; \ 1235 } \ 1236 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \ 1237 } \ 1238 for (; i < vlmax; i++) { \ 1239 vext_set_elem_mask(vd, i, 0); \ 1240 } \ 1241 } 1242 1243 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1244 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1245 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1246 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1247 1248 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1249 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1250 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1251 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1252 1253 
GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1254 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1255 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1256 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1257 1258 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1259 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1260 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1261 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1262 1263 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1264 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1265 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1266 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1267 1268 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1269 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1270 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1271 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1272 1273 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1274 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1275 CPURISCVState *env, uint32_t desc) \ 1276 { \ 1277 uint32_t vm = vext_vm(desc); \ 1278 uint32_t vl = env->vl; \ 1279 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 1280 uint32_t i; \ 1281 \ 1282 for (i = 0; i < vl; i++) { \ 1283 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1284 if (!vm && !vext_elem_mask(v0, i)) { \ 1285 continue; \ 1286 } \ 1287 vext_set_elem_mask(vd, i, \ 1288 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1289 } \ 1290 for (; i < vlmax; i++) { \ 1291 vext_set_elem_mask(vd, i, 0); \ 1292 } \ 1293 } 1294 1295 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1296 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 1297 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1298 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1299 1300 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1301 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1302 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1303 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1304 1305 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1306 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1307 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1308 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1309 1310 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1311 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1312 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1313 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1314 1315 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1316 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1317 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1318 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1319 1320 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1321 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1322 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1323 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1324 1325 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1326 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1327 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1328 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1329 1330 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1331 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1332 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1333 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1334 1335 /* Vector Integer Min/Max Instructions */ 1336 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1337 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 
1338 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1339 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1340 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1341 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1342 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1343 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1344 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1345 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1346 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1347 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1348 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1349 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1350 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1351 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1352 GEN_VEXT_VV(vminu_vv_b, 1, 1) 1353 GEN_VEXT_VV(vminu_vv_h, 2, 2) 1354 GEN_VEXT_VV(vminu_vv_w, 4, 4) 1355 GEN_VEXT_VV(vminu_vv_d, 8, 8) 1356 GEN_VEXT_VV(vmin_vv_b, 1, 1) 1357 GEN_VEXT_VV(vmin_vv_h, 2, 2) 1358 GEN_VEXT_VV(vmin_vv_w, 4, 4) 1359 GEN_VEXT_VV(vmin_vv_d, 8, 8) 1360 GEN_VEXT_VV(vmaxu_vv_b, 1, 1) 1361 GEN_VEXT_VV(vmaxu_vv_h, 2, 2) 1362 GEN_VEXT_VV(vmaxu_vv_w, 4, 4) 1363 GEN_VEXT_VV(vmaxu_vv_d, 8, 8) 1364 GEN_VEXT_VV(vmax_vv_b, 1, 1) 1365 GEN_VEXT_VV(vmax_vv_h, 2, 2) 1366 GEN_VEXT_VV(vmax_vv_w, 4, 4) 1367 GEN_VEXT_VV(vmax_vv_d, 8, 8) 1368 1369 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1370 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1371 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1372 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1373 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1374 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1375 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1376 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1377 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1378 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1379 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1380 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1381 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1382 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1383 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1384 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1385 GEN_VEXT_VX(vminu_vx_b, 1, 1) 1386 GEN_VEXT_VX(vminu_vx_h, 2, 2) 1387 GEN_VEXT_VX(vminu_vx_w, 4, 4) 1388 GEN_VEXT_VX(vminu_vx_d, 8, 8) 1389 GEN_VEXT_VX(vmin_vx_b, 1, 1) 1390 GEN_VEXT_VX(vmin_vx_h, 2, 2) 1391 GEN_VEXT_VX(vmin_vx_w, 4, 4) 1392 GEN_VEXT_VX(vmin_vx_d, 8, 8) 1393 GEN_VEXT_VX(vmaxu_vx_b, 1, 1) 1394 GEN_VEXT_VX(vmaxu_vx_h, 2, 2) 1395 GEN_VEXT_VX(vmaxu_vx_w, 4, 4) 1396 GEN_VEXT_VX(vmaxu_vx_d, 8, 8) 1397 GEN_VEXT_VX(vmax_vx_b, 1, 1) 1398 GEN_VEXT_VX(vmax_vx_h, 2, 2) 1399 GEN_VEXT_VX(vmax_vx_w, 4, 4) 1400 GEN_VEXT_VX(vmax_vx_d, 8, 8) 1401 1402 /* Vector Single-Width Integer Multiply Instructions */ 1403 #define DO_MUL(N, M) (N * M) 1404 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1405 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1406 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1407 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1408 GEN_VEXT_VV(vmul_vv_b, 1, 1) 1409 GEN_VEXT_VV(vmul_vv_h, 2, 2) 1410 GEN_VEXT_VV(vmul_vv_w, 4, 4) 1411 GEN_VEXT_VV(vmul_vv_d, 8, 8) 1412 1413 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1414 { 1415 return (int16_t)s2 * (int16_t)s1 >> 8; 1416 } 1417 1418 static int16_t 
do_mulh_h(int16_t s2, int16_t s1) 1419 { 1420 return (int32_t)s2 * (int32_t)s1 >> 16; 1421 } 1422 1423 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1424 { 1425 return (int64_t)s2 * (int64_t)s1 >> 32; 1426 } 1427 1428 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1429 { 1430 uint64_t hi_64, lo_64; 1431 1432 muls64(&lo_64, &hi_64, s1, s2); 1433 return hi_64; 1434 } 1435 1436 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1437 { 1438 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1439 } 1440 1441 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1442 { 1443 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1444 } 1445 1446 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1447 { 1448 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1449 } 1450 1451 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1452 { 1453 uint64_t hi_64, lo_64; 1454 1455 mulu64(&lo_64, &hi_64, s2, s1); 1456 return hi_64; 1457 } 1458 1459 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1460 { 1461 return (int16_t)s2 * (uint16_t)s1 >> 8; 1462 } 1463 1464 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1465 { 1466 return (int32_t)s2 * (uint32_t)s1 >> 16; 1467 } 1468 1469 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1470 { 1471 return (int64_t)s2 * (uint64_t)s1 >> 32; 1472 } 1473 1474 /* 1475 * Let A = signed operand, 1476 * B = unsigned operand 1477 * P = mulu64(A, B), unsigned product 1478 * 1479 * LET X = 2 ** 64 - A, 2's complement of A 1480 * SP = signed product 1481 * THEN 1482 * IF A < 0 1483 * SP = -X * B 1484 * = -(2 ** 64 - A) * B 1485 * = A * B - 2 ** 64 * B 1486 * = P - 2 ** 64 * B 1487 * ELSE 1488 * SP = P 1489 * THEN 1490 * HI_P -= (A < 0 ? B : 0) 1491 */ 1492 1493 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1494 { 1495 uint64_t hi_64, lo_64; 1496 1497 mulu64(&lo_64, &hi_64, s2, s1); 1498 1499 hi_64 -= s2 < 0 ? 
s1 : 0; 1500 return hi_64; 1501 } 1502 1503 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1504 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1505 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1506 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1507 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1508 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1509 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1510 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1511 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1512 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1513 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1514 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1515 GEN_VEXT_VV(vmulh_vv_b, 1, 1) 1516 GEN_VEXT_VV(vmulh_vv_h, 2, 2) 1517 GEN_VEXT_VV(vmulh_vv_w, 4, 4) 1518 GEN_VEXT_VV(vmulh_vv_d, 8, 8) 1519 GEN_VEXT_VV(vmulhu_vv_b, 1, 1) 1520 GEN_VEXT_VV(vmulhu_vv_h, 2, 2) 1521 GEN_VEXT_VV(vmulhu_vv_w, 4, 4) 1522 GEN_VEXT_VV(vmulhu_vv_d, 8, 8) 1523 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) 1524 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) 1525 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) 1526 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) 1527 1528 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1529 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1530 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1531 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1532 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1533 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1534 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1535 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1536 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1537 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1538 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1539 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1540 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1541 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1542 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1543 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1544 GEN_VEXT_VX(vmul_vx_b, 1, 1) 1545 GEN_VEXT_VX(vmul_vx_h, 2, 2) 1546 GEN_VEXT_VX(vmul_vx_w, 4, 4) 1547 GEN_VEXT_VX(vmul_vx_d, 8, 8) 1548 GEN_VEXT_VX(vmulh_vx_b, 1, 1) 1549 GEN_VEXT_VX(vmulh_vx_h, 2, 2) 1550 GEN_VEXT_VX(vmulh_vx_w, 4, 4) 1551 GEN_VEXT_VX(vmulh_vx_d, 8, 8) 1552 GEN_VEXT_VX(vmulhu_vx_b, 1, 1) 1553 GEN_VEXT_VX(vmulhu_vx_h, 2, 2) 1554 GEN_VEXT_VX(vmulhu_vx_w, 4, 4) 1555 GEN_VEXT_VX(vmulhu_vx_d, 8, 8) 1556 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) 1557 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) 1558 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) 1559 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) 1560 1561 /* Vector Integer Divide Instructions */ 1562 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1563 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1564 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1565 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1566 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1567 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 
0 : N % M) 1568 1569 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1570 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1571 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1572 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1573 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1574 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1575 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1576 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1577 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1578 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1579 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1580 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1581 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1582 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1583 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1584 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1585 GEN_VEXT_VV(vdivu_vv_b, 1, 1) 1586 GEN_VEXT_VV(vdivu_vv_h, 2, 2) 1587 GEN_VEXT_VV(vdivu_vv_w, 4, 4) 1588 GEN_VEXT_VV(vdivu_vv_d, 8, 8) 1589 GEN_VEXT_VV(vdiv_vv_b, 1, 1) 1590 GEN_VEXT_VV(vdiv_vv_h, 2, 2) 1591 GEN_VEXT_VV(vdiv_vv_w, 4, 4) 1592 GEN_VEXT_VV(vdiv_vv_d, 8, 8) 1593 GEN_VEXT_VV(vremu_vv_b, 1, 1) 1594 GEN_VEXT_VV(vremu_vv_h, 2, 2) 1595 GEN_VEXT_VV(vremu_vv_w, 4, 4) 1596 GEN_VEXT_VV(vremu_vv_d, 8, 8) 1597 GEN_VEXT_VV(vrem_vv_b, 1, 1) 1598 GEN_VEXT_VV(vrem_vv_h, 2, 2) 1599 GEN_VEXT_VV(vrem_vv_w, 4, 4) 1600 GEN_VEXT_VV(vrem_vv_d, 8, 8) 1601 1602 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1603 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1604 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1605 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1606 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1607 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1608 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1609 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1610 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1611 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1612 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1613 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1614 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1615 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1616 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1617 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1618 GEN_VEXT_VX(vdivu_vx_b, 1, 1) 1619 GEN_VEXT_VX(vdivu_vx_h, 2, 2) 1620 GEN_VEXT_VX(vdivu_vx_w, 4, 4) 1621 GEN_VEXT_VX(vdivu_vx_d, 8, 8) 1622 GEN_VEXT_VX(vdiv_vx_b, 1, 1) 1623 GEN_VEXT_VX(vdiv_vx_h, 2, 2) 1624 GEN_VEXT_VX(vdiv_vx_w, 4, 4) 1625 GEN_VEXT_VX(vdiv_vx_d, 8, 8) 1626 GEN_VEXT_VX(vremu_vx_b, 1, 1) 1627 GEN_VEXT_VX(vremu_vx_h, 2, 2) 1628 GEN_VEXT_VX(vremu_vx_w, 4, 4) 1629 GEN_VEXT_VX(vremu_vx_d, 8, 8) 1630 GEN_VEXT_VX(vrem_vx_b, 1, 1) 1631 GEN_VEXT_VX(vrem_vx_h, 2, 2) 1632 GEN_VEXT_VX(vrem_vx_w, 4, 4) 1633 GEN_VEXT_VX(vrem_vx_d, 8, 8) 1634 1635 /* Vector Widening Integer Multiply Instructions */ 1636 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1637 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1638 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1639 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1640 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1641 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1642 RVVCALL(OPIVV2, 
vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1643 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1644 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1645 GEN_VEXT_VV(vwmul_vv_b, 1, 2) 1646 GEN_VEXT_VV(vwmul_vv_h, 2, 4) 1647 GEN_VEXT_VV(vwmul_vv_w, 4, 8) 1648 GEN_VEXT_VV(vwmulu_vv_b, 1, 2) 1649 GEN_VEXT_VV(vwmulu_vv_h, 2, 4) 1650 GEN_VEXT_VV(vwmulu_vv_w, 4, 8) 1651 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) 1652 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) 1653 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) 1654 1655 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1656 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1657 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1658 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1659 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1660 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1661 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1662 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1663 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1664 GEN_VEXT_VX(vwmul_vx_b, 1, 2) 1665 GEN_VEXT_VX(vwmul_vx_h, 2, 4) 1666 GEN_VEXT_VX(vwmul_vx_w, 4, 8) 1667 GEN_VEXT_VX(vwmulu_vx_b, 1, 2) 1668 GEN_VEXT_VX(vwmulu_vx_h, 2, 4) 1669 GEN_VEXT_VX(vwmulu_vx_w, 4, 8) 1670 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) 1671 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) 1672 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) 1673 1674 /* Vector Single-Width Integer Multiply-Add Instructions */ 1675 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1676 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1677 { \ 1678 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1679 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1680 TD d = *((TD *)vd + HD(i)); \ 1681 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1682 } 1683 1684 #define DO_MACC(N, M, D) (M * N + D) 1685 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1686 #define DO_MADD(N, M, D) (M * D + N) 1687 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1688 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1689 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1690 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1691 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1692 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1693 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1694 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1695 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1696 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1697 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1698 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1699 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1700 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1701 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1702 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1703 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1704 GEN_VEXT_VV(vmacc_vv_b, 1, 1) 1705 GEN_VEXT_VV(vmacc_vv_h, 2, 2) 1706 GEN_VEXT_VV(vmacc_vv_w, 4, 4) 1707 GEN_VEXT_VV(vmacc_vv_d, 8, 8) 1708 GEN_VEXT_VV(vnmsac_vv_b, 1, 1) 1709 GEN_VEXT_VV(vnmsac_vv_h, 2, 2) 1710 GEN_VEXT_VV(vnmsac_vv_w, 4, 4) 1711 GEN_VEXT_VV(vnmsac_vv_d, 8, 8) 1712 GEN_VEXT_VV(vmadd_vv_b, 1, 1) 1713 GEN_VEXT_VV(vmadd_vv_h, 2, 2) 1714 GEN_VEXT_VV(vmadd_vv_w, 4, 4) 1715 GEN_VEXT_VV(vmadd_vv_d, 8, 8) 1716 GEN_VEXT_VV(vnmsub_vv_b, 1, 1) 1717 GEN_VEXT_VV(vnmsub_vv_h, 2, 2) 1718 GEN_VEXT_VV(vnmsub_vv_w, 4, 4) 1719 GEN_VEXT_VV(vnmsub_vv_d, 8, 8) 1720 
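/*
 * Illustrative sketch (comment only, not compiled): assuming RVVCALL(macro, ...)
 * simply forwards its arguments and OP_SSS_B expands to the all-int8_t type
 * tuple (TD, T1, T2, TX1, TX2), both as defined earlier in this file, the line
 *
 *     RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
 *
 * above expands OPIVV3 into roughly:
 *
 *     static void do_vmacc_vv_b(void *vd, void *vs1, void *vs2, int i)
 *     {
 *         int8_t s1 = *((int8_t *)vs1 + H1(i));
 *         int8_t s2 = *((int8_t *)vs2 + H1(i));
 *         int8_t d  = *((int8_t *)vd + H1(i));
 *         *((int8_t *)vd + H1(i)) = DO_MACC(s2, s1, d);   // = s1 * s2 + d
 *     }
 *
 * and the matching GEN_VEXT_VV(vmacc_vv_b, 1, 1) wraps this per-element
 * function in the HELPER(vmacc_vv_b) loop (emitted by the GEN_VEXT_VV macro
 * defined earlier in this file) that applies it to each active element.
 */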
1721 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1722 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1723 { \ 1724 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1725 TD d = *((TD *)vd + HD(i)); \ 1726 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1727 } 1728 1729 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1730 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1731 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1732 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1733 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1734 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1735 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1736 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1737 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1738 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1739 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1740 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1741 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1742 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1743 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1744 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1745 GEN_VEXT_VX(vmacc_vx_b, 1, 1) 1746 GEN_VEXT_VX(vmacc_vx_h, 2, 2) 1747 GEN_VEXT_VX(vmacc_vx_w, 4, 4) 1748 GEN_VEXT_VX(vmacc_vx_d, 8, 8) 1749 GEN_VEXT_VX(vnmsac_vx_b, 1, 1) 1750 GEN_VEXT_VX(vnmsac_vx_h, 2, 2) 1751 GEN_VEXT_VX(vnmsac_vx_w, 4, 4) 1752 GEN_VEXT_VX(vnmsac_vx_d, 8, 8) 1753 GEN_VEXT_VX(vmadd_vx_b, 1, 1) 1754 GEN_VEXT_VX(vmadd_vx_h, 2, 2) 1755 GEN_VEXT_VX(vmadd_vx_w, 4, 4) 1756 GEN_VEXT_VX(vmadd_vx_d, 8, 8) 1757 GEN_VEXT_VX(vnmsub_vx_b, 1, 1) 1758 GEN_VEXT_VX(vnmsub_vx_h, 2, 2) 1759 GEN_VEXT_VX(vnmsub_vx_w, 4, 4) 1760 GEN_VEXT_VX(vnmsub_vx_d, 8, 8) 1761 1762 /* Vector Widening Integer Multiply-Add Instructions */ 1763 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1764 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1765 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1766 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1767 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1768 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1769 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1770 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1771 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1772 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) 1773 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) 1774 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) 1775 GEN_VEXT_VV(vwmacc_vv_b, 1, 2) 1776 GEN_VEXT_VV(vwmacc_vv_h, 2, 4) 1777 GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 1778 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) 1779 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) 1780 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) 1781 1782 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1783 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1784 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1785 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1786 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1787 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1788 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1789 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1790 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1791 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1792 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1793 RVVCALL(OPIVX3, 
vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1794 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) 1795 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) 1796 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) 1797 GEN_VEXT_VX(vwmacc_vx_b, 1, 2) 1798 GEN_VEXT_VX(vwmacc_vx_h, 2, 4) 1799 GEN_VEXT_VX(vwmacc_vx_w, 4, 8) 1800 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) 1801 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) 1802 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) 1803 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) 1804 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) 1805 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) 1806 1807 /* Vector Integer Merge and Move Instructions */ 1808 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1809 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1810 uint32_t desc) \ 1811 { \ 1812 uint32_t vl = env->vl; \ 1813 uint32_t i; \ 1814 \ 1815 for (i = 0; i < vl; i++) { \ 1816 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1817 *((ETYPE *)vd + H(i)) = s1; \ 1818 } \ 1819 } 1820 1821 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1822 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1823 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1824 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1825 1826 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1827 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1828 uint32_t desc) \ 1829 { \ 1830 uint32_t vl = env->vl; \ 1831 uint32_t i; \ 1832 \ 1833 for (i = 0; i < vl; i++) { \ 1834 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1835 } \ 1836 } 1837 1838 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1839 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1840 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1841 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1842 1843 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1844 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1845 CPURISCVState *env, uint32_t desc) \ 1846 { \ 1847 uint32_t vl = env->vl; \ 1848 uint32_t i; \ 1849 \ 1850 for (i = 0; i < vl; i++) { \ 1851 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \ 1852 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1853 } \ 1854 } 1855 1856 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1857 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1858 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1859 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1860 1861 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1862 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1863 void *vs2, CPURISCVState *env, uint32_t desc) \ 1864 { \ 1865 uint32_t vl = env->vl; \ 1866 uint32_t i; \ 1867 \ 1868 for (i = 0; i < vl; i++) { \ 1869 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1870 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 1871 (ETYPE)(target_long)s1); \ 1872 *((ETYPE *)vd + H(i)) = d; \ 1873 } \ 1874 } 1875 1876 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 1877 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 1878 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 1879 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 1880 1881 /* 1882 *** Vector Fixed-Point Arithmetic Instructions 1883 */ 1884 1885 /* Vector Single-Width Saturating Add and Subtract */ 1886 1887 /* 1888 * As fixed point instructions probably have round mode and saturation, 1889 * define common macros for fixed point here. 
1890 */ 1891 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 1892 CPURISCVState *env, int vxrm); 1893 1894 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1895 static inline void \ 1896 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 1897 CPURISCVState *env, int vxrm) \ 1898 { \ 1899 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1900 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1901 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 1902 } 1903 1904 static inline void 1905 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 1906 CPURISCVState *env, 1907 uint32_t vl, uint32_t vm, int vxrm, 1908 opivv2_rm_fn *fn) 1909 { 1910 for (uint32_t i = 0; i < vl; i++) { 1911 if (!vm && !vext_elem_mask(v0, i)) { 1912 continue; 1913 } 1914 fn(vd, vs1, vs2, i, env, vxrm); 1915 } 1916 } 1917 1918 static inline void 1919 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 1920 CPURISCVState *env, 1921 uint32_t desc, uint32_t esz, uint32_t dsz, 1922 opivv2_rm_fn *fn) 1923 { 1924 uint32_t vm = vext_vm(desc); 1925 uint32_t vl = env->vl; 1926 1927 switch (env->vxrm) { 1928 case 0: /* rnu */ 1929 vext_vv_rm_1(vd, v0, vs1, vs2, 1930 env, vl, vm, 0, fn); 1931 break; 1932 case 1: /* rne */ 1933 vext_vv_rm_1(vd, v0, vs1, vs2, 1934 env, vl, vm, 1, fn); 1935 break; 1936 case 2: /* rdn */ 1937 vext_vv_rm_1(vd, v0, vs1, vs2, 1938 env, vl, vm, 2, fn); 1939 break; 1940 default: /* rod */ 1941 vext_vv_rm_1(vd, v0, vs1, vs2, 1942 env, vl, vm, 3, fn); 1943 break; 1944 } 1945 } 1946 1947 /* generate helpers for fixed point instructions with OPIVV format */ 1948 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ 1949 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1950 CPURISCVState *env, uint32_t desc) \ 1951 { \ 1952 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 1953 do_##NAME); \ 1954 } 1955 1956 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 1957 { 1958 uint8_t res = a + b; 1959 if (res < a) { 1960 res = UINT8_MAX; 1961 env->vxsat = 0x1; 1962 } 1963 return res; 1964 } 1965 1966 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 1967 uint16_t b) 1968 { 1969 uint16_t res = a + b; 1970 if (res < a) { 1971 res = UINT16_MAX; 1972 env->vxsat = 0x1; 1973 } 1974 return res; 1975 } 1976 1977 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 1978 uint32_t b) 1979 { 1980 uint32_t res = a + b; 1981 if (res < a) { 1982 res = UINT32_MAX; 1983 env->vxsat = 0x1; 1984 } 1985 return res; 1986 } 1987 1988 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 1989 uint64_t b) 1990 { 1991 uint64_t res = a + b; 1992 if (res < a) { 1993 res = UINT64_MAX; 1994 env->vxsat = 0x1; 1995 } 1996 return res; 1997 } 1998 1999 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2000 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2001 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2002 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2003 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) 2004 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) 2005 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) 2006 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) 2007 2008 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2009 CPURISCVState *env, int vxrm); 2010 2011 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2012 static inline void \ 2013 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2014 CPURISCVState *env, int vxrm) \ 2015 { \ 2016 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2017 *((TD *)vd + HD(i)) = OP(env, 
vxrm, s2, (TX1)(T1)s1); \ 2018 } 2019 2020 static inline void 2021 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2022 CPURISCVState *env, 2023 uint32_t vl, uint32_t vm, int vxrm, 2024 opivx2_rm_fn *fn) 2025 { 2026 for (uint32_t i = 0; i < vl; i++) { 2027 if (!vm && !vext_elem_mask(v0, i)) { 2028 continue; 2029 } 2030 fn(vd, s1, vs2, i, env, vxrm); 2031 } 2032 } 2033 2034 static inline void 2035 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2036 CPURISCVState *env, 2037 uint32_t desc, uint32_t esz, uint32_t dsz, 2038 opivx2_rm_fn *fn) 2039 { 2040 uint32_t vm = vext_vm(desc); 2041 uint32_t vl = env->vl; 2042 2043 switch (env->vxrm) { 2044 case 0: /* rnu */ 2045 vext_vx_rm_1(vd, v0, s1, vs2, 2046 env, vl, vm, 0, fn); 2047 break; 2048 case 1: /* rne */ 2049 vext_vx_rm_1(vd, v0, s1, vs2, 2050 env, vl, vm, 1, fn); 2051 break; 2052 case 2: /* rdn */ 2053 vext_vx_rm_1(vd, v0, s1, vs2, 2054 env, vl, vm, 2, fn); 2055 break; 2056 default: /* rod */ 2057 vext_vx_rm_1(vd, v0, s1, vs2, 2058 env, vl, vm, 3, fn); 2059 break; 2060 } 2061 } 2062 2063 /* generate helpers for fixed point instructions with OPIVX format */ 2064 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ 2065 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2066 void *vs2, CPURISCVState *env, uint32_t desc) \ 2067 { \ 2068 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 2069 do_##NAME); \ 2070 } 2071 2072 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2073 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2074 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2075 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2076 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 2077 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) 2078 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) 2079 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) 2080 2081 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2082 { 2083 int8_t res = a + b; 2084 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2085 res = a > 0 ? INT8_MAX : INT8_MIN; 2086 env->vxsat = 0x1; 2087 } 2088 return res; 2089 } 2090 2091 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2092 { 2093 int16_t res = a + b; 2094 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2095 res = a > 0 ? INT16_MAX : INT16_MIN; 2096 env->vxsat = 0x1; 2097 } 2098 return res; 2099 } 2100 2101 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2102 { 2103 int32_t res = a + b; 2104 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2105 res = a > 0 ? INT32_MAX : INT32_MIN; 2106 env->vxsat = 0x1; 2107 } 2108 return res; 2109 } 2110 2111 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2112 { 2113 int64_t res = a + b; 2114 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2115 res = a > 0 ? 
INT64_MAX : INT64_MIN; 2116 env->vxsat = 0x1; 2117 } 2118 return res; 2119 } 2120 2121 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2122 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2123 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2124 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2125 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) 2126 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) 2127 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) 2128 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) 2129 2130 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2131 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2132 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2133 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2134 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) 2135 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) 2136 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) 2137 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) 2138 2139 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2140 { 2141 uint8_t res = a - b; 2142 if (res > a) { 2143 res = 0; 2144 env->vxsat = 0x1; 2145 } 2146 return res; 2147 } 2148 2149 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2150 uint16_t b) 2151 { 2152 uint16_t res = a - b; 2153 if (res > a) { 2154 res = 0; 2155 env->vxsat = 0x1; 2156 } 2157 return res; 2158 } 2159 2160 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2161 uint32_t b) 2162 { 2163 uint32_t res = a - b; 2164 if (res > a) { 2165 res = 0; 2166 env->vxsat = 0x1; 2167 } 2168 return res; 2169 } 2170 2171 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2172 uint64_t b) 2173 { 2174 uint64_t res = a - b; 2175 if (res > a) { 2176 res = 0; 2177 env->vxsat = 0x1; 2178 } 2179 return res; 2180 } 2181 2182 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2183 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2184 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2185 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2186 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) 2187 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) 2188 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) 2189 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) 2190 2191 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2192 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2193 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2194 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2195 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) 2196 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) 2197 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) 2198 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) 2199 2200 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2201 { 2202 int8_t res = a - b; 2203 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2204 res = a >= 0 ? INT8_MAX : INT8_MIN; 2205 env->vxsat = 0x1; 2206 } 2207 return res; 2208 } 2209 2210 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2211 { 2212 int16_t res = a - b; 2213 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2214 res = a >= 0 ? INT16_MAX : INT16_MIN; 2215 env->vxsat = 0x1; 2216 } 2217 return res; 2218 } 2219 2220 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2221 { 2222 int32_t res = a - b; 2223 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2224 res = a >= 0 ? 
INT32_MAX : INT32_MIN; 2225 env->vxsat = 0x1; 2226 } 2227 return res; 2228 } 2229 2230 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2231 { 2232 int64_t res = a - b; 2233 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2234 res = a >= 0 ? INT64_MAX : INT64_MIN; 2235 env->vxsat = 0x1; 2236 } 2237 return res; 2238 } 2239 2240 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2241 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2242 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2243 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2244 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) 2245 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) 2246 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) 2247 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) 2248 2249 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2250 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2251 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2252 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2253 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) 2254 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) 2255 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) 2256 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) 2257 2258 /* Vector Single-Width Averaging Add and Subtract */ 2259 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2260 { 2261 uint8_t d = extract64(v, shift, 1); 2262 uint8_t d1; 2263 uint64_t D1, D2; 2264 2265 if (shift == 0 || shift > 64) { 2266 return 0; 2267 } 2268 2269 d1 = extract64(v, shift - 1, 1); 2270 D1 = extract64(v, 0, shift); 2271 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2272 return d1; 2273 } else if (vxrm == 1) { /* round-to-nearest-even */ 2274 if (shift > 1) { 2275 D2 = extract64(v, 0, shift - 1); 2276 return d1 & ((D2 != 0) | d); 2277 } else { 2278 return d1 & d; 2279 } 2280 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2281 return !d & (D1 != 0); 2282 } 2283 return 0; /* round-down (truncate) */ 2284 } 2285 2286 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2287 { 2288 int64_t res = (int64_t)a + b; 2289 uint8_t round = get_round(vxrm, res, 1); 2290 2291 return (res >> 1) + round; 2292 } 2293 2294 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2295 { 2296 int64_t res = a + b; 2297 uint8_t round = get_round(vxrm, res, 1); 2298 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2299 2300 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2301 return ((res >> 1) ^ over) + round; 2302 } 2303 2304 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2305 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2306 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2307 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2308 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) 2309 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 2310 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) 2311 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) 2312 2313 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2314 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2315 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2316 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2317 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) 2318 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) 2319 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) 2320 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) 2321 2322 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2323 { 2324 int64_t res = (int64_t)a - b; 2325 uint8_t round = get_round(vxrm, res, 1); 2326 2327 return (res >> 1) + round; 2328 } 2329 2330 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2331 { 2332 int64_t res = (int64_t)a - b; 2333 uint8_t round = get_round(vxrm, res, 1); 2334 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2335 2336 /* With signed overflow, bit 64 is inverse of bit 63. */ 2337 return ((res >> 1) ^ over) + round; 2338 } 2339 2340 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2341 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2342 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2343 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2344 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) 2345 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) 2346 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) 2347 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) 2348 2349 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2350 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2351 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2352 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2353 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) 2354 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) 2355 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) 2356 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) 2357 2358 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2359 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2360 { 2361 uint8_t round; 2362 int16_t res; 2363 2364 res = (int16_t)a * (int16_t)b; 2365 round = get_round(vxrm, res, 7); 2366 res = (res >> 7) + round; 2367 2368 if (res > INT8_MAX) { 2369 env->vxsat = 0x1; 2370 return INT8_MAX; 2371 } else if (res < INT8_MIN) { 2372 env->vxsat = 0x1; 2373 return INT8_MIN; 2374 } else { 2375 return res; 2376 } 2377 } 2378 2379 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2380 { 2381 uint8_t round; 2382 int32_t res; 2383 2384 res = (int32_t)a * (int32_t)b; 2385 round = get_round(vxrm, res, 15); 2386 res = (res >> 15) + round; 2387 2388 if (res > INT16_MAX) { 2389 env->vxsat = 0x1; 2390 return INT16_MAX; 2391 } else if (res < INT16_MIN) { 2392 env->vxsat = 0x1; 2393 return INT16_MIN; 2394 } else { 2395 return res; 2396 } 2397 } 2398 2399 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2400 { 2401 uint8_t round; 2402 int64_t res; 2403 2404 res = (int64_t)a * (int64_t)b; 2405 round = get_round(vxrm, res, 31); 2406 res = (res >> 31) + round; 2407 2408 if (res > INT32_MAX) { 2409 env->vxsat = 0x1; 2410 return 
INT32_MAX; 2411 } else if (res < INT32_MIN) { 2412 env->vxsat = 0x1; 2413 return INT32_MIN; 2414 } else { 2415 return res; 2416 } 2417 } 2418 2419 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2420 { 2421 uint8_t round; 2422 uint64_t hi_64, lo_64; 2423 int64_t res; 2424 2425 if (a == INT64_MIN && b == INT64_MIN) { 2426 env->vxsat = 1; 2427 return INT64_MAX; 2428 } 2429 2430 muls64(&lo_64, &hi_64, a, b); 2431 round = get_round(vxrm, lo_64, 63); 2432 /* 2433 * Cannot overflow, as there are always 2434 * 2 sign bits after multiply. 2435 */ 2436 res = (hi_64 << 1) | (lo_64 >> 63); 2437 if (round) { 2438 if (res == INT64_MAX) { 2439 env->vxsat = 1; 2440 } else { 2441 res += 1; 2442 } 2443 } 2444 return res; 2445 } 2446 2447 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2448 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2449 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2450 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2451 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) 2452 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) 2453 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) 2454 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) 2455 2456 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2457 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2458 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2459 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2460 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) 2461 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) 2462 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) 2463 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) 2464 2465 /* Vector Widening Saturating Scaled Multiply-Add */ 2466 static inline uint16_t 2467 vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, 2468 uint16_t c) 2469 { 2470 uint8_t round; 2471 uint16_t res = (uint16_t)a * b; 2472 2473 round = get_round(vxrm, res, 4); 2474 res = (res >> 4) + round; 2475 return saddu16(env, vxrm, c, res); 2476 } 2477 2478 static inline uint32_t 2479 vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, 2480 uint32_t c) 2481 { 2482 uint8_t round; 2483 uint32_t res = (uint32_t)a * b; 2484 2485 round = get_round(vxrm, res, 8); 2486 res = (res >> 8) + round; 2487 return saddu32(env, vxrm, c, res); 2488 } 2489 2490 static inline uint64_t 2491 vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, 2492 uint64_t c) 2493 { 2494 uint8_t round; 2495 uint64_t res = (uint64_t)a * b; 2496 2497 round = get_round(vxrm, res, 16); 2498 res = (res >> 16) + round; 2499 return saddu64(env, vxrm, c, res); 2500 } 2501 2502 #define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2503 static inline void \ 2504 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2505 CPURISCVState *env, int vxrm) \ 2506 { \ 2507 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2508 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2509 TD d = *((TD *)vd + HD(i)); \ 2510 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ 2511 } 2512 2513 RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) 2514 RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) 2515 RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) 2516 GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2) 2517 GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4) 2518 GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8) 2519 2520 #define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2521 static inline void \ 2522 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2523 CPURISCVState *env, int vxrm) \ 2524 { \ 2525 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2526 TD d = *((TD *)vd + 
HD(i)); \ 2527 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ 2528 } 2529 2530 RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) 2531 RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) 2532 RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) 2533 GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2) 2534 GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4) 2535 GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8) 2536 2537 static inline int16_t 2538 vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) 2539 { 2540 uint8_t round; 2541 int16_t res = (int16_t)a * b; 2542 2543 round = get_round(vxrm, res, 4); 2544 res = (res >> 4) + round; 2545 return sadd16(env, vxrm, c, res); 2546 } 2547 2548 static inline int32_t 2549 vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) 2550 { 2551 uint8_t round; 2552 int32_t res = (int32_t)a * b; 2553 2554 round = get_round(vxrm, res, 8); 2555 res = (res >> 8) + round; 2556 return sadd32(env, vxrm, c, res); 2557 2558 } 2559 2560 static inline int64_t 2561 vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) 2562 { 2563 uint8_t round; 2564 int64_t res = (int64_t)a * b; 2565 2566 round = get_round(vxrm, res, 16); 2567 res = (res >> 16) + round; 2568 return sadd64(env, vxrm, c, res); 2569 } 2570 2571 RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) 2572 RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) 2573 RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) 2574 GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2) 2575 GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4) 2576 GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8) 2577 RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) 2578 RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) 2579 RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) 2580 GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2) 2581 GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4) 2582 GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8) 2583 2584 static inline int16_t 2585 vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) 2586 { 2587 uint8_t round; 2588 int16_t res = a * (int16_t)b; 2589 2590 round = get_round(vxrm, res, 4); 2591 res = (res >> 4) + round; 2592 return ssub16(env, vxrm, c, res); 2593 } 2594 2595 static inline int32_t 2596 vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) 2597 { 2598 uint8_t round; 2599 int32_t res = a * (int32_t)b; 2600 2601 round = get_round(vxrm, res, 8); 2602 res = (res >> 8) + round; 2603 return ssub32(env, vxrm, c, res); 2604 } 2605 2606 static inline int64_t 2607 vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) 2608 { 2609 uint8_t round; 2610 int64_t res = a * (int64_t)b; 2611 2612 round = get_round(vxrm, res, 16); 2613 res = (res >> 16) + round; 2614 return ssub64(env, vxrm, c, res); 2615 } 2616 2617 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) 2618 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) 2619 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) 2620 GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2) 2621 GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4) 2622 GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8) 2623 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) 2624 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) 2625 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) 2626 GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2) 2627 GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4) 2628 GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8) 2629 2630 
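/*
 * The widening saturating multiply-add helpers around here all share one
 * pattern: widen the product, shift it right by SEW/2 bits (4, 8 or 16),
 * use get_round() above to pick the rounding increment for the bits being
 * discarded, then fold the result into the accumulator through one of the
 * saturating helpers (saddu*, sadd*, ssub*).
 *
 * Worked example of the rounding step (numbers purely illustrative), for
 * v = 11 (0b1011) and shift = 2, i.e. dividing by 4:
 *   - rnu (vxrm == 0): increment = bit 1 of v = 1, so (11 >> 2) + 1 = 3
 *     (11 / 4 = 2.75 rounds up to 3).
 *   - rne (vxrm == 1): round to nearest, ties to even; v = 6, shift = 2
 *     gives (6 >> 2) + 1 = 2, since 1.5 rounds to the even value 2.
 *   - rdn (vxrm == 2): increment = 0, so 11 >> 2 simply truncates to 2.
 */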
static inline int16_t 2631 vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) 2632 { 2633 uint8_t round; 2634 int16_t res = (int16_t)a * b; 2635 2636 round = get_round(vxrm, res, 4); 2637 res = (res >> 4) + round; 2638 return ssub16(env, vxrm, c, res); 2639 } 2640 2641 static inline int32_t 2642 vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) 2643 { 2644 uint8_t round; 2645 int32_t res = (int32_t)a * b; 2646 2647 round = get_round(vxrm, res, 8); 2648 res = (res >> 8) + round; 2649 return ssub32(env, vxrm, c, res); 2650 } 2651 2652 static inline int64_t 2653 vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) 2654 { 2655 uint8_t round; 2656 int64_t res = (int64_t)a * b; 2657 2658 round = get_round(vxrm, res, 16); 2659 res = (res >> 16) + round; 2660 return ssub64(env, vxrm, c, res); 2661 } 2662 2663 RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8) 2664 RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) 2665 RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) 2666 GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2) 2667 GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4) 2668 GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8) 2669 2670 /* Vector Single-Width Scaling Shift Instructions */ 2671 static inline uint8_t 2672 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2673 { 2674 uint8_t round, shift = b & 0x7; 2675 uint8_t res; 2676 2677 round = get_round(vxrm, a, shift); 2678 res = (a >> shift) + round; 2679 return res; 2680 } 2681 static inline uint16_t 2682 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2683 { 2684 uint8_t round, shift = b & 0xf; 2685 uint16_t res; 2686 2687 round = get_round(vxrm, a, shift); 2688 res = (a >> shift) + round; 2689 return res; 2690 } 2691 static inline uint32_t 2692 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2693 { 2694 uint8_t round, shift = b & 0x1f; 2695 uint32_t res; 2696 2697 round = get_round(vxrm, a, shift); 2698 res = (a >> shift) + round; 2699 return res; 2700 } 2701 static inline uint64_t 2702 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2703 { 2704 uint8_t round, shift = b & 0x3f; 2705 uint64_t res; 2706 2707 round = get_round(vxrm, a, shift); 2708 res = (a >> shift) + round; 2709 return res; 2710 } 2711 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2712 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2713 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2714 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2715 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) 2716 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) 2717 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) 2718 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) 2719 2720 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2721 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2722 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2723 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2724 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) 2725 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) 2726 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) 2727 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) 2728 2729 static inline int8_t 2730 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2731 { 2732 uint8_t round, shift = b & 0x7; 2733 int8_t res; 2734 2735 round = get_round(vxrm, a, shift); 2736 res = (a >> shift) + round; 2737 return res; 2738 } 2739 static inline int16_t 2740 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2741 { 2742 uint8_t round, shift = b & 0xf; 2743 int16_t 
res; 2744 2745 round = get_round(vxrm, a, shift); 2746 res = (a >> shift) + round; 2747 return res; 2748 } 2749 static inline int32_t 2750 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2751 { 2752 uint8_t round, shift = b & 0x1f; 2753 int32_t res; 2754 2755 round = get_round(vxrm, a, shift); 2756 res = (a >> shift) + round; 2757 return res; 2758 } 2759 static inline int64_t 2760 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2761 { 2762 uint8_t round, shift = b & 0x3f; 2763 int64_t res; 2764 2765 round = get_round(vxrm, a, shift); 2766 res = (a >> shift) + round; 2767 return res; 2768 } 2769 2770 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2771 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2772 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2773 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2774 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) 2775 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) 2776 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) 2777 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) 2778 2779 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2780 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2781 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2782 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2783 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) 2784 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) 2785 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) 2786 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) 2787 2788 /* Vector Narrowing Fixed-Point Clip Instructions */ 2789 static inline int8_t 2790 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2791 { 2792 uint8_t round, shift = b & 0xf; 2793 int16_t res; 2794 2795 round = get_round(vxrm, a, shift); 2796 res = (a >> shift) + round; 2797 if (res > INT8_MAX) { 2798 env->vxsat = 0x1; 2799 return INT8_MAX; 2800 } else if (res < INT8_MIN) { 2801 env->vxsat = 0x1; 2802 return INT8_MIN; 2803 } else { 2804 return res; 2805 } 2806 } 2807 2808 static inline int16_t 2809 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2810 { 2811 uint8_t round, shift = b & 0x1f; 2812 int32_t res; 2813 2814 round = get_round(vxrm, a, shift); 2815 res = (a >> shift) + round; 2816 if (res > INT16_MAX) { 2817 env->vxsat = 0x1; 2818 return INT16_MAX; 2819 } else if (res < INT16_MIN) { 2820 env->vxsat = 0x1; 2821 return INT16_MIN; 2822 } else { 2823 return res; 2824 } 2825 } 2826 2827 static inline int32_t 2828 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2829 { 2830 uint8_t round, shift = b & 0x3f; 2831 int64_t res; 2832 2833 round = get_round(vxrm, a, shift); 2834 res = (a >> shift) + round; 2835 if (res > INT32_MAX) { 2836 env->vxsat = 0x1; 2837 return INT32_MAX; 2838 } else if (res < INT32_MIN) { 2839 env->vxsat = 0x1; 2840 return INT32_MIN; 2841 } else { 2842 return res; 2843 } 2844 } 2845 2846 RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2847 RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2848 RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2849 GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1) 2850 GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2) 2851 GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4) 2852 2853 RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8) 2854 RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16) 2855 RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32) 2856 GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1) 2857 GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2) 2858 GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4) 2859 2860 static inline uint8_t 2861 vnclipu8(CPURISCVState *env, int vxrm, uint16_t 
a, uint8_t b) 2862 { 2863 uint8_t round, shift = b & 0xf; 2864 uint16_t res; 2865 2866 round = get_round(vxrm, a, shift); 2867 res = (a >> shift) + round; 2868 if (res > UINT8_MAX) { 2869 env->vxsat = 0x1; 2870 return UINT8_MAX; 2871 } else { 2872 return res; 2873 } 2874 } 2875 2876 static inline uint16_t 2877 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 2878 { 2879 uint8_t round, shift = b & 0x1f; 2880 uint32_t res; 2881 2882 round = get_round(vxrm, a, shift); 2883 res = (a >> shift) + round; 2884 if (res > UINT16_MAX) { 2885 env->vxsat = 0x1; 2886 return UINT16_MAX; 2887 } else { 2888 return res; 2889 } 2890 } 2891 2892 static inline uint32_t 2893 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 2894 { 2895 uint8_t round, shift = b & 0x3f; 2896 int64_t res; 2897 2898 round = get_round(vxrm, a, shift); 2899 res = (a >> shift) + round; 2900 if (res > UINT32_MAX) { 2901 env->vxsat = 0x1; 2902 return UINT32_MAX; 2903 } else { 2904 return res; 2905 } 2906 } 2907 2908 RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 2909 RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 2910 RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 2911 GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1) 2912 GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2) 2913 GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4) 2914 2915 RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8) 2916 RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16) 2917 RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32) 2918 GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1) 2919 GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2) 2920 GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4) 2921 2922 /* 2923 *** Vector Float Point Arithmetic Instructions 2924 */ 2925 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 2926 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2927 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2928 CPURISCVState *env) \ 2929 { \ 2930 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2931 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2932 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 2933 } 2934 2935 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \ 2936 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 2937 void *vs2, CPURISCVState *env, \ 2938 uint32_t desc) \ 2939 { \ 2940 uint32_t vm = vext_vm(desc); \ 2941 uint32_t vl = env->vl; \ 2942 uint32_t i; \ 2943 \ 2944 for (i = 0; i < vl; i++) { \ 2945 if (!vm && !vext_elem_mask(v0, i)) { \ 2946 continue; \ 2947 } \ 2948 do_##NAME(vd, vs1, vs2, i, env); \ 2949 } \ 2950 } 2951 2952 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 2953 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 2954 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 2955 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) 2956 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4) 2957 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8) 2958 2959 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2960 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 2961 CPURISCVState *env) \ 2962 { \ 2963 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2964 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 2965 } 2966 2967 #define GEN_VEXT_VF(NAME, ESZ, DSZ) \ 2968 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 2969 void *vs2, CPURISCVState *env, \ 2970 uint32_t desc) \ 2971 { \ 2972 uint32_t vm = vext_vm(desc); \ 2973 uint32_t vl = env->vl; \ 2974 uint32_t i; \ 2975 \ 2976 for (i = 0; i < vl; i++) { \ 2977 if (!vm && !vext_elem_mask(v0, i)) { \ 2978 continue; \ 2979 } \ 2980 
do_##NAME(vd, s1, vs2, i, env); \ 2981 } \ 2982 } 2983 2984 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 2985 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 2986 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 2987 GEN_VEXT_VF(vfadd_vf_h, 2, 2) 2988 GEN_VEXT_VF(vfadd_vf_w, 4, 4) 2989 GEN_VEXT_VF(vfadd_vf_d, 8, 8) 2990 2991 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 2992 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 2993 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 2994 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) 2995 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) 2996 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) 2997 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 2998 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 2999 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3000 GEN_VEXT_VF(vfsub_vf_h, 2, 2) 3001 GEN_VEXT_VF(vfsub_vf_w, 4, 4) 3002 GEN_VEXT_VF(vfsub_vf_d, 8, 8) 3003 3004 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3005 { 3006 return float16_sub(b, a, s); 3007 } 3008 3009 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3010 { 3011 return float32_sub(b, a, s); 3012 } 3013 3014 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3015 { 3016 return float64_sub(b, a, s); 3017 } 3018 3019 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3020 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3021 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3022 GEN_VEXT_VF(vfrsub_vf_h, 2, 2) 3023 GEN_VEXT_VF(vfrsub_vf_w, 4, 4) 3024 GEN_VEXT_VF(vfrsub_vf_d, 8, 8) 3025 3026 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3027 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3028 { 3029 return float32_add(float16_to_float32(a, true, s), 3030 float16_to_float32(b, true, s), s); 3031 } 3032 3033 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3034 { 3035 return float64_add(float32_to_float64(a, s), 3036 float32_to_float64(b, s), s); 3037 3038 } 3039 3040 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3041 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3042 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) 3043 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) 3044 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3045 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3046 GEN_VEXT_VF(vfwadd_vf_h, 2, 4) 3047 GEN_VEXT_VF(vfwadd_vf_w, 4, 8) 3048 3049 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3050 { 3051 return float32_sub(float16_to_float32(a, true, s), 3052 float16_to_float32(b, true, s), s); 3053 } 3054 3055 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3056 { 3057 return float64_sub(float32_to_float64(a, s), 3058 float32_to_float64(b, s), s); 3059 3060 } 3061 3062 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3063 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3064 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) 3065 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) 3066 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3067 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3068 GEN_VEXT_VF(vfwsub_vf_h, 2, 4) 3069 GEN_VEXT_VF(vfwsub_vf_w, 4, 8) 3070 3071 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3072 { 3073 return float32_add(a, float16_to_float32(b, true, s), s); 3074 } 3075 3076 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3077 { 3078 
return float64_add(a, float32_to_float64(b, s), s); 3079 } 3080 3081 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3082 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3083 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) 3084 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 3085 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3086 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3087 GEN_VEXT_VF(vfwadd_wf_h, 2, 4) 3088 GEN_VEXT_VF(vfwadd_wf_w, 4, 8) 3089 3090 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3091 { 3092 return float32_sub(a, float16_to_float32(b, true, s), s); 3093 } 3094 3095 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3096 { 3097 return float64_sub(a, float32_to_float64(b, s), s); 3098 } 3099 3100 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3101 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3102 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) 3103 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) 3104 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3105 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3106 GEN_VEXT_VF(vfwsub_wf_h, 2, 4) 3107 GEN_VEXT_VF(vfwsub_wf_w, 4, 8) 3108 3109 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3110 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3111 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3112 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3113 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) 3114 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) 3115 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) 3116 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3117 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3118 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3119 GEN_VEXT_VF(vfmul_vf_h, 2, 2) 3120 GEN_VEXT_VF(vfmul_vf_w, 4, 4) 3121 GEN_VEXT_VF(vfmul_vf_d, 8, 8) 3122 3123 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3124 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3125 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3126 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 3127 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) 3128 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) 3129 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3130 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3131 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3132 GEN_VEXT_VF(vfdiv_vf_h, 2, 2) 3133 GEN_VEXT_VF(vfdiv_vf_w, 4, 4) 3134 GEN_VEXT_VF(vfdiv_vf_d, 8, 8) 3135 3136 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3137 { 3138 return float16_div(b, a, s); 3139 } 3140 3141 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3142 { 3143 return float32_div(b, a, s); 3144 } 3145 3146 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3147 { 3148 return float64_div(b, a, s); 3149 } 3150 3151 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3152 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3153 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3154 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) 3155 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) 3156 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) 3157 3158 /* Vector Widening Floating-Point Multiply */ 3159 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3160 { 3161 return float32_mul(float16_to_float32(a, true, s), 3162 float16_to_float32(b, true, s), s); 3163 } 3164 3165 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3166 { 3167 
return float64_mul(float32_to_float64(a, s), 3168 float32_to_float64(b, s), s); 3169 3170 } 3171 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3172 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3173 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) 3174 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) 3175 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3176 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3177 GEN_VEXT_VF(vfwmul_vf_h, 2, 4) 3178 GEN_VEXT_VF(vfwmul_vf_w, 4, 8) 3179 3180 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3181 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3182 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3183 CPURISCVState *env) \ 3184 { \ 3185 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3186 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3187 TD d = *((TD *)vd + HD(i)); \ 3188 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3189 } 3190 3191 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3192 { 3193 return float16_muladd(a, b, d, 0, s); 3194 } 3195 3196 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3197 { 3198 return float32_muladd(a, b, d, 0, s); 3199 } 3200 3201 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3202 { 3203 return float64_muladd(a, b, d, 0, s); 3204 } 3205 3206 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3207 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3208 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3209 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) 3210 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) 3211 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) 3212 3213 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3214 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3215 CPURISCVState *env) \ 3216 { \ 3217 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3218 TD d = *((TD *)vd + HD(i)); \ 3219 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3220 } 3221 3222 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3223 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3224 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3225 GEN_VEXT_VF(vfmacc_vf_h, 2, 2) 3226 GEN_VEXT_VF(vfmacc_vf_w, 4, 4) 3227 GEN_VEXT_VF(vfmacc_vf_d, 8, 8) 3228 3229 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3230 { 3231 return float16_muladd(a, b, d, 3232 float_muladd_negate_c | float_muladd_negate_product, s); 3233 } 3234 3235 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3236 { 3237 return float32_muladd(a, b, d, 3238 float_muladd_negate_c | float_muladd_negate_product, s); 3239 } 3240 3241 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3242 { 3243 return float64_muladd(a, b, d, 3244 float_muladd_negate_c | float_muladd_negate_product, s); 3245 } 3246 3247 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3248 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3249 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3250 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) 3251 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) 3252 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) 3253 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3254 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3255 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3256 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) 3257 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) 3258 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) 3259 3260 
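
/*
 * Illustrative summary of the fused multiply-add helpers (derived from
 * the OPFVV3/OPFVF3 macros above, which pass (s2, s1, d) to OP, with d
 * being the old vd[i]), e.g. for SEW=32:
 *
 *   vfmacc.vv:  fmacc32(vs2[i], vs1[i], vd[i])
 *             = float32_muladd(vs2[i], vs1[i], vd[i], 0, s)
 *             = vs1[i] * vs2[i] + vd[i]
 *
 * The remaining variants differ only in the float_muladd_* flags:
 * vfnmacc negates both the product and the addend, vfmsac negates the
 * addend, and vfnmsac negates the product.  The vfmadd/vfnmadd/vfmsub/
 * vfnmsub helpers further down swap the operand order so that the old
 * vd[i] becomes the multiplicand (paired with vs1[i] or f[rs1]) and
 * vs2[i] becomes the addend.
 */
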
static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3261 { 3262 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3263 } 3264 3265 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3266 { 3267 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3268 } 3269 3270 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3271 { 3272 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3273 } 3274 3275 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3276 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3277 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3278 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) 3279 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) 3280 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) 3281 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3282 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3283 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3284 GEN_VEXT_VF(vfmsac_vf_h, 2, 2) 3285 GEN_VEXT_VF(vfmsac_vf_w, 4, 4) 3286 GEN_VEXT_VF(vfmsac_vf_d, 8, 8) 3287 3288 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3289 { 3290 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3291 } 3292 3293 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3294 { 3295 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3296 } 3297 3298 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3299 { 3300 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3301 } 3302 3303 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3304 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3305 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3306 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) 3307 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) 3308 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) 3309 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3310 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3311 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3312 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) 3313 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) 3314 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) 3315 3316 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3317 { 3318 return float16_muladd(d, b, a, 0, s); 3319 } 3320 3321 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3322 { 3323 return float32_muladd(d, b, a, 0, s); 3324 } 3325 3326 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3327 { 3328 return float64_muladd(d, b, a, 0, s); 3329 } 3330 3331 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3332 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3333 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3334 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) 3335 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) 3336 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) 3337 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3338 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3339 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3340 GEN_VEXT_VF(vfmadd_vf_h, 2, 2) 3341 GEN_VEXT_VF(vfmadd_vf_w, 4, 4) 3342 GEN_VEXT_VF(vfmadd_vf_d, 8, 8) 3343 3344 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3345 { 3346 return float16_muladd(d, b, a, 3347 float_muladd_negate_c | float_muladd_negate_product, s); 3348 } 3349 3350 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, 
float_status *s) 3351 { 3352 return float32_muladd(d, b, a, 3353 float_muladd_negate_c | float_muladd_negate_product, s); 3354 } 3355 3356 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3357 { 3358 return float64_muladd(d, b, a, 3359 float_muladd_negate_c | float_muladd_negate_product, s); 3360 } 3361 3362 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3363 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3364 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3365 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) 3366 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) 3367 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) 3368 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3369 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3370 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3371 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) 3372 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) 3373 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) 3374 3375 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3376 { 3377 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3378 } 3379 3380 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3381 { 3382 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3383 } 3384 3385 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3386 { 3387 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3388 } 3389 3390 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3391 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3392 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3393 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) 3394 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 3395 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) 3396 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3397 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3398 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3399 GEN_VEXT_VF(vfmsub_vf_h, 2, 2) 3400 GEN_VEXT_VF(vfmsub_vf_w, 4, 4) 3401 GEN_VEXT_VF(vfmsub_vf_d, 8, 8) 3402 3403 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3404 { 3405 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3406 } 3407 3408 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3409 { 3410 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3411 } 3412 3413 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3414 { 3415 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3416 } 3417 3418 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3419 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3420 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3421 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) 3422 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) 3423 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) 3424 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3425 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3426 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3427 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) 3428 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) 3429 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) 3430 3431 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3432 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3433 { 3434 return float32_muladd(float16_to_float32(a, true, s), 3435 float16_to_float32(b, true, s), d, 0, s); 3436 } 3437 3438 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t 
d, float_status *s) 3439 { 3440 return float64_muladd(float32_to_float64(a, s), 3441 float32_to_float64(b, s), d, 0, s); 3442 } 3443 3444 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3445 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3446 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) 3447 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) 3448 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3449 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3450 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) 3451 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) 3452 3453 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3454 { 3455 return float32_muladd(float16_to_float32(a, true, s), 3456 float16_to_float32(b, true, s), d, 3457 float_muladd_negate_c | float_muladd_negate_product, s); 3458 } 3459 3460 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3461 { 3462 return float64_muladd(float32_to_float64(a, s), 3463 float32_to_float64(b, s), d, 3464 float_muladd_negate_c | float_muladd_negate_product, s); 3465 } 3466 3467 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3468 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3469 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) 3470 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) 3471 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3472 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3473 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) 3474 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) 3475 3476 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3477 { 3478 return float32_muladd(float16_to_float32(a, true, s), 3479 float16_to_float32(b, true, s), d, 3480 float_muladd_negate_c, s); 3481 } 3482 3483 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3484 { 3485 return float64_muladd(float32_to_float64(a, s), 3486 float32_to_float64(b, s), d, 3487 float_muladd_negate_c, s); 3488 } 3489 3490 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3491 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3492 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) 3493 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) 3494 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3495 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3496 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) 3497 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) 3498 3499 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3500 { 3501 return float32_muladd(float16_to_float32(a, true, s), 3502 float16_to_float32(b, true, s), d, 3503 float_muladd_negate_product, s); 3504 } 3505 3506 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3507 { 3508 return float64_muladd(float32_to_float64(a, s), 3509 float32_to_float64(b, s), d, 3510 float_muladd_negate_product, s); 3511 } 3512 3513 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3514 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3515 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) 3516 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 3517 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3518 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3519 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) 3520 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) 3521 3522 /* Vector Floating-Point Square-Root Instruction */ 3523 /* (TD, T2, TX2) */ 3524 #define OP_UU_H uint16_t, uint16_t, uint16_t 3525 #define OP_UU_W uint32_t, uint32_t, uint32_t 3526 #define OP_UU_D uint64_t, uint64_t, uint64_t 3527 3528 #define 
OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3529 static void do_##NAME(void *vd, void *vs2, int i, \ 3530 CPURISCVState *env) \ 3531 { \ 3532 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3533 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3534 } 3535 3536 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ 3537 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3538 CPURISCVState *env, uint32_t desc) \ 3539 { \ 3540 uint32_t vm = vext_vm(desc); \ 3541 uint32_t vl = env->vl; \ 3542 uint32_t i; \ 3543 \ 3544 if (vl == 0) { \ 3545 return; \ 3546 } \ 3547 for (i = 0; i < vl; i++) { \ 3548 if (!vm && !vext_elem_mask(v0, i)) { \ 3549 continue; \ 3550 } \ 3551 do_##NAME(vd, vs2, i, env); \ 3552 } \ 3553 } 3554 3555 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3556 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3557 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3558 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) 3559 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) 3560 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) 3561 3562 /* Vector Floating-Point MIN/MAX Instructions */ 3563 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum) 3564 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum) 3565 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum) 3566 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) 3567 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) 3568 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) 3569 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum) 3570 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum) 3571 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum) 3572 GEN_VEXT_VF(vfmin_vf_h, 2, 2) 3573 GEN_VEXT_VF(vfmin_vf_w, 4, 4) 3574 GEN_VEXT_VF(vfmin_vf_d, 8, 8) 3575 3576 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum) 3577 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum) 3578 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum) 3579 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) 3580 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) 3581 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) 3582 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum) 3583 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum) 3584 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) 3585 GEN_VEXT_VF(vfmax_vf_h, 2, 2) 3586 GEN_VEXT_VF(vfmax_vf_w, 4, 4) 3587 GEN_VEXT_VF(vfmax_vf_d, 8, 8) 3588 3589 /* Vector Floating-Point Sign-Injection Instructions */ 3590 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3591 { 3592 return deposit64(b, 0, 15, a); 3593 } 3594 3595 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3596 { 3597 return deposit64(b, 0, 31, a); 3598 } 3599 3600 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3601 { 3602 return deposit64(b, 0, 63, a); 3603 } 3604 3605 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3606 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3607 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3608 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) 3609 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) 3610 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) 3611 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3612 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3613 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3614 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) 3615 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) 3616 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) 3617 3618 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3619 { 3620 return deposit64(~b, 0, 15, a); 3621 } 3622 3623 static uint32_t fsgnjn32(uint32_t a, 
uint32_t b, float_status *s) 3624 { 3625 return deposit64(~b, 0, 31, a); 3626 } 3627 3628 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3629 { 3630 return deposit64(~b, 0, 63, a); 3631 } 3632 3633 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3634 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3635 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3636 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) 3637 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) 3638 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) 3639 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3640 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3641 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3642 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) 3643 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) 3644 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) 3645 3646 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3647 { 3648 return deposit64(b ^ a, 0, 15, a); 3649 } 3650 3651 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3652 { 3653 return deposit64(b ^ a, 0, 31, a); 3654 } 3655 3656 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3657 { 3658 return deposit64(b ^ a, 0, 63, a); 3659 } 3660 3661 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3662 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3663 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3664 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) 3665 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) 3666 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) 3667 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3668 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3669 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3670 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) 3671 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) 3672 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) 3673 3674 /* Vector Floating-Point Compare Instructions */ 3675 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3676 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3677 CPURISCVState *env, uint32_t desc) \ 3678 { \ 3679 uint32_t vm = vext_vm(desc); \ 3680 uint32_t vl = env->vl; \ 3681 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 3682 uint32_t i; \ 3683 \ 3684 for (i = 0; i < vl; i++) { \ 3685 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3686 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3687 if (!vm && !vext_elem_mask(v0, i)) { \ 3688 continue; \ 3689 } \ 3690 vext_set_elem_mask(vd, i, \ 3691 DO_OP(s2, s1, &env->fp_status)); \ 3692 } \ 3693 for (; i < vlmax; i++) { \ 3694 vext_set_elem_mask(vd, i, 0); \ 3695 } \ 3696 } 3697 3698 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3699 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3700 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3701 3702 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3703 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3704 CPURISCVState *env, uint32_t desc) \ 3705 { \ 3706 uint32_t vm = vext_vm(desc); \ 3707 uint32_t vl = env->vl; \ 3708 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 3709 uint32_t i; \ 3710 \ 3711 for (i = 0; i < vl; i++) { \ 3712 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3713 if (!vm && !vext_elem_mask(v0, i)) { \ 3714 continue; \ 3715 } \ 3716 vext_set_elem_mask(vd, i, \ 3717 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3718 } \ 3719 for (; i < vlmax; i++) { \ 3720 vext_set_elem_mask(vd, i, 0); \ 3721 } \ 3722 } 3723 3724 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3725 
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3726 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3727 3728 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3729 { 3730 FloatRelation compare = float16_compare_quiet(a, b, s); 3731 return compare != float_relation_equal; 3732 } 3733 3734 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 3735 { 3736 FloatRelation compare = float32_compare_quiet(a, b, s); 3737 return compare != float_relation_equal; 3738 } 3739 3740 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 3741 { 3742 FloatRelation compare = float64_compare_quiet(a, b, s); 3743 return compare != float_relation_equal; 3744 } 3745 3746 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 3747 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 3748 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 3749 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 3750 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 3751 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 3752 3753 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 3754 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 3755 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 3756 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 3757 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 3758 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 3759 3760 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 3761 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 3762 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 3763 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 3764 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 3765 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 3766 3767 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 3768 { 3769 FloatRelation compare = float16_compare(a, b, s); 3770 return compare == float_relation_greater; 3771 } 3772 3773 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 3774 { 3775 FloatRelation compare = float32_compare(a, b, s); 3776 return compare == float_relation_greater; 3777 } 3778 3779 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 3780 { 3781 FloatRelation compare = float64_compare(a, b, s); 3782 return compare == float_relation_greater; 3783 } 3784 3785 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 3786 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 3787 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 3788 3789 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 3790 { 3791 FloatRelation compare = float16_compare(a, b, s); 3792 return compare == float_relation_greater || 3793 compare == float_relation_equal; 3794 } 3795 3796 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 3797 { 3798 FloatRelation compare = float32_compare(a, b, s); 3799 return compare == float_relation_greater || 3800 compare == float_relation_equal; 3801 } 3802 3803 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 3804 { 3805 FloatRelation compare = float64_compare(a, b, s); 3806 return compare == float_relation_greater || 3807 compare == float_relation_equal; 3808 } 3809 3810 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 3811 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 3812 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 3813 3814 GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet) 3815 GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet) 3816 
GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet) 3817 GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet) 3818 GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet) 3819 GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet) 3820 3821 /* Vector Floating-Point Classify Instruction */ 3822 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3823 static void do_##NAME(void *vd, void *vs2, int i) \ 3824 { \ 3825 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3826 *((TD *)vd + HD(i)) = OP(s2); \ 3827 } 3828 3829 #define GEN_VEXT_V(NAME, ESZ, DSZ) \ 3830 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3831 CPURISCVState *env, uint32_t desc) \ 3832 { \ 3833 uint32_t vm = vext_vm(desc); \ 3834 uint32_t vl = env->vl; \ 3835 uint32_t i; \ 3836 \ 3837 for (i = 0; i < vl; i++) { \ 3838 if (!vm && !vext_elem_mask(v0, i)) { \ 3839 continue; \ 3840 } \ 3841 do_##NAME(vd, vs2, i); \ 3842 } \ 3843 } 3844 3845 target_ulong fclass_h(uint64_t frs1) 3846 { 3847 float16 f = frs1; 3848 bool sign = float16_is_neg(f); 3849 3850 if (float16_is_infinity(f)) { 3851 return sign ? 1 << 0 : 1 << 7; 3852 } else if (float16_is_zero(f)) { 3853 return sign ? 1 << 3 : 1 << 4; 3854 } else if (float16_is_zero_or_denormal(f)) { 3855 return sign ? 1 << 2 : 1 << 5; 3856 } else if (float16_is_any_nan(f)) { 3857 float_status s = { }; /* for snan_bit_is_one */ 3858 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3859 } else { 3860 return sign ? 1 << 1 : 1 << 6; 3861 } 3862 } 3863 3864 target_ulong fclass_s(uint64_t frs1) 3865 { 3866 float32 f = frs1; 3867 bool sign = float32_is_neg(f); 3868 3869 if (float32_is_infinity(f)) { 3870 return sign ? 1 << 0 : 1 << 7; 3871 } else if (float32_is_zero(f)) { 3872 return sign ? 1 << 3 : 1 << 4; 3873 } else if (float32_is_zero_or_denormal(f)) { 3874 return sign ? 1 << 2 : 1 << 5; 3875 } else if (float32_is_any_nan(f)) { 3876 float_status s = { }; /* for snan_bit_is_one */ 3877 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3878 } else { 3879 return sign ? 1 << 1 : 1 << 6; 3880 } 3881 } 3882 3883 target_ulong fclass_d(uint64_t frs1) 3884 { 3885 float64 f = frs1; 3886 bool sign = float64_is_neg(f); 3887 3888 if (float64_is_infinity(f)) { 3889 return sign ? 1 << 0 : 1 << 7; 3890 } else if (float64_is_zero(f)) { 3891 return sign ? 1 << 3 : 1 << 4; 3892 } else if (float64_is_zero_or_denormal(f)) { 3893 return sign ? 1 << 2 : 1 << 5; 3894 } else if (float64_is_any_nan(f)) { 3895 float_status s = { }; /* for snan_bit_is_one */ 3896 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3897 } else { 3898 return sign ? 1 << 1 : 1 << 6; 3899 } 3900 } 3901 3902 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 3903 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 3904 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 3905 GEN_VEXT_V(vfclass_v_h, 2, 2) 3906 GEN_VEXT_V(vfclass_v_w, 4, 4) 3907 GEN_VEXT_V(vfclass_v_d, 8, 8) 3908 3909 /* Vector Floating-Point Merge Instruction */ 3910 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 3911 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3912 CPURISCVState *env, uint32_t desc) \ 3913 { \ 3914 uint32_t vm = vext_vm(desc); \ 3915 uint32_t vl = env->vl; \ 3916 uint32_t i; \ 3917 \ 3918 for (i = 0; i < vl; i++) { \ 3919 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3920 *((ETYPE *)vd + H(i)) \ 3921 = (!vm && !vext_elem_mask(v0, i) ? 
s2 : s1); \ 3922 } \ 3923 } 3924 3925 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 3926 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 3927 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 3928 3929 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 3930 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 3931 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 3932 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 3933 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 3934 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) 3935 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) 3936 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) 3937 3938 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 3939 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 3940 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 3941 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 3942 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) 3943 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) 3944 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) 3945 3946 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 3947 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 3948 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 3949 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 3950 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) 3951 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) 3952 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) 3953 3954 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 3955 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 3956 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 3957 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 3958 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) 3959 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) 3960 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) 3961 3962 /* Widening Floating-Point/Integer Type-Convert Instructions */ 3963 /* (TD, T2, TX2) */ 3964 #define WOP_UU_H uint32_t, uint16_t, uint16_t 3965 #define WOP_UU_W uint64_t, uint32_t, uint32_t 3966 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 3967 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 3968 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 3969 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) 3970 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) 3971 3972 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 3973 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 3974 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 3975 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) 3976 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) 3977 3978 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 3979 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 3980 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 3981 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) 3982 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) 3983 3984 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 3985 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 3986 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 3987 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) 3988 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) 3989 3990 /* 3991 * vfwcvt.f.f.v vd, vs2, vm # 3992 * Convert single-width float to double-width float. 
 */
static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
{
    return float16_to_float32(a, true, s);
}

RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)

/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4)

/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4)

/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. */
RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4)

/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4)

/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float.
*/ 4033 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4034 { 4035 return float32_to_float16(a, true, s); 4036 } 4037 4038 RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) 4039 RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) 4040 GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2) 4041 GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4) 4042 4043 /* 4044 *** Vector Reduction Operations 4045 */ 4046 /* Vector Single-Width Integer Reduction Instructions */ 4047 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4048 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4049 void *vs2, CPURISCVState *env, uint32_t desc) \ 4050 { \ 4051 uint32_t vm = vext_vm(desc); \ 4052 uint32_t vl = env->vl; \ 4053 uint32_t i; \ 4054 TD s1 = *((TD *)vs1 + HD(0)); \ 4055 \ 4056 for (i = 0; i < vl; i++) { \ 4057 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4058 if (!vm && !vext_elem_mask(v0, i)) { \ 4059 continue; \ 4060 } \ 4061 s1 = OP(s1, (TD)s2); \ 4062 } \ 4063 *((TD *)vd + HD(0)) = s1; \ 4064 } 4065 4066 /* vd[0] = sum(vs1[0], vs2[*]) */ 4067 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4068 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4069 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4070 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4071 4072 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4073 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4074 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4075 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4076 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4077 4078 /* vd[0] = max(vs1[0], vs2[*]) */ 4079 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4080 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4081 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4082 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4083 4084 /* vd[0] = minu(vs1[0], vs2[*]) */ 4085 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4086 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4087 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4088 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4089 4090 /* vd[0] = min(vs1[0], vs2[*]) */ 4091 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4092 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4093 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4094 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4095 4096 /* vd[0] = and(vs1[0], vs2[*]) */ 4097 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4098 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4099 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4100 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4101 4102 /* vd[0] = or(vs1[0], vs2[*]) */ 4103 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4104 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4105 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4106 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4107 4108 /* vd[0] = xor(vs1[0], vs2[*]) */ 4109 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4110 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4111 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4112 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4113 4114 /* Vector Widening Integer Reduction Instructions */ 4115 /* signed sum 
reduction into double-width accumulator */ 4116 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4117 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4118 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4119 4120 /* Unsigned sum reduction into double-width accumulator */ 4121 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4122 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4123 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4124 4125 /* Vector Single-Width Floating-Point Reduction Instructions */ 4126 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4127 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4128 void *vs2, CPURISCVState *env, \ 4129 uint32_t desc) \ 4130 { \ 4131 uint32_t vm = vext_vm(desc); \ 4132 uint32_t vl = env->vl; \ 4133 uint32_t i; \ 4134 TD s1 = *((TD *)vs1 + HD(0)); \ 4135 \ 4136 for (i = 0; i < vl; i++) { \ 4137 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4138 if (!vm && !vext_elem_mask(v0, i)) { \ 4139 continue; \ 4140 } \ 4141 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4142 } \ 4143 *((TD *)vd + HD(0)) = s1; \ 4144 } 4145 4146 /* Unordered sum */ 4147 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4148 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4149 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4150 4151 /* Maximum value */ 4152 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum) 4153 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum) 4154 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum) 4155 4156 /* Minimum value */ 4157 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum) 4158 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum) 4159 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum) 4160 4161 /* Vector Widening Floating-Point Reduction Instructions */ 4162 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4163 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4164 void *vs2, CPURISCVState *env, uint32_t desc) 4165 { 4166 uint32_t vm = vext_vm(desc); 4167 uint32_t vl = env->vl; 4168 uint32_t i; 4169 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4170 4171 for (i = 0; i < vl; i++) { 4172 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4173 if (!vm && !vext_elem_mask(v0, i)) { 4174 continue; 4175 } 4176 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4177 &env->fp_status); 4178 } 4179 *((uint32_t *)vd + H4(0)) = s1; 4180 } 4181 4182 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4183 void *vs2, CPURISCVState *env, uint32_t desc) 4184 { 4185 uint32_t vm = vext_vm(desc); 4186 uint32_t vl = env->vl; 4187 uint32_t i; 4188 uint64_t s1 = *((uint64_t *)vs1); 4189 4190 for (i = 0; i < vl; i++) { 4191 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4192 if (!vm && !vext_elem_mask(v0, i)) { 4193 continue; 4194 } 4195 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4196 &env->fp_status); 4197 } 4198 *((uint64_t *)vd) = s1; 4199 } 4200 4201 /* 4202 *** Vector Mask Operations 4203 */ 4204 /* Vector Mask-Register Logical Instructions */ 4205 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4206 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4207 void *vs2, CPURISCVState *env, \ 4208 uint32_t desc) \ 4209 { \ 4210 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4211 uint32_t vl = env->vl; \ 4212 uint32_t i; \ 4213 int a, b; \ 4214 \ 4215 for (i = 0; i < vl; 
i++) { \ 4216 a = vext_elem_mask(vs1, i); \ 4217 b = vext_elem_mask(vs2, i); \ 4218 vext_set_elem_mask(vd, i, OP(b, a)); \ 4219 } \ 4220 for (; i < vlmax; i++) { \ 4221 vext_set_elem_mask(vd, i, 0); \ 4222 } \ 4223 } 4224 4225 #define DO_NAND(N, M) (!(N & M)) 4226 #define DO_ANDNOT(N, M) (N & !M) 4227 #define DO_NOR(N, M) (!(N | M)) 4228 #define DO_ORNOT(N, M) (N | !M) 4229 #define DO_XNOR(N, M) (!(N ^ M)) 4230 4231 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4232 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4233 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) 4234 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4235 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4236 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4237 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) 4238 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4239 4240 /* Vector mask population count vmpopc */ 4241 target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env, 4242 uint32_t desc) 4243 { 4244 target_ulong cnt = 0; 4245 uint32_t vm = vext_vm(desc); 4246 uint32_t vl = env->vl; 4247 int i; 4248 4249 for (i = 0; i < vl; i++) { 4250 if (vm || vext_elem_mask(v0, i)) { 4251 if (vext_elem_mask(vs2, i)) { 4252 cnt++; 4253 } 4254 } 4255 } 4256 return cnt; 4257 } 4258 4259 /* vmfirst find-first-set mask bit*/ 4260 target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4261 uint32_t desc) 4262 { 4263 uint32_t vm = vext_vm(desc); 4264 uint32_t vl = env->vl; 4265 int i; 4266 4267 for (i = 0; i < vl; i++) { 4268 if (vm || vext_elem_mask(v0, i)) { 4269 if (vext_elem_mask(vs2, i)) { 4270 return i; 4271 } 4272 } 4273 } 4274 return -1LL; 4275 } 4276 4277 enum set_mask_type { 4278 ONLY_FIRST = 1, 4279 INCLUDE_FIRST, 4280 BEFORE_FIRST, 4281 }; 4282 4283 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4284 uint32_t desc, enum set_mask_type type) 4285 { 4286 uint32_t vlmax = env_archcpu(env)->cfg.vlen; 4287 uint32_t vm = vext_vm(desc); 4288 uint32_t vl = env->vl; 4289 int i; 4290 bool first_mask_bit = false; 4291 4292 for (i = 0; i < vl; i++) { 4293 if (!vm && !vext_elem_mask(v0, i)) { 4294 continue; 4295 } 4296 /* write a zero to all following active elements */ 4297 if (first_mask_bit) { 4298 vext_set_elem_mask(vd, i, 0); 4299 continue; 4300 } 4301 if (vext_elem_mask(vs2, i)) { 4302 first_mask_bit = true; 4303 if (type == BEFORE_FIRST) { 4304 vext_set_elem_mask(vd, i, 0); 4305 } else { 4306 vext_set_elem_mask(vd, i, 1); 4307 } 4308 } else { 4309 if (type == ONLY_FIRST) { 4310 vext_set_elem_mask(vd, i, 0); 4311 } else { 4312 vext_set_elem_mask(vd, i, 1); 4313 } 4314 } 4315 } 4316 for (; i < vlmax; i++) { 4317 vext_set_elem_mask(vd, i, 0); 4318 } 4319 } 4320 4321 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4322 uint32_t desc) 4323 { 4324 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4325 } 4326 4327 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4328 uint32_t desc) 4329 { 4330 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4331 } 4332 4333 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4334 uint32_t desc) 4335 { 4336 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4337 } 4338 4339 /* Vector Iota Instruction */ 4340 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4341 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4342 uint32_t desc) \ 4343 { \ 4344 uint32_t vm = vext_vm(desc); \ 4345 uint32_t vl = env->vl; \ 4346 uint32_t sum = 0; \ 4347 int i; \ 4348 \ 4349 for (i = 0; i < vl; i++) { \ 4350 if (!vm && !vext_elem_mask(v0, i)) { \ 4351 continue; \ 4352 } \ 4353 *((ETYPE *)vd + H(i)) 
= sum; \ 4354 if (vext_elem_mask(vs2, i)) { \ 4355 sum++; \ 4356 } \ 4357 } \ 4358 } 4359 4360 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4361 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4362 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4363 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4364 4365 /* Vector Element Index Instruction */ 4366 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4367 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4368 { \ 4369 uint32_t vm = vext_vm(desc); \ 4370 uint32_t vl = env->vl; \ 4371 int i; \ 4372 \ 4373 for (i = 0; i < vl; i++) { \ 4374 if (!vm && !vext_elem_mask(v0, i)) { \ 4375 continue; \ 4376 } \ 4377 *((ETYPE *)vd + H(i)) = i; \ 4378 } \ 4379 } 4380 4381 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4382 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4383 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4384 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4385 4386 /* 4387 *** Vector Permutation Instructions 4388 */ 4389 4390 /* Vector Slide Instructions */ 4391 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4392 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4393 CPURISCVState *env, uint32_t desc) \ 4394 { \ 4395 uint32_t vm = vext_vm(desc); \ 4396 uint32_t vl = env->vl; \ 4397 target_ulong offset = s1, i; \ 4398 \ 4399 for (i = offset; i < vl; i++) { \ 4400 if (!vm && !vext_elem_mask(v0, i)) { \ 4401 continue; \ 4402 } \ 4403 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4404 } \ 4405 } 4406 4407 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4408 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4409 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4410 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4411 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4412 4413 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4414 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4415 CPURISCVState *env, uint32_t desc) \ 4416 { \ 4417 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4418 uint32_t vm = vext_vm(desc); \ 4419 uint32_t vl = env->vl; \ 4420 target_ulong offset = s1, i; \ 4421 \ 4422 for (i = 0; i < vl; ++i) { \ 4423 target_ulong j = i + offset; \ 4424 if (!vm && !vext_elem_mask(v0, i)) { \ 4425 continue; \ 4426 } \ 4427 *((ETYPE *)vd + H(i)) = j >= vlmax ? 
0 : *((ETYPE *)vs2 + H(j)); \ 4428 } \ 4429 } 4430 4431 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4432 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4433 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4434 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4435 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4436 4437 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H) \ 4438 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4439 CPURISCVState *env, uint32_t desc) \ 4440 { \ 4441 uint32_t vm = vext_vm(desc); \ 4442 uint32_t vl = env->vl; \ 4443 uint32_t i; \ 4444 \ 4445 for (i = 0; i < vl; i++) { \ 4446 if (!vm && !vext_elem_mask(v0, i)) { \ 4447 continue; \ 4448 } \ 4449 if (i == 0) { \ 4450 *((ETYPE *)vd + H(i)) = s1; \ 4451 } else { \ 4452 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4453 } \ 4454 } \ 4455 } 4456 4457 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4458 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1) 4459 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2) 4460 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4) 4461 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8) 4462 4463 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H) \ 4464 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4465 CPURISCVState *env, uint32_t desc) \ 4466 { \ 4467 uint32_t vm = vext_vm(desc); \ 4468 uint32_t vl = env->vl; \ 4469 uint32_t i; \ 4470 \ 4471 for (i = 0; i < vl; i++) { \ 4472 if (!vm && !vext_elem_mask(v0, i)) { \ 4473 continue; \ 4474 } \ 4475 if (i == vl - 1) { \ 4476 *((ETYPE *)vd + H(i)) = s1; \ 4477 } else { \ 4478 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4479 } \ 4480 } \ 4481 } 4482 4483 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4484 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1) 4485 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2) 4486 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4) 4487 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8) 4488 4489 /* Vector Register Gather Instruction */ 4490 #define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H) \ 4491 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4492 CPURISCVState *env, uint32_t desc) \ 4493 { \ 4494 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4495 uint32_t vm = vext_vm(desc); \ 4496 uint32_t vl = env->vl; \ 4497 uint64_t index; \ 4498 uint32_t i; \ 4499 \ 4500 for (i = 0; i < vl; i++) { \ 4501 if (!vm && !vext_elem_mask(v0, i)) { \ 4502 continue; \ 4503 } \ 4504 index = *((ETYPE *)vs1 + H(i)); \ 4505 if (index >= vlmax) { \ 4506 *((ETYPE *)vd + H(i)) = 0; \ 4507 } else { \ 4508 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4509 } \ 4510 } \ 4511 } 4512 4513 /* vd[i] = (vs1[i] >= VLMAX) ? 
0 : vs2[vs1[i]]; */ 4514 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, H1) 4515 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2) 4516 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4) 4517 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8) 4518 4519 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4520 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4521 CPURISCVState *env, uint32_t desc) \ 4522 { \ 4523 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4524 uint32_t vm = vext_vm(desc); \ 4525 uint32_t vl = env->vl; \ 4526 uint64_t index = s1; \ 4527 uint32_t i; \ 4528 \ 4529 for (i = 0; i < vl; i++) { \ 4530 if (!vm && !vext_elem_mask(v0, i)) { \ 4531 continue; \ 4532 } \ 4533 if (index >= vlmax) { \ 4534 *((ETYPE *)vd + H(i)) = 0; \ 4535 } else { \ 4536 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4537 } \ 4538 } \ 4539 } 4540 4541 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ 4542 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4543 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4544 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4545 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4546 4547 /* Vector Compress Instruction */ 4548 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4549 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4550 CPURISCVState *env, uint32_t desc) \ 4551 { \ 4552 uint32_t vl = env->vl; \ 4553 uint32_t num = 0, i; \ 4554 \ 4555 for (i = 0; i < vl; i++) { \ 4556 if (!vext_elem_mask(vs1, i)) { \ 4557 continue; \ 4558 } \ 4559 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4560 num++; \ 4561 } \ 4562 } 4563 4564 /* Compress into vd elements of vs2 where vs1 is enabled */ 4565 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4566 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4567 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4568 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4569
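
/*
 * Illustrative example for the vcompress.vm helpers above (values chosen
 * arbitrarily): with vl = 8, vs1 mask bits {1,0,1,1,0,0,0,1} and
 * vs2 = {a,b,c,d,e,f,g,h}, the loop packs the enabled source elements to
 * the front of vd, giving vd[0..3] = {a,c,d,h}; elements of vd at and
 * beyond index num = 4 are left untouched by this helper.
 */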