/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    bool vill = FIELD_EX64(s2, VTYPE, VILL);
    target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

/*
 * Encode LMUL to lmul as follows:
 *     LMUL    vlmul    lmul
 *      1       000       0
 *      2       001       1
 *      4       010       2
 *      8       011       3
 *      -       100       -
 *     1/8      101      -3
 *     1/4      110      -2
 *     1/2      111      -1
 */
static inline int32_t vext_lmul(uint32_t desc)
{
    return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
}

static uint32_t vext_wd(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, WD);
}

/*
 * Get vector group length in bytes. Its range is [64, 2048].
 *
 * As simd_desc supports at most 256, the max vlen is 512 bits.
 * So vlen in bytes is encoded as maxsz.
 */
static inline uint32_t vext_maxsz(uint32_t desc)
{
    return simd_maxsz(desc) << vext_lmul(desc);
}
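/*
 * Illustration only (not taken from the spec text): with VLEN = 512 bits,
 * simd_maxsz(desc) encodes 64 bytes, and with LMUL = 2 (lmul = 1) the
 * vector group spans 64 << 1 = 128 bytes, i.e. two vector registers.
 */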
/*
 * This function checks watchpoint before real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in TLB
 * and page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, addr, curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, addr, curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

static inline void vext_set_elem_mask(void *v0, int index,
                                      uint8_t value)
{
    int idx = index / 64;
    int pos = index % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
}

/*
 * Earlier designs (pre-0.9) had a varying number of bits
 * per mask value (MLEN). In the 0.9 design, MLEN=1.
 * (Section 4.5)
 */
static inline int vext_elem_mask(void *v0, int index)
{
    int idx = index / 64;
    int pos = index % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* elements operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);

#define GEN_VEXT_LD_ELEM(NAME, MTYPE, ETYPE, H, LDSUF) \
static void NAME(CPURISCVState *env, abi_ptr addr, \
                 uint32_t idx, void *vd, uintptr_t retaddr) \
{ \
    MTYPE data; \
    ETYPE *cur = ((ETYPE *)vd + H(idx)); \
    data = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
    *cur = data; \
} \

GEN_VEXT_LD_ELEM(ldb_b, int8_t, int8_t, H1, ldsb)
GEN_VEXT_LD_ELEM(ldb_h, int8_t, int16_t, H2, ldsb)
GEN_VEXT_LD_ELEM(ldb_w, int8_t, int32_t, H4, ldsb)
GEN_VEXT_LD_ELEM(ldb_d, int8_t, int64_t, H8, ldsb)
GEN_VEXT_LD_ELEM(ldh_h, int16_t, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(ldh_w, int16_t, int32_t, H4, ldsw)
GEN_VEXT_LD_ELEM(ldh_d, int16_t, int64_t, H8, ldsw)
GEN_VEXT_LD_ELEM(ldw_w, int32_t, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(ldw_d, int32_t, int64_t, H8, ldl)
GEN_VEXT_LD_ELEM(lde_b, int8_t, int8_t, H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, int64_t, H8, ldq)
GEN_VEXT_LD_ELEM(ldbu_b, uint8_t, uint8_t, H1, ldub)
GEN_VEXT_LD_ELEM(ldbu_h, uint8_t, uint16_t, H2, ldub)
GEN_VEXT_LD_ELEM(ldbu_w, uint8_t, uint32_t, H4, ldub)
GEN_VEXT_LD_ELEM(ldbu_d, uint8_t, uint64_t, H8, ldub)
GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, uint16_t, H2, lduw)
GEN_VEXT_LD_ELEM(ldhu_w, uint16_t, uint32_t, H4, lduw)
GEN_VEXT_LD_ELEM(ldhu_d, uint16_t, uint64_t, H8, lduw)
GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, uint32_t, H4, ldl)
GEN_VEXT_LD_ELEM(ldwu_d, uint32_t, uint64_t, H8, ldl)
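/*
 * For reference (illustration only), GEN_VEXT_LD_ELEM(ldb_h, int8_t,
 * int16_t, H2, ldsb) above expands to roughly:
 *
 *   static void ldb_h(CPURISCVState *env, abi_ptr addr,
 *                     uint32_t idx, void *vd, uintptr_t retaddr)
 *   {
 *       int8_t data;
 *       int16_t *cur = ((int16_t *)vd + H2(idx));
 *       data = cpu_ldsb_data_ra(env, addr, retaddr);
 *       *cur = data;
 *   }
 *
 * i.e. a signed byte (MTYPE) is loaded from memory and sign-extended
 * into a halfword element (ETYPE) of vd.
 */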
#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
static void NAME(CPURISCVState *env, abi_ptr addr, \
                 uint32_t idx, void *vd, uintptr_t retaddr) \
{ \
    ETYPE data = *((ETYPE *)vd + H(idx)); \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
}

GEN_VEXT_ST_ELEM(stb_b, int8_t, H1, stb)
GEN_VEXT_ST_ELEM(stb_h, int16_t, H2, stb)
GEN_VEXT_ST_ELEM(stb_w, int32_t, H4, stb)
GEN_VEXT_ST_ELEM(stb_d, int64_t, H8, stb)
GEN_VEXT_ST_ELEM(sth_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(sth_w, int32_t, H4, stw)
GEN_VEXT_ST_ELEM(sth_d, int64_t, H8, stw)
GEN_VEXT_ST_ELEM(stw_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(stw_d, int64_t, H8, stl)
GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem,
                 uint32_t esz, uint32_t msz, uintptr_t ra,
                 MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        probe_pages(env, base + stride * i, nf * msz, ra, access_type);
    }
    /* do real access */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + stride * i + k * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
}

#define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  target_ulong stride, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
                     sizeof(ETYPE), sizeof(MTYPE), \
                     GETPC(), MMU_DATA_LOAD); \
}

GEN_VEXT_LD_STRIDE(vlsb_v_b, int8_t, int8_t, ldb_b)
GEN_VEXT_LD_STRIDE(vlsb_v_h, int8_t, int16_t, ldb_h)
GEN_VEXT_LD_STRIDE(vlsb_v_w, int8_t, int32_t, ldb_w)
GEN_VEXT_LD_STRIDE(vlsb_v_d, int8_t, int64_t, ldb_d)
GEN_VEXT_LD_STRIDE(vlsh_v_h, int16_t, int16_t, ldh_h)
GEN_VEXT_LD_STRIDE(vlsh_v_w, int16_t, int32_t, ldh_w)
GEN_VEXT_LD_STRIDE(vlsh_v_d, int16_t, int64_t, ldh_d)
GEN_VEXT_LD_STRIDE(vlsw_v_w, int32_t, int32_t, ldw_w)
GEN_VEXT_LD_STRIDE(vlsw_v_d, int32_t, int64_t, ldw_d)
GEN_VEXT_LD_STRIDE(vlse_v_b, int8_t, int8_t, lde_b)
GEN_VEXT_LD_STRIDE(vlse_v_h, int16_t, int16_t, lde_h)
GEN_VEXT_LD_STRIDE(vlse_v_w, int32_t, int32_t, lde_w)
GEN_VEXT_LD_STRIDE(vlse_v_d, int64_t, int64_t, lde_d)
GEN_VEXT_LD_STRIDE(vlsbu_v_b, uint8_t, uint8_t, ldbu_b)
GEN_VEXT_LD_STRIDE(vlsbu_v_h, uint8_t, uint16_t, ldbu_h)
GEN_VEXT_LD_STRIDE(vlsbu_v_w, uint8_t, uint32_t, ldbu_w)
GEN_VEXT_LD_STRIDE(vlsbu_v_d, uint8_t, uint64_t, ldbu_d)
GEN_VEXT_LD_STRIDE(vlshu_v_h, uint16_t, uint16_t, ldhu_h)
GEN_VEXT_LD_STRIDE(vlshu_v_w, uint16_t, uint32_t, ldhu_w)
GEN_VEXT_LD_STRIDE(vlshu_v_d, uint16_t, uint64_t, ldhu_d)
GEN_VEXT_LD_STRIDE(vlswu_v_w, uint32_t, uint32_t, ldwu_w)
GEN_VEXT_LD_STRIDE(vlswu_v_d, uint32_t, uint64_t, ldwu_d)

#define GEN_VEXT_ST_STRIDE(NAME, MTYPE, ETYPE, STORE_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  target_ulong stride, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
                     sizeof(ETYPE), sizeof(MTYPE), \
                     GETPC(), MMU_DATA_STORE); \
}

GEN_VEXT_ST_STRIDE(vssb_v_b, int8_t, int8_t, stb_b)
GEN_VEXT_ST_STRIDE(vssb_v_h, int8_t, int16_t, stb_h)
GEN_VEXT_ST_STRIDE(vssb_v_w, int8_t, int32_t, stb_w)
GEN_VEXT_ST_STRIDE(vssb_v_d, int8_t, int64_t, stb_d)
GEN_VEXT_ST_STRIDE(vssh_v_h, int16_t, int16_t, sth_h)
GEN_VEXT_ST_STRIDE(vssh_v_w, int16_t, int32_t, sth_w)
GEN_VEXT_ST_STRIDE(vssh_v_d, int16_t, int64_t, sth_d)
GEN_VEXT_ST_STRIDE(vssw_v_w, int32_t, int32_t, stw_w)
GEN_VEXT_ST_STRIDE(vssw_v_d, int32_t, int64_t, stw_d)
GEN_VEXT_ST_STRIDE(vsse_v_b, int8_t, int8_t, ste_b)
GEN_VEXT_ST_STRIDE(vsse_v_h, int16_t, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse_v_w, int32_t, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, uint32_t esz, uint32_t msz,
             uintptr_t ra, MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    /* probe every access */
    probe_pages(env, base, env->vl * nf * msz, ra, access_type);
    /* load bytes from guest memory */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + (i * nf + k) * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
}

/*
 * masked unit-stride load and store operations are a special case of
 * strided operations, with stride = NF * sizeof(MTYPE)
 */

#define GEN_VEXT_LD_US(NAME, MTYPE, ETYPE, LOAD_FN) \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
                         CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
                     sizeof(ETYPE), sizeof(MTYPE), \
                     GETPC(), MMU_DATA_LOAD); \
} \
\
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_us(vd, base, env, desc, LOAD_FN, \
                 sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_LOAD); \
}

GEN_VEXT_LD_US(vlb_v_b, int8_t, int8_t, ldb_b)
GEN_VEXT_LD_US(vlb_v_h, int8_t, int16_t, ldb_h)
GEN_VEXT_LD_US(vlb_v_w, int8_t, int32_t, ldb_w)
GEN_VEXT_LD_US(vlb_v_d, int8_t, int64_t, ldb_d)
GEN_VEXT_LD_US(vlh_v_h, int16_t, int16_t, ldh_h)
GEN_VEXT_LD_US(vlh_v_w, int16_t, int32_t, ldh_w)
GEN_VEXT_LD_US(vlh_v_d, int16_t, int64_t, ldh_d)
GEN_VEXT_LD_US(vlw_v_w, int32_t, int32_t, ldw_w)
GEN_VEXT_LD_US(vlw_v_d, int32_t, int64_t, ldw_d)
GEN_VEXT_LD_US(vle_v_b, int8_t, int8_t, lde_b)
GEN_VEXT_LD_US(vle_v_h, int16_t, int16_t, lde_h)
GEN_VEXT_LD_US(vle_v_w, int32_t, int32_t, lde_w)
GEN_VEXT_LD_US(vle_v_d, int64_t, int64_t, lde_d)
GEN_VEXT_LD_US(vlbu_v_b, uint8_t, uint8_t, ldbu_b)
GEN_VEXT_LD_US(vlbu_v_h, uint8_t, uint16_t, ldbu_h)
GEN_VEXT_LD_US(vlbu_v_w, uint8_t, uint32_t, ldbu_w)
GEN_VEXT_LD_US(vlbu_v_d, uint8_t, uint64_t, ldbu_d)
GEN_VEXT_LD_US(vlhu_v_h, uint16_t, uint16_t, ldhu_h)
GEN_VEXT_LD_US(vlhu_v_w, uint16_t, uint32_t, ldhu_w)
GEN_VEXT_LD_US(vlhu_v_d, uint16_t, uint64_t, ldhu_d)
GEN_VEXT_LD_US(vlwu_v_w, uint32_t, uint32_t, ldwu_w)
GEN_VEXT_LD_US(vlwu_v_d, uint32_t, uint64_t, ldwu_d)

#define GEN_VEXT_ST_US(NAME, MTYPE, ETYPE, STORE_FN) \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
                         CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t stride = vext_nf(desc) * sizeof(MTYPE); \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
                     sizeof(ETYPE), sizeof(MTYPE), \
                     GETPC(), MMU_DATA_STORE); \
} \
\
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_us(vd, base, env, desc, STORE_FN, \
                 sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_STORE); \
}

GEN_VEXT_ST_US(vsb_v_b, int8_t, int8_t, stb_b)
GEN_VEXT_ST_US(vsb_v_h, int8_t, int16_t, stb_h)
GEN_VEXT_ST_US(vsb_v_w, int8_t, int32_t, stb_w)
GEN_VEXT_ST_US(vsb_v_d, int8_t, int64_t, stb_d)
GEN_VEXT_ST_US(vsh_v_h, int16_t, int16_t, sth_h)
GEN_VEXT_ST_US(vsh_v_w, int16_t, int32_t, sth_w)
GEN_VEXT_ST_US(vsh_v_d, int16_t, int64_t, sth_d)
GEN_VEXT_ST_US(vsw_v_w, int32_t, int32_t, stw_w)
GEN_VEXT_ST_US(vsw_v_d, int32_t, int64_t, stw_d)
GEN_VEXT_ST_US(vse_v_b, int8_t, int8_t, ste_b)
GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h)
GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w)
GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d)

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
static target_ulong NAME(target_ulong base, \
                         uint32_t idx, void *vs2) \
{ \
    return (base + *((ETYPE *)vs2 + H(idx))); \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, int8_t, H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, int16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, int32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, int64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                uint32_t esz, uint32_t msz, uintptr_t ra,
                MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra,
                    access_type);
    }
    /* load bytes from guest memory */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + k * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
}

#define GEN_VEXT_LD_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
                    LOAD_FN, sizeof(ETYPE), sizeof(MTYPE), \
                    GETPC(), MMU_DATA_LOAD); \
}

GEN_VEXT_LD_INDEX(vlxb_v_b, int8_t, int8_t, idx_b, ldb_b)
GEN_VEXT_LD_INDEX(vlxb_v_h, int8_t, int16_t, idx_h, ldb_h)
GEN_VEXT_LD_INDEX(vlxb_v_w, int8_t, int32_t, idx_w, ldb_w)
GEN_VEXT_LD_INDEX(vlxb_v_d, int8_t, int64_t, idx_d, ldb_d)
GEN_VEXT_LD_INDEX(vlxh_v_h, int16_t, int16_t, idx_h, ldh_h)
GEN_VEXT_LD_INDEX(vlxh_v_w, int16_t, int32_t, idx_w, ldh_w)
GEN_VEXT_LD_INDEX(vlxh_v_d, int16_t, int64_t, idx_d, ldh_d)
GEN_VEXT_LD_INDEX(vlxw_v_w, int32_t, int32_t, idx_w, ldw_w)
GEN_VEXT_LD_INDEX(vlxw_v_d, int32_t, int64_t, idx_d, ldw_d)
GEN_VEXT_LD_INDEX(vlxe_v_b, int8_t, int8_t, idx_b, lde_b)
GEN_VEXT_LD_INDEX(vlxe_v_h, int16_t, int16_t, idx_h, lde_h)
GEN_VEXT_LD_INDEX(vlxe_v_w, int32_t, int32_t, idx_w, lde_w)
GEN_VEXT_LD_INDEX(vlxe_v_d, int64_t, int64_t, idx_d, lde_d)
GEN_VEXT_LD_INDEX(vlxbu_v_b, uint8_t, uint8_t, idx_b, ldbu_b)
GEN_VEXT_LD_INDEX(vlxbu_v_h, uint8_t, uint16_t, idx_h, ldbu_h)
GEN_VEXT_LD_INDEX(vlxbu_v_w, uint8_t, uint32_t, idx_w, ldbu_w)
GEN_VEXT_LD_INDEX(vlxbu_v_d, uint8_t, uint64_t, idx_d, ldbu_d)
GEN_VEXT_LD_INDEX(vlxhu_v_h, uint16_t, uint16_t, idx_h, ldhu_h)
GEN_VEXT_LD_INDEX(vlxhu_v_w, uint16_t, uint32_t, idx_w, ldhu_w)
GEN_VEXT_LD_INDEX(vlxhu_v_d, uint16_t, uint64_t, idx_d, ldhu_d)
GEN_VEXT_LD_INDEX(vlxwu_v_w, uint32_t, uint32_t, idx_w, ldwu_w)
GEN_VEXT_LD_INDEX(vlxwu_v_d, uint32_t, uint64_t, idx_d, ldwu_d)

#define GEN_VEXT_ST_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, STORE_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
                    STORE_FN, sizeof(ETYPE), sizeof(MTYPE), \
                    GETPC(), MMU_DATA_STORE); \
}

GEN_VEXT_ST_INDEX(vsxb_v_b, int8_t, int8_t, idx_b, stb_b)
GEN_VEXT_ST_INDEX(vsxb_v_h, int8_t, int16_t, idx_h, stb_h)
GEN_VEXT_ST_INDEX(vsxb_v_w, int8_t, int32_t, idx_w, stb_w)
GEN_VEXT_ST_INDEX(vsxb_v_d, int8_t, int64_t, idx_d, stb_d)
GEN_VEXT_ST_INDEX(vsxh_v_h, int16_t, int16_t, idx_h, sth_h)
GEN_VEXT_ST_INDEX(vsxh_v_w, int16_t, int32_t, idx_w, sth_w)
GEN_VEXT_ST_INDEX(vsxh_v_d, int16_t, int64_t, idx_d, sth_d)
GEN_VEXT_ST_INDEX(vsxw_v_w, int32_t, int32_t, idx_w, stw_w)
GEN_VEXT_ST_INDEX(vsxw_v_d, int32_t, int64_t, idx_d, stw_d)
GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t, int8_t, idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          uint32_t esz, uint32_t msz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = base + nf * i * msz;
        if (i == 0) {
            probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf * msz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, nf * msz, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr += offset;
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + (i * nf + k) * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
}

#define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
              sizeof(ETYPE), sizeof(MTYPE), GETPC()); \
}

GEN_VEXT_LDFF(vlbff_v_b, int8_t, int8_t, ldb_b)
GEN_VEXT_LDFF(vlbff_v_h, int8_t, int16_t, ldb_h)
GEN_VEXT_LDFF(vlbff_v_w, int8_t, int32_t, ldb_w)
GEN_VEXT_LDFF(vlbff_v_d, int8_t, int64_t, ldb_d)
GEN_VEXT_LDFF(vlhff_v_h, int16_t, int16_t, ldh_h)
GEN_VEXT_LDFF(vlhff_v_w, int16_t, int32_t, ldh_w)
GEN_VEXT_LDFF(vlhff_v_d, int16_t, int64_t, ldh_d)
GEN_VEXT_LDFF(vlwff_v_w, int32_t, int32_t, ldw_w)
GEN_VEXT_LDFF(vlwff_v_d, int32_t, int64_t, ldw_d)
GEN_VEXT_LDFF(vleff_v_b, int8_t, int8_t, lde_b)
GEN_VEXT_LDFF(vleff_v_h, int16_t, int16_t, lde_h)
GEN_VEXT_LDFF(vleff_v_w, int32_t, int32_t, lde_w)
GEN_VEXT_LDFF(vleff_v_d, int64_t, int64_t, lde_d)
GEN_VEXT_LDFF(vlbuff_v_b, uint8_t, uint8_t, ldbu_b)
GEN_VEXT_LDFF(vlbuff_v_h, uint8_t, uint16_t, ldbu_h)
GEN_VEXT_LDFF(vlbuff_v_w, uint8_t, uint32_t, ldbu_w)
GEN_VEXT_LDFF(vlbuff_v_d, uint8_t, uint64_t, ldbu_d)
GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h)
GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w)
GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d)
GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w)
GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d)

/*
 *** Vector AMO Operations (Zvamo)
 */
typedef void vext_amo_noatomic_fn(void *vs3, target_ulong addr,
                                  uint32_t wd, uint32_t idx, CPURISCVState *env,
                                  uintptr_t retaddr);

/* no atomic operation for vector atomic instructions */
#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

#define GEN_VEXT_AMO_NOATOMIC_OP(NAME, ESZ, MSZ, H, DO_OP, SUF) \
static void \
vext_##NAME##_noatomic_op(void *vs3, target_ulong addr, \
                          uint32_t wd, uint32_t idx, \
                          CPURISCVState *env, uintptr_t retaddr) \
{ \
    typedef int##ESZ##_t ETYPE; \
    typedef int##MSZ##_t MTYPE; \
    typedef uint##MSZ##_t UMTYPE __attribute__((unused)); \
    ETYPE *pe3 = (ETYPE *)vs3 + H(idx); \
    MTYPE a = cpu_ld##SUF##_data(env, addr), b = *pe3; \
\
    cpu_st##SUF##_data(env, addr, DO_OP(a, b)); \
    if (wd) { \
        *pe3 = a; \
    } \
}
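/*
 * Descriptive note (not from the spec text): each generated op above is a
 * non-atomic read-modify-write. It loads the MSZ-bit memory operand, stores
 * DO_OP(mem, vs3[idx]) back to the same address, and, when wd is set, writes
 * the original memory value into vs3[idx], sign-extended to ESZ bits when
 * MSZ < ESZ (e.g. the *w_v_d variants).
 */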
/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/* Unsigned min/max */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)

GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_w, 32, 32, H4, DO_SWAP, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_w, 32, 32, H4, DO_ADD, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_w, 32, 32, H4, DO_XOR, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_w, 32, 32, H4, DO_AND, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_w, 32, 32, H4, DO_OR, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_w, 32, 32, H4, DO_MIN, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_w, 32, 32, H4, DO_MAX, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_w, 32, 32, H4, DO_MINU, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_w, 32, 32, H4, DO_MAXU, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_d, 64, 32, H8, DO_SWAP, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoswapd_v_d, 64, 64, H8, DO_SWAP, q)
GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_d, 64, 32, H8, DO_ADD, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoaddd_v_d, 64, 64, H8, DO_ADD, q)
GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_d, 64, 32, H8, DO_XOR, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoxord_v_d, 64, 64, H8, DO_XOR, q)
GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_d, 64, 32, H8, DO_AND, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoandd_v_d, 64, 64, H8, DO_AND, q)
GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_d, 64, 32, H8, DO_OR, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoord_v_d, 64, 64, H8, DO_OR, q)
GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_d, 64, 32, H8, DO_MIN, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamomind_v_d, 64, 64, H8, DO_MIN, q)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_d, 64, 32, H8, DO_MAX, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxd_v_d, 64, 64, H8, DO_MAX, q)
GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_d, 64, 32, H8, DO_MINU, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamominud_v_d, 64, 64, H8, DO_MINU, q)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_d, 64, 32, H8, DO_MAXU, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxud_v_d, 64, 64, H8, DO_MAXU, q)

static inline void
vext_amo_noatomic(void *vs3, void *v0, target_ulong base,
                  void *vs2, CPURISCVState *env, uint32_t desc,
                  vext_get_index_addr get_index_addr,
                  vext_amo_noatomic_fn *noatomic_op,
                  uint32_t esz, uint32_t msz, uintptr_t ra)
{
    uint32_t i;
    target_long addr;
    uint32_t wd = vext_wd(desc);
    uint32_t vm = vext_vm(desc);

    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_LOAD);
        probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_STORE);
    }
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        addr = get_index_addr(base, i, vs2);
        noatomic_op(vs3, addr, wd, i, env, ra);
    }
}

#define GEN_VEXT_AMO(NAME, MTYPE, ETYPE, INDEX_FN) \
void HELPER(NAME)(void *vs3, void *v0, target_ulong base, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    vext_amo_noatomic(vs3, v0, base, vs2, env, desc, \
                      INDEX_FN, vext_##NAME##_noatomic_op, \
                      sizeof(ETYPE), sizeof(MTYPE), \
                      GETPC()); \
}

GEN_VEXT_AMO(vamoswapw_v_d, int32_t, int64_t, idx_d)
GEN_VEXT_AMO(vamoswapd_v_d, int64_t, int64_t, idx_d)
GEN_VEXT_AMO(vamoaddw_v_d, int32_t, int64_t, idx_d)
GEN_VEXT_AMO(vamoaddd_v_d, int64_t, int64_t, idx_d)
GEN_VEXT_AMO(vamoxorw_v_d, int32_t, int64_t, idx_d)
GEN_VEXT_AMO(vamoxord_v_d, int64_t, int64_t, idx_d)
GEN_VEXT_AMO(vamoandw_v_d, int32_t, int64_t, idx_d)
GEN_VEXT_AMO(vamoandd_v_d, int64_t, int64_t, idx_d)
GEN_VEXT_AMO(vamoorw_v_d, int32_t, int64_t, idx_d)
GEN_VEXT_AMO(vamoord_v_d, int64_t, int64_t, idx_d)
GEN_VEXT_AMO(vamominw_v_d, int32_t, int64_t, idx_d)
GEN_VEXT_AMO(vamomind_v_d, int64_t, int64_t, idx_d)
GEN_VEXT_AMO(vamomaxw_v_d, int32_t, int64_t, idx_d)
GEN_VEXT_AMO(vamomaxd_v_d, int64_t, int64_t, idx_d)
GEN_VEXT_AMO(vamominuw_v_d, uint32_t, uint64_t, idx_d)
GEN_VEXT_AMO(vamominud_v_d, uint64_t, uint64_t, idx_d)
GEN_VEXT_AMO(vamomaxuw_v_d, uint32_t, uint64_t, idx_d)
GEN_VEXT_AMO(vamomaxud_v_d, uint64_t, uint64_t, idx_d)
GEN_VEXT_AMO(vamoswapw_v_w, int32_t, int32_t, idx_w)
GEN_VEXT_AMO(vamoaddw_v_w, int32_t, int32_t, idx_w)
GEN_VEXT_AMO(vamoxorw_v_w, int32_t, int32_t, idx_w)
GEN_VEXT_AMO(vamoandw_v_w, int32_t, int32_t, idx_w)
GEN_VEXT_AMO(vamoorw_v_w, int32_t, int32_t, idx_w)
GEN_VEXT_AMO(vamominw_v_w, int32_t, int32_t, idx_w)
GEN_VEXT_AMO(vamomaxw_v_w, int32_t, int32_t, idx_w)
GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w)
GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w)

/*
 *** Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...) macro(__VA_ARGS__)

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t

/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
{ \
    TX1 s1 = *((T1 *)vs1 + HS1(i)); \
    TX2 s2 = *((T2 *)vs2 + HS2(i)); \
    *((TD *)vd + HD(i)) = OP(s2, s1); \
}
#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       uint32_t esz, uint32_t dsz,
                       opivv2_fn *fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = 0; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
}

/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME, ESZ, DSZ) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                  void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \
               do_##NAME); \
}

GEN_VEXT_VV(vadd_vv_b, 1, 1)
GEN_VEXT_VV(vadd_vv_h, 2, 2)
GEN_VEXT_VV(vadd_vv_w, 4, 4)
GEN_VEXT_VV(vadd_vv_d, 8, 8)
GEN_VEXT_VV(vsub_vv_b, 1, 1)
GEN_VEXT_VV(vsub_vv_h, 2, 2)
GEN_VEXT_VV(vsub_vv_w, 4, 4)
GEN_VEXT_VV(vsub_vv_d, 8, 8)
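/*
 * For reference (illustration only), RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B,
 * H1, H1, H1, DO_ADD) above expands to roughly:
 *
 *   static void do_vadd_vv_b(void *vd, void *vs1, void *vs2, int i)
 *   {
 *       int8_t s1 = *((int8_t *)vs1 + H1(i));
 *       int8_t s2 = *((int8_t *)vs2 + H1(i));
 *       *((int8_t *)vd + H1(i)) = s2 + s1;
 *   }
 *
 * and GEN_VEXT_VV(vadd_vv_b, 1, 1) wraps it in the masked per-element loop
 * of do_vext_vv to form HELPER(vadd_vv_b).
 */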
typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operand type.
 * (TX1)(T1)s1 expands it to the operand type used by widening or
 * narrowing operations.
 */
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
{ \
    TX2 s2 = *((T2 *)vs2 + HS2(i)); \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
}

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       uint32_t esz, uint32_t dsz,
                       opivx2_fn fn)
{
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = 0; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, i)) {
            continue;
        }
        fn(vd, s1, vs2, i);
    }
}

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME, ESZ, DSZ) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env, \
                  uint32_t desc) \
{ \
    do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \
               do_##NAME); \
}

GEN_VEXT_VX(vadd_vx_b, 1, 1)
GEN_VEXT_VX(vadd_vx_h, 2, 2)
GEN_VEXT_VX(vadd_vx_w, 4, 4)
GEN_VEXT_VX(vadd_vx_d, 8, 8)
GEN_VEXT_VX(vsub_vx_b, 1, 1)
GEN_VEXT_VX(vsub_vx_h, 2, 2)
GEN_VEXT_VX(vsub_vx_w, 4, 4)
GEN_VEXT_VX(vsub_vx_d, 8, 8)
GEN_VEXT_VX(vrsub_vx_b, 1, 1)
GEN_VEXT_VX(vrsub_vx_h, 2, 2)
GEN_VEXT_VX(vrsub_vx_w, 4, 4)
GEN_VEXT_VX(vrsub_vx_d, 8, 8)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 1, 2)
GEN_VEXT_VV(vwaddu_vv_h, 2, 4)
GEN_VEXT_VV(vwaddu_vv_w, 4, 8)
GEN_VEXT_VV(vwsubu_vv_b, 1, 2)
GEN_VEXT_VV(vwsubu_vv_h, 2, 4)
GEN_VEXT_VV(vwsubu_vv_w, 4, 8)
GEN_VEXT_VV(vwadd_vv_b, 1, 2)
GEN_VEXT_VV(vwadd_vv_h, 2, 4)
GEN_VEXT_VV(vwadd_vv_w, 4, 8)
GEN_VEXT_VV(vwsub_vv_b, 1, 2)
GEN_VEXT_VV(vwsub_vv_h, 2, 4)
GEN_VEXT_VV(vwsub_vv_w, 4, 8)
GEN_VEXT_VV(vwaddu_wv_b, 1, 2)
GEN_VEXT_VV(vwaddu_wv_h, 2, 4)
GEN_VEXT_VV(vwaddu_wv_w, 4, 8)
GEN_VEXT_VV(vwsubu_wv_b, 1, 2)
GEN_VEXT_VV(vwsubu_wv_h, 2, 4)
GEN_VEXT_VV(vwsubu_wv_w, 4, 8)
GEN_VEXT_VV(vwadd_wv_b, 1, 2)
GEN_VEXT_VV(vwadd_wv_h, 2, 4)
GEN_VEXT_VV(vwadd_wv_w, 4, 8)
GEN_VEXT_VV(vwsub_wv_b, 1, 2)
GEN_VEXT_VV(vwsub_wv_h, 2, 4)
GEN_VEXT_VV(vwsub_wv_w, 4, 8)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 1, 2)
GEN_VEXT_VX(vwaddu_vx_h, 2, 4)
GEN_VEXT_VX(vwaddu_vx_w, 4, 8)
GEN_VEXT_VX(vwsubu_vx_b, 1, 2)
GEN_VEXT_VX(vwsubu_vx_h, 2, 4)
GEN_VEXT_VX(vwsubu_vx_w, 4, 8)
GEN_VEXT_VX(vwadd_vx_b, 1, 2)
GEN_VEXT_VX(vwadd_vx_h, 2, 4)
GEN_VEXT_VX(vwadd_vx_w, 4, 8)
GEN_VEXT_VX(vwsub_vx_b, 1, 2)
GEN_VEXT_VX(vwsub_vx_h, 2, 4)
GEN_VEXT_VX(vwsub_vx_w, 4, 8)
GEN_VEXT_VX(vwaddu_wx_b, 1, 2)
GEN_VEXT_VX(vwaddu_wx_h, 2, 4)
GEN_VEXT_VX(vwaddu_wx_w, 4, 8)
GEN_VEXT_VX(vwsubu_wx_b, 1, 2)
GEN_VEXT_VX(vwsubu_wx_h, 2, 4)
GEN_VEXT_VX(vwsubu_wx_w, 4, 8)
GEN_VEXT_VX(vwadd_wx_b, 1, 2)
GEN_VEXT_VX(vwadd_wx_h, 2, 4)
GEN_VEXT_VX(vwadd_wx_w, 4, 8)
GEN_VEXT_VX(vwsub_wx_b, 1, 2)
GEN_VEXT_VX(vwsub_wx_h, 2, 4)
GEN_VEXT_VX(vwsub_wx_w, 4, 8)

/* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
#define DO_VADC(N, M, C) (N + M + C)
#define DO_VSBC(N, M, C) (N - M - C)

#define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t i; \
\
    for (i = 0; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        uint8_t carry = vext_elem_mask(v0, i); \
\
        *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
    } \
}

GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)

#define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t i; \
\
    for (i = 0; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        uint8_t carry = vext_elem_mask(v0, i); \
\
        *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry); \
    } \
}

GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)

GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
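/*
 * Descriptive note: DO_MADC below reports the carry-out of N + M + C by
 * checking for unsigned wraparound in the element type, e.g. for uint8_t
 * N = 200, M = 100, C = 0: N + M wraps to 44, and 44 < 200, so the
 * carry-out is 1. DO_MSBC likewise reports the borrow-out of N - M - C.
 */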
#define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
                              (__typeof(N))(N + M) < N)
#define DO_MSBC(N, M, C) (C ? N <= M : N < M)

#define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
    uint32_t i; \
\
    for (i = 0; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        uint8_t carry = vext_elem_mask(v0, i); \
\
        vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
    } \
    for (; i < vlmax; i++) { \
        vext_set_elem_mask(vd, i, 0); \
    } \
}

GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)

#define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
    uint32_t i; \
\
    for (i = 0; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        uint8_t carry = vext_elem_mask(v0, i); \
\
        vext_set_elem_mask(vd, i, \
                           DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
    } \
    for (; i < vlmax; i++) { \
        vext_set_elem_mask(vd, i, 0); \
    } \
}

GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)

GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)

/* Vector Bitwise Logical Instructions */
RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
GEN_VEXT_VV(vand_vv_b, 1, 1)
GEN_VEXT_VV(vand_vv_h, 2, 2)
GEN_VEXT_VV(vand_vv_w, 4, 4)
GEN_VEXT_VV(vand_vv_d, 8, 8)
GEN_VEXT_VV(vor_vv_b, 1, 1)
GEN_VEXT_VV(vor_vv_h, 2, 2)
GEN_VEXT_VV(vor_vv_w, 4, 4)
GEN_VEXT_VV(vor_vv_d, 8, 8)
GEN_VEXT_VV(vxor_vv_b, 1, 1)
GEN_VEXT_VV(vxor_vv_h, 2, 2)
GEN_VEXT_VV(vxor_vv_w, 4, 4)
GEN_VEXT_VV(vxor_vv_d, 8, 8)

RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
GEN_VEXT_VX(vand_vx_b, 1, 1)
GEN_VEXT_VX(vand_vx_h, 2, 2)
GEN_VEXT_VX(vand_vx_w, 4, 4)
GEN_VEXT_VX(vand_vx_d, 8, 8)
GEN_VEXT_VX(vor_vx_b, 1, 1)
GEN_VEXT_VX(vor_vx_h, 2, 2)
GEN_VEXT_VX(vor_vx_w, 4, 4)
GEN_VEXT_VX(vor_vx_d, 8, 8)
GEN_VEXT_VX(vxor_vx_b, 1, 1)
GEN_VEXT_VX(vxor_vx_h, 2, 2)
GEN_VEXT_VX(vxor_vx_w, 4, 4)
GEN_VEXT_VX(vxor_vx_d, 8, 8)

/* Vector Single-Width Bit Shift Instructions */
#define DO_SLL(N, M) (N << (M))
#define DO_SRL(N, M) (N >> (M))

/* generate the helpers for shift instructions with two vector operands */
#define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
\
    for (i = 0; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
        TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
        *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
    } \
}

GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* generate the helpers for shift instructions with one vector and one scalar */
#define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t i; \
\
    for (i = 0; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
        *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
    } \
}

GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)

GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)

GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)

/* Vector Narrowing Integer Right Shift Instructions */
GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)

/* Vector Integer Comparison Instructions */
#define DO_MSEQ(N, M) (N == M)
#define DO_MSNE(N, M) (N != M)
#define DO_MSLT(N, M) (N < M)
#define DO_MSLE(N, M) (N <= M)
#define DO_MSGT(N, M) (N > M)

#define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
    uint32_t i; \
\
    for (i = 0; i < vl; i++) { \
        ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
    } \
    for (; i < vlmax; i++) { \
        vext_set_elem_mask(vd, i, 0); \
    } \
}

GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)

#define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \
    uint32_t i; \
\
    for (i = 0; i < vl; i++) { \
        ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
        if (!vm && !vext_elem_mask(v0, i)) { \
            continue; \
        } \
        vext_set_elem_mask(vd, i, \
                           DO_OP(s2, (ETYPE)(target_long)s1)); \
    } \
    for (; i < vlmax; i++) { \
        vext_set_elem_mask(vd, i, 0); \
    } \
}

GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)

GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)

GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)

GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)

GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)

GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)

/* Vector Integer Min/Max Instructions */
RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1471 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1472 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1473 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1474 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1475 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1476 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1477 GEN_VEXT_VV(vminu_vv_b, 1, 1) 1478 GEN_VEXT_VV(vminu_vv_h, 2, 2) 1479 GEN_VEXT_VV(vminu_vv_w, 4, 4) 1480 GEN_VEXT_VV(vminu_vv_d, 8, 8) 1481 GEN_VEXT_VV(vmin_vv_b, 1, 1) 1482 GEN_VEXT_VV(vmin_vv_h, 2, 2) 1483 GEN_VEXT_VV(vmin_vv_w, 4, 4) 1484 GEN_VEXT_VV(vmin_vv_d, 8, 8) 1485 GEN_VEXT_VV(vmaxu_vv_b, 1, 1) 1486 GEN_VEXT_VV(vmaxu_vv_h, 2, 2) 1487 GEN_VEXT_VV(vmaxu_vv_w, 4, 4) 1488 GEN_VEXT_VV(vmaxu_vv_d, 8, 8) 1489 GEN_VEXT_VV(vmax_vv_b, 1, 1) 1490 GEN_VEXT_VV(vmax_vv_h, 2, 2) 1491 GEN_VEXT_VV(vmax_vv_w, 4, 4) 1492 GEN_VEXT_VV(vmax_vv_d, 8, 8) 1493 1494 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1495 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN) 1496 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1497 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1498 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1499 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1500 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1501 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1502 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1503 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1504 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1505 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1506 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1507 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1508 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1509 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1510 GEN_VEXT_VX(vminu_vx_b, 1, 1) 1511 GEN_VEXT_VX(vminu_vx_h, 2, 2) 1512 GEN_VEXT_VX(vminu_vx_w, 4, 4) 1513 GEN_VEXT_VX(vminu_vx_d, 8, 8) 1514 GEN_VEXT_VX(vmin_vx_b, 1, 1) 1515 GEN_VEXT_VX(vmin_vx_h, 2, 2) 1516 GEN_VEXT_VX(vmin_vx_w, 4, 4) 1517 GEN_VEXT_VX(vmin_vx_d, 8, 8) 1518 GEN_VEXT_VX(vmaxu_vx_b, 1, 1) 1519 GEN_VEXT_VX(vmaxu_vx_h, 2, 2) 1520 GEN_VEXT_VX(vmaxu_vx_w, 4, 4) 1521 GEN_VEXT_VX(vmaxu_vx_d, 8, 8) 1522 GEN_VEXT_VX(vmax_vx_b, 1, 1) 1523 GEN_VEXT_VX(vmax_vx_h, 2, 2) 1524 GEN_VEXT_VX(vmax_vx_w, 4, 4) 1525 GEN_VEXT_VX(vmax_vx_d, 8, 8) 1526 1527 /* Vector Single-Width Integer Multiply Instructions */ 1528 #define DO_MUL(N, M) (N * M) 1529 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1530 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1531 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1532 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1533 GEN_VEXT_VV(vmul_vv_b, 1, 1) 1534 GEN_VEXT_VV(vmul_vv_h, 2, 2) 1535 GEN_VEXT_VV(vmul_vv_w, 4, 4) 1536 GEN_VEXT_VV(vmul_vv_d, 8, 8) 1537 1538 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1539 { 1540 return (int16_t)s2 * (int16_t)s1 >> 8; 1541 } 1542 1543 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1544 { 1545 return (int32_t)s2 * (int32_t)s1 >> 16; 1546 } 1547 1548 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1549 { 1550 return (int64_t)s2 * (int64_t)s1 >> 32; 1551 } 1552 1553 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1554 { 1555 uint64_t hi_64, lo_64; 1556 1557 muls64(&lo_64, &hi_64, s1, s2); 1558 return hi_64; 1559 } 1560 1561 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1562 { 1563 return (uint16_t)s2 * 
(uint16_t)s1 >> 8; 1564 } 1565 1566 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1567 { 1568 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1569 } 1570 1571 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1572 { 1573 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1574 } 1575 1576 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1577 { 1578 uint64_t hi_64, lo_64; 1579 1580 mulu64(&lo_64, &hi_64, s2, s1); 1581 return hi_64; 1582 } 1583 1584 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1585 { 1586 return (int16_t)s2 * (uint16_t)s1 >> 8; 1587 } 1588 1589 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1590 { 1591 return (int32_t)s2 * (uint32_t)s1 >> 16; 1592 } 1593 1594 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1595 { 1596 return (int64_t)s2 * (uint64_t)s1 >> 32; 1597 } 1598 1599 /* 1600 * Let A = signed operand, 1601 * B = unsigned operand 1602 * P = mulu64(A, B), unsigned product 1603 * 1604 * LET X = 2 ** 64 - A, 2's complement of A 1605 * SP = signed product 1606 * THEN 1607 * IF A < 0 1608 * SP = -X * B 1609 * = -(2 ** 64 - A) * B 1610 * = A * B - 2 ** 64 * B 1611 * = P - 2 ** 64 * B 1612 * ELSE 1613 * SP = P 1614 * THEN 1615 * HI_P -= (A < 0 ? B : 0) 1616 */ 1617 1618 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1619 { 1620 uint64_t hi_64, lo_64; 1621 1622 mulu64(&lo_64, &hi_64, s2, s1); 1623 1624 hi_64 -= s2 < 0 ? s1 : 0; 1625 return hi_64; 1626 } 1627 1628 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1629 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1630 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1631 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1632 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1633 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1634 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1635 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1636 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1637 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1638 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1639 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1640 GEN_VEXT_VV(vmulh_vv_b, 1, 1) 1641 GEN_VEXT_VV(vmulh_vv_h, 2, 2) 1642 GEN_VEXT_VV(vmulh_vv_w, 4, 4) 1643 GEN_VEXT_VV(vmulh_vv_d, 8, 8) 1644 GEN_VEXT_VV(vmulhu_vv_b, 1, 1) 1645 GEN_VEXT_VV(vmulhu_vv_h, 2, 2) 1646 GEN_VEXT_VV(vmulhu_vv_w, 4, 4) 1647 GEN_VEXT_VV(vmulhu_vv_d, 8, 8) 1648 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1) 1649 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2) 1650 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4) 1651 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8) 1652 1653 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1654 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1655 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1656 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1657 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1658 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1659 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1660 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1661 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1662 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1663 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1664 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1665 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1666 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1667 RVVCALL(OPIVX2, 
vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1668 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1669 GEN_VEXT_VX(vmul_vx_b, 1, 1) 1670 GEN_VEXT_VX(vmul_vx_h, 2, 2) 1671 GEN_VEXT_VX(vmul_vx_w, 4, 4) 1672 GEN_VEXT_VX(vmul_vx_d, 8, 8) 1673 GEN_VEXT_VX(vmulh_vx_b, 1, 1) 1674 GEN_VEXT_VX(vmulh_vx_h, 2, 2) 1675 GEN_VEXT_VX(vmulh_vx_w, 4, 4) 1676 GEN_VEXT_VX(vmulh_vx_d, 8, 8) 1677 GEN_VEXT_VX(vmulhu_vx_b, 1, 1) 1678 GEN_VEXT_VX(vmulhu_vx_h, 2, 2) 1679 GEN_VEXT_VX(vmulhu_vx_w, 4, 4) 1680 GEN_VEXT_VX(vmulhu_vx_d, 8, 8) 1681 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1) 1682 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2) 1683 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4) 1684 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8) 1685 1686 /* Vector Integer Divide Instructions */ 1687 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1688 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1689 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1690 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1691 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1692 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1693 1694 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1695 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1696 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1697 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1698 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1699 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1700 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1701 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1702 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1703 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1704 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1705 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1706 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1707 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1708 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1709 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1710 GEN_VEXT_VV(vdivu_vv_b, 1, 1) 1711 GEN_VEXT_VV(vdivu_vv_h, 2, 2) 1712 GEN_VEXT_VV(vdivu_vv_w, 4, 4) 1713 GEN_VEXT_VV(vdivu_vv_d, 8, 8) 1714 GEN_VEXT_VV(vdiv_vv_b, 1, 1) 1715 GEN_VEXT_VV(vdiv_vv_h, 2, 2) 1716 GEN_VEXT_VV(vdiv_vv_w, 4, 4) 1717 GEN_VEXT_VV(vdiv_vv_d, 8, 8) 1718 GEN_VEXT_VV(vremu_vv_b, 1, 1) 1719 GEN_VEXT_VV(vremu_vv_h, 2, 2) 1720 GEN_VEXT_VV(vremu_vv_w, 4, 4) 1721 GEN_VEXT_VV(vremu_vv_d, 8, 8) 1722 GEN_VEXT_VV(vrem_vv_b, 1, 1) 1723 GEN_VEXT_VV(vrem_vv_h, 2, 2) 1724 GEN_VEXT_VV(vrem_vv_w, 4, 4) 1725 GEN_VEXT_VV(vrem_vv_d, 8, 8) 1726 1727 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1728 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1729 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1730 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1731 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1732 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1733 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1734 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1735 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1736 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1737 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1738 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1739 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1740 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1741 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, 
H4, H4, DO_REM) 1742 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1743 GEN_VEXT_VX(vdivu_vx_b, 1, 1) 1744 GEN_VEXT_VX(vdivu_vx_h, 2, 2) 1745 GEN_VEXT_VX(vdivu_vx_w, 4, 4) 1746 GEN_VEXT_VX(vdivu_vx_d, 8, 8) 1747 GEN_VEXT_VX(vdiv_vx_b, 1, 1) 1748 GEN_VEXT_VX(vdiv_vx_h, 2, 2) 1749 GEN_VEXT_VX(vdiv_vx_w, 4, 4) 1750 GEN_VEXT_VX(vdiv_vx_d, 8, 8) 1751 GEN_VEXT_VX(vremu_vx_b, 1, 1) 1752 GEN_VEXT_VX(vremu_vx_h, 2, 2) 1753 GEN_VEXT_VX(vremu_vx_w, 4, 4) 1754 GEN_VEXT_VX(vremu_vx_d, 8, 8) 1755 GEN_VEXT_VX(vrem_vx_b, 1, 1) 1756 GEN_VEXT_VX(vrem_vx_h, 2, 2) 1757 GEN_VEXT_VX(vrem_vx_w, 4, 4) 1758 GEN_VEXT_VX(vrem_vx_d, 8, 8) 1759 1760 /* Vector Widening Integer Multiply Instructions */ 1761 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1762 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1763 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1764 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1765 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1766 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1767 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1768 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1769 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1770 GEN_VEXT_VV(vwmul_vv_b, 1, 2) 1771 GEN_VEXT_VV(vwmul_vv_h, 2, 4) 1772 GEN_VEXT_VV(vwmul_vv_w, 4, 8) 1773 GEN_VEXT_VV(vwmulu_vv_b, 1, 2) 1774 GEN_VEXT_VV(vwmulu_vv_h, 2, 4) 1775 GEN_VEXT_VV(vwmulu_vv_w, 4, 8) 1776 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2) 1777 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4) 1778 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8) 1779 1780 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1781 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1782 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1783 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1784 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1785 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1786 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1787 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1788 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1789 GEN_VEXT_VX(vwmul_vx_b, 1, 2) 1790 GEN_VEXT_VX(vwmul_vx_h, 2, 4) 1791 GEN_VEXT_VX(vwmul_vx_w, 4, 8) 1792 GEN_VEXT_VX(vwmulu_vx_b, 1, 2) 1793 GEN_VEXT_VX(vwmulu_vx_h, 2, 4) 1794 GEN_VEXT_VX(vwmulu_vx_w, 4, 8) 1795 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2) 1796 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4) 1797 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8) 1798 1799 /* Vector Single-Width Integer Multiply-Add Instructions */ 1800 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1801 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1802 { \ 1803 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1804 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1805 TD d = *((TD *)vd + HD(i)); \ 1806 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1807 } 1808 1809 #define DO_MACC(N, M, D) (M * N + D) 1810 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1811 #define DO_MADD(N, M, D) (M * D + N) 1812 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1813 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1814 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1815 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1816 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1817 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1818 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1819 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1820 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 
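/*
 * Note on operand order (an illustrative sketch, not generated code): with
 * the OPIVV3 pattern above, OP is invoked as OP(s2, s1, d), so the
 * vmacc/vnmsac helpers accumulate into the destination
 * (vd = vs1 * vs2 + vd, resp. vd = -(vs1 * vs2) + vd), while the
 * vmadd/vnmsub helpers that follow multiply by the old destination and add
 * vs2 (vd = vs1 * vd + vs2, resp. vd = -(vs1 * vd) + vs2).
 *
 * Assuming OP_SSS_B maps every type parameter to int8_t, as elsewhere in
 * this file, RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
 * expands roughly to:
 *
 *   static void do_vmacc_vv_b(void *vd, void *vs1, void *vs2, int i)
 *   {
 *       int8_t s1 = *((int8_t *)vs1 + H1(i));
 *       int8_t s2 = *((int8_t *)vs2 + H1(i));
 *       int8_t d  = *((int8_t *)vd + H1(i));
 *       *((int8_t *)vd + H1(i)) = DO_MACC(s2, s1, d);    -> s1 * s2 + d
 *   }
 */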
1821 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1822 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1823 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1824 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1825 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB) 1826 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1827 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1828 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1829 GEN_VEXT_VV(vmacc_vv_b, 1, 1) 1830 GEN_VEXT_VV(vmacc_vv_h, 2, 2) 1831 GEN_VEXT_VV(vmacc_vv_w, 4, 4) 1832 GEN_VEXT_VV(vmacc_vv_d, 8, 8) 1833 GEN_VEXT_VV(vnmsac_vv_b, 1, 1) 1834 GEN_VEXT_VV(vnmsac_vv_h, 2, 2) 1835 GEN_VEXT_VV(vnmsac_vv_w, 4, 4) 1836 GEN_VEXT_VV(vnmsac_vv_d, 8, 8) 1837 GEN_VEXT_VV(vmadd_vv_b, 1, 1) 1838 GEN_VEXT_VV(vmadd_vv_h, 2, 2) 1839 GEN_VEXT_VV(vmadd_vv_w, 4, 4) 1840 GEN_VEXT_VV(vmadd_vv_d, 8, 8) 1841 GEN_VEXT_VV(vnmsub_vv_b, 1, 1) 1842 GEN_VEXT_VV(vnmsub_vv_h, 2, 2) 1843 GEN_VEXT_VV(vnmsub_vv_w, 4, 4) 1844 GEN_VEXT_VV(vnmsub_vv_d, 8, 8) 1845 1846 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1847 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1848 { \ 1849 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1850 TD d = *((TD *)vd + HD(i)); \ 1851 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1852 } 1853 1854 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1855 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1856 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1857 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1858 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1859 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1860 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1861 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1862 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1863 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1864 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1865 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1866 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1867 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1868 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1869 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1870 GEN_VEXT_VX(vmacc_vx_b, 1, 1) 1871 GEN_VEXT_VX(vmacc_vx_h, 2, 2) 1872 GEN_VEXT_VX(vmacc_vx_w, 4, 4) 1873 GEN_VEXT_VX(vmacc_vx_d, 8, 8) 1874 GEN_VEXT_VX(vnmsac_vx_b, 1, 1) 1875 GEN_VEXT_VX(vnmsac_vx_h, 2, 2) 1876 GEN_VEXT_VX(vnmsac_vx_w, 4, 4) 1877 GEN_VEXT_VX(vnmsac_vx_d, 8, 8) 1878 GEN_VEXT_VX(vmadd_vx_b, 1, 1) 1879 GEN_VEXT_VX(vmadd_vx_h, 2, 2) 1880 GEN_VEXT_VX(vmadd_vx_w, 4, 4) 1881 GEN_VEXT_VX(vmadd_vx_d, 8, 8) 1882 GEN_VEXT_VX(vnmsub_vx_b, 1, 1) 1883 GEN_VEXT_VX(vnmsub_vx_h, 2, 2) 1884 GEN_VEXT_VX(vnmsub_vx_w, 4, 4) 1885 GEN_VEXT_VX(vnmsub_vx_d, 8, 8) 1886 1887 /* Vector Widening Integer Multiply-Add Instructions */ 1888 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1889 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1890 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1891 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1892 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1893 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1894 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1895 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1896 RVVCALL(OPIVV3, 
vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 1897 GEN_VEXT_VV(vwmaccu_vv_b, 1, 2) 1898 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4) 1899 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8) 1900 GEN_VEXT_VV(vwmacc_vv_b, 1, 2) 1901 GEN_VEXT_VV(vwmacc_vv_h, 2, 4) 1902 GEN_VEXT_VV(vwmacc_vv_w, 4, 8) 1903 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2) 1904 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4) 1905 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8) 1906 1907 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 1908 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 1909 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 1910 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 1911 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 1912 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 1913 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 1914 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 1915 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 1916 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 1917 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 1918 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 1919 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2) 1920 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4) 1921 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8) 1922 GEN_VEXT_VX(vwmacc_vx_b, 1, 2) 1923 GEN_VEXT_VX(vwmacc_vx_h, 2, 4) 1924 GEN_VEXT_VX(vwmacc_vx_w, 4, 8) 1925 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2) 1926 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4) 1927 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8) 1928 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2) 1929 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4) 1930 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8) 1931 1932 /* Vector Integer Merge and Move Instructions */ 1933 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \ 1934 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 1935 uint32_t desc) \ 1936 { \ 1937 uint32_t vl = env->vl; \ 1938 uint32_t i; \ 1939 \ 1940 for (i = 0; i < vl; i++) { \ 1941 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1942 *((ETYPE *)vd + H(i)) = s1; \ 1943 } \ 1944 } 1945 1946 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1) 1947 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2) 1948 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4) 1949 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8) 1950 1951 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \ 1952 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 1953 uint32_t desc) \ 1954 { \ 1955 uint32_t vl = env->vl; \ 1956 uint32_t i; \ 1957 \ 1958 for (i = 0; i < vl; i++) { \ 1959 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 1960 } \ 1961 } 1962 1963 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1) 1964 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2) 1965 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4) 1966 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8) 1967 1968 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \ 1969 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1970 CPURISCVState *env, uint32_t desc) \ 1971 { \ 1972 uint32_t vl = env->vl; \ 1973 uint32_t i; \ 1974 \ 1975 for (i = 0; i < vl; i++) { \ 1976 ETYPE *vt = (!vext_elem_mask(v0, i) ? 
vs2 : vs1); \ 1977 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 1978 } \ 1979 } 1980 1981 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1) 1982 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2) 1983 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4) 1984 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8) 1985 1986 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \ 1987 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1988 void *vs2, CPURISCVState *env, uint32_t desc) \ 1989 { \ 1990 uint32_t vl = env->vl; \ 1991 uint32_t i; \ 1992 \ 1993 for (i = 0; i < vl; i++) { \ 1994 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1995 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \ 1996 (ETYPE)(target_long)s1); \ 1997 *((ETYPE *)vd + H(i)) = d; \ 1998 } \ 1999 } 2000 2001 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1) 2002 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2) 2003 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4) 2004 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8) 2005 2006 /* 2007 *** Vector Fixed-Point Arithmetic Instructions 2008 */ 2009 2010 /* Vector Single-Width Saturating Add and Subtract */ 2011 2012 /* 2013 * As fixed point instructions probably have round mode and saturation, 2014 * define common macros for fixed point here. 2015 */ 2016 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2017 CPURISCVState *env, int vxrm); 2018 2019 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2020 static inline void \ 2021 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2022 CPURISCVState *env, int vxrm) \ 2023 { \ 2024 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2025 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2026 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2027 } 2028 2029 static inline void 2030 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2031 CPURISCVState *env, 2032 uint32_t vl, uint32_t vm, int vxrm, 2033 opivv2_rm_fn *fn) 2034 { 2035 for (uint32_t i = 0; i < vl; i++) { 2036 if (!vm && !vext_elem_mask(v0, i)) { 2037 continue; 2038 } 2039 fn(vd, vs1, vs2, i, env, vxrm); 2040 } 2041 } 2042 2043 static inline void 2044 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2045 CPURISCVState *env, 2046 uint32_t desc, uint32_t esz, uint32_t dsz, 2047 opivv2_rm_fn *fn) 2048 { 2049 uint32_t vm = vext_vm(desc); 2050 uint32_t vl = env->vl; 2051 2052 switch (env->vxrm) { 2053 case 0: /* rnu */ 2054 vext_vv_rm_1(vd, v0, vs1, vs2, 2055 env, vl, vm, 0, fn); 2056 break; 2057 case 1: /* rne */ 2058 vext_vv_rm_1(vd, v0, vs1, vs2, 2059 env, vl, vm, 1, fn); 2060 break; 2061 case 2: /* rdn */ 2062 vext_vv_rm_1(vd, v0, vs1, vs2, 2063 env, vl, vm, 2, fn); 2064 break; 2065 default: /* rod */ 2066 vext_vv_rm_1(vd, v0, vs1, vs2, 2067 env, vl, vm, 3, fn); 2068 break; 2069 } 2070 } 2071 2072 /* generate helpers for fixed point instructions with OPIVV format */ 2073 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ) \ 2074 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2075 CPURISCVState *env, uint32_t desc) \ 2076 { \ 2077 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 2078 do_##NAME); \ 2079 } 2080 2081 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2082 { 2083 uint8_t res = a + b; 2084 if (res < a) { 2085 res = UINT8_MAX; 2086 env->vxsat = 0x1; 2087 } 2088 return res; 2089 } 2090 2091 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2092 uint16_t b) 2093 { 2094 uint16_t res = a + b; 2095 if (res < a) { 2096 res = UINT16_MAX; 2097 env->vxsat = 0x1; 2098 } 2099 return res; 2100 } 2101 2102 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 
2103 uint32_t b) 2104 { 2105 uint32_t res = a + b; 2106 if (res < a) { 2107 res = UINT32_MAX; 2108 env->vxsat = 0x1; 2109 } 2110 return res; 2111 } 2112 2113 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2114 uint64_t b) 2115 { 2116 uint64_t res = a + b; 2117 if (res < a) { 2118 res = UINT64_MAX; 2119 env->vxsat = 0x1; 2120 } 2121 return res; 2122 } 2123 2124 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2125 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2126 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2127 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2128 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1) 2129 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2) 2130 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4) 2131 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8) 2132 2133 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2134 CPURISCVState *env, int vxrm); 2135 2136 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2137 static inline void \ 2138 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2139 CPURISCVState *env, int vxrm) \ 2140 { \ 2141 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2142 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2143 } 2144 2145 static inline void 2146 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2147 CPURISCVState *env, 2148 uint32_t vl, uint32_t vm, int vxrm, 2149 opivx2_rm_fn *fn) 2150 { 2151 for (uint32_t i = 0; i < vl; i++) { 2152 if (!vm && !vext_elem_mask(v0, i)) { 2153 continue; 2154 } 2155 fn(vd, s1, vs2, i, env, vxrm); 2156 } 2157 } 2158 2159 static inline void 2160 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2161 CPURISCVState *env, 2162 uint32_t desc, uint32_t esz, uint32_t dsz, 2163 opivx2_rm_fn *fn) 2164 { 2165 uint32_t vm = vext_vm(desc); 2166 uint32_t vl = env->vl; 2167 2168 switch (env->vxrm) { 2169 case 0: /* rnu */ 2170 vext_vx_rm_1(vd, v0, s1, vs2, 2171 env, vl, vm, 0, fn); 2172 break; 2173 case 1: /* rne */ 2174 vext_vx_rm_1(vd, v0, s1, vs2, 2175 env, vl, vm, 1, fn); 2176 break; 2177 case 2: /* rdn */ 2178 vext_vx_rm_1(vd, v0, s1, vs2, 2179 env, vl, vm, 2, fn); 2180 break; 2181 default: /* rod */ 2182 vext_vx_rm_1(vd, v0, s1, vs2, 2183 env, vl, vm, 3, fn); 2184 break; 2185 } 2186 } 2187 2188 /* generate helpers for fixed point instructions with OPIVX format */ 2189 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ) \ 2190 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2191 void *vs2, CPURISCVState *env, uint32_t desc) \ 2192 { \ 2193 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 2194 do_##NAME); \ 2195 } 2196 2197 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2198 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2199 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32) 2200 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2201 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1) 2202 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2) 2203 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4) 2204 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8) 2205 2206 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2207 { 2208 int8_t res = a + b; 2209 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2210 res = a > 0 ? INT8_MAX : INT8_MIN; 2211 env->vxsat = 0x1; 2212 } 2213 return res; 2214 } 2215 2216 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2217 { 2218 int16_t res = a + b; 2219 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2220 res = a > 0 ? 
INT16_MAX : INT16_MIN; 2221 env->vxsat = 0x1; 2222 } 2223 return res; 2224 } 2225 2226 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2227 { 2228 int32_t res = a + b; 2229 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2230 res = a > 0 ? INT32_MAX : INT32_MIN; 2231 env->vxsat = 0x1; 2232 } 2233 return res; 2234 } 2235 2236 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2237 { 2238 int64_t res = a + b; 2239 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2240 res = a > 0 ? INT64_MAX : INT64_MIN; 2241 env->vxsat = 0x1; 2242 } 2243 return res; 2244 } 2245 2246 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2247 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2248 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2249 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2250 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1) 2251 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2) 2252 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4) 2253 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8) 2254 2255 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2256 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2257 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2258 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2259 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1) 2260 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2) 2261 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4) 2262 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8) 2263 2264 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2265 { 2266 uint8_t res = a - b; 2267 if (res > a) { 2268 res = 0; 2269 env->vxsat = 0x1; 2270 } 2271 return res; 2272 } 2273 2274 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2275 uint16_t b) 2276 { 2277 uint16_t res = a - b; 2278 if (res > a) { 2279 res = 0; 2280 env->vxsat = 0x1; 2281 } 2282 return res; 2283 } 2284 2285 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2286 uint32_t b) 2287 { 2288 uint32_t res = a - b; 2289 if (res > a) { 2290 res = 0; 2291 env->vxsat = 0x1; 2292 } 2293 return res; 2294 } 2295 2296 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2297 uint64_t b) 2298 { 2299 uint64_t res = a - b; 2300 if (res > a) { 2301 res = 0; 2302 env->vxsat = 0x1; 2303 } 2304 return res; 2305 } 2306 2307 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2308 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2309 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2310 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2311 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1) 2312 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2) 2313 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4) 2314 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 8) 2315 2316 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2317 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2318 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2319 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2320 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1) 2321 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2) 2322 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4) 2323 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8) 2324 2325 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2326 { 2327 int8_t res = a - b; 2328 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2329 res = a >= 0 ? 
INT8_MAX : INT8_MIN; 2330 env->vxsat = 0x1; 2331 } 2332 return res; 2333 } 2334 2335 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2336 { 2337 int16_t res = a - b; 2338 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2339 res = a >= 0 ? INT16_MAX : INT16_MIN; 2340 env->vxsat = 0x1; 2341 } 2342 return res; 2343 } 2344 2345 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2346 { 2347 int32_t res = a - b; 2348 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2349 res = a >= 0 ? INT32_MAX : INT32_MIN; 2350 env->vxsat = 0x1; 2351 } 2352 return res; 2353 } 2354 2355 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2356 { 2357 int64_t res = a - b; 2358 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2359 res = a >= 0 ? INT64_MAX : INT64_MIN; 2360 env->vxsat = 0x1; 2361 } 2362 return res; 2363 } 2364 2365 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2366 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2367 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2368 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2369 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1) 2370 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2) 2371 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4) 2372 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8) 2373 2374 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2375 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2376 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2377 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2378 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1) 2379 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2) 2380 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4) 2381 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8) 2382 2383 /* Vector Single-Width Averaging Add and Subtract */ 2384 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2385 { 2386 uint8_t d = extract64(v, shift, 1); 2387 uint8_t d1; 2388 uint64_t D1, D2; 2389 2390 if (shift == 0 || shift > 64) { 2391 return 0; 2392 } 2393 2394 d1 = extract64(v, shift - 1, 1); 2395 D1 = extract64(v, 0, shift); 2396 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2397 return d1; 2398 } else if (vxrm == 1) { /* round-to-nearest-even */ 2399 if (shift > 1) { 2400 D2 = extract64(v, 0, shift - 1); 2401 return d1 & ((D2 != 0) | d); 2402 } else { 2403 return d1 & d; 2404 } 2405 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2406 return !d & (D1 != 0); 2407 } 2408 return 0; /* round-down (truncate) */ 2409 } 2410 2411 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2412 { 2413 int64_t res = (int64_t)a + b; 2414 uint8_t round = get_round(vxrm, res, 1); 2415 2416 return (res >> 1) + round; 2417 } 2418 2419 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2420 { 2421 int64_t res = a + b; 2422 uint8_t round = get_round(vxrm, res, 1); 2423 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2424 2425 /* With signed overflow, bit 64 is inverse of bit 63. 
*/ 2426 return ((res >> 1) ^ over) + round; 2427 } 2428 2429 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2430 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2431 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2432 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2433 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1) 2434 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2) 2435 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4) 2436 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8) 2437 2438 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2439 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2440 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2441 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2442 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1) 2443 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2) 2444 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4) 2445 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8) 2446 2447 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2448 { 2449 int64_t res = (int64_t)a - b; 2450 uint8_t round = get_round(vxrm, res, 1); 2451 2452 return (res >> 1) + round; 2453 } 2454 2455 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2456 { 2457 int64_t res = (int64_t)a - b; 2458 uint8_t round = get_round(vxrm, res, 1); 2459 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2460 2461 /* With signed overflow, bit 64 is inverse of bit 63. */ 2462 return ((res >> 1) ^ over) + round; 2463 } 2464 2465 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2466 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2467 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2468 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2469 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1) 2470 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2) 2471 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4) 2472 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8) 2473 2474 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2475 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2476 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2477 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2478 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1) 2479 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2) 2480 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4) 2481 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8) 2482 2483 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2484 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2485 { 2486 uint8_t round; 2487 int16_t res; 2488 2489 res = (int16_t)a * (int16_t)b; 2490 round = get_round(vxrm, res, 7); 2491 res = (res >> 7) + round; 2492 2493 if (res > INT8_MAX) { 2494 env->vxsat = 0x1; 2495 return INT8_MAX; 2496 } else if (res < INT8_MIN) { 2497 env->vxsat = 0x1; 2498 return INT8_MIN; 2499 } else { 2500 return res; 2501 } 2502 } 2503 2504 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2505 { 2506 uint8_t round; 2507 int32_t res; 2508 2509 res = (int32_t)a * (int32_t)b; 2510 round = get_round(vxrm, res, 15); 2511 res = (res >> 15) + round; 2512 2513 if (res > INT16_MAX) { 2514 env->vxsat = 0x1; 2515 return INT16_MAX; 2516 } else if (res < INT16_MIN) { 2517 env->vxsat = 0x1; 2518 return INT16_MIN; 2519 } else { 2520 return res; 2521 } 2522 } 2523 2524 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2525 { 2526 uint8_t round; 2527 int64_t res; 2528 2529 res = (int64_t)a * (int64_t)b; 2530 round = get_round(vxrm, res, 31); 2531 res = (res >> 31) + round; 2532 2533 if (res > INT32_MAX) { 2534 env->vxsat = 0x1; 2535 return 
INT32_MAX; 2536 } else if (res < INT32_MIN) { 2537 env->vxsat = 0x1; 2538 return INT32_MIN; 2539 } else { 2540 return res; 2541 } 2542 } 2543 2544 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2545 { 2546 uint8_t round; 2547 uint64_t hi_64, lo_64; 2548 int64_t res; 2549 2550 if (a == INT64_MIN && b == INT64_MIN) { 2551 env->vxsat = 1; 2552 return INT64_MAX; 2553 } 2554 2555 muls64(&lo_64, &hi_64, a, b); 2556 round = get_round(vxrm, lo_64, 63); 2557 /* 2558 * Cannot overflow, as there are always 2559 * 2 sign bits after multiply. 2560 */ 2561 res = (hi_64 << 1) | (lo_64 >> 63); 2562 if (round) { 2563 if (res == INT64_MAX) { 2564 env->vxsat = 1; 2565 } else { 2566 res += 1; 2567 } 2568 } 2569 return res; 2570 } 2571 2572 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2573 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2574 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2575 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2576 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1) 2577 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2) 2578 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4) 2579 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8) 2580 2581 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2582 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2583 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2584 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2585 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1) 2586 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2) 2587 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4) 2588 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8) 2589 2590 /* Vector Widening Saturating Scaled Multiply-Add */ 2591 static inline uint16_t 2592 vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, 2593 uint16_t c) 2594 { 2595 uint8_t round; 2596 uint16_t res = (uint16_t)a * b; 2597 2598 round = get_round(vxrm, res, 4); 2599 res = (res >> 4) + round; 2600 return saddu16(env, vxrm, c, res); 2601 } 2602 2603 static inline uint32_t 2604 vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, 2605 uint32_t c) 2606 { 2607 uint8_t round; 2608 uint32_t res = (uint32_t)a * b; 2609 2610 round = get_round(vxrm, res, 8); 2611 res = (res >> 8) + round; 2612 return saddu32(env, vxrm, c, res); 2613 } 2614 2615 static inline uint64_t 2616 vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, 2617 uint64_t c) 2618 { 2619 uint8_t round; 2620 uint64_t res = (uint64_t)a * b; 2621 2622 round = get_round(vxrm, res, 16); 2623 res = (res >> 16) + round; 2624 return saddu64(env, vxrm, c, res); 2625 } 2626 2627 #define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2628 static inline void \ 2629 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2630 CPURISCVState *env, int vxrm) \ 2631 { \ 2632 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2633 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2634 TD d = *((TD *)vd + HD(i)); \ 2635 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ 2636 } 2637 2638 RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) 2639 RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) 2640 RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) 2641 GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2) 2642 GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4) 2643 GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8) 2644 2645 #define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2646 static inline void \ 2647 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2648 CPURISCVState *env, int vxrm) \ 2649 { \ 2650 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2651 TD d = *((TD *)vd + 
HD(i)); \ 2652 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ 2653 } 2654 2655 RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) 2656 RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) 2657 RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) 2658 GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2) 2659 GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4) 2660 GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8) 2661 2662 static inline int16_t 2663 vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) 2664 { 2665 uint8_t round; 2666 int16_t res = (int16_t)a * b; 2667 2668 round = get_round(vxrm, res, 4); 2669 res = (res >> 4) + round; 2670 return sadd16(env, vxrm, c, res); 2671 } 2672 2673 static inline int32_t 2674 vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) 2675 { 2676 uint8_t round; 2677 int32_t res = (int32_t)a * b; 2678 2679 round = get_round(vxrm, res, 8); 2680 res = (res >> 8) + round; 2681 return sadd32(env, vxrm, c, res); 2682 2683 } 2684 2685 static inline int64_t 2686 vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) 2687 { 2688 uint8_t round; 2689 int64_t res = (int64_t)a * b; 2690 2691 round = get_round(vxrm, res, 16); 2692 res = (res >> 16) + round; 2693 return sadd64(env, vxrm, c, res); 2694 } 2695 2696 RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) 2697 RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) 2698 RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) 2699 GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2) 2700 GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4) 2701 GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8) 2702 RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) 2703 RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) 2704 RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) 2705 GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2) 2706 GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4) 2707 GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8) 2708 2709 static inline int16_t 2710 vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) 2711 { 2712 uint8_t round; 2713 int16_t res = a * (int16_t)b; 2714 2715 round = get_round(vxrm, res, 4); 2716 res = (res >> 4) + round; 2717 return ssub16(env, vxrm, c, res); 2718 } 2719 2720 static inline int32_t 2721 vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) 2722 { 2723 uint8_t round; 2724 int32_t res = a * (int32_t)b; 2725 2726 round = get_round(vxrm, res, 8); 2727 res = (res >> 8) + round; 2728 return ssub32(env, vxrm, c, res); 2729 } 2730 2731 static inline int64_t 2732 vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) 2733 { 2734 uint8_t round; 2735 int64_t res = a * (int64_t)b; 2736 2737 round = get_round(vxrm, res, 16); 2738 res = (res >> 16) + round; 2739 return ssub64(env, vxrm, c, res); 2740 } 2741 2742 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) 2743 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) 2744 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) 2745 GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 2) 2746 GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4) 2747 GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8) 2748 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) 2749 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) 2750 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) 2751 GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2) 2752 GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4) 2753 GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8) 2754 2755 
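/*
 * Worked example for the scaled widening MACs above (illustrative only,
 * arbitrary values): vwsmaccu8 with vxrm = 0 (round-to-nearest-up),
 * a = 200, b = 3, c = 1000:
 *
 *   res   = (uint16_t)200 * 3 = 600
 *   round = get_round(0, 600, 4) = 1           bit 3 of 600 is set (600 / 16 = 37.5)
 *   res   = (600 >> 4) + 1 = 38                product scaled down by SEW/2 = 4 bits
 *   return saddu16(env, 0, 1000, 38) = 1038    saturating accumulate, no overflow
 */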
static inline int16_t 2756 vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) 2757 { 2758 uint8_t round; 2759 int16_t res = (int16_t)a * b; 2760 2761 round = get_round(vxrm, res, 4); 2762 res = (res >> 4) + round; 2763 return ssub16(env, vxrm, c, res); 2764 } 2765 2766 static inline int32_t 2767 vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) 2768 { 2769 uint8_t round; 2770 int32_t res = (int32_t)a * b; 2771 2772 round = get_round(vxrm, res, 8); 2773 res = (res >> 8) + round; 2774 return ssub32(env, vxrm, c, res); 2775 } 2776 2777 static inline int64_t 2778 vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) 2779 { 2780 uint8_t round; 2781 int64_t res = (int64_t)a * b; 2782 2783 round = get_round(vxrm, res, 16); 2784 res = (res >> 16) + round; 2785 return ssub64(env, vxrm, c, res); 2786 } 2787 2788 RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8) 2789 RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) 2790 RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) 2791 GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2) 2792 GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4) 2793 GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8) 2794 2795 /* Vector Single-Width Scaling Shift Instructions */ 2796 static inline uint8_t 2797 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2798 { 2799 uint8_t round, shift = b & 0x7; 2800 uint8_t res; 2801 2802 round = get_round(vxrm, a, shift); 2803 res = (a >> shift) + round; 2804 return res; 2805 } 2806 static inline uint16_t 2807 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2808 { 2809 uint8_t round, shift = b & 0xf; 2810 uint16_t res; 2811 2812 round = get_round(vxrm, a, shift); 2813 res = (a >> shift) + round; 2814 return res; 2815 } 2816 static inline uint32_t 2817 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2818 { 2819 uint8_t round, shift = b & 0x1f; 2820 uint32_t res; 2821 2822 round = get_round(vxrm, a, shift); 2823 res = (a >> shift) + round; 2824 return res; 2825 } 2826 static inline uint64_t 2827 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2828 { 2829 uint8_t round, shift = b & 0x3f; 2830 uint64_t res; 2831 2832 round = get_round(vxrm, a, shift); 2833 res = (a >> shift) + round; 2834 return res; 2835 } 2836 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2837 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2838 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2839 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2840 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1) 2841 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2) 2842 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4) 2843 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8) 2844 2845 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2846 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2847 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2848 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2849 GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1) 2850 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2) 2851 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4) 2852 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8) 2853 2854 static inline int8_t 2855 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2856 { 2857 uint8_t round, shift = b & 0x7; 2858 int8_t res; 2859 2860 round = get_round(vxrm, a, shift); 2861 res = (a >> shift) + round; 2862 return res; 2863 } 2864 static inline int16_t 2865 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2866 { 2867 uint8_t round, shift = b & 0xf; 2868 int16_t 
res; 2869 2870 round = get_round(vxrm, a, shift); 2871 res = (a >> shift) + round; 2872 return res; 2873 } 2874 static inline int32_t 2875 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2876 { 2877 uint8_t round, shift = b & 0x1f; 2878 int32_t res; 2879 2880 round = get_round(vxrm, a, shift); 2881 res = (a >> shift) + round; 2882 return res; 2883 } 2884 static inline int64_t 2885 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2886 { 2887 uint8_t round, shift = b & 0x3f; 2888 int64_t res; 2889 2890 round = get_round(vxrm, a, shift); 2891 res = (a >> shift) + round; 2892 return res; 2893 } 2894 2895 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 2896 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 2897 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 2898 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 2899 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1) 2900 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2) 2901 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4) 2902 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8) 2903 2904 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 2905 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 2906 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 2907 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 2908 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1) 2909 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2) 2910 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4) 2911 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8) 2912 2913 /* Vector Narrowing Fixed-Point Clip Instructions */ 2914 static inline int8_t 2915 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 2916 { 2917 uint8_t round, shift = b & 0xf; 2918 int16_t res; 2919 2920 round = get_round(vxrm, a, shift); 2921 res = (a >> shift) + round; 2922 if (res > INT8_MAX) { 2923 env->vxsat = 0x1; 2924 return INT8_MAX; 2925 } else if (res < INT8_MIN) { 2926 env->vxsat = 0x1; 2927 return INT8_MIN; 2928 } else { 2929 return res; 2930 } 2931 } 2932 2933 static inline int16_t 2934 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 2935 { 2936 uint8_t round, shift = b & 0x1f; 2937 int32_t res; 2938 2939 round = get_round(vxrm, a, shift); 2940 res = (a >> shift) + round; 2941 if (res > INT16_MAX) { 2942 env->vxsat = 0x1; 2943 return INT16_MAX; 2944 } else if (res < INT16_MIN) { 2945 env->vxsat = 0x1; 2946 return INT16_MIN; 2947 } else { 2948 return res; 2949 } 2950 } 2951 2952 static inline int32_t 2953 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 2954 { 2955 uint8_t round, shift = b & 0x3f; 2956 int64_t res; 2957 2958 round = get_round(vxrm, a, shift); 2959 res = (a >> shift) + round; 2960 if (res > INT32_MAX) { 2961 env->vxsat = 0x1; 2962 return INT32_MAX; 2963 } else if (res < INT32_MIN) { 2964 env->vxsat = 0x1; 2965 return INT32_MIN; 2966 } else { 2967 return res; 2968 } 2969 } 2970 2971 RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 2972 RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 2973 RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 2974 GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1) 2975 GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2) 2976 GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4) 2977 2978 RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8) 2979 RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16) 2980 RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32) 2981 GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1) 2982 GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2) 2983 GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4) 2984 2985 static inline uint8_t 2986 vnclipu8(CPURISCVState *env, int vxrm, uint16_t 
a, uint8_t b) 2987 { 2988 uint8_t round, shift = b & 0xf; 2989 uint16_t res; 2990 2991 round = get_round(vxrm, a, shift); 2992 res = (a >> shift) + round; 2993 if (res > UINT8_MAX) { 2994 env->vxsat = 0x1; 2995 return UINT8_MAX; 2996 } else { 2997 return res; 2998 } 2999 } 3000 3001 static inline uint16_t 3002 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 3003 { 3004 uint8_t round, shift = b & 0x1f; 3005 uint32_t res; 3006 3007 round = get_round(vxrm, a, shift); 3008 res = (a >> shift) + round; 3009 if (res > UINT16_MAX) { 3010 env->vxsat = 0x1; 3011 return UINT16_MAX; 3012 } else { 3013 return res; 3014 } 3015 } 3016 3017 static inline uint32_t 3018 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 3019 { 3020 uint8_t round, shift = b & 0x3f; 3021 int64_t res; 3022 3023 round = get_round(vxrm, a, shift); 3024 res = (a >> shift) + round; 3025 if (res > UINT32_MAX) { 3026 env->vxsat = 0x1; 3027 return UINT32_MAX; 3028 } else { 3029 return res; 3030 } 3031 } 3032 3033 RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 3034 RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 3035 RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 3036 GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1) 3037 GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2) 3038 GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4) 3039 3040 RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8) 3041 RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16) 3042 RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32) 3043 GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1) 3044 GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2) 3045 GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4) 3046 3047 /* 3048 *** Vector Float Point Arithmetic Instructions 3049 */ 3050 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 3051 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3052 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3053 CPURISCVState *env) \ 3054 { \ 3055 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3056 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3057 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3058 } 3059 3060 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ) \ 3061 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3062 void *vs2, CPURISCVState *env, \ 3063 uint32_t desc) \ 3064 { \ 3065 uint32_t vm = vext_vm(desc); \ 3066 uint32_t vl = env->vl; \ 3067 uint32_t i; \ 3068 \ 3069 for (i = 0; i < vl; i++) { \ 3070 if (!vm && !vext_elem_mask(v0, i)) { \ 3071 continue; \ 3072 } \ 3073 do_##NAME(vd, vs1, vs2, i, env); \ 3074 } \ 3075 } 3076 3077 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3078 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3079 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3080 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2) 3081 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4) 3082 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8) 3083 3084 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3085 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3086 CPURISCVState *env) \ 3087 { \ 3088 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3089 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 3090 } 3091 3092 #define GEN_VEXT_VF(NAME, ESZ, DSZ) \ 3093 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3094 void *vs2, CPURISCVState *env, \ 3095 uint32_t desc) \ 3096 { \ 3097 uint32_t vm = vext_vm(desc); \ 3098 uint32_t vl = env->vl; \ 3099 uint32_t i; \ 3100 \ 3101 for (i = 0; i < vl; i++) { \ 3102 if (!vm && !vext_elem_mask(v0, i)) { \ 3103 continue; \ 3104 } \ 3105 
do_##NAME(vd, s1, vs2, i, env); \ 3106 } \ 3107 } 3108 3109 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3110 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3111 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3112 GEN_VEXT_VF(vfadd_vf_h, 2, 2) 3113 GEN_VEXT_VF(vfadd_vf_w, 4, 4) 3114 GEN_VEXT_VF(vfadd_vf_d, 8, 8) 3115 3116 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3117 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3118 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3119 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2) 3120 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4) 3121 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8) 3122 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3123 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3124 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3125 GEN_VEXT_VF(vfsub_vf_h, 2, 2) 3126 GEN_VEXT_VF(vfsub_vf_w, 4, 4) 3127 GEN_VEXT_VF(vfsub_vf_d, 8, 8) 3128 3129 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3130 { 3131 return float16_sub(b, a, s); 3132 } 3133 3134 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3135 { 3136 return float32_sub(b, a, s); 3137 } 3138 3139 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3140 { 3141 return float64_sub(b, a, s); 3142 } 3143 3144 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3145 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3146 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3147 GEN_VEXT_VF(vfrsub_vf_h, 2, 2) 3148 GEN_VEXT_VF(vfrsub_vf_w, 4, 4) 3149 GEN_VEXT_VF(vfrsub_vf_d, 8, 8) 3150 3151 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3152 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3153 { 3154 return float32_add(float16_to_float32(a, true, s), 3155 float16_to_float32(b, true, s), s); 3156 } 3157 3158 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3159 { 3160 return float64_add(float32_to_float64(a, s), 3161 float32_to_float64(b, s), s); 3162 3163 } 3164 3165 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3166 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3167 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4) 3168 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8) 3169 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3170 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3171 GEN_VEXT_VF(vfwadd_vf_h, 2, 4) 3172 GEN_VEXT_VF(vfwadd_vf_w, 4, 8) 3173 3174 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3175 { 3176 return float32_sub(float16_to_float32(a, true, s), 3177 float16_to_float32(b, true, s), s); 3178 } 3179 3180 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3181 { 3182 return float64_sub(float32_to_float64(a, s), 3183 float32_to_float64(b, s), s); 3184 3185 } 3186 3187 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3188 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3189 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4) 3190 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8) 3191 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3192 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3193 GEN_VEXT_VF(vfwsub_vf_h, 2, 4) 3194 GEN_VEXT_VF(vfwsub_vf_w, 4, 8) 3195 3196 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3197 { 3198 return float32_add(a, float16_to_float32(b, true, s), s); 3199 } 3200 3201 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3202 { 3203 
return float64_add(a, float32_to_float64(b, s), s); 3204 } 3205 3206 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3207 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3208 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4) 3209 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8) 3210 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3211 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3212 GEN_VEXT_VF(vfwadd_wf_h, 2, 4) 3213 GEN_VEXT_VF(vfwadd_wf_w, 4, 8) 3214 3215 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3216 { 3217 return float32_sub(a, float16_to_float32(b, true, s), s); 3218 } 3219 3220 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3221 { 3222 return float64_sub(a, float32_to_float64(b, s), s); 3223 } 3224 3225 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3226 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3227 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4) 3228 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8) 3229 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3230 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3231 GEN_VEXT_VF(vfwsub_wf_h, 2, 4) 3232 GEN_VEXT_VF(vfwsub_wf_w, 4, 8) 3233 3234 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3235 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3236 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3237 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3238 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2) 3239 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4) 3240 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8) 3241 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3242 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3243 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3244 GEN_VEXT_VF(vfmul_vf_h, 2, 2) 3245 GEN_VEXT_VF(vfmul_vf_w, 4, 4) 3246 GEN_VEXT_VF(vfmul_vf_d, 8, 8) 3247 3248 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3249 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3250 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3251 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2) 3252 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4) 3253 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8) 3254 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3255 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3256 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3257 GEN_VEXT_VF(vfdiv_vf_h, 2, 2) 3258 GEN_VEXT_VF(vfdiv_vf_w, 4, 4) 3259 GEN_VEXT_VF(vfdiv_vf_d, 8, 8) 3260 3261 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3262 { 3263 return float16_div(b, a, s); 3264 } 3265 3266 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3267 { 3268 return float32_div(b, a, s); 3269 } 3270 3271 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3272 { 3273 return float64_div(b, a, s); 3274 } 3275 3276 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3277 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3278 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3279 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2) 3280 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4) 3281 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8) 3282 3283 /* Vector Widening Floating-Point Multiply */ 3284 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3285 { 3286 return float32_mul(float16_to_float32(a, true, s), 3287 float16_to_float32(b, true, s), s); 3288 } 3289 3290 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3291 { 3292 
return float64_mul(float32_to_float64(a, s), 3293 float32_to_float64(b, s), s); 3294 3295 } 3296 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3297 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3298 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4) 3299 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8) 3300 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3301 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3302 GEN_VEXT_VF(vfwmul_vf_h, 2, 4) 3303 GEN_VEXT_VF(vfwmul_vf_w, 4, 8) 3304 3305 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3306 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3307 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3308 CPURISCVState *env) \ 3309 { \ 3310 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3311 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3312 TD d = *((TD *)vd + HD(i)); \ 3313 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3314 } 3315 3316 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3317 { 3318 return float16_muladd(a, b, d, 0, s); 3319 } 3320 3321 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3322 { 3323 return float32_muladd(a, b, d, 0, s); 3324 } 3325 3326 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3327 { 3328 return float64_muladd(a, b, d, 0, s); 3329 } 3330 3331 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3332 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3333 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3334 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2) 3335 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4) 3336 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8) 3337 3338 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3339 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3340 CPURISCVState *env) \ 3341 { \ 3342 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3343 TD d = *((TD *)vd + HD(i)); \ 3344 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3345 } 3346 3347 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3348 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3349 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3350 GEN_VEXT_VF(vfmacc_vf_h, 2, 2) 3351 GEN_VEXT_VF(vfmacc_vf_w, 4, 4) 3352 GEN_VEXT_VF(vfmacc_vf_d, 8, 8) 3353 3354 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3355 { 3356 return float16_muladd(a, b, d, 3357 float_muladd_negate_c | float_muladd_negate_product, s); 3358 } 3359 3360 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3361 { 3362 return float32_muladd(a, b, d, 3363 float_muladd_negate_c | float_muladd_negate_product, s); 3364 } 3365 3366 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3367 { 3368 return float64_muladd(a, b, d, 3369 float_muladd_negate_c | float_muladd_negate_product, s); 3370 } 3371 3372 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3373 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3374 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3375 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2) 3376 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4) 3377 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8) 3378 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3379 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3380 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3381 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2) 3382 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4) 3383 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8) 3384 3385 
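/*
 * Illustrative expansion (added commentary, not generated code): RVVCALL,
 * defined earlier in this file, simply applies the operation macro to the
 * remaining arguments, so a pair of lines such as
 *
 *     RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
 *     GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2)
 *
 * is expected to expand to roughly the code below, assuming OP_UUU_H
 * supplies uint16_t for TD/T1/T2/TX1/TX2 as its name suggests (the ESZ/DSZ
 * arguments are not used by this variant of the generator):
 *
 *     static void do_vfmacc_vv_h(void *vd, void *vs1, void *vs2, int i,
 *                                CPURISCVState *env)
 *     {
 *         uint16_t s1 = *((uint16_t *)vs1 + H2(i));
 *         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *         uint16_t d = *((uint16_t *)vd + H2(i));
 *         *((uint16_t *)vd + H2(i)) = fmacc16(s2, s1, d, &env->fp_status);
 *     }
 *
 *     void helper_vfmacc_vv_h(void *vd, void *v0, void *vs1,
 *                             void *vs2, CPURISCVState *env,
 *                             uint32_t desc)
 *     {
 *         uint32_t vm = vext_vm(desc);
 *         uint32_t vl = env->vl;
 *         uint32_t i;
 *
 *         for (i = 0; i < vl; i++) {
 *             if (!vm && !vext_elem_mask(v0, i)) {
 *                 continue;
 *             }
 *             do_vfmacc_vv_h(vd, vs1, vs2, i, env);
 *         }
 *     }
 *
 * i.e. vd[i] = vs1[i] * vs2[i] + vd[i] for every active element, while
 * masked-off elements are skipped and left undisturbed; rounding mode and
 * exception flags come from env->fp_status.
 */
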
static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3386 { 3387 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3388 } 3389 3390 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3391 { 3392 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3393 } 3394 3395 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3396 { 3397 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3398 } 3399 3400 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3401 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3402 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3403 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2) 3404 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4) 3405 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8) 3406 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3407 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3408 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3409 GEN_VEXT_VF(vfmsac_vf_h, 2, 2) 3410 GEN_VEXT_VF(vfmsac_vf_w, 4, 4) 3411 GEN_VEXT_VF(vfmsac_vf_d, 8, 8) 3412 3413 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3414 { 3415 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3416 } 3417 3418 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3419 { 3420 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3421 } 3422 3423 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3424 { 3425 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3426 } 3427 3428 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3429 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3430 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3431 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2) 3432 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4) 3433 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8) 3434 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3435 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3436 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3437 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2) 3438 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4) 3439 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8) 3440 3441 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3442 { 3443 return float16_muladd(d, b, a, 0, s); 3444 } 3445 3446 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3447 { 3448 return float32_muladd(d, b, a, 0, s); 3449 } 3450 3451 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3452 { 3453 return float64_muladd(d, b, a, 0, s); 3454 } 3455 3456 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3457 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3458 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3459 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2) 3460 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4) 3461 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8) 3462 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3463 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3464 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3465 GEN_VEXT_VF(vfmadd_vf_h, 2, 2) 3466 GEN_VEXT_VF(vfmadd_vf_w, 4, 4) 3467 GEN_VEXT_VF(vfmadd_vf_d, 8, 8) 3468 3469 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3470 { 3471 return float16_muladd(d, b, a, 3472 float_muladd_negate_c | float_muladd_negate_product, s); 3473 } 3474 3475 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, 
float_status *s) 3476 { 3477 return float32_muladd(d, b, a, 3478 float_muladd_negate_c | float_muladd_negate_product, s); 3479 } 3480 3481 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3482 { 3483 return float64_muladd(d, b, a, 3484 float_muladd_negate_c | float_muladd_negate_product, s); 3485 } 3486 3487 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3488 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3489 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3490 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2) 3491 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4) 3492 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8) 3493 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3494 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3495 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3496 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2) 3497 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4) 3498 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8) 3499 3500 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3501 { 3502 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3503 } 3504 3505 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3506 { 3507 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3508 } 3509 3510 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3511 { 3512 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3513 } 3514 3515 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3516 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3517 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3518 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2) 3519 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4) 3520 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8) 3521 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3522 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3523 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3524 GEN_VEXT_VF(vfmsub_vf_h, 2, 2) 3525 GEN_VEXT_VF(vfmsub_vf_w, 4, 4) 3526 GEN_VEXT_VF(vfmsub_vf_d, 8, 8) 3527 3528 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3529 { 3530 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3531 } 3532 3533 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3534 { 3535 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3536 } 3537 3538 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3539 { 3540 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3541 } 3542 3543 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3544 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3545 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3546 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2) 3547 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4) 3548 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8) 3549 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3550 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3551 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3552 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2) 3553 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4) 3554 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8) 3555 3556 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3557 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3558 { 3559 return float32_muladd(float16_to_float32(a, true, s), 3560 float16_to_float32(b, true, s), d, 0, s); 3561 } 3562 3563 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t 
d, float_status *s) 3564 { 3565 return float64_muladd(float32_to_float64(a, s), 3566 float32_to_float64(b, s), d, 0, s); 3567 } 3568 3569 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3570 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3571 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4) 3572 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8) 3573 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3574 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3575 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4) 3576 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8) 3577 3578 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3579 { 3580 return float32_muladd(float16_to_float32(a, true, s), 3581 float16_to_float32(b, true, s), d, 3582 float_muladd_negate_c | float_muladd_negate_product, s); 3583 } 3584 3585 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3586 { 3587 return float64_muladd(float32_to_float64(a, s), 3588 float32_to_float64(b, s), d, 3589 float_muladd_negate_c | float_muladd_negate_product, s); 3590 } 3591 3592 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3593 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3594 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4) 3595 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8) 3596 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3597 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3598 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4) 3599 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8) 3600 3601 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3602 { 3603 return float32_muladd(float16_to_float32(a, true, s), 3604 float16_to_float32(b, true, s), d, 3605 float_muladd_negate_c, s); 3606 } 3607 3608 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3609 { 3610 return float64_muladd(float32_to_float64(a, s), 3611 float32_to_float64(b, s), d, 3612 float_muladd_negate_c, s); 3613 } 3614 3615 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3616 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3617 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4) 3618 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8) 3619 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3620 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3621 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4) 3622 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8) 3623 3624 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3625 { 3626 return float32_muladd(float16_to_float32(a, true, s), 3627 float16_to_float32(b, true, s), d, 3628 float_muladd_negate_product, s); 3629 } 3630 3631 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3632 { 3633 return float64_muladd(float32_to_float64(a, s), 3634 float32_to_float64(b, s), d, 3635 float_muladd_negate_product, s); 3636 } 3637 3638 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3639 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3640 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4) 3641 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8) 3642 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3643 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3644 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4) 3645 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8) 3646 3647 /* Vector Floating-Point Square-Root Instruction */ 3648 /* (TD, T2, TX2) */ 3649 #define OP_UU_H uint16_t, uint16_t, uint16_t 3650 #define OP_UU_W uint32_t, uint32_t, uint32_t 3651 #define OP_UU_D uint64_t, uint64_t, uint64_t 3652 3653 #define 
OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3654 static void do_##NAME(void *vd, void *vs2, int i, \ 3655 CPURISCVState *env) \ 3656 { \ 3657 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3658 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3659 } 3660 3661 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ) \ 3662 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3663 CPURISCVState *env, uint32_t desc) \ 3664 { \ 3665 uint32_t vm = vext_vm(desc); \ 3666 uint32_t vl = env->vl; \ 3667 uint32_t i; \ 3668 \ 3669 if (vl == 0) { \ 3670 return; \ 3671 } \ 3672 for (i = 0; i < vl; i++) { \ 3673 if (!vm && !vext_elem_mask(v0, i)) { \ 3674 continue; \ 3675 } \ 3676 do_##NAME(vd, vs2, i, env); \ 3677 } \ 3678 } 3679 3680 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3681 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3682 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3683 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2) 3684 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4) 3685 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8) 3686 3687 /* Vector Floating-Point MIN/MAX Instructions */ 3688 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum) 3689 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum) 3690 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum) 3691 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2) 3692 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4) 3693 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8) 3694 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum) 3695 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum) 3696 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum) 3697 GEN_VEXT_VF(vfmin_vf_h, 2, 2) 3698 GEN_VEXT_VF(vfmin_vf_w, 4, 4) 3699 GEN_VEXT_VF(vfmin_vf_d, 8, 8) 3700 3701 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum) 3702 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum) 3703 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum) 3704 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2) 3705 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4) 3706 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8) 3707 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum) 3708 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum) 3709 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) 3710 GEN_VEXT_VF(vfmax_vf_h, 2, 2) 3711 GEN_VEXT_VF(vfmax_vf_w, 4, 4) 3712 GEN_VEXT_VF(vfmax_vf_d, 8, 8) 3713 3714 /* Vector Floating-Point Sign-Injection Instructions */ 3715 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3716 { 3717 return deposit64(b, 0, 15, a); 3718 } 3719 3720 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3721 { 3722 return deposit64(b, 0, 31, a); 3723 } 3724 3725 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3726 { 3727 return deposit64(b, 0, 63, a); 3728 } 3729 3730 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3731 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3732 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3733 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2) 3734 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4) 3735 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8) 3736 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3737 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3738 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3739 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2) 3740 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4) 3741 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8) 3742 3743 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3744 { 3745 return deposit64(~b, 0, 15, a); 3746 } 3747 3748 static uint32_t fsgnjn32(uint32_t a, 
uint32_t b, float_status *s) 3749 { 3750 return deposit64(~b, 0, 31, a); 3751 } 3752 3753 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3754 { 3755 return deposit64(~b, 0, 63, a); 3756 } 3757 3758 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3759 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3760 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3761 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2) 3762 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4) 3763 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8) 3764 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3765 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3766 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3767 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2) 3768 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4) 3769 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8) 3770 3771 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3772 { 3773 return deposit64(b ^ a, 0, 15, a); 3774 } 3775 3776 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3777 { 3778 return deposit64(b ^ a, 0, 31, a); 3779 } 3780 3781 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s) 3782 { 3783 return deposit64(b ^ a, 0, 63, a); 3784 } 3785 3786 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3787 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3788 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3789 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2) 3790 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4) 3791 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8) 3792 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3793 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3794 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3795 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2) 3796 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4) 3797 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8) 3798 3799 /* Vector Floating-Point Compare Instructions */ 3800 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3801 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3802 CPURISCVState *env, uint32_t desc) \ 3803 { \ 3804 uint32_t vm = vext_vm(desc); \ 3805 uint32_t vl = env->vl; \ 3806 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 3807 uint32_t i; \ 3808 \ 3809 for (i = 0; i < vl; i++) { \ 3810 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3811 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3812 if (!vm && !vext_elem_mask(v0, i)) { \ 3813 continue; \ 3814 } \ 3815 vext_set_elem_mask(vd, i, \ 3816 DO_OP(s2, s1, &env->fp_status)); \ 3817 } \ 3818 for (; i < vlmax; i++) { \ 3819 vext_set_elem_mask(vd, i, 0); \ 3820 } \ 3821 } 3822 3823 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3824 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3825 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3826 3827 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3828 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3829 CPURISCVState *env, uint32_t desc) \ 3830 { \ 3831 uint32_t vm = vext_vm(desc); \ 3832 uint32_t vl = env->vl; \ 3833 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 3834 uint32_t i; \ 3835 \ 3836 for (i = 0; i < vl; i++) { \ 3837 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3838 if (!vm && !vext_elem_mask(v0, i)) { \ 3839 continue; \ 3840 } \ 3841 vext_set_elem_mask(vd, i, \ 3842 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3843 } \ 3844 for (; i < vlmax; i++) { \ 3845 vext_set_elem_mask(vd, i, 0); \ 3846 } \ 3847 } 3848 3849 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3850 
GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3851 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3852 3853 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3854 { 3855 FloatRelation compare = float16_compare_quiet(a, b, s); 3856 return compare != float_relation_equal; 3857 } 3858 3859 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 3860 { 3861 FloatRelation compare = float32_compare_quiet(a, b, s); 3862 return compare != float_relation_equal; 3863 } 3864 3865 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 3866 { 3867 FloatRelation compare = float64_compare_quiet(a, b, s); 3868 return compare != float_relation_equal; 3869 } 3870 3871 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 3872 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32) 3873 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 3874 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 3875 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 3876 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 3877 3878 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 3879 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 3880 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 3881 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 3882 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 3883 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 3884 3885 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 3886 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 3887 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 3888 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 3889 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 3890 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 3891 3892 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 3893 { 3894 FloatRelation compare = float16_compare(a, b, s); 3895 return compare == float_relation_greater; 3896 } 3897 3898 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 3899 { 3900 FloatRelation compare = float32_compare(a, b, s); 3901 return compare == float_relation_greater; 3902 } 3903 3904 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 3905 { 3906 FloatRelation compare = float64_compare(a, b, s); 3907 return compare == float_relation_greater; 3908 } 3909 3910 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 3911 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 3912 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 3913 3914 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 3915 { 3916 FloatRelation compare = float16_compare(a, b, s); 3917 return compare == float_relation_greater || 3918 compare == float_relation_equal; 3919 } 3920 3921 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 3922 { 3923 FloatRelation compare = float32_compare(a, b, s); 3924 return compare == float_relation_greater || 3925 compare == float_relation_equal; 3926 } 3927 3928 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 3929 { 3930 FloatRelation compare = float64_compare(a, b, s); 3931 return compare == float_relation_greater || 3932 compare == float_relation_equal; 3933 } 3934 3935 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 3936 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 3937 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 3938 3939 GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet) 3940 GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet) 3941 
GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet) 3942 GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet) 3943 GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, !float32_unordered_quiet) 3944 GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet) 3945 3946 /* Vector Floating-Point Classify Instruction */ 3947 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3948 static void do_##NAME(void *vd, void *vs2, int i) \ 3949 { \ 3950 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3951 *((TD *)vd + HD(i)) = OP(s2); \ 3952 } 3953 3954 #define GEN_VEXT_V(NAME, ESZ, DSZ) \ 3955 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3956 CPURISCVState *env, uint32_t desc) \ 3957 { \ 3958 uint32_t vm = vext_vm(desc); \ 3959 uint32_t vl = env->vl; \ 3960 uint32_t i; \ 3961 \ 3962 for (i = 0; i < vl; i++) { \ 3963 if (!vm && !vext_elem_mask(v0, i)) { \ 3964 continue; \ 3965 } \ 3966 do_##NAME(vd, vs2, i); \ 3967 } \ 3968 } 3969 3970 target_ulong fclass_h(uint64_t frs1) 3971 { 3972 float16 f = frs1; 3973 bool sign = float16_is_neg(f); 3974 3975 if (float16_is_infinity(f)) { 3976 return sign ? 1 << 0 : 1 << 7; 3977 } else if (float16_is_zero(f)) { 3978 return sign ? 1 << 3 : 1 << 4; 3979 } else if (float16_is_zero_or_denormal(f)) { 3980 return sign ? 1 << 2 : 1 << 5; 3981 } else if (float16_is_any_nan(f)) { 3982 float_status s = { }; /* for snan_bit_is_one */ 3983 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 3984 } else { 3985 return sign ? 1 << 1 : 1 << 6; 3986 } 3987 } 3988 3989 target_ulong fclass_s(uint64_t frs1) 3990 { 3991 float32 f = frs1; 3992 bool sign = float32_is_neg(f); 3993 3994 if (float32_is_infinity(f)) { 3995 return sign ? 1 << 0 : 1 << 7; 3996 } else if (float32_is_zero(f)) { 3997 return sign ? 1 << 3 : 1 << 4; 3998 } else if (float32_is_zero_or_denormal(f)) { 3999 return sign ? 1 << 2 : 1 << 5; 4000 } else if (float32_is_any_nan(f)) { 4001 float_status s = { }; /* for snan_bit_is_one */ 4002 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4003 } else { 4004 return sign ? 1 << 1 : 1 << 6; 4005 } 4006 } 4007 4008 target_ulong fclass_d(uint64_t frs1) 4009 { 4010 float64 f = frs1; 4011 bool sign = float64_is_neg(f); 4012 4013 if (float64_is_infinity(f)) { 4014 return sign ? 1 << 0 : 1 << 7; 4015 } else if (float64_is_zero(f)) { 4016 return sign ? 1 << 3 : 1 << 4; 4017 } else if (float64_is_zero_or_denormal(f)) { 4018 return sign ? 1 << 2 : 1 << 5; 4019 } else if (float64_is_any_nan(f)) { 4020 float_status s = { }; /* for snan_bit_is_one */ 4021 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4022 } else { 4023 return sign ? 1 << 1 : 1 << 6; 4024 } 4025 } 4026 4027 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4028 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4029 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4030 GEN_VEXT_V(vfclass_v_h, 2, 2) 4031 GEN_VEXT_V(vfclass_v_w, 4, 4) 4032 GEN_VEXT_V(vfclass_v_d, 8, 8) 4033 4034 /* Vector Floating-Point Merge Instruction */ 4035 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \ 4036 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4037 CPURISCVState *env, uint32_t desc) \ 4038 { \ 4039 uint32_t vm = vext_vm(desc); \ 4040 uint32_t vl = env->vl; \ 4041 uint32_t i; \ 4042 \ 4043 for (i = 0; i < vl; i++) { \ 4044 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4045 *((ETYPE *)vd + H(i)) \ 4046 = (!vm && !vext_elem_mask(v0, i) ? 
s2 : s1); \ 4047 } \ 4048 } 4049 4050 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2) 4051 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4) 4052 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8) 4053 4054 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4055 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4056 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4057 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4058 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4059 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2) 4060 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4) 4061 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8) 4062 4063 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4064 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4065 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4066 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4067 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2) 4068 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4) 4069 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8) 4070 4071 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4072 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4073 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4074 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4075 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2) 4076 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4) 4077 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8) 4078 4079 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4080 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4081 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4082 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4083 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2) 4084 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4) 4085 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8) 4086 4087 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4088 /* (TD, T2, TX2) */ 4089 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4090 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4091 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4092 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4093 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4094 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4) 4095 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8) 4096 4097 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4098 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4099 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4100 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4) 4101 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8) 4102 4103 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4104 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4105 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4106 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4) 4107 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8) 4108 4109 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */ 4110 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4111 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4112 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4) 4113 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8) 4114 4115 /* 4116 * vfwcvt.f.f.v vd, vs2, vm # 4117 * Convert single-width float to double-width float. 
 */
static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
{
    return float16_to_float32(a, true, s);
}

RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4)
GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8)

/* Narrowing Floating-Point/Integer Type-Convert Instructions */
/* (TD, T2, TX2) */
#define NOP_UU_H uint16_t, uint32_t, uint32_t
#define NOP_UU_W uint32_t, uint64_t, uint64_t
/* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16)
RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32)
GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4)

/* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16)
RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32)
GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4)

/* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float. */
RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4)

/* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16)
RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32)
GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2)
GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4)

/* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float.
*/ 4158 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4159 { 4160 return float32_to_float16(a, true, s); 4161 } 4162 4163 RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) 4164 RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) 4165 GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2) 4166 GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4) 4167 4168 /* 4169 *** Vector Reduction Operations 4170 */ 4171 /* Vector Single-Width Integer Reduction Instructions */ 4172 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \ 4173 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4174 void *vs2, CPURISCVState *env, uint32_t desc) \ 4175 { \ 4176 uint32_t vm = vext_vm(desc); \ 4177 uint32_t vl = env->vl; \ 4178 uint32_t i; \ 4179 TD s1 = *((TD *)vs1 + HD(0)); \ 4180 \ 4181 for (i = 0; i < vl; i++) { \ 4182 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4183 if (!vm && !vext_elem_mask(v0, i)) { \ 4184 continue; \ 4185 } \ 4186 s1 = OP(s1, (TD)s2); \ 4187 } \ 4188 *((TD *)vd + HD(0)) = s1; \ 4189 } 4190 4191 /* vd[0] = sum(vs1[0], vs2[*]) */ 4192 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD) 4193 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD) 4194 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD) 4195 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD) 4196 4197 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4198 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX) 4199 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX) 4200 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX) 4201 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX) 4202 4203 /* vd[0] = max(vs1[0], vs2[*]) */ 4204 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX) 4205 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX) 4206 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX) 4207 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX) 4208 4209 /* vd[0] = minu(vs1[0], vs2[*]) */ 4210 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN) 4211 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN) 4212 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN) 4213 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN) 4214 4215 /* vd[0] = min(vs1[0], vs2[*]) */ 4216 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN) 4217 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN) 4218 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN) 4219 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN) 4220 4221 /* vd[0] = and(vs1[0], vs2[*]) */ 4222 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND) 4223 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND) 4224 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND) 4225 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND) 4226 4227 /* vd[0] = or(vs1[0], vs2[*]) */ 4228 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR) 4229 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR) 4230 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR) 4231 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR) 4232 4233 /* vd[0] = xor(vs1[0], vs2[*]) */ 4234 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR) 4235 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR) 4236 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR) 4237 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR) 4238 4239 /* Vector Widening Integer Reduction Instructions */ 4240 /* signed sum 
reduction into double-width accumulator */ 4241 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD) 4242 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD) 4243 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD) 4244 4245 /* Unsigned sum reduction into double-width accumulator */ 4246 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD) 4247 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD) 4248 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD) 4249 4250 /* Vector Single-Width Floating-Point Reduction Instructions */ 4251 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \ 4252 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4253 void *vs2, CPURISCVState *env, \ 4254 uint32_t desc) \ 4255 { \ 4256 uint32_t vm = vext_vm(desc); \ 4257 uint32_t vl = env->vl; \ 4258 uint32_t i; \ 4259 TD s1 = *((TD *)vs1 + HD(0)); \ 4260 \ 4261 for (i = 0; i < vl; i++) { \ 4262 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4263 if (!vm && !vext_elem_mask(v0, i)) { \ 4264 continue; \ 4265 } \ 4266 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4267 } \ 4268 *((TD *)vd + HD(0)) = s1; \ 4269 } 4270 4271 /* Unordered sum */ 4272 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add) 4273 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add) 4274 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add) 4275 4276 /* Maximum value */ 4277 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum) 4278 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum) 4279 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum) 4280 4281 /* Minimum value */ 4282 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum) 4283 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum) 4284 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum) 4285 4286 /* Vector Widening Floating-Point Reduction Instructions */ 4287 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4288 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4289 void *vs2, CPURISCVState *env, uint32_t desc) 4290 { 4291 uint32_t vm = vext_vm(desc); 4292 uint32_t vl = env->vl; 4293 uint32_t i; 4294 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4295 4296 for (i = 0; i < vl; i++) { 4297 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4298 if (!vm && !vext_elem_mask(v0, i)) { 4299 continue; 4300 } 4301 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4302 &env->fp_status); 4303 } 4304 *((uint32_t *)vd + H4(0)) = s1; 4305 } 4306 4307 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4308 void *vs2, CPURISCVState *env, uint32_t desc) 4309 { 4310 uint32_t vm = vext_vm(desc); 4311 uint32_t vl = env->vl; 4312 uint32_t i; 4313 uint64_t s1 = *((uint64_t *)vs1); 4314 4315 for (i = 0; i < vl; i++) { 4316 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4317 if (!vm && !vext_elem_mask(v0, i)) { 4318 continue; 4319 } 4320 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4321 &env->fp_status); 4322 } 4323 *((uint64_t *)vd) = s1; 4324 } 4325 4326 /* 4327 *** Vector Mask Operations 4328 */ 4329 /* Vector Mask-Register Logical Instructions */ 4330 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4331 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4332 void *vs2, CPURISCVState *env, \ 4333 uint32_t desc) \ 4334 { \ 4335 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4336 uint32_t vl = env->vl; \ 4337 uint32_t i; \ 4338 int a, b; \ 4339 \ 4340 for (i = 0; i < vl; 
i++) { \ 4341 a = vext_elem_mask(vs1, i); \ 4342 b = vext_elem_mask(vs2, i); \ 4343 vext_set_elem_mask(vd, i, OP(b, a)); \ 4344 } \ 4345 for (; i < vlmax; i++) { \ 4346 vext_set_elem_mask(vd, i, 0); \ 4347 } \ 4348 } 4349 4350 #define DO_NAND(N, M) (!(N & M)) 4351 #define DO_ANDNOT(N, M) (N & !M) 4352 #define DO_NOR(N, M) (!(N | M)) 4353 #define DO_ORNOT(N, M) (N | !M) 4354 #define DO_XNOR(N, M) (!(N ^ M)) 4355 4356 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4357 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4358 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) 4359 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4360 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4361 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4362 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) 4363 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4364 4365 /* Vector mask population count vmpopc */ 4366 target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env, 4367 uint32_t desc) 4368 { 4369 target_ulong cnt = 0; 4370 uint32_t vm = vext_vm(desc); 4371 uint32_t vl = env->vl; 4372 int i; 4373 4374 for (i = 0; i < vl; i++) { 4375 if (vm || vext_elem_mask(v0, i)) { 4376 if (vext_elem_mask(vs2, i)) { 4377 cnt++; 4378 } 4379 } 4380 } 4381 return cnt; 4382 } 4383 4384 /* vmfirst find-first-set mask bit*/ 4385 target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4386 uint32_t desc) 4387 { 4388 uint32_t vm = vext_vm(desc); 4389 uint32_t vl = env->vl; 4390 int i; 4391 4392 for (i = 0; i < vl; i++) { 4393 if (vm || vext_elem_mask(v0, i)) { 4394 if (vext_elem_mask(vs2, i)) { 4395 return i; 4396 } 4397 } 4398 } 4399 return -1LL; 4400 } 4401 4402 enum set_mask_type { 4403 ONLY_FIRST = 1, 4404 INCLUDE_FIRST, 4405 BEFORE_FIRST, 4406 }; 4407 4408 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4409 uint32_t desc, enum set_mask_type type) 4410 { 4411 uint32_t vlmax = env_archcpu(env)->cfg.vlen; 4412 uint32_t vm = vext_vm(desc); 4413 uint32_t vl = env->vl; 4414 int i; 4415 bool first_mask_bit = false; 4416 4417 for (i = 0; i < vl; i++) { 4418 if (!vm && !vext_elem_mask(v0, i)) { 4419 continue; 4420 } 4421 /* write a zero to all following active elements */ 4422 if (first_mask_bit) { 4423 vext_set_elem_mask(vd, i, 0); 4424 continue; 4425 } 4426 if (vext_elem_mask(vs2, i)) { 4427 first_mask_bit = true; 4428 if (type == BEFORE_FIRST) { 4429 vext_set_elem_mask(vd, i, 0); 4430 } else { 4431 vext_set_elem_mask(vd, i, 1); 4432 } 4433 } else { 4434 if (type == ONLY_FIRST) { 4435 vext_set_elem_mask(vd, i, 0); 4436 } else { 4437 vext_set_elem_mask(vd, i, 1); 4438 } 4439 } 4440 } 4441 for (; i < vlmax; i++) { 4442 vext_set_elem_mask(vd, i, 0); 4443 } 4444 } 4445 4446 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4447 uint32_t desc) 4448 { 4449 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST); 4450 } 4451 4452 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4453 uint32_t desc) 4454 { 4455 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST); 4456 } 4457 4458 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env, 4459 uint32_t desc) 4460 { 4461 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST); 4462 } 4463 4464 /* Vector Iota Instruction */ 4465 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \ 4466 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \ 4467 uint32_t desc) \ 4468 { \ 4469 uint32_t vm = vext_vm(desc); \ 4470 uint32_t vl = env->vl; \ 4471 uint32_t sum = 0; \ 4472 int i; \ 4473 \ 4474 for (i = 0; i < vl; i++) { \ 4475 if (!vm && !vext_elem_mask(v0, i)) { \ 4476 continue; \ 4477 } \ 4478 *((ETYPE *)vd + H(i)) 
= sum; \ 4479 if (vext_elem_mask(vs2, i)) { \ 4480 sum++; \ 4481 } \ 4482 } \ 4483 } 4484 4485 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1) 4486 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2) 4487 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4) 4488 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8) 4489 4490 /* Vector Element Index Instruction */ 4491 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \ 4492 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \ 4493 { \ 4494 uint32_t vm = vext_vm(desc); \ 4495 uint32_t vl = env->vl; \ 4496 int i; \ 4497 \ 4498 for (i = 0; i < vl; i++) { \ 4499 if (!vm && !vext_elem_mask(v0, i)) { \ 4500 continue; \ 4501 } \ 4502 *((ETYPE *)vd + H(i)) = i; \ 4503 } \ 4504 } 4505 4506 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1) 4507 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2) 4508 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4) 4509 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8) 4510 4511 /* 4512 *** Vector Permutation Instructions 4513 */ 4514 4515 /* Vector Slide Instructions */ 4516 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \ 4517 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4518 CPURISCVState *env, uint32_t desc) \ 4519 { \ 4520 uint32_t vm = vext_vm(desc); \ 4521 uint32_t vl = env->vl; \ 4522 target_ulong offset = s1, i; \ 4523 \ 4524 for (i = offset; i < vl; i++) { \ 4525 if (!vm && !vext_elem_mask(v0, i)) { \ 4526 continue; \ 4527 } \ 4528 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \ 4529 } \ 4530 } 4531 4532 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */ 4533 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1) 4534 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2) 4535 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4) 4536 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8) 4537 4538 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \ 4539 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4540 CPURISCVState *env, uint32_t desc) \ 4541 { \ 4542 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4543 uint32_t vm = vext_vm(desc); \ 4544 uint32_t vl = env->vl; \ 4545 target_ulong offset = s1, i; \ 4546 \ 4547 for (i = 0; i < vl; ++i) { \ 4548 target_ulong j = i + offset; \ 4549 if (!vm && !vext_elem_mask(v0, i)) { \ 4550 continue; \ 4551 } \ 4552 *((ETYPE *)vd + H(i)) = j >= vlmax ? 
0 : *((ETYPE *)vs2 + H(j)); \ 4553 } \ 4554 } 4555 4556 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */ 4557 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1) 4558 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2) 4559 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4) 4560 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8) 4561 4562 #define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H) \ 4563 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4564 CPURISCVState *env, uint32_t desc) \ 4565 { \ 4566 uint32_t vm = vext_vm(desc); \ 4567 uint32_t vl = env->vl; \ 4568 uint32_t i; \ 4569 \ 4570 for (i = 0; i < vl; i++) { \ 4571 if (!vm && !vext_elem_mask(v0, i)) { \ 4572 continue; \ 4573 } \ 4574 if (i == 0) { \ 4575 *((ETYPE *)vd + H(i)) = s1; \ 4576 } else { \ 4577 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \ 4578 } \ 4579 } \ 4580 } 4581 4582 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */ 4583 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1) 4584 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2) 4585 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4) 4586 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8) 4587 4588 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H) \ 4589 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4590 CPURISCVState *env, uint32_t desc) \ 4591 { \ 4592 uint32_t vm = vext_vm(desc); \ 4593 uint32_t vl = env->vl; \ 4594 uint32_t i; \ 4595 \ 4596 for (i = 0; i < vl; i++) { \ 4597 if (!vm && !vext_elem_mask(v0, i)) { \ 4598 continue; \ 4599 } \ 4600 if (i == vl - 1) { \ 4601 *((ETYPE *)vd + H(i)) = s1; \ 4602 } else { \ 4603 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \ 4604 } \ 4605 } \ 4606 } 4607 4608 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */ 4609 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1) 4610 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2) 4611 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4) 4612 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8) 4613 4614 /* Vector Register Gather Instruction */ 4615 #define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H) \ 4616 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4617 CPURISCVState *env, uint32_t desc) \ 4618 { \ 4619 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4620 uint32_t vm = vext_vm(desc); \ 4621 uint32_t vl = env->vl; \ 4622 uint64_t index; \ 4623 uint32_t i; \ 4624 \ 4625 for (i = 0; i < vl; i++) { \ 4626 if (!vm && !vext_elem_mask(v0, i)) { \ 4627 continue; \ 4628 } \ 4629 index = *((ETYPE *)vs1 + H(i)); \ 4630 if (index >= vlmax) { \ 4631 *((ETYPE *)vd + H(i)) = 0; \ 4632 } else { \ 4633 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4634 } \ 4635 } \ 4636 } 4637 4638 /* vd[i] = (vs1[i] >= VLMAX) ? 
0 : vs2[vs1[i]]; */ 4639 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, H1) 4640 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2) 4641 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4) 4642 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8) 4643 4644 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \ 4645 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 4646 CPURISCVState *env, uint32_t desc) \ 4647 { \ 4648 uint32_t vlmax = env_archcpu(env)->cfg.vlen; \ 4649 uint32_t vm = vext_vm(desc); \ 4650 uint32_t vl = env->vl; \ 4651 uint64_t index = s1; \ 4652 uint32_t i; \ 4653 \ 4654 for (i = 0; i < vl; i++) { \ 4655 if (!vm && !vext_elem_mask(v0, i)) { \ 4656 continue; \ 4657 } \ 4658 if (index >= vlmax) { \ 4659 *((ETYPE *)vd + H(i)) = 0; \ 4660 } else { \ 4661 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \ 4662 } \ 4663 } \ 4664 } 4665 4666 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ 4667 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1) 4668 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2) 4669 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4) 4670 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8) 4671 4672 /* Vector Compress Instruction */ 4673 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \ 4674 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 4675 CPURISCVState *env, uint32_t desc) \ 4676 { \ 4677 uint32_t vl = env->vl; \ 4678 uint32_t num = 0, i; \ 4679 \ 4680 for (i = 0; i < vl; i++) { \ 4681 if (!vext_elem_mask(vs1, i)) { \ 4682 continue; \ 4683 } \ 4684 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \ 4685 num++; \ 4686 } \ 4687 } 4688 4689 /* Compress into vd elements of vs2 where vs1 is enabled */ 4690 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1) 4691 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2) 4692 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4) 4693 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8) 4694
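
/*
 * Worked example (added commentary, the numbers are hypothetical): for
 * vcompress_vm_b with vl = 8, source elements
 * vs2 = {10, 11, 12, 13, 14, 15, 16, 17} and mask bits in vs1 set for
 * elements 1, 3 and 6, the loop above packs the selected elements into
 * the lowest-numbered slots of vd:
 *
 *     i = 1: vd[0] = vs2[1] = 11, num = 1
 *     i = 3: vd[1] = vs2[3] = 13, num = 2
 *     i = 6: vd[2] = vs2[6] = 16, num = 3
 *
 * Elements vd[3] and above are not written by this helper, so they keep
 * their previous contents. vcompress takes its selection mask from vs1
 * rather than v0, which is why the generator never tests v0.
 */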