/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    bool vill = FIELD_EX64(s2, VTYPE, VILL);
    target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_mlen(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, MLEN);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

static inline uint32_t vext_lmul(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, LMUL);
}

static uint32_t vext_wd(uint32_t desc)
{
    return (simd_data(desc) >> 11) & 0x1;
}

/*
 * Get vector group length in bytes. Its range is [64, 2048].
 *
 * As simd_desc supports at most 256, the max vlen is 512 bits.
 * So vlen in bytes is encoded as maxsz.
 */
static inline uint32_t vext_maxsz(uint32_t desc)
{
    return simd_maxsz(desc) << vext_lmul(desc);
}
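/*
 * Worked example (illustrative, not from the original source): assume a
 * configuration where VLEN is 128 bits, so that maxsz in the descriptor
 * encodes vlen in bytes and simd_maxsz(desc) == 16.  If the LMUL field
 * holds 2 (i.e. LMUL = 4), vext_maxsz() returns 16 << 2 = 64 bytes for
 * the vector group, and a helper operating on 32-bit elements derives
 * vlmax = 64 / 4 = 16 elements.
 */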
/*
 * This function checks watchpoints before the real load operation.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, addr, curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, addr, curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

#ifdef HOST_WORDS_BIGENDIAN
static void vext_clear(void *tail, uint32_t cnt, uint32_t tot)
{
    /*
     * Split the remaining range into two parts.
     * The first part is in the last uint64_t unit.
     * The second part starts from the next uint64_t unit.
     */
    int part1 = 0, part2 = tot - cnt;
    if (cnt % 8) {
        part1 = 8 - (cnt % 8);
        part2 = tot - cnt - part1;
        memset(QEMU_ALIGN_PTR_DOWN(tail, 8), 0, part1);
        memset(QEMU_ALIGN_PTR_UP(tail, 8), 0, part2);
    } else {
        memset(tail, 0, part2);
    }
}
#else
static void vext_clear(void *tail, uint32_t cnt, uint32_t tot)
{
    memset(tail, 0, tot - cnt);
}
#endif

static void clearb(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
{
    int8_t *cur = ((int8_t *)vd + H1(idx));
    vext_clear(cur, cnt, tot);
}

static void clearh(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
{
    int16_t *cur = ((int16_t *)vd + H2(idx));
    vext_clear(cur, cnt, tot);
}

static void clearl(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
{
    int32_t *cur = ((int32_t *)vd + H4(idx));
    vext_clear(cur, cnt, tot);
}

static void clearq(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
{
    int64_t *cur = (int64_t *)vd + idx;
    vext_clear(cur, cnt, tot);
}

static inline void vext_set_elem_mask(void *v0, int mlen, int index,
                                      uint8_t value)
{
    int idx = (index * mlen) / 64;
    int pos = (index * mlen) % 64;
    uint64_t old = ((uint64_t *)v0)[idx];
    ((uint64_t *)v0)[idx] = deposit64(old, pos, mlen, value);
}

static inline int vext_elem_mask(void *v0, int mlen, int index)
{
    int idx = (index * mlen) / 64;
    int pos = (index * mlen) % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);
typedef void clear_fn(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot);

#define GEN_VEXT_LD_ELEM(NAME, MTYPE, ETYPE, H, LDSUF)     \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    MTYPE data;                                            \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    data = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
    *cur = data;                                           \
}                                                          \

GEN_VEXT_LD_ELEM(ldb_b,  int8_t,   int8_t,   H1, ldsb)
GEN_VEXT_LD_ELEM(ldb_h,  int8_t,   int16_t,  H2, ldsb)
GEN_VEXT_LD_ELEM(ldb_w,  int8_t,   int32_t,  H4, ldsb)
GEN_VEXT_LD_ELEM(ldb_d,  int8_t,   int64_t,  H8, ldsb)
GEN_VEXT_LD_ELEM(ldh_h,  int16_t,  int16_t,  H2, ldsw)
GEN_VEXT_LD_ELEM(ldh_w,  int16_t,  int32_t,  H4, ldsw)
GEN_VEXT_LD_ELEM(ldh_d,  int16_t,  int64_t,  H8, ldsw)
GEN_VEXT_LD_ELEM(ldw_w,  int32_t,  int32_t,  H4, ldl)
GEN_VEXT_LD_ELEM(ldw_d,  int32_t,  int64_t,  H8, ldl)
GEN_VEXT_LD_ELEM(lde_b,  int8_t,   int8_t,   H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h,  int16_t,  int16_t,  H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w,  int32_t,  int32_t,  H4, ldl)
GEN_VEXT_LD_ELEM(lde_d,  int64_t,  int64_t,  H8, ldq)
GEN_VEXT_LD_ELEM(ldbu_b, uint8_t,  uint8_t,  H1, ldub)
GEN_VEXT_LD_ELEM(ldbu_h, uint8_t,  uint16_t, H2, ldub)
GEN_VEXT_LD_ELEM(ldbu_w, uint8_t,  uint32_t, H4, ldub)
GEN_VEXT_LD_ELEM(ldbu_d, uint8_t,  uint64_t, H8, ldub)
GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, uint16_t, H2, lduw)
GEN_VEXT_LD_ELEM(ldhu_w, uint16_t, uint32_t, H4, lduw)
GEN_VEXT_LD_ELEM(ldhu_d, uint16_t, uint64_t, H8, lduw)
GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, uint32_t, H4, ldl)
GEN_VEXT_LD_ELEM(ldwu_d, uint32_t, uint64_t, H8, ldl)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(stb_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(stb_h, int16_t, H2, stb)
GEN_VEXT_ST_ELEM(stb_w, int32_t, H4, stb)
GEN_VEXT_ST_ELEM(stb_d, int64_t, H8, stb)
GEN_VEXT_ST_ELEM(sth_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(sth_w, int32_t, H4, stw)
GEN_VEXT_ST_ELEM(sth_d, int64_t, H8, stw)
GEN_VEXT_ST_ELEM(stw_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(stw_d, int64_t, H8, stl)
GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem,
                 uint32_t esz, uint32_t msz, uintptr_t ra,
                 MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t mlen = vext_mlen(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        probe_pages(env, base + stride * i, nf * msz, ra, access_type);
    }
    /* do real access */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + stride * i + k * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
    /* clear tail elements */
    if (clear_elem) {
        for (k = 0; k < nf; k++) {
            clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
        }
    }
}

#define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN)  \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,           \
                  target_ulong stride, CPURISCVState *env,         \
                  uint32_t desc)                                    \
{                                                                   \
    uint32_t vm = vext_vm(desc);                                    \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,  \
                     CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),        \
                     GETPC(), MMU_DATA_LOAD);                       \
}

GEN_VEXT_LD_STRIDE(vlsb_v_b,  int8_t,   int8_t,   ldb_b,  clearb)
GEN_VEXT_LD_STRIDE(vlsb_v_h,  int8_t,   int16_t,  ldb_h,  clearh)
GEN_VEXT_LD_STRIDE(vlsb_v_w,  int8_t,   int32_t,  ldb_w,  clearl)
GEN_VEXT_LD_STRIDE(vlsb_v_d,  int8_t,   int64_t,  ldb_d,  clearq)
GEN_VEXT_LD_STRIDE(vlsh_v_h,  int16_t,  int16_t,  ldh_h,  clearh)
GEN_VEXT_LD_STRIDE(vlsh_v_w,  int16_t,  int32_t,  ldh_w,  clearl)
GEN_VEXT_LD_STRIDE(vlsh_v_d,  int16_t,  int64_t,  ldh_d,  clearq)
GEN_VEXT_LD_STRIDE(vlsw_v_w,  int32_t,  int32_t,  ldw_w,  clearl)
GEN_VEXT_LD_STRIDE(vlsw_v_d,  int32_t,  int64_t,  ldw_d,  clearq)
GEN_VEXT_LD_STRIDE(vlse_v_b,  int8_t,   int8_t,   lde_b,  clearb)
GEN_VEXT_LD_STRIDE(vlse_v_h,  int16_t,  int16_t,  lde_h,  clearh)
GEN_VEXT_LD_STRIDE(vlse_v_w,  int32_t,  int32_t,  lde_w,  clearl)
GEN_VEXT_LD_STRIDE(vlse_v_d,  int64_t,  int64_t,  lde_d,  clearq)
GEN_VEXT_LD_STRIDE(vlsbu_v_b, uint8_t,  uint8_t,  ldbu_b, clearb)
GEN_VEXT_LD_STRIDE(vlsbu_v_h, uint8_t,  uint16_t, ldbu_h, clearh)
GEN_VEXT_LD_STRIDE(vlsbu_v_w, uint8_t,  uint32_t, ldbu_w, clearl)
GEN_VEXT_LD_STRIDE(vlsbu_v_d, uint8_t,  uint64_t, ldbu_d, clearq)
GEN_VEXT_LD_STRIDE(vlshu_v_h, uint16_t, uint16_t, ldhu_h, clearh)
GEN_VEXT_LD_STRIDE(vlshu_v_w, uint16_t, uint32_t, ldhu_w, clearl)
GEN_VEXT_LD_STRIDE(vlshu_v_d, uint16_t, uint64_t, ldhu_d, clearq)
GEN_VEXT_LD_STRIDE(vlswu_v_w, uint32_t, uint32_t, ldwu_w, clearl)
GEN_VEXT_LD_STRIDE(vlswu_v_d, uint32_t, uint64_t, ldwu_d, clearq)

#define GEN_VEXT_ST_STRIDE(NAME, MTYPE, ETYPE, STORE_FN)            \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,            \
                  target_ulong stride, CPURISCVState *env,          \
                  uint32_t desc)                                     \
{                                                                    \
    uint32_t vm = vext_vm(desc);                                     \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,  \
                     NULL, sizeof(ETYPE), sizeof(MTYPE),             \
                     GETPC(), MMU_DATA_STORE);                       \
}

GEN_VEXT_ST_STRIDE(vssb_v_b, int8_t,  int8_t,  stb_b)
GEN_VEXT_ST_STRIDE(vssb_v_h, int8_t,  int16_t, stb_h)
GEN_VEXT_ST_STRIDE(vssb_v_w, int8_t,  int32_t, stb_w)
GEN_VEXT_ST_STRIDE(vssb_v_d, int8_t,  int64_t, stb_d)
GEN_VEXT_ST_STRIDE(vssh_v_h, int16_t, int16_t, sth_h)
GEN_VEXT_ST_STRIDE(vssh_v_w, int16_t, int32_t, sth_w)
GEN_VEXT_ST_STRIDE(vssh_v_d, int16_t, int64_t, sth_d)
GEN_VEXT_ST_STRIDE(vssw_v_w, int32_t, int32_t, stw_w)
GEN_VEXT_ST_STRIDE(vssw_v_d, int32_t, int64_t, stw_d)
GEN_VEXT_ST_STRIDE(vsse_v_b, int8_t,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse_v_h, int16_t, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse_v_w, int32_t, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem,
             uint32_t esz, uint32_t msz, uintptr_t ra,
             MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    /* probe every access */
    probe_pages(env, base, env->vl * nf * msz, ra, access_type);
    /* load bytes from guest memory */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + (i * nf + k) * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
    /* clear tail elements */
    if (clear_elem) {
        for (k = 0; k < nf; k++) {
            clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
        }
    }
}

/*
 * A masked unit-stride load or store is handled as a special case of the
 * strided operation, with stride = NF * sizeof(MTYPE).
 */

#define GEN_VEXT_LD_US(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN)           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) * sizeof(MTYPE);                    \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),            \
                     GETPC(), MMU_DATA_LOAD);                           \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN, CLEAR_FN,                \
                 sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_LOAD); \
}
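/*
 * Worked example (illustrative only): for a masked unit-stride halfword
 * load with NF = 1 and MTYPE = int16_t, the _mask helper above passes
 * stride = 1 * sizeof(int16_t) = 2 to vext_ldst_stride(), so active
 * element i is fetched from base + 2 * i, i.e. from consecutive
 * halfwords, while inactive elements are skipped by the mask check
 * inside vext_ldst_stride().
 */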
GEN_VEXT_LD_US(vlb_v_b,  int8_t,   int8_t,   ldb_b,  clearb)
GEN_VEXT_LD_US(vlb_v_h,  int8_t,   int16_t,  ldb_h,  clearh)
GEN_VEXT_LD_US(vlb_v_w,  int8_t,   int32_t,  ldb_w,  clearl)
GEN_VEXT_LD_US(vlb_v_d,  int8_t,   int64_t,  ldb_d,  clearq)
GEN_VEXT_LD_US(vlh_v_h,  int16_t,  int16_t,  ldh_h,  clearh)
GEN_VEXT_LD_US(vlh_v_w,  int16_t,  int32_t,  ldh_w,  clearl)
GEN_VEXT_LD_US(vlh_v_d,  int16_t,  int64_t,  ldh_d,  clearq)
GEN_VEXT_LD_US(vlw_v_w,  int32_t,  int32_t,  ldw_w,  clearl)
GEN_VEXT_LD_US(vlw_v_d,  int32_t,  int64_t,  ldw_d,  clearq)
GEN_VEXT_LD_US(vle_v_b,  int8_t,   int8_t,   lde_b,  clearb)
GEN_VEXT_LD_US(vle_v_h,  int16_t,  int16_t,  lde_h,  clearh)
GEN_VEXT_LD_US(vle_v_w,  int32_t,  int32_t,  lde_w,  clearl)
GEN_VEXT_LD_US(vle_v_d,  int64_t,  int64_t,  lde_d,  clearq)
GEN_VEXT_LD_US(vlbu_v_b, uint8_t,  uint8_t,  ldbu_b, clearb)
GEN_VEXT_LD_US(vlbu_v_h, uint8_t,  uint16_t, ldbu_h, clearh)
GEN_VEXT_LD_US(vlbu_v_w, uint8_t,  uint32_t, ldbu_w, clearl)
GEN_VEXT_LD_US(vlbu_v_d, uint8_t,  uint64_t, ldbu_d, clearq)
GEN_VEXT_LD_US(vlhu_v_h, uint16_t, uint16_t, ldhu_h, clearh)
GEN_VEXT_LD_US(vlhu_v_w, uint16_t, uint32_t, ldhu_w, clearl)
GEN_VEXT_LD_US(vlhu_v_d, uint16_t, uint64_t, ldhu_d, clearq)
GEN_VEXT_LD_US(vlwu_v_w, uint32_t, uint32_t, ldwu_w, clearl)
GEN_VEXT_LD_US(vlwu_v_d, uint32_t, uint64_t, ldwu_d, clearq)

#define GEN_VEXT_ST_US(NAME, MTYPE, ETYPE, STORE_FN)                    \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) * sizeof(MTYPE);                    \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     NULL, sizeof(ETYPE), sizeof(MTYPE),                \
                     GETPC(), MMU_DATA_STORE);                          \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN, NULL,                   \
                 sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_STORE);\
}

GEN_VEXT_ST_US(vsb_v_b, int8_t,  int8_t,  stb_b)
GEN_VEXT_ST_US(vsb_v_h, int8_t,  int16_t, stb_h)
GEN_VEXT_ST_US(vsb_v_w, int8_t,  int32_t, stb_w)
GEN_VEXT_ST_US(vsb_v_d, int8_t,  int64_t, stb_d)
GEN_VEXT_ST_US(vsh_v_h, int16_t, int16_t, sth_h)
GEN_VEXT_ST_US(vsh_v_w, int16_t, int32_t, sth_w)
GEN_VEXT_ST_US(vsh_v_d, int16_t, int64_t, sth_d)
GEN_VEXT_ST_US(vsw_v_w, int32_t, int32_t, stw_w)
GEN_VEXT_ST_US(vsw_v_d, int32_t, int64_t, stw_d)
GEN_VEXT_ST_US(vse_v_b, int8_t,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h)
GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w)
GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d)

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)   \
static target_ulong NAME(target_ulong base,       \
                         uint32_t idx, void *vs2) \
{                                                 \
    return (base + *((ETYPE *)vs2 + H(idx)));     \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, int8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, int16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, int32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, int64_t, H8)
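/*
 * Worked example (illustrative only): with ETYPE = int16_t, idx_h()
 * above returns base plus the sign-extended 16-bit byte offset read
 * from vs2, so an offset element holding -4 addresses memory 4 bytes
 * below base; the signed ETYPE is what provides the sign extension of
 * the offsets used by the indexed accesses below.
 */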
static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                clear_fn *clear_elem,
                uint32_t esz, uint32_t msz, uintptr_t ra,
                MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t mlen = vext_mlen(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra,
                    access_type);
    }
    /* load bytes from guest memory */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + k * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
    /* clear tail elements */
    if (clear_elem) {
        for (k = 0; k < nf; k++) {
            clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
        }
    }
}

#define GEN_VEXT_LD_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, LOAD_FN, CLEAR_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
                  void *vs2, CPURISCVState *env, uint32_t desc)            \
{                                                                          \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
                    LOAD_FN, CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),       \
                    GETPC(), MMU_DATA_LOAD);                               \
}

GEN_VEXT_LD_INDEX(vlxb_v_b,  int8_t,   int8_t,   idx_b, ldb_b,  clearb)
GEN_VEXT_LD_INDEX(vlxb_v_h,  int8_t,   int16_t,  idx_h, ldb_h,  clearh)
GEN_VEXT_LD_INDEX(vlxb_v_w,  int8_t,   int32_t,  idx_w, ldb_w,  clearl)
GEN_VEXT_LD_INDEX(vlxb_v_d,  int8_t,   int64_t,  idx_d, ldb_d,  clearq)
GEN_VEXT_LD_INDEX(vlxh_v_h,  int16_t,  int16_t,  idx_h, ldh_h,  clearh)
GEN_VEXT_LD_INDEX(vlxh_v_w,  int16_t,  int32_t,  idx_w, ldh_w,  clearl)
GEN_VEXT_LD_INDEX(vlxh_v_d,  int16_t,  int64_t,  idx_d, ldh_d,  clearq)
GEN_VEXT_LD_INDEX(vlxw_v_w,  int32_t,  int32_t,  idx_w, ldw_w,  clearl)
GEN_VEXT_LD_INDEX(vlxw_v_d,  int32_t,  int64_t,  idx_d, ldw_d,  clearq)
GEN_VEXT_LD_INDEX(vlxe_v_b,  int8_t,   int8_t,   idx_b, lde_b,  clearb)
GEN_VEXT_LD_INDEX(vlxe_v_h,  int16_t,  int16_t,  idx_h, lde_h,  clearh)
GEN_VEXT_LD_INDEX(vlxe_v_w,  int32_t,  int32_t,  idx_w, lde_w,  clearl)
GEN_VEXT_LD_INDEX(vlxe_v_d,  int64_t,  int64_t,  idx_d, lde_d,  clearq)
GEN_VEXT_LD_INDEX(vlxbu_v_b, uint8_t,  uint8_t,  idx_b, ldbu_b, clearb)
GEN_VEXT_LD_INDEX(vlxbu_v_h, uint8_t,  uint16_t, idx_h, ldbu_h, clearh)
GEN_VEXT_LD_INDEX(vlxbu_v_w, uint8_t,  uint32_t, idx_w, ldbu_w, clearl)
GEN_VEXT_LD_INDEX(vlxbu_v_d, uint8_t,  uint64_t, idx_d, ldbu_d, clearq)
GEN_VEXT_LD_INDEX(vlxhu_v_h, uint16_t, uint16_t, idx_h, ldhu_h, clearh)
GEN_VEXT_LD_INDEX(vlxhu_v_w, uint16_t, uint32_t, idx_w, ldhu_w, clearl)
GEN_VEXT_LD_INDEX(vlxhu_v_d, uint16_t, uint64_t, idx_d, ldhu_d, clearq)
GEN_VEXT_LD_INDEX(vlxwu_v_w, uint32_t, uint32_t, idx_w, ldwu_w, clearl)
GEN_VEXT_LD_INDEX(vlxwu_v_d, uint32_t, uint64_t, idx_d, ldwu_d, clearq)

#define GEN_VEXT_ST_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, STORE_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,          \
                  void *vs2, CPURISCVState *env, uint32_t desc)   \
{                                                                 \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,       \
                    STORE_FN, NULL, sizeof(ETYPE), sizeof(MTYPE), \
                    GETPC(), MMU_DATA_STORE);                     \
}

GEN_VEXT_ST_INDEX(vsxb_v_b, int8_t,  int8_t,  idx_b, stb_b)
GEN_VEXT_ST_INDEX(vsxb_v_h, int8_t,  int16_t, idx_h, stb_h)
GEN_VEXT_ST_INDEX(vsxb_v_w, int8_t,  int32_t, idx_w, stb_w)
GEN_VEXT_ST_INDEX(vsxb_v_d, int8_t,  int64_t, idx_d, stb_d)
GEN_VEXT_ST_INDEX(vsxh_v_h, int16_t, int16_t, idx_h, sth_h)
GEN_VEXT_ST_INDEX(vsxh_v_w, int16_t, int32_t, idx_w, sth_w)
GEN_VEXT_ST_INDEX(vsxh_v_d, int16_t, int64_t, idx_d, sth_d)
GEN_VEXT_ST_INDEX(vsxw_v_w, int32_t, int32_t, idx_w, stw_w)
GEN_VEXT_ST_INDEX(vsxw_v_d, int32_t, int64_t, idx_d, stw_d)
GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t,  int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          clear_fn *clear_elem,
          uint32_t esz, uint32_t msz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t mlen = vext_mlen(desc);
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        addr = base + nf * i * msz;
        if (i == 0) {
            probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf * msz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, nf * msz, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr += offset;
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + (i * nf + k) * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
    /* clear tail elements */
    if (vl != 0) {
        return;
    }
    for (k = 0; k < nf; k++) {
        clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
    }
}

#define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,     \
                  CPURISCVState *env, uint32_t desc)         \
{                                                            \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN, CLEAR_FN,    \
              sizeof(ETYPE), sizeof(MTYPE), GETPC());        \
}

GEN_VEXT_LDFF(vlbff_v_b,  int8_t,   int8_t,   ldb_b,  clearb)
GEN_VEXT_LDFF(vlbff_v_h,  int8_t,   int16_t,  ldb_h,  clearh)
GEN_VEXT_LDFF(vlbff_v_w,  int8_t,   int32_t,  ldb_w,  clearl)
GEN_VEXT_LDFF(vlbff_v_d,  int8_t,   int64_t,  ldb_d,  clearq)
GEN_VEXT_LDFF(vlhff_v_h,  int16_t,  int16_t,  ldh_h,  clearh)
GEN_VEXT_LDFF(vlhff_v_w,  int16_t,  int32_t,  ldh_w,  clearl)
GEN_VEXT_LDFF(vlhff_v_d,  int16_t,  int64_t,  ldh_d,  clearq)
GEN_VEXT_LDFF(vlwff_v_w,  int32_t,  int32_t,  ldw_w,  clearl)
GEN_VEXT_LDFF(vlwff_v_d,  int32_t,  int64_t,  ldw_d,  clearq)
GEN_VEXT_LDFF(vleff_v_b,  int8_t,   int8_t,   lde_b,  clearb)
GEN_VEXT_LDFF(vleff_v_h,  int16_t,  int16_t,  lde_h,  clearh)
GEN_VEXT_LDFF(vleff_v_w,  int32_t,  int32_t,  lde_w,  clearl)
GEN_VEXT_LDFF(vleff_v_d,  int64_t,  int64_t,  lde_d,  clearq)
GEN_VEXT_LDFF(vlbuff_v_b, uint8_t,  uint8_t,  ldbu_b, clearb)
GEN_VEXT_LDFF(vlbuff_v_h, uint8_t,  uint16_t, ldbu_h, clearh)
GEN_VEXT_LDFF(vlbuff_v_w, uint8_t,  uint32_t, ldbu_w, clearl)
GEN_VEXT_LDFF(vlbuff_v_d, uint8_t,  uint64_t, ldbu_d, clearq)
GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h, clearh)
GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w, clearl)
GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d, clearq)
GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w, clearl)
GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d, clearq)

/*
 *** Vector AMO Operations (Zvamo)
 */
typedef void vext_amo_noatomic_fn(void *vs3, target_ulong addr,
                                  uint32_t wd, uint32_t idx, CPURISCVState *env,
                                  uintptr_t retaddr);

/* no atomic operation for vector atomic instructions */
#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

#define GEN_VEXT_AMO_NOATOMIC_OP(NAME, ESZ, MSZ, H, DO_OP, SUF) \
static void                                                     \
vext_##NAME##_noatomic_op(void *vs3, target_ulong addr,         \
                          uint32_t wd, uint32_t idx,            \
                          CPURISCVState *env, uintptr_t retaddr)\
{                                                               \
    typedef int##ESZ##_t ETYPE;                                 \
    typedef int##MSZ##_t MTYPE;                                 \
    typedef uint##MSZ##_t UMTYPE __attribute__((unused));       \
    ETYPE *pe3 = (ETYPE *)vs3 + H(idx);                         \
    MTYPE a = cpu_ld##SUF##_data(env, addr), b = *pe3;          \
                                                                \
    cpu_st##SUF##_data(env, addr, DO_OP(a, b));                 \
    if (wd) {                                                   \
        *pe3 = a;                                               \
    }                                                           \
}

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/* Unsigned min/max */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
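/*
 * Worked example (illustrative only): the instantiation
 * GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_w, 32, 32, H4, DO_ADD, l) below
 * produces vext_vamoaddw_v_w_noatomic_op(), which loads the 32-bit value
 * a at addr, stores a + b back (b being the element from vs3) and, when
 * wd is set, writes the old memory value a into vs3[idx]; "noatomic"
 * reflects that the read-modify-write is not performed atomically.
 */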
GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_w, 32, 32, H4, DO_SWAP, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_w,  32, 32, H4, DO_ADD,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_w,  32, 32, H4, DO_XOR,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_w,  32, 32, H4, DO_AND,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_w,   32, 32, H4, DO_OR,   l)
GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_w,  32, 32, H4, DO_MIN,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_w,  32, 32, H4, DO_MAX,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_w, 32, 32, H4, DO_MINU, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_w, 32, 32, H4, DO_MAXU, l)
#ifdef TARGET_RISCV64
GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_d, 64, 32, H8, DO_SWAP, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoswapd_v_d, 64, 64, H8, DO_SWAP, q)
GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_d,  64, 32, H8, DO_ADD,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoaddd_v_d,  64, 64, H8, DO_ADD,  q)
GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_d,  64, 32, H8, DO_XOR,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoxord_v_d,  64, 64, H8, DO_XOR,  q)
GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_d,  64, 32, H8, DO_AND,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoandd_v_d,  64, 64, H8, DO_AND,  q)
GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_d,   64, 32, H8, DO_OR,   l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoord_v_d,   64, 64, H8, DO_OR,   q)
GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_d,  64, 32, H8, DO_MIN,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamomind_v_d,  64, 64, H8, DO_MIN,  q)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_d,  64, 32, H8, DO_MAX,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxd_v_d,  64, 64, H8, DO_MAX,  q)
GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_d, 64, 32, H8, DO_MINU, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamominud_v_d, 64, 64, H8, DO_MINU, q)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_d, 64, 32, H8, DO_MAXU, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxud_v_d, 64, 64, H8, DO_MAXU, q)
#endif

static inline void
vext_amo_noatomic(void *vs3, void *v0, target_ulong base,
                  void *vs2, CPURISCVState *env, uint32_t desc,
                  vext_get_index_addr get_index_addr,
                  vext_amo_noatomic_fn *noatomic_op,
                  clear_fn *clear_elem,
                  uint32_t esz, uint32_t msz, uintptr_t ra)
{
    uint32_t i;
    target_long addr;
    uint32_t wd = vext_wd(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t mlen = vext_mlen(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_LOAD);
        probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_STORE);
    }
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        addr = get_index_addr(base, i, vs2);
        noatomic_op(vs3, addr, wd, i, env, ra);
    }
    clear_elem(vs3, env->vl, env->vl * esz, vlmax * esz);
}

#define GEN_VEXT_AMO(NAME, MTYPE, ETYPE, INDEX_FN, CLEAR_FN)    \
void HELPER(NAME)(void *vs3, void *v0, target_ulong base,       \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    vext_amo_noatomic(vs3, v0, base, vs2, env, desc,            \
                      INDEX_FN, vext_##NAME##_noatomic_op,      \
                      CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),   \
                      GETPC());                                 \
}

#ifdef TARGET_RISCV64
GEN_VEXT_AMO(vamoswapw_v_d, int32_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoswapd_v_d, int64_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoaddw_v_d,  int32_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoaddd_v_d,  int64_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoxorw_v_d,  int32_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoxord_v_d,  int64_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoandw_v_d,  int32_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoandd_v_d,  int64_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoorw_v_d,   int32_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoord_v_d,   int64_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamominw_v_d,  int32_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamomind_v_d,  int64_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamomaxw_v_d,  int32_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamomaxd_v_d,  int64_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamominuw_v_d, uint32_t, uint64_t, idx_d, clearq)
GEN_VEXT_AMO(vamominud_v_d, uint64_t, uint64_t, idx_d, clearq)
GEN_VEXT_AMO(vamomaxuw_v_d, uint32_t, uint64_t, idx_d, clearq)
GEN_VEXT_AMO(vamomaxud_v_d, uint64_t, uint64_t, idx_d, clearq)
#endif
GEN_VEXT_AMO(vamoswapw_v_w, int32_t,  int32_t,  idx_w, clearl)
GEN_VEXT_AMO(vamoaddw_v_w,  int32_t,  int32_t,  idx_w, clearl)
GEN_VEXT_AMO(vamoxorw_v_w,  int32_t,  int32_t,  idx_w, clearl)
GEN_VEXT_AMO(vamoandw_v_w,  int32_t,  int32_t,  idx_w, clearl)
GEN_VEXT_AMO(vamoorw_v_w,   int32_t,  int32_t,  idx_w, clearl)
GEN_VEXT_AMO(vamominw_v_w,  int32_t,  int32_t,  idx_w, clearl)
GEN_VEXT_AMO(vamomaxw_v_w,  int32_t,  int32_t,  idx_w, clearl)
GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl)
GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl)

/*
 *** Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
#define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
#define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
#define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
#define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
#define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
#define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
#define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
#define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
#define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
#define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
#define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
#define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
#define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
#define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
#define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
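/*
 * Worked example (illustrative only): WOP_SSS_B expands to
 * "int16_t, int8_t, int8_t, int16_t, int16_t", i.e. a widening signed
 * operation whose destination type TD is int16_t, whose source element
 * types T1 and T2 are int8_t, and whose extended operand types TX1 and
 * TX2 are int16_t, so each 8-bit operand is sign-extended to 16 bits
 * before the operation named by OP is applied.
 */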
int32_t, int64_t, int32_t, int64_t 882 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t 883 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t 884 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t 885 886 /* operation of two vector elements */ 887 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); 888 889 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 890 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 891 { \ 892 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 893 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 894 *((TD *)vd + HD(i)) = OP(s2, s1); \ 895 } 896 #define DO_SUB(N, M) (N - M) 897 #define DO_RSUB(N, M) (M - N) 898 899 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD) 900 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD) 901 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD) 902 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD) 903 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB) 904 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) 905 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) 906 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) 907 908 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, 909 CPURISCVState *env, uint32_t desc, 910 uint32_t esz, uint32_t dsz, 911 opivv2_fn *fn, clear_fn *clearfn) 912 { 913 uint32_t vlmax = vext_maxsz(desc) / esz; 914 uint32_t mlen = vext_mlen(desc); 915 uint32_t vm = vext_vm(desc); 916 uint32_t vl = env->vl; 917 uint32_t i; 918 919 for (i = 0; i < vl; i++) { 920 if (!vm && !vext_elem_mask(v0, mlen, i)) { 921 continue; 922 } 923 fn(vd, vs1, vs2, i); 924 } 925 clearfn(vd, vl, vl * dsz, vlmax * dsz); 926 } 927 928 /* generate the helpers for OPIVV */ 929 #define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN) \ 930 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 931 void *vs2, CPURISCVState *env, \ 932 uint32_t desc) \ 933 { \ 934 do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 935 do_##NAME, CLEAR_FN); \ 936 } 937 938 GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb) 939 GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh) 940 GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl) 941 GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq) 942 GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb) 943 GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh) 944 GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl) 945 GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq) 946 947 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); 948 949 /* 950 * (T1)s1 gives the real operator type. 951 * (TX1)(T1)s1 expands the operator type of widen or narrow operations. 
952 */ 953 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 954 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 955 { \ 956 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 957 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ 958 } 959 960 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) 961 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) 962 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD) 963 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD) 964 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB) 965 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB) 966 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB) 967 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB) 968 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB) 969 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) 970 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) 971 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) 972 973 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, 974 CPURISCVState *env, uint32_t desc, 975 uint32_t esz, uint32_t dsz, 976 opivx2_fn fn, clear_fn *clearfn) 977 { 978 uint32_t vlmax = vext_maxsz(desc) / esz; 979 uint32_t mlen = vext_mlen(desc); 980 uint32_t vm = vext_vm(desc); 981 uint32_t vl = env->vl; 982 uint32_t i; 983 984 for (i = 0; i < vl; i++) { 985 if (!vm && !vext_elem_mask(v0, mlen, i)) { 986 continue; 987 } 988 fn(vd, s1, vs2, i); 989 } 990 clearfn(vd, vl, vl * dsz, vlmax * dsz); 991 } 992 993 /* generate the helpers for OPIVX */ 994 #define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN) \ 995 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 996 void *vs2, CPURISCVState *env, \ 997 uint32_t desc) \ 998 { \ 999 do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 1000 do_##NAME, CLEAR_FN); \ 1001 } 1002 1003 GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb) 1004 GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh) 1005 GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl) 1006 GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq) 1007 GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb) 1008 GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh) 1009 GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl) 1010 GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq) 1011 GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb) 1012 GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh) 1013 GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl) 1014 GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq) 1015 1016 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc) 1017 { 1018 intptr_t oprsz = simd_oprsz(desc); 1019 intptr_t i; 1020 1021 for (i = 0; i < oprsz; i += sizeof(uint8_t)) { 1022 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i); 1023 } 1024 } 1025 1026 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc) 1027 { 1028 intptr_t oprsz = simd_oprsz(desc); 1029 intptr_t i; 1030 1031 for (i = 0; i < oprsz; i += sizeof(uint16_t)) { 1032 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i); 1033 } 1034 } 1035 1036 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc) 1037 { 1038 intptr_t oprsz = simd_oprsz(desc); 1039 intptr_t i; 1040 1041 for (i = 0; i < oprsz; i += sizeof(uint32_t)) { 1042 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i); 1043 } 1044 } 1045 1046 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc) 1047 { 1048 intptr_t oprsz = simd_oprsz(desc); 1049 intptr_t i; 1050 1051 for (i = 0; i < oprsz; i += sizeof(uint64_t)) { 1052 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i); 1053 } 1054 } 1055 1056 /* Vector Widening Integer Add/Subtract */ 1057 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t 1058 #define WOP_UUU_H 
uint32_t, uint16_t, uint16_t, uint32_t, uint32_t 1059 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t 1060 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t 1061 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t 1062 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t 1063 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t 1064 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t 1065 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t 1066 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t 1067 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t 1068 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t 1069 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD) 1070 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD) 1071 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD) 1072 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB) 1073 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB) 1074 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB) 1075 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD) 1076 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD) 1077 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD) 1078 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB) 1079 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB) 1080 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB) 1081 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD) 1082 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD) 1083 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD) 1084 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB) 1085 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB) 1086 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB) 1087 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD) 1088 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD) 1089 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD) 1090 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB) 1091 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB) 1092 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB) 1093 GEN_VEXT_VV(vwaddu_vv_b, 1, 2, clearh) 1094 GEN_VEXT_VV(vwaddu_vv_h, 2, 4, clearl) 1095 GEN_VEXT_VV(vwaddu_vv_w, 4, 8, clearq) 1096 GEN_VEXT_VV(vwsubu_vv_b, 1, 2, clearh) 1097 GEN_VEXT_VV(vwsubu_vv_h, 2, 4, clearl) 1098 GEN_VEXT_VV(vwsubu_vv_w, 4, 8, clearq) 1099 GEN_VEXT_VV(vwadd_vv_b, 1, 2, clearh) 1100 GEN_VEXT_VV(vwadd_vv_h, 2, 4, clearl) 1101 GEN_VEXT_VV(vwadd_vv_w, 4, 8, clearq) 1102 GEN_VEXT_VV(vwsub_vv_b, 1, 2, clearh) 1103 GEN_VEXT_VV(vwsub_vv_h, 2, 4, clearl) 1104 GEN_VEXT_VV(vwsub_vv_w, 4, 8, clearq) 1105 GEN_VEXT_VV(vwaddu_wv_b, 1, 2, clearh) 1106 GEN_VEXT_VV(vwaddu_wv_h, 2, 4, clearl) 1107 GEN_VEXT_VV(vwaddu_wv_w, 4, 8, clearq) 1108 GEN_VEXT_VV(vwsubu_wv_b, 1, 2, clearh) 1109 GEN_VEXT_VV(vwsubu_wv_h, 2, 4, clearl) 1110 GEN_VEXT_VV(vwsubu_wv_w, 4, 8, clearq) 1111 GEN_VEXT_VV(vwadd_wv_b, 1, 2, clearh) 1112 GEN_VEXT_VV(vwadd_wv_h, 2, 4, clearl) 1113 GEN_VEXT_VV(vwadd_wv_w, 4, 8, clearq) 1114 GEN_VEXT_VV(vwsub_wv_b, 1, 2, clearh) 1115 GEN_VEXT_VV(vwsub_wv_h, 2, 4, clearl) 1116 GEN_VEXT_VV(vwsub_wv_w, 4, 8, clearq) 1117 1118 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD) 1119 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD) 1120 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD) 1121 RVVCALL(OPIVX2, 
vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB) 1122 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB) 1123 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB) 1124 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD) 1125 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD) 1126 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD) 1127 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB) 1128 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB) 1129 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB) 1130 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD) 1131 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD) 1132 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD) 1133 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB) 1134 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB) 1135 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB) 1136 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD) 1137 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD) 1138 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD) 1139 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB) 1140 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB) 1141 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB) 1142 GEN_VEXT_VX(vwaddu_vx_b, 1, 2, clearh) 1143 GEN_VEXT_VX(vwaddu_vx_h, 2, 4, clearl) 1144 GEN_VEXT_VX(vwaddu_vx_w, 4, 8, clearq) 1145 GEN_VEXT_VX(vwsubu_vx_b, 1, 2, clearh) 1146 GEN_VEXT_VX(vwsubu_vx_h, 2, 4, clearl) 1147 GEN_VEXT_VX(vwsubu_vx_w, 4, 8, clearq) 1148 GEN_VEXT_VX(vwadd_vx_b, 1, 2, clearh) 1149 GEN_VEXT_VX(vwadd_vx_h, 2, 4, clearl) 1150 GEN_VEXT_VX(vwadd_vx_w, 4, 8, clearq) 1151 GEN_VEXT_VX(vwsub_vx_b, 1, 2, clearh) 1152 GEN_VEXT_VX(vwsub_vx_h, 2, 4, clearl) 1153 GEN_VEXT_VX(vwsub_vx_w, 4, 8, clearq) 1154 GEN_VEXT_VX(vwaddu_wx_b, 1, 2, clearh) 1155 GEN_VEXT_VX(vwaddu_wx_h, 2, 4, clearl) 1156 GEN_VEXT_VX(vwaddu_wx_w, 4, 8, clearq) 1157 GEN_VEXT_VX(vwsubu_wx_b, 1, 2, clearh) 1158 GEN_VEXT_VX(vwsubu_wx_h, 2, 4, clearl) 1159 GEN_VEXT_VX(vwsubu_wx_w, 4, 8, clearq) 1160 GEN_VEXT_VX(vwadd_wx_b, 1, 2, clearh) 1161 GEN_VEXT_VX(vwadd_wx_h, 2, 4, clearl) 1162 GEN_VEXT_VX(vwadd_wx_w, 4, 8, clearq) 1163 GEN_VEXT_VX(vwsub_wx_b, 1, 2, clearh) 1164 GEN_VEXT_VX(vwsub_wx_h, 2, 4, clearl) 1165 GEN_VEXT_VX(vwsub_wx_w, 4, 8, clearq) 1166 1167 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ 1168 #define DO_VADC(N, M, C) (N + M + C) 1169 #define DO_VSBC(N, M, C) (N - M - C) 1170 1171 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \ 1172 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1173 CPURISCVState *env, uint32_t desc) \ 1174 { \ 1175 uint32_t mlen = vext_mlen(desc); \ 1176 uint32_t vl = env->vl; \ 1177 uint32_t esz = sizeof(ETYPE); \ 1178 uint32_t vlmax = vext_maxsz(desc) / esz; \ 1179 uint32_t i; \ 1180 \ 1181 for (i = 0; i < vl; i++) { \ 1182 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1183 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1184 uint8_t carry = vext_elem_mask(v0, mlen, i); \ 1185 \ 1186 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \ 1187 } \ 1188 CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ 1189 } 1190 1191 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC, clearb) 1192 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC, clearh) 1193 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC, clearl) 1194 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC, clearq) 1195 1196 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC, clearb) 1197 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC, clearh) 1198 
GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC, clearl) 1199 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC, clearq) 1200 1201 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP, CLEAR_FN) \ 1202 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1203 CPURISCVState *env, uint32_t desc) \ 1204 { \ 1205 uint32_t mlen = vext_mlen(desc); \ 1206 uint32_t vl = env->vl; \ 1207 uint32_t esz = sizeof(ETYPE); \ 1208 uint32_t vlmax = vext_maxsz(desc) / esz; \ 1209 uint32_t i; \ 1210 \ 1211 for (i = 0; i < vl; i++) { \ 1212 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1213 uint8_t carry = vext_elem_mask(v0, mlen, i); \ 1214 \ 1215 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\ 1216 } \ 1217 CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ 1218 } 1219 1220 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC, clearb) 1221 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC, clearh) 1222 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC, clearl) 1223 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC, clearq) 1224 1225 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC, clearb) 1226 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC, clearh) 1227 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC, clearl) 1228 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC, clearq) 1229 1230 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \ 1231 (__typeof(N))(N + M) < N) 1232 #define DO_MSBC(N, M, C) (C ? N <= M : N < M) 1233 1234 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \ 1235 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1236 CPURISCVState *env, uint32_t desc) \ 1237 { \ 1238 uint32_t mlen = vext_mlen(desc); \ 1239 uint32_t vl = env->vl; \ 1240 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 1241 uint32_t i; \ 1242 \ 1243 for (i = 0; i < vl; i++) { \ 1244 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1245 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1246 uint8_t carry = vext_elem_mask(v0, mlen, i); \ 1247 \ 1248 vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1, carry));\ 1249 } \ 1250 for (; i < vlmax; i++) { \ 1251 vext_set_elem_mask(vd, mlen, i, 0); \ 1252 } \ 1253 } 1254 1255 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC) 1256 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC) 1257 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC) 1258 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC) 1259 1260 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC) 1261 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC) 1262 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC) 1263 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC) 1264 1265 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \ 1266 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1267 void *vs2, CPURISCVState *env, uint32_t desc) \ 1268 { \ 1269 uint32_t mlen = vext_mlen(desc); \ 1270 uint32_t vl = env->vl; \ 1271 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 1272 uint32_t i; \ 1273 \ 1274 for (i = 0; i < vl; i++) { \ 1275 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1276 uint8_t carry = vext_elem_mask(v0, mlen, i); \ 1277 \ 1278 vext_set_elem_mask(vd, mlen, i, \ 1279 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \ 1280 } \ 1281 for (; i < vlmax; i++) { \ 1282 vext_set_elem_mask(vd, mlen, i, 0); \ 1283 } \ 1284 } 1285 1286 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC) 1287 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC) 1288 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC) 1289 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC) 1290 1291 
GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC) 1292 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC) 1293 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC) 1294 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC) 1295 1296 /* Vector Bitwise Logical Instructions */ 1297 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND) 1298 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND) 1299 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND) 1300 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND) 1301 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR) 1302 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR) 1303 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR) 1304 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR) 1305 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR) 1306 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR) 1307 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR) 1308 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR) 1309 GEN_VEXT_VV(vand_vv_b, 1, 1, clearb) 1310 GEN_VEXT_VV(vand_vv_h, 2, 2, clearh) 1311 GEN_VEXT_VV(vand_vv_w, 4, 4, clearl) 1312 GEN_VEXT_VV(vand_vv_d, 8, 8, clearq) 1313 GEN_VEXT_VV(vor_vv_b, 1, 1, clearb) 1314 GEN_VEXT_VV(vor_vv_h, 2, 2, clearh) 1315 GEN_VEXT_VV(vor_vv_w, 4, 4, clearl) 1316 GEN_VEXT_VV(vor_vv_d, 8, 8, clearq) 1317 GEN_VEXT_VV(vxor_vv_b, 1, 1, clearb) 1318 GEN_VEXT_VV(vxor_vv_h, 2, 2, clearh) 1319 GEN_VEXT_VV(vxor_vv_w, 4, 4, clearl) 1320 GEN_VEXT_VV(vxor_vv_d, 8, 8, clearq) 1321 1322 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND) 1323 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND) 1324 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND) 1325 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND) 1326 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR) 1327 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR) 1328 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR) 1329 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR) 1330 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR) 1331 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR) 1332 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR) 1333 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR) 1334 GEN_VEXT_VX(vand_vx_b, 1, 1, clearb) 1335 GEN_VEXT_VX(vand_vx_h, 2, 2, clearh) 1336 GEN_VEXT_VX(vand_vx_w, 4, 4, clearl) 1337 GEN_VEXT_VX(vand_vx_d, 8, 8, clearq) 1338 GEN_VEXT_VX(vor_vx_b, 1, 1, clearb) 1339 GEN_VEXT_VX(vor_vx_h, 2, 2, clearh) 1340 GEN_VEXT_VX(vor_vx_w, 4, 4, clearl) 1341 GEN_VEXT_VX(vor_vx_d, 8, 8, clearq) 1342 GEN_VEXT_VX(vxor_vx_b, 1, 1, clearb) 1343 GEN_VEXT_VX(vxor_vx_h, 2, 2, clearh) 1344 GEN_VEXT_VX(vxor_vx_w, 4, 4, clearl) 1345 GEN_VEXT_VX(vxor_vx_d, 8, 8, clearq) 1346 1347 /* Vector Single-Width Bit Shift Instructions */ 1348 #define DO_SLL(N, M) (N << (M)) 1349 #define DO_SRL(N, M) (N >> (M)) 1350 1351 /* generate the helpers for shift instructions with two vector operators */ 1352 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK, CLEAR_FN) \ 1353 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 1354 void *vs2, CPURISCVState *env, uint32_t desc) \ 1355 { \ 1356 uint32_t mlen = vext_mlen(desc); \ 1357 uint32_t vm = vext_vm(desc); \ 1358 uint32_t vl = env->vl; \ 1359 uint32_t esz = sizeof(TS1); \ 1360 uint32_t vlmax = vext_maxsz(desc) / esz; \ 1361 uint32_t i; \ 1362 \ 1363 for (i = 0; i < vl; i++) { \ 1364 if (!vm && !vext_elem_mask(v0, mlen, i)) { \ 1365 continue; \ 1366 } \ 1367 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \ 1368 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1369 *((TS1 *)vd + 
HS1(i)) = OP(s2, s1 & MASK); \ 1370 } \ 1371 CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ 1372 } 1373 1374 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7, clearb) 1375 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf, clearh) 1376 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f, clearl) 1377 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f, clearq) 1378 1379 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb) 1380 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh) 1381 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl) 1382 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq) 1383 1384 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb) 1385 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh) 1386 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl) 1387 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq) 1388 1389 /* generate the helpers for shift instructions with one vector and one scalar */ 1390 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK, CLEAR_FN) \ 1391 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 1392 void *vs2, CPURISCVState *env, uint32_t desc) \ 1393 { \ 1394 uint32_t mlen = vext_mlen(desc); \ 1395 uint32_t vm = vext_vm(desc); \ 1396 uint32_t vl = env->vl; \ 1397 uint32_t esz = sizeof(TD); \ 1398 uint32_t vlmax = vext_maxsz(desc) / esz; \ 1399 uint32_t i; \ 1400 \ 1401 for (i = 0; i < vl; i++) { \ 1402 if (!vm && !vext_elem_mask(v0, mlen, i)) { \ 1403 continue; \ 1404 } \ 1405 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 1406 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \ 1407 } \ 1408 CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ 1409 } 1410 1411 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7, clearb) 1412 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf, clearh) 1413 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f, clearl) 1414 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f, clearq) 1415 1416 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7, clearb) 1417 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf, clearh) 1418 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f, clearl) 1419 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f, clearq) 1420 1421 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7, clearb) 1422 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf, clearh) 1423 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f, clearl) 1424 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f, clearq) 1425 1426 /* Vector Narrowing Integer Right Shift Instructions */ 1427 GEN_VEXT_SHIFT_VV(vnsrl_vv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb) 1428 GEN_VEXT_SHIFT_VV(vnsrl_vv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh) 1429 GEN_VEXT_SHIFT_VV(vnsrl_vv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl) 1430 GEN_VEXT_SHIFT_VV(vnsra_vv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb) 1431 GEN_VEXT_SHIFT_VV(vnsra_vv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh) 1432 GEN_VEXT_SHIFT_VV(vnsra_vv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl) 1433 GEN_VEXT_SHIFT_VX(vnsrl_vx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf, clearb) 1434 
GEN_VEXT_SHIFT_VX(vnsrl_vx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f, clearh) 1435 GEN_VEXT_SHIFT_VX(vnsrl_vx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f, clearl) 1436 GEN_VEXT_SHIFT_VX(vnsra_vx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf, clearb) 1437 GEN_VEXT_SHIFT_VX(vnsra_vx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f, clearh) 1438 GEN_VEXT_SHIFT_VX(vnsra_vx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f, clearl) 1439 1440 /* Vector Integer Comparison Instructions */ 1441 #define DO_MSEQ(N, M) (N == M) 1442 #define DO_MSNE(N, M) (N != M) 1443 #define DO_MSLT(N, M) (N < M) 1444 #define DO_MSLE(N, M) (N <= M) 1445 #define DO_MSGT(N, M) (N > M) 1446 1447 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \ 1448 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 1449 CPURISCVState *env, uint32_t desc) \ 1450 { \ 1451 uint32_t mlen = vext_mlen(desc); \ 1452 uint32_t vm = vext_vm(desc); \ 1453 uint32_t vl = env->vl; \ 1454 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 1455 uint32_t i; \ 1456 \ 1457 for (i = 0; i < vl; i++) { \ 1458 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 1459 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1460 if (!vm && !vext_elem_mask(v0, mlen, i)) { \ 1461 continue; \ 1462 } \ 1463 vext_set_elem_mask(vd, mlen, i, DO_OP(s2, s1)); \ 1464 } \ 1465 for (; i < vlmax; i++) { \ 1466 vext_set_elem_mask(vd, mlen, i, 0); \ 1467 } \ 1468 } 1469 1470 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ) 1471 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ) 1472 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ) 1473 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ) 1474 1475 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE) 1476 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE) 1477 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE) 1478 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE) 1479 1480 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT) 1481 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT) 1482 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT) 1483 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT) 1484 1485 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT) 1486 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT) 1487 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT) 1488 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT) 1489 1490 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE) 1491 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE) 1492 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE) 1493 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE) 1494 1495 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE) 1496 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE) 1497 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE) 1498 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE) 1499 1500 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \ 1501 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \ 1502 CPURISCVState *env, uint32_t desc) \ 1503 { \ 1504 uint32_t mlen = vext_mlen(desc); \ 1505 uint32_t vm = vext_vm(desc); \ 1506 uint32_t vl = env->vl; \ 1507 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 1508 uint32_t i; \ 1509 \ 1510 for (i = 0; i < vl; i++) { \ 1511 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 1512 if (!vm && !vext_elem_mask(v0, mlen, i)) { \ 1513 continue; \ 1514 } \ 1515 vext_set_elem_mask(vd, mlen, i, \ 1516 DO_OP(s2, (ETYPE)(target_long)s1)); \ 1517 } \ 1518 for (; i < vlmax; i++) { \ 1519 vext_set_elem_mask(vd, mlen, i, 0); \ 1520 } \ 1521 } 1522 1523 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ) 1524 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ) 
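/*
 * Note on the comparison helpers: each active element produces a single
 * mask bit written with vext_set_elem_mask(), so no clear_fn is used;
 * instead the trailing loop zeroes the mask bits of elements [vl, vlmax).
 * Inactive elements (vm == 0 and mask bit clear) are skipped and keep
 * their previous mask bit.  The DO_OP(s2, s1) operand order follows the
 * instruction semantics, e.g. vmslt.vv sets the bit when vs2[i] < vs1[i],
 * and the _vx forms sign-extend the scalar first, so for vmslt_vx_b the
 * per-element update is effectively (illustrative expansion):
 *
 *     vext_set_elem_mask(vd, mlen, i,
 *                        (int8_t)s2 < (int8_t)(target_long)s1);
 *
 * vmsgtu/vmsgt are generated only in _vx form, matching the instruction
 * set, which provides vmsgt{u} only with scalar or immediate operands.
 */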
1525 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ) 1526 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ) 1527 1528 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE) 1529 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE) 1530 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE) 1531 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE) 1532 1533 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT) 1534 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT) 1535 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT) 1536 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT) 1537 1538 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT) 1539 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT) 1540 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT) 1541 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT) 1542 1543 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE) 1544 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE) 1545 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE) 1546 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE) 1547 1548 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE) 1549 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE) 1550 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE) 1551 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE) 1552 1553 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT) 1554 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT) 1555 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT) 1556 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT) 1557 1558 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT) 1559 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT) 1560 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT) 1561 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT) 1562 1563 /* Vector Integer Min/Max Instructions */ 1564 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN) 1565 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN) 1566 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN) 1567 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN) 1568 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN) 1569 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN) 1570 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN) 1571 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN) 1572 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX) 1573 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX) 1574 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX) 1575 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX) 1576 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX) 1577 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX) 1578 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX) 1579 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX) 1580 GEN_VEXT_VV(vminu_vv_b, 1, 1, clearb) 1581 GEN_VEXT_VV(vminu_vv_h, 2, 2, clearh) 1582 GEN_VEXT_VV(vminu_vv_w, 4, 4, clearl) 1583 GEN_VEXT_VV(vminu_vv_d, 8, 8, clearq) 1584 GEN_VEXT_VV(vmin_vv_b, 1, 1, clearb) 1585 GEN_VEXT_VV(vmin_vv_h, 2, 2, clearh) 1586 GEN_VEXT_VV(vmin_vv_w, 4, 4, clearl) 1587 GEN_VEXT_VV(vmin_vv_d, 8, 8, clearq) 1588 GEN_VEXT_VV(vmaxu_vv_b, 1, 1, clearb) 1589 GEN_VEXT_VV(vmaxu_vv_h, 2, 2, clearh) 1590 GEN_VEXT_VV(vmaxu_vv_w, 4, 4, clearl) 1591 GEN_VEXT_VV(vmaxu_vv_d, 8, 8, clearq) 1592 GEN_VEXT_VV(vmax_vv_b, 1, 1, clearb) 1593 GEN_VEXT_VV(vmax_vv_h, 2, 2, clearh) 1594 GEN_VEXT_VV(vmax_vv_w, 4, 4, clearl) 1595 GEN_VEXT_VV(vmax_vv_d, 8, 8, clearq) 1596 1597 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN) 1598 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, 
DO_MIN) 1599 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN) 1600 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN) 1601 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN) 1602 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN) 1603 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN) 1604 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN) 1605 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX) 1606 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX) 1607 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX) 1608 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX) 1609 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX) 1610 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX) 1611 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX) 1612 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX) 1613 GEN_VEXT_VX(vminu_vx_b, 1, 1, clearb) 1614 GEN_VEXT_VX(vminu_vx_h, 2, 2, clearh) 1615 GEN_VEXT_VX(vminu_vx_w, 4, 4, clearl) 1616 GEN_VEXT_VX(vminu_vx_d, 8, 8, clearq) 1617 GEN_VEXT_VX(vmin_vx_b, 1, 1, clearb) 1618 GEN_VEXT_VX(vmin_vx_h, 2, 2, clearh) 1619 GEN_VEXT_VX(vmin_vx_w, 4, 4, clearl) 1620 GEN_VEXT_VX(vmin_vx_d, 8, 8, clearq) 1621 GEN_VEXT_VX(vmaxu_vx_b, 1, 1, clearb) 1622 GEN_VEXT_VX(vmaxu_vx_h, 2, 2, clearh) 1623 GEN_VEXT_VX(vmaxu_vx_w, 4, 4, clearl) 1624 GEN_VEXT_VX(vmaxu_vx_d, 8, 8, clearq) 1625 GEN_VEXT_VX(vmax_vx_b, 1, 1, clearb) 1626 GEN_VEXT_VX(vmax_vx_h, 2, 2, clearh) 1627 GEN_VEXT_VX(vmax_vx_w, 4, 4, clearl) 1628 GEN_VEXT_VX(vmax_vx_d, 8, 8, clearq) 1629 1630 /* Vector Single-Width Integer Multiply Instructions */ 1631 #define DO_MUL(N, M) (N * M) 1632 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL) 1633 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL) 1634 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL) 1635 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL) 1636 GEN_VEXT_VV(vmul_vv_b, 1, 1, clearb) 1637 GEN_VEXT_VV(vmul_vv_h, 2, 2, clearh) 1638 GEN_VEXT_VV(vmul_vv_w, 4, 4, clearl) 1639 GEN_VEXT_VV(vmul_vv_d, 8, 8, clearq) 1640 1641 static int8_t do_mulh_b(int8_t s2, int8_t s1) 1642 { 1643 return (int16_t)s2 * (int16_t)s1 >> 8; 1644 } 1645 1646 static int16_t do_mulh_h(int16_t s2, int16_t s1) 1647 { 1648 return (int32_t)s2 * (int32_t)s1 >> 16; 1649 } 1650 1651 static int32_t do_mulh_w(int32_t s2, int32_t s1) 1652 { 1653 return (int64_t)s2 * (int64_t)s1 >> 32; 1654 } 1655 1656 static int64_t do_mulh_d(int64_t s2, int64_t s1) 1657 { 1658 uint64_t hi_64, lo_64; 1659 1660 muls64(&lo_64, &hi_64, s1, s2); 1661 return hi_64; 1662 } 1663 1664 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1) 1665 { 1666 return (uint16_t)s2 * (uint16_t)s1 >> 8; 1667 } 1668 1669 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1) 1670 { 1671 return (uint32_t)s2 * (uint32_t)s1 >> 16; 1672 } 1673 1674 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1) 1675 { 1676 return (uint64_t)s2 * (uint64_t)s1 >> 32; 1677 } 1678 1679 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1) 1680 { 1681 uint64_t hi_64, lo_64; 1682 1683 mulu64(&lo_64, &hi_64, s2, s1); 1684 return hi_64; 1685 } 1686 1687 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1) 1688 { 1689 return (int16_t)s2 * (uint16_t)s1 >> 8; 1690 } 1691 1692 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1) 1693 { 1694 return (int32_t)s2 * (uint32_t)s1 >> 16; 1695 } 1696 1697 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1) 1698 { 1699 return (int64_t)s2 * (uint64_t)s1 >> 32; 1700 } 1701 1702 /* 1703 * Let A = signed operand, 1704 * B = unsigned operand 1705 * P = mulu64(A, B), unsigned product 1706 * 
1707 * LET X = 2 ** 64 - A, 2's complement of A 1708 * SP = signed product 1709 * THEN 1710 * IF A < 0 1711 * SP = -X * B 1712 * = -(2 ** 64 - A) * B 1713 * = A * B - 2 ** 64 * B 1714 * = P - 2 ** 64 * B 1715 * ELSE 1716 * SP = P 1717 * THEN 1718 * HI_P -= (A < 0 ? B : 0) 1719 */ 1720 1721 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1) 1722 { 1723 uint64_t hi_64, lo_64; 1724 1725 mulu64(&lo_64, &hi_64, s2, s1); 1726 1727 hi_64 -= s2 < 0 ? s1 : 0; 1728 return hi_64; 1729 } 1730 1731 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b) 1732 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h) 1733 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w) 1734 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d) 1735 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b) 1736 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h) 1737 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w) 1738 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d) 1739 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b) 1740 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h) 1741 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w) 1742 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d) 1743 GEN_VEXT_VV(vmulh_vv_b, 1, 1, clearb) 1744 GEN_VEXT_VV(vmulh_vv_h, 2, 2, clearh) 1745 GEN_VEXT_VV(vmulh_vv_w, 4, 4, clearl) 1746 GEN_VEXT_VV(vmulh_vv_d, 8, 8, clearq) 1747 GEN_VEXT_VV(vmulhu_vv_b, 1, 1, clearb) 1748 GEN_VEXT_VV(vmulhu_vv_h, 2, 2, clearh) 1749 GEN_VEXT_VV(vmulhu_vv_w, 4, 4, clearl) 1750 GEN_VEXT_VV(vmulhu_vv_d, 8, 8, clearq) 1751 GEN_VEXT_VV(vmulhsu_vv_b, 1, 1, clearb) 1752 GEN_VEXT_VV(vmulhsu_vv_h, 2, 2, clearh) 1753 GEN_VEXT_VV(vmulhsu_vv_w, 4, 4, clearl) 1754 GEN_VEXT_VV(vmulhsu_vv_d, 8, 8, clearq) 1755 1756 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL) 1757 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL) 1758 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL) 1759 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL) 1760 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b) 1761 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h) 1762 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w) 1763 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d) 1764 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b) 1765 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h) 1766 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w) 1767 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d) 1768 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b) 1769 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h) 1770 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w) 1771 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d) 1772 GEN_VEXT_VX(vmul_vx_b, 1, 1, clearb) 1773 GEN_VEXT_VX(vmul_vx_h, 2, 2, clearh) 1774 GEN_VEXT_VX(vmul_vx_w, 4, 4, clearl) 1775 GEN_VEXT_VX(vmul_vx_d, 8, 8, clearq) 1776 GEN_VEXT_VX(vmulh_vx_b, 1, 1, clearb) 1777 GEN_VEXT_VX(vmulh_vx_h, 2, 2, clearh) 1778 GEN_VEXT_VX(vmulh_vx_w, 4, 4, clearl) 1779 GEN_VEXT_VX(vmulh_vx_d, 8, 8, clearq) 1780 GEN_VEXT_VX(vmulhu_vx_b, 1, 1, clearb) 1781 GEN_VEXT_VX(vmulhu_vx_h, 2, 2, clearh) 1782 GEN_VEXT_VX(vmulhu_vx_w, 4, 4, clearl) 1783 GEN_VEXT_VX(vmulhu_vx_d, 8, 8, clearq) 1784 GEN_VEXT_VX(vmulhsu_vx_b, 1, 1, clearb) 1785 GEN_VEXT_VX(vmulhsu_vx_h, 2, 2, clearh) 1786 GEN_VEXT_VX(vmulhsu_vx_w, 4, 4, clearl) 1787 GEN_VEXT_VX(vmulhsu_vx_d, 8, 8, clearq) 1788 1789 /* Vector 
Integer Divide Instructions */ 1790 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M) 1791 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M) 1792 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\ 1793 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M) 1794 #define DO_REM(N, M) (unlikely(M == 0) ? N :\ 1795 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M) 1796 1797 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU) 1798 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU) 1799 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU) 1800 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU) 1801 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV) 1802 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV) 1803 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV) 1804 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV) 1805 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU) 1806 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU) 1807 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU) 1808 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU) 1809 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM) 1810 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM) 1811 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM) 1812 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM) 1813 GEN_VEXT_VV(vdivu_vv_b, 1, 1, clearb) 1814 GEN_VEXT_VV(vdivu_vv_h, 2, 2, clearh) 1815 GEN_VEXT_VV(vdivu_vv_w, 4, 4, clearl) 1816 GEN_VEXT_VV(vdivu_vv_d, 8, 8, clearq) 1817 GEN_VEXT_VV(vdiv_vv_b, 1, 1, clearb) 1818 GEN_VEXT_VV(vdiv_vv_h, 2, 2, clearh) 1819 GEN_VEXT_VV(vdiv_vv_w, 4, 4, clearl) 1820 GEN_VEXT_VV(vdiv_vv_d, 8, 8, clearq) 1821 GEN_VEXT_VV(vremu_vv_b, 1, 1, clearb) 1822 GEN_VEXT_VV(vremu_vv_h, 2, 2, clearh) 1823 GEN_VEXT_VV(vremu_vv_w, 4, 4, clearl) 1824 GEN_VEXT_VV(vremu_vv_d, 8, 8, clearq) 1825 GEN_VEXT_VV(vrem_vv_b, 1, 1, clearb) 1826 GEN_VEXT_VV(vrem_vv_h, 2, 2, clearh) 1827 GEN_VEXT_VV(vrem_vv_w, 4, 4, clearl) 1828 GEN_VEXT_VV(vrem_vv_d, 8, 8, clearq) 1829 1830 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU) 1831 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU) 1832 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU) 1833 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU) 1834 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV) 1835 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV) 1836 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV) 1837 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV) 1838 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU) 1839 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU) 1840 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU) 1841 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU) 1842 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM) 1843 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM) 1844 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM) 1845 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM) 1846 GEN_VEXT_VX(vdivu_vx_b, 1, 1, clearb) 1847 GEN_VEXT_VX(vdivu_vx_h, 2, 2, clearh) 1848 GEN_VEXT_VX(vdivu_vx_w, 4, 4, clearl) 1849 GEN_VEXT_VX(vdivu_vx_d, 8, 8, clearq) 1850 GEN_VEXT_VX(vdiv_vx_b, 1, 1, clearb) 1851 GEN_VEXT_VX(vdiv_vx_h, 2, 2, clearh) 1852 GEN_VEXT_VX(vdiv_vx_w, 4, 4, clearl) 1853 GEN_VEXT_VX(vdiv_vx_d, 8, 8, clearq) 1854 GEN_VEXT_VX(vremu_vx_b, 1, 1, clearb) 1855 GEN_VEXT_VX(vremu_vx_h, 2, 2, clearh) 1856 GEN_VEXT_VX(vremu_vx_w, 4, 4, clearl) 1857 GEN_VEXT_VX(vremu_vx_d, 
8, 8, clearq) 1858 GEN_VEXT_VX(vrem_vx_b, 1, 1, clearb) 1859 GEN_VEXT_VX(vrem_vx_h, 2, 2, clearh) 1860 GEN_VEXT_VX(vrem_vx_w, 4, 4, clearl) 1861 GEN_VEXT_VX(vrem_vx_d, 8, 8, clearq) 1862 1863 /* Vector Widening Integer Multiply Instructions */ 1864 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL) 1865 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL) 1866 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL) 1867 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL) 1868 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL) 1869 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL) 1870 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL) 1871 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL) 1872 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL) 1873 GEN_VEXT_VV(vwmul_vv_b, 1, 2, clearh) 1874 GEN_VEXT_VV(vwmul_vv_h, 2, 4, clearl) 1875 GEN_VEXT_VV(vwmul_vv_w, 4, 8, clearq) 1876 GEN_VEXT_VV(vwmulu_vv_b, 1, 2, clearh) 1877 GEN_VEXT_VV(vwmulu_vv_h, 2, 4, clearl) 1878 GEN_VEXT_VV(vwmulu_vv_w, 4, 8, clearq) 1879 GEN_VEXT_VV(vwmulsu_vv_b, 1, 2, clearh) 1880 GEN_VEXT_VV(vwmulsu_vv_h, 2, 4, clearl) 1881 GEN_VEXT_VV(vwmulsu_vv_w, 4, 8, clearq) 1882 1883 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL) 1884 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL) 1885 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL) 1886 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL) 1887 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL) 1888 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL) 1889 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL) 1890 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL) 1891 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL) 1892 GEN_VEXT_VX(vwmul_vx_b, 1, 2, clearh) 1893 GEN_VEXT_VX(vwmul_vx_h, 2, 4, clearl) 1894 GEN_VEXT_VX(vwmul_vx_w, 4, 8, clearq) 1895 GEN_VEXT_VX(vwmulu_vx_b, 1, 2, clearh) 1896 GEN_VEXT_VX(vwmulu_vx_h, 2, 4, clearl) 1897 GEN_VEXT_VX(vwmulu_vx_w, 4, 8, clearq) 1898 GEN_VEXT_VX(vwmulsu_vx_b, 1, 2, clearh) 1899 GEN_VEXT_VX(vwmulsu_vx_h, 2, 4, clearl) 1900 GEN_VEXT_VX(vwmulsu_vx_w, 4, 8, clearq) 1901 1902 /* Vector Single-Width Integer Multiply-Add Instructions */ 1903 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 1904 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ 1905 { \ 1906 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 1907 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1908 TD d = *((TD *)vd + HD(i)); \ 1909 *((TD *)vd + HD(i)) = OP(s2, s1, d); \ 1910 } 1911 1912 #define DO_MACC(N, M, D) (M * N + D) 1913 #define DO_NMSAC(N, M, D) (-(M * N) + D) 1914 #define DO_MADD(N, M, D) (M * D + N) 1915 #define DO_NMSUB(N, M, D) (-(M * D) + N) 1916 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC) 1917 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC) 1918 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC) 1919 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC) 1920 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC) 1921 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC) 1922 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC) 1923 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC) 1924 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD) 1925 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD) 1926 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD) 1927 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD) 1928 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, 
H1, H1, H1, DO_NMSUB) 1929 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB) 1930 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB) 1931 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB) 1932 GEN_VEXT_VV(vmacc_vv_b, 1, 1, clearb) 1933 GEN_VEXT_VV(vmacc_vv_h, 2, 2, clearh) 1934 GEN_VEXT_VV(vmacc_vv_w, 4, 4, clearl) 1935 GEN_VEXT_VV(vmacc_vv_d, 8, 8, clearq) 1936 GEN_VEXT_VV(vnmsac_vv_b, 1, 1, clearb) 1937 GEN_VEXT_VV(vnmsac_vv_h, 2, 2, clearh) 1938 GEN_VEXT_VV(vnmsac_vv_w, 4, 4, clearl) 1939 GEN_VEXT_VV(vnmsac_vv_d, 8, 8, clearq) 1940 GEN_VEXT_VV(vmadd_vv_b, 1, 1, clearb) 1941 GEN_VEXT_VV(vmadd_vv_h, 2, 2, clearh) 1942 GEN_VEXT_VV(vmadd_vv_w, 4, 4, clearl) 1943 GEN_VEXT_VV(vmadd_vv_d, 8, 8, clearq) 1944 GEN_VEXT_VV(vnmsub_vv_b, 1, 1, clearb) 1945 GEN_VEXT_VV(vnmsub_vv_h, 2, 2, clearh) 1946 GEN_VEXT_VV(vnmsub_vv_w, 4, 4, clearl) 1947 GEN_VEXT_VV(vnmsub_vv_d, 8, 8, clearq) 1948 1949 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 1950 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ 1951 { \ 1952 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 1953 TD d = *((TD *)vd + HD(i)); \ 1954 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \ 1955 } 1956 1957 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC) 1958 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC) 1959 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC) 1960 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC) 1961 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC) 1962 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC) 1963 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC) 1964 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC) 1965 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD) 1966 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD) 1967 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD) 1968 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD) 1969 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB) 1970 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB) 1971 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB) 1972 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB) 1973 GEN_VEXT_VX(vmacc_vx_b, 1, 1, clearb) 1974 GEN_VEXT_VX(vmacc_vx_h, 2, 2, clearh) 1975 GEN_VEXT_VX(vmacc_vx_w, 4, 4, clearl) 1976 GEN_VEXT_VX(vmacc_vx_d, 8, 8, clearq) 1977 GEN_VEXT_VX(vnmsac_vx_b, 1, 1, clearb) 1978 GEN_VEXT_VX(vnmsac_vx_h, 2, 2, clearh) 1979 GEN_VEXT_VX(vnmsac_vx_w, 4, 4, clearl) 1980 GEN_VEXT_VX(vnmsac_vx_d, 8, 8, clearq) 1981 GEN_VEXT_VX(vmadd_vx_b, 1, 1, clearb) 1982 GEN_VEXT_VX(vmadd_vx_h, 2, 2, clearh) 1983 GEN_VEXT_VX(vmadd_vx_w, 4, 4, clearl) 1984 GEN_VEXT_VX(vmadd_vx_d, 8, 8, clearq) 1985 GEN_VEXT_VX(vnmsub_vx_b, 1, 1, clearb) 1986 GEN_VEXT_VX(vnmsub_vx_h, 2, 2, clearh) 1987 GEN_VEXT_VX(vnmsub_vx_w, 4, 4, clearl) 1988 GEN_VEXT_VX(vnmsub_vx_d, 8, 8, clearq) 1989 1990 /* Vector Widening Integer Multiply-Add Instructions */ 1991 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC) 1992 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC) 1993 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC) 1994 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC) 1995 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC) 1996 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC) 1997 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC) 1998 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC) 1999 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC) 2000 
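/*
 * For the widening multiply-adds the destination element doubles as the
 * accumulator: HD is the 2*SEW index function (H2/H4/H8) while HS1/HS2
 * index SEW-wide sources.  Assuming the WOP_* tuples defined earlier in
 * this file follow the usual pattern (e.g. WOP_UUU_B as uint16_t, uint8_t,
 * uint8_t, uint16_t, uint16_t), the vwmaccu_vv_b RVVCALL above expands to
 * roughly:
 *
 *     static void do_vwmaccu_vv_b(void *vd, void *vs1, void *vs2, int i)
 *     {
 *         uint16_t s1 = *((uint8_t *)vs1 + H1(i));
 *         uint16_t s2 = *((uint8_t *)vs2 + H1(i));
 *         uint16_t d  = *((uint16_t *)vd + H2(i));
 *         *((uint16_t *)vd + H2(i)) = s1 * s2 + d; /* DO_MACC(s2, s1, d) */
 *     }
 *
 * so the product of two SEW-wide inputs is computed at 2*SEW and the high
 * half is never lost.
 */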
GEN_VEXT_VV(vwmaccu_vv_b, 1, 2, clearh) 2001 GEN_VEXT_VV(vwmaccu_vv_h, 2, 4, clearl) 2002 GEN_VEXT_VV(vwmaccu_vv_w, 4, 8, clearq) 2003 GEN_VEXT_VV(vwmacc_vv_b, 1, 2, clearh) 2004 GEN_VEXT_VV(vwmacc_vv_h, 2, 4, clearl) 2005 GEN_VEXT_VV(vwmacc_vv_w, 4, 8, clearq) 2006 GEN_VEXT_VV(vwmaccsu_vv_b, 1, 2, clearh) 2007 GEN_VEXT_VV(vwmaccsu_vv_h, 2, 4, clearl) 2008 GEN_VEXT_VV(vwmaccsu_vv_w, 4, 8, clearq) 2009 2010 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC) 2011 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC) 2012 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC) 2013 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC) 2014 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC) 2015 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC) 2016 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC) 2017 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC) 2018 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC) 2019 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC) 2020 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC) 2021 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC) 2022 GEN_VEXT_VX(vwmaccu_vx_b, 1, 2, clearh) 2023 GEN_VEXT_VX(vwmaccu_vx_h, 2, 4, clearl) 2024 GEN_VEXT_VX(vwmaccu_vx_w, 4, 8, clearq) 2025 GEN_VEXT_VX(vwmacc_vx_b, 1, 2, clearh) 2026 GEN_VEXT_VX(vwmacc_vx_h, 2, 4, clearl) 2027 GEN_VEXT_VX(vwmacc_vx_w, 4, 8, clearq) 2028 GEN_VEXT_VX(vwmaccsu_vx_b, 1, 2, clearh) 2029 GEN_VEXT_VX(vwmaccsu_vx_h, 2, 4, clearl) 2030 GEN_VEXT_VX(vwmaccsu_vx_w, 4, 8, clearq) 2031 GEN_VEXT_VX(vwmaccus_vx_b, 1, 2, clearh) 2032 GEN_VEXT_VX(vwmaccus_vx_h, 2, 4, clearl) 2033 GEN_VEXT_VX(vwmaccus_vx_w, 4, 8, clearq) 2034 2035 /* Vector Integer Merge and Move Instructions */ 2036 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H, CLEAR_FN) \ 2037 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \ 2038 uint32_t desc) \ 2039 { \ 2040 uint32_t vl = env->vl; \ 2041 uint32_t esz = sizeof(ETYPE); \ 2042 uint32_t vlmax = vext_maxsz(desc) / esz; \ 2043 uint32_t i; \ 2044 \ 2045 for (i = 0; i < vl; i++) { \ 2046 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 2047 *((ETYPE *)vd + H(i)) = s1; \ 2048 } \ 2049 CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ 2050 } 2051 2052 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1, clearb) 2053 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2, clearh) 2054 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4, clearl) 2055 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8, clearq) 2056 2057 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H, CLEAR_FN) \ 2058 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \ 2059 uint32_t desc) \ 2060 { \ 2061 uint32_t vl = env->vl; \ 2062 uint32_t esz = sizeof(ETYPE); \ 2063 uint32_t vlmax = vext_maxsz(desc) / esz; \ 2064 uint32_t i; \ 2065 \ 2066 for (i = 0; i < vl; i++) { \ 2067 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \ 2068 } \ 2069 CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ 2070 } 2071 2072 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1, clearb) 2073 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2, clearh) 2074 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4, clearl) 2075 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8, clearq) 2076 2077 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H, CLEAR_FN) \ 2078 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2079 CPURISCVState *env, uint32_t desc) \ 2080 { \ 2081 uint32_t mlen = vext_mlen(desc); \ 2082 uint32_t vl = env->vl; \ 2083 uint32_t esz = sizeof(ETYPE); \ 2084 uint32_t vlmax = vext_maxsz(desc) / esz; \ 2085 uint32_t i; \ 2086 \ 2087 for (i = 0; i < vl; i++) { \ 2088 ETYPE *vt = (!vext_elem_mask(v0, mlen, i) 
? vs2 : vs1); \ 2089 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \ 2090 } \ 2091 CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ 2092 } 2093 2094 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1, clearb) 2095 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2, clearh) 2096 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4, clearl) 2097 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8, clearq) 2098 2099 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H, CLEAR_FN) \ 2100 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2101 void *vs2, CPURISCVState *env, uint32_t desc) \ 2102 { \ 2103 uint32_t mlen = vext_mlen(desc); \ 2104 uint32_t vl = env->vl; \ 2105 uint32_t esz = sizeof(ETYPE); \ 2106 uint32_t vlmax = vext_maxsz(desc) / esz; \ 2107 uint32_t i; \ 2108 \ 2109 for (i = 0; i < vl; i++) { \ 2110 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 2111 ETYPE d = (!vext_elem_mask(v0, mlen, i) ? s2 : \ 2112 (ETYPE)(target_long)s1); \ 2113 *((ETYPE *)vd + H(i)) = d; \ 2114 } \ 2115 CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ 2116 } 2117 2118 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1, clearb) 2119 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2, clearh) 2120 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4, clearl) 2121 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8, clearq) 2122 2123 /* 2124 *** Vector Fixed-Point Arithmetic Instructions 2125 */ 2126 2127 /* Vector Single-Width Saturating Add and Subtract */ 2128 2129 /* 2130 * As fixed point instructions probably have round mode and saturation, 2131 * define common macros for fixed point here. 2132 */ 2133 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i, 2134 CPURISCVState *env, int vxrm); 2135 2136 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2137 static inline void \ 2138 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2139 CPURISCVState *env, int vxrm) \ 2140 { \ 2141 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2142 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2143 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \ 2144 } 2145 2146 static inline void 2147 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2, 2148 CPURISCVState *env, 2149 uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm, 2150 opivv2_rm_fn *fn) 2151 { 2152 for (uint32_t i = 0; i < vl; i++) { 2153 if (!vm && !vext_elem_mask(v0, mlen, i)) { 2154 continue; 2155 } 2156 fn(vd, vs1, vs2, i, env, vxrm); 2157 } 2158 } 2159 2160 static inline void 2161 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2, 2162 CPURISCVState *env, 2163 uint32_t desc, uint32_t esz, uint32_t dsz, 2164 opivv2_rm_fn *fn, clear_fn *clearfn) 2165 { 2166 uint32_t vlmax = vext_maxsz(desc) / esz; 2167 uint32_t mlen = vext_mlen(desc); 2168 uint32_t vm = vext_vm(desc); 2169 uint32_t vl = env->vl; 2170 2171 switch (env->vxrm) { 2172 case 0: /* rnu */ 2173 vext_vv_rm_1(vd, v0, vs1, vs2, 2174 env, vl, vm, mlen, 0, fn); 2175 break; 2176 case 1: /* rne */ 2177 vext_vv_rm_1(vd, v0, vs1, vs2, 2178 env, vl, vm, mlen, 1, fn); 2179 break; 2180 case 2: /* rdn */ 2181 vext_vv_rm_1(vd, v0, vs1, vs2, 2182 env, vl, vm, mlen, 2, fn); 2183 break; 2184 default: /* rod */ 2185 vext_vv_rm_1(vd, v0, vs1, vs2, 2186 env, vl, vm, mlen, 3, fn); 2187 break; 2188 } 2189 2190 clearfn(vd, vl, vl * dsz, vlmax * dsz); 2191 } 2192 2193 /* generate helpers for fixed point instructions with OPIVV format */ 2194 #define GEN_VEXT_VV_RM(NAME, ESZ, DSZ, CLEAR_FN) \ 2195 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 2196 CPURISCVState *env, uint32_t desc) \ 2197 { \ 2198 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, ESZ, DSZ, \ 2199 do_##NAME, CLEAR_FN); \ 2200 } 2201 2202 
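/*
 * GEN_VEXT_VV_RM stamps out one helper per instruction; e.g.
 * GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1, clearb) below becomes roughly:
 *
 *     void HELPER(vsaddu_vv_b)(void *vd, void *v0, void *vs1, void *vs2,
 *                              CPURISCVState *env, uint32_t desc)
 *     {
 *         vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, 1, 1,
 *                      do_vsaddu_vv_b, clearb);
 *     }
 *
 * vext_vv_rm_2 reads env->vxrm once and dispatches through the switch so
 * that vext_vv_rm_1 is always called with a constant rounding mode
 * (0 = rnu, 1 = rne, 2 = rdn, 3 = rod), which gives the compiler a chance
 * to specialize the inlined per-element op for each mode; the tail
 * elements are then cleared with the supplied clear_fn.
 */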
static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2203 { 2204 uint8_t res = a + b; 2205 if (res < a) { 2206 res = UINT8_MAX; 2207 env->vxsat = 0x1; 2208 } 2209 return res; 2210 } 2211 2212 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a, 2213 uint16_t b) 2214 { 2215 uint16_t res = a + b; 2216 if (res < a) { 2217 res = UINT16_MAX; 2218 env->vxsat = 0x1; 2219 } 2220 return res; 2221 } 2222 2223 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a, 2224 uint32_t b) 2225 { 2226 uint32_t res = a + b; 2227 if (res < a) { 2228 res = UINT32_MAX; 2229 env->vxsat = 0x1; 2230 } 2231 return res; 2232 } 2233 2234 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a, 2235 uint64_t b) 2236 { 2237 uint64_t res = a + b; 2238 if (res < a) { 2239 res = UINT64_MAX; 2240 env->vxsat = 0x1; 2241 } 2242 return res; 2243 } 2244 2245 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8) 2246 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16) 2247 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32) 2248 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64) 2249 GEN_VEXT_VV_RM(vsaddu_vv_b, 1, 1, clearb) 2250 GEN_VEXT_VV_RM(vsaddu_vv_h, 2, 2, clearh) 2251 GEN_VEXT_VV_RM(vsaddu_vv_w, 4, 4, clearl) 2252 GEN_VEXT_VV_RM(vsaddu_vv_d, 8, 8, clearq) 2253 2254 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i, 2255 CPURISCVState *env, int vxrm); 2256 2257 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2258 static inline void \ 2259 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2260 CPURISCVState *env, int vxrm) \ 2261 { \ 2262 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2263 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \ 2264 } 2265 2266 static inline void 2267 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2, 2268 CPURISCVState *env, 2269 uint32_t vl, uint32_t vm, uint32_t mlen, int vxrm, 2270 opivx2_rm_fn *fn) 2271 { 2272 for (uint32_t i = 0; i < vl; i++) { 2273 if (!vm && !vext_elem_mask(v0, mlen, i)) { 2274 continue; 2275 } 2276 fn(vd, s1, vs2, i, env, vxrm); 2277 } 2278 } 2279 2280 static inline void 2281 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2, 2282 CPURISCVState *env, 2283 uint32_t desc, uint32_t esz, uint32_t dsz, 2284 opivx2_rm_fn *fn, clear_fn *clearfn) 2285 { 2286 uint32_t vlmax = vext_maxsz(desc) / esz; 2287 uint32_t mlen = vext_mlen(desc); 2288 uint32_t vm = vext_vm(desc); 2289 uint32_t vl = env->vl; 2290 2291 switch (env->vxrm) { 2292 case 0: /* rnu */ 2293 vext_vx_rm_1(vd, v0, s1, vs2, 2294 env, vl, vm, mlen, 0, fn); 2295 break; 2296 case 1: /* rne */ 2297 vext_vx_rm_1(vd, v0, s1, vs2, 2298 env, vl, vm, mlen, 1, fn); 2299 break; 2300 case 2: /* rdn */ 2301 vext_vx_rm_1(vd, v0, s1, vs2, 2302 env, vl, vm, mlen, 2, fn); 2303 break; 2304 default: /* rod */ 2305 vext_vx_rm_1(vd, v0, s1, vs2, 2306 env, vl, vm, mlen, 3, fn); 2307 break; 2308 } 2309 2310 clearfn(vd, vl, vl * dsz, vlmax * dsz); 2311 } 2312 2313 /* generate helpers for fixed point instructions with OPIVX format */ 2314 #define GEN_VEXT_VX_RM(NAME, ESZ, DSZ, CLEAR_FN) \ 2315 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ 2316 void *vs2, CPURISCVState *env, uint32_t desc) \ 2317 { \ 2318 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, ESZ, DSZ, \ 2319 do_##NAME, CLEAR_FN); \ 2320 } 2321 2322 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8) 2323 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16) 2324 RVVCALL(OPIVX2_RM, vsaddu_vx_w, 
OP_UUU_W, H4, H4, saddu32) 2325 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64) 2326 GEN_VEXT_VX_RM(vsaddu_vx_b, 1, 1, clearb) 2327 GEN_VEXT_VX_RM(vsaddu_vx_h, 2, 2, clearh) 2328 GEN_VEXT_VX_RM(vsaddu_vx_w, 4, 4, clearl) 2329 GEN_VEXT_VX_RM(vsaddu_vx_d, 8, 8, clearq) 2330 2331 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2332 { 2333 int8_t res = a + b; 2334 if ((res ^ a) & (res ^ b) & INT8_MIN) { 2335 res = a > 0 ? INT8_MAX : INT8_MIN; 2336 env->vxsat = 0x1; 2337 } 2338 return res; 2339 } 2340 2341 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2342 { 2343 int16_t res = a + b; 2344 if ((res ^ a) & (res ^ b) & INT16_MIN) { 2345 res = a > 0 ? INT16_MAX : INT16_MIN; 2346 env->vxsat = 0x1; 2347 } 2348 return res; 2349 } 2350 2351 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2352 { 2353 int32_t res = a + b; 2354 if ((res ^ a) & (res ^ b) & INT32_MIN) { 2355 res = a > 0 ? INT32_MAX : INT32_MIN; 2356 env->vxsat = 0x1; 2357 } 2358 return res; 2359 } 2360 2361 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2362 { 2363 int64_t res = a + b; 2364 if ((res ^ a) & (res ^ b) & INT64_MIN) { 2365 res = a > 0 ? INT64_MAX : INT64_MIN; 2366 env->vxsat = 0x1; 2367 } 2368 return res; 2369 } 2370 2371 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8) 2372 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16) 2373 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32) 2374 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64) 2375 GEN_VEXT_VV_RM(vsadd_vv_b, 1, 1, clearb) 2376 GEN_VEXT_VV_RM(vsadd_vv_h, 2, 2, clearh) 2377 GEN_VEXT_VV_RM(vsadd_vv_w, 4, 4, clearl) 2378 GEN_VEXT_VV_RM(vsadd_vv_d, 8, 8, clearq) 2379 2380 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8) 2381 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16) 2382 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32) 2383 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64) 2384 GEN_VEXT_VX_RM(vsadd_vx_b, 1, 1, clearb) 2385 GEN_VEXT_VX_RM(vsadd_vx_h, 2, 2, clearh) 2386 GEN_VEXT_VX_RM(vsadd_vx_w, 4, 4, clearl) 2387 GEN_VEXT_VX_RM(vsadd_vx_d, 8, 8, clearq) 2388 2389 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2390 { 2391 uint8_t res = a - b; 2392 if (res > a) { 2393 res = 0; 2394 env->vxsat = 0x1; 2395 } 2396 return res; 2397 } 2398 2399 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a, 2400 uint16_t b) 2401 { 2402 uint16_t res = a - b; 2403 if (res > a) { 2404 res = 0; 2405 env->vxsat = 0x1; 2406 } 2407 return res; 2408 } 2409 2410 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a, 2411 uint32_t b) 2412 { 2413 uint32_t res = a - b; 2414 if (res > a) { 2415 res = 0; 2416 env->vxsat = 0x1; 2417 } 2418 return res; 2419 } 2420 2421 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a, 2422 uint64_t b) 2423 { 2424 uint64_t res = a - b; 2425 if (res > a) { 2426 res = 0; 2427 env->vxsat = 0x1; 2428 } 2429 return res; 2430 } 2431 2432 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8) 2433 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16) 2434 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32) 2435 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64) 2436 GEN_VEXT_VV_RM(vssubu_vv_b, 1, 1, clearb) 2437 GEN_VEXT_VV_RM(vssubu_vv_h, 2, 2, clearh) 2438 GEN_VEXT_VV_RM(vssubu_vv_w, 4, 4, clearl) 2439 GEN_VEXT_VV_RM(vssubu_vv_d, 8, 
8, clearq) 2440 2441 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8) 2442 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16) 2443 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32) 2444 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64) 2445 GEN_VEXT_VX_RM(vssubu_vx_b, 1, 1, clearb) 2446 GEN_VEXT_VX_RM(vssubu_vx_h, 2, 2, clearh) 2447 GEN_VEXT_VX_RM(vssubu_vx_w, 4, 4, clearl) 2448 GEN_VEXT_VX_RM(vssubu_vx_d, 8, 8, clearq) 2449 2450 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2451 { 2452 int8_t res = a - b; 2453 if ((res ^ a) & (a ^ b) & INT8_MIN) { 2454 res = a > 0 ? INT8_MAX : INT8_MIN; 2455 env->vxsat = 0x1; 2456 } 2457 return res; 2458 } 2459 2460 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2461 { 2462 int16_t res = a - b; 2463 if ((res ^ a) & (a ^ b) & INT16_MIN) { 2464 res = a > 0 ? INT16_MAX : INT16_MIN; 2465 env->vxsat = 0x1; 2466 } 2467 return res; 2468 } 2469 2470 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2471 { 2472 int32_t res = a - b; 2473 if ((res ^ a) & (a ^ b) & INT32_MIN) { 2474 res = a > 0 ? INT32_MAX : INT32_MIN; 2475 env->vxsat = 0x1; 2476 } 2477 return res; 2478 } 2479 2480 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2481 { 2482 int64_t res = a - b; 2483 if ((res ^ a) & (a ^ b) & INT64_MIN) { 2484 res = a > 0 ? INT64_MAX : INT64_MIN; 2485 env->vxsat = 0x1; 2486 } 2487 return res; 2488 } 2489 2490 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8) 2491 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16) 2492 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32) 2493 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64) 2494 GEN_VEXT_VV_RM(vssub_vv_b, 1, 1, clearb) 2495 GEN_VEXT_VV_RM(vssub_vv_h, 2, 2, clearh) 2496 GEN_VEXT_VV_RM(vssub_vv_w, 4, 4, clearl) 2497 GEN_VEXT_VV_RM(vssub_vv_d, 8, 8, clearq) 2498 2499 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8) 2500 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16) 2501 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32) 2502 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64) 2503 GEN_VEXT_VX_RM(vssub_vx_b, 1, 1, clearb) 2504 GEN_VEXT_VX_RM(vssub_vx_h, 2, 2, clearh) 2505 GEN_VEXT_VX_RM(vssub_vx_w, 4, 4, clearl) 2506 GEN_VEXT_VX_RM(vssub_vx_d, 8, 8, clearq) 2507 2508 /* Vector Single-Width Averaging Add and Subtract */ 2509 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) 2510 { 2511 uint8_t d = extract64(v, shift, 1); 2512 uint8_t d1; 2513 uint64_t D1, D2; 2514 2515 if (shift == 0 || shift > 64) { 2516 return 0; 2517 } 2518 2519 d1 = extract64(v, shift - 1, 1); 2520 D1 = extract64(v, 0, shift); 2521 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */ 2522 return d1; 2523 } else if (vxrm == 1) { /* round-to-nearest-even */ 2524 if (shift > 1) { 2525 D2 = extract64(v, 0, shift - 1); 2526 return d1 & ((D2 != 0) | d); 2527 } else { 2528 return d1 & d; 2529 } 2530 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */ 2531 return !d & (D1 != 0); 2532 } 2533 return 0; /* round-down (truncate) */ 2534 } 2535 2536 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2537 { 2538 int64_t res = (int64_t)a + b; 2539 uint8_t round = get_round(vxrm, res, 1); 2540 2541 return (res >> 1) + round; 2542 } 2543 2544 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2545 { 2546 int64_t res = a + b; 2547 uint8_t 
round = get_round(vxrm, res, 1); 2548 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN; 2549 2550 /* With signed overflow, bit 64 is inverse of bit 63. */ 2551 return ((res >> 1) ^ over) + round; 2552 } 2553 2554 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32) 2555 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32) 2556 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32) 2557 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64) 2558 GEN_VEXT_VV_RM(vaadd_vv_b, 1, 1, clearb) 2559 GEN_VEXT_VV_RM(vaadd_vv_h, 2, 2, clearh) 2560 GEN_VEXT_VV_RM(vaadd_vv_w, 4, 4, clearl) 2561 GEN_VEXT_VV_RM(vaadd_vv_d, 8, 8, clearq) 2562 2563 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32) 2564 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32) 2565 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32) 2566 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64) 2567 GEN_VEXT_VX_RM(vaadd_vx_b, 1, 1, clearb) 2568 GEN_VEXT_VX_RM(vaadd_vx_h, 2, 2, clearh) 2569 GEN_VEXT_VX_RM(vaadd_vx_w, 4, 4, clearl) 2570 GEN_VEXT_VX_RM(vaadd_vx_d, 8, 8, clearq) 2571 2572 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2573 { 2574 int64_t res = (int64_t)a - b; 2575 uint8_t round = get_round(vxrm, res, 1); 2576 2577 return (res >> 1) + round; 2578 } 2579 2580 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2581 { 2582 int64_t res = (int64_t)a - b; 2583 uint8_t round = get_round(vxrm, res, 1); 2584 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN; 2585 2586 /* With signed overflow, bit 64 is inverse of bit 63. */ 2587 return ((res >> 1) ^ over) + round; 2588 } 2589 2590 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32) 2591 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32) 2592 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32) 2593 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64) 2594 GEN_VEXT_VV_RM(vasub_vv_b, 1, 1, clearb) 2595 GEN_VEXT_VV_RM(vasub_vv_h, 2, 2, clearh) 2596 GEN_VEXT_VV_RM(vasub_vv_w, 4, 4, clearl) 2597 GEN_VEXT_VV_RM(vasub_vv_d, 8, 8, clearq) 2598 2599 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32) 2600 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32) 2601 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32) 2602 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64) 2603 GEN_VEXT_VX_RM(vasub_vx_b, 1, 1, clearb) 2604 GEN_VEXT_VX_RM(vasub_vx_h, 2, 2, clearh) 2605 GEN_VEXT_VX_RM(vasub_vx_w, 4, 4, clearl) 2606 GEN_VEXT_VX_RM(vasub_vx_d, 8, 8, clearq) 2607 2608 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */ 2609 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2610 { 2611 uint8_t round; 2612 int16_t res; 2613 2614 res = (int16_t)a * (int16_t)b; 2615 round = get_round(vxrm, res, 7); 2616 res = (res >> 7) + round; 2617 2618 if (res > INT8_MAX) { 2619 env->vxsat = 0x1; 2620 return INT8_MAX; 2621 } else if (res < INT8_MIN) { 2622 env->vxsat = 0x1; 2623 return INT8_MIN; 2624 } else { 2625 return res; 2626 } 2627 } 2628 2629 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2630 { 2631 uint8_t round; 2632 int32_t res; 2633 2634 res = (int32_t)a * (int32_t)b; 2635 round = get_round(vxrm, res, 15); 2636 res = (res >> 15) + round; 2637 2638 if (res > INT16_MAX) { 2639 env->vxsat = 0x1; 2640 return INT16_MAX; 2641 } else if (res < INT16_MIN) { 2642 env->vxsat = 0x1; 2643 return INT16_MIN; 2644 } else { 2645 return res; 2646 } 2647 } 2648 2649 static int32_t 
vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 2650 { 2651 uint8_t round; 2652 int64_t res; 2653 2654 res = (int64_t)a * (int64_t)b; 2655 round = get_round(vxrm, res, 31); 2656 res = (res >> 31) + round; 2657 2658 if (res > INT32_MAX) { 2659 env->vxsat = 0x1; 2660 return INT32_MAX; 2661 } else if (res < INT32_MIN) { 2662 env->vxsat = 0x1; 2663 return INT32_MIN; 2664 } else { 2665 return res; 2666 } 2667 } 2668 2669 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 2670 { 2671 uint8_t round; 2672 uint64_t hi_64, lo_64; 2673 int64_t res; 2674 2675 if (a == INT64_MIN && b == INT64_MIN) { 2676 env->vxsat = 1; 2677 return INT64_MAX; 2678 } 2679 2680 muls64(&lo_64, &hi_64, a, b); 2681 round = get_round(vxrm, lo_64, 63); 2682 /* 2683 * Cannot overflow, as there are always 2684 * 2 sign bits after multiply. 2685 */ 2686 res = (hi_64 << 1) | (lo_64 >> 63); 2687 if (round) { 2688 if (res == INT64_MAX) { 2689 env->vxsat = 1; 2690 } else { 2691 res += 1; 2692 } 2693 } 2694 return res; 2695 } 2696 2697 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8) 2698 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16) 2699 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32) 2700 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64) 2701 GEN_VEXT_VV_RM(vsmul_vv_b, 1, 1, clearb) 2702 GEN_VEXT_VV_RM(vsmul_vv_h, 2, 2, clearh) 2703 GEN_VEXT_VV_RM(vsmul_vv_w, 4, 4, clearl) 2704 GEN_VEXT_VV_RM(vsmul_vv_d, 8, 8, clearq) 2705 2706 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8) 2707 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16) 2708 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32) 2709 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64) 2710 GEN_VEXT_VX_RM(vsmul_vx_b, 1, 1, clearb) 2711 GEN_VEXT_VX_RM(vsmul_vx_h, 2, 2, clearh) 2712 GEN_VEXT_VX_RM(vsmul_vx_w, 4, 4, clearl) 2713 GEN_VEXT_VX_RM(vsmul_vx_d, 8, 8, clearq) 2714 2715 /* Vector Widening Saturating Scaled Multiply-Add */ 2716 static inline uint16_t 2717 vwsmaccu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b, 2718 uint16_t c) 2719 { 2720 uint8_t round; 2721 uint16_t res = (uint16_t)a * b; 2722 2723 round = get_round(vxrm, res, 4); 2724 res = (res >> 4) + round; 2725 return saddu16(env, vxrm, c, res); 2726 } 2727 2728 static inline uint32_t 2729 vwsmaccu16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b, 2730 uint32_t c) 2731 { 2732 uint8_t round; 2733 uint32_t res = (uint32_t)a * b; 2734 2735 round = get_round(vxrm, res, 8); 2736 res = (res >> 8) + round; 2737 return saddu32(env, vxrm, c, res); 2738 } 2739 2740 static inline uint64_t 2741 vwsmaccu32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b, 2742 uint64_t c) 2743 { 2744 uint8_t round; 2745 uint64_t res = (uint64_t)a * b; 2746 2747 round = get_round(vxrm, res, 16); 2748 res = (res >> 16) + round; 2749 return saddu64(env, vxrm, c, res); 2750 } 2751 2752 #define OPIVV3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 2753 static inline void \ 2754 do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 2755 CPURISCVState *env, int vxrm) \ 2756 { \ 2757 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 2758 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2759 TD d = *((TD *)vd + HD(i)); \ 2760 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1, d); \ 2761 } 2762 2763 RVVCALL(OPIVV3_RM, vwsmaccu_vv_b, WOP_UUU_B, H2, H1, H1, vwsmaccu8) 2764 RVVCALL(OPIVV3_RM, vwsmaccu_vv_h, WOP_UUU_H, H4, H2, H2, vwsmaccu16) 2765 RVVCALL(OPIVV3_RM, vwsmaccu_vv_w, WOP_UUU_W, H8, H4, H4, vwsmaccu32) 2766 GEN_VEXT_VV_RM(vwsmaccu_vv_b, 1, 2, clearh) 
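/*
 * The widening saturating scaled multiply-add helpers (vwsmaccu8/16/32
 * above, and the signed/mixed variants further down) first form the full
 * 2*SEW product, round it per vxrm and scale it right by SEW/2 bits
 * (4/8/16), and only then fold it into the 2*SEW accumulator through one
 * of the saturating add/subtract helpers, so env->vxsat is set when the
 * accumulation saturates.
 *
 * Worked example with illustrative values, vwsmaccu8 with vxrm = 0 (rnu):
 *     a = 127, b = 51            ->  res = 127 * 51 = 6477 (0x194d)
 *     round = get_round(0, 0x194d, 4) = bit 3 of 0x194d = 1
 *     res = (6477 >> 4) + 1 = 405
 *     result = saddu16(env, vxrm, c, 405), i.e. c + 405 with unsigned
 *              saturation to UINT16_MAX.
 */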
2767 GEN_VEXT_VV_RM(vwsmaccu_vv_h, 2, 4, clearl) 2768 GEN_VEXT_VV_RM(vwsmaccu_vv_w, 4, 8, clearq) 2769 2770 #define OPIVX3_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 2771 static inline void \ 2772 do_##NAME(void *vd, target_long s1, void *vs2, int i, \ 2773 CPURISCVState *env, int vxrm) \ 2774 { \ 2775 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 2776 TD d = *((TD *)vd + HD(i)); \ 2777 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1, d); \ 2778 } 2779 2780 RVVCALL(OPIVX3_RM, vwsmaccu_vx_b, WOP_UUU_B, H2, H1, vwsmaccu8) 2781 RVVCALL(OPIVX3_RM, vwsmaccu_vx_h, WOP_UUU_H, H4, H2, vwsmaccu16) 2782 RVVCALL(OPIVX3_RM, vwsmaccu_vx_w, WOP_UUU_W, H8, H4, vwsmaccu32) 2783 GEN_VEXT_VX_RM(vwsmaccu_vx_b, 1, 2, clearh) 2784 GEN_VEXT_VX_RM(vwsmaccu_vx_h, 2, 4, clearl) 2785 GEN_VEXT_VX_RM(vwsmaccu_vx_w, 4, 8, clearq) 2786 2787 static inline int16_t 2788 vwsmacc8(CPURISCVState *env, int vxrm, int8_t a, int8_t b, int16_t c) 2789 { 2790 uint8_t round; 2791 int16_t res = (int16_t)a * b; 2792 2793 round = get_round(vxrm, res, 4); 2794 res = (res >> 4) + round; 2795 return sadd16(env, vxrm, c, res); 2796 } 2797 2798 static inline int32_t 2799 vwsmacc16(CPURISCVState *env, int vxrm, int16_t a, int16_t b, int32_t c) 2800 { 2801 uint8_t round; 2802 int32_t res = (int32_t)a * b; 2803 2804 round = get_round(vxrm, res, 8); 2805 res = (res >> 8) + round; 2806 return sadd32(env, vxrm, c, res); 2807 2808 } 2809 2810 static inline int64_t 2811 vwsmacc32(CPURISCVState *env, int vxrm, int32_t a, int32_t b, int64_t c) 2812 { 2813 uint8_t round; 2814 int64_t res = (int64_t)a * b; 2815 2816 round = get_round(vxrm, res, 16); 2817 res = (res >> 16) + round; 2818 return sadd64(env, vxrm, c, res); 2819 } 2820 2821 RVVCALL(OPIVV3_RM, vwsmacc_vv_b, WOP_SSS_B, H2, H1, H1, vwsmacc8) 2822 RVVCALL(OPIVV3_RM, vwsmacc_vv_h, WOP_SSS_H, H4, H2, H2, vwsmacc16) 2823 RVVCALL(OPIVV3_RM, vwsmacc_vv_w, WOP_SSS_W, H8, H4, H4, vwsmacc32) 2824 GEN_VEXT_VV_RM(vwsmacc_vv_b, 1, 2, clearh) 2825 GEN_VEXT_VV_RM(vwsmacc_vv_h, 2, 4, clearl) 2826 GEN_VEXT_VV_RM(vwsmacc_vv_w, 4, 8, clearq) 2827 RVVCALL(OPIVX3_RM, vwsmacc_vx_b, WOP_SSS_B, H2, H1, vwsmacc8) 2828 RVVCALL(OPIVX3_RM, vwsmacc_vx_h, WOP_SSS_H, H4, H2, vwsmacc16) 2829 RVVCALL(OPIVX3_RM, vwsmacc_vx_w, WOP_SSS_W, H8, H4, vwsmacc32) 2830 GEN_VEXT_VX_RM(vwsmacc_vx_b, 1, 2, clearh) 2831 GEN_VEXT_VX_RM(vwsmacc_vx_h, 2, 4, clearl) 2832 GEN_VEXT_VX_RM(vwsmacc_vx_w, 4, 8, clearq) 2833 2834 static inline int16_t 2835 vwsmaccsu8(CPURISCVState *env, int vxrm, uint8_t a, int8_t b, int16_t c) 2836 { 2837 uint8_t round; 2838 int16_t res = a * (int16_t)b; 2839 2840 round = get_round(vxrm, res, 4); 2841 res = (res >> 4) + round; 2842 return ssub16(env, vxrm, c, res); 2843 } 2844 2845 static inline int32_t 2846 vwsmaccsu16(CPURISCVState *env, int vxrm, uint16_t a, int16_t b, uint32_t c) 2847 { 2848 uint8_t round; 2849 int32_t res = a * (int32_t)b; 2850 2851 round = get_round(vxrm, res, 8); 2852 res = (res >> 8) + round; 2853 return ssub32(env, vxrm, c, res); 2854 } 2855 2856 static inline int64_t 2857 vwsmaccsu32(CPURISCVState *env, int vxrm, uint32_t a, int32_t b, int64_t c) 2858 { 2859 uint8_t round; 2860 int64_t res = a * (int64_t)b; 2861 2862 round = get_round(vxrm, res, 16); 2863 res = (res >> 16) + round; 2864 return ssub64(env, vxrm, c, res); 2865 } 2866 2867 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, vwsmaccsu8) 2868 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, vwsmaccsu16) 2869 RVVCALL(OPIVV3_RM, vwsmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, vwsmaccsu32) 2870 GEN_VEXT_VV_RM(vwsmaccsu_vv_b, 1, 
2, clearh) 2871 GEN_VEXT_VV_RM(vwsmaccsu_vv_h, 2, 4, clearl) 2872 GEN_VEXT_VV_RM(vwsmaccsu_vv_w, 4, 8, clearq) 2873 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_b, WOP_SSU_B, H2, H1, vwsmaccsu8) 2874 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_h, WOP_SSU_H, H4, H2, vwsmaccsu16) 2875 RVVCALL(OPIVX3_RM, vwsmaccsu_vx_w, WOP_SSU_W, H8, H4, vwsmaccsu32) 2876 GEN_VEXT_VX_RM(vwsmaccsu_vx_b, 1, 2, clearh) 2877 GEN_VEXT_VX_RM(vwsmaccsu_vx_h, 2, 4, clearl) 2878 GEN_VEXT_VX_RM(vwsmaccsu_vx_w, 4, 8, clearq) 2879 2880 static inline int16_t 2881 vwsmaccus8(CPURISCVState *env, int vxrm, int8_t a, uint8_t b, int16_t c) 2882 { 2883 uint8_t round; 2884 int16_t res = (int16_t)a * b; 2885 2886 round = get_round(vxrm, res, 4); 2887 res = (res >> 4) + round; 2888 return ssub16(env, vxrm, c, res); 2889 } 2890 2891 static inline int32_t 2892 vwsmaccus16(CPURISCVState *env, int vxrm, int16_t a, uint16_t b, int32_t c) 2893 { 2894 uint8_t round; 2895 int32_t res = (int32_t)a * b; 2896 2897 round = get_round(vxrm, res, 8); 2898 res = (res >> 8) + round; 2899 return ssub32(env, vxrm, c, res); 2900 } 2901 2902 static inline int64_t 2903 vwsmaccus32(CPURISCVState *env, int vxrm, int32_t a, uint32_t b, int64_t c) 2904 { 2905 uint8_t round; 2906 int64_t res = (int64_t)a * b; 2907 2908 round = get_round(vxrm, res, 16); 2909 res = (res >> 16) + round; 2910 return ssub64(env, vxrm, c, res); 2911 } 2912 2913 RVVCALL(OPIVX3_RM, vwsmaccus_vx_b, WOP_SUS_B, H2, H1, vwsmaccus8) 2914 RVVCALL(OPIVX3_RM, vwsmaccus_vx_h, WOP_SUS_H, H4, H2, vwsmaccus16) 2915 RVVCALL(OPIVX3_RM, vwsmaccus_vx_w, WOP_SUS_W, H8, H4, vwsmaccus32) 2916 GEN_VEXT_VX_RM(vwsmaccus_vx_b, 1, 2, clearh) 2917 GEN_VEXT_VX_RM(vwsmaccus_vx_h, 2, 4, clearl) 2918 GEN_VEXT_VX_RM(vwsmaccus_vx_w, 4, 8, clearq) 2919 2920 /* Vector Single-Width Scaling Shift Instructions */ 2921 static inline uint8_t 2922 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b) 2923 { 2924 uint8_t round, shift = b & 0x7; 2925 uint8_t res; 2926 2927 round = get_round(vxrm, a, shift); 2928 res = (a >> shift) + round; 2929 return res; 2930 } 2931 static inline uint16_t 2932 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b) 2933 { 2934 uint8_t round, shift = b & 0xf; 2935 uint16_t res; 2936 2937 round = get_round(vxrm, a, shift); 2938 res = (a >> shift) + round; 2939 return res; 2940 } 2941 static inline uint32_t 2942 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b) 2943 { 2944 uint8_t round, shift = b & 0x1f; 2945 uint32_t res; 2946 2947 round = get_round(vxrm, a, shift); 2948 res = (a >> shift) + round; 2949 return res; 2950 } 2951 static inline uint64_t 2952 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b) 2953 { 2954 uint8_t round, shift = b & 0x3f; 2955 uint64_t res; 2956 2957 round = get_round(vxrm, a, shift); 2958 res = (a >> shift) + round; 2959 return res; 2960 } 2961 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8) 2962 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16) 2963 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32) 2964 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64) 2965 GEN_VEXT_VV_RM(vssrl_vv_b, 1, 1, clearb) 2966 GEN_VEXT_VV_RM(vssrl_vv_h, 2, 2, clearh) 2967 GEN_VEXT_VV_RM(vssrl_vv_w, 4, 4, clearl) 2968 GEN_VEXT_VV_RM(vssrl_vv_d, 8, 8, clearq) 2969 2970 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8) 2971 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16) 2972 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32) 2973 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64) 2974 
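/*
 * The scaling shift helpers (vssrl8..vssrl64 above, and the arithmetic
 * vssra variants below) shift right by the low log2(SEW) bits of the
 * second operand and add the rounding increment selected by vxrm; unlike
 * the narrowing clips further down there is no saturation, so vxsat is
 * never set here.
 *
 * Worked example with illustrative values, vssrl8 with vxrm = 0 (rnu):
 *     a = 0x97 (151), b = 3      ->  shift = 3
 *     round = get_round(0, 151, 3) = bit 2 of 151 = 1
 *     res = (151 >> 3) + 1 = 19 (0x13)
 */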
GEN_VEXT_VX_RM(vssrl_vx_b, 1, 1, clearb) 2975 GEN_VEXT_VX_RM(vssrl_vx_h, 2, 2, clearh) 2976 GEN_VEXT_VX_RM(vssrl_vx_w, 4, 4, clearl) 2977 GEN_VEXT_VX_RM(vssrl_vx_d, 8, 8, clearq) 2978 2979 static inline int8_t 2980 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b) 2981 { 2982 uint8_t round, shift = b & 0x7; 2983 int8_t res; 2984 2985 round = get_round(vxrm, a, shift); 2986 res = (a >> shift) + round; 2987 return res; 2988 } 2989 static inline int16_t 2990 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b) 2991 { 2992 uint8_t round, shift = b & 0xf; 2993 int16_t res; 2994 2995 round = get_round(vxrm, a, shift); 2996 res = (a >> shift) + round; 2997 return res; 2998 } 2999 static inline int32_t 3000 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b) 3001 { 3002 uint8_t round, shift = b & 0x1f; 3003 int32_t res; 3004 3005 round = get_round(vxrm, a, shift); 3006 res = (a >> shift) + round; 3007 return res; 3008 } 3009 static inline int64_t 3010 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b) 3011 { 3012 uint8_t round, shift = b & 0x3f; 3013 int64_t res; 3014 3015 round = get_round(vxrm, a, shift); 3016 res = (a >> shift) + round; 3017 return res; 3018 } 3019 3020 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8) 3021 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16) 3022 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32) 3023 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64) 3024 GEN_VEXT_VV_RM(vssra_vv_b, 1, 1, clearb) 3025 GEN_VEXT_VV_RM(vssra_vv_h, 2, 2, clearh) 3026 GEN_VEXT_VV_RM(vssra_vv_w, 4, 4, clearl) 3027 GEN_VEXT_VV_RM(vssra_vv_d, 8, 8, clearq) 3028 3029 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8) 3030 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16) 3031 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32) 3032 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64) 3033 GEN_VEXT_VX_RM(vssra_vx_b, 1, 1, clearb) 3034 GEN_VEXT_VX_RM(vssra_vx_h, 2, 2, clearh) 3035 GEN_VEXT_VX_RM(vssra_vx_w, 4, 4, clearl) 3036 GEN_VEXT_VX_RM(vssra_vx_d, 8, 8, clearq) 3037 3038 /* Vector Narrowing Fixed-Point Clip Instructions */ 3039 static inline int8_t 3040 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b) 3041 { 3042 uint8_t round, shift = b & 0xf; 3043 int16_t res; 3044 3045 round = get_round(vxrm, a, shift); 3046 res = (a >> shift) + round; 3047 if (res > INT8_MAX) { 3048 env->vxsat = 0x1; 3049 return INT8_MAX; 3050 } else if (res < INT8_MIN) { 3051 env->vxsat = 0x1; 3052 return INT8_MIN; 3053 } else { 3054 return res; 3055 } 3056 } 3057 3058 static inline int16_t 3059 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b) 3060 { 3061 uint8_t round, shift = b & 0x1f; 3062 int32_t res; 3063 3064 round = get_round(vxrm, a, shift); 3065 res = (a >> shift) + round; 3066 if (res > INT16_MAX) { 3067 env->vxsat = 0x1; 3068 return INT16_MAX; 3069 } else if (res < INT16_MIN) { 3070 env->vxsat = 0x1; 3071 return INT16_MIN; 3072 } else { 3073 return res; 3074 } 3075 } 3076 3077 static inline int32_t 3078 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b) 3079 { 3080 uint8_t round, shift = b & 0x3f; 3081 int64_t res; 3082 3083 round = get_round(vxrm, a, shift); 3084 res = (a >> shift) + round; 3085 if (res > INT32_MAX) { 3086 env->vxsat = 0x1; 3087 return INT32_MAX; 3088 } else if (res < INT32_MIN) { 3089 env->vxsat = 0x1; 3090 return INT32_MIN; 3091 } else { 3092 return res; 3093 } 3094 } 3095 3096 RVVCALL(OPIVV2_RM, vnclip_vv_b, NOP_SSS_B, H1, H2, H1, vnclip8) 3097 
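/*
 * The narrowing clips take a 2*SEW source element, shift it right by the
 * low log2(2*SEW) bits of the SEW-wide second operand (mask 0xf/0x1f/0x3f),
 * round per vxrm, and then saturate the result into the destination range,
 * setting env->vxsat when clipping occurs.  NOP_SSS_* / NOP_UUU_* are
 * presumably the narrowing type tuples defined earlier in this file
 * (SEW-wide destination and shift operand, 2*SEW source).
 *
 * Worked example with illustrative values, vnclip8 with vxrm = 0 (rnu):
 *     a = 0x1234 (4660), b = 4   ->  shift = 4
 *     round = bit 3 of 0x1234 = 0
 *     res = 4660 >> 4 = 291 > INT8_MAX, so the result is clipped to 127
 *     and vxsat is set.
 */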
RVVCALL(OPIVV2_RM, vnclip_vv_h, NOP_SSS_H, H2, H4, H2, vnclip16) 3098 RVVCALL(OPIVV2_RM, vnclip_vv_w, NOP_SSS_W, H4, H8, H4, vnclip32) 3099 GEN_VEXT_VV_RM(vnclip_vv_b, 1, 1, clearb) 3100 GEN_VEXT_VV_RM(vnclip_vv_h, 2, 2, clearh) 3101 GEN_VEXT_VV_RM(vnclip_vv_w, 4, 4, clearl) 3102 3103 RVVCALL(OPIVX2_RM, vnclip_vx_b, NOP_SSS_B, H1, H2, vnclip8) 3104 RVVCALL(OPIVX2_RM, vnclip_vx_h, NOP_SSS_H, H2, H4, vnclip16) 3105 RVVCALL(OPIVX2_RM, vnclip_vx_w, NOP_SSS_W, H4, H8, vnclip32) 3106 GEN_VEXT_VX_RM(vnclip_vx_b, 1, 1, clearb) 3107 GEN_VEXT_VX_RM(vnclip_vx_h, 2, 2, clearh) 3108 GEN_VEXT_VX_RM(vnclip_vx_w, 4, 4, clearl) 3109 3110 static inline uint8_t 3111 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b) 3112 { 3113 uint8_t round, shift = b & 0xf; 3114 uint16_t res; 3115 3116 round = get_round(vxrm, a, shift); 3117 res = (a >> shift) + round; 3118 if (res > UINT8_MAX) { 3119 env->vxsat = 0x1; 3120 return UINT8_MAX; 3121 } else { 3122 return res; 3123 } 3124 } 3125 3126 static inline uint16_t 3127 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b) 3128 { 3129 uint8_t round, shift = b & 0x1f; 3130 uint32_t res; 3131 3132 round = get_round(vxrm, a, shift); 3133 res = (a >> shift) + round; 3134 if (res > UINT16_MAX) { 3135 env->vxsat = 0x1; 3136 return UINT16_MAX; 3137 } else { 3138 return res; 3139 } 3140 } 3141 3142 static inline uint32_t 3143 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b) 3144 { 3145 uint8_t round, shift = b & 0x3f; 3146 uint64_t res; 3147 3148 round = get_round(vxrm, a, shift); 3149 res = (a >> shift) + round; 3150 if (res > UINT32_MAX) { 3151 env->vxsat = 0x1; 3152 return UINT32_MAX; 3153 } else { 3154 return res; 3155 } 3156 } 3157 3158 RVVCALL(OPIVV2_RM, vnclipu_vv_b, NOP_UUU_B, H1, H2, H1, vnclipu8) 3159 RVVCALL(OPIVV2_RM, vnclipu_vv_h, NOP_UUU_H, H2, H4, H2, vnclipu16) 3160 RVVCALL(OPIVV2_RM, vnclipu_vv_w, NOP_UUU_W, H4, H8, H4, vnclipu32) 3161 GEN_VEXT_VV_RM(vnclipu_vv_b, 1, 1, clearb) 3162 GEN_VEXT_VV_RM(vnclipu_vv_h, 2, 2, clearh) 3163 GEN_VEXT_VV_RM(vnclipu_vv_w, 4, 4, clearl) 3164 3165 RVVCALL(OPIVX2_RM, vnclipu_vx_b, NOP_UUU_B, H1, H2, vnclipu8) 3166 RVVCALL(OPIVX2_RM, vnclipu_vx_h, NOP_UUU_H, H2, H4, vnclipu16) 3167 RVVCALL(OPIVX2_RM, vnclipu_vx_w, NOP_UUU_W, H4, H8, vnclipu32) 3168 GEN_VEXT_VX_RM(vnclipu_vx_b, 1, 1, clearb) 3169 GEN_VEXT_VX_RM(vnclipu_vx_h, 2, 2, clearh) 3170 GEN_VEXT_VX_RM(vnclipu_vx_w, 4, 4, clearl) 3171 3172 /* 3173 *** Vector Floating-Point Arithmetic Instructions 3174 */ 3175 /* Vector Single-Width Floating-Point Add/Subtract Instructions */ 3176 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3177 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3178 CPURISCVState *env) \ 3179 { \ 3180 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3181 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3182 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \ 3183 } 3184 3185 #define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ, CLEAR_FN) \ 3186 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 3187 void *vs2, CPURISCVState *env, \ 3188 uint32_t desc) \ 3189 { \ 3190 uint32_t vlmax = vext_maxsz(desc) / ESZ; \ 3191 uint32_t mlen = vext_mlen(desc); \ 3192 uint32_t vm = vext_vm(desc); \ 3193 uint32_t vl = env->vl; \ 3194 uint32_t i; \ 3195 \ 3196 for (i = 0; i < vl; i++) { \ 3197 if (!vm && !vext_elem_mask(v0, mlen, i)) { \ 3198 continue; \ 3199 } \ 3200 do_##NAME(vd, vs1, vs2, i, env); \ 3201 } \ 3202 CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ 3203 } 3204 3205 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add) 3206
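/*
 * The RVVCALL/GEN_VEXT_VV_ENV pairs used throughout the floating-point
 * sections are mechanical: for the vfadd_vv_h instantiation above,
 * RVVCALL expands OPFVV2 into a per-element function roughly
 * equivalent to
 *
 *     static void do_vfadd_vv_h(void *vd, void *vs1, void *vs2, int i,
 *                               CPURISCVState *env)
 *     {
 *         uint16_t s1 = *((uint16_t *)vs1 + H2(i));
 *         uint16_t s2 = *((uint16_t *)vs2 + H2(i));
 *         *((uint16_t *)vd + H2(i)) = float16_add(s2, s1, &env->fp_status);
 *     }
 *
 * and GEN_VEXT_VV_ENV wraps it in HELPER(vfadd_vv_h), which loops over
 * elements 0..vl-1, skips inactive elements when vm == 0, and finally
 * clears the tail with the supplied clear function.
 */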
RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add) 3207 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add) 3208 GEN_VEXT_VV_ENV(vfadd_vv_h, 2, 2, clearh) 3209 GEN_VEXT_VV_ENV(vfadd_vv_w, 4, 4, clearl) 3210 GEN_VEXT_VV_ENV(vfadd_vv_d, 8, 8, clearq) 3211 3212 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3213 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3214 CPURISCVState *env) \ 3215 { \ 3216 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3217 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\ 3218 } 3219 3220 #define GEN_VEXT_VF(NAME, ESZ, DSZ, CLEAR_FN) \ 3221 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \ 3222 void *vs2, CPURISCVState *env, \ 3223 uint32_t desc) \ 3224 { \ 3225 uint32_t vlmax = vext_maxsz(desc) / ESZ; \ 3226 uint32_t mlen = vext_mlen(desc); \ 3227 uint32_t vm = vext_vm(desc); \ 3228 uint32_t vl = env->vl; \ 3229 uint32_t i; \ 3230 \ 3231 for (i = 0; i < vl; i++) { \ 3232 if (!vm && !vext_elem_mask(v0, mlen, i)) { \ 3233 continue; \ 3234 } \ 3235 do_##NAME(vd, s1, vs2, i, env); \ 3236 } \ 3237 CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ 3238 } 3239 3240 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add) 3241 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add) 3242 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add) 3243 GEN_VEXT_VF(vfadd_vf_h, 2, 2, clearh) 3244 GEN_VEXT_VF(vfadd_vf_w, 4, 4, clearl) 3245 GEN_VEXT_VF(vfadd_vf_d, 8, 8, clearq) 3246 3247 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub) 3248 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub) 3249 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub) 3250 GEN_VEXT_VV_ENV(vfsub_vv_h, 2, 2, clearh) 3251 GEN_VEXT_VV_ENV(vfsub_vv_w, 4, 4, clearl) 3252 GEN_VEXT_VV_ENV(vfsub_vv_d, 8, 8, clearq) 3253 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub) 3254 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub) 3255 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub) 3256 GEN_VEXT_VF(vfsub_vf_h, 2, 2, clearh) 3257 GEN_VEXT_VF(vfsub_vf_w, 4, 4, clearl) 3258 GEN_VEXT_VF(vfsub_vf_d, 8, 8, clearq) 3259 3260 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s) 3261 { 3262 return float16_sub(b, a, s); 3263 } 3264 3265 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s) 3266 { 3267 return float32_sub(b, a, s); 3268 } 3269 3270 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s) 3271 { 3272 return float64_sub(b, a, s); 3273 } 3274 3275 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub) 3276 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub) 3277 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub) 3278 GEN_VEXT_VF(vfrsub_vf_h, 2, 2, clearh) 3279 GEN_VEXT_VF(vfrsub_vf_w, 4, 4, clearl) 3280 GEN_VEXT_VF(vfrsub_vf_d, 8, 8, clearq) 3281 3282 /* Vector Widening Floating-Point Add/Subtract Instructions */ 3283 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s) 3284 { 3285 return float32_add(float16_to_float32(a, true, s), 3286 float16_to_float32(b, true, s), s); 3287 } 3288 3289 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s) 3290 { 3291 return float64_add(float32_to_float64(a, s), 3292 float32_to_float64(b, s), s); 3293 3294 } 3295 3296 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16) 3297 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32) 3298 GEN_VEXT_VV_ENV(vfwadd_vv_h, 2, 4, clearl) 3299 GEN_VEXT_VV_ENV(vfwadd_vv_w, 4, 8, clearq) 3300 RVVCALL(OPFVF2, 
vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16) 3301 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32) 3302 GEN_VEXT_VF(vfwadd_vf_h, 2, 4, clearl) 3303 GEN_VEXT_VF(vfwadd_vf_w, 4, 8, clearq) 3304 3305 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s) 3306 { 3307 return float32_sub(float16_to_float32(a, true, s), 3308 float16_to_float32(b, true, s), s); 3309 } 3310 3311 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s) 3312 { 3313 return float64_sub(float32_to_float64(a, s), 3314 float32_to_float64(b, s), s); 3315 3316 } 3317 3318 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16) 3319 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32) 3320 GEN_VEXT_VV_ENV(vfwsub_vv_h, 2, 4, clearl) 3321 GEN_VEXT_VV_ENV(vfwsub_vv_w, 4, 8, clearq) 3322 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16) 3323 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32) 3324 GEN_VEXT_VF(vfwsub_vf_h, 2, 4, clearl) 3325 GEN_VEXT_VF(vfwsub_vf_w, 4, 8, clearq) 3326 3327 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s) 3328 { 3329 return float32_add(a, float16_to_float32(b, true, s), s); 3330 } 3331 3332 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s) 3333 { 3334 return float64_add(a, float32_to_float64(b, s), s); 3335 } 3336 3337 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16) 3338 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32) 3339 GEN_VEXT_VV_ENV(vfwadd_wv_h, 2, 4, clearl) 3340 GEN_VEXT_VV_ENV(vfwadd_wv_w, 4, 8, clearq) 3341 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16) 3342 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32) 3343 GEN_VEXT_VF(vfwadd_wf_h, 2, 4, clearl) 3344 GEN_VEXT_VF(vfwadd_wf_w, 4, 8, clearq) 3345 3346 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s) 3347 { 3348 return float32_sub(a, float16_to_float32(b, true, s), s); 3349 } 3350 3351 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s) 3352 { 3353 return float64_sub(a, float32_to_float64(b, s), s); 3354 } 3355 3356 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16) 3357 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32) 3358 GEN_VEXT_VV_ENV(vfwsub_wv_h, 2, 4, clearl) 3359 GEN_VEXT_VV_ENV(vfwsub_wv_w, 4, 8, clearq) 3360 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16) 3361 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32) 3362 GEN_VEXT_VF(vfwsub_wf_h, 2, 4, clearl) 3363 GEN_VEXT_VF(vfwsub_wf_w, 4, 8, clearq) 3364 3365 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */ 3366 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul) 3367 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul) 3368 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul) 3369 GEN_VEXT_VV_ENV(vfmul_vv_h, 2, 2, clearh) 3370 GEN_VEXT_VV_ENV(vfmul_vv_w, 4, 4, clearl) 3371 GEN_VEXT_VV_ENV(vfmul_vv_d, 8, 8, clearq) 3372 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul) 3373 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul) 3374 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul) 3375 GEN_VEXT_VF(vfmul_vf_h, 2, 2, clearh) 3376 GEN_VEXT_VF(vfmul_vf_w, 4, 4, clearl) 3377 GEN_VEXT_VF(vfmul_vf_d, 8, 8, clearq) 3378 3379 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div) 3380 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div) 3381 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div) 3382 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2, 2, clearh) 3383 
GEN_VEXT_VV_ENV(vfdiv_vv_w, 4, 4, clearl) 3384 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8, 8, clearq) 3385 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div) 3386 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div) 3387 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div) 3388 GEN_VEXT_VF(vfdiv_vf_h, 2, 2, clearh) 3389 GEN_VEXT_VF(vfdiv_vf_w, 4, 4, clearl) 3390 GEN_VEXT_VF(vfdiv_vf_d, 8, 8, clearq) 3391 3392 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s) 3393 { 3394 return float16_div(b, a, s); 3395 } 3396 3397 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s) 3398 { 3399 return float32_div(b, a, s); 3400 } 3401 3402 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s) 3403 { 3404 return float64_div(b, a, s); 3405 } 3406 3407 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv) 3408 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv) 3409 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv) 3410 GEN_VEXT_VF(vfrdiv_vf_h, 2, 2, clearh) 3411 GEN_VEXT_VF(vfrdiv_vf_w, 4, 4, clearl) 3412 GEN_VEXT_VF(vfrdiv_vf_d, 8, 8, clearq) 3413 3414 /* Vector Widening Floating-Point Multiply */ 3415 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s) 3416 { 3417 return float32_mul(float16_to_float32(a, true, s), 3418 float16_to_float32(b, true, s), s); 3419 } 3420 3421 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s) 3422 { 3423 return float64_mul(float32_to_float64(a, s), 3424 float32_to_float64(b, s), s); 3425 3426 } 3427 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16) 3428 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32) 3429 GEN_VEXT_VV_ENV(vfwmul_vv_h, 2, 4, clearl) 3430 GEN_VEXT_VV_ENV(vfwmul_vv_w, 4, 8, clearq) 3431 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16) 3432 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32) 3433 GEN_VEXT_VF(vfwmul_vf_h, 2, 4, clearl) 3434 GEN_VEXT_VF(vfwmul_vf_w, 4, 8, clearq) 3435 3436 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ 3437 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ 3438 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \ 3439 CPURISCVState *env) \ 3440 { \ 3441 TX1 s1 = *((T1 *)vs1 + HS1(i)); \ 3442 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3443 TD d = *((TD *)vd + HD(i)); \ 3444 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \ 3445 } 3446 3447 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3448 { 3449 return float16_muladd(a, b, d, 0, s); 3450 } 3451 3452 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3453 { 3454 return float32_muladd(a, b, d, 0, s); 3455 } 3456 3457 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3458 { 3459 return float64_muladd(a, b, d, 0, s); 3460 } 3461 3462 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16) 3463 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32) 3464 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64) 3465 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2, 2, clearh) 3466 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4, 4, clearl) 3467 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8, 8, clearq) 3468 3469 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ 3470 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \ 3471 CPURISCVState *env) \ 3472 { \ 3473 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3474 TD d = *((TD *)vd + HD(i)); \ 3475 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\ 3476 } 3477 3478 RVVCALL(OPFVF3, 
vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16) 3479 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32) 3480 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64) 3481 GEN_VEXT_VF(vfmacc_vf_h, 2, 2, clearh) 3482 GEN_VEXT_VF(vfmacc_vf_w, 4, 4, clearl) 3483 GEN_VEXT_VF(vfmacc_vf_d, 8, 8, clearq) 3484 3485 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3486 { 3487 return float16_muladd(a, b, d, 3488 float_muladd_negate_c | float_muladd_negate_product, s); 3489 } 3490 3491 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3492 { 3493 return float32_muladd(a, b, d, 3494 float_muladd_negate_c | float_muladd_negate_product, s); 3495 } 3496 3497 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3498 { 3499 return float64_muladd(a, b, d, 3500 float_muladd_negate_c | float_muladd_negate_product, s); 3501 } 3502 3503 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16) 3504 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32) 3505 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64) 3506 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2, 2, clearh) 3507 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4, 4, clearl) 3508 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8, 8, clearq) 3509 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16) 3510 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32) 3511 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64) 3512 GEN_VEXT_VF(vfnmacc_vf_h, 2, 2, clearh) 3513 GEN_VEXT_VF(vfnmacc_vf_w, 4, 4, clearl) 3514 GEN_VEXT_VF(vfnmacc_vf_d, 8, 8, clearq) 3515 3516 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3517 { 3518 return float16_muladd(a, b, d, float_muladd_negate_c, s); 3519 } 3520 3521 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3522 { 3523 return float32_muladd(a, b, d, float_muladd_negate_c, s); 3524 } 3525 3526 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3527 { 3528 return float64_muladd(a, b, d, float_muladd_negate_c, s); 3529 } 3530 3531 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16) 3532 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32) 3533 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64) 3534 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2, 2, clearh) 3535 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4, 4, clearl) 3536 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8, 8, clearq) 3537 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16) 3538 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32) 3539 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64) 3540 GEN_VEXT_VF(vfmsac_vf_h, 2, 2, clearh) 3541 GEN_VEXT_VF(vfmsac_vf_w, 4, 4, clearl) 3542 GEN_VEXT_VF(vfmsac_vf_d, 8, 8, clearq) 3543 3544 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3545 { 3546 return float16_muladd(a, b, d, float_muladd_negate_product, s); 3547 } 3548 3549 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3550 { 3551 return float32_muladd(a, b, d, float_muladd_negate_product, s); 3552 } 3553 3554 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3555 { 3556 return float64_muladd(a, b, d, float_muladd_negate_product, s); 3557 } 3558 3559 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16) 3560 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32) 3561 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64) 3562 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2, 2, clearh) 3563 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4, 4, clearl) 3564 
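/*
 * Summary of the single-width fused multiply-add flavours in this
 * section, in terms of the softfloat muladd flags (each result is
 * computed with a single rounding):
 *
 *   vfmacc:  vd =  (vs1 * vs2) + vd    no flags
 *   vfnmacc: vd = -(vs1 * vs2) - vd    negate_product | negate_c
 *   vfmsac:  vd =  (vs1 * vs2) - vd    negate_c
 *   vfnmsac: vd = -(vs1 * vs2) + vd    negate_product
 *
 * The vfmadd/vfnmadd/vfmsub/vfnmsub variants further down multiply the
 * destination instead: note the (d, b, a, ...) argument order in
 * fmadd16() versus (a, b, d, ...) in fmacc16(), so there vd supplies a
 * multiplicand and vs2 is the addend.
 */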
GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8, 8, clearq) 3565 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16) 3566 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32) 3567 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64) 3568 GEN_VEXT_VF(vfnmsac_vf_h, 2, 2, clearh) 3569 GEN_VEXT_VF(vfnmsac_vf_w, 4, 4, clearl) 3570 GEN_VEXT_VF(vfnmsac_vf_d, 8, 8, clearq) 3571 3572 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3573 { 3574 return float16_muladd(d, b, a, 0, s); 3575 } 3576 3577 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3578 { 3579 return float32_muladd(d, b, a, 0, s); 3580 } 3581 3582 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3583 { 3584 return float64_muladd(d, b, a, 0, s); 3585 } 3586 3587 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16) 3588 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32) 3589 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64) 3590 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2, 2, clearh) 3591 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4, 4, clearl) 3592 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8, 8, clearq) 3593 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16) 3594 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32) 3595 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64) 3596 GEN_VEXT_VF(vfmadd_vf_h, 2, 2, clearh) 3597 GEN_VEXT_VF(vfmadd_vf_w, 4, 4, clearl) 3598 GEN_VEXT_VF(vfmadd_vf_d, 8, 8, clearq) 3599 3600 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3601 { 3602 return float16_muladd(d, b, a, 3603 float_muladd_negate_c | float_muladd_negate_product, s); 3604 } 3605 3606 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3607 { 3608 return float32_muladd(d, b, a, 3609 float_muladd_negate_c | float_muladd_negate_product, s); 3610 } 3611 3612 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3613 { 3614 return float64_muladd(d, b, a, 3615 float_muladd_negate_c | float_muladd_negate_product, s); 3616 } 3617 3618 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16) 3619 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32) 3620 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64) 3621 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2, 2, clearh) 3622 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4, 4, clearl) 3623 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8, 8, clearq) 3624 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16) 3625 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32) 3626 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64) 3627 GEN_VEXT_VF(vfnmadd_vf_h, 2, 2, clearh) 3628 GEN_VEXT_VF(vfnmadd_vf_w, 4, 4, clearl) 3629 GEN_VEXT_VF(vfnmadd_vf_d, 8, 8, clearq) 3630 3631 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3632 { 3633 return float16_muladd(d, b, a, float_muladd_negate_c, s); 3634 } 3635 3636 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3637 { 3638 return float32_muladd(d, b, a, float_muladd_negate_c, s); 3639 } 3640 3641 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3642 { 3643 return float64_muladd(d, b, a, float_muladd_negate_c, s); 3644 } 3645 3646 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16) 3647 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32) 3648 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64) 3649 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2, 2, clearh) 3650 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4, 4, clearl) 3651 
GEN_VEXT_VV_ENV(vfmsub_vv_d, 8, 8, clearq) 3652 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16) 3653 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32) 3654 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64) 3655 GEN_VEXT_VF(vfmsub_vf_h, 2, 2, clearh) 3656 GEN_VEXT_VF(vfmsub_vf_w, 4, 4, clearl) 3657 GEN_VEXT_VF(vfmsub_vf_d, 8, 8, clearq) 3658 3659 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s) 3660 { 3661 return float16_muladd(d, b, a, float_muladd_negate_product, s); 3662 } 3663 3664 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s) 3665 { 3666 return float32_muladd(d, b, a, float_muladd_negate_product, s); 3667 } 3668 3669 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s) 3670 { 3671 return float64_muladd(d, b, a, float_muladd_negate_product, s); 3672 } 3673 3674 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16) 3675 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32) 3676 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64) 3677 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2, 2, clearh) 3678 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4, 4, clearl) 3679 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8, 8, clearq) 3680 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16) 3681 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32) 3682 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64) 3683 GEN_VEXT_VF(vfnmsub_vf_h, 2, 2, clearh) 3684 GEN_VEXT_VF(vfnmsub_vf_w, 4, 4, clearl) 3685 GEN_VEXT_VF(vfnmsub_vf_d, 8, 8, clearq) 3686 3687 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */ 3688 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3689 { 3690 return float32_muladd(float16_to_float32(a, true, s), 3691 float16_to_float32(b, true, s), d, 0, s); 3692 } 3693 3694 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3695 { 3696 return float64_muladd(float32_to_float64(a, s), 3697 float32_to_float64(b, s), d, 0, s); 3698 } 3699 3700 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16) 3701 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32) 3702 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 2, 4, clearl) 3703 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 4, 8, clearq) 3704 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16) 3705 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32) 3706 GEN_VEXT_VF(vfwmacc_vf_h, 2, 4, clearl) 3707 GEN_VEXT_VF(vfwmacc_vf_w, 4, 8, clearq) 3708 3709 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3710 { 3711 return float32_muladd(float16_to_float32(a, true, s), 3712 float16_to_float32(b, true, s), d, 3713 float_muladd_negate_c | float_muladd_negate_product, s); 3714 } 3715 3716 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3717 { 3718 return float64_muladd(float32_to_float64(a, s), 3719 float32_to_float64(b, s), d, 3720 float_muladd_negate_c | float_muladd_negate_product, s); 3721 } 3722 3723 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16) 3724 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32) 3725 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 2, 4, clearl) 3726 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 4, 8, clearq) 3727 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16) 3728 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32) 3729 GEN_VEXT_VF(vfwnmacc_vf_h, 2, 4, clearl) 3730 GEN_VEXT_VF(vfwnmacc_vf_w, 4, 8, clearq) 3731 3732 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status 
*s) 3733 { 3734 return float32_muladd(float16_to_float32(a, true, s), 3735 float16_to_float32(b, true, s), d, 3736 float_muladd_negate_c, s); 3737 } 3738 3739 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3740 { 3741 return float64_muladd(float32_to_float64(a, s), 3742 float32_to_float64(b, s), d, 3743 float_muladd_negate_c, s); 3744 } 3745 3746 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16) 3747 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32) 3748 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 2, 4, clearl) 3749 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 4, 8, clearq) 3750 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16) 3751 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32) 3752 GEN_VEXT_VF(vfwmsac_vf_h, 2, 4, clearl) 3753 GEN_VEXT_VF(vfwmsac_vf_w, 4, 8, clearq) 3754 3755 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s) 3756 { 3757 return float32_muladd(float16_to_float32(a, true, s), 3758 float16_to_float32(b, true, s), d, 3759 float_muladd_negate_product, s); 3760 } 3761 3762 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s) 3763 { 3764 return float64_muladd(float32_to_float64(a, s), 3765 float32_to_float64(b, s), d, 3766 float_muladd_negate_product, s); 3767 } 3768 3769 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16) 3770 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32) 3771 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 2, 4, clearl) 3772 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 4, 8, clearq) 3773 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16) 3774 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32) 3775 GEN_VEXT_VF(vfwnmsac_vf_h, 2, 4, clearl) 3776 GEN_VEXT_VF(vfwnmsac_vf_w, 4, 8, clearq) 3777 3778 /* Vector Floating-Point Square-Root Instruction */ 3779 /* (TD, T2, TX2) */ 3780 #define OP_UU_H uint16_t, uint16_t, uint16_t 3781 #define OP_UU_W uint32_t, uint32_t, uint32_t 3782 #define OP_UU_D uint64_t, uint64_t, uint64_t 3783 3784 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 3785 static void do_##NAME(void *vd, void *vs2, int i, \ 3786 CPURISCVState *env) \ 3787 { \ 3788 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 3789 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \ 3790 } 3791 3792 #define GEN_VEXT_V_ENV(NAME, ESZ, DSZ, CLEAR_FN) \ 3793 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 3794 CPURISCVState *env, uint32_t desc) \ 3795 { \ 3796 uint32_t vlmax = vext_maxsz(desc) / ESZ; \ 3797 uint32_t mlen = vext_mlen(desc); \ 3798 uint32_t vm = vext_vm(desc); \ 3799 uint32_t vl = env->vl; \ 3800 uint32_t i; \ 3801 \ 3802 if (vl == 0) { \ 3803 return; \ 3804 } \ 3805 for (i = 0; i < vl; i++) { \ 3806 if (!vm && !vext_elem_mask(v0, mlen, i)) { \ 3807 continue; \ 3808 } \ 3809 do_##NAME(vd, vs2, i, env); \ 3810 } \ 3811 CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ 3812 } 3813 3814 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt) 3815 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt) 3816 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt) 3817 GEN_VEXT_V_ENV(vfsqrt_v_h, 2, 2, clearh) 3818 GEN_VEXT_V_ENV(vfsqrt_v_w, 4, 4, clearl) 3819 GEN_VEXT_V_ENV(vfsqrt_v_d, 8, 8, clearq) 3820 3821 /* Vector Floating-Point MIN/MAX Instructions */ 3822 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minnum) 3823 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minnum) 3824 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minnum) 3825 GEN_VEXT_VV_ENV(vfmin_vv_h, 2, 2, clearh) 3826 GEN_VEXT_VV_ENV(vfmin_vv_w, 4, 4, 
clearl) 3827 GEN_VEXT_VV_ENV(vfmin_vv_d, 8, 8, clearq) 3828 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minnum) 3829 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minnum) 3830 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minnum) 3831 GEN_VEXT_VF(vfmin_vf_h, 2, 2, clearh) 3832 GEN_VEXT_VF(vfmin_vf_w, 4, 4, clearl) 3833 GEN_VEXT_VF(vfmin_vf_d, 8, 8, clearq) 3834 3835 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maxnum) 3836 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maxnum) 3837 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maxnum) 3838 GEN_VEXT_VV_ENV(vfmax_vv_h, 2, 2, clearh) 3839 GEN_VEXT_VV_ENV(vfmax_vv_w, 4, 4, clearl) 3840 GEN_VEXT_VV_ENV(vfmax_vv_d, 8, 8, clearq) 3841 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maxnum) 3842 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maxnum) 3843 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maxnum) 3844 GEN_VEXT_VF(vfmax_vf_h, 2, 2, clearh) 3845 GEN_VEXT_VF(vfmax_vf_w, 4, 4, clearl) 3846 GEN_VEXT_VF(vfmax_vf_d, 8, 8, clearq) 3847 3848 /* Vector Floating-Point Sign-Injection Instructions */ 3849 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s) 3850 { 3851 return deposit64(b, 0, 15, a); 3852 } 3853 3854 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s) 3855 { 3856 return deposit64(b, 0, 31, a); 3857 } 3858 3859 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s) 3860 { 3861 return deposit64(b, 0, 63, a); 3862 } 3863 3864 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16) 3865 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32) 3866 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64) 3867 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2, 2, clearh) 3868 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4, 4, clearl) 3869 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8, 8, clearq) 3870 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16) 3871 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32) 3872 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64) 3873 GEN_VEXT_VF(vfsgnj_vf_h, 2, 2, clearh) 3874 GEN_VEXT_VF(vfsgnj_vf_w, 4, 4, clearl) 3875 GEN_VEXT_VF(vfsgnj_vf_d, 8, 8, clearq) 3876 3877 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s) 3878 { 3879 return deposit64(~b, 0, 15, a); 3880 } 3881 3882 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s) 3883 { 3884 return deposit64(~b, 0, 31, a); 3885 } 3886 3887 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s) 3888 { 3889 return deposit64(~b, 0, 63, a); 3890 } 3891 3892 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16) 3893 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32) 3894 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64) 3895 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2, 2, clearh) 3896 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4, 4, clearl) 3897 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8, 8, clearq) 3898 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16) 3899 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32) 3900 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64) 3901 GEN_VEXT_VF(vfsgnjn_vf_h, 2, 2, clearh) 3902 GEN_VEXT_VF(vfsgnjn_vf_w, 4, 4, clearl) 3903 GEN_VEXT_VF(vfsgnjn_vf_d, 8, 8, clearq) 3904 3905 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s) 3906 { 3907 return deposit64(b ^ a, 0, 15, a); 3908 } 3909 3910 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s) 3911 { 3912 return deposit64(b ^ a, 0, 31, a); 3913 } 3914 3915 static uint64_t fsgnjx64(uint64_t a, uint64_t b, 
float_status *s) 3916 { 3917 return deposit64(b ^ a, 0, 63, a); 3918 } 3919 3920 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16) 3921 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32) 3922 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64) 3923 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2, 2, clearh) 3924 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4, 4, clearl) 3925 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8, 8, clearq) 3926 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16) 3927 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32) 3928 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64) 3929 GEN_VEXT_VF(vfsgnjx_vf_h, 2, 2, clearh) 3930 GEN_VEXT_VF(vfsgnjx_vf_w, 4, 4, clearl) 3931 GEN_VEXT_VF(vfsgnjx_vf_d, 8, 8, clearq) 3932 3933 /* Vector Floating-Point Compare Instructions */ 3934 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \ 3935 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ 3936 CPURISCVState *env, uint32_t desc) \ 3937 { \ 3938 uint32_t mlen = vext_mlen(desc); \ 3939 uint32_t vm = vext_vm(desc); \ 3940 uint32_t vl = env->vl; \ 3941 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 3942 uint32_t i; \ 3943 \ 3944 for (i = 0; i < vl; i++) { \ 3945 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \ 3946 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3947 if (!vm && !vext_elem_mask(v0, mlen, i)) { \ 3948 continue; \ 3949 } \ 3950 vext_set_elem_mask(vd, mlen, i, \ 3951 DO_OP(s2, s1, &env->fp_status)); \ 3952 } \ 3953 for (; i < vlmax; i++) { \ 3954 vext_set_elem_mask(vd, mlen, i, 0); \ 3955 } \ 3956 } 3957 3958 static bool float16_eq_quiet(uint16_t a, uint16_t b, float_status *s) 3959 { 3960 FloatRelation compare = float16_compare_quiet(a, b, s); 3961 return compare == float_relation_equal; 3962 } 3963 3964 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet) 3965 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet) 3966 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet) 3967 3968 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \ 3969 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 3970 CPURISCVState *env, uint32_t desc) \ 3971 { \ 3972 uint32_t mlen = vext_mlen(desc); \ 3973 uint32_t vm = vext_vm(desc); \ 3974 uint32_t vl = env->vl; \ 3975 uint32_t vlmax = vext_maxsz(desc) / sizeof(ETYPE); \ 3976 uint32_t i; \ 3977 \ 3978 for (i = 0; i < vl; i++) { \ 3979 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 3980 if (!vm && !vext_elem_mask(v0, mlen, i)) { \ 3981 continue; \ 3982 } \ 3983 vext_set_elem_mask(vd, mlen, i, \ 3984 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \ 3985 } \ 3986 for (; i < vlmax; i++) { \ 3987 vext_set_elem_mask(vd, mlen, i, 0); \ 3988 } \ 3989 } 3990 3991 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet) 3992 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet) 3993 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet) 3994 3995 static bool vmfne16(uint16_t a, uint16_t b, float_status *s) 3996 { 3997 FloatRelation compare = float16_compare_quiet(a, b, s); 3998 return compare != float_relation_equal; 3999 } 4000 4001 static bool vmfne32(uint32_t a, uint32_t b, float_status *s) 4002 { 4003 FloatRelation compare = float32_compare_quiet(a, b, s); 4004 return compare != float_relation_equal; 4005 } 4006 4007 static bool vmfne64(uint64_t a, uint64_t b, float_status *s) 4008 { 4009 FloatRelation compare = float64_compare_quiet(a, b, s); 4010 return compare != float_relation_equal; 4011 } 4012 4013 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16) 4014 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, 
vmfne32) 4015 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64) 4016 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16) 4017 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32) 4018 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64) 4019 4020 static bool float16_lt(uint16_t a, uint16_t b, float_status *s) 4021 { 4022 FloatRelation compare = float16_compare(a, b, s); 4023 return compare == float_relation_less; 4024 } 4025 4026 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt) 4027 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt) 4028 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt) 4029 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt) 4030 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt) 4031 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt) 4032 4033 static bool float16_le(uint16_t a, uint16_t b, float_status *s) 4034 { 4035 FloatRelation compare = float16_compare(a, b, s); 4036 return compare == float_relation_less || 4037 compare == float_relation_equal; 4038 } 4039 4040 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le) 4041 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le) 4042 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le) 4043 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le) 4044 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le) 4045 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le) 4046 4047 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s) 4048 { 4049 FloatRelation compare = float16_compare(a, b, s); 4050 return compare == float_relation_greater; 4051 } 4052 4053 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s) 4054 { 4055 FloatRelation compare = float32_compare(a, b, s); 4056 return compare == float_relation_greater; 4057 } 4058 4059 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s) 4060 { 4061 FloatRelation compare = float64_compare(a, b, s); 4062 return compare == float_relation_greater; 4063 } 4064 4065 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16) 4066 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32) 4067 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64) 4068 4069 static bool vmfge16(uint16_t a, uint16_t b, float_status *s) 4070 { 4071 FloatRelation compare = float16_compare(a, b, s); 4072 return compare == float_relation_greater || 4073 compare == float_relation_equal; 4074 } 4075 4076 static bool vmfge32(uint32_t a, uint32_t b, float_status *s) 4077 { 4078 FloatRelation compare = float32_compare(a, b, s); 4079 return compare == float_relation_greater || 4080 compare == float_relation_equal; 4081 } 4082 4083 static bool vmfge64(uint64_t a, uint64_t b, float_status *s) 4084 { 4085 FloatRelation compare = float64_compare(a, b, s); 4086 return compare == float_relation_greater || 4087 compare == float_relation_equal; 4088 } 4089 4090 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16) 4091 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) 4092 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) 4093 4094 static bool float16_unordered_quiet(uint16_t a, uint16_t b, float_status *s) 4095 { 4096 FloatRelation compare = float16_compare_quiet(a, b, s); 4097 return compare == float_relation_unordered; 4098 } 4099 4100 GEN_VEXT_CMP_VV_ENV(vmford_vv_h, uint16_t, H2, !float16_unordered_quiet) 4101 GEN_VEXT_CMP_VV_ENV(vmford_vv_w, uint32_t, H4, !float32_unordered_quiet) 4102 GEN_VEXT_CMP_VV_ENV(vmford_vv_d, uint64_t, H8, !float64_unordered_quiet) 4103 GEN_VEXT_CMP_VF(vmford_vf_h, uint16_t, H2, !float16_unordered_quiet) 4104 GEN_VEXT_CMP_VF(vmford_vf_w, uint32_t, H4, 
!float32_unordered_quiet) 4105 GEN_VEXT_CMP_VF(vmford_vf_d, uint64_t, H8, !float64_unordered_quiet) 4106 4107 /* Vector Floating-Point Classify Instruction */ 4108 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ 4109 static void do_##NAME(void *vd, void *vs2, int i) \ 4110 { \ 4111 TX2 s2 = *((T2 *)vs2 + HS2(i)); \ 4112 *((TD *)vd + HD(i)) = OP(s2); \ 4113 } 4114 4115 #define GEN_VEXT_V(NAME, ESZ, DSZ, CLEAR_FN) \ 4116 void HELPER(NAME)(void *vd, void *v0, void *vs2, \ 4117 CPURISCVState *env, uint32_t desc) \ 4118 { \ 4119 uint32_t vlmax = vext_maxsz(desc) / ESZ; \ 4120 uint32_t mlen = vext_mlen(desc); \ 4121 uint32_t vm = vext_vm(desc); \ 4122 uint32_t vl = env->vl; \ 4123 uint32_t i; \ 4124 \ 4125 for (i = 0; i < vl; i++) { \ 4126 if (!vm && !vext_elem_mask(v0, mlen, i)) { \ 4127 continue; \ 4128 } \ 4129 do_##NAME(vd, vs2, i); \ 4130 } \ 4131 CLEAR_FN(vd, vl, vl * DSZ, vlmax * DSZ); \ 4132 } 4133 4134 target_ulong fclass_h(uint64_t frs1) 4135 { 4136 float16 f = frs1; 4137 bool sign = float16_is_neg(f); 4138 4139 if (float16_is_infinity(f)) { 4140 return sign ? 1 << 0 : 1 << 7; 4141 } else if (float16_is_zero(f)) { 4142 return sign ? 1 << 3 : 1 << 4; 4143 } else if (float16_is_zero_or_denormal(f)) { 4144 return sign ? 1 << 2 : 1 << 5; 4145 } else if (float16_is_any_nan(f)) { 4146 float_status s = { }; /* for snan_bit_is_one */ 4147 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4148 } else { 4149 return sign ? 1 << 1 : 1 << 6; 4150 } 4151 } 4152 4153 target_ulong fclass_s(uint64_t frs1) 4154 { 4155 float32 f = frs1; 4156 bool sign = float32_is_neg(f); 4157 4158 if (float32_is_infinity(f)) { 4159 return sign ? 1 << 0 : 1 << 7; 4160 } else if (float32_is_zero(f)) { 4161 return sign ? 1 << 3 : 1 << 4; 4162 } else if (float32_is_zero_or_denormal(f)) { 4163 return sign ? 1 << 2 : 1 << 5; 4164 } else if (float32_is_any_nan(f)) { 4165 float_status s = { }; /* for snan_bit_is_one */ 4166 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4167 } else { 4168 return sign ? 1 << 1 : 1 << 6; 4169 } 4170 } 4171 4172 target_ulong fclass_d(uint64_t frs1) 4173 { 4174 float64 f = frs1; 4175 bool sign = float64_is_neg(f); 4176 4177 if (float64_is_infinity(f)) { 4178 return sign ? 1 << 0 : 1 << 7; 4179 } else if (float64_is_zero(f)) { 4180 return sign ? 1 << 3 : 1 << 4; 4181 } else if (float64_is_zero_or_denormal(f)) { 4182 return sign ? 1 << 2 : 1 << 5; 4183 } else if (float64_is_any_nan(f)) { 4184 float_status s = { }; /* for snan_bit_is_one */ 4185 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8; 4186 } else { 4187 return sign ? 1 << 1 : 1 << 6; 4188 } 4189 } 4190 4191 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h) 4192 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s) 4193 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d) 4194 GEN_VEXT_V(vfclass_v_h, 2, 2, clearh) 4195 GEN_VEXT_V(vfclass_v_w, 4, 4, clearl) 4196 GEN_VEXT_V(vfclass_v_d, 8, 8, clearq) 4197 4198 /* Vector Floating-Point Merge Instruction */ 4199 #define GEN_VFMERGE_VF(NAME, ETYPE, H, CLEAR_FN) \ 4200 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \ 4201 CPURISCVState *env, uint32_t desc) \ 4202 { \ 4203 uint32_t mlen = vext_mlen(desc); \ 4204 uint32_t vm = vext_vm(desc); \ 4205 uint32_t vl = env->vl; \ 4206 uint32_t esz = sizeof(ETYPE); \ 4207 uint32_t vlmax = vext_maxsz(desc) / esz; \ 4208 uint32_t i; \ 4209 \ 4210 for (i = 0; i < vl; i++) { \ 4211 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \ 4212 *((ETYPE *)vd + H(i)) \ 4213 = (!vm && !vext_elem_mask(v0, mlen, i) ? 
s2 : s1); \ 4214 } \ 4215 CLEAR_FN(vd, vl, vl * esz, vlmax * esz); \ 4216 } 4217 4218 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2, clearh) 4219 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4, clearl) 4220 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8, clearq) 4221 4222 /* Single-Width Floating-Point/Integer Type-Convert Instructions */ 4223 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4224 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16) 4225 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32) 4226 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64) 4227 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2, 2, clearh) 4228 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4, 4, clearl) 4229 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8, 8, clearq) 4230 4231 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */ 4232 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16) 4233 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32) 4234 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64) 4235 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2, 2, clearh) 4236 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4, 4, clearl) 4237 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8, 8, clearq) 4238 4239 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */ 4240 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16) 4241 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32) 4242 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64) 4243 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2, 2, clearh) 4244 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4, 4, clearl) 4245 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8, 8, clearq) 4246 4247 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */ 4248 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16) 4249 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32) 4250 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64) 4251 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2, 2, clearh) 4252 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4, 4, clearl) 4253 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8, 8, clearq) 4254 4255 /* Widening Floating-Point/Integer Type-Convert Instructions */ 4256 /* (TD, T2, TX2) */ 4257 #define WOP_UU_H uint32_t, uint16_t, uint16_t 4258 #define WOP_UU_W uint64_t, uint32_t, uint32_t 4259 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/ 4260 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32) 4261 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64) 4262 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 2, 4, clearl) 4263 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 4, 8, clearq) 4264 4265 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */ 4266 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32) 4267 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64) 4268 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 2, 4, clearl) 4269 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 4, 8, clearq) 4270 4271 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */ 4272 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32) 4273 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64) 4274 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 2, 4, clearl) 4275 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 4, 8, clearq) 4276 4277 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. 
*/ 4278 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32) 4279 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64) 4280 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 2, 4, clearl) 4281 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 4, 8, clearq) 4282 4283 /* 4284 * vfwcvt.f.f.v vd, vs2, vm # 4285 * Convert single-width float to double-width float. 4286 */ 4287 static uint32_t vfwcvtffv16(uint16_t a, float_status *s) 4288 { 4289 return float16_to_float32(a, true, s); 4290 } 4291 4292 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16) 4293 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64) 4294 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 2, 4, clearl) 4295 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 4, 8, clearq) 4296 4297 /* Narrowing Floating-Point/Integer Type-Convert Instructions */ 4298 /* (TD, T2, TX2) */ 4299 #define NOP_UU_H uint16_t, uint32_t, uint32_t 4300 #define NOP_UU_W uint32_t, uint64_t, uint64_t 4301 /* vfncvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */ 4302 RVVCALL(OPFVV1, vfncvt_xu_f_v_h, NOP_UU_H, H2, H4, float32_to_uint16) 4303 RVVCALL(OPFVV1, vfncvt_xu_f_v_w, NOP_UU_W, H4, H8, float64_to_uint32) 4304 GEN_VEXT_V_ENV(vfncvt_xu_f_v_h, 2, 2, clearh) 4305 GEN_VEXT_V_ENV(vfncvt_xu_f_v_w, 4, 4, clearl) 4306 4307 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */ 4308 RVVCALL(OPFVV1, vfncvt_x_f_v_h, NOP_UU_H, H2, H4, float32_to_int16) 4309 RVVCALL(OPFVV1, vfncvt_x_f_v_w, NOP_UU_W, H4, H8, float64_to_int32) 4310 GEN_VEXT_V_ENV(vfncvt_x_f_v_h, 2, 2, clearh) 4311 GEN_VEXT_V_ENV(vfncvt_x_f_v_w, 4, 4, clearl) 4312 4313 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */ 4314 RVVCALL(OPFVV1, vfncvt_f_xu_v_h, NOP_UU_H, H2, H4, uint32_to_float16) 4315 RVVCALL(OPFVV1, vfncvt_f_xu_v_w, NOP_UU_W, H4, H8, uint64_to_float32) 4316 GEN_VEXT_V_ENV(vfncvt_f_xu_v_h, 2, 2, clearh) 4317 GEN_VEXT_V_ENV(vfncvt_f_xu_v_w, 4, 4, clearl) 4318 4319 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */ 4320 RVVCALL(OPFVV1, vfncvt_f_x_v_h, NOP_UU_H, H2, H4, int32_to_float16) 4321 RVVCALL(OPFVV1, vfncvt_f_x_v_w, NOP_UU_W, H4, H8, int64_to_float32) 4322 GEN_VEXT_V_ENV(vfncvt_f_x_v_h, 2, 2, clearh) 4323 GEN_VEXT_V_ENV(vfncvt_f_x_v_w, 4, 4, clearl) 4324 4325 /* vfncvt.f.f.v vd, vs2, vm # Convert double float to single-width float. 
*/ 4326 static uint16_t vfncvtffv16(uint32_t a, float_status *s) 4327 { 4328 return float32_to_float16(a, true, s); 4329 } 4330 4331 RVVCALL(OPFVV1, vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16) 4332 RVVCALL(OPFVV1, vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32) 4333 GEN_VEXT_V_ENV(vfncvt_f_f_v_h, 2, 2, clearh) 4334 GEN_VEXT_V_ENV(vfncvt_f_f_v_w, 4, 4, clearl) 4335 4336 /* 4337 *** Vector Reduction Operations 4338 */ 4339 /* Vector Single-Width Integer Reduction Instructions */ 4340 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\ 4341 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4342 void *vs2, CPURISCVState *env, uint32_t desc) \ 4343 { \ 4344 uint32_t mlen = vext_mlen(desc); \ 4345 uint32_t vm = vext_vm(desc); \ 4346 uint32_t vl = env->vl; \ 4347 uint32_t i; \ 4348 uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \ 4349 TD s1 = *((TD *)vs1 + HD(0)); \ 4350 \ 4351 for (i = 0; i < vl; i++) { \ 4352 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4353 if (!vm && !vext_elem_mask(v0, mlen, i)) { \ 4354 continue; \ 4355 } \ 4356 s1 = OP(s1, (TD)s2); \ 4357 } \ 4358 *((TD *)vd + HD(0)) = s1; \ 4359 CLEAR_FN(vd, 1, sizeof(TD), tot); \ 4360 } 4361 4362 /* vd[0] = sum(vs1[0], vs2[*]) */ 4363 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD, clearb) 4364 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD, clearh) 4365 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD, clearl) 4366 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD, clearq) 4367 4368 /* vd[0] = maxu(vs1[0], vs2[*]) */ 4369 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX, clearb) 4370 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX, clearh) 4371 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX, clearl) 4372 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX, clearq) 4373 4374 /* vd[0] = max(vs1[0], vs2[*]) */ 4375 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX, clearb) 4376 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX, clearh) 4377 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX, clearl) 4378 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX, clearq) 4379 4380 /* vd[0] = minu(vs1[0], vs2[*]) */ 4381 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN, clearb) 4382 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN, clearh) 4383 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN, clearl) 4384 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN, clearq) 4385 4386 /* vd[0] = min(vs1[0], vs2[*]) */ 4387 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN, clearb) 4388 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN, clearh) 4389 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN, clearl) 4390 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN, clearq) 4391 4392 /* vd[0] = and(vs1[0], vs2[*]) */ 4393 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND, clearb) 4394 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND, clearh) 4395 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND, clearl) 4396 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND, clearq) 4397 4398 /* vd[0] = or(vs1[0], vs2[*]) */ 4399 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR, clearb) 4400 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR, clearh) 4401 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR, clearl) 4402 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR, clearq) 4403 
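/*
 * All of the GEN_VEXT_RED reductions in this section follow the same
 * scalar model: the accumulator starts from vs1[0], every active
 * element of vs2 is folded in with the given operator, the result is
 * written to vd[0] and the rest of vd is zeroed by the clear function.
 * The sketch below is an illustrative reference for vredsum.vs with
 * 32-bit elements only; vredsum_w_ref() and its `active` array are
 * hypothetical and nothing here is used by the generated helpers.
 */
static inline int32_t vredsum_w_ref(const int32_t *vs2, const bool *active,
                                    int32_t vs1_0, uint32_t vl)
{
    int32_t acc = vs1_0;   /* vd[0] = sum(vs1[0], vs2[*]) */
    uint32_t i;

    for (i = 0; i < vl; i++) {
        if (active[i]) {   /* vm == 1, or the corresponding bit of v0 */
            acc += vs2[i];
        }
    }
    return acc;
}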
4404 /* vd[0] = xor(vs1[0], vs2[*]) */ 4405 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR, clearb) 4406 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR, clearh) 4407 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR, clearl) 4408 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR, clearq) 4409 4410 /* Vector Widening Integer Reduction Instructions */ 4411 /* signed sum reduction into double-width accumulator */ 4412 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD, clearh) 4413 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD, clearl) 4414 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD, clearq) 4415 4416 /* Unsigned sum reduction into double-width accumulator */ 4417 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD, clearh) 4418 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD, clearl) 4419 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD, clearq) 4420 4421 /* Vector Single-Width Floating-Point Reduction Instructions */ 4422 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)\ 4423 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4424 void *vs2, CPURISCVState *env, \ 4425 uint32_t desc) \ 4426 { \ 4427 uint32_t mlen = vext_mlen(desc); \ 4428 uint32_t vm = vext_vm(desc); \ 4429 uint32_t vl = env->vl; \ 4430 uint32_t i; \ 4431 uint32_t tot = env_archcpu(env)->cfg.vlen / 8; \ 4432 TD s1 = *((TD *)vs1 + HD(0)); \ 4433 \ 4434 for (i = 0; i < vl; i++) { \ 4435 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \ 4436 if (!vm && !vext_elem_mask(v0, mlen, i)) { \ 4437 continue; \ 4438 } \ 4439 s1 = OP(s1, (TD)s2, &env->fp_status); \ 4440 } \ 4441 *((TD *)vd + HD(0)) = s1; \ 4442 CLEAR_FN(vd, 1, sizeof(TD), tot); \ 4443 } 4444 4445 /* Unordered sum */ 4446 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add, clearh) 4447 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add, clearl) 4448 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add, clearq) 4449 4450 /* Maximum value */ 4451 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maxnum, clearh) 4452 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maxnum, clearl) 4453 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maxnum, clearq) 4454 4455 /* Minimum value */ 4456 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minnum, clearh) 4457 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minnum, clearl) 4458 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minnum, clearq) 4459 4460 /* Vector Widening Floating-Point Reduction Instructions */ 4461 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */ 4462 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1, 4463 void *vs2, CPURISCVState *env, uint32_t desc) 4464 { 4465 uint32_t mlen = vext_mlen(desc); 4466 uint32_t vm = vext_vm(desc); 4467 uint32_t vl = env->vl; 4468 uint32_t i; 4469 uint32_t tot = env_archcpu(env)->cfg.vlen / 8; 4470 uint32_t s1 = *((uint32_t *)vs1 + H4(0)); 4471 4472 for (i = 0; i < vl; i++) { 4473 uint16_t s2 = *((uint16_t *)vs2 + H2(i)); 4474 if (!vm && !vext_elem_mask(v0, mlen, i)) { 4475 continue; 4476 } 4477 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status), 4478 &env->fp_status); 4479 } 4480 *((uint32_t *)vd + H4(0)) = s1; 4481 clearl(vd, 1, sizeof(uint32_t), tot); 4482 } 4483 4484 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1, 4485 void *vs2, CPURISCVState *env, uint32_t desc) 4486 { 
4487 uint32_t mlen = vext_mlen(desc); 4488 uint32_t vm = vext_vm(desc); 4489 uint32_t vl = env->vl; 4490 uint32_t i; 4491 uint32_t tot = env_archcpu(env)->cfg.vlen / 8; 4492 uint64_t s1 = *((uint64_t *)vs1); 4493 4494 for (i = 0; i < vl; i++) { 4495 uint32_t s2 = *((uint32_t *)vs2 + H4(i)); 4496 if (!vm && !vext_elem_mask(v0, mlen, i)) { 4497 continue; 4498 } 4499 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status), 4500 &env->fp_status); 4501 } 4502 *((uint64_t *)vd) = s1; 4503 clearq(vd, 1, sizeof(uint64_t), tot); 4504 } 4505 4506 /* 4507 *** Vector Mask Operations 4508 */ 4509 /* Vector Mask-Register Logical Instructions */ 4510 #define GEN_VEXT_MASK_VV(NAME, OP) \ 4511 void HELPER(NAME)(void *vd, void *v0, void *vs1, \ 4512 void *vs2, CPURISCVState *env, \ 4513 uint32_t desc) \ 4514 { \ 4515 uint32_t mlen = vext_mlen(desc); \ 4516 uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; \ 4517 uint32_t vl = env->vl; \ 4518 uint32_t i; \ 4519 int a, b; \ 4520 \ 4521 for (i = 0; i < vl; i++) { \ 4522 a = vext_elem_mask(vs1, mlen, i); \ 4523 b = vext_elem_mask(vs2, mlen, i); \ 4524 vext_set_elem_mask(vd, mlen, i, OP(b, a)); \ 4525 } \ 4526 for (; i < vlmax; i++) { \ 4527 vext_set_elem_mask(vd, mlen, i, 0); \ 4528 } \ 4529 } 4530 4531 #define DO_NAND(N, M) (!(N & M)) 4532 #define DO_ANDNOT(N, M) (N & !M) 4533 #define DO_NOR(N, M) (!(N | M)) 4534 #define DO_ORNOT(N, M) (N | !M) 4535 #define DO_XNOR(N, M) (!(N ^ M)) 4536 4537 GEN_VEXT_MASK_VV(vmand_mm, DO_AND) 4538 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND) 4539 GEN_VEXT_MASK_VV(vmandnot_mm, DO_ANDNOT) 4540 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR) 4541 GEN_VEXT_MASK_VV(vmor_mm, DO_OR) 4542 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR) 4543 GEN_VEXT_MASK_VV(vmornot_mm, DO_ORNOT) 4544 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR) 4545 4546 /* Vector mask population count vmpopc */ 4547 target_ulong HELPER(vmpopc_m)(void *v0, void *vs2, CPURISCVState *env, 4548 uint32_t desc) 4549 { 4550 target_ulong cnt = 0; 4551 uint32_t mlen = vext_mlen(desc); 4552 uint32_t vm = vext_vm(desc); 4553 uint32_t vl = env->vl; 4554 int i; 4555 4556 for (i = 0; i < vl; i++) { 4557 if (vm || vext_elem_mask(v0, mlen, i)) { 4558 if (vext_elem_mask(vs2, mlen, i)) { 4559 cnt++; 4560 } 4561 } 4562 } 4563 return cnt; 4564 } 4565 4566 /* vmfirst find-first-set mask bit*/ 4567 target_ulong HELPER(vmfirst_m)(void *v0, void *vs2, CPURISCVState *env, 4568 uint32_t desc) 4569 { 4570 uint32_t mlen = vext_mlen(desc); 4571 uint32_t vm = vext_vm(desc); 4572 uint32_t vl = env->vl; 4573 int i; 4574 4575 for (i = 0; i < vl; i++) { 4576 if (vm || vext_elem_mask(v0, mlen, i)) { 4577 if (vext_elem_mask(vs2, mlen, i)) { 4578 return i; 4579 } 4580 } 4581 } 4582 return -1LL; 4583 } 4584 4585 enum set_mask_type { 4586 ONLY_FIRST = 1, 4587 INCLUDE_FIRST, 4588 BEFORE_FIRST, 4589 }; 4590 4591 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env, 4592 uint32_t desc, enum set_mask_type type) 4593 { 4594 uint32_t mlen = vext_mlen(desc); 4595 uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen; 4596 uint32_t vm = vext_vm(desc); 4597 uint32_t vl = env->vl; 4598 int i; 4599 bool first_mask_bit = false; 4600 4601 for (i = 0; i < vl; i++) { 4602 if (!vm && !vext_elem_mask(v0, mlen, i)) { 4603 continue; 4604 } 4605 /* write a zero to all following active elements */ 4606 if (first_mask_bit) { 4607 vext_set_elem_mask(vd, mlen, i, 0); 4608 continue; 4609 } 4610 if (vext_elem_mask(vs2, mlen, i)) { 4611 first_mask_bit = true; 4612 if (type == BEFORE_FIRST) { 4613 vext_set_elem_mask(vd, mlen, i, 0); 4614 } else { 4615 
enum set_mask_type {
    ONLY_FIRST = 1,
    INCLUDE_FIRST,
    BEFORE_FIRST,
};

static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
                   uint32_t desc, enum set_mask_type type)
{
    uint32_t mlen = vext_mlen(desc);
    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    int i;
    bool first_mask_bit = false;

    for (i = 0; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        /* write a zero to all following active elements */
        if (first_mask_bit) {
            vext_set_elem_mask(vd, mlen, i, 0);
            continue;
        }
        if (vext_elem_mask(vs2, mlen, i)) {
            first_mask_bit = true;
            if (type == BEFORE_FIRST) {
                vext_set_elem_mask(vd, mlen, i, 0);
            } else {
                vext_set_elem_mask(vd, mlen, i, 1);
            }
        } else {
            if (type == ONLY_FIRST) {
                vext_set_elem_mask(vd, mlen, i, 0);
            } else {
                vext_set_elem_mask(vd, mlen, i, 1);
            }
        }
    }
    for (; i < vlmax; i++) {
        vext_set_elem_mask(vd, mlen, i, 0);
    }
}

void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
}

void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
}

void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
                     uint32_t desc)
{
    vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
}

/* Vector Iota Instruction */
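/*
 * vd[i] = number of set bits of vs2 at active element positions below i,
 * i.e. a running prefix count of the source mask.  For example, with
 * every element active and vs2 = 1 0 1 1, vd = { 0, 1, 1, 2 }.
 */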
#define GEN_VEXT_VIOTA_M(NAME, ETYPE, H, CLEAR_FN)                        \
void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env,      \
                  uint32_t desc)                                          \
{                                                                         \
    uint32_t mlen = vext_mlen(desc);                                      \
    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t sum = 0;                                                     \
    int i;                                                                \
                                                                          \
    for (i = 0; i < vl; i++) {                                            \
        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = sum;                                      \
        if (vext_elem_mask(vs2, mlen, i)) {                               \
            sum++;                                                        \
        }                                                                 \
    }                                                                     \
    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
}

GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1, clearb)
GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2, clearh)
GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4, clearl)
GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8, clearq)

/* Vector Element Index Instruction */
#define GEN_VEXT_VID_V(NAME, ETYPE, H, CLEAR_FN)                          \
void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc)  \
{                                                                         \
    uint32_t mlen = vext_mlen(desc);                                      \
    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    int i;                                                                \
                                                                          \
    for (i = 0; i < vl; i++) {                                            \
        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = i;                                        \
    }                                                                     \
    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
}

GEN_VEXT_VID_V(vid_v_b, uint8_t, H1, clearb)
GEN_VEXT_VID_V(vid_v_h, uint16_t, H2, clearh)
GEN_VEXT_VID_V(vid_v_w, uint32_t, H4, clearl)
GEN_VEXT_VID_V(vid_v_d, uint64_t, H8, clearq)

/*
 *** Vector Permutation Instructions
 */

/* Vector Slide Instructions */
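/*
 * Note that vslideup.vx leaves the first OFFSET destination elements
 * untouched (its loop starts at i = offset), while vslidedown.vx writes
 * zero wherever the source index i + OFFSET would reach beyond VLMAX.
 */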
#define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H, CLEAR_FN)                    \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t mlen = vext_mlen(desc);                                      \
    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    target_ulong offset = s1, i;                                          \
                                                                          \
    for (i = offset; i < vl; i++) {                                       \
        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset));          \
    }                                                                     \
    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
}

/* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1, clearb)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2, clearh)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4, clearl)
GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8, clearq)

#define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H, CLEAR_FN)                  \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t mlen = vext_mlen(desc);                                      \
    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    target_ulong offset = s1, i;                                          \
                                                                          \
    for (i = 0; i < vl; ++i) {                                            \
        target_ulong j = i + offset;                                      \
        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(i)) = j >= vlmax ? 0 : *((ETYPE *)vs2 + H(j));  \
    }                                                                     \
    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
}

/* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1, clearb)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2, clearh)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4, clearl)
GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8, clearq)

#define GEN_VEXT_VSLIDE1UP_VX(NAME, ETYPE, H, CLEAR_FN)                   \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t mlen = vext_mlen(desc);                                      \
    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t i;                                                           \
                                                                          \
    for (i = 0; i < vl; i++) {                                            \
        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
            continue;                                                     \
        }                                                                 \
        if (i == 0) {                                                     \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1));           \
        }                                                                 \
    }                                                                     \
    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
}

/* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, uint8_t, H1, clearb)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, uint16_t, H2, clearh)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, uint32_t, H4, clearl)
GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, uint64_t, H8, clearq)

#define GEN_VEXT_VSLIDE1DOWN_VX(NAME, ETYPE, H, CLEAR_FN)                 \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t mlen = vext_mlen(desc);                                      \
    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t i;                                                           \
                                                                          \
    for (i = 0; i < vl; i++) {                                            \
        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
            continue;                                                     \
        }                                                                 \
        if (i == vl - 1) {                                                \
            *((ETYPE *)vd + H(i)) = s1;                                   \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1));           \
        }                                                                 \
    }                                                                     \
    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
}

/* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, uint8_t, H1, clearb)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, uint16_t, H2, clearh)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, uint32_t, H4, clearl)
GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, uint64_t, H8, clearq)

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, ETYPE, H, CLEAR_FN)                    \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t mlen = vext_mlen(desc);                                      \
    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t index, i;                                                    \
                                                                          \
    for (i = 0; i < vl; i++) {                                            \
        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
            continue;                                                     \
        }                                                                 \
        index = *((ETYPE *)vs1 + H(i));                                   \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, H1, clearb)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, H2, clearh)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, H4, clearl)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, H8, clearq)

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H, CLEAR_FN)                    \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2,         \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t mlen = vext_mlen(desc);                                      \
    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
    uint32_t vm = vext_vm(desc);                                          \
    uint32_t vl = env->vl;                                                \
    uint32_t index = s1, i;                                               \
                                                                          \
    for (i = 0; i < vl; i++) {                                            \
        if (!vm && !vext_elem_mask(v0, mlen, i)) {                        \
            continue;                                                     \
        }                                                                 \
        if (index >= vlmax) {                                             \
            *((ETYPE *)vd + H(i)) = 0;                                    \
        } else {                                                          \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index));           \
        }                                                                 \
    }                                                                     \
    CLEAR_FN(vd, vl, vl * sizeof(ETYPE), vlmax * sizeof(ETYPE));          \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1, clearb)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2, clearh)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4, clearl)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8, clearq)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H, CLEAR_FN)                   \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2,               \
                  CPURISCVState *env, uint32_t desc)                      \
{                                                                         \
    uint32_t mlen = vext_mlen(desc);                                      \
    uint32_t vlmax = env_archcpu(env)->cfg.vlen / mlen;                   \
    uint32_t vl = env->vl;                                                \
    uint32_t num = 0, i;                                                  \
                                                                          \
    for (i = 0; i < vl; i++) {                                            \
        if (!vext_elem_mask(vs1, mlen, i)) {                              \
            continue;                                                     \
        }                                                                 \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i));                 \
        num++;                                                            \
    }                                                                     \
    CLEAR_FN(vd, num, num * sizeof(ETYPE), vlmax * sizeof(ETYPE));        \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1, clearb)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2, clearh)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4, clearl)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8, clearq)