/*
 * RISC-V Vector Extension Helpers for QEMU.
 *
 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2 or later, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/memop.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "tcg/tcg-gvec-desc.h"
#include "internals.h"
#include <math.h>

target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
                            target_ulong s2)
{
    int vlmax, vl;
    RISCVCPU *cpu = env_archcpu(env);
    uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
    uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
    bool vill = FIELD_EX64(s2, VTYPE, VILL);
    target_ulong reserved = FIELD_EX64(s2, VTYPE, RESERVED);

    if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
        /* only set vill bit. */
        env->vtype = FIELD_DP64(0, VTYPE, VILL, 1);
        env->vl = 0;
        env->vstart = 0;
        return 0;
    }

    vlmax = vext_get_vlmax(cpu, s2);
    if (s1 <= vlmax) {
        vl = s1;
    } else {
        vl = vlmax;
    }
    env->vl = vl;
    env->vtype = s2;
    env->vstart = 0;
    return vl;
}

/*
 * Note that vector data is stored in host-endian 64-bit chunks,
 * so addressing units smaller than that need a host-endian fixup.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define H1(x)   ((x) ^ 7)
#define H1_2(x) ((x) ^ 6)
#define H1_4(x) ((x) ^ 4)
#define H2(x)   ((x) ^ 3)
#define H4(x)   ((x) ^ 1)
#define H8(x)   ((x))
#else
#define H1(x)   (x)
#define H1_2(x) (x)
#define H1_4(x) (x)
#define H2(x)   (x)
#define H4(x)   (x)
#define H8(x)   (x)
#endif

static inline uint32_t vext_nf(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, NF);
}

static inline uint32_t vext_mlen(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, MLEN);
}

static inline uint32_t vext_vm(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, VM);
}

static inline uint32_t vext_lmul(uint32_t desc)
{
    return FIELD_EX32(simd_data(desc), VDATA, LMUL);
}

static uint32_t vext_wd(uint32_t desc)
{
    return (simd_data(desc) >> 11) & 0x1;
}

/*
 * Get vector group length in bytes. Its range is [64, 2048].
 *
 * As simd_desc supports at most 256, the max vlen is 512 bits.
 * So vlen in bytes is encoded as maxsz.
 */
static inline uint32_t vext_maxsz(uint32_t desc)
{
    return simd_maxsz(desc) << vext_lmul(desc);
}
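
/*
 * Worked example for vext_maxsz() above (illustrative only): with
 * simd_maxsz(desc) == 16 bytes (VLEN = 128) and an LMUL field of 2
 * (i.e. LMUL = 4), the vector group length is 16 << 2 = 64 bytes.
 */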

/*
 * This function checks watchpoints before the real memory access.
 *
 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
 * In user mode, there is no watchpoint support for now.
 *
 * It will trigger an exception if there is no mapping in the TLB
 * and the page table walk can't fill the TLB entry. Then the guest
 * software can return here after processing the exception, or never return.
 */
static void probe_pages(CPURISCVState *env, target_ulong addr,
                        target_ulong len, uintptr_t ra,
                        MMUAccessType access_type)
{
    target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
    target_ulong curlen = MIN(pagelen, len);

    probe_access(env, addr, curlen, access_type,
                 cpu_mmu_index(env, false), ra);
    if (len > curlen) {
        addr += curlen;
        curlen = len - curlen;
        probe_access(env, addr, curlen, access_type,
                     cpu_mmu_index(env, false), ra);
    }
}

#ifdef HOST_WORDS_BIGENDIAN
static void vext_clear(void *tail, uint32_t cnt, uint32_t tot)
{
    /*
     * Split the remaining range into two parts.
     * The first part is in the last uint64_t unit.
     * The second part starts from the next uint64_t unit.
     */
    int part1 = 0, part2 = tot - cnt;
    if (cnt % 8) {
        part1 = 8 - (cnt % 8);
        part2 = tot - cnt - part1;
        memset((void *)((uintptr_t)tail & ~(7ULL)), 0, part1);
        memset((void *)(((uintptr_t)tail + 8) & ~(7ULL)), 0, part2);
    } else {
        memset(tail, 0, part2);
    }
}
#else
static void vext_clear(void *tail, uint32_t cnt, uint32_t tot)
{
    memset(tail, 0, tot - cnt);
}
#endif

static void clearb(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
{
    int8_t *cur = ((int8_t *)vd + H1(idx));
    vext_clear(cur, cnt, tot);
}

static void clearh(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
{
    int16_t *cur = ((int16_t *)vd + H2(idx));
    vext_clear(cur, cnt, tot);
}

static void clearl(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
{
    int32_t *cur = ((int32_t *)vd + H4(idx));
    vext_clear(cur, cnt, tot);
}

static void clearq(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot)
{
    int64_t *cur = (int64_t *)vd + idx;
    vext_clear(cur, cnt, tot);
}

static inline int vext_elem_mask(void *v0, int mlen, int index)
{
    int idx = (index * mlen) / 64;
    int pos = (index * mlen) % 64;
    return (((uint64_t *)v0)[idx] >> pos) & 1;
}

/* element operations for load and store */
typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
                               uint32_t idx, void *vd, uintptr_t retaddr);
typedef void clear_fn(void *vd, uint32_t idx, uint32_t cnt, uint32_t tot);

#define GEN_VEXT_LD_ELEM(NAME, MTYPE, ETYPE, H, LDSUF)     \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    MTYPE data;                                            \
    ETYPE *cur = ((ETYPE *)vd + H(idx));                   \
    data = cpu_##LDSUF##_data_ra(env, addr, retaddr);      \
    *cur = data;                                           \
}
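
/*
 * Illustrative expansion (not part of the build): for instance,
 * GEN_VEXT_LD_ELEM(ldb_h, int8_t, int16_t, H2, ldsb) defines roughly
 *
 *   static void ldb_h(CPURISCVState *env, abi_ptr addr,
 *                     uint32_t idx, void *vd, uintptr_t retaddr)
 *   {
 *       int8_t data = cpu_ldsb_data_ra(env, addr, retaddr);
 *       *((int16_t *)vd + H2(idx)) = data;
 *   }
 *
 * i.e. a sign-extending byte load into a 16-bit vector element.
 */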

GEN_VEXT_LD_ELEM(ldb_b,  int8_t,   int8_t,   H1, ldsb)
GEN_VEXT_LD_ELEM(ldb_h,  int8_t,   int16_t,  H2, ldsb)
GEN_VEXT_LD_ELEM(ldb_w,  int8_t,   int32_t,  H4, ldsb)
GEN_VEXT_LD_ELEM(ldb_d,  int8_t,   int64_t,  H8, ldsb)
GEN_VEXT_LD_ELEM(ldh_h,  int16_t,  int16_t,  H2, ldsw)
GEN_VEXT_LD_ELEM(ldh_w,  int16_t,  int32_t,  H4, ldsw)
GEN_VEXT_LD_ELEM(ldh_d,  int16_t,  int64_t,  H8, ldsw)
GEN_VEXT_LD_ELEM(ldw_w,  int32_t,  int32_t,  H4, ldl)
GEN_VEXT_LD_ELEM(ldw_d,  int32_t,  int64_t,  H8, ldl)
GEN_VEXT_LD_ELEM(lde_b,  int8_t,   int8_t,   H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h,  int16_t,  int16_t,  H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w,  int32_t,  int32_t,  H4, ldl)
GEN_VEXT_LD_ELEM(lde_d,  int64_t,  int64_t,  H8, ldq)
GEN_VEXT_LD_ELEM(ldbu_b, uint8_t,  uint8_t,  H1, ldub)
GEN_VEXT_LD_ELEM(ldbu_h, uint8_t,  uint16_t, H2, ldub)
GEN_VEXT_LD_ELEM(ldbu_w, uint8_t,  uint32_t, H4, ldub)
GEN_VEXT_LD_ELEM(ldbu_d, uint8_t,  uint64_t, H8, ldub)
GEN_VEXT_LD_ELEM(ldhu_h, uint16_t, uint16_t, H2, lduw)
GEN_VEXT_LD_ELEM(ldhu_w, uint16_t, uint32_t, H4, lduw)
GEN_VEXT_LD_ELEM(ldhu_d, uint16_t, uint64_t, H8, lduw)
GEN_VEXT_LD_ELEM(ldwu_w, uint32_t, uint32_t, H4, ldl)
GEN_VEXT_LD_ELEM(ldwu_d, uint32_t, uint64_t, H8, ldl)

#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF)            \
static void NAME(CPURISCVState *env, abi_ptr addr,         \
                 uint32_t idx, void *vd, uintptr_t retaddr)\
{                                                          \
    ETYPE data = *((ETYPE *)vd + H(idx));                  \
    cpu_##STSUF##_data_ra(env, addr, data, retaddr);       \
}

GEN_VEXT_ST_ELEM(stb_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(stb_h, int16_t, H2, stb)
GEN_VEXT_ST_ELEM(stb_w, int32_t, H4, stb)
GEN_VEXT_ST_ELEM(stb_d, int64_t, H8, stb)
GEN_VEXT_ST_ELEM(sth_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(sth_w, int32_t, H4, stw)
GEN_VEXT_ST_ELEM(sth_d, int64_t, H8, stw)
GEN_VEXT_ST_ELEM(stw_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(stw_d, int64_t, H8, stl)
GEN_VEXT_ST_ELEM(ste_b, int8_t,  H1, stb)
GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)

/*
 *** stride: access vector element from strided memory
 */
static void
vext_ldst_stride(void *vd, void *v0, target_ulong base,
                 target_ulong stride, CPURISCVState *env,
                 uint32_t desc, uint32_t vm,
                 vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem,
                 uint32_t esz, uint32_t msz, uintptr_t ra,
                 MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t mlen = vext_mlen(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        probe_pages(env, base + stride * i, nf * msz, ra, access_type);
    }
    /* do real access */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + stride * i + k * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
    /* clear tail elements */
    if (clear_elem) {
        for (k = 0; k < nf; k++) {
            clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
        }
    }
}

#define GEN_VEXT_LD_STRIDE(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN)       \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN,      \
                     CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),            \
                     GETPC(), MMU_DATA_LOAD);                           \
}

GEN_VEXT_LD_STRIDE(vlsb_v_b,  int8_t,   int8_t,   ldb_b,  clearb)
GEN_VEXT_LD_STRIDE(vlsb_v_h,  int8_t,   int16_t,  ldb_h,  clearh)
GEN_VEXT_LD_STRIDE(vlsb_v_w,  int8_t,   int32_t,  ldb_w,  clearl)
GEN_VEXT_LD_STRIDE(vlsb_v_d,  int8_t,   int64_t,  ldb_d,  clearq)
GEN_VEXT_LD_STRIDE(vlsh_v_h,  int16_t,  int16_t,  ldh_h,  clearh)
GEN_VEXT_LD_STRIDE(vlsh_v_w,  int16_t,  int32_t,  ldh_w,  clearl)
GEN_VEXT_LD_STRIDE(vlsh_v_d,  int16_t,  int64_t,  ldh_d,  clearq)
GEN_VEXT_LD_STRIDE(vlsw_v_w,  int32_t,  int32_t,  ldw_w,  clearl)
GEN_VEXT_LD_STRIDE(vlsw_v_d,  int32_t,  int64_t,  ldw_d,  clearq)
GEN_VEXT_LD_STRIDE(vlse_v_b,  int8_t,   int8_t,   lde_b,  clearb)
GEN_VEXT_LD_STRIDE(vlse_v_h,  int16_t,  int16_t,  lde_h,  clearh)
GEN_VEXT_LD_STRIDE(vlse_v_w,  int32_t,  int32_t,  lde_w,  clearl)
GEN_VEXT_LD_STRIDE(vlse_v_d,  int64_t,  int64_t,  lde_d,  clearq)
GEN_VEXT_LD_STRIDE(vlsbu_v_b, uint8_t,  uint8_t,  ldbu_b, clearb)
GEN_VEXT_LD_STRIDE(vlsbu_v_h, uint8_t,  uint16_t, ldbu_h, clearh)
GEN_VEXT_LD_STRIDE(vlsbu_v_w, uint8_t,  uint32_t, ldbu_w, clearl)
GEN_VEXT_LD_STRIDE(vlsbu_v_d, uint8_t,  uint64_t, ldbu_d, clearq)
GEN_VEXT_LD_STRIDE(vlshu_v_h, uint16_t, uint16_t, ldhu_h, clearh)
GEN_VEXT_LD_STRIDE(vlshu_v_w, uint16_t, uint32_t, ldhu_w, clearl)
GEN_VEXT_LD_STRIDE(vlshu_v_d, uint16_t, uint64_t, ldhu_d, clearq)
GEN_VEXT_LD_STRIDE(vlswu_v_w, uint32_t, uint32_t, ldwu_w, clearl)
GEN_VEXT_LD_STRIDE(vlswu_v_d, uint32_t, uint64_t, ldwu_d, clearq)

#define GEN_VEXT_ST_STRIDE(NAME, MTYPE, ETYPE, STORE_FN)                \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  target_ulong stride, CPURISCVState *env,              \
                  uint32_t desc)                                        \
{                                                                       \
    uint32_t vm = vext_vm(desc);                                        \
    vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN,     \
                     NULL, sizeof(ETYPE), sizeof(MTYPE),                \
                     GETPC(), MMU_DATA_STORE);                          \
}

GEN_VEXT_ST_STRIDE(vssb_v_b, int8_t,  int8_t,  stb_b)
GEN_VEXT_ST_STRIDE(vssb_v_h, int8_t,  int16_t, stb_h)
GEN_VEXT_ST_STRIDE(vssb_v_w, int8_t,  int32_t, stb_w)
GEN_VEXT_ST_STRIDE(vssb_v_d, int8_t,  int64_t, stb_d)
GEN_VEXT_ST_STRIDE(vssh_v_h, int16_t, int16_t, sth_h)
GEN_VEXT_ST_STRIDE(vssh_v_w, int16_t, int32_t, sth_w)
GEN_VEXT_ST_STRIDE(vssh_v_d, int16_t, int64_t, sth_d)
GEN_VEXT_ST_STRIDE(vssw_v_w, int32_t, int32_t, stw_w)
GEN_VEXT_ST_STRIDE(vssw_v_d, int32_t, int64_t, stw_d)
GEN_VEXT_ST_STRIDE(vsse_v_b, int8_t,  int8_t,  ste_b)
GEN_VEXT_ST_STRIDE(vsse_v_h, int16_t, int16_t, ste_h)
GEN_VEXT_ST_STRIDE(vsse_v_w, int32_t, int32_t, ste_w)
GEN_VEXT_ST_STRIDE(vsse_v_d, int64_t, int64_t, ste_d)

/*
 *** unit-stride: access elements stored contiguously in memory
 */

/* unmasked unit-stride load and store operation */
static void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
             vext_ldst_elem_fn *ldst_elem, clear_fn *clear_elem,
             uint32_t esz, uint32_t msz, uintptr_t ra,
             MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    /* probe every access */
    probe_pages(env, base, env->vl * nf * msz, ra, access_type);
    /* load bytes from guest memory */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        while (k < nf) {
            target_ulong addr = base + (i * nf + k) * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
    /* clear tail elements */
    if (clear_elem) {
        for (k = 0; k < nf; k++) {
            clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
        }
    }
}

/*
 * Masked unit-stride load and store operations are handled as a special
 * case of the strided access, with stride = NF * sizeof(MTYPE).
 */

#define GEN_VEXT_LD_US(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN)           \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) * sizeof(MTYPE);                    \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN,   \
                     CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),            \
                     GETPC(), MMU_DATA_LOAD);                           \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, LOAD_FN, CLEAR_FN,                \
                 sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_LOAD); \
}
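
/*
 * For example (informal), GEN_VEXT_LD_US(vle_v_w, int32_t, int32_t, lde_w,
 * clearl) instantiates two helpers: HELPER(vle_v_w_mask), which reuses
 * vext_ldst_stride() with stride = NF * sizeof(int32_t) so the mask in v0
 * is honoured, and HELPER(vle_v_w), which takes the faster unmasked
 * vext_ldst_us() path.
 */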

GEN_VEXT_LD_US(vlb_v_b,  int8_t,   int8_t,   ldb_b,  clearb)
GEN_VEXT_LD_US(vlb_v_h,  int8_t,   int16_t,  ldb_h,  clearh)
GEN_VEXT_LD_US(vlb_v_w,  int8_t,   int32_t,  ldb_w,  clearl)
GEN_VEXT_LD_US(vlb_v_d,  int8_t,   int64_t,  ldb_d,  clearq)
GEN_VEXT_LD_US(vlh_v_h,  int16_t,  int16_t,  ldh_h,  clearh)
GEN_VEXT_LD_US(vlh_v_w,  int16_t,  int32_t,  ldh_w,  clearl)
GEN_VEXT_LD_US(vlh_v_d,  int16_t,  int64_t,  ldh_d,  clearq)
GEN_VEXT_LD_US(vlw_v_w,  int32_t,  int32_t,  ldw_w,  clearl)
GEN_VEXT_LD_US(vlw_v_d,  int32_t,  int64_t,  ldw_d,  clearq)
GEN_VEXT_LD_US(vle_v_b,  int8_t,   int8_t,   lde_b,  clearb)
GEN_VEXT_LD_US(vle_v_h,  int16_t,  int16_t,  lde_h,  clearh)
GEN_VEXT_LD_US(vle_v_w,  int32_t,  int32_t,  lde_w,  clearl)
GEN_VEXT_LD_US(vle_v_d,  int64_t,  int64_t,  lde_d,  clearq)
GEN_VEXT_LD_US(vlbu_v_b, uint8_t,  uint8_t,  ldbu_b, clearb)
GEN_VEXT_LD_US(vlbu_v_h, uint8_t,  uint16_t, ldbu_h, clearh)
GEN_VEXT_LD_US(vlbu_v_w, uint8_t,  uint32_t, ldbu_w, clearl)
GEN_VEXT_LD_US(vlbu_v_d, uint8_t,  uint64_t, ldbu_d, clearq)
GEN_VEXT_LD_US(vlhu_v_h, uint16_t, uint16_t, ldhu_h, clearh)
GEN_VEXT_LD_US(vlhu_v_w, uint16_t, uint32_t, ldhu_w, clearl)
GEN_VEXT_LD_US(vlhu_v_d, uint16_t, uint64_t, ldhu_d, clearq)
GEN_VEXT_LD_US(vlwu_v_w, uint32_t, uint32_t, ldwu_w, clearl)
GEN_VEXT_LD_US(vlwu_v_d, uint32_t, uint64_t, ldwu_d, clearq)

#define GEN_VEXT_ST_US(NAME, MTYPE, ETYPE, STORE_FN)                    \
void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base,         \
                         CPURISCVState *env, uint32_t desc)             \
{                                                                       \
    uint32_t stride = vext_nf(desc) * sizeof(MTYPE);                    \
    vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN,  \
                     NULL, sizeof(ETYPE), sizeof(MTYPE),                \
                     GETPC(), MMU_DATA_STORE);                          \
}                                                                       \
                                                                        \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                \
                  CPURISCVState *env, uint32_t desc)                    \
{                                                                       \
    vext_ldst_us(vd, base, env, desc, STORE_FN, NULL,                   \
                 sizeof(ETYPE), sizeof(MTYPE), GETPC(), MMU_DATA_STORE);\
}

GEN_VEXT_ST_US(vsb_v_b, int8_t,  int8_t,  stb_b)
GEN_VEXT_ST_US(vsb_v_h, int8_t,  int16_t, stb_h)
GEN_VEXT_ST_US(vsb_v_w, int8_t,  int32_t, stb_w)
GEN_VEXT_ST_US(vsb_v_d, int8_t,  int64_t, stb_d)
GEN_VEXT_ST_US(vsh_v_h, int16_t, int16_t, sth_h)
GEN_VEXT_ST_US(vsh_v_w, int16_t, int32_t, sth_w)
GEN_VEXT_ST_US(vsh_v_d, int16_t, int64_t, sth_d)
GEN_VEXT_ST_US(vsw_v_w, int32_t, int32_t, stw_w)
GEN_VEXT_ST_US(vsw_v_d, int32_t, int64_t, stw_d)
GEN_VEXT_ST_US(vse_v_b, int8_t,  int8_t,  ste_b)
GEN_VEXT_ST_US(vse_v_h, int16_t, int16_t, ste_h)
GEN_VEXT_ST_US(vse_v_w, int32_t, int32_t, ste_w)
GEN_VEXT_ST_US(vse_v_d, int64_t, int64_t, ste_d)

/*
 *** index: access vector element from indexed memory
 */
typedef target_ulong vext_get_index_addr(target_ulong base,
                                         uint32_t idx, void *vs2);

#define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H)        \
static target_ulong NAME(target_ulong base,            \
                         uint32_t idx, void *vs2)      \
{                                                      \
    return (base + *((ETYPE *)vs2 + H(idx)));          \
}

GEN_VEXT_GET_INDEX_ADDR(idx_b, int8_t,  H1)
GEN_VEXT_GET_INDEX_ADDR(idx_h, int16_t, H2)
GEN_VEXT_GET_INDEX_ADDR(idx_w, int32_t, H4)
GEN_VEXT_GET_INDEX_ADDR(idx_d, int64_t, H8)

static inline void
vext_ldst_index(void *vd, void *v0, target_ulong base,
                void *vs2, CPURISCVState *env, uint32_t desc,
                vext_get_index_addr get_index_addr,
                vext_ldst_elem_fn *ldst_elem,
                clear_fn *clear_elem,
                uint32_t esz, uint32_t msz, uintptr_t ra,
                MMUAccessType access_type)
{
    uint32_t i, k;
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t mlen = vext_mlen(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        probe_pages(env, get_index_addr(base, i, vs2), nf * msz, ra,
                    access_type);
    }
    /* load bytes from guest memory */
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        while (k < nf) {
            abi_ptr addr = get_index_addr(base, i, vs2) + k * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
    /* clear tail elements */
    if (clear_elem) {
        for (k = 0; k < nf; k++) {
            clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
        }
    }
}

#define GEN_VEXT_LD_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, LOAD_FN, CLEAR_FN) \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,                   \
                  void *vs2, CPURISCVState *env, uint32_t desc)            \
{                                                                          \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,                \
                    LOAD_FN, CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),       \
                    GETPC(), MMU_DATA_LOAD);                               \
}

GEN_VEXT_LD_INDEX(vlxb_v_b,  int8_t,   int8_t,   idx_b, ldb_b,  clearb)
GEN_VEXT_LD_INDEX(vlxb_v_h,  int8_t,   int16_t,  idx_h, ldb_h,  clearh)
GEN_VEXT_LD_INDEX(vlxb_v_w,  int8_t,   int32_t,  idx_w, ldb_w,  clearl)
GEN_VEXT_LD_INDEX(vlxb_v_d,  int8_t,   int64_t,  idx_d, ldb_d,  clearq)
GEN_VEXT_LD_INDEX(vlxh_v_h,  int16_t,  int16_t,  idx_h, ldh_h,  clearh)
GEN_VEXT_LD_INDEX(vlxh_v_w,  int16_t,  int32_t,  idx_w, ldh_w,  clearl)
GEN_VEXT_LD_INDEX(vlxh_v_d,  int16_t,  int64_t,  idx_d, ldh_d,  clearq)
GEN_VEXT_LD_INDEX(vlxw_v_w,  int32_t,  int32_t,  idx_w, ldw_w,  clearl)
GEN_VEXT_LD_INDEX(vlxw_v_d,  int32_t,  int64_t,  idx_d, ldw_d,  clearq)
GEN_VEXT_LD_INDEX(vlxe_v_b,  int8_t,   int8_t,   idx_b, lde_b,  clearb)
GEN_VEXT_LD_INDEX(vlxe_v_h,  int16_t,  int16_t,  idx_h, lde_h,  clearh)
GEN_VEXT_LD_INDEX(vlxe_v_w,  int32_t,  int32_t,  idx_w, lde_w,  clearl)
GEN_VEXT_LD_INDEX(vlxe_v_d,  int64_t,  int64_t,  idx_d, lde_d,  clearq)
GEN_VEXT_LD_INDEX(vlxbu_v_b, uint8_t,  uint8_t,  idx_b, ldbu_b, clearb)
GEN_VEXT_LD_INDEX(vlxbu_v_h, uint8_t,  uint16_t, idx_h, ldbu_h, clearh)
GEN_VEXT_LD_INDEX(vlxbu_v_w, uint8_t,  uint32_t, idx_w, ldbu_w, clearl)
GEN_VEXT_LD_INDEX(vlxbu_v_d, uint8_t,  uint64_t, idx_d, ldbu_d, clearq)
GEN_VEXT_LD_INDEX(vlxhu_v_h, uint16_t, uint16_t, idx_h, ldhu_h, clearh)
GEN_VEXT_LD_INDEX(vlxhu_v_w, uint16_t, uint32_t, idx_w, ldhu_w, clearl)
GEN_VEXT_LD_INDEX(vlxhu_v_d, uint16_t, uint64_t, idx_d, ldhu_d, clearq)
GEN_VEXT_LD_INDEX(vlxwu_v_w, uint32_t, uint32_t, idx_w, ldwu_w, clearl)
GEN_VEXT_LD_INDEX(vlxwu_v_d, uint32_t, uint64_t, idx_d, ldwu_d, clearq)

#define GEN_VEXT_ST_INDEX(NAME, MTYPE, ETYPE, INDEX_FN, STORE_FN)\
void HELPER(NAME)(void *vd, void *v0, target_ulong base,         \
                  void *vs2, CPURISCVState *env, uint32_t desc)  \
{                                                                \
    vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN,      \
                    STORE_FN, NULL, sizeof(ETYPE), sizeof(MTYPE),\
                    GETPC(), MMU_DATA_STORE);                    \
}

GEN_VEXT_ST_INDEX(vsxb_v_b, int8_t,  int8_t,  idx_b, stb_b)
GEN_VEXT_ST_INDEX(vsxb_v_h, int8_t,  int16_t, idx_h, stb_h)
GEN_VEXT_ST_INDEX(vsxb_v_w, int8_t,  int32_t, idx_w, stb_w)
GEN_VEXT_ST_INDEX(vsxb_v_d, int8_t,  int64_t, idx_d, stb_d)
GEN_VEXT_ST_INDEX(vsxh_v_h, int16_t, int16_t, idx_h, sth_h)
GEN_VEXT_ST_INDEX(vsxh_v_w, int16_t, int32_t, idx_w, sth_w)
GEN_VEXT_ST_INDEX(vsxh_v_d, int16_t, int64_t, idx_d, sth_d)
GEN_VEXT_ST_INDEX(vsxw_v_w, int32_t, int32_t, idx_w, stw_w)
GEN_VEXT_ST_INDEX(vsxw_v_d, int32_t, int64_t, idx_d, stw_d)
GEN_VEXT_ST_INDEX(vsxe_v_b, int8_t,  int8_t,  idx_b, ste_b)
GEN_VEXT_ST_INDEX(vsxe_v_h, int16_t, int16_t, idx_h, ste_h)
GEN_VEXT_ST_INDEX(vsxe_v_w, int32_t, int32_t, idx_w, ste_w)
GEN_VEXT_ST_INDEX(vsxe_v_d, int64_t, int64_t, idx_d, ste_d)

/*
 *** unit-stride fault-only-first load instructions
 */
static inline void
vext_ldff(void *vd, void *v0, target_ulong base,
          CPURISCVState *env, uint32_t desc,
          vext_ldst_elem_fn *ldst_elem,
          clear_fn *clear_elem,
          uint32_t esz, uint32_t msz, uintptr_t ra)
{
    void *host;
    uint32_t i, k, vl = 0;
    uint32_t mlen = vext_mlen(desc);
    uint32_t nf = vext_nf(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;
    target_ulong addr, offset, remain;

    /* probe every access */
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        addr = base + nf * i * msz;
        if (i == 0) {
            probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
        } else {
            /* if it triggers an exception, no need to check watchpoint */
            remain = nf * msz;
            while (remain > 0) {
                offset = -(addr | TARGET_PAGE_MASK);
                host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
                                         cpu_mmu_index(env, false));
                if (host) {
#ifdef CONFIG_USER_ONLY
                    if (page_check_range(addr, nf * msz, PAGE_READ) < 0) {
                        vl = i;
                        goto ProbeSuccess;
                    }
#else
                    probe_pages(env, addr, nf * msz, ra, MMU_DATA_LOAD);
#endif
                } else {
                    vl = i;
                    goto ProbeSuccess;
                }
                if (remain <= offset) {
                    break;
                }
                remain -= offset;
                addr += offset;
            }
        }
    }
ProbeSuccess:
    /* load bytes from guest memory */
    if (vl != 0) {
        env->vl = vl;
    }
    for (i = 0; i < env->vl; i++) {
        k = 0;
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        while (k < nf) {
            target_ulong addr = base + (i * nf + k) * msz;
            ldst_elem(env, addr, i + k * vlmax, vd, ra);
            k++;
        }
    }
    /* clear tail elements */
    if (vl != 0) {
        return;
    }
    for (k = 0; k < nf; k++) {
        clear_elem(vd, env->vl + k * vlmax, env->vl * esz, vlmax * esz);
    }
}

#define GEN_VEXT_LDFF(NAME, MTYPE, ETYPE, LOAD_FN, CLEAR_FN)    \
void HELPER(NAME)(void *vd, void *v0, target_ulong base,        \
                  CPURISCVState *env, uint32_t desc)            \
{                                                               \
    vext_ldff(vd, v0, base, env, desc, LOAD_FN, CLEAR_FN,       \
              sizeof(ETYPE), sizeof(MTYPE), GETPC());           \
}

GEN_VEXT_LDFF(vlbff_v_b,  int8_t,   int8_t,   ldb_b,  clearb)
GEN_VEXT_LDFF(vlbff_v_h,  int8_t,   int16_t,  ldb_h,  clearh)
GEN_VEXT_LDFF(vlbff_v_w,  int8_t,   int32_t,  ldb_w,  clearl)
GEN_VEXT_LDFF(vlbff_v_d,  int8_t,   int64_t,  ldb_d,  clearq)
GEN_VEXT_LDFF(vlhff_v_h,  int16_t,  int16_t,  ldh_h,  clearh)
GEN_VEXT_LDFF(vlhff_v_w,  int16_t,  int32_t,  ldh_w,  clearl)
GEN_VEXT_LDFF(vlhff_v_d,  int16_t,  int64_t,  ldh_d,  clearq)
GEN_VEXT_LDFF(vlwff_v_w,  int32_t,  int32_t,  ldw_w,  clearl)
GEN_VEXT_LDFF(vlwff_v_d,  int32_t,  int64_t,  ldw_d,  clearq)
GEN_VEXT_LDFF(vleff_v_b,  int8_t,   int8_t,   lde_b,  clearb)
GEN_VEXT_LDFF(vleff_v_h,  int16_t,  int16_t,  lde_h,  clearh)
GEN_VEXT_LDFF(vleff_v_w,  int32_t,  int32_t,  lde_w,  clearl)
GEN_VEXT_LDFF(vleff_v_d,  int64_t,  int64_t,  lde_d,  clearq)
GEN_VEXT_LDFF(vlbuff_v_b, uint8_t,  uint8_t,  ldbu_b, clearb)
GEN_VEXT_LDFF(vlbuff_v_h, uint8_t,  uint16_t, ldbu_h, clearh)
GEN_VEXT_LDFF(vlbuff_v_w, uint8_t,  uint32_t, ldbu_w, clearl)
GEN_VEXT_LDFF(vlbuff_v_d, uint8_t,  uint64_t, ldbu_d, clearq)
GEN_VEXT_LDFF(vlhuff_v_h, uint16_t, uint16_t, ldhu_h, clearh)
GEN_VEXT_LDFF(vlhuff_v_w, uint16_t, uint32_t, ldhu_w, clearl)
GEN_VEXT_LDFF(vlhuff_v_d, uint16_t, uint64_t, ldhu_d, clearq)
GEN_VEXT_LDFF(vlwuff_v_w, uint32_t, uint32_t, ldwu_w, clearl)
GEN_VEXT_LDFF(vlwuff_v_d, uint32_t, uint64_t, ldwu_d, clearq)
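
/*
 * Informal summary of the fault-only-first helpers above: a fault on
 * element 0 is raised as a normal exception (via probe_pages), while a
 * fault on any later element only shrinks vl to that element's index, so
 * the instruction completes without trapping.
 */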

/*
 *** Vector AMO Operations (Zvamo)
 */
typedef void vext_amo_noatomic_fn(void *vs3, target_ulong addr,
                                  uint32_t wd, uint32_t idx, CPURISCVState *env,
                                  uintptr_t retaddr);

/* no atomic operation for vector atomic instructions */
#define DO_SWAP(N, M) (M)
#define DO_AND(N, M)  (N & M)
#define DO_XOR(N, M)  (N ^ M)
#define DO_OR(N, M)   (N | M)
#define DO_ADD(N, M)  (N + M)

#define GEN_VEXT_AMO_NOATOMIC_OP(NAME, ESZ, MSZ, H, DO_OP, SUF) \
static void                                                     \
vext_##NAME##_noatomic_op(void *vs3, target_ulong addr,         \
                          uint32_t wd, uint32_t idx,            \
                          CPURISCVState *env, uintptr_t retaddr)\
{                                                               \
    typedef int##ESZ##_t ETYPE;                                 \
    typedef int##MSZ##_t MTYPE;                                 \
    typedef uint##MSZ##_t UMTYPE __attribute__((unused));       \
    ETYPE *pe3 = (ETYPE *)vs3 + H(idx);                         \
    MTYPE a = cpu_ld##SUF##_data(env, addr), b = *pe3;          \
                                                                \
    cpu_st##SUF##_data(env, addr, DO_OP(a, b));                 \
    if (wd) {                                                   \
        *pe3 = a;                                               \
    }                                                           \
}

/* Signed min/max */
#define DO_MAX(N, M)  ((N) >= (M) ? (N) : (M))
#define DO_MIN(N, M)  ((N) >= (M) ? (M) : (N))

/* Unsigned min/max */
#define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
#define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)

GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_w, 32, 32, H4, DO_SWAP, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_w,  32, 32, H4, DO_ADD,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_w,  32, 32, H4, DO_XOR,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_w,  32, 32, H4, DO_AND,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_w,   32, 32, H4, DO_OR,   l)
GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_w,  32, 32, H4, DO_MIN,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_w,  32, 32, H4, DO_MAX,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_w, 32, 32, H4, DO_MINU, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_w, 32, 32, H4, DO_MAXU, l)
#ifdef TARGET_RISCV64
GEN_VEXT_AMO_NOATOMIC_OP(vamoswapw_v_d, 64, 32, H8, DO_SWAP, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoswapd_v_d, 64, 64, H8, DO_SWAP, q)
GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_d,  64, 32, H8, DO_ADD,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoaddd_v_d,  64, 64, H8, DO_ADD,  q)
GEN_VEXT_AMO_NOATOMIC_OP(vamoxorw_v_d,  64, 32, H8, DO_XOR,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoxord_v_d,  64, 64, H8, DO_XOR,  q)
GEN_VEXT_AMO_NOATOMIC_OP(vamoandw_v_d,  64, 32, H8, DO_AND,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoandd_v_d,  64, 64, H8, DO_AND,  q)
GEN_VEXT_AMO_NOATOMIC_OP(vamoorw_v_d,   64, 32, H8, DO_OR,   l)
GEN_VEXT_AMO_NOATOMIC_OP(vamoord_v_d,   64, 64, H8, DO_OR,   q)
GEN_VEXT_AMO_NOATOMIC_OP(vamominw_v_d,  64, 32, H8, DO_MIN,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamomind_v_d,  64, 64, H8, DO_MIN,  q)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxw_v_d,  64, 32, H8, DO_MAX,  l)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxd_v_d,  64, 64, H8, DO_MAX,  q)
GEN_VEXT_AMO_NOATOMIC_OP(vamominuw_v_d, 64, 32, H8, DO_MINU, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamominud_v_d, 64, 64, H8, DO_MINU, q)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxuw_v_d, 64, 32, H8, DO_MAXU, l)
GEN_VEXT_AMO_NOATOMIC_OP(vamomaxud_v_d, 64, 64, H8, DO_MAXU, q)
#endif
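
/*
 * Illustrative expansion (not part of the build):
 * GEN_VEXT_AMO_NOATOMIC_OP(vamoaddw_v_w, 32, 32, H4, DO_ADD, l) defines
 * vext_vamoaddw_v_w_noatomic_op(), which loads the 32-bit value at addr,
 * stores back DO_ADD(memory value, vs3[idx]), and copies the original
 * memory value into vs3[idx] only when wd is set.
 */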

static inline void
vext_amo_noatomic(void *vs3, void *v0, target_ulong base,
                  void *vs2, CPURISCVState *env, uint32_t desc,
                  vext_get_index_addr get_index_addr,
                  vext_amo_noatomic_fn *noatomic_op,
                  clear_fn *clear_elem,
                  uint32_t esz, uint32_t msz, uintptr_t ra)
{
    uint32_t i;
    target_long addr;
    uint32_t wd = vext_wd(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t mlen = vext_mlen(desc);
    uint32_t vlmax = vext_maxsz(desc) / esz;

    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_LOAD);
        probe_pages(env, get_index_addr(base, i, vs2), msz, ra, MMU_DATA_STORE);
    }
    for (i = 0; i < env->vl; i++) {
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        addr = get_index_addr(base, i, vs2);
        noatomic_op(vs3, addr, wd, i, env, ra);
    }
    clear_elem(vs3, env->vl, env->vl * esz, vlmax * esz);
}

#define GEN_VEXT_AMO(NAME, MTYPE, ETYPE, INDEX_FN, CLEAR_FN)    \
void HELPER(NAME)(void *vs3, void *v0, target_ulong base,       \
                  void *vs2, CPURISCVState *env, uint32_t desc) \
{                                                               \
    vext_amo_noatomic(vs3, v0, base, vs2, env, desc,            \
                      INDEX_FN, vext_##NAME##_noatomic_op,      \
                      CLEAR_FN, sizeof(ETYPE), sizeof(MTYPE),   \
                      GETPC());                                 \
}

#ifdef TARGET_RISCV64
GEN_VEXT_AMO(vamoswapw_v_d, int32_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoswapd_v_d, int64_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoaddw_v_d,  int32_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoaddd_v_d,  int64_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoxorw_v_d,  int32_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoxord_v_d,  int64_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoandw_v_d,  int32_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoandd_v_d,  int64_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoorw_v_d,   int32_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamoord_v_d,   int64_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamominw_v_d,  int32_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamomind_v_d,  int64_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamomaxw_v_d,  int32_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamomaxd_v_d,  int64_t,  int64_t,  idx_d, clearq)
GEN_VEXT_AMO(vamominuw_v_d, uint32_t, uint64_t, idx_d, clearq)
GEN_VEXT_AMO(vamominud_v_d, uint64_t, uint64_t, idx_d, clearq)
GEN_VEXT_AMO(vamomaxuw_v_d, uint32_t, uint64_t, idx_d, clearq)
GEN_VEXT_AMO(vamomaxud_v_d, uint64_t, uint64_t, idx_d, clearq)
#endif
GEN_VEXT_AMO(vamoswapw_v_w, int32_t,  int32_t,  idx_w, clearl)
GEN_VEXT_AMO(vamoaddw_v_w,  int32_t,  int32_t,  idx_w, clearl)
GEN_VEXT_AMO(vamoxorw_v_w,  int32_t,  int32_t,  idx_w, clearl)
GEN_VEXT_AMO(vamoandw_v_w,  int32_t,  int32_t,  idx_w, clearl)
GEN_VEXT_AMO(vamoorw_v_w,   int32_t,  int32_t,  idx_w, clearl)
GEN_VEXT_AMO(vamominw_v_w,  int32_t,  int32_t,  idx_w, clearl)
GEN_VEXT_AMO(vamomaxw_v_w,  int32_t,  int32_t,  idx_w, clearl)
GEN_VEXT_AMO(vamominuw_v_w, uint32_t, uint32_t, idx_w, clearl)
GEN_VEXT_AMO(vamomaxuw_v_w, uint32_t, uint32_t, idx_w, clearl)

/*
 *** Vector Integer Arithmetic Instructions
 */

/* expand macro args before macro */
#define RVVCALL(macro, ...)  macro(__VA_ARGS__)
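
/*
 * For example (informal), RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1,
 * DO_ADD) first expands OP_SSS_B into its five element types and only then
 * invokes OPIVV2, producing do_vadd_vv_b().
 */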

/* (TD, T1, T2, TX1, TX2) */
#define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
#define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
#define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
#define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t

/* operation of two vector elements */
typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);

#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP)    \
static void do_##NAME(void *vd, void *vs1, void *vs2, int i)    \
{                                                               \
    TX1 s1 = *((T1 *)vs1 + HS1(i));                             \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
    *((TD *)vd + HD(i)) = OP(s2, s1);                           \
}
#define DO_SUB(N, M) (N - M)
#define DO_RSUB(N, M) (M - N)

RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)

static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       uint32_t esz, uint32_t dsz,
                       opivv2_fn *fn, clear_fn *clearfn)
{
    uint32_t vlmax = vext_maxsz(desc) / esz;
    uint32_t mlen = vext_mlen(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = 0; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        fn(vd, vs1, vs2, i);
    }
    clearfn(vd, vl, vl * dsz, vlmax * dsz);
}

/* generate the helpers for OPIVV */
#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    do_vext_vv(vd, v0, vs1, vs2, env, desc, ESZ, DSZ,     \
               do_##NAME, CLEAR_FN);                      \
}

GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb)
GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh)
GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl)
GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq)
GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb)
GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh)
GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl)
GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq)

typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);

/*
 * (T1)s1 gives the real operand type.
 * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
 */
#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
static void do_##NAME(void *vd, target_long s1, void *vs2, int i)   \
{                                                                   \
    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1);                      \
}
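
/*
 * Illustrative example: for vwadd_vx_b (instantiated below with WOP_SSS_B),
 * T1 is int8_t and TX1 is int16_t, so (TX1)(T1)s1 first truncates the
 * x-register value to 8 bits and then sign-extends it to the 16-bit
 * operand width.
 */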

RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)

static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
                       CPURISCVState *env, uint32_t desc,
                       uint32_t esz, uint32_t dsz,
                       opivx2_fn fn, clear_fn *clearfn)
{
    uint32_t vlmax = vext_maxsz(desc) / esz;
    uint32_t mlen = vext_mlen(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = 0; i < vl; i++) {
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        fn(vd, s1, vs2, i);
    }
    clearfn(vd, vl, vl * dsz, vlmax * dsz);
}

/* generate the helpers for OPIVX */
#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN)             \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    do_vext_vx(vd, v0, s1, vs2, env, desc, ESZ, DSZ,      \
               do_##NAME, CLEAR_FN);                      \
}

GEN_VEXT_VX(vadd_vx_b, 1, 1, clearb)
GEN_VEXT_VX(vadd_vx_h, 2, 2, clearh)
GEN_VEXT_VX(vadd_vx_w, 4, 4, clearl)
GEN_VEXT_VX(vadd_vx_d, 8, 8, clearq)
GEN_VEXT_VX(vsub_vx_b, 1, 1, clearb)
GEN_VEXT_VX(vsub_vx_h, 2, 2, clearh)
GEN_VEXT_VX(vsub_vx_w, 4, 4, clearl)
GEN_VEXT_VX(vsub_vx_d, 8, 8, clearq)
GEN_VEXT_VX(vrsub_vx_b, 1, 1, clearb)
GEN_VEXT_VX(vrsub_vx_h, 2, 2, clearh)
GEN_VEXT_VX(vrsub_vx_w, 4, 4, clearl)
GEN_VEXT_VX(vrsub_vx_d, 8, 8, clearq)

void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
        *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
    }
}

void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
        *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
    }
}

void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
    }
}

void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
        *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
    }
}

/* Vector Widening Integer Add/Subtract */
#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
#define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
#define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
#define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
#define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
#define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
#define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
#define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
#define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
#define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
GEN_VEXT_VV(vwaddu_vv_b, 1, 2, clearh)
GEN_VEXT_VV(vwaddu_vv_h, 2, 4, clearl)
GEN_VEXT_VV(vwaddu_vv_w, 4, 8, clearq)
GEN_VEXT_VV(vwsubu_vv_b, 1, 2, clearh)
GEN_VEXT_VV(vwsubu_vv_h, 2, 4, clearl)
GEN_VEXT_VV(vwsubu_vv_w, 4, 8, clearq)
GEN_VEXT_VV(vwadd_vv_b, 1, 2, clearh)
GEN_VEXT_VV(vwadd_vv_h, 2, 4, clearl)
GEN_VEXT_VV(vwadd_vv_w, 4, 8, clearq)
GEN_VEXT_VV(vwsub_vv_b, 1, 2, clearh)
GEN_VEXT_VV(vwsub_vv_h, 2, 4, clearl)
GEN_VEXT_VV(vwsub_vv_w, 4, 8, clearq)
GEN_VEXT_VV(vwaddu_wv_b, 1, 2, clearh)
GEN_VEXT_VV(vwaddu_wv_h, 2, 4, clearl)
GEN_VEXT_VV(vwaddu_wv_w, 4, 8, clearq)
GEN_VEXT_VV(vwsubu_wv_b, 1, 2, clearh)
GEN_VEXT_VV(vwsubu_wv_h, 2, 4, clearl)
GEN_VEXT_VV(vwsubu_wv_w, 4, 8, clearq)
GEN_VEXT_VV(vwadd_wv_b, 1, 2, clearh)
GEN_VEXT_VV(vwadd_wv_h, 2, 4, clearl)
GEN_VEXT_VV(vwadd_wv_w, 4, 8, clearq)
GEN_VEXT_VV(vwsub_wv_b, 1, 2, clearh)
GEN_VEXT_VV(vwsub_wv_h, 2, 4, clearl)
GEN_VEXT_VV(vwsub_wv_w, 4, 8, clearq)

RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
GEN_VEXT_VX(vwaddu_vx_b, 1, 2, clearh)
GEN_VEXT_VX(vwaddu_vx_h, 2, 4, clearl)
GEN_VEXT_VX(vwaddu_vx_w, 4, 8, clearq)
GEN_VEXT_VX(vwsubu_vx_b, 1, 2, clearh)
GEN_VEXT_VX(vwsubu_vx_h, 2, 4, clearl)
GEN_VEXT_VX(vwsubu_vx_w, 4, 8, clearq)
GEN_VEXT_VX(vwadd_vx_b, 1, 2, clearh)
GEN_VEXT_VX(vwadd_vx_h, 2, 4, clearl)
GEN_VEXT_VX(vwadd_vx_w, 4, 8, clearq)
GEN_VEXT_VX(vwsub_vx_b, 1, 2, clearh)
GEN_VEXT_VX(vwsub_vx_h, 2, 4, clearl)
GEN_VEXT_VX(vwsub_vx_w, 4, 8, clearq)
GEN_VEXT_VX(vwaddu_wx_b, 1, 2, clearh)
GEN_VEXT_VX(vwaddu_wx_h, 2, 4, clearl)
GEN_VEXT_VX(vwaddu_wx_w, 4, 8, clearq)
GEN_VEXT_VX(vwsubu_wx_b, 1, 2, clearh)
GEN_VEXT_VX(vwsubu_wx_h, 2, 4, clearl)
GEN_VEXT_VX(vwsubu_wx_w, 4, 8, clearq)
GEN_VEXT_VX(vwadd_wx_b, 1, 2, clearh)
GEN_VEXT_VX(vwadd_wx_h, 2, 4, clearl)
GEN_VEXT_VX(vwadd_wx_w, 4, 8, clearq)
GEN_VEXT_VX(vwsub_wx_b, 1, 2, clearh)
GEN_VEXT_VX(vwsub_wx_h, 2, 4, clearl)
GEN_VEXT_VX(vwsub_wx_w, 4, 8, clearq)